From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001
From: upstream source tree
Date: Sun, 15 Mar 2015 20:14:05 -0400
Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified
 gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream
 tarball.

downloading a git-generated archive based on the 'upstream' tag should provide
you with a source tree that is binary identical to the one extracted from the
above tarball.

if you have obtained the source via the command 'git clone', however, do note
that line-endings of files in your working directory might differ from
line-endings of the respective files in the upstream repository.
---
 gcc/config/README | 5 + gcc/config/alpha/alpha-modes.def | 27 + gcc/config/alpha/alpha-protos.h | 131 + gcc/config/alpha/alpha.c | 11210 +++++++ gcc/config/alpha/alpha.h | 1344 + gcc/config/alpha/alpha.md | 7999 +++++ gcc/config/alpha/alpha.opt | 134 + gcc/config/alpha/constraints.md | 121 + gcc/config/alpha/crtfastmath.c | 36 + gcc/config/alpha/driver-alpha.c | 100 + gcc/config/alpha/elf.h | 452 + gcc/config/alpha/elf.opt | 30 + gcc/config/alpha/ev4.md | 161 + gcc/config/alpha/ev5.md | 194 + gcc/config/alpha/ev6.md | 177 + gcc/config/alpha/freebsd.h | 81 + gcc/config/alpha/gnu.h | 49 + gcc/config/alpha/host-osf.c | 147 + gcc/config/alpha/libgcc-alpha-ldbl.ver | 50 + gcc/config/alpha/linux-elf.h | 57 + gcc/config/alpha/linux-unwind.h | 99 + gcc/config/alpha/linux.h | 106 + gcc/config/alpha/netbsd.h | 83 + gcc/config/alpha/openbsd.h | 45 + gcc/config/alpha/osf5-unwind.h | 329 + gcc/config/alpha/osf5.h | 278 + gcc/config/alpha/osf5.opt | 42 + gcc/config/alpha/predicates.md | 621 + gcc/config/alpha/qrnnd.asm | 163 + gcc/config/alpha/sync.md | 308 + gcc/config/alpha/t-alpha | 2 + gcc/config/alpha/t-crtfm | 5 + gcc/config/alpha/t-ieee | 2 + gcc/config/alpha/t-linux | 2 + gcc/config/alpha/t-osf-pthread | 5 + gcc/config/alpha/t-osf5 | 48 + gcc/config/alpha/t-vms | 65 + gcc/config/alpha/va_list.h | 42 + gcc/config/alpha/vms-dwarf2.asm | 77 + gcc/config/alpha/vms-dwarf2eh.asm | 30 + gcc/config/alpha/vms-gcc_shell_handler.c | 124 + gcc/config/alpha/vms-unwind.h | 293 + gcc/config/alpha/vms.h | 360 + gcc/config/alpha/vms64.h | 53 + gcc/config/alpha/x-alpha | 3 + gcc/config/alpha/x-osf | 4 + gcc/config/arc/arc-modes.def | 24 + gcc/config/arc/arc-protos.h | 63 + gcc/config/arc/arc.c | 2491 ++ gcc/config/arc/arc.h | 935 + gcc/config/arc/arc.md | 1376 + gcc/config/arc/arc.opt | 60 + gcc/config/arc/initfini.c | 155 + gcc/config/arc/lib1funcs.asm | 266 + gcc/config/arc/t-arc | 60 + gcc/config/arm/README-interworking | 749 + gcc/config/arm/aout.h | 380 + gcc/config/arm/arm-c.c | 45 + gcc/config/arm/arm-cores.def | 136 + gcc/config/arm/arm-generic.md | 153 + gcc/config/arm/arm-ldmstm.ml | 332 + gcc/config/arm/arm-modes.def | 78 + gcc/config/arm/arm-protos.h | 231 + gcc/config/arm/arm-tune.md | 5 + gcc/config/arm/arm.c | 23712 ++++++++++++++ gcc/config/arm/arm.h | 2464 ++ gcc/config/arm/arm.md | 10746 +++++++ gcc/config/arm/arm.opt | 171 + gcc/config/arm/arm1020e.md | 375 + gcc/config/arm/arm1026ejs.md | 240 + gcc/config/arm/arm1136jfs.md | 376 + gcc/config/arm/arm926ejs.md | 187 + gcc/config/arm/arm_neon.h | 12176 +++++++ gcc/config/arm/bpabi-v6m.S | 318 + gcc/config/arm/bpabi.S | 163 + gcc/config/arm/bpabi.c | 56 + gcc/config/arm/bpabi.h | 125 + gcc/config/arm/cirrus.md | 540 + gcc/config/arm/coff.h | 86 + gcc/config/arm/constraints.md | 335 + gcc/config/arm/cortex-a5.md | 297 + gcc/config/arm/cortex-a8-neon.md | 1312 + 
gcc/config/arm/cortex-a8.md | 275 + gcc/config/arm/cortex-a9-neon.md | 1237 + gcc/config/arm/cortex-a9.md | 269 + gcc/config/arm/cortex-m4-fpu.md | 111 + gcc/config/arm/cortex-m4.md | 111 + gcc/config/arm/cortex-r4.md | 292 + gcc/config/arm/cortex-r4f.md | 161 + gcc/config/arm/crti.asm | 86 + gcc/config/arm/crtn.asm | 82 + gcc/config/arm/ecos-elf.h | 27 + gcc/config/arm/elf.h | 166 + gcc/config/arm/fa526.md | 161 + gcc/config/arm/fa606te.md | 171 + gcc/config/arm/fa626te.md | 165 + gcc/config/arm/fa726te.md | 218 + gcc/config/arm/fmp626.md | 182 + gcc/config/arm/fp16.c | 145 + gcc/config/arm/fpa.md | 889 + gcc/config/arm/freebsd.h | 67 + gcc/config/arm/gentune.sh | 29 + gcc/config/arm/ieee754-df.S | 1447 + gcc/config/arm/ieee754-sf.S | 1060 + gcc/config/arm/iterators.md | 405 + gcc/config/arm/iwmmxt.md | 1332 + gcc/config/arm/ldmstm.md | 1191 + gcc/config/arm/lib1funcs.asm | 1829 ++ gcc/config/arm/libgcc-bpabi.ver | 108 + gcc/config/arm/libunwind.S | 363 + gcc/config/arm/linux-atomic.c | 278 + gcc/config/arm/linux-eabi.h | 103 + gcc/config/arm/linux-elf.h | 120 + gcc/config/arm/linux-gas.h | 56 + gcc/config/arm/mmintrin.h | 1254 + gcc/config/arm/neon-docgen.ml | 337 + gcc/config/arm/neon-gen.ml | 416 + gcc/config/arm/neon-schedgen.ml | 543 + gcc/config/arm/neon-testgen.ml | 283 + gcc/config/arm/neon.md | 5476 ++++ gcc/config/arm/neon.ml | 1857 ++ gcc/config/arm/netbsd-elf.h | 157 + gcc/config/arm/netbsd.h | 150 + gcc/config/arm/pe.c | 257 + gcc/config/arm/pe.h | 148 + gcc/config/arm/pe.opt | 23 + gcc/config/arm/pr-support.c | 401 + gcc/config/arm/predicates.md | 688 + gcc/config/arm/rtems-eabi.h | 29 + gcc/config/arm/rtems-elf.h | 45 + gcc/config/arm/semi.h | 75 + gcc/config/arm/sfp-machine.h | 105 + gcc/config/arm/symbian.h | 105 + gcc/config/arm/sync.md | 602 + gcc/config/arm/t-arm | 66 + gcc/config/arm/t-arm-elf | 128 + gcc/config/arm/t-arm-softfp | 29 + gcc/config/arm/t-bpabi | 36 + gcc/config/arm/t-linux | 34 + gcc/config/arm/t-linux-androideabi | 10 + gcc/config/arm/t-linux-eabi | 43 + gcc/config/arm/t-netbsd | 47 + gcc/config/arm/t-pe | 52 + gcc/config/arm/t-rtems | 10 + gcc/config/arm/t-rtems-eabi | 8 + gcc/config/arm/t-strongarm-elf | 61 + gcc/config/arm/t-symbian | 53 + gcc/config/arm/t-vxworks | 44 + gcc/config/arm/t-wince-pe | 56 + gcc/config/arm/thumb2.md | 1121 + gcc/config/arm/uclinux-eabi.h | 66 + gcc/config/arm/uclinux-elf.h | 88 + gcc/config/arm/unaligned-funcs.c | 57 + gcc/config/arm/unknown-elf.h | 100 + gcc/config/arm/unwind-arm.c | 1263 + gcc/config/arm/unwind-arm.h | 281 + gcc/config/arm/vec-common.md | 110 + gcc/config/arm/vfp.md | 1153 + gcc/config/arm/vfp11.md | 92 + gcc/config/arm/vxworks.h | 113 + gcc/config/arm/vxworks.opt | 60 + gcc/config/arm/wince-pe.h | 26 + gcc/config/avr/avr-c.c | 85 + gcc/config/avr/avr-devices.c | 229 + gcc/config/avr/avr-protos.h | 121 + gcc/config/avr/avr-stdint.h | 66 + gcc/config/avr/avr.c | 6416 ++++ gcc/config/avr/avr.h | 835 + gcc/config/avr/avr.md | 3248 ++ gcc/config/avr/avr.opt | 60 + gcc/config/avr/constraints.md | 109 + gcc/config/avr/driver-avr.c | 114 + gcc/config/avr/libgcc.S | 901 + gcc/config/avr/predicates.md | 140 + gcc/config/avr/rtems.h | 28 + gcc/config/avr/t-avr | 225 + gcc/config/avr/t-rtems | 3 + gcc/config/bfin/bfin-modes.def | 28 + gcc/config/bfin/bfin-protos.h | 122 + gcc/config/bfin/bfin.c | 6695 ++++ gcc/config/bfin/bfin.h | 1220 + gcc/config/bfin/bfin.md | 4211 +++ gcc/config/bfin/bfin.opt | 101 + gcc/config/bfin/constraints.md | 225 + gcc/config/bfin/crti.s | 59 + gcc/config/bfin/crtlibid.s | 29 + 
gcc/config/bfin/crtn.s | 50 + gcc/config/bfin/elf.h | 73 + gcc/config/bfin/lib1funcs.asm | 146 + gcc/config/bfin/libgcc-bfin.ver | 1914 ++ gcc/config/bfin/linux-unwind.h | 164 + gcc/config/bfin/linux.h | 54 + gcc/config/bfin/predicates.md | 241 + gcc/config/bfin/print-sysroot-suffix.sh | 81 + gcc/config/bfin/rtems.h | 28 + gcc/config/bfin/sync.md | 178 + gcc/config/bfin/t-bfin | 43 + gcc/config/bfin/t-bfin-elf | 81 + gcc/config/bfin/t-bfin-linux | 72 + gcc/config/bfin/t-bfin-uclinux | 72 + gcc/config/bfin/t-rtems | 6 + gcc/config/bfin/uclinux.h | 41 + gcc/config/cris/arit.c | 304 + gcc/config/cris/cris-protos.h | 68 + gcc/config/cris/cris.c | 4132 +++ gcc/config/cris/cris.h | 1335 + gcc/config/cris/cris.md | 5110 +++ gcc/config/cris/cris.opt | 190 + gcc/config/cris/cris_abi_symbol.c | 45 + gcc/config/cris/elf.opt | 25 + gcc/config/cris/libgcc.ver | 7 + gcc/config/cris/linux.h | 151 + gcc/config/cris/linux.opt | 33 + gcc/config/cris/mulsi3.asm | 255 + gcc/config/cris/predicates.md | 174 + gcc/config/cris/t-cris | 58 + gcc/config/cris/t-elfmulti | 34 + gcc/config/cris/t-linux | 9 + gcc/config/crx/crx-protos.h | 79 + gcc/config/crx/crx.c | 1466 + gcc/config/crx/crx.h | 478 + gcc/config/crx/crx.md | 899 + gcc/config/crx/crx.opt | 34 + gcc/config/crx/t-crx | 37 + gcc/config/darwin-64.c | 72 + gcc/config/darwin-c.c | 717 + gcc/config/darwin-crt2.c | 153 + gcc/config/darwin-crt3.c | 532 + gcc/config/darwin-driver.c | 189 + gcc/config/darwin-f.c | 60 + gcc/config/darwin-ppc-ldouble-patch.def | 113 + gcc/config/darwin-protos.h | 127 + gcc/config/darwin-sections.def | 195 + gcc/config/darwin.c | 3472 ++ gcc/config/darwin.h | 990 + gcc/config/darwin.opt | 390 + gcc/config/darwin10.h | 32 + gcc/config/darwin9.h | 50 + gcc/config/dbx.h | 27 + gcc/config/dbxcoff.h | 62 + gcc/config/dbxelf.h | 68 + gcc/config/dfp-bit.c | 680 + gcc/config/dfp-bit.h | 626 + gcc/config/divmod.c | 73 + gcc/config/elfos.h | 531 + gcc/config/fixed-bit.c | 1216 + gcc/config/fixed-bit.h | 1273 + gcc/config/flat.h | 22 + gcc/config/floatunsidf.c | 15 + gcc/config/floatunsisf.c | 18 + gcc/config/floatunsitf.c | 15 + gcc/config/floatunsixf.c | 15 + gcc/config/fp-bit.c | 1657 + gcc/config/fp-bit.h | 499 + gcc/config/fr30/constraints.md | 72 + gcc/config/fr30/crti.asm | 61 + gcc/config/fr30/crtn.asm | 44 + gcc/config/fr30/fr30-protos.h | 35 + gcc/config/fr30/fr30.c | 1066 + gcc/config/fr30/fr30.h | 871 + gcc/config/fr30/fr30.md | 1268 + gcc/config/fr30/fr30.opt | 27 + gcc/config/fr30/lib1funcs.asm | 115 + gcc/config/fr30/predicates.md | 123 + gcc/config/fr30/t-fr30 | 56 + gcc/config/freebsd-nthr.h | 21 + gcc/config/freebsd-spec.h | 144 + gcc/config/freebsd-stdint.h | 56 + gcc/config/freebsd.h | 78 + gcc/config/freebsd.opt | 65 + gcc/config/frv/cmovd.c | 51 + gcc/config/frv/cmovh.c | 47 + gcc/config/frv/cmovw.c | 51 + gcc/config/frv/constraints.md | 174 + gcc/config/frv/frv-asm.h | 48 + gcc/config/frv/frv-modes.def | 34 + gcc/config/frv/frv-protos.h | 195 + gcc/config/frv/frv.c | 9680 ++++++ gcc/config/frv/frv.h | 2188 ++ gcc/config/frv/frv.md | 8022 +++++ gcc/config/frv/frv.opt | 199 + gcc/config/frv/frvbegin.c | 157 + gcc/config/frv/frvend.c | 70 + gcc/config/frv/lib1funcs.asm | 269 + gcc/config/frv/libgcc-frv.ver | 73 + gcc/config/frv/linux.h | 75 + gcc/config/frv/modi.c | 4 + gcc/config/frv/predicates.md | 1543 + gcc/config/frv/t-frv | 113 + gcc/config/frv/t-linux | 33 + gcc/config/frv/uitod.c | 4 + gcc/config/frv/uitof.c | 4 + gcc/config/frv/ulltod.c | 4 + gcc/config/frv/ulltof.c | 4 + gcc/config/frv/umodi.c | 4 + 
gcc/config/fused-madd.opt | 25 + gcc/config/g.opt | 30 + gcc/config/glibc-stdint.h | 55 + gcc/config/gnu-user.h | 97 + gcc/config/gnu-user.opt | 39 + gcc/config/gnu.h | 46 + gcc/config/h8300/clzhi2.c | 35 + gcc/config/h8300/crti.asm | 63 + gcc/config/h8300/crtn.asm | 53 + gcc/config/h8300/ctzhi2.c | 35 + gcc/config/h8300/elf.h | 44 + gcc/config/h8300/fixunssfsi.c | 41 + gcc/config/h8300/genmova.sh | 198 + gcc/config/h8300/h8300-protos.h | 122 + gcc/config/h8300/h8300.c | 5980 ++++ gcc/config/h8300/h8300.h | 1071 + gcc/config/h8300/h8300.md | 6267 ++++ gcc/config/h8300/h8300.opt | 62 + gcc/config/h8300/lib1funcs.asm | 838 + gcc/config/h8300/mova.md | 858 + gcc/config/h8300/parityhi2.c | 36 + gcc/config/h8300/popcounthi2.c | 36 + gcc/config/h8300/predicates.md | 493 + gcc/config/h8300/rtems.h | 29 + gcc/config/h8300/t-elf | 6 + gcc/config/h8300/t-h8300 | 62 + gcc/config/h8300/t-rtems | 7 + gcc/config/host-darwin.c | 77 + gcc/config/host-darwin.h | 27 + gcc/config/host-hpux.c | 129 + gcc/config/host-linux.c | 220 + gcc/config/host-solaris.c | 125 + gcc/config/hpux-stdint.h | 34 + gcc/config/hpux11.opt | 33 + gcc/config/i386/abmintrin.h | 55 + gcc/config/i386/ammintrin.h | 88 + gcc/config/i386/athlon.md | 1187 + gcc/config/i386/atom.md | 796 + gcc/config/i386/att.h | 92 + gcc/config/i386/avxintrin.h | 1426 + gcc/config/i386/avxmath.h | 29 + gcc/config/i386/bdver1.md | 796 + gcc/config/i386/biarch64.h | 29 + gcc/config/i386/bmiintrin.h | 145 + gcc/config/i386/bmmintrin.h | 29 + gcc/config/i386/bsd.h | 100 + gcc/config/i386/constraints.md | 175 + gcc/config/i386/core2.md | 691 + gcc/config/i386/cpuid.h | 188 + gcc/config/i386/cross-stdarg.h | 73 + gcc/config/i386/crtdll.h | 42 + gcc/config/i386/crtfastmath.c | 89 + gcc/config/i386/crtprec.c | 47 + gcc/config/i386/cygming-crtbegin.c | 135 + gcc/config/i386/cygming-crtend.c | 88 + gcc/config/i386/cygming.h | 478 + gcc/config/i386/cygming.opt | 54 + gcc/config/i386/cygwin-stdint.h | 62 + gcc/config/i386/cygwin.asm | 188 + gcc/config/i386/cygwin.h | 142 + gcc/config/i386/darwin-libgcc.10.4.ver | 98 + gcc/config/i386/darwin-libgcc.10.5.ver | 102 + gcc/config/i386/darwin.h | 323 + gcc/config/i386/darwin64.h | 35 + gcc/config/i386/djgpp-stdint.h | 62 + gcc/config/i386/djgpp.h | 182 + gcc/config/i386/djgpp.opt | 28 + gcc/config/i386/driver-i386.c | 769 + gcc/config/i386/emmintrin.h | 1513 + gcc/config/i386/fma4intrin.h | 236 + gcc/config/i386/freebsd.h | 152 + gcc/config/i386/freebsd64.h | 46 + gcc/config/i386/gas.h | 124 + gcc/config/i386/geode.md | 152 + gcc/config/i386/gmm_malloc.h | 74 + gcc/config/i386/gmon-sol2.c | 459 + gcc/config/i386/gnu.h | 56 + gcc/config/i386/gstabs.h | 7 + gcc/config/i386/gthr-win32.c | 260 + gcc/config/i386/host-cygwin.c | 78 + gcc/config/i386/host-i386-darwin.c | 30 + gcc/config/i386/host-mingw32.c | 179 + gcc/config/i386/i386-builtin-types.awk | 280 + gcc/config/i386/i386-builtin-types.def | 420 + gcc/config/i386/i386-c.c | 401 + gcc/config/i386/i386-interix.h | 357 + gcc/config/i386/i386-interix3.h | 23 + gcc/config/i386/i386-modes.def | 91 + gcc/config/i386/i386-protos.h | 292 + gcc/config/i386/i386.c | 35376 +++++++++++++++++++++ gcc/config/i386/i386.h | 2400 ++ gcc/config/i386/i386.md | 18347 +++++++++++ gcc/config/i386/i386.opt | 425 + gcc/config/i386/i386elf.h | 125 + gcc/config/i386/ia32intrin.h | 234 + gcc/config/i386/immintrin.h | 203 + gcc/config/i386/k6.md | 267 + gcc/config/i386/kfreebsd-gnu.h | 25 + gcc/config/i386/knetbsd-gnu.h | 23 + gcc/config/i386/kopensolaris-gnu.h | 22 + 
gcc/config/i386/libgcc-glibc.ver | 186 + gcc/config/i386/linux-unwind.h | 197 + gcc/config/i386/linux.h | 215 + gcc/config/i386/linux64.h | 132 + gcc/config/i386/lwpintrin.h | 100 + gcc/config/i386/lynx.h | 90 + gcc/config/i386/mingw-stdint.h | 50 + gcc/config/i386/mingw-w64.h | 79 + gcc/config/i386/mingw-w64.opt | 23 + gcc/config/i386/mingw.opt | 27 + gcc/config/i386/mingw32.h | 247 + gcc/config/i386/mm3dnow.h | 215 + gcc/config/i386/mmintrin.h | 921 + gcc/config/i386/mmx.md | 1716 + gcc/config/i386/msformat-c.c | 197 + gcc/config/i386/netbsd-elf.h | 124 + gcc/config/i386/netbsd.h | 96 + gcc/config/i386/netbsd64.h | 72 + gcc/config/i386/netware-crt0.c | 79 + gcc/config/i386/netware-libgcc.c | 58 + gcc/config/i386/netware-libgcc.def | 2 + gcc/config/i386/netware-libgcc.exp | 83 + gcc/config/i386/netware.c | 229 + gcc/config/i386/netware.h | 177 + gcc/config/i386/netware.opt | 33 + gcc/config/i386/nmmintrin.h | 37 + gcc/config/i386/nto.h | 108 + gcc/config/i386/nto.opt | 33 + gcc/config/i386/nwld.c | 73 + gcc/config/i386/nwld.h | 69 + gcc/config/i386/openbsd.h | 101 + gcc/config/i386/openbsdelf.h | 134 + gcc/config/i386/pentium.md | 306 + gcc/config/i386/pmm_malloc.h | 57 + gcc/config/i386/pmmintrin.h | 128 + gcc/config/i386/popcntintrin.h | 46 + gcc/config/i386/ppro.md | 758 + gcc/config/i386/predicates.md | 1226 + gcc/config/i386/rtemself.h | 32 + gcc/config/i386/sfp-machine.h | 5 + gcc/config/i386/smmintrin.h | 831 + gcc/config/i386/sol2-10.h | 138 + gcc/config/i386/sol2-c1.asm | 151 + gcc/config/i386/sol2-ci.asm | 40 + gcc/config/i386/sol2-cn.asm | 35 + gcc/config/i386/sol2-gas.h | 31 + gcc/config/i386/sol2-gc1.asm | 155 + gcc/config/i386/sol2-unwind.h | 289 + gcc/config/i386/sol2.h | 182 + gcc/config/i386/sse.md | 12125 +++++++ gcc/config/i386/ssemath.h | 25 + gcc/config/i386/sync.md | 242 + gcc/config/i386/sysv4.h | 73 + gcc/config/i386/t-crtfm | 8 + gcc/config/i386/t-crtpc | 34 + gcc/config/i386/t-crtpic | 10 + gcc/config/i386/t-crtstuff | 7 + gcc/config/i386/t-cygming | 109 + gcc/config/i386/t-cygwin | 39 + gcc/config/i386/t-darwin | 5 + gcc/config/i386/t-darwin64 | 8 + gcc/config/i386/t-djgpp | 2 + gcc/config/i386/t-dlldir | 6 + gcc/config/i386/t-dlldir-x | 9 + gcc/config/i386/t-dw2-eh | 3 + gcc/config/i386/t-fprules-softfp | 6 + gcc/config/i386/t-gmm_malloc | 6 + gcc/config/i386/t-gnu | 1 + gcc/config/i386/t-gthr-win32 | 2 + gcc/config/i386/t-i386 | 41 + gcc/config/i386/t-i386elf | 4 + gcc/config/i386/t-interix | 8 + gcc/config/i386/t-kfreebsd | 5 + gcc/config/i386/t-linux | 9 + gcc/config/i386/t-linux64 | 36 + gcc/config/i386/t-mingw-w32 | 12 + gcc/config/i386/t-mingw-w64 | 12 + gcc/config/i386/t-mingw32 | 5 + gcc/config/i386/t-netware | 10 + gcc/config/i386/t-nto | 4 + gcc/config/i386/t-nwld | 50 + gcc/config/i386/t-openbsd | 6 + gcc/config/i386/t-pmm_malloc | 6 + gcc/config/i386/t-rtems-i386 | 69 + gcc/config/i386/t-sjlj-eh | 3 + gcc/config/i386/t-sol2-10 | 29 + gcc/config/i386/t-svr3dbx | 7 + gcc/config/i386/t-vxworks | 8 + gcc/config/i386/t-vxworksae | 5 + gcc/config/i386/tbmintrin.h | 191 + gcc/config/i386/tmmintrin.h | 244 + gcc/config/i386/unix.h | 81 + gcc/config/i386/vx-common.h | 33 + gcc/config/i386/vxworks.h | 76 + gcc/config/i386/vxworksae.h | 35 + gcc/config/i386/w32-unwind.h | 204 + gcc/config/i386/winnt-cxx.c | 175 + gcc/config/i386/winnt-stubs.c | 52 + gcc/config/i386/winnt.c | 1134 + gcc/config/i386/wmmintrin.h | 120 + gcc/config/i386/x-cygwin | 4 + gcc/config/i386/x-darwin | 4 + gcc/config/i386/x-i386 | 4 + gcc/config/i386/x-mingw32 | 31 + 
gcc/config/i386/x86-64.h | 106 + gcc/config/i386/x86intrin.h | 96 + gcc/config/i386/xm-cygwin.h | 22 + gcc/config/i386/xm-djgpp.h | 84 + gcc/config/i386/xm-mingw32.h | 35 + gcc/config/i386/xmmintrin.h | 1251 + gcc/config/i386/xopintrin.h | 835 + gcc/config/ia64/constraints.md | 154 + gcc/config/ia64/crtbegin.asm | 254 + gcc/config/ia64/crtend.asm | 121 + gcc/config/ia64/crtfastmath.c | 34 + gcc/config/ia64/crti.asm | 53 + gcc/config/ia64/crtn.asm | 43 + gcc/config/ia64/div.md | 1221 + gcc/config/ia64/elf.h | 73 + gcc/config/ia64/fde-glibc.c | 162 + gcc/config/ia64/fde-vms.c | 157 + gcc/config/ia64/freebsd.h | 55 + gcc/config/ia64/hpux.h | 235 + gcc/config/ia64/ia64-c.c | 191 + gcc/config/ia64/ia64-modes.def | 86 + gcc/config/ia64/ia64-protos.h | 101 + gcc/config/ia64/ia64.c | 11055 +++++++ gcc/config/ia64/ia64.h | 1823 ++ gcc/config/ia64/ia64.md | 5188 +++ gcc/config/ia64/ia64.opt | 181 + gcc/config/ia64/ia64intrin.h | 2 + gcc/config/ia64/ilp32.opt | 7 + gcc/config/ia64/itanium2.md | 1867 ++ gcc/config/ia64/lib1funcs.asm | 795 + gcc/config/ia64/libgcc-glibc.ver | 97 + gcc/config/ia64/libgcc-ia64.ver | 30 + gcc/config/ia64/linux-unwind.h | 199 + gcc/config/ia64/linux.h | 95 + gcc/config/ia64/predicates.md | 630 + gcc/config/ia64/quadlib.c | 78 + gcc/config/ia64/sfp-machine.h | 116 + gcc/config/ia64/sync.md | 187 + gcc/config/ia64/sysv4.h | 145 + gcc/config/ia64/t-fprules-softfp | 6 + gcc/config/ia64/t-glibc | 7 + gcc/config/ia64/t-glibc-libunwind | 4 + gcc/config/ia64/t-hpux | 75 + gcc/config/ia64/t-ia64 | 57 + gcc/config/ia64/t-vms | 49 + gcc/config/ia64/unwind-ia64.c | 2460 ++ gcc/config/ia64/unwind-ia64.h | 43 + gcc/config/ia64/vect.md | 1730 + gcc/config/ia64/vms-crtinit.asm | 24 + gcc/config/ia64/vms-unwind.h | 307 + gcc/config/ia64/vms.h | 211 + gcc/config/ia64/vms.opt | 30 + gcc/config/ia64/vms64.h | 41 + gcc/config/ia64/vms_symvec_libgcc_s.opt | 89 + gcc/config/interix.h | 110 + gcc/config/interix.opt | 36 + gcc/config/interix3.h | 34 + gcc/config/iq2000/abi | 239 + gcc/config/iq2000/constraints.md | 79 + gcc/config/iq2000/iq2000-protos.h | 48 + gcc/config/iq2000/iq2000.c | 3523 ++ gcc/config/iq2000/iq2000.h | 913 + gcc/config/iq2000/iq2000.md | 2180 ++ gcc/config/iq2000/iq2000.opt | 44 + gcc/config/iq2000/lib2extra-funcs.c | 40 + gcc/config/iq2000/predicates.md | 240 + gcc/config/iq2000/t-iq2000 | 50 + gcc/config/kfreebsd-gnu.h | 37 + gcc/config/knetbsd-gnu.h | 38 + gcc/config/kopensolaris-gnu.h | 37 + gcc/config/libgcc-glibc.ver | 55 + gcc/config/linux-android.h | 60 + gcc/config/linux-android.opt | 30 + gcc/config/linux.h | 99 + gcc/config/linux.opt | 32 + gcc/config/lm32/constraints.md | 57 + gcc/config/lm32/lm32-protos.h | 39 + gcc/config/lm32/lm32.c | 1248 + gcc/config/lm32/lm32.h | 556 + gcc/config/lm32/lm32.md | 996 + gcc/config/lm32/lm32.opt | 40 + gcc/config/lm32/predicates.md | 77 + gcc/config/lm32/rtems.h | 32 + gcc/config/lm32/sfp-machine.h | 51 + gcc/config/lm32/t-fprules-softfp | 5 + gcc/config/lm32/t-lm32 | 2 + gcc/config/lm32/uclinux-elf.h | 84 + gcc/config/lynx.h | 177 + gcc/config/lynx.opt | 31 + gcc/config/m32c/addsub.md | 260 + gcc/config/m32c/bitops.md | 422 + gcc/config/m32c/blkmov.md | 242 + gcc/config/m32c/cond.md | 293 + gcc/config/m32c/jump.md | 135 + gcc/config/m32c/m32c-lib1.S | 231 + gcc/config/m32c/m32c-lib2-trapv.c | 43 + gcc/config/m32c/m32c-lib2.c | 134 + gcc/config/m32c/m32c-modes.def | 29 + gcc/config/m32c/m32c-pragma.c | 135 + gcc/config/m32c/m32c-protos.h | 103 + gcc/config/m32c/m32c.abi | 132 + gcc/config/m32c/m32c.c | 4860 +++ 
gcc/config/m32c/m32c.h | 688 + gcc/config/m32c/m32c.md | 80 + gcc/config/m32c/m32c.opt | 44 + gcc/config/m32c/minmax.md | 58 + gcc/config/m32c/mov.md | 491 + gcc/config/m32c/muldiv.md | 288 + gcc/config/m32c/predicates.md | 299 + gcc/config/m32c/prologue.md | 199 + gcc/config/m32c/rtems.h | 33 + gcc/config/m32c/shift.md | 352 + gcc/config/m32c/t-m32c | 69 + gcc/config/m32r/constraints.md | 147 + gcc/config/m32r/initfini.c | 168 + gcc/config/m32r/libgcc-glibc.ver | 48 + gcc/config/m32r/linux.h | 101 + gcc/config/m32r/little.h | 21 + gcc/config/m32r/m32r-protos.h | 66 + gcc/config/m32r/m32r.c | 2959 ++ gcc/config/m32r/m32r.h | 1165 + gcc/config/m32r/m32r.md | 2279 ++ gcc/config/m32r/m32r.opt | 82 + gcc/config/m32r/predicates.md | 440 + gcc/config/m32r/rtems.h | 33 + gcc/config/m32r/t-linux | 57 + gcc/config/m32r/t-m32r | 82 + gcc/config/m68hc11/larith.asm | 1333 + gcc/config/m68hc11/m68hc11-crt0.S | 86 + gcc/config/m68hc11/m68hc11-protos.h | 109 + gcc/config/m68hc11/m68hc11.c | 5582 ++++ gcc/config/m68hc11/m68hc11.h | 1382 + gcc/config/m68hc11/m68hc11.md | 7579 +++++ gcc/config/m68hc11/m68hc11.opt | 94 + gcc/config/m68hc11/m68hc12.h | 45 + gcc/config/m68hc11/predicates.md | 228 + gcc/config/m68hc11/t-m68hc11 | 96 + gcc/config/m68k/cf.md | 2250 ++ gcc/config/m68k/constraints.md | 164 + gcc/config/m68k/crti.s | 44 + gcc/config/m68k/crtn.s | 40 + gcc/config/m68k/fpgnulib.c | 595 + gcc/config/m68k/ieee.opt | 24 + gcc/config/m68k/lb1sf68.asm | 4116 +++ gcc/config/m68k/linux-unwind.h | 158 + gcc/config/m68k/linux.h | 242 + gcc/config/m68k/m68020-elf.h | 30 + gcc/config/m68k/m68k-devices.def | 189 + gcc/config/m68k/m68k-modes.def | 25 + gcc/config/m68k/m68k-none.h | 19 + gcc/config/m68k/m68k-protos.h | 102 + gcc/config/m68k/m68k.c | 6615 ++++ gcc/config/m68k/m68k.h | 1034 + gcc/config/m68k/m68k.md | 7808 +++++ gcc/config/m68k/m68k.opt | 188 + gcc/config/m68k/m68kelf.h | 164 + gcc/config/m68k/m68kemb.h | 53 + gcc/config/m68k/math-68881.h | 529 + gcc/config/m68k/netbsd-elf.h | 315 + gcc/config/m68k/openbsd.h | 89 + gcc/config/m68k/predicates.md | 246 + gcc/config/m68k/print-sysroot-suffix.sh | 81 + gcc/config/m68k/rtemself.h | 33 + gcc/config/m68k/t-cf | 7 + gcc/config/m68k/t-crtstuff | 10 + gcc/config/m68k/t-floatlib | 31 + gcc/config/m68k/t-linux | 33 + gcc/config/m68k/t-m68k | 4 + gcc/config/m68k/t-m68kbare | 4 + gcc/config/m68k/t-m68kelf | 4 + gcc/config/m68k/t-mlibs | 115 + gcc/config/m68k/t-openbsd | 4 + gcc/config/m68k/t-rtems | 9 + gcc/config/m68k/t-slibgcc-elf-ver | 3 + gcc/config/m68k/t-uclinux | 36 + gcc/config/m68k/uclinux-oldabi.h | 70 + gcc/config/m68k/uclinux.h | 72 + gcc/config/m68k/uclinux.opt | 36 + gcc/config/mcore/constraints.md | 112 + gcc/config/mcore/crti.asm | 62 + gcc/config/mcore/crtn.asm | 44 + gcc/config/mcore/lib1.asm | 303 + gcc/config/mcore/mcore-elf.h | 129 + gcc/config/mcore/mcore-pe.h | 102 + gcc/config/mcore/mcore-protos.h | 69 + gcc/config/mcore/mcore.c | 3206 ++ gcc/config/mcore/mcore.h | 851 + gcc/config/mcore/mcore.md | 3085 ++ gcc/config/mcore/mcore.opt | 75 + gcc/config/mcore/predicates.md | 338 + gcc/config/mcore/t-mcore | 73 + gcc/config/mcore/t-mcore-pe | 56 + gcc/config/memcmp.c | 16 + gcc/config/memcpy.c | 12 + gcc/config/memmove.c | 20 + gcc/config/memset.c | 11 + gcc/config/mep/constraints.md | 162 + gcc/config/mep/default.h | 10 + gcc/config/mep/intrinsics.h | 620 + gcc/config/mep/intrinsics.md | 21568 +++++++++++++ gcc/config/mep/ivc2-template.h | 9 + gcc/config/mep/mep-c5.cpu | 278 + gcc/config/mep/mep-core.cpu | 3081 ++ 
gcc/config/mep/mep-default.cpu | 26 + gcc/config/mep/mep-ext-cop.cpu | 24 + gcc/config/mep/mep-intrin.h | 8933 ++++++ gcc/config/mep/mep-ivc2.cpu | 9776 ++++++ gcc/config/mep/mep-lib1.asm | 125 + gcc/config/mep/mep-lib2.c | 139 + gcc/config/mep/mep-pragma.c | 404 + gcc/config/mep/mep-protos.h | 133 + gcc/config/mep/mep-tramp.c | 103 + gcc/config/mep/mep.c | 7464 +++++ gcc/config/mep/mep.cpu | 21 + gcc/config/mep/mep.h | 824 + gcc/config/mep/mep.md | 2266 ++ gcc/config/mep/mep.opt | 162 + gcc/config/mep/predicates.md | 184 + gcc/config/mep/t-mep | 105 + gcc/config/microblaze/constraints.md | 72 + gcc/config/microblaze/crti.s | 39 + gcc/config/microblaze/crtn.s | 35 + gcc/config/microblaze/linux.h | 35 + gcc/config/microblaze/microblaze-c.c | 92 + gcc/config/microblaze/microblaze-protos.h | 58 + gcc/config/microblaze/microblaze.c | 3062 ++ gcc/config/microblaze/microblaze.h | 938 + gcc/config/microblaze/microblaze.md | 2231 ++ gcc/config/microblaze/microblaze.opt | 115 + gcc/config/microblaze/predicates.md | 64 + gcc/config/microblaze/t-microblaze | 33 + gcc/config/mips/10000.md | 253 + gcc/config/mips/20kc.md | 284 + gcc/config/mips/24k.md | 457 + gcc/config/mips/3000.md | 71 + gcc/config/mips/4000.md | 32 + gcc/config/mips/4100.md | 51 + gcc/config/mips/4130.md | 153 + gcc/config/mips/4300.md | 85 + gcc/config/mips/4600.md | 87 + gcc/config/mips/4k.md | 153 + gcc/config/mips/5000.md | 80 + gcc/config/mips/5400.md | 184 + gcc/config/mips/5500.md | 227 + gcc/config/mips/5k.md | 229 + gcc/config/mips/6000.md | 56 + gcc/config/mips/7000.md | 214 + gcc/config/mips/74k.md | 418 + gcc/config/mips/9000.md | 151 + gcc/config/mips/constraints.md | 233 + gcc/config/mips/crtfastmath.c | 53 + gcc/config/mips/crti.asm | 49 + gcc/config/mips/crtn.asm | 52 + gcc/config/mips/driver-native.c | 81 + gcc/config/mips/elf.h | 51 + gcc/config/mips/elfoabi.h | 40 + gcc/config/mips/elforion.h | 20 + gcc/config/mips/generic.md | 105 + gcc/config/mips/iris6.h | 341 + gcc/config/mips/iris6.opt | 45 + gcc/config/mips/irix-crti.asm | 81 + gcc/config/mips/irix-crtn.asm | 50 + gcc/config/mips/libgcc-mips16.ver | 86 + gcc/config/mips/linux-unwind.h | 121 + gcc/config/mips/linux.h | 151 + gcc/config/mips/linux64.h | 70 + gcc/config/mips/loongson.h | 690 + gcc/config/mips/loongson.md | 529 + gcc/config/mips/loongson2ef.md | 252 + gcc/config/mips/loongson3a.md | 137 + gcc/config/mips/mips-dsp.md | 1198 + gcc/config/mips/mips-dspr2.md | 623 + gcc/config/mips/mips-fixed.md | 156 + gcc/config/mips/mips-ftypes.def | 126 + gcc/config/mips/mips-modes.def | 48 + gcc/config/mips/mips-protos.h | 341 + gcc/config/mips/mips-ps-3d.md | 635 + gcc/config/mips/mips.c | 16662 ++++++++++ gcc/config/mips/mips.h | 2984 ++ gcc/config/mips/mips.md | 6486 ++++ gcc/config/mips/mips.opt | 310 + gcc/config/mips/mips16.S | 712 + gcc/config/mips/netbsd.h | 187 + gcc/config/mips/octeon.md | 88 + gcc/config/mips/openbsd.h | 101 + gcc/config/mips/predicates.md | 342 + gcc/config/mips/r3900.h | 40 + gcc/config/mips/rtems.h | 35 + gcc/config/mips/sb1.md | 579 + gcc/config/mips/sdb.h | 87 + gcc/config/mips/sde.h | 135 + gcc/config/mips/sde.opt | 28 + gcc/config/mips/sdemtk.h | 103 + gcc/config/mips/sr71k.md | 337 + gcc/config/mips/st.h | 31 + gcc/config/mips/sync.md | 560 + gcc/config/mips/t-elf | 44 + gcc/config/mips/t-iris | 9 + gcc/config/mips/t-iris6 | 38 + gcc/config/mips/t-isa3264 | 54 + gcc/config/mips/t-libgcc-mips16 | 45 + gcc/config/mips/t-linux64 | 40 + gcc/config/mips/t-mips | 41 + gcc/config/mips/t-r3900 | 35 + gcc/config/mips/t-rtems | 34 + 
gcc/config/mips/t-sb1 | 62 + gcc/config/mips/t-sde | 53 + gcc/config/mips/t-sdemtk | 44 + gcc/config/mips/t-slibgcc-irix | 52 + gcc/config/mips/t-sr71k | 67 + gcc/config/mips/t-st | 32 + gcc/config/mips/t-vr | 130 + gcc/config/mips/t-vxworks | 35 + gcc/config/mips/vr.h | 58 + gcc/config/mips/vr4120-div.S | 74 + gcc/config/mips/vxworks.h | 82 + gcc/config/mips/x-native | 3 + gcc/config/mips/xlr.md | 89 + gcc/config/mmix/crti.asm | 116 + gcc/config/mmix/crtn.asm | 87 + gcc/config/mmix/mmix-modes.def | 49 + gcc/config/mmix/mmix-protos.h | 97 + gcc/config/mmix/mmix.c | 2838 ++ gcc/config/mmix/mmix.h | 899 + gcc/config/mmix/mmix.md | 1240 + gcc/config/mmix/mmix.opt | 99 + gcc/config/mmix/predicates.md | 155 + gcc/config/mmix/t-mmix | 31 + gcc/config/mn10300/constraints.md | 107 + gcc/config/mn10300/linux.h | 90 + gcc/config/mn10300/mn10300-modes.def | 24 + gcc/config/mn10300/mn10300-protos.h | 57 + gcc/config/mn10300/mn10300.c | 3254 ++ gcc/config/mn10300/mn10300.h | 766 + gcc/config/mn10300/mn10300.md | 2154 ++ gcc/config/mn10300/mn10300.opt | 56 + gcc/config/mn10300/predicates.md | 69 + gcc/config/mn10300/t-linux | 29 + gcc/config/mn10300/t-mn10300 | 36 + gcc/config/moxie/constraints.md | 56 + gcc/config/moxie/crti.asm | 40 + gcc/config/moxie/crtn.asm | 34 + gcc/config/moxie/moxie-protos.h | 25 + gcc/config/moxie/moxie.c | 592 + gcc/config/moxie/moxie.h | 500 + gcc/config/moxie/moxie.md | 450 + gcc/config/moxie/predicates.md | 55 + gcc/config/moxie/rtems.h | 35 + gcc/config/moxie/sfp-machine.h | 57 + gcc/config/moxie/t-moxie | 20 + gcc/config/moxie/t-moxie-softfp | 9 + gcc/config/moxie/uclinux.h | 39 + gcc/config/netbsd-aout.h | 196 + gcc/config/netbsd-elf.h | 86 + gcc/config/netbsd-elf.opt | 33 + gcc/config/netbsd.h | 222 + gcc/config/netbsd.opt | 36 + gcc/config/newlib-stdint.h | 64 + gcc/config/openbsd-libpthread.h | 22 + gcc/config/openbsd-oldgas.h | 22 + gcc/config/openbsd-stdint.h | 31 + gcc/config/openbsd.h | 300 + gcc/config/openbsd.opt | 36 + gcc/config/pa/constraints.md | 140 + gcc/config/pa/elf.h | 92 + gcc/config/pa/fptr.c | 131 + gcc/config/pa/hpux-unwind.h | 361 + gcc/config/pa/lib2funcs.asm | 74 + gcc/config/pa/linux-atomic.c | 305 + gcc/config/pa/linux-unwind.h | 141 + gcc/config/pa/milli64.S | 2134 ++ gcc/config/pa/pa-64.h | 100 + gcc/config/pa/pa-hpux.h | 119 + gcc/config/pa/pa-hpux.opt | 37 + gcc/config/pa/pa-hpux10.h | 144 + gcc/config/pa/pa-hpux10.opt | 22 + gcc/config/pa/pa-hpux1010.h | 27 + gcc/config/pa/pa-hpux1010.opt | 23 + gcc/config/pa/pa-hpux11.h | 189 + gcc/config/pa/pa-hpux1111.h | 27 + gcc/config/pa/pa-hpux1111.opt | 23 + gcc/config/pa/pa-linux.h | 138 + gcc/config/pa/pa-modes.def | 32 + gcc/config/pa/pa-protos.h | 172 + gcc/config/pa/pa.c | 10471 ++++++ gcc/config/pa/pa.h | 1572 + gcc/config/pa/pa.md | 9543 ++++++ gcc/config/pa/pa.opt | 118 + gcc/config/pa/pa32-linux.h | 67 + gcc/config/pa/pa32-regs.h | 373 + gcc/config/pa/pa64-hpux.h | 442 + gcc/config/pa/pa64-hpux.opt | 27 + gcc/config/pa/pa64-linux.h | 64 + gcc/config/pa/pa64-regs.h | 294 + gcc/config/pa/pa64-start.h | 8 + gcc/config/pa/predicates.md | 524 + gcc/config/pa/quadlib.c | 245 + gcc/config/pa/som.h | 341 + gcc/config/pa/stublib.c | 97 + gcc/config/pa/t-dce-thr | 5 + gcc/config/pa/t-hpux-shlib | 46 + gcc/config/pa/t-linux | 39 + gcc/config/pa/t-linux64 | 34 + gcc/config/pa/t-pa | 7 + gcc/config/pa/t-pa-hpux | 7 + gcc/config/pa/t-pa-hpux10 | 2 + gcc/config/pa/t-pa-hpux11 | 31 + gcc/config/pa/t-pa64 | 67 + gcc/config/pa/t-slibgcc-dwarf-ver | 3 + gcc/config/pa/t-slibgcc-sjlj-ver | 3 + 
gcc/config/pdp11/constraints.md | 81 + gcc/config/pdp11/pdp11-modes.def | 26 + gcc/config/pdp11/pdp11-protos.h | 47 + gcc/config/pdp11/pdp11.c | 1923 ++ gcc/config/pdp11/pdp11.h | 685 + gcc/config/pdp11/pdp11.md | 1386 + gcc/config/pdp11/pdp11.opt | 87 + gcc/config/pdp11/predicates.md | 55 + gcc/config/pdp11/t-pdp11 | 44 + gcc/config/picochip/constraints.md | 64 + gcc/config/picochip/dfa_space.md | 43 + gcc/config/picochip/dfa_speed.md | 123 + gcc/config/picochip/libgccExtras/adddi3.asm | 194 + gcc/config/picochip/libgccExtras/ashlsi3.asm | 193 + gcc/config/picochip/libgccExtras/ashlsi3.c | 82 + gcc/config/picochip/libgccExtras/ashrsi3.asm | 202 + gcc/config/picochip/libgccExtras/ashrsi3.c | 113 + gcc/config/picochip/libgccExtras/clzsi2.asm | 189 + gcc/config/picochip/libgccExtras/cmpsi2.asm | 212 + gcc/config/picochip/libgccExtras/divmod15.asm | 261 + gcc/config/picochip/libgccExtras/divmodhi4.asm | 246 + gcc/config/picochip/libgccExtras/divmodsi4.asm | 233 + gcc/config/picochip/libgccExtras/fake_libgcc.asm | 6 + gcc/config/picochip/libgccExtras/longjmp.asm | 182 + gcc/config/picochip/libgccExtras/lshrsi3.asm | 190 + gcc/config/picochip/libgccExtras/lshrsi3.c | 76 + gcc/config/picochip/libgccExtras/parityhi2.asm | 179 + gcc/config/picochip/libgccExtras/popcounthi2.asm | 201 + gcc/config/picochip/libgccExtras/setjmp.asm | 182 + gcc/config/picochip/libgccExtras/subdi3.asm | 191 + gcc/config/picochip/libgccExtras/ucmpsi2.asm | 209 + gcc/config/picochip/libgccExtras/udivmodhi4.asm | 238 + gcc/config/picochip/libgccExtras/udivmodsi4.asm | 318 + gcc/config/picochip/picochip-protos.h | 128 + gcc/config/picochip/picochip.c | 4703 +++ gcc/config/picochip/picochip.h | 678 + gcc/config/picochip/picochip.md | 2622 ++ gcc/config/picochip/picochip.opt | 46 + gcc/config/picochip/predicates.md | 72 + gcc/config/picochip/t-picochip | 80 + gcc/config/print-sysroot-suffix.sh | 145 + gcc/config/rpath.opt | 29 + gcc/config/rs6000/40x.md | 120 + gcc/config/rs6000/440.md | 133 + gcc/config/rs6000/476.md | 142 + gcc/config/rs6000/603.md | 143 + gcc/config/rs6000/6xx.md | 275 + gcc/config/rs6000/7450.md | 185 + gcc/config/rs6000/750cl.h | 30 + gcc/config/rs6000/7xx.md | 184 + gcc/config/rs6000/8540.md | 250 + gcc/config/rs6000/a2.md | 134 + gcc/config/rs6000/aix-stdint.h | 51 + gcc/config/rs6000/aix.h | 260 + gcc/config/rs6000/aix43.h | 185 + gcc/config/rs6000/aix51.h | 189 + gcc/config/rs6000/aix52.h | 199 + gcc/config/rs6000/aix53.h | 199 + gcc/config/rs6000/aix61.h | 200 + gcc/config/rs6000/aix64.opt | 38 + gcc/config/rs6000/altivec.h | 493 + gcc/config/rs6000/altivec.md | 2749 ++ gcc/config/rs6000/biarch64.h | 26 + gcc/config/rs6000/cell.md | 400 + gcc/config/rs6000/constraints.md | 201 + gcc/config/rs6000/crtresfpr.asm | 81 + gcc/config/rs6000/crtresgpr.asm | 81 + gcc/config/rs6000/crtresxfpr.asm | 126 + gcc/config/rs6000/crtresxgpr.asm | 124 + gcc/config/rs6000/crtsavfpr.asm | 81 + gcc/config/rs6000/crtsavgpr.asm | 81 + gcc/config/rs6000/darwin-asm.h | 51 + gcc/config/rs6000/darwin-fallback.c | 487 + gcc/config/rs6000/darwin-fpsave.asm | 92 + gcc/config/rs6000/darwin-ldouble-format | 91 + gcc/config/rs6000/darwin-ldouble.c | 438 + gcc/config/rs6000/darwin-libgcc.10.4.ver | 93 + gcc/config/rs6000/darwin-libgcc.10.5.ver | 106 + gcc/config/rs6000/darwin-tramp.asm | 125 + gcc/config/rs6000/darwin-unwind.h | 30 + gcc/config/rs6000/darwin-vecsave.asm | 155 + gcc/config/rs6000/darwin-world.asm | 259 + gcc/config/rs6000/darwin.h | 438 + gcc/config/rs6000/darwin.md | 442 + gcc/config/rs6000/darwin.opt | 42 + 
gcc/config/rs6000/darwin64.h | 35 + gcc/config/rs6000/darwin7.h | 30 + gcc/config/rs6000/darwin8.h | 32 + gcc/config/rs6000/default64.h | 24 + gcc/config/rs6000/dfp.md | 594 + gcc/config/rs6000/driver-rs6000.c | 547 + gcc/config/rs6000/e300c2c3.md | 189 + gcc/config/rs6000/e500-double.h | 24 + gcc/config/rs6000/e500.h | 57 + gcc/config/rs6000/e500crtres32gpr.asm | 73 + gcc/config/rs6000/e500crtres64gpr.asm | 73 + gcc/config/rs6000/e500crtres64gprctr.asm | 90 + gcc/config/rs6000/e500crtrest32gpr.asm | 75 + gcc/config/rs6000/e500crtrest64gpr.asm | 74 + gcc/config/rs6000/e500crtresx32gpr.asm | 75 + gcc/config/rs6000/e500crtresx64gpr.asm | 75 + gcc/config/rs6000/e500crtsav32gpr.asm | 73 + gcc/config/rs6000/e500crtsav64gpr.asm | 72 + gcc/config/rs6000/e500crtsav64gprctr.asm | 91 + gcc/config/rs6000/e500crtsavg32gpr.asm | 73 + gcc/config/rs6000/e500crtsavg64gpr.asm | 73 + gcc/config/rs6000/e500crtsavg64gprctr.asm | 90 + gcc/config/rs6000/e500mc.md | 200 + gcc/config/rs6000/e500mc64.md | 191 + gcc/config/rs6000/eabi-ci.asm | 113 + gcc/config/rs6000/eabi-cn.asm | 104 + gcc/config/rs6000/eabi.asm | 289 + gcc/config/rs6000/eabi.h | 44 + gcc/config/rs6000/eabialtivec.h | 30 + gcc/config/rs6000/eabisim.h | 54 + gcc/config/rs6000/eabispe.h | 54 + gcc/config/rs6000/freebsd.h | 80 + gcc/config/rs6000/gnu.h | 37 + gcc/config/rs6000/host-darwin.c | 154 + gcc/config/rs6000/host-ppc64-darwin.c | 30 + gcc/config/rs6000/libgcc-ppc-glibc.ver | 73 + gcc/config/rs6000/libgcc-ppc64.ver | 7 + gcc/config/rs6000/linux-unwind.h | 355 + gcc/config/rs6000/linux.h | 134 + gcc/config/rs6000/linux64.h | 569 + gcc/config/rs6000/linux64.opt | 28 + gcc/config/rs6000/linuxaltivec.h | 30 + gcc/config/rs6000/linuxspe.h | 44 + gcc/config/rs6000/lynx.h | 125 + gcc/config/rs6000/milli.exp | 7 + gcc/config/rs6000/mpc.md | 111 + gcc/config/rs6000/netbsd.h | 93 + gcc/config/rs6000/option-defaults.h | 64 + gcc/config/rs6000/paired.h | 75 + gcc/config/rs6000/paired.md | 527 + gcc/config/rs6000/power4.md | 410 + gcc/config/rs6000/power5.md | 308 + gcc/config/rs6000/power6.md | 573 + gcc/config/rs6000/power7.md | 318 + gcc/config/rs6000/ppc-asm.h | 358 + gcc/config/rs6000/ppc64-fp.c | 239 + gcc/config/rs6000/ppu_intrinsics.h | 727 + gcc/config/rs6000/predicates.md | 1423 + gcc/config/rs6000/rios1.md | 191 + gcc/config/rs6000/rios2.md | 129 + gcc/config/rs6000/rs6000-builtin.def | 1020 + gcc/config/rs6000/rs6000-c.c | 3772 +++ gcc/config/rs6000/rs6000-modes.def | 41 + gcc/config/rs6000/rs6000-opts.h | 144 + gcc/config/rs6000/rs6000-protos.h | 198 + gcc/config/rs6000/rs6000.c | 28250 ++++++++++++++++ gcc/config/rs6000/rs6000.h | 2439 ++ gcc/config/rs6000/rs6000.md | 16361 ++++++++++ gcc/config/rs6000/rs6000.opt | 464 + gcc/config/rs6000/rs64.md | 154 + gcc/config/rs6000/rtems.h | 56 + gcc/config/rs6000/secureplt.h | 20 + gcc/config/rs6000/sfp-machine.h | 68 + gcc/config/rs6000/si2vmx.h | 2048 ++ gcc/config/rs6000/singlefp.h | 40 + gcc/config/rs6000/sol-ci.asm | 94 + gcc/config/rs6000/sol-cn.asm | 72 + gcc/config/rs6000/spe.h | 1107 + gcc/config/rs6000/spe.md | 3190 ++ gcc/config/rs6000/spu2vmx.h | 2415 ++ gcc/config/rs6000/sync.md | 622 + gcc/config/rs6000/sysv4.h | 1039 + gcc/config/rs6000/sysv4.opt | 145 + gcc/config/rs6000/sysv4le.h | 36 + gcc/config/rs6000/t-aix43 | 95 + gcc/config/rs6000/t-aix52 | 75 + gcc/config/rs6000/t-darwin | 55 + gcc/config/rs6000/t-darwin64 | 12 + gcc/config/rs6000/t-darwin8 | 3 + gcc/config/rs6000/t-fprules | 29 + gcc/config/rs6000/t-fprules-fpbit | 29 + gcc/config/rs6000/t-fprules-softfp | 6 + 
gcc/config/rs6000/t-freebsd | 24 + gcc/config/rs6000/t-linux | 9 + gcc/config/rs6000/t-linux64 | 45 + gcc/config/rs6000/t-lynx | 56 + gcc/config/rs6000/t-netbsd | 90 + gcc/config/rs6000/t-ppccomm | 75 + gcc/config/rs6000/t-ppcendian | 30 + gcc/config/rs6000/t-ppcgas | 33 + gcc/config/rs6000/t-ppcos | 8 + gcc/config/rs6000/t-rs6000 | 71 + gcc/config/rs6000/t-rtems | 82 + gcc/config/rs6000/t-spe | 86 + gcc/config/rs6000/t-vxworks | 34 + gcc/config/rs6000/t-vxworksae | 5 + gcc/config/rs6000/t-xilinx | 56 + gcc/config/rs6000/titan.md | 171 + gcc/config/rs6000/tramp.asm | 107 + gcc/config/rs6000/vec_types.h | 52 + gcc/config/rs6000/vector.md | 1175 + gcc/config/rs6000/vsx.md | 1152 + gcc/config/rs6000/vxworks.h | 146 + gcc/config/rs6000/vxworksae.h | 23 + gcc/config/rs6000/x-aix | 6 + gcc/config/rs6000/x-darwin | 5 + gcc/config/rs6000/x-darwin64 | 5 + gcc/config/rs6000/x-linux-relax | 2 + gcc/config/rs6000/x-rs6000 | 3 + gcc/config/rs6000/xcoff.h | 333 + gcc/config/rs6000/xfpu.h | 26 + gcc/config/rs6000/xfpu.md | 140 + gcc/config/rs6000/xilinx.h | 47 + gcc/config/rs6000/xilinx.opt | 33 + gcc/config/rtems.h | 45 + gcc/config/rtems.opt | 33 + gcc/config/rx/constraints.md | 88 + gcc/config/rx/predicates.md | 297 + gcc/config/rx/rx-modes.def | 25 + gcc/config/rx/rx-protos.h | 46 + gcc/config/rx/rx.c | 2935 ++ gcc/config/rx/rx.h | 643 + gcc/config/rx/rx.md | 2501 ++ gcc/config/rx/rx.opt | 99 + gcc/config/rx/t-rx | 34 + gcc/config/s390/2064.md | 135 + gcc/config/s390/2084.md | 310 + gcc/config/s390/2097.md | 764 + gcc/config/s390/2817.md | 315 + gcc/config/s390/constraints.md | 492 + gcc/config/s390/linux-unwind.h | 130 + gcc/config/s390/linux.h | 104 + gcc/config/s390/predicates.md | 406 + gcc/config/s390/s390-modes.def | 174 + gcc/config/s390/s390-protos.h | 114 + gcc/config/s390/s390.c | 10845 +++++++ gcc/config/s390/s390.h | 954 + gcc/config/s390/s390.md | 9410 ++++++ gcc/config/s390/s390.opt | 99 + gcc/config/s390/s390x.h | 27 + gcc/config/s390/t-linux64 | 11 + gcc/config/s390/tpf-unwind.h | 252 + gcc/config/s390/tpf.h | 130 + gcc/config/s390/tpf.md | 33 + gcc/config/s390/tpf.opt | 27 + gcc/config/score/constraints.md | 93 + gcc/config/score/crti.asm | 131 + gcc/config/score/crtn.asm | 50 + gcc/config/score/elf.h | 97 + gcc/config/score/predicates.md | 152 + gcc/config/score/score-conv.h | 78 + gcc/config/score/score-generic.md | 45 + gcc/config/score/score-modes.def | 24 + gcc/config/score/score-protos.h | 86 + gcc/config/score/score.c | 736 + gcc/config/score/score.h | 898 + gcc/config/score/score.md | 1880 ++ gcc/config/score/score.opt | 47 + gcc/config/score/score7.c | 1797 ++ gcc/config/score/score7.h | 158 + gcc/config/score/sfp-machine.h | 57 + gcc/config/score/t-score-elf | 33 + gcc/config/score/t-score-softfp | 9 + gcc/config/sh/constraints.md | 265 + gcc/config/sh/crt1.asm | 1369 + gcc/config/sh/crti.asm | 125 + gcc/config/sh/crtn.asm | 77 + gcc/config/sh/divcost-analysis | 88 + gcc/config/sh/divtab-sh4-300.c | 77 + gcc/config/sh/divtab-sh4.c | 85 + gcc/config/sh/divtab.c | 200 + gcc/config/sh/elf.h | 90 + gcc/config/sh/embed-elf.h | 36 + gcc/config/sh/lib1funcs-4-300.asm | 936 + gcc/config/sh/lib1funcs-Os-4-200.asm | 322 + gcc/config/sh/lib1funcs.asm | 3933 +++ gcc/config/sh/lib1funcs.h | 76 + gcc/config/sh/libgcc-excl.ver | 8 + gcc/config/sh/libgcc-glibc.ver | 48 + gcc/config/sh/linux-atomic.asm | 223 + gcc/config/sh/linux-unwind.h | 256 + gcc/config/sh/linux.h | 137 + gcc/config/sh/little.h | 21 + gcc/config/sh/netbsd-elf.h | 117 + gcc/config/sh/newlib.h | 25 + 
gcc/config/sh/predicates.md | 833 + gcc/config/sh/rtems.h | 26 + gcc/config/sh/rtemself.h | 26 + gcc/config/sh/sh-c.c | 68 + gcc/config/sh/sh-modes.def | 34 + gcc/config/sh/sh-protos.h | 186 + gcc/config/sh/sh-symbian.h | 42 + gcc/config/sh/sh.c | 12610 ++++++++ gcc/config/sh/sh.h | 2511 ++ gcc/config/sh/sh.md | 13490 ++++++++ gcc/config/sh/sh.opt | 338 + gcc/config/sh/sh1.md | 85 + gcc/config/sh/sh4-300.md | 287 + gcc/config/sh/sh4.md | 486 + gcc/config/sh/sh4a.md | 236 + gcc/config/sh/sh64.h | 26 + gcc/config/sh/shmedia.h | 30 + gcc/config/sh/shmedia.md | 94 + gcc/config/sh/sshmedia.h | 78 + gcc/config/sh/superh.h | 107 + gcc/config/sh/superh.opt | 10 + gcc/config/sh/symbian-base.c | 244 + gcc/config/sh/symbian-c.c | 181 + gcc/config/sh/symbian-cxx.c | 662 + gcc/config/sh/symbian-post.h | 88 + gcc/config/sh/symbian-pre.h | 40 + gcc/config/sh/t-elf | 10 + gcc/config/sh/t-linux | 8 + gcc/config/sh/t-linux64 | 1 + gcc/config/sh/t-netbsd | 31 + gcc/config/sh/t-netbsd-sh5-64 | 1 + gcc/config/sh/t-rtems | 7 + gcc/config/sh/t-sh | 166 + gcc/config/sh/t-sh64 | 29 + gcc/config/sh/t-superh | 33 + gcc/config/sh/t-symbian | 81 + gcc/config/sh/t-vxworks | 9 + gcc/config/sh/ushmedia.h | 1087 + gcc/config/sh/vxworks.h | 69 + gcc/config/soft-fp/README | 5 + gcc/config/soft-fp/adddf3.c | 49 + gcc/config/soft-fp/addsf3.c | 50 + gcc/config/soft-fp/addtf3.c | 49 + gcc/config/soft-fp/divdf3.c | 49 + gcc/config/soft-fp/divsf3.c | 49 + gcc/config/soft-fp/divtf3.c | 49 + gcc/config/soft-fp/double.h | 265 + gcc/config/soft-fp/eqdf2.c | 51 + gcc/config/soft-fp/eqsf2.c | 51 + gcc/config/soft-fp/eqtf2.c | 51 + gcc/config/soft-fp/extenddftf2.c | 54 + gcc/config/soft-fp/extended.h | 431 + gcc/config/soft-fp/extendsfdf2.c | 54 + gcc/config/soft-fp/extendsftf2.c | 54 + gcc/config/soft-fp/extendxftf2.c | 53 + gcc/config/soft-fp/fixdfdi.c | 46 + gcc/config/soft-fp/fixdfsi.c | 46 + gcc/config/soft-fp/fixdfti.c | 45 + gcc/config/soft-fp/fixsfdi.c | 46 + gcc/config/soft-fp/fixsfsi.c | 46 + gcc/config/soft-fp/fixsfti.c | 45 + gcc/config/soft-fp/fixtfdi.c | 46 + gcc/config/soft-fp/fixtfsi.c | 46 + gcc/config/soft-fp/fixtfti.c | 45 + gcc/config/soft-fp/fixunsdfdi.c | 46 + gcc/config/soft-fp/fixunsdfsi.c | 46 + gcc/config/soft-fp/fixunsdfti.c | 45 + gcc/config/soft-fp/fixunssfdi.c | 46 + gcc/config/soft-fp/fixunssfsi.c | 46 + gcc/config/soft-fp/fixunssfti.c | 45 + gcc/config/soft-fp/fixunstfdi.c | 46 + gcc/config/soft-fp/fixunstfsi.c | 46 + gcc/config/soft-fp/fixunstfti.c | 45 + gcc/config/soft-fp/floatdidf.c | 46 + gcc/config/soft-fp/floatdisf.c | 46 + gcc/config/soft-fp/floatditf.c | 46 + gcc/config/soft-fp/floatsidf.c | 46 + gcc/config/soft-fp/floatsisf.c | 46 + gcc/config/soft-fp/floatsitf.c | 46 + gcc/config/soft-fp/floattidf.c | 45 + gcc/config/soft-fp/floattisf.c | 45 + gcc/config/soft-fp/floattitf.c | 45 + gcc/config/soft-fp/floatundidf.c | 46 + gcc/config/soft-fp/floatundisf.c | 46 + gcc/config/soft-fp/floatunditf.c | 47 + gcc/config/soft-fp/floatunsidf.c | 46 + gcc/config/soft-fp/floatunsisf.c | 46 + gcc/config/soft-fp/floatunsitf.c | 47 + gcc/config/soft-fp/floatuntidf.c | 45 + gcc/config/soft-fp/floatuntisf.c | 45 + gcc/config/soft-fp/floatuntitf.c | 45 + gcc/config/soft-fp/gedf2.c | 51 + gcc/config/soft-fp/gesf2.c | 51 + gcc/config/soft-fp/getf2.c | 51 + gcc/config/soft-fp/ledf2.c | 51 + gcc/config/soft-fp/lesf2.c | 51 + gcc/config/soft-fp/letf2.c | 51 + gcc/config/soft-fp/muldf3.c | 49 + gcc/config/soft-fp/mulsf3.c | 49 + gcc/config/soft-fp/multf3.c | 49 + gcc/config/soft-fp/negdf2.c | 48 + 
gcc/config/soft-fp/negsf2.c | 48 + gcc/config/soft-fp/negtf2.c | 48 + gcc/config/soft-fp/op-1.h | 302 + gcc/config/soft-fp/op-2.h | 617 + gcc/config/soft-fp/op-4.h | 688 + gcc/config/soft-fp/op-8.h | 111 + gcc/config/soft-fp/op-common.h | 1359 + gcc/config/soft-fp/quad.h | 271 + gcc/config/soft-fp/single.h | 151 + gcc/config/soft-fp/soft-fp.h | 213 + gcc/config/soft-fp/subdf3.c | 49 + gcc/config/soft-fp/subsf3.c | 49 + gcc/config/soft-fp/subtf3.c | 49 + gcc/config/soft-fp/t-softfp | 107 + gcc/config/soft-fp/truncdfsf2.c | 54 + gcc/config/soft-fp/trunctfdf2.c | 54 + gcc/config/soft-fp/trunctfsf2.c | 54 + gcc/config/soft-fp/trunctfxf2.c | 53 + gcc/config/soft-fp/unorddf2.c | 44 + gcc/config/soft-fp/unordsf2.c | 45 + gcc/config/soft-fp/unordtf2.c | 45 + gcc/config/sol2-10.h | 23 + gcc/config/sol2-c.c | 272 + gcc/config/sol2-gld.h | 36 + gcc/config/sol2-protos.h | 24 + gcc/config/sol2.c | 156 + gcc/config/sol2.h | 310 + gcc/config/sol2.opt | 47 + gcc/config/sparc/biarch64.h | 23 + gcc/config/sparc/constraints.md | 148 + gcc/config/sparc/crtfastmath.c | 44 + gcc/config/sparc/cypress.md | 50 + gcc/config/sparc/freebsd.h | 177 + gcc/config/sparc/gmon-sol2.c | 420 + gcc/config/sparc/hypersparc.md | 82 + gcc/config/sparc/lb1spc.asm | 784 + gcc/config/sparc/lb1spl.asm | 246 + gcc/config/sparc/leon.md | 56 + gcc/config/sparc/libgcc-sparc-glibc.ver | 93 + gcc/config/sparc/linux-unwind.h | 202 + gcc/config/sparc/linux.h | 168 + gcc/config/sparc/linux64.h | 289 + gcc/config/sparc/little-endian.opt | 27 + gcc/config/sparc/long-double-switch.opt | 27 + gcc/config/sparc/netbsd-elf.h | 246 + gcc/config/sparc/niagara.md | 118 + gcc/config/sparc/niagara2.md | 90 + gcc/config/sparc/openbsd1-64.h | 23 + gcc/config/sparc/openbsd64.h | 85 + gcc/config/sparc/predicates.md | 475 + gcc/config/sparc/rtemself.h | 33 + gcc/config/sparc/sol2-64.h | 22 + gcc/config/sparc/sol2-bi.h | 271 + gcc/config/sparc/sol2-c1.asm | 103 + gcc/config/sparc/sol2-ci.asm | 55 + gcc/config/sparc/sol2-cn.asm | 41 + gcc/config/sparc/sol2-gas-bi.h | 23 + gcc/config/sparc/sol2-gas.h | 47 + gcc/config/sparc/sol2-gld-bi.h | 67 + gcc/config/sparc/sol2-unwind.h | 480 + gcc/config/sparc/sol2.h | 205 + gcc/config/sparc/sp-elf.h | 69 + gcc/config/sparc/sp64-elf.h | 93 + gcc/config/sparc/sparc-modes.def | 47 + gcc/config/sparc/sparc-protos.h | 108 + gcc/config/sparc/sparc.c | 9873 ++++++ gcc/config/sparc/sparc.h | 2122 ++ gcc/config/sparc/sparc.md | 7828 +++++ gcc/config/sparc/sparc.opt | 126 + gcc/config/sparc/sparclet.md | 43 + gcc/config/sparc/supersparc.md | 92 + gcc/config/sparc/sync.md | 199 + gcc/config/sparc/sysv4.h | 125 + gcc/config/sparc/t-crtfm | 4 + gcc/config/sparc/t-crtin | 6 + gcc/config/sparc/t-elf | 40 + gcc/config/sparc/t-leon | 42 + gcc/config/sparc/t-leon3 | 37 + gcc/config/sparc/t-linux | 7 + gcc/config/sparc/t-linux64 | 37 + gcc/config/sparc/t-netbsd64 | 8 + gcc/config/sparc/t-sol2 | 39 + gcc/config/sparc/t-sol2-64 | 7 + gcc/config/sparc/t-vxworks | 5 + gcc/config/sparc/ultra1_2.md | 301 + gcc/config/sparc/ultra3.md | 189 + gcc/config/sparc/vxworks.h | 60 + gcc/config/spu/cache.S | 43 + gcc/config/spu/cachemgr.c | 438 + gcc/config/spu/constraints.md | 179 + gcc/config/spu/divmodti4.c | 166 + gcc/config/spu/divv2df3.c | 195 + gcc/config/spu/float_disf.c | 31 + gcc/config/spu/float_unsdidf.c | 54 + gcc/config/spu/float_unsdisf.c | 31 + gcc/config/spu/float_unssidf.c | 45 + gcc/config/spu/mfc_multi_tag_release.c | 72 + gcc/config/spu/mfc_multi_tag_reserve.c | 84 + gcc/config/spu/mfc_tag_release.c | 59 + 
gcc/config/spu/mfc_tag_reserve.c | 51 + gcc/config/spu/mfc_tag_table.c | 39 + gcc/config/spu/multi3.c | 97 + gcc/config/spu/predicates.md | 122 + gcc/config/spu/spu-builtins.def | 781 + gcc/config/spu/spu-builtins.md | 929 + gcc/config/spu/spu-c.c | 234 + gcc/config/spu/spu-elf.h | 80 + gcc/config/spu/spu-modes.def | 29 + gcc/config/spu/spu-protos.h | 96 + gcc/config/spu/spu.c | 7180 +++++ gcc/config/spu/spu.h | 564 + gcc/config/spu/spu.md | 5431 ++++ gcc/config/spu/spu.opt | 105 + gcc/config/spu/spu_cache.h | 39 + gcc/config/spu/spu_internals.h | 421 + gcc/config/spu/spu_intrinsics.h | 83 + gcc/config/spu/spu_mfcio.h | 342 + gcc/config/spu/t-spu-elf | 123 + gcc/config/spu/vec_types.h | 36 + gcc/config/spu/vmx2spu.h | 3985 +++ gcc/config/stormy16/constraints.md | 119 + gcc/config/stormy16/predicates.md | 178 + gcc/config/stormy16/stormy-abi | 174 + gcc/config/stormy16/stormy16-lib2-ashlsi3.c | 2 + gcc/config/stormy16/stormy16-lib2-ashrsi3.c | 2 + gcc/config/stormy16/stormy16-lib2-clzhi2.c | 2 + gcc/config/stormy16/stormy16-lib2-cmpsi2.c | 2 + gcc/config/stormy16/stormy16-lib2-ctzhi2.c | 2 + gcc/config/stormy16/stormy16-lib2-divsi3.c | 2 + gcc/config/stormy16/stormy16-lib2-ffshi2.c | 2 + gcc/config/stormy16/stormy16-lib2-lshrsi3.c | 2 + gcc/config/stormy16/stormy16-lib2-modsi3.c | 2 + gcc/config/stormy16/stormy16-lib2-parityhi2.c | 2 + gcc/config/stormy16/stormy16-lib2-popcounthi2.c | 2 + gcc/config/stormy16/stormy16-lib2-ucmpsi2.c | 2 + gcc/config/stormy16/stormy16-lib2-udivmodsi4.c | 2 + gcc/config/stormy16/stormy16-lib2-udivsi3.c | 2 + gcc/config/stormy16/stormy16-lib2-umodsi3.c | 2 + gcc/config/stormy16/stormy16-lib2.c | 357 + gcc/config/stormy16/stormy16-protos.h | 70 + gcc/config/stormy16/stormy16.c | 2677 ++ gcc/config/stormy16/stormy16.h | 520 + gcc/config/stormy16/stormy16.md | 1251 + gcc/config/stormy16/stormy16.opt | 24 + gcc/config/stormy16/t-stormy16 | 50 + gcc/config/svr3.h | 146 + gcc/config/sync.c | 180 + gcc/config/t-darwin | 63 + gcc/config/t-dfprules | 10 + gcc/config/t-freebsd | 10 + gcc/config/t-freebsd-thread | 2 + gcc/config/t-gnu | 2 + gcc/config/t-libc-ok | 1 + gcc/config/t-libgcc-pic | 2 + gcc/config/t-libunwind | 30 + gcc/config/t-libunwind-elf | 49 + gcc/config/t-linux | 32 + gcc/config/t-lynx | 33 + gcc/config/t-netbsd | 2 + gcc/config/t-openbsd | 2 + gcc/config/t-openbsd-thread | 3 + gcc/config/t-pnt16-warn | 27 + gcc/config/t-rtems | 7 + gcc/config/t-slibgcc-darwin | 2 + gcc/config/t-slibgcc-elf-ver | 56 + gcc/config/t-slibgcc-libgcc | 32 + gcc/config/t-slibgcc-nolc-override | 1 + gcc/config/t-slibgcc-sld | 50 + gcc/config/t-sol2 | 36 + gcc/config/t-svr4 | 8 + gcc/config/t-sysroot-suffix | 7 + gcc/config/t-vxworks | 53 + gcc/config/tm-dwarf2.h | 4 + gcc/config/udivmod.c | 37 + gcc/config/udivmodsi4.c | 47 + gcc/config/usegas.h | 20 + gcc/config/v850/constraints.md | 108 + gcc/config/v850/lib1funcs.asm | 2330 ++ gcc/config/v850/predicates.md | 501 + gcc/config/v850/t-v850 | 114 + gcc/config/v850/t-v850e | 112 + gcc/config/v850/v850-c.c | 273 + gcc/config/v850/v850-modes.def | 29 + gcc/config/v850/v850-protos.h | 73 + gcc/config/v850/v850.c | 3226 ++ gcc/config/v850/v850.h | 987 + gcc/config/v850/v850.md | 2667 ++ gcc/config/v850/v850.opt | 106 + gcc/config/vax/builtins.md | 192 + gcc/config/vax/constraints.md | 117 + gcc/config/vax/elf.h | 111 + gcc/config/vax/elf.opt | 30 + gcc/config/vax/lib1funcs.asm | 92 + gcc/config/vax/linux.h | 54 + gcc/config/vax/netbsd-elf.h | 68 + gcc/config/vax/netbsd.h | 47 + gcc/config/vax/openbsd.h | 48 + gcc/config/vax/openbsd1.h 
| 22 + gcc/config/vax/predicates.md | 111 + gcc/config/vax/t-linux | 2 + gcc/config/vax/vax-modes.def | 22 + gcc/config/vax/vax-protos.h | 41 + gcc/config/vax/vax.c | 2140 ++ gcc/config/vax/vax.h | 751 + gcc/config/vax/vax.md | 1636 + gcc/config/vax/vax.opt | 51 + gcc/config/vms/t-vms | 37 + gcc/config/vms/vms-crtl-64.h | 195 + gcc/config/vms/vms-crtl.h | 191 + gcc/config/vms/vms-ucrt0.c | 127 + gcc/config/vms/vms.opt | 31 + gcc/config/vms/x-vms | 27 + gcc/config/vms/xm-vms.h | 58 + gcc/config/vms/xm-vms64.h | 23 + gcc/config/vx-common.h | 94 + gcc/config/vxlib-tls.c | 362 + gcc/config/vxlib.c | 95 + gcc/config/vxworks-dummy.h | 40 + gcc/config/vxworks.c | 147 + gcc/config/vxworks.h | 138 + gcc/config/vxworks.opt | 46 + gcc/config/vxworksae.h | 70 + gcc/config/x-cflags-O1 | 5 + gcc/config/x-darwin | 3 + gcc/config/x-hpux | 4 + gcc/config/x-linux | 4 + gcc/config/x-solaris | 4 + gcc/config/xtensa/constraints.md | 139 + gcc/config/xtensa/crti.asm | 51 + gcc/config/xtensa/crtn.asm | 46 + gcc/config/xtensa/elf.h | 104 + gcc/config/xtensa/elf.opt | 30 + gcc/config/xtensa/ieee754-df.S | 2388 ++ gcc/config/xtensa/ieee754-sf.S | 1757 + gcc/config/xtensa/lib1funcs.asm | 845 + gcc/config/xtensa/lib2funcs.S | 186 + gcc/config/xtensa/libgcc-xtensa.ver | 3 + gcc/config/xtensa/linux-unwind.h | 97 + gcc/config/xtensa/linux.h | 71 + gcc/config/xtensa/predicates.md | 175 + gcc/config/xtensa/t-elf | 6 + gcc/config/xtensa/t-linux | 3 + gcc/config/xtensa/t-xtensa | 42 + gcc/config/xtensa/unwind-dw2-xtensa.c | 546 + gcc/config/xtensa/unwind-dw2-xtensa.h | 50 + gcc/config/xtensa/xtensa-protos.h | 74 + gcc/config/xtensa/xtensa.c | 3715 +++ gcc/config/xtensa/xtensa.h | 847 + gcc/config/xtensa/xtensa.md | 1914 ++ gcc/config/xtensa/xtensa.opt | 43 + 1540 files changed, 859149 insertions(+) create mode 100644 gcc/config/README create mode 100644 gcc/config/alpha/alpha-modes.def create mode 100644 gcc/config/alpha/alpha-protos.h create mode 100644 gcc/config/alpha/alpha.c create mode 100644 gcc/config/alpha/alpha.h create mode 100644 gcc/config/alpha/alpha.md create mode 100644 gcc/config/alpha/alpha.opt create mode 100644 gcc/config/alpha/constraints.md create mode 100644 gcc/config/alpha/crtfastmath.c create mode 100644 gcc/config/alpha/driver-alpha.c create mode 100644 gcc/config/alpha/elf.h create mode 100644 gcc/config/alpha/elf.opt create mode 100644 gcc/config/alpha/ev4.md create mode 100644 gcc/config/alpha/ev5.md create mode 100644 gcc/config/alpha/ev6.md create mode 100644 gcc/config/alpha/freebsd.h create mode 100644 gcc/config/alpha/gnu.h create mode 100644 gcc/config/alpha/host-osf.c create mode 100644 gcc/config/alpha/libgcc-alpha-ldbl.ver create mode 100644 gcc/config/alpha/linux-elf.h create mode 100644 gcc/config/alpha/linux-unwind.h create mode 100644 gcc/config/alpha/linux.h create mode 100644 gcc/config/alpha/netbsd.h create mode 100644 gcc/config/alpha/openbsd.h create mode 100644 gcc/config/alpha/osf5-unwind.h create mode 100644 gcc/config/alpha/osf5.h create mode 100644 gcc/config/alpha/osf5.opt create mode 100644 gcc/config/alpha/predicates.md create mode 100644 gcc/config/alpha/qrnnd.asm create mode 100644 gcc/config/alpha/sync.md create mode 100644 gcc/config/alpha/t-alpha create mode 100644 gcc/config/alpha/t-crtfm create mode 100644 gcc/config/alpha/t-ieee create mode 100644 gcc/config/alpha/t-linux create mode 100644 gcc/config/alpha/t-osf-pthread create mode 100644 gcc/config/alpha/t-osf5 create mode 100644 gcc/config/alpha/t-vms create mode 100644 gcc/config/alpha/va_list.h create mode 
100644 gcc/config/alpha/vms-dwarf2.asm create mode 100644 gcc/config/alpha/vms-dwarf2eh.asm create mode 100644 gcc/config/alpha/vms-gcc_shell_handler.c create mode 100644 gcc/config/alpha/vms-unwind.h create mode 100644 gcc/config/alpha/vms.h create mode 100644 gcc/config/alpha/vms64.h create mode 100644 gcc/config/alpha/x-alpha create mode 100644 gcc/config/alpha/x-osf create mode 100644 gcc/config/arc/arc-modes.def create mode 100644 gcc/config/arc/arc-protos.h create mode 100644 gcc/config/arc/arc.c create mode 100644 gcc/config/arc/arc.h create mode 100644 gcc/config/arc/arc.md create mode 100644 gcc/config/arc/arc.opt create mode 100644 gcc/config/arc/initfini.c create mode 100644 gcc/config/arc/lib1funcs.asm create mode 100644 gcc/config/arc/t-arc create mode 100644 gcc/config/arm/README-interworking create mode 100644 gcc/config/arm/aout.h create mode 100644 gcc/config/arm/arm-c.c create mode 100644 gcc/config/arm/arm-cores.def create mode 100644 gcc/config/arm/arm-generic.md create mode 100644 gcc/config/arm/arm-ldmstm.ml create mode 100644 gcc/config/arm/arm-modes.def create mode 100644 gcc/config/arm/arm-protos.h create mode 100644 gcc/config/arm/arm-tune.md create mode 100644 gcc/config/arm/arm.c create mode 100644 gcc/config/arm/arm.h create mode 100644 gcc/config/arm/arm.md create mode 100644 gcc/config/arm/arm.opt create mode 100644 gcc/config/arm/arm1020e.md create mode 100644 gcc/config/arm/arm1026ejs.md create mode 100644 gcc/config/arm/arm1136jfs.md create mode 100644 gcc/config/arm/arm926ejs.md create mode 100644 gcc/config/arm/arm_neon.h create mode 100644 gcc/config/arm/bpabi-v6m.S create mode 100644 gcc/config/arm/bpabi.S create mode 100644 gcc/config/arm/bpabi.c create mode 100644 gcc/config/arm/bpabi.h create mode 100644 gcc/config/arm/cirrus.md create mode 100644 gcc/config/arm/coff.h create mode 100644 gcc/config/arm/constraints.md create mode 100644 gcc/config/arm/cortex-a5.md create mode 100644 gcc/config/arm/cortex-a8-neon.md create mode 100644 gcc/config/arm/cortex-a8.md create mode 100644 gcc/config/arm/cortex-a9-neon.md create mode 100644 gcc/config/arm/cortex-a9.md create mode 100644 gcc/config/arm/cortex-m4-fpu.md create mode 100644 gcc/config/arm/cortex-m4.md create mode 100644 gcc/config/arm/cortex-r4.md create mode 100644 gcc/config/arm/cortex-r4f.md create mode 100644 gcc/config/arm/crti.asm create mode 100644 gcc/config/arm/crtn.asm create mode 100644 gcc/config/arm/ecos-elf.h create mode 100644 gcc/config/arm/elf.h create mode 100644 gcc/config/arm/fa526.md create mode 100644 gcc/config/arm/fa606te.md create mode 100644 gcc/config/arm/fa626te.md create mode 100644 gcc/config/arm/fa726te.md create mode 100644 gcc/config/arm/fmp626.md create mode 100644 gcc/config/arm/fp16.c create mode 100644 gcc/config/arm/fpa.md create mode 100644 gcc/config/arm/freebsd.h create mode 100755 gcc/config/arm/gentune.sh create mode 100644 gcc/config/arm/ieee754-df.S create mode 100644 gcc/config/arm/ieee754-sf.S create mode 100644 gcc/config/arm/iterators.md create mode 100644 gcc/config/arm/iwmmxt.md create mode 100644 gcc/config/arm/ldmstm.md create mode 100644 gcc/config/arm/lib1funcs.asm create mode 100644 gcc/config/arm/libgcc-bpabi.ver create mode 100644 gcc/config/arm/libunwind.S create mode 100644 gcc/config/arm/linux-atomic.c create mode 100644 gcc/config/arm/linux-eabi.h create mode 100644 gcc/config/arm/linux-elf.h create mode 100644 gcc/config/arm/linux-gas.h create mode 100644 gcc/config/arm/mmintrin.h create mode 100644 gcc/config/arm/neon-docgen.ml create 
mode 100644 gcc/config/arm/neon-gen.ml create mode 100644 gcc/config/arm/neon-schedgen.ml create mode 100644 gcc/config/arm/neon-testgen.ml create mode 100644 gcc/config/arm/neon.md create mode 100644 gcc/config/arm/neon.ml create mode 100644 gcc/config/arm/netbsd-elf.h create mode 100644 gcc/config/arm/netbsd.h create mode 100644 gcc/config/arm/pe.c create mode 100644 gcc/config/arm/pe.h create mode 100644 gcc/config/arm/pe.opt create mode 100644 gcc/config/arm/pr-support.c create mode 100644 gcc/config/arm/predicates.md create mode 100644 gcc/config/arm/rtems-eabi.h create mode 100644 gcc/config/arm/rtems-elf.h create mode 100644 gcc/config/arm/semi.h create mode 100644 gcc/config/arm/sfp-machine.h create mode 100644 gcc/config/arm/symbian.h create mode 100644 gcc/config/arm/sync.md create mode 100644 gcc/config/arm/t-arm create mode 100644 gcc/config/arm/t-arm-elf create mode 100644 gcc/config/arm/t-arm-softfp create mode 100644 gcc/config/arm/t-bpabi create mode 100644 gcc/config/arm/t-linux create mode 100644 gcc/config/arm/t-linux-androideabi create mode 100644 gcc/config/arm/t-linux-eabi create mode 100644 gcc/config/arm/t-netbsd create mode 100644 gcc/config/arm/t-pe create mode 100644 gcc/config/arm/t-rtems create mode 100644 gcc/config/arm/t-rtems-eabi create mode 100644 gcc/config/arm/t-strongarm-elf create mode 100644 gcc/config/arm/t-symbian create mode 100644 gcc/config/arm/t-vxworks create mode 100644 gcc/config/arm/t-wince-pe create mode 100644 gcc/config/arm/thumb2.md create mode 100644 gcc/config/arm/uclinux-eabi.h create mode 100644 gcc/config/arm/uclinux-elf.h create mode 100644 gcc/config/arm/unaligned-funcs.c create mode 100644 gcc/config/arm/unknown-elf.h create mode 100644 gcc/config/arm/unwind-arm.c create mode 100644 gcc/config/arm/unwind-arm.h create mode 100644 gcc/config/arm/vec-common.md create mode 100644 gcc/config/arm/vfp.md create mode 100644 gcc/config/arm/vfp11.md create mode 100644 gcc/config/arm/vxworks.h create mode 100644 gcc/config/arm/vxworks.opt create mode 100644 gcc/config/arm/wince-pe.h create mode 100644 gcc/config/avr/avr-c.c create mode 100755 gcc/config/avr/avr-devices.c create mode 100644 gcc/config/avr/avr-protos.h create mode 100644 gcc/config/avr/avr-stdint.h create mode 100644 gcc/config/avr/avr.c create mode 100644 gcc/config/avr/avr.h create mode 100644 gcc/config/avr/avr.md create mode 100644 gcc/config/avr/avr.opt create mode 100644 gcc/config/avr/constraints.md create mode 100755 gcc/config/avr/driver-avr.c create mode 100644 gcc/config/avr/libgcc.S create mode 100755 gcc/config/avr/predicates.md create mode 100644 gcc/config/avr/rtems.h create mode 100644 gcc/config/avr/t-avr create mode 100644 gcc/config/avr/t-rtems create mode 100644 gcc/config/bfin/bfin-modes.def create mode 100644 gcc/config/bfin/bfin-protos.h create mode 100644 gcc/config/bfin/bfin.c create mode 100644 gcc/config/bfin/bfin.h create mode 100644 gcc/config/bfin/bfin.md create mode 100644 gcc/config/bfin/bfin.opt create mode 100644 gcc/config/bfin/constraints.md create mode 100644 gcc/config/bfin/crti.s create mode 100644 gcc/config/bfin/crtlibid.s create mode 100644 gcc/config/bfin/crtn.s create mode 100644 gcc/config/bfin/elf.h create mode 100644 gcc/config/bfin/lib1funcs.asm create mode 100644 gcc/config/bfin/libgcc-bfin.ver create mode 100644 gcc/config/bfin/linux-unwind.h create mode 100644 gcc/config/bfin/linux.h create mode 100644 gcc/config/bfin/predicates.md create mode 100644 gcc/config/bfin/print-sysroot-suffix.sh create mode 100644 
gcc/config/bfin/rtems.h create mode 100644 gcc/config/bfin/sync.md create mode 100644 gcc/config/bfin/t-bfin create mode 100644 gcc/config/bfin/t-bfin-elf create mode 100644 gcc/config/bfin/t-bfin-linux create mode 100644 gcc/config/bfin/t-bfin-uclinux create mode 100644 gcc/config/bfin/t-rtems create mode 100644 gcc/config/bfin/uclinux.h create mode 100644 gcc/config/cris/arit.c create mode 100644 gcc/config/cris/cris-protos.h create mode 100644 gcc/config/cris/cris.c create mode 100644 gcc/config/cris/cris.h create mode 100644 gcc/config/cris/cris.md create mode 100644 gcc/config/cris/cris.opt create mode 100644 gcc/config/cris/cris_abi_symbol.c create mode 100644 gcc/config/cris/elf.opt create mode 100644 gcc/config/cris/libgcc.ver create mode 100644 gcc/config/cris/linux.h create mode 100644 gcc/config/cris/linux.opt create mode 100644 gcc/config/cris/mulsi3.asm create mode 100644 gcc/config/cris/predicates.md create mode 100644 gcc/config/cris/t-cris create mode 100644 gcc/config/cris/t-elfmulti create mode 100644 gcc/config/cris/t-linux create mode 100644 gcc/config/crx/crx-protos.h create mode 100644 gcc/config/crx/crx.c create mode 100644 gcc/config/crx/crx.h create mode 100644 gcc/config/crx/crx.md create mode 100644 gcc/config/crx/crx.opt create mode 100644 gcc/config/crx/t-crx create mode 100644 gcc/config/darwin-64.c create mode 100644 gcc/config/darwin-c.c create mode 100644 gcc/config/darwin-crt2.c create mode 100644 gcc/config/darwin-crt3.c create mode 100644 gcc/config/darwin-driver.c create mode 100644 gcc/config/darwin-f.c create mode 100644 gcc/config/darwin-ppc-ldouble-patch.def create mode 100644 gcc/config/darwin-protos.h create mode 100644 gcc/config/darwin-sections.def create mode 100644 gcc/config/darwin.c create mode 100644 gcc/config/darwin.h create mode 100644 gcc/config/darwin.opt create mode 100644 gcc/config/darwin10.h create mode 100644 gcc/config/darwin9.h create mode 100644 gcc/config/dbx.h create mode 100644 gcc/config/dbxcoff.h create mode 100644 gcc/config/dbxelf.h create mode 100644 gcc/config/dfp-bit.c create mode 100644 gcc/config/dfp-bit.h create mode 100644 gcc/config/divmod.c create mode 100644 gcc/config/elfos.h create mode 100644 gcc/config/fixed-bit.c create mode 100644 gcc/config/fixed-bit.h create mode 100644 gcc/config/flat.h create mode 100644 gcc/config/floatunsidf.c create mode 100644 gcc/config/floatunsisf.c create mode 100644 gcc/config/floatunsitf.c create mode 100644 gcc/config/floatunsixf.c create mode 100644 gcc/config/fp-bit.c create mode 100644 gcc/config/fp-bit.h create mode 100644 gcc/config/fr30/constraints.md create mode 100644 gcc/config/fr30/crti.asm create mode 100644 gcc/config/fr30/crtn.asm create mode 100644 gcc/config/fr30/fr30-protos.h create mode 100644 gcc/config/fr30/fr30.c create mode 100644 gcc/config/fr30/fr30.h create mode 100644 gcc/config/fr30/fr30.md create mode 100644 gcc/config/fr30/fr30.opt create mode 100644 gcc/config/fr30/lib1funcs.asm create mode 100644 gcc/config/fr30/predicates.md create mode 100644 gcc/config/fr30/t-fr30 create mode 100644 gcc/config/freebsd-nthr.h create mode 100644 gcc/config/freebsd-spec.h create mode 100644 gcc/config/freebsd-stdint.h create mode 100644 gcc/config/freebsd.h create mode 100644 gcc/config/freebsd.opt create mode 100644 gcc/config/frv/cmovd.c create mode 100644 gcc/config/frv/cmovh.c create mode 100644 gcc/config/frv/cmovw.c create mode 100644 gcc/config/frv/constraints.md create mode 100644 gcc/config/frv/frv-asm.h create mode 100644 gcc/config/frv/frv-modes.def 
create mode 100644 gcc/config/frv/frv-protos.h create mode 100644 gcc/config/frv/frv.c create mode 100644 gcc/config/frv/frv.h create mode 100644 gcc/config/frv/frv.md create mode 100644 gcc/config/frv/frv.opt create mode 100644 gcc/config/frv/frvbegin.c create mode 100644 gcc/config/frv/frvend.c create mode 100644 gcc/config/frv/lib1funcs.asm create mode 100644 gcc/config/frv/libgcc-frv.ver create mode 100644 gcc/config/frv/linux.h create mode 100644 gcc/config/frv/modi.c create mode 100644 gcc/config/frv/predicates.md create mode 100644 gcc/config/frv/t-frv create mode 100644 gcc/config/frv/t-linux create mode 100644 gcc/config/frv/uitod.c create mode 100644 gcc/config/frv/uitof.c create mode 100644 gcc/config/frv/ulltod.c create mode 100644 gcc/config/frv/ulltof.c create mode 100644 gcc/config/frv/umodi.c create mode 100644 gcc/config/fused-madd.opt create mode 100644 gcc/config/g.opt create mode 100644 gcc/config/glibc-stdint.h create mode 100644 gcc/config/gnu-user.h create mode 100644 gcc/config/gnu-user.opt create mode 100644 gcc/config/gnu.h create mode 100644 gcc/config/h8300/clzhi2.c create mode 100644 gcc/config/h8300/crti.asm create mode 100644 gcc/config/h8300/crtn.asm create mode 100644 gcc/config/h8300/ctzhi2.c create mode 100644 gcc/config/h8300/elf.h create mode 100644 gcc/config/h8300/fixunssfsi.c create mode 100644 gcc/config/h8300/genmova.sh create mode 100644 gcc/config/h8300/h8300-protos.h create mode 100644 gcc/config/h8300/h8300.c create mode 100644 gcc/config/h8300/h8300.h create mode 100644 gcc/config/h8300/h8300.md create mode 100644 gcc/config/h8300/h8300.opt create mode 100644 gcc/config/h8300/lib1funcs.asm create mode 100644 gcc/config/h8300/mova.md create mode 100644 gcc/config/h8300/parityhi2.c create mode 100644 gcc/config/h8300/popcounthi2.c create mode 100644 gcc/config/h8300/predicates.md create mode 100644 gcc/config/h8300/rtems.h create mode 100644 gcc/config/h8300/t-elf create mode 100644 gcc/config/h8300/t-h8300 create mode 100644 gcc/config/h8300/t-rtems create mode 100644 gcc/config/host-darwin.c create mode 100644 gcc/config/host-darwin.h create mode 100644 gcc/config/host-hpux.c create mode 100644 gcc/config/host-linux.c create mode 100644 gcc/config/host-solaris.c create mode 100644 gcc/config/hpux-stdint.h create mode 100644 gcc/config/hpux11.opt create mode 100644 gcc/config/i386/abmintrin.h create mode 100644 gcc/config/i386/ammintrin.h create mode 100644 gcc/config/i386/athlon.md create mode 100644 gcc/config/i386/atom.md create mode 100644 gcc/config/i386/att.h create mode 100644 gcc/config/i386/avxintrin.h create mode 100644 gcc/config/i386/avxmath.h create mode 100644 gcc/config/i386/bdver1.md create mode 100644 gcc/config/i386/biarch64.h create mode 100644 gcc/config/i386/bmiintrin.h create mode 100644 gcc/config/i386/bmmintrin.h create mode 100644 gcc/config/i386/bsd.h create mode 100644 gcc/config/i386/constraints.md create mode 100644 gcc/config/i386/core2.md create mode 100644 gcc/config/i386/cpuid.h create mode 100644 gcc/config/i386/cross-stdarg.h create mode 100644 gcc/config/i386/crtdll.h create mode 100644 gcc/config/i386/crtfastmath.c create mode 100644 gcc/config/i386/crtprec.c create mode 100644 gcc/config/i386/cygming-crtbegin.c create mode 100644 gcc/config/i386/cygming-crtend.c create mode 100644 gcc/config/i386/cygming.h create mode 100644 gcc/config/i386/cygming.opt create mode 100644 gcc/config/i386/cygwin-stdint.h create mode 100644 gcc/config/i386/cygwin.asm create mode 100644 gcc/config/i386/cygwin.h create mode 
100644 gcc/config/i386/darwin-libgcc.10.4.ver create mode 100644 gcc/config/i386/darwin-libgcc.10.5.ver create mode 100644 gcc/config/i386/darwin.h create mode 100644 gcc/config/i386/darwin64.h create mode 100644 gcc/config/i386/djgpp-stdint.h create mode 100644 gcc/config/i386/djgpp.h create mode 100644 gcc/config/i386/djgpp.opt create mode 100644 gcc/config/i386/driver-i386.c create mode 100644 gcc/config/i386/emmintrin.h create mode 100644 gcc/config/i386/fma4intrin.h create mode 100644 gcc/config/i386/freebsd.h create mode 100644 gcc/config/i386/freebsd64.h create mode 100644 gcc/config/i386/gas.h create mode 100644 gcc/config/i386/geode.md create mode 100644 gcc/config/i386/gmm_malloc.h create mode 100644 gcc/config/i386/gmon-sol2.c create mode 100644 gcc/config/i386/gnu.h create mode 100644 gcc/config/i386/gstabs.h create mode 100644 gcc/config/i386/gthr-win32.c create mode 100644 gcc/config/i386/host-cygwin.c create mode 100644 gcc/config/i386/host-i386-darwin.c create mode 100644 gcc/config/i386/host-mingw32.c create mode 100644 gcc/config/i386/i386-builtin-types.awk create mode 100644 gcc/config/i386/i386-builtin-types.def create mode 100644 gcc/config/i386/i386-c.c create mode 100644 gcc/config/i386/i386-interix.h create mode 100644 gcc/config/i386/i386-interix3.h create mode 100644 gcc/config/i386/i386-modes.def create mode 100644 gcc/config/i386/i386-protos.h create mode 100644 gcc/config/i386/i386.c create mode 100644 gcc/config/i386/i386.h create mode 100644 gcc/config/i386/i386.md create mode 100644 gcc/config/i386/i386.opt create mode 100644 gcc/config/i386/i386elf.h create mode 100644 gcc/config/i386/ia32intrin.h create mode 100644 gcc/config/i386/immintrin.h create mode 100644 gcc/config/i386/k6.md create mode 100644 gcc/config/i386/kfreebsd-gnu.h create mode 100644 gcc/config/i386/knetbsd-gnu.h create mode 100644 gcc/config/i386/kopensolaris-gnu.h create mode 100644 gcc/config/i386/libgcc-glibc.ver create mode 100644 gcc/config/i386/linux-unwind.h create mode 100644 gcc/config/i386/linux.h create mode 100644 gcc/config/i386/linux64.h create mode 100644 gcc/config/i386/lwpintrin.h create mode 100644 gcc/config/i386/lynx.h create mode 100644 gcc/config/i386/mingw-stdint.h create mode 100644 gcc/config/i386/mingw-w64.h create mode 100644 gcc/config/i386/mingw-w64.opt create mode 100644 gcc/config/i386/mingw.opt create mode 100644 gcc/config/i386/mingw32.h create mode 100644 gcc/config/i386/mm3dnow.h create mode 100644 gcc/config/i386/mmintrin.h create mode 100644 gcc/config/i386/mmx.md create mode 100644 gcc/config/i386/msformat-c.c create mode 100644 gcc/config/i386/netbsd-elf.h create mode 100644 gcc/config/i386/netbsd.h create mode 100644 gcc/config/i386/netbsd64.h create mode 100644 gcc/config/i386/netware-crt0.c create mode 100644 gcc/config/i386/netware-libgcc.c create mode 100644 gcc/config/i386/netware-libgcc.def create mode 100644 gcc/config/i386/netware-libgcc.exp create mode 100644 gcc/config/i386/netware.c create mode 100644 gcc/config/i386/netware.h create mode 100644 gcc/config/i386/netware.opt create mode 100644 gcc/config/i386/nmmintrin.h create mode 100644 gcc/config/i386/nto.h create mode 100644 gcc/config/i386/nto.opt create mode 100644 gcc/config/i386/nwld.c create mode 100644 gcc/config/i386/nwld.h create mode 100644 gcc/config/i386/openbsd.h create mode 100644 gcc/config/i386/openbsdelf.h create mode 100644 gcc/config/i386/pentium.md create mode 100644 gcc/config/i386/pmm_malloc.h create mode 100644 gcc/config/i386/pmmintrin.h create mode 100644 
gcc/config/i386/popcntintrin.h create mode 100644 gcc/config/i386/ppro.md create mode 100644 gcc/config/i386/predicates.md create mode 100644 gcc/config/i386/rtemself.h create mode 100644 gcc/config/i386/sfp-machine.h create mode 100644 gcc/config/i386/smmintrin.h create mode 100644 gcc/config/i386/sol2-10.h create mode 100644 gcc/config/i386/sol2-c1.asm create mode 100644 gcc/config/i386/sol2-ci.asm create mode 100644 gcc/config/i386/sol2-cn.asm create mode 100644 gcc/config/i386/sol2-gas.h create mode 100644 gcc/config/i386/sol2-gc1.asm create mode 100644 gcc/config/i386/sol2-unwind.h create mode 100644 gcc/config/i386/sol2.h create mode 100644 gcc/config/i386/sse.md create mode 100644 gcc/config/i386/ssemath.h create mode 100644 gcc/config/i386/sync.md create mode 100644 gcc/config/i386/sysv4.h create mode 100644 gcc/config/i386/t-crtfm create mode 100644 gcc/config/i386/t-crtpc create mode 100644 gcc/config/i386/t-crtpic create mode 100644 gcc/config/i386/t-crtstuff create mode 100644 gcc/config/i386/t-cygming create mode 100644 gcc/config/i386/t-cygwin create mode 100644 gcc/config/i386/t-darwin create mode 100644 gcc/config/i386/t-darwin64 create mode 100644 gcc/config/i386/t-djgpp create mode 100644 gcc/config/i386/t-dlldir create mode 100644 gcc/config/i386/t-dlldir-x create mode 100644 gcc/config/i386/t-dw2-eh create mode 100644 gcc/config/i386/t-fprules-softfp create mode 100644 gcc/config/i386/t-gmm_malloc create mode 100644 gcc/config/i386/t-gnu create mode 100644 gcc/config/i386/t-gthr-win32 create mode 100644 gcc/config/i386/t-i386 create mode 100644 gcc/config/i386/t-i386elf create mode 100644 gcc/config/i386/t-interix create mode 100644 gcc/config/i386/t-kfreebsd create mode 100644 gcc/config/i386/t-linux create mode 100644 gcc/config/i386/t-linux64 create mode 100644 gcc/config/i386/t-mingw-w32 create mode 100644 gcc/config/i386/t-mingw-w64 create mode 100644 gcc/config/i386/t-mingw32 create mode 100644 gcc/config/i386/t-netware create mode 100644 gcc/config/i386/t-nto create mode 100644 gcc/config/i386/t-nwld create mode 100644 gcc/config/i386/t-openbsd create mode 100644 gcc/config/i386/t-pmm_malloc create mode 100644 gcc/config/i386/t-rtems-i386 create mode 100644 gcc/config/i386/t-sjlj-eh create mode 100644 gcc/config/i386/t-sol2-10 create mode 100644 gcc/config/i386/t-svr3dbx create mode 100644 gcc/config/i386/t-vxworks create mode 100644 gcc/config/i386/t-vxworksae create mode 100644 gcc/config/i386/tbmintrin.h create mode 100644 gcc/config/i386/tmmintrin.h create mode 100644 gcc/config/i386/unix.h create mode 100644 gcc/config/i386/vx-common.h create mode 100644 gcc/config/i386/vxworks.h create mode 100644 gcc/config/i386/vxworksae.h create mode 100644 gcc/config/i386/w32-unwind.h create mode 100644 gcc/config/i386/winnt-cxx.c create mode 100644 gcc/config/i386/winnt-stubs.c create mode 100644 gcc/config/i386/winnt.c create mode 100644 gcc/config/i386/wmmintrin.h create mode 100644 gcc/config/i386/x-cygwin create mode 100644 gcc/config/i386/x-darwin create mode 100644 gcc/config/i386/x-i386 create mode 100644 gcc/config/i386/x-mingw32 create mode 100644 gcc/config/i386/x86-64.h create mode 100644 gcc/config/i386/x86intrin.h create mode 100644 gcc/config/i386/xm-cygwin.h create mode 100644 gcc/config/i386/xm-djgpp.h create mode 100644 gcc/config/i386/xm-mingw32.h create mode 100644 gcc/config/i386/xmmintrin.h create mode 100644 gcc/config/i386/xopintrin.h create mode 100644 gcc/config/ia64/constraints.md create mode 100644 gcc/config/ia64/crtbegin.asm create mode 
100644 gcc/config/ia64/crtend.asm create mode 100644 gcc/config/ia64/crtfastmath.c create mode 100644 gcc/config/ia64/crti.asm create mode 100644 gcc/config/ia64/crtn.asm create mode 100644 gcc/config/ia64/div.md create mode 100644 gcc/config/ia64/elf.h create mode 100644 gcc/config/ia64/fde-glibc.c create mode 100644 gcc/config/ia64/fde-vms.c create mode 100644 gcc/config/ia64/freebsd.h create mode 100644 gcc/config/ia64/hpux.h create mode 100644 gcc/config/ia64/ia64-c.c create mode 100644 gcc/config/ia64/ia64-modes.def create mode 100644 gcc/config/ia64/ia64-protos.h create mode 100644 gcc/config/ia64/ia64.c create mode 100644 gcc/config/ia64/ia64.h create mode 100644 gcc/config/ia64/ia64.md create mode 100644 gcc/config/ia64/ia64.opt create mode 100644 gcc/config/ia64/ia64intrin.h create mode 100644 gcc/config/ia64/ilp32.opt create mode 100644 gcc/config/ia64/itanium2.md create mode 100644 gcc/config/ia64/lib1funcs.asm create mode 100644 gcc/config/ia64/libgcc-glibc.ver create mode 100644 gcc/config/ia64/libgcc-ia64.ver create mode 100644 gcc/config/ia64/linux-unwind.h create mode 100644 gcc/config/ia64/linux.h create mode 100644 gcc/config/ia64/predicates.md create mode 100644 gcc/config/ia64/quadlib.c create mode 100644 gcc/config/ia64/sfp-machine.h create mode 100644 gcc/config/ia64/sync.md create mode 100644 gcc/config/ia64/sysv4.h create mode 100644 gcc/config/ia64/t-fprules-softfp create mode 100644 gcc/config/ia64/t-glibc create mode 100644 gcc/config/ia64/t-glibc-libunwind create mode 100644 gcc/config/ia64/t-hpux create mode 100644 gcc/config/ia64/t-ia64 create mode 100644 gcc/config/ia64/t-vms create mode 100644 gcc/config/ia64/unwind-ia64.c create mode 100644 gcc/config/ia64/unwind-ia64.h create mode 100644 gcc/config/ia64/vect.md create mode 100644 gcc/config/ia64/vms-crtinit.asm create mode 100644 gcc/config/ia64/vms-unwind.h create mode 100644 gcc/config/ia64/vms.h create mode 100644 gcc/config/ia64/vms.opt create mode 100644 gcc/config/ia64/vms64.h create mode 100644 gcc/config/ia64/vms_symvec_libgcc_s.opt create mode 100644 gcc/config/interix.h create mode 100644 gcc/config/interix.opt create mode 100644 gcc/config/interix3.h create mode 100644 gcc/config/iq2000/abi create mode 100644 gcc/config/iq2000/constraints.md create mode 100644 gcc/config/iq2000/iq2000-protos.h create mode 100644 gcc/config/iq2000/iq2000.c create mode 100644 gcc/config/iq2000/iq2000.h create mode 100644 gcc/config/iq2000/iq2000.md create mode 100644 gcc/config/iq2000/iq2000.opt create mode 100644 gcc/config/iq2000/lib2extra-funcs.c create mode 100644 gcc/config/iq2000/predicates.md create mode 100644 gcc/config/iq2000/t-iq2000 create mode 100644 gcc/config/kfreebsd-gnu.h create mode 100644 gcc/config/knetbsd-gnu.h create mode 100644 gcc/config/kopensolaris-gnu.h create mode 100644 gcc/config/libgcc-glibc.ver create mode 100644 gcc/config/linux-android.h create mode 100644 gcc/config/linux-android.opt create mode 100644 gcc/config/linux.h create mode 100644 gcc/config/linux.opt create mode 100644 gcc/config/lm32/constraints.md create mode 100644 gcc/config/lm32/lm32-protos.h create mode 100644 gcc/config/lm32/lm32.c create mode 100644 gcc/config/lm32/lm32.h create mode 100644 gcc/config/lm32/lm32.md create mode 100644 gcc/config/lm32/lm32.opt create mode 100644 gcc/config/lm32/predicates.md create mode 100644 gcc/config/lm32/rtems.h create mode 100644 gcc/config/lm32/sfp-machine.h create mode 100644 gcc/config/lm32/t-fprules-softfp create mode 100644 gcc/config/lm32/t-lm32 create mode 100644 
gcc/config/lm32/uclinux-elf.h create mode 100644 gcc/config/lynx.h create mode 100644 gcc/config/lynx.opt create mode 100644 gcc/config/m32c/addsub.md create mode 100644 gcc/config/m32c/bitops.md create mode 100644 gcc/config/m32c/blkmov.md create mode 100644 gcc/config/m32c/cond.md create mode 100644 gcc/config/m32c/jump.md create mode 100644 gcc/config/m32c/m32c-lib1.S create mode 100644 gcc/config/m32c/m32c-lib2-trapv.c create mode 100644 gcc/config/m32c/m32c-lib2.c create mode 100644 gcc/config/m32c/m32c-modes.def create mode 100644 gcc/config/m32c/m32c-pragma.c create mode 100644 gcc/config/m32c/m32c-protos.h create mode 100644 gcc/config/m32c/m32c.abi create mode 100644 gcc/config/m32c/m32c.c create mode 100644 gcc/config/m32c/m32c.h create mode 100644 gcc/config/m32c/m32c.md create mode 100644 gcc/config/m32c/m32c.opt create mode 100644 gcc/config/m32c/minmax.md create mode 100644 gcc/config/m32c/mov.md create mode 100644 gcc/config/m32c/muldiv.md create mode 100644 gcc/config/m32c/predicates.md create mode 100644 gcc/config/m32c/prologue.md create mode 100644 gcc/config/m32c/rtems.h create mode 100644 gcc/config/m32c/shift.md create mode 100644 gcc/config/m32c/t-m32c create mode 100644 gcc/config/m32r/constraints.md create mode 100644 gcc/config/m32r/initfini.c create mode 100644 gcc/config/m32r/libgcc-glibc.ver create mode 100644 gcc/config/m32r/linux.h create mode 100644 gcc/config/m32r/little.h create mode 100644 gcc/config/m32r/m32r-protos.h create mode 100644 gcc/config/m32r/m32r.c create mode 100644 gcc/config/m32r/m32r.h create mode 100644 gcc/config/m32r/m32r.md create mode 100644 gcc/config/m32r/m32r.opt create mode 100644 gcc/config/m32r/predicates.md create mode 100644 gcc/config/m32r/rtems.h create mode 100644 gcc/config/m32r/t-linux create mode 100644 gcc/config/m32r/t-m32r create mode 100644 gcc/config/m68hc11/larith.asm create mode 100644 gcc/config/m68hc11/m68hc11-crt0.S create mode 100644 gcc/config/m68hc11/m68hc11-protos.h create mode 100644 gcc/config/m68hc11/m68hc11.c create mode 100644 gcc/config/m68hc11/m68hc11.h create mode 100644 gcc/config/m68hc11/m68hc11.md create mode 100644 gcc/config/m68hc11/m68hc11.opt create mode 100644 gcc/config/m68hc11/m68hc12.h create mode 100644 gcc/config/m68hc11/predicates.md create mode 100644 gcc/config/m68hc11/t-m68hc11 create mode 100644 gcc/config/m68k/cf.md create mode 100644 gcc/config/m68k/constraints.md create mode 100644 gcc/config/m68k/crti.s create mode 100644 gcc/config/m68k/crtn.s create mode 100644 gcc/config/m68k/fpgnulib.c create mode 100644 gcc/config/m68k/ieee.opt create mode 100644 gcc/config/m68k/lb1sf68.asm create mode 100644 gcc/config/m68k/linux-unwind.h create mode 100644 gcc/config/m68k/linux.h create mode 100644 gcc/config/m68k/m68020-elf.h create mode 100644 gcc/config/m68k/m68k-devices.def create mode 100644 gcc/config/m68k/m68k-modes.def create mode 100644 gcc/config/m68k/m68k-none.h create mode 100644 gcc/config/m68k/m68k-protos.h create mode 100644 gcc/config/m68k/m68k.c create mode 100644 gcc/config/m68k/m68k.h create mode 100644 gcc/config/m68k/m68k.md create mode 100644 gcc/config/m68k/m68k.opt create mode 100644 gcc/config/m68k/m68kelf.h create mode 100644 gcc/config/m68k/m68kemb.h create mode 100644 gcc/config/m68k/math-68881.h create mode 100644 gcc/config/m68k/netbsd-elf.h create mode 100644 gcc/config/m68k/openbsd.h create mode 100644 gcc/config/m68k/predicates.md create mode 100644 gcc/config/m68k/print-sysroot-suffix.sh create mode 100644 gcc/config/m68k/rtemself.h create mode 100644 
gcc/config/m68k/t-cf create mode 100644 gcc/config/m68k/t-crtstuff create mode 100644 gcc/config/m68k/t-floatlib create mode 100644 gcc/config/m68k/t-linux create mode 100644 gcc/config/m68k/t-m68k create mode 100644 gcc/config/m68k/t-m68kbare create mode 100644 gcc/config/m68k/t-m68kelf create mode 100644 gcc/config/m68k/t-mlibs create mode 100644 gcc/config/m68k/t-openbsd create mode 100644 gcc/config/m68k/t-rtems create mode 100644 gcc/config/m68k/t-slibgcc-elf-ver create mode 100644 gcc/config/m68k/t-uclinux create mode 100644 gcc/config/m68k/uclinux-oldabi.h create mode 100644 gcc/config/m68k/uclinux.h create mode 100644 gcc/config/m68k/uclinux.opt create mode 100644 gcc/config/mcore/constraints.md create mode 100644 gcc/config/mcore/crti.asm create mode 100644 gcc/config/mcore/crtn.asm create mode 100644 gcc/config/mcore/lib1.asm create mode 100644 gcc/config/mcore/mcore-elf.h create mode 100644 gcc/config/mcore/mcore-pe.h create mode 100644 gcc/config/mcore/mcore-protos.h create mode 100644 gcc/config/mcore/mcore.c create mode 100644 gcc/config/mcore/mcore.h create mode 100644 gcc/config/mcore/mcore.md create mode 100644 gcc/config/mcore/mcore.opt create mode 100644 gcc/config/mcore/predicates.md create mode 100644 gcc/config/mcore/t-mcore create mode 100644 gcc/config/mcore/t-mcore-pe create mode 100644 gcc/config/memcmp.c create mode 100644 gcc/config/memcpy.c create mode 100644 gcc/config/memmove.c create mode 100644 gcc/config/memset.c create mode 100644 gcc/config/mep/constraints.md create mode 100644 gcc/config/mep/default.h create mode 100644 gcc/config/mep/intrinsics.h create mode 100644 gcc/config/mep/intrinsics.md create mode 100644 gcc/config/mep/ivc2-template.h create mode 100644 gcc/config/mep/mep-c5.cpu create mode 100644 gcc/config/mep/mep-core.cpu create mode 100644 gcc/config/mep/mep-default.cpu create mode 100644 gcc/config/mep/mep-ext-cop.cpu create mode 100644 gcc/config/mep/mep-intrin.h create mode 100644 gcc/config/mep/mep-ivc2.cpu create mode 100644 gcc/config/mep/mep-lib1.asm create mode 100644 gcc/config/mep/mep-lib2.c create mode 100644 gcc/config/mep/mep-pragma.c create mode 100644 gcc/config/mep/mep-protos.h create mode 100644 gcc/config/mep/mep-tramp.c create mode 100644 gcc/config/mep/mep.c create mode 100644 gcc/config/mep/mep.cpu create mode 100644 gcc/config/mep/mep.h create mode 100644 gcc/config/mep/mep.md create mode 100644 gcc/config/mep/mep.opt create mode 100644 gcc/config/mep/predicates.md create mode 100644 gcc/config/mep/t-mep create mode 100644 gcc/config/microblaze/constraints.md create mode 100644 gcc/config/microblaze/crti.s create mode 100644 gcc/config/microblaze/crtn.s create mode 100644 gcc/config/microblaze/linux.h create mode 100644 gcc/config/microblaze/microblaze-c.c create mode 100644 gcc/config/microblaze/microblaze-protos.h create mode 100644 gcc/config/microblaze/microblaze.c create mode 100644 gcc/config/microblaze/microblaze.h create mode 100644 gcc/config/microblaze/microblaze.md create mode 100644 gcc/config/microblaze/microblaze.opt create mode 100644 gcc/config/microblaze/predicates.md create mode 100644 gcc/config/microblaze/t-microblaze create mode 100644 gcc/config/mips/10000.md create mode 100644 gcc/config/mips/20kc.md create mode 100644 gcc/config/mips/24k.md create mode 100644 gcc/config/mips/3000.md create mode 100644 gcc/config/mips/4000.md create mode 100644 gcc/config/mips/4100.md create mode 100644 gcc/config/mips/4130.md create mode 100644 gcc/config/mips/4300.md create mode 100644 gcc/config/mips/4600.md 
create mode 100644 gcc/config/mips/4k.md create mode 100644 gcc/config/mips/5000.md create mode 100644 gcc/config/mips/5400.md create mode 100644 gcc/config/mips/5500.md create mode 100644 gcc/config/mips/5k.md create mode 100644 gcc/config/mips/6000.md create mode 100644 gcc/config/mips/7000.md create mode 100644 gcc/config/mips/74k.md create mode 100644 gcc/config/mips/9000.md create mode 100644 gcc/config/mips/constraints.md create mode 100644 gcc/config/mips/crtfastmath.c create mode 100644 gcc/config/mips/crti.asm create mode 100644 gcc/config/mips/crtn.asm create mode 100644 gcc/config/mips/driver-native.c create mode 100644 gcc/config/mips/elf.h create mode 100644 gcc/config/mips/elfoabi.h create mode 100644 gcc/config/mips/elforion.h create mode 100644 gcc/config/mips/generic.md create mode 100644 gcc/config/mips/iris6.h create mode 100644 gcc/config/mips/iris6.opt create mode 100644 gcc/config/mips/irix-crti.asm create mode 100644 gcc/config/mips/irix-crtn.asm create mode 100644 gcc/config/mips/libgcc-mips16.ver create mode 100644 gcc/config/mips/linux-unwind.h create mode 100644 gcc/config/mips/linux.h create mode 100644 gcc/config/mips/linux64.h create mode 100644 gcc/config/mips/loongson.h create mode 100644 gcc/config/mips/loongson.md create mode 100644 gcc/config/mips/loongson2ef.md create mode 100644 gcc/config/mips/loongson3a.md create mode 100644 gcc/config/mips/mips-dsp.md create mode 100644 gcc/config/mips/mips-dspr2.md create mode 100644 gcc/config/mips/mips-fixed.md create mode 100644 gcc/config/mips/mips-ftypes.def create mode 100644 gcc/config/mips/mips-modes.def create mode 100644 gcc/config/mips/mips-protos.h create mode 100644 gcc/config/mips/mips-ps-3d.md create mode 100644 gcc/config/mips/mips.c create mode 100644 gcc/config/mips/mips.h create mode 100644 gcc/config/mips/mips.md create mode 100644 gcc/config/mips/mips.opt create mode 100644 gcc/config/mips/mips16.S create mode 100644 gcc/config/mips/netbsd.h create mode 100644 gcc/config/mips/octeon.md create mode 100644 gcc/config/mips/openbsd.h create mode 100644 gcc/config/mips/predicates.md create mode 100644 gcc/config/mips/r3900.h create mode 100644 gcc/config/mips/rtems.h create mode 100644 gcc/config/mips/sb1.md create mode 100644 gcc/config/mips/sdb.h create mode 100644 gcc/config/mips/sde.h create mode 100644 gcc/config/mips/sde.opt create mode 100644 gcc/config/mips/sdemtk.h create mode 100644 gcc/config/mips/sr71k.md create mode 100644 gcc/config/mips/st.h create mode 100644 gcc/config/mips/sync.md create mode 100644 gcc/config/mips/t-elf create mode 100644 gcc/config/mips/t-iris create mode 100644 gcc/config/mips/t-iris6 create mode 100644 gcc/config/mips/t-isa3264 create mode 100644 gcc/config/mips/t-libgcc-mips16 create mode 100644 gcc/config/mips/t-linux64 create mode 100644 gcc/config/mips/t-mips create mode 100644 gcc/config/mips/t-r3900 create mode 100644 gcc/config/mips/t-rtems create mode 100644 gcc/config/mips/t-sb1 create mode 100644 gcc/config/mips/t-sde create mode 100644 gcc/config/mips/t-sdemtk create mode 100644 gcc/config/mips/t-slibgcc-irix create mode 100644 gcc/config/mips/t-sr71k create mode 100644 gcc/config/mips/t-st create mode 100644 gcc/config/mips/t-vr create mode 100644 gcc/config/mips/t-vxworks create mode 100644 gcc/config/mips/vr.h create mode 100644 gcc/config/mips/vr4120-div.S create mode 100644 gcc/config/mips/vxworks.h create mode 100644 gcc/config/mips/x-native create mode 100644 gcc/config/mips/xlr.md create mode 100644 gcc/config/mmix/crti.asm create mode 100644 
gcc/config/mmix/crtn.asm create mode 100644 gcc/config/mmix/mmix-modes.def create mode 100644 gcc/config/mmix/mmix-protos.h create mode 100644 gcc/config/mmix/mmix.c create mode 100644 gcc/config/mmix/mmix.h create mode 100644 gcc/config/mmix/mmix.md create mode 100644 gcc/config/mmix/mmix.opt create mode 100644 gcc/config/mmix/predicates.md create mode 100644 gcc/config/mmix/t-mmix create mode 100644 gcc/config/mn10300/constraints.md create mode 100644 gcc/config/mn10300/linux.h create mode 100644 gcc/config/mn10300/mn10300-modes.def create mode 100644 gcc/config/mn10300/mn10300-protos.h create mode 100644 gcc/config/mn10300/mn10300.c create mode 100644 gcc/config/mn10300/mn10300.h create mode 100644 gcc/config/mn10300/mn10300.md create mode 100644 gcc/config/mn10300/mn10300.opt create mode 100644 gcc/config/mn10300/predicates.md create mode 100644 gcc/config/mn10300/t-linux create mode 100644 gcc/config/mn10300/t-mn10300 create mode 100644 gcc/config/moxie/constraints.md create mode 100644 gcc/config/moxie/crti.asm create mode 100644 gcc/config/moxie/crtn.asm create mode 100644 gcc/config/moxie/moxie-protos.h create mode 100644 gcc/config/moxie/moxie.c create mode 100644 gcc/config/moxie/moxie.h create mode 100644 gcc/config/moxie/moxie.md create mode 100644 gcc/config/moxie/predicates.md create mode 100644 gcc/config/moxie/rtems.h create mode 100644 gcc/config/moxie/sfp-machine.h create mode 100644 gcc/config/moxie/t-moxie create mode 100644 gcc/config/moxie/t-moxie-softfp create mode 100644 gcc/config/moxie/uclinux.h create mode 100644 gcc/config/netbsd-aout.h create mode 100644 gcc/config/netbsd-elf.h create mode 100644 gcc/config/netbsd-elf.opt create mode 100644 gcc/config/netbsd.h create mode 100644 gcc/config/netbsd.opt create mode 100644 gcc/config/newlib-stdint.h create mode 100644 gcc/config/openbsd-libpthread.h create mode 100644 gcc/config/openbsd-oldgas.h create mode 100644 gcc/config/openbsd-stdint.h create mode 100644 gcc/config/openbsd.h create mode 100644 gcc/config/openbsd.opt create mode 100644 gcc/config/pa/constraints.md create mode 100644 gcc/config/pa/elf.h create mode 100644 gcc/config/pa/fptr.c create mode 100644 gcc/config/pa/hpux-unwind.h create mode 100644 gcc/config/pa/lib2funcs.asm create mode 100644 gcc/config/pa/linux-atomic.c create mode 100644 gcc/config/pa/linux-unwind.h create mode 100644 gcc/config/pa/milli64.S create mode 100644 gcc/config/pa/pa-64.h create mode 100644 gcc/config/pa/pa-hpux.h create mode 100644 gcc/config/pa/pa-hpux.opt create mode 100644 gcc/config/pa/pa-hpux10.h create mode 100644 gcc/config/pa/pa-hpux10.opt create mode 100644 gcc/config/pa/pa-hpux1010.h create mode 100644 gcc/config/pa/pa-hpux1010.opt create mode 100644 gcc/config/pa/pa-hpux11.h create mode 100644 gcc/config/pa/pa-hpux1111.h create mode 100644 gcc/config/pa/pa-hpux1111.opt create mode 100644 gcc/config/pa/pa-linux.h create mode 100644 gcc/config/pa/pa-modes.def create mode 100644 gcc/config/pa/pa-protos.h create mode 100644 gcc/config/pa/pa.c create mode 100644 gcc/config/pa/pa.h create mode 100644 gcc/config/pa/pa.md create mode 100644 gcc/config/pa/pa.opt create mode 100644 gcc/config/pa/pa32-linux.h create mode 100644 gcc/config/pa/pa32-regs.h create mode 100644 gcc/config/pa/pa64-hpux.h create mode 100644 gcc/config/pa/pa64-hpux.opt create mode 100644 gcc/config/pa/pa64-linux.h create mode 100644 gcc/config/pa/pa64-regs.h create mode 100644 gcc/config/pa/pa64-start.h create mode 100644 gcc/config/pa/predicates.md create mode 100644 gcc/config/pa/quadlib.c 
create mode 100644 gcc/config/pa/som.h create mode 100644 gcc/config/pa/stublib.c create mode 100644 gcc/config/pa/t-dce-thr create mode 100644 gcc/config/pa/t-hpux-shlib create mode 100644 gcc/config/pa/t-linux create mode 100644 gcc/config/pa/t-linux64 create mode 100644 gcc/config/pa/t-pa create mode 100644 gcc/config/pa/t-pa-hpux create mode 100644 gcc/config/pa/t-pa-hpux10 create mode 100644 gcc/config/pa/t-pa-hpux11 create mode 100644 gcc/config/pa/t-pa64 create mode 100644 gcc/config/pa/t-slibgcc-dwarf-ver create mode 100644 gcc/config/pa/t-slibgcc-sjlj-ver create mode 100644 gcc/config/pdp11/constraints.md create mode 100644 gcc/config/pdp11/pdp11-modes.def create mode 100644 gcc/config/pdp11/pdp11-protos.h create mode 100644 gcc/config/pdp11/pdp11.c create mode 100644 gcc/config/pdp11/pdp11.h create mode 100644 gcc/config/pdp11/pdp11.md create mode 100644 gcc/config/pdp11/pdp11.opt create mode 100644 gcc/config/pdp11/predicates.md create mode 100644 gcc/config/pdp11/t-pdp11 create mode 100644 gcc/config/picochip/constraints.md create mode 100644 gcc/config/picochip/dfa_space.md create mode 100644 gcc/config/picochip/dfa_speed.md create mode 100644 gcc/config/picochip/libgccExtras/adddi3.asm create mode 100644 gcc/config/picochip/libgccExtras/ashlsi3.asm create mode 100644 gcc/config/picochip/libgccExtras/ashlsi3.c create mode 100644 gcc/config/picochip/libgccExtras/ashrsi3.asm create mode 100644 gcc/config/picochip/libgccExtras/ashrsi3.c create mode 100644 gcc/config/picochip/libgccExtras/clzsi2.asm create mode 100644 gcc/config/picochip/libgccExtras/cmpsi2.asm create mode 100644 gcc/config/picochip/libgccExtras/divmod15.asm create mode 100644 gcc/config/picochip/libgccExtras/divmodhi4.asm create mode 100644 gcc/config/picochip/libgccExtras/divmodsi4.asm create mode 100644 gcc/config/picochip/libgccExtras/fake_libgcc.asm create mode 100644 gcc/config/picochip/libgccExtras/longjmp.asm create mode 100644 gcc/config/picochip/libgccExtras/lshrsi3.asm create mode 100644 gcc/config/picochip/libgccExtras/lshrsi3.c create mode 100644 gcc/config/picochip/libgccExtras/parityhi2.asm create mode 100644 gcc/config/picochip/libgccExtras/popcounthi2.asm create mode 100644 gcc/config/picochip/libgccExtras/setjmp.asm create mode 100644 gcc/config/picochip/libgccExtras/subdi3.asm create mode 100644 gcc/config/picochip/libgccExtras/ucmpsi2.asm create mode 100644 gcc/config/picochip/libgccExtras/udivmodhi4.asm create mode 100644 gcc/config/picochip/libgccExtras/udivmodsi4.asm create mode 100644 gcc/config/picochip/picochip-protos.h create mode 100644 gcc/config/picochip/picochip.c create mode 100644 gcc/config/picochip/picochip.h create mode 100644 gcc/config/picochip/picochip.md create mode 100644 gcc/config/picochip/picochip.opt create mode 100644 gcc/config/picochip/predicates.md create mode 100644 gcc/config/picochip/t-picochip create mode 100644 gcc/config/print-sysroot-suffix.sh create mode 100644 gcc/config/rpath.opt create mode 100644 gcc/config/rs6000/40x.md create mode 100644 gcc/config/rs6000/440.md create mode 100644 gcc/config/rs6000/476.md create mode 100644 gcc/config/rs6000/603.md create mode 100644 gcc/config/rs6000/6xx.md create mode 100644 gcc/config/rs6000/7450.md create mode 100644 gcc/config/rs6000/750cl.h create mode 100644 gcc/config/rs6000/7xx.md create mode 100644 gcc/config/rs6000/8540.md create mode 100644 gcc/config/rs6000/a2.md create mode 100644 gcc/config/rs6000/aix-stdint.h create mode 100644 gcc/config/rs6000/aix.h create mode 100644 gcc/config/rs6000/aix43.h create 
mode 100644 gcc/config/rs6000/aix51.h create mode 100644 gcc/config/rs6000/aix52.h create mode 100644 gcc/config/rs6000/aix53.h create mode 100644 gcc/config/rs6000/aix61.h create mode 100644 gcc/config/rs6000/aix64.opt create mode 100644 gcc/config/rs6000/altivec.h create mode 100644 gcc/config/rs6000/altivec.md create mode 100644 gcc/config/rs6000/biarch64.h create mode 100644 gcc/config/rs6000/cell.md create mode 100644 gcc/config/rs6000/constraints.md create mode 100644 gcc/config/rs6000/crtresfpr.asm create mode 100644 gcc/config/rs6000/crtresgpr.asm create mode 100644 gcc/config/rs6000/crtresxfpr.asm create mode 100644 gcc/config/rs6000/crtresxgpr.asm create mode 100644 gcc/config/rs6000/crtsavfpr.asm create mode 100644 gcc/config/rs6000/crtsavgpr.asm create mode 100644 gcc/config/rs6000/darwin-asm.h create mode 100644 gcc/config/rs6000/darwin-fallback.c create mode 100644 gcc/config/rs6000/darwin-fpsave.asm create mode 100644 gcc/config/rs6000/darwin-ldouble-format create mode 100644 gcc/config/rs6000/darwin-ldouble.c create mode 100644 gcc/config/rs6000/darwin-libgcc.10.4.ver create mode 100644 gcc/config/rs6000/darwin-libgcc.10.5.ver create mode 100644 gcc/config/rs6000/darwin-tramp.asm create mode 100644 gcc/config/rs6000/darwin-unwind.h create mode 100644 gcc/config/rs6000/darwin-vecsave.asm create mode 100644 gcc/config/rs6000/darwin-world.asm create mode 100644 gcc/config/rs6000/darwin.h create mode 100644 gcc/config/rs6000/darwin.md create mode 100644 gcc/config/rs6000/darwin.opt create mode 100644 gcc/config/rs6000/darwin64.h create mode 100644 gcc/config/rs6000/darwin7.h create mode 100644 gcc/config/rs6000/darwin8.h create mode 100644 gcc/config/rs6000/default64.h create mode 100644 gcc/config/rs6000/dfp.md create mode 100644 gcc/config/rs6000/driver-rs6000.c create mode 100644 gcc/config/rs6000/e300c2c3.md create mode 100644 gcc/config/rs6000/e500-double.h create mode 100644 gcc/config/rs6000/e500.h create mode 100644 gcc/config/rs6000/e500crtres32gpr.asm create mode 100644 gcc/config/rs6000/e500crtres64gpr.asm create mode 100644 gcc/config/rs6000/e500crtres64gprctr.asm create mode 100644 gcc/config/rs6000/e500crtrest32gpr.asm create mode 100644 gcc/config/rs6000/e500crtrest64gpr.asm create mode 100644 gcc/config/rs6000/e500crtresx32gpr.asm create mode 100644 gcc/config/rs6000/e500crtresx64gpr.asm create mode 100644 gcc/config/rs6000/e500crtsav32gpr.asm create mode 100644 gcc/config/rs6000/e500crtsav64gpr.asm create mode 100644 gcc/config/rs6000/e500crtsav64gprctr.asm create mode 100644 gcc/config/rs6000/e500crtsavg32gpr.asm create mode 100644 gcc/config/rs6000/e500crtsavg64gpr.asm create mode 100644 gcc/config/rs6000/e500crtsavg64gprctr.asm create mode 100644 gcc/config/rs6000/e500mc.md create mode 100644 gcc/config/rs6000/e500mc64.md create mode 100644 gcc/config/rs6000/eabi-ci.asm create mode 100644 gcc/config/rs6000/eabi-cn.asm create mode 100644 gcc/config/rs6000/eabi.asm create mode 100644 gcc/config/rs6000/eabi.h create mode 100644 gcc/config/rs6000/eabialtivec.h create mode 100644 gcc/config/rs6000/eabisim.h create mode 100644 gcc/config/rs6000/eabispe.h create mode 100644 gcc/config/rs6000/freebsd.h create mode 100644 gcc/config/rs6000/gnu.h create mode 100644 gcc/config/rs6000/host-darwin.c create mode 100644 gcc/config/rs6000/host-ppc64-darwin.c create mode 100644 gcc/config/rs6000/libgcc-ppc-glibc.ver create mode 100644 gcc/config/rs6000/libgcc-ppc64.ver create mode 100644 gcc/config/rs6000/linux-unwind.h create mode 100644 gcc/config/rs6000/linux.h create 
mode 100644 gcc/config/rs6000/linux64.h create mode 100644 gcc/config/rs6000/linux64.opt create mode 100644 gcc/config/rs6000/linuxaltivec.h create mode 100644 gcc/config/rs6000/linuxspe.h create mode 100644 gcc/config/rs6000/lynx.h create mode 100644 gcc/config/rs6000/milli.exp create mode 100644 gcc/config/rs6000/mpc.md create mode 100644 gcc/config/rs6000/netbsd.h create mode 100644 gcc/config/rs6000/option-defaults.h create mode 100644 gcc/config/rs6000/paired.h create mode 100644 gcc/config/rs6000/paired.md create mode 100644 gcc/config/rs6000/power4.md create mode 100644 gcc/config/rs6000/power5.md create mode 100644 gcc/config/rs6000/power6.md create mode 100644 gcc/config/rs6000/power7.md create mode 100644 gcc/config/rs6000/ppc-asm.h create mode 100644 gcc/config/rs6000/ppc64-fp.c create mode 100644 gcc/config/rs6000/ppu_intrinsics.h create mode 100644 gcc/config/rs6000/predicates.md create mode 100644 gcc/config/rs6000/rios1.md create mode 100644 gcc/config/rs6000/rios2.md create mode 100644 gcc/config/rs6000/rs6000-builtin.def create mode 100644 gcc/config/rs6000/rs6000-c.c create mode 100644 gcc/config/rs6000/rs6000-modes.def create mode 100644 gcc/config/rs6000/rs6000-opts.h create mode 100644 gcc/config/rs6000/rs6000-protos.h create mode 100644 gcc/config/rs6000/rs6000.c create mode 100644 gcc/config/rs6000/rs6000.h create mode 100644 gcc/config/rs6000/rs6000.md create mode 100644 gcc/config/rs6000/rs6000.opt create mode 100644 gcc/config/rs6000/rs64.md create mode 100644 gcc/config/rs6000/rtems.h create mode 100644 gcc/config/rs6000/secureplt.h create mode 100644 gcc/config/rs6000/sfp-machine.h create mode 100644 gcc/config/rs6000/si2vmx.h create mode 100644 gcc/config/rs6000/singlefp.h create mode 100644 gcc/config/rs6000/sol-ci.asm create mode 100644 gcc/config/rs6000/sol-cn.asm create mode 100644 gcc/config/rs6000/spe.h create mode 100644 gcc/config/rs6000/spe.md create mode 100644 gcc/config/rs6000/spu2vmx.h create mode 100644 gcc/config/rs6000/sync.md create mode 100644 gcc/config/rs6000/sysv4.h create mode 100644 gcc/config/rs6000/sysv4.opt create mode 100644 gcc/config/rs6000/sysv4le.h create mode 100644 gcc/config/rs6000/t-aix43 create mode 100644 gcc/config/rs6000/t-aix52 create mode 100644 gcc/config/rs6000/t-darwin create mode 100644 gcc/config/rs6000/t-darwin64 create mode 100644 gcc/config/rs6000/t-darwin8 create mode 100644 gcc/config/rs6000/t-fprules create mode 100644 gcc/config/rs6000/t-fprules-fpbit create mode 100644 gcc/config/rs6000/t-fprules-softfp create mode 100644 gcc/config/rs6000/t-freebsd create mode 100644 gcc/config/rs6000/t-linux create mode 100644 gcc/config/rs6000/t-linux64 create mode 100644 gcc/config/rs6000/t-lynx create mode 100644 gcc/config/rs6000/t-netbsd create mode 100644 gcc/config/rs6000/t-ppccomm create mode 100644 gcc/config/rs6000/t-ppcendian create mode 100644 gcc/config/rs6000/t-ppcgas create mode 100644 gcc/config/rs6000/t-ppcos create mode 100644 gcc/config/rs6000/t-rs6000 create mode 100644 gcc/config/rs6000/t-rtems create mode 100644 gcc/config/rs6000/t-spe create mode 100644 gcc/config/rs6000/t-vxworks create mode 100644 gcc/config/rs6000/t-vxworksae create mode 100644 gcc/config/rs6000/t-xilinx create mode 100644 gcc/config/rs6000/titan.md create mode 100644 gcc/config/rs6000/tramp.asm create mode 100644 gcc/config/rs6000/vec_types.h create mode 100644 gcc/config/rs6000/vector.md create mode 100644 gcc/config/rs6000/vsx.md create mode 100644 gcc/config/rs6000/vxworks.h create mode 100644 gcc/config/rs6000/vxworksae.h 
create mode 100644 gcc/config/rs6000/x-aix create mode 100644 gcc/config/rs6000/x-darwin create mode 100644 gcc/config/rs6000/x-darwin64 create mode 100644 gcc/config/rs6000/x-linux-relax create mode 100644 gcc/config/rs6000/x-rs6000 create mode 100644 gcc/config/rs6000/xcoff.h create mode 100644 gcc/config/rs6000/xfpu.h create mode 100644 gcc/config/rs6000/xfpu.md create mode 100644 gcc/config/rs6000/xilinx.h create mode 100644 gcc/config/rs6000/xilinx.opt create mode 100644 gcc/config/rtems.h create mode 100644 gcc/config/rtems.opt create mode 100644 gcc/config/rx/constraints.md create mode 100644 gcc/config/rx/predicates.md create mode 100644 gcc/config/rx/rx-modes.def create mode 100644 gcc/config/rx/rx-protos.h create mode 100644 gcc/config/rx/rx.c create mode 100644 gcc/config/rx/rx.h create mode 100644 gcc/config/rx/rx.md create mode 100644 gcc/config/rx/rx.opt create mode 100644 gcc/config/rx/t-rx create mode 100644 gcc/config/s390/2064.md create mode 100644 gcc/config/s390/2084.md create mode 100644 gcc/config/s390/2097.md create mode 100644 gcc/config/s390/2817.md create mode 100644 gcc/config/s390/constraints.md create mode 100644 gcc/config/s390/linux-unwind.h create mode 100644 gcc/config/s390/linux.h create mode 100644 gcc/config/s390/predicates.md create mode 100644 gcc/config/s390/s390-modes.def create mode 100644 gcc/config/s390/s390-protos.h create mode 100644 gcc/config/s390/s390.c create mode 100644 gcc/config/s390/s390.h create mode 100644 gcc/config/s390/s390.md create mode 100644 gcc/config/s390/s390.opt create mode 100644 gcc/config/s390/s390x.h create mode 100644 gcc/config/s390/t-linux64 create mode 100644 gcc/config/s390/tpf-unwind.h create mode 100644 gcc/config/s390/tpf.h create mode 100644 gcc/config/s390/tpf.md create mode 100644 gcc/config/s390/tpf.opt create mode 100644 gcc/config/score/constraints.md create mode 100644 gcc/config/score/crti.asm create mode 100644 gcc/config/score/crtn.asm create mode 100644 gcc/config/score/elf.h create mode 100644 gcc/config/score/predicates.md create mode 100644 gcc/config/score/score-conv.h create mode 100644 gcc/config/score/score-generic.md create mode 100644 gcc/config/score/score-modes.def create mode 100644 gcc/config/score/score-protos.h create mode 100644 gcc/config/score/score.c create mode 100644 gcc/config/score/score.h create mode 100644 gcc/config/score/score.md create mode 100644 gcc/config/score/score.opt create mode 100644 gcc/config/score/score7.c create mode 100644 gcc/config/score/score7.h create mode 100644 gcc/config/score/sfp-machine.h create mode 100644 gcc/config/score/t-score-elf create mode 100644 gcc/config/score/t-score-softfp create mode 100644 gcc/config/sh/constraints.md create mode 100644 gcc/config/sh/crt1.asm create mode 100644 gcc/config/sh/crti.asm create mode 100644 gcc/config/sh/crtn.asm create mode 100644 gcc/config/sh/divcost-analysis create mode 100644 gcc/config/sh/divtab-sh4-300.c create mode 100644 gcc/config/sh/divtab-sh4.c create mode 100644 gcc/config/sh/divtab.c create mode 100644 gcc/config/sh/elf.h create mode 100644 gcc/config/sh/embed-elf.h create mode 100644 gcc/config/sh/lib1funcs-4-300.asm create mode 100644 gcc/config/sh/lib1funcs-Os-4-200.asm create mode 100644 gcc/config/sh/lib1funcs.asm create mode 100644 gcc/config/sh/lib1funcs.h create mode 100644 gcc/config/sh/libgcc-excl.ver create mode 100644 gcc/config/sh/libgcc-glibc.ver create mode 100644 gcc/config/sh/linux-atomic.asm create mode 100644 gcc/config/sh/linux-unwind.h create mode 100644 
gcc/config/sh/linux.h create mode 100644 gcc/config/sh/little.h create mode 100644 gcc/config/sh/netbsd-elf.h create mode 100644 gcc/config/sh/newlib.h create mode 100644 gcc/config/sh/predicates.md create mode 100644 gcc/config/sh/rtems.h create mode 100644 gcc/config/sh/rtemself.h create mode 100644 gcc/config/sh/sh-c.c create mode 100644 gcc/config/sh/sh-modes.def create mode 100644 gcc/config/sh/sh-protos.h create mode 100644 gcc/config/sh/sh-symbian.h create mode 100644 gcc/config/sh/sh.c create mode 100644 gcc/config/sh/sh.h create mode 100644 gcc/config/sh/sh.md create mode 100644 gcc/config/sh/sh.opt create mode 100644 gcc/config/sh/sh1.md create mode 100644 gcc/config/sh/sh4-300.md create mode 100644 gcc/config/sh/sh4.md create mode 100644 gcc/config/sh/sh4a.md create mode 100644 gcc/config/sh/sh64.h create mode 100644 gcc/config/sh/shmedia.h create mode 100644 gcc/config/sh/shmedia.md create mode 100644 gcc/config/sh/sshmedia.h create mode 100644 gcc/config/sh/superh.h create mode 100644 gcc/config/sh/superh.opt create mode 100644 gcc/config/sh/symbian-base.c create mode 100644 gcc/config/sh/symbian-c.c create mode 100644 gcc/config/sh/symbian-cxx.c create mode 100644 gcc/config/sh/symbian-post.h create mode 100644 gcc/config/sh/symbian-pre.h create mode 100644 gcc/config/sh/t-elf create mode 100644 gcc/config/sh/t-linux create mode 100644 gcc/config/sh/t-linux64 create mode 100644 gcc/config/sh/t-netbsd create mode 100644 gcc/config/sh/t-netbsd-sh5-64 create mode 100644 gcc/config/sh/t-rtems create mode 100644 gcc/config/sh/t-sh create mode 100644 gcc/config/sh/t-sh64 create mode 100644 gcc/config/sh/t-superh create mode 100644 gcc/config/sh/t-symbian create mode 100644 gcc/config/sh/t-vxworks create mode 100644 gcc/config/sh/ushmedia.h create mode 100644 gcc/config/sh/vxworks.h create mode 100644 gcc/config/soft-fp/README create mode 100644 gcc/config/soft-fp/adddf3.c create mode 100644 gcc/config/soft-fp/addsf3.c create mode 100644 gcc/config/soft-fp/addtf3.c create mode 100644 gcc/config/soft-fp/divdf3.c create mode 100644 gcc/config/soft-fp/divsf3.c create mode 100644 gcc/config/soft-fp/divtf3.c create mode 100644 gcc/config/soft-fp/double.h create mode 100644 gcc/config/soft-fp/eqdf2.c create mode 100644 gcc/config/soft-fp/eqsf2.c create mode 100644 gcc/config/soft-fp/eqtf2.c create mode 100644 gcc/config/soft-fp/extenddftf2.c create mode 100644 gcc/config/soft-fp/extended.h create mode 100644 gcc/config/soft-fp/extendsfdf2.c create mode 100644 gcc/config/soft-fp/extendsftf2.c create mode 100644 gcc/config/soft-fp/extendxftf2.c create mode 100644 gcc/config/soft-fp/fixdfdi.c create mode 100644 gcc/config/soft-fp/fixdfsi.c create mode 100644 gcc/config/soft-fp/fixdfti.c create mode 100644 gcc/config/soft-fp/fixsfdi.c create mode 100644 gcc/config/soft-fp/fixsfsi.c create mode 100644 gcc/config/soft-fp/fixsfti.c create mode 100644 gcc/config/soft-fp/fixtfdi.c create mode 100644 gcc/config/soft-fp/fixtfsi.c create mode 100644 gcc/config/soft-fp/fixtfti.c create mode 100644 gcc/config/soft-fp/fixunsdfdi.c create mode 100644 gcc/config/soft-fp/fixunsdfsi.c create mode 100644 gcc/config/soft-fp/fixunsdfti.c create mode 100644 gcc/config/soft-fp/fixunssfdi.c create mode 100644 gcc/config/soft-fp/fixunssfsi.c create mode 100644 gcc/config/soft-fp/fixunssfti.c create mode 100644 gcc/config/soft-fp/fixunstfdi.c create mode 100644 gcc/config/soft-fp/fixunstfsi.c create mode 100644 gcc/config/soft-fp/fixunstfti.c create mode 100644 gcc/config/soft-fp/floatdidf.c create mode 100644 
gcc/config/soft-fp/floatdisf.c create mode 100644 gcc/config/soft-fp/floatditf.c create mode 100644 gcc/config/soft-fp/floatsidf.c create mode 100644 gcc/config/soft-fp/floatsisf.c create mode 100644 gcc/config/soft-fp/floatsitf.c create mode 100644 gcc/config/soft-fp/floattidf.c create mode 100644 gcc/config/soft-fp/floattisf.c create mode 100644 gcc/config/soft-fp/floattitf.c create mode 100644 gcc/config/soft-fp/floatundidf.c create mode 100644 gcc/config/soft-fp/floatundisf.c create mode 100644 gcc/config/soft-fp/floatunditf.c create mode 100644 gcc/config/soft-fp/floatunsidf.c create mode 100644 gcc/config/soft-fp/floatunsisf.c create mode 100644 gcc/config/soft-fp/floatunsitf.c create mode 100644 gcc/config/soft-fp/floatuntidf.c create mode 100644 gcc/config/soft-fp/floatuntisf.c create mode 100644 gcc/config/soft-fp/floatuntitf.c create mode 100644 gcc/config/soft-fp/gedf2.c create mode 100644 gcc/config/soft-fp/gesf2.c create mode 100644 gcc/config/soft-fp/getf2.c create mode 100644 gcc/config/soft-fp/ledf2.c create mode 100644 gcc/config/soft-fp/lesf2.c create mode 100644 gcc/config/soft-fp/letf2.c create mode 100644 gcc/config/soft-fp/muldf3.c create mode 100644 gcc/config/soft-fp/mulsf3.c create mode 100644 gcc/config/soft-fp/multf3.c create mode 100644 gcc/config/soft-fp/negdf2.c create mode 100644 gcc/config/soft-fp/negsf2.c create mode 100644 gcc/config/soft-fp/negtf2.c create mode 100644 gcc/config/soft-fp/op-1.h create mode 100644 gcc/config/soft-fp/op-2.h create mode 100644 gcc/config/soft-fp/op-4.h create mode 100644 gcc/config/soft-fp/op-8.h create mode 100644 gcc/config/soft-fp/op-common.h create mode 100644 gcc/config/soft-fp/quad.h create mode 100644 gcc/config/soft-fp/single.h create mode 100644 gcc/config/soft-fp/soft-fp.h create mode 100644 gcc/config/soft-fp/subdf3.c create mode 100644 gcc/config/soft-fp/subsf3.c create mode 100644 gcc/config/soft-fp/subtf3.c create mode 100644 gcc/config/soft-fp/t-softfp create mode 100644 gcc/config/soft-fp/truncdfsf2.c create mode 100644 gcc/config/soft-fp/trunctfdf2.c create mode 100644 gcc/config/soft-fp/trunctfsf2.c create mode 100644 gcc/config/soft-fp/trunctfxf2.c create mode 100644 gcc/config/soft-fp/unorddf2.c create mode 100644 gcc/config/soft-fp/unordsf2.c create mode 100644 gcc/config/soft-fp/unordtf2.c create mode 100644 gcc/config/sol2-10.h create mode 100644 gcc/config/sol2-c.c create mode 100644 gcc/config/sol2-gld.h create mode 100644 gcc/config/sol2-protos.h create mode 100644 gcc/config/sol2.c create mode 100644 gcc/config/sol2.h create mode 100644 gcc/config/sol2.opt create mode 100644 gcc/config/sparc/biarch64.h create mode 100644 gcc/config/sparc/constraints.md create mode 100644 gcc/config/sparc/crtfastmath.c create mode 100644 gcc/config/sparc/cypress.md create mode 100644 gcc/config/sparc/freebsd.h create mode 100644 gcc/config/sparc/gmon-sol2.c create mode 100644 gcc/config/sparc/hypersparc.md create mode 100644 gcc/config/sparc/lb1spc.asm create mode 100644 gcc/config/sparc/lb1spl.asm create mode 100644 gcc/config/sparc/leon.md create mode 100644 gcc/config/sparc/libgcc-sparc-glibc.ver create mode 100644 gcc/config/sparc/linux-unwind.h create mode 100644 gcc/config/sparc/linux.h create mode 100644 gcc/config/sparc/linux64.h create mode 100644 gcc/config/sparc/little-endian.opt create mode 100644 gcc/config/sparc/long-double-switch.opt create mode 100644 gcc/config/sparc/netbsd-elf.h create mode 100644 gcc/config/sparc/niagara.md create mode 100644 gcc/config/sparc/niagara2.md create mode 100644 
gcc/config/sparc/openbsd1-64.h create mode 100644 gcc/config/sparc/openbsd64.h create mode 100644 gcc/config/sparc/predicates.md create mode 100644 gcc/config/sparc/rtemself.h create mode 100644 gcc/config/sparc/sol2-64.h create mode 100644 gcc/config/sparc/sol2-bi.h create mode 100644 gcc/config/sparc/sol2-c1.asm create mode 100644 gcc/config/sparc/sol2-ci.asm create mode 100644 gcc/config/sparc/sol2-cn.asm create mode 100644 gcc/config/sparc/sol2-gas-bi.h create mode 100644 gcc/config/sparc/sol2-gas.h create mode 100644 gcc/config/sparc/sol2-gld-bi.h create mode 100644 gcc/config/sparc/sol2-unwind.h create mode 100644 gcc/config/sparc/sol2.h create mode 100644 gcc/config/sparc/sp-elf.h create mode 100644 gcc/config/sparc/sp64-elf.h create mode 100644 gcc/config/sparc/sparc-modes.def create mode 100644 gcc/config/sparc/sparc-protos.h create mode 100644 gcc/config/sparc/sparc.c create mode 100644 gcc/config/sparc/sparc.h create mode 100644 gcc/config/sparc/sparc.md create mode 100644 gcc/config/sparc/sparc.opt create mode 100644 gcc/config/sparc/sparclet.md create mode 100644 gcc/config/sparc/supersparc.md create mode 100644 gcc/config/sparc/sync.md create mode 100644 gcc/config/sparc/sysv4.h create mode 100644 gcc/config/sparc/t-crtfm create mode 100644 gcc/config/sparc/t-crtin create mode 100644 gcc/config/sparc/t-elf create mode 100644 gcc/config/sparc/t-leon create mode 100644 gcc/config/sparc/t-leon3 create mode 100644 gcc/config/sparc/t-linux create mode 100644 gcc/config/sparc/t-linux64 create mode 100644 gcc/config/sparc/t-netbsd64 create mode 100644 gcc/config/sparc/t-sol2 create mode 100644 gcc/config/sparc/t-sol2-64 create mode 100644 gcc/config/sparc/t-vxworks create mode 100644 gcc/config/sparc/ultra1_2.md create mode 100644 gcc/config/sparc/ultra3.md create mode 100644 gcc/config/sparc/vxworks.h create mode 100644 gcc/config/spu/cache.S create mode 100644 gcc/config/spu/cachemgr.c create mode 100644 gcc/config/spu/constraints.md create mode 100644 gcc/config/spu/divmodti4.c create mode 100644 gcc/config/spu/divv2df3.c create mode 100644 gcc/config/spu/float_disf.c create mode 100644 gcc/config/spu/float_unsdidf.c create mode 100644 gcc/config/spu/float_unsdisf.c create mode 100644 gcc/config/spu/float_unssidf.c create mode 100644 gcc/config/spu/mfc_multi_tag_release.c create mode 100644 gcc/config/spu/mfc_multi_tag_reserve.c create mode 100644 gcc/config/spu/mfc_tag_release.c create mode 100644 gcc/config/spu/mfc_tag_reserve.c create mode 100644 gcc/config/spu/mfc_tag_table.c create mode 100644 gcc/config/spu/multi3.c create mode 100644 gcc/config/spu/predicates.md create mode 100644 gcc/config/spu/spu-builtins.def create mode 100644 gcc/config/spu/spu-builtins.md create mode 100644 gcc/config/spu/spu-c.c create mode 100644 gcc/config/spu/spu-elf.h create mode 100644 gcc/config/spu/spu-modes.def create mode 100644 gcc/config/spu/spu-protos.h create mode 100644 gcc/config/spu/spu.c create mode 100644 gcc/config/spu/spu.h create mode 100644 gcc/config/spu/spu.md create mode 100644 gcc/config/spu/spu.opt create mode 100644 gcc/config/spu/spu_cache.h create mode 100644 gcc/config/spu/spu_internals.h create mode 100644 gcc/config/spu/spu_intrinsics.h create mode 100644 gcc/config/spu/spu_mfcio.h create mode 100644 gcc/config/spu/t-spu-elf create mode 100644 gcc/config/spu/vec_types.h create mode 100644 gcc/config/spu/vmx2spu.h create mode 100644 gcc/config/stormy16/constraints.md create mode 100644 gcc/config/stormy16/predicates.md create mode 100644 gcc/config/stormy16/stormy-abi 
create mode 100644 gcc/config/stormy16/stormy16-lib2-ashlsi3.c create mode 100644 gcc/config/stormy16/stormy16-lib2-ashrsi3.c create mode 100644 gcc/config/stormy16/stormy16-lib2-clzhi2.c create mode 100644 gcc/config/stormy16/stormy16-lib2-cmpsi2.c create mode 100644 gcc/config/stormy16/stormy16-lib2-ctzhi2.c create mode 100644 gcc/config/stormy16/stormy16-lib2-divsi3.c create mode 100644 gcc/config/stormy16/stormy16-lib2-ffshi2.c create mode 100644 gcc/config/stormy16/stormy16-lib2-lshrsi3.c create mode 100644 gcc/config/stormy16/stormy16-lib2-modsi3.c create mode 100644 gcc/config/stormy16/stormy16-lib2-parityhi2.c create mode 100644 gcc/config/stormy16/stormy16-lib2-popcounthi2.c create mode 100644 gcc/config/stormy16/stormy16-lib2-ucmpsi2.c create mode 100644 gcc/config/stormy16/stormy16-lib2-udivmodsi4.c create mode 100644 gcc/config/stormy16/stormy16-lib2-udivsi3.c create mode 100644 gcc/config/stormy16/stormy16-lib2-umodsi3.c create mode 100644 gcc/config/stormy16/stormy16-lib2.c create mode 100644 gcc/config/stormy16/stormy16-protos.h create mode 100644 gcc/config/stormy16/stormy16.c create mode 100644 gcc/config/stormy16/stormy16.h create mode 100644 gcc/config/stormy16/stormy16.md create mode 100644 gcc/config/stormy16/stormy16.opt create mode 100644 gcc/config/stormy16/t-stormy16 create mode 100644 gcc/config/svr3.h create mode 100644 gcc/config/sync.c create mode 100644 gcc/config/t-darwin create mode 100644 gcc/config/t-dfprules create mode 100644 gcc/config/t-freebsd create mode 100644 gcc/config/t-freebsd-thread create mode 100644 gcc/config/t-gnu create mode 100644 gcc/config/t-libc-ok create mode 100644 gcc/config/t-libgcc-pic create mode 100644 gcc/config/t-libunwind create mode 100644 gcc/config/t-libunwind-elf create mode 100644 gcc/config/t-linux create mode 100644 gcc/config/t-lynx create mode 100644 gcc/config/t-netbsd create mode 100644 gcc/config/t-openbsd create mode 100644 gcc/config/t-openbsd-thread create mode 100644 gcc/config/t-pnt16-warn create mode 100644 gcc/config/t-rtems create mode 100644 gcc/config/t-slibgcc-darwin create mode 100644 gcc/config/t-slibgcc-elf-ver create mode 100644 gcc/config/t-slibgcc-libgcc create mode 100644 gcc/config/t-slibgcc-nolc-override create mode 100644 gcc/config/t-slibgcc-sld create mode 100644 gcc/config/t-sol2 create mode 100644 gcc/config/t-svr4 create mode 100644 gcc/config/t-sysroot-suffix create mode 100644 gcc/config/t-vxworks create mode 100644 gcc/config/tm-dwarf2.h create mode 100644 gcc/config/udivmod.c create mode 100644 gcc/config/udivmodsi4.c create mode 100644 gcc/config/usegas.h create mode 100644 gcc/config/v850/constraints.md create mode 100644 gcc/config/v850/lib1funcs.asm create mode 100644 gcc/config/v850/predicates.md create mode 100644 gcc/config/v850/t-v850 create mode 100644 gcc/config/v850/t-v850e create mode 100644 gcc/config/v850/v850-c.c create mode 100644 gcc/config/v850/v850-modes.def create mode 100644 gcc/config/v850/v850-protos.h create mode 100644 gcc/config/v850/v850.c create mode 100644 gcc/config/v850/v850.h create mode 100644 gcc/config/v850/v850.md create mode 100644 gcc/config/v850/v850.opt create mode 100644 gcc/config/vax/builtins.md create mode 100644 gcc/config/vax/constraints.md create mode 100644 gcc/config/vax/elf.h create mode 100644 gcc/config/vax/elf.opt create mode 100644 gcc/config/vax/lib1funcs.asm create mode 100644 gcc/config/vax/linux.h create mode 100644 gcc/config/vax/netbsd-elf.h create mode 100644 gcc/config/vax/netbsd.h create mode 100644 
gcc/config/vax/openbsd.h create mode 100644 gcc/config/vax/openbsd1.h create mode 100644 gcc/config/vax/predicates.md create mode 100644 gcc/config/vax/t-linux create mode 100644 gcc/config/vax/vax-modes.def create mode 100644 gcc/config/vax/vax-protos.h create mode 100644 gcc/config/vax/vax.c create mode 100644 gcc/config/vax/vax.h create mode 100644 gcc/config/vax/vax.md create mode 100644 gcc/config/vax/vax.opt create mode 100644 gcc/config/vms/t-vms create mode 100644 gcc/config/vms/vms-crtl-64.h create mode 100644 gcc/config/vms/vms-crtl.h create mode 100644 gcc/config/vms/vms-ucrt0.c create mode 100644 gcc/config/vms/vms.opt create mode 100644 gcc/config/vms/x-vms create mode 100644 gcc/config/vms/xm-vms.h create mode 100644 gcc/config/vms/xm-vms64.h create mode 100644 gcc/config/vx-common.h create mode 100644 gcc/config/vxlib-tls.c create mode 100644 gcc/config/vxlib.c create mode 100644 gcc/config/vxworks-dummy.h create mode 100644 gcc/config/vxworks.c create mode 100644 gcc/config/vxworks.h create mode 100644 gcc/config/vxworks.opt create mode 100644 gcc/config/vxworksae.h create mode 100644 gcc/config/x-cflags-O1 create mode 100644 gcc/config/x-darwin create mode 100644 gcc/config/x-hpux create mode 100644 gcc/config/x-linux create mode 100644 gcc/config/x-solaris create mode 100644 gcc/config/xtensa/constraints.md create mode 100644 gcc/config/xtensa/crti.asm create mode 100644 gcc/config/xtensa/crtn.asm create mode 100644 gcc/config/xtensa/elf.h create mode 100644 gcc/config/xtensa/elf.opt create mode 100644 gcc/config/xtensa/ieee754-df.S create mode 100644 gcc/config/xtensa/ieee754-sf.S create mode 100644 gcc/config/xtensa/lib1funcs.asm create mode 100644 gcc/config/xtensa/lib2funcs.S create mode 100644 gcc/config/xtensa/libgcc-xtensa.ver create mode 100644 gcc/config/xtensa/linux-unwind.h create mode 100644 gcc/config/xtensa/linux.h create mode 100644 gcc/config/xtensa/predicates.md create mode 100644 gcc/config/xtensa/t-elf create mode 100644 gcc/config/xtensa/t-linux create mode 100644 gcc/config/xtensa/t-xtensa create mode 100644 gcc/config/xtensa/unwind-dw2-xtensa.c create mode 100644 gcc/config/xtensa/unwind-dw2-xtensa.h create mode 100644 gcc/config/xtensa/xtensa-protos.h create mode 100644 gcc/config/xtensa/xtensa.c create mode 100644 gcc/config/xtensa/xtensa.h create mode 100644 gcc/config/xtensa/xtensa.md create mode 100644 gcc/config/xtensa/xtensa.opt (limited to 'gcc/config') diff --git a/gcc/config/README b/gcc/config/README new file mode 100644 index 000000000..60328ec5b --- /dev/null +++ b/gcc/config/README @@ -0,0 +1,5 @@ +This directory contains machine-specific files for the GNU C compiler. +It has a subdirectory for each basic CPU type. +The only files in this directory itself +are some .h files that pertain to particular operating systems +and are used for more than one CPU type. diff --git a/gcc/config/alpha/alpha-modes.def b/gcc/config/alpha/alpha-modes.def new file mode 100644 index 000000000..9d5a04246 --- /dev/null +++ b/gcc/config/alpha/alpha-modes.def @@ -0,0 +1,27 @@ +/* Alpha extra machine modes. + Copyright (C) 2003, 2004, 2007, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* 128-bit floating point. This gets reset in alpha_option_override + if VAX float format is in use. */ +FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODE (INT, QI, 4); /* V4QI */ +VECTOR_MODE (INT, QI, 2); /* V2QI */ diff --git a/gcc/config/alpha/alpha-protos.h b/gcc/config/alpha/alpha-protos.h new file mode 100644 index 000000000..747716960 --- /dev/null +++ b/gcc/config/alpha/alpha-protos.h @@ -0,0 +1,131 @@ +/* Prototypes for alpha.c functions used in the md file & elsewhere. + Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2009, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +extern int alpha_next_sequence_number; + +extern void literal_section (void); +extern int zap_mask (HOST_WIDE_INT); +extern int direct_return (void); + +extern int alpha_sa_size (void); +extern HOST_WIDE_INT alpha_initial_elimination_offset (unsigned int, + unsigned int); +extern void alpha_expand_prologue (void); +extern void alpha_expand_epilogue (void); +extern void alpha_output_filename (FILE *, const char *); + +extern rtx alpha_tablejump_addr_vec (rtx); +extern rtx alpha_tablejump_best_label (rtx); + +extern bool alpha_legitimate_constant_p (rtx); +extern rtx alpha_legitimize_reload_address (rtx, enum machine_mode, + int, int, int); + +extern rtx split_small_symbolic_operand (rtx); + +extern void get_aligned_mem (rtx, rtx *, rtx *); +extern rtx get_unaligned_address (rtx); +extern rtx get_unaligned_offset (rtx, HOST_WIDE_INT); +extern enum reg_class alpha_preferred_reload_class (rtx, enum reg_class); + +extern void alpha_set_memflags (rtx, rtx); +extern bool alpha_split_const_mov (enum machine_mode, rtx *); +extern bool alpha_expand_mov (enum machine_mode, rtx *); +extern bool alpha_expand_mov_nobwx (enum machine_mode, rtx *); +extern void alpha_expand_movmisalign (enum machine_mode, rtx *); +extern void alpha_emit_floatuns (rtx[]); +extern rtx alpha_emit_conditional_move (rtx, enum machine_mode); +extern void alpha_split_tmode_pair (rtx[], enum machine_mode, bool); +extern void alpha_split_tfmode_frobsign (rtx[], rtx (*)(rtx, rtx, rtx)); +extern void alpha_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT, int); +extern void alpha_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT); +extern int alpha_expand_block_move (rtx []); +extern int alpha_expand_block_clear (rtx []); +extern rtx alpha_expand_zap_mask (HOST_WIDE_INT); +extern void alpha_expand_builtin_vector_binop (rtx (*)(rtx, rtx, rtx), + enum machine_mode, + rtx, rtx, rtx); +extern void 
alpha_expand_builtin_establish_vms_condition_handler (rtx, rtx); +extern void alpha_expand_builtin_revert_vms_condition_handler (rtx); + +extern rtx alpha_return_addr (int, rtx); +extern rtx alpha_gp_save_rtx (void); +extern void print_operand (FILE *, rtx, int); +extern void print_operand_address (FILE *, rtx); +extern void alpha_initialize_trampoline (rtx, rtx, rtx, int, int, int); + +extern rtx alpha_va_arg (tree, tree); +extern rtx function_value (const_tree, const_tree, enum machine_mode); + +extern void alpha_start_function (FILE *, const char *, tree); +extern void alpha_end_function (FILE *, const char *, tree); + +extern int alpha_find_lo_sum_using_gp (rtx); + +#ifdef REAL_VALUE_TYPE +extern int check_float_value (enum machine_mode, REAL_VALUE_TYPE *, int); +#endif + +#ifdef RTX_CODE +extern void alpha_emit_conditional_branch (rtx[], enum machine_mode); +extern bool alpha_emit_setcc (rtx[], enum machine_mode); +extern int alpha_split_conditional_move (enum rtx_code, rtx, rtx, rtx, rtx); +extern void alpha_emit_xfloating_arith (enum rtx_code, rtx[]); +extern void alpha_emit_xfloating_cvt (enum rtx_code, rtx[]); +extern void alpha_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern void alpha_split_compare_and_swap (rtx, rtx, rtx, rtx, rtx); +extern void alpha_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx); +extern void alpha_split_compare_and_swap_12 (enum machine_mode, rtx, rtx, + rtx, rtx, rtx, rtx, rtx); +extern void alpha_split_lock_test_and_set (rtx, rtx, rtx, rtx); +extern void alpha_expand_lock_test_and_set_12 (rtx, rtx, rtx); +extern void alpha_split_lock_test_and_set_12 (enum machine_mode, rtx, rtx, + rtx, rtx, rtx); +#endif + +extern rtx alpha_need_linkage (const char *, int); +extern rtx alpha_use_linkage (rtx, tree, int, int); + +#if TARGET_ABI_OPEN_VMS +extern enum avms_arg_type alpha_arg_type (enum machine_mode); +extern rtx alpha_arg_info_reg_val (CUMULATIVE_ARGS); +extern void avms_asm_output_external (FILE *, tree, const char *); +extern void vms_output_aligned_decl_common (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern HOST_WIDE_INT alpha_vms_initial_elimination_offset (unsigned int, + unsigned int); +#endif + +extern rtx unicosmk_add_call_info_word (rtx); + +#if TARGET_ABI_UNICOSMK +extern void unicosmk_defer_case_vector (rtx, rtx); +extern void unicosmk_add_extern (const char *); +extern void unicosmk_output_align (FILE *, int); +extern void unicosmk_output_common (FILE *, const char *, int, int); +extern int unicosmk_initial_elimination_offset (int, int); +#endif + +extern int some_small_symbolic_operand_int (rtx *, void *); +extern int tls_symbolic_operand_1 (rtx, int, int); +extern rtx resolve_reload_operand (rtx); diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c new file mode 100644 index 000000000..ba6179e71 --- /dev/null +++ b/gcc/config/alpha/alpha.c @@ -0,0 +1,11210 @@ +/* Subroutines used for code generation on the DEC Alpha. + Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "reload.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "diagnostic-core.h" +#include "ggc.h" +#include "integrate.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "debug.h" +#include "langhooks.h" +#include "splay-tree.h" +#include "cfglayout.h" +#include "gimple.h" +#include "tree-flow.h" +#include "tree-stdarg.h" +#include "tm-constrs.h" +#include "df.h" +#include "libfuncs.h" + +/* Specify which cpu to schedule for. */ +enum processor_type alpha_tune; + +/* Which cpu we're generating code for. */ +enum processor_type alpha_cpu; + +static const char * const alpha_cpu_name[] = +{ + "ev4", "ev5", "ev6" +}; + +/* Specify how accurate floating-point traps need to be. */ + +enum alpha_trap_precision alpha_tp; + +/* Specify the floating-point rounding mode. */ + +enum alpha_fp_rounding_mode alpha_fprm; + +/* Specify which things cause traps. */ + +enum alpha_fp_trap_mode alpha_fptm; + +/* Nonzero if inside of a function, because the Alpha asm can't + handle .files inside of functions. */ + +static int inside_function = FALSE; + +/* The number of cycles of latency we should assume on memory reads. */ + +int alpha_memory_latency = 3; + +/* Whether the function needs the GP. */ + +static int alpha_function_needs_gp; + +/* The assembler name of the current function. */ + +static const char *alpha_fnname; + +/* The next explicit relocation sequence number. */ +extern GTY(()) int alpha_next_sequence_number; +int alpha_next_sequence_number = 1; + +/* The literal and gpdisp sequence numbers for this insn, as printed + by %# and %* respectively. */ +extern GTY(()) int alpha_this_literal_sequence_number; +extern GTY(()) int alpha_this_gpdisp_sequence_number; +int alpha_this_literal_sequence_number; +int alpha_this_gpdisp_sequence_number; + +/* Costs of various operations on the different architectures. 
*/ + +struct alpha_rtx_cost_data +{ + unsigned char fp_add; + unsigned char fp_mult; + unsigned char fp_div_sf; + unsigned char fp_div_df; + unsigned char int_mult_si; + unsigned char int_mult_di; + unsigned char int_shift; + unsigned char int_cmov; + unsigned short int_div; +}; + +static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] = +{ + { /* EV4 */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (34), /* fp_div_sf */ + COSTS_N_INSNS (63), /* fp_div_df */ + COSTS_N_INSNS (23), /* int_mult_si */ + COSTS_N_INSNS (23), /* int_mult_di */ + COSTS_N_INSNS (2), /* int_shift */ + COSTS_N_INSNS (2), /* int_cmov */ + COSTS_N_INSNS (97), /* int_div */ + }, + { /* EV5 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult */ + COSTS_N_INSNS (15), /* fp_div_sf */ + COSTS_N_INSNS (22), /* fp_div_df */ + COSTS_N_INSNS (8), /* int_mult_si */ + COSTS_N_INSNS (12), /* int_mult_di */ + COSTS_N_INSNS (1) + 1, /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (83), /* int_div */ + }, + { /* EV6 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (15), /* fp_div_df */ + COSTS_N_INSNS (7), /* int_mult_si */ + COSTS_N_INSNS (7), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (2), /* int_cmov */ + COSTS_N_INSNS (86), /* int_div */ + }, +}; + +/* Similar but tuned for code size instead of execution latency. The + extra +N is fractional cost tuning based on latency. It's used to + encourage use of cheaper insns like shift, but only if there's just + one of them. */ + +static struct alpha_rtx_cost_data const alpha_rtx_cost_size = +{ + COSTS_N_INSNS (1), /* fp_add */ + COSTS_N_INSNS (1), /* fp_mult */ + COSTS_N_INSNS (1), /* fp_div_sf */ + COSTS_N_INSNS (1) + 1, /* fp_div_df */ + COSTS_N_INSNS (1) + 1, /* int_mult_si */ + COSTS_N_INSNS (1) + 2, /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (6), /* int_div */ +}; + +/* Get the number of args of a function in one of two ways. */ +#if TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK +#define NUM_ARGS crtl->args.info.num_args +#else +#define NUM_ARGS crtl->args.info +#endif + +#define REG_PV 27 +#define REG_RA 26 + +/* Declarations of static functions. */ +static struct machine_function *alpha_init_machine_status (void); +static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx); + +#if TARGET_ABI_OPEN_VMS +static void alpha_write_linkage (FILE *, const char *, tree); +static bool vms_valid_pointer_mode (enum machine_mode); +#endif + +static void unicosmk_output_deferred_case_vectors (FILE *); +static void unicosmk_gen_dsib (unsigned long *); +static void unicosmk_output_ssib (FILE *, const char *); +static int unicosmk_need_dex (rtx); + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options alpha_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Implement TARGET_HANDLE_OPTION. 
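The per-CPU cost tables above are what alpha_rtx_costs consults, indexed by the tuning target. As a minimal standalone sketch (not part of the imported sources; cost_row, costs and the CPU_EV* names are made up here), the lookup boils down to picking one row by CPU and one field by operation, with a local COSTS_N_INSNS scaling an instruction count into the units the rtx-cost hooks use:

#include <stdio.h>

/* Local stand-in for GCC's COSTS_N_INSNS: scale an instruction count
   into the units used by the rtx-cost hooks.  */
#define COSTS_N_INSNS(N) ((N) * 4)

enum cpu { CPU_EV4, CPU_EV5, CPU_EV6, CPU_MAX };

/* Trimmed-down analogue of alpha_rtx_cost_data: one row per CPU,
   one field per operation class.  */
struct cost_row
{
  unsigned char fp_add;
  unsigned char int_shift;
  unsigned short int_div;
};

static const struct cost_row costs[CPU_MAX] =
{
  { COSTS_N_INSNS (6), COSTS_N_INSNS (2),     COSTS_N_INSNS (97) }, /* EV4 */
  { COSTS_N_INSNS (4), COSTS_N_INSNS (1) + 1, COSTS_N_INSNS (83) }, /* EV5 */
  { COSTS_N_INSNS (4), COSTS_N_INSNS (1),     COSTS_N_INSNS (86) }, /* EV6 */
};

int
main (void)
{
  enum cpu tune = CPU_EV6;      /* in GCC this would come from -mtune */
  printf ("EV6 integer divide cost: %d\n", costs[tune].int_div);
  return 0;
}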
*/ + +static bool +alpha_handle_option (size_t code, const char *arg, int value) +{ + switch (code) + { + case OPT_mfp_regs: + if (value == 0) + target_flags |= MASK_SOFT_FP; + break; + + case OPT_mieee: + case OPT_mieee_with_inexact: + target_flags |= MASK_IEEE_CONFORMANT; + break; + + case OPT_mtls_size_: + if (value != 16 && value != 32 && value != 64) + error ("bad value %qs for -mtls-size switch", arg); + break; + } + + return true; +} + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +alpha_mangle_type (const_tree type) +{ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; + + /* For all other types, use normal C++ mangling. */ + return NULL; +} +#endif + +/* Parse target option strings. */ + +static void +alpha_option_override (void) +{ + static const struct cpu_table { + const char *const name; + const enum processor_type processor; + const int flags; + } cpu_table[] = { + { "ev4", PROCESSOR_EV4, 0 }, + { "ev45", PROCESSOR_EV4, 0 }, + { "21064", PROCESSOR_EV4, 0 }, + { "ev5", PROCESSOR_EV5, 0 }, + { "21164", PROCESSOR_EV5, 0 }, + { "ev56", PROCESSOR_EV5, MASK_BWX }, + { "21164a", PROCESSOR_EV5, MASK_BWX }, + { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX }, + { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX }, + { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX }, + { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX }, + { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX }, + { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX }, + { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX } + }; + + int const ct_size = ARRAY_SIZE (cpu_table); + int i; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + /* Unicos/Mk doesn't have shared libraries. */ + if (TARGET_ABI_UNICOSMK && flag_pic) + { + warning (0, "-f%s ignored for Unicos/Mk (not supported)", + (flag_pic > 1) ? "PIC" : "pic"); + flag_pic = 0; + } + + /* On Unicos/Mk, the native compiler consistently generates /d suffices for + floating-point instructions. Make that the default for this target. */ + if (TARGET_ABI_UNICOSMK) + alpha_fprm = ALPHA_FPRM_DYN; + else + alpha_fprm = ALPHA_FPRM_NORM; + + alpha_tp = ALPHA_TP_PROG; + alpha_fptm = ALPHA_FPTM_N; + + /* We cannot use su and sui qualifiers for conversion instructions on + Unicos/Mk. I'm not sure if this is due to assembler or hardware + limitations. Right now, we issue a warning if -mieee is specified + and then ignore it; eventually, we should either get it right or + disable the option altogether. */ + + if (TARGET_IEEE) + { + if (TARGET_ABI_UNICOSMK) + warning (0, "-mieee not supported on Unicos/Mk"); + else + { + alpha_tp = ALPHA_TP_INSN; + alpha_fptm = ALPHA_FPTM_SU; + } + } + + if (TARGET_IEEE_WITH_INEXACT) + { + if (TARGET_ABI_UNICOSMK) + warning (0, "-mieee-with-inexact not supported on Unicos/Mk"); + else + { + alpha_tp = ALPHA_TP_INSN; + alpha_fptm = ALPHA_FPTM_SUI; + } + } + + if (alpha_tp_string) + { + if (! strcmp (alpha_tp_string, "p")) + alpha_tp = ALPHA_TP_PROG; + else if (! strcmp (alpha_tp_string, "f")) + alpha_tp = ALPHA_TP_FUNC; + else if (! strcmp (alpha_tp_string, "i")) + alpha_tp = ALPHA_TP_INSN; + else + error ("bad value %qs for -mtrap-precision switch", alpha_tp_string); + } + + if (alpha_fprm_string) + { + if (! strcmp (alpha_fprm_string, "n")) + alpha_fprm = ALPHA_FPRM_NORM; + else if (! strcmp (alpha_fprm_string, "m")) + alpha_fprm = ALPHA_FPRM_MINF; + else if (! 
strcmp (alpha_fprm_string, "c")) + alpha_fprm = ALPHA_FPRM_CHOP; + else if (! strcmp (alpha_fprm_string,"d")) + alpha_fprm = ALPHA_FPRM_DYN; + else + error ("bad value %qs for -mfp-rounding-mode switch", + alpha_fprm_string); + } + + if (alpha_fptm_string) + { + if (strcmp (alpha_fptm_string, "n") == 0) + alpha_fptm = ALPHA_FPTM_N; + else if (strcmp (alpha_fptm_string, "u") == 0) + alpha_fptm = ALPHA_FPTM_U; + else if (strcmp (alpha_fptm_string, "su") == 0) + alpha_fptm = ALPHA_FPTM_SU; + else if (strcmp (alpha_fptm_string, "sui") == 0) + alpha_fptm = ALPHA_FPTM_SUI; + else + error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string); + } + + if (alpha_cpu_string) + { + for (i = 0; i < ct_size; i++) + if (! strcmp (alpha_cpu_string, cpu_table [i].name)) + { + alpha_tune = alpha_cpu = cpu_table [i].processor; + target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX); + target_flags |= cpu_table [i].flags; + break; + } + if (i == ct_size) + error ("bad value %qs for -mcpu switch", alpha_cpu_string); + } + + if (alpha_tune_string) + { + for (i = 0; i < ct_size; i++) + if (! strcmp (alpha_tune_string, cpu_table [i].name)) + { + alpha_tune = cpu_table [i].processor; + break; + } + if (i == ct_size) + error ("bad value %qs for -mtune switch", alpha_tune_string); + } + + /* Do some sanity checks on the above options. */ + + if (TARGET_ABI_UNICOSMK && alpha_fptm != ALPHA_FPTM_N) + { + warning (0, "trap mode not supported on Unicos/Mk"); + alpha_fptm = ALPHA_FPTM_N; + } + + if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI) + && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6) + { + warning (0, "fp software completion requires -mtrap-precision=i"); + alpha_tp = ALPHA_TP_INSN; + } + + if (alpha_cpu == PROCESSOR_EV6) + { + /* Except for EV6 pass 1 (not released), we always have precise + arithmetic traps. Which means we can do software completion + without minding trap shadows. */ + alpha_tp = ALPHA_TP_PROG; + } + + if (TARGET_FLOAT_VAX) + { + if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN) + { + warning (0, "rounding mode not supported for VAX floats"); + alpha_fprm = ALPHA_FPRM_NORM; + } + if (alpha_fptm == ALPHA_FPTM_SUI) + { + warning (0, "trap mode not supported for VAX floats"); + alpha_fptm = ALPHA_FPTM_SU; + } + if (target_flags_explicit & MASK_LONG_DOUBLE_128) + warning (0, "128-bit long double not supported for VAX floats"); + target_flags &= ~MASK_LONG_DOUBLE_128; + } + + { + char *end; + int lat; + + if (!alpha_mlat_string) + alpha_mlat_string = "L1"; + + if (ISDIGIT ((unsigned char)alpha_mlat_string[0]) + && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0')) + ; + else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l') + && ISDIGIT ((unsigned char)alpha_mlat_string[1]) + && alpha_mlat_string[2] == '\0') + { + static int const cache_latency[][4] = + { + { 3, 30, -1 }, /* ev4 -- Bcache is a guess */ + { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */ + { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */ + }; + + lat = alpha_mlat_string[1] - '0'; + if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1) + { + warning (0, "L%d cache latency unknown for %s", + lat, alpha_cpu_name[alpha_tune]); + lat = 3; + } + else + lat = cache_latency[alpha_tune][lat-1]; + } + else if (! strcmp (alpha_mlat_string, "main")) + { + /* Most current memories have about 370ns latency. This is + a reasonable guess for a fast cpu. 
*/ + lat = 150; + } + else + { + warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string); + lat = 3; + } + + alpha_memory_latency = lat; + } + + /* Default the definition of "small data" to 8 bytes. */ + if (!global_options_set.x_g_switch_value) + g_switch_value = 8; + + /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */ + if (flag_pic == 1) + target_flags |= MASK_SMALL_DATA; + else if (flag_pic == 2) + target_flags &= ~MASK_SMALL_DATA; + + /* Align labels and loops for optimal branching. */ + /* ??? Kludge these by not doing anything if we don't optimize and also if + we are writing ECOFF symbols to work around a bug in DEC's assembler. */ + if (optimize > 0 && write_symbols != SDB_DEBUG) + { + if (align_loops <= 0) + align_loops = 16; + if (align_jumps <= 0) + align_jumps = 16; + } + if (align_functions <= 0) + align_functions = 16; + + /* Register variables and functions with the garbage collector. */ + + /* Set up function hooks. */ + init_machine_status = alpha_init_machine_status; + + /* Tell the compiler when we're using VAX floating point. */ + if (TARGET_FLOAT_VAX) + { + REAL_MODE_FORMAT (SFmode) = &vax_f_format; + REAL_MODE_FORMAT (DFmode) = &vax_g_format; + REAL_MODE_FORMAT (TFmode) = NULL; + } + +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 + if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) + target_flags |= MASK_LONG_DOUBLE_128; +#endif + + /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) + can be optimized to ap = __builtin_next_arg (0). */ + if (TARGET_ABI_UNICOSMK) + targetm.expand_builtin_va_start = NULL; +} + +/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ + +int +zap_mask (HOST_WIDE_INT value) +{ + int i; + + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if ((value & 0xff) != 0 && (value & 0xff) != 0xff) + return 0; + + return 1; +} + +/* Return true if OP is valid for a particular TLS relocation. + We are already guaranteed that OP is a CONST. */ + +int +tls_symbolic_operand_1 (rtx op, int size, int unspec) +{ + op = XEXP (op, 0); + + if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec) + return 0; + op = XVECEXP (op, 0, 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + switch (SYMBOL_REF_TLS_MODEL (op)) + { + case TLS_MODEL_LOCAL_DYNAMIC: + return unspec == UNSPEC_DTPREL && size == alpha_tls_size; + case TLS_MODEL_INITIAL_EXEC: + return unspec == UNSPEC_TPREL && size == 64; + case TLS_MODEL_LOCAL_EXEC: + return unspec == UNSPEC_TPREL && size == alpha_tls_size; + default: + gcc_unreachable (); + } +} + +/* Used by aligned_memory_operand and unaligned_memory_operand to + resolve what reload is going to do with OP if it's a register. */ + +rtx +resolve_reload_operand (rtx op) +{ + if (reload_in_progress) + { + rtx tmp = op; + if (GET_CODE (tmp) == SUBREG) + tmp = SUBREG_REG (tmp); + if (REG_P (tmp) + && REGNO (tmp) >= FIRST_PSEUDO_REGISTER) + { + op = reg_equiv_memory_loc[REGNO (tmp)]; + if (op == 0) + return 0; + } + } + return op; +} + +/* The scalar modes supported differs from the default check-what-c-supports + version in that sometimes TFmode is available even when long double + indicates only DFmode. On unicosmk, we have the situation that HImode + doesn't map to any C type, but of course we still support that. 
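zap_mask above accepts exactly those constants whose bytes are all 0x00 or all 0xff, i.e. the byte masks a ZAP/ZAPNOT instruction can express. A self-contained check of that property, assuming a 64-bit host for brevity (illustrative only, not upstream code):

#include <stdio.h>

typedef long long hwi;  /* stand-in for a 64-bit HOST_WIDE_INT */

/* Same test as zap_mask: every one of the eight bytes must be 0x00 or 0xff.  */
static int
is_zap_mask (hwi value)
{
  int i;

  for (i = 0; i < 8; i++, value >>= 8)
    if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
      return 0;
  return 1;
}

int
main (void)
{
  printf ("%d\n", is_zap_mask (0x00ff00ffff0000ffLL)); /* 1: whole bytes of 0x00/0xff */
  printf ("%d\n", is_zap_mask (0x1234LL));             /* 0: partial bytes */
  return 0;
}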
*/ + +static bool +alpha_scalar_mode_supported_p (enum machine_mode mode) +{ + switch (mode) + { + case QImode: + case HImode: + case SImode: + case DImode: + case TImode: /* via optabs.c */ + return true; + + case SFmode: + case DFmode: + return true; + + case TFmode: + return TARGET_HAS_XFLOATING_LIBS; + + default: + return false; + } +} + +/* Alpha implements a couple of integer vector mode operations when + TARGET_MAX is enabled. We do not check TARGET_MAX here, however, + which allows the vectorizer to operate on e.g. move instructions, + or when expand_vector_operations can do something useful. */ + +static bool +alpha_vector_mode_supported_p (enum machine_mode mode) +{ + return mode == V8QImode || mode == V4HImode || mode == V2SImode; +} + +/* Return 1 if this function can directly return via $26. */ + +int +direct_return (void) +{ + return (! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK + && reload_completed + && alpha_sa_size () == 0 + && get_frame_size () == 0 + && crtl->outgoing_args_size == 0 + && crtl->args.pretend_args_size == 0); +} + +/* Return the ADDR_VEC associated with a tablejump insn. */ + +rtx +alpha_tablejump_addr_vec (rtx insn) +{ + rtx tmp; + + tmp = JUMP_LABEL (insn); + if (!tmp) + return NULL_RTX; + tmp = NEXT_INSN (tmp); + if (!tmp) + return NULL_RTX; + if (JUMP_P (tmp) + && GET_CODE (PATTERN (tmp)) == ADDR_DIFF_VEC) + return PATTERN (tmp); + return NULL_RTX; +} + +/* Return the label of the predicted edge, or CONST0_RTX if we don't know. */ + +rtx +alpha_tablejump_best_label (rtx insn) +{ + rtx jump_table = alpha_tablejump_addr_vec (insn); + rtx best_label = NULL_RTX; + + /* ??? Once the CFG doesn't keep getting completely rebuilt, look + there for edge frequency counts from profile data. */ + + if (jump_table) + { + int n_labels = XVECLEN (jump_table, 1); + int best_count = -1; + int i, j; + + for (i = 0; i < n_labels; i++) + { + int count = 1; + + for (j = i + 1; j < n_labels; j++) + if (XEXP (XVECEXP (jump_table, 1, i), 0) + == XEXP (XVECEXP (jump_table, 1, j), 0)) + count++; + + if (count > best_count) + best_count = count, best_label = XVECEXP (jump_table, 1, i); + } + } + + return best_label ? best_label : const0_rtx; +} + +/* Return the TLS model to use for SYMBOL. */ + +static enum tls_model +tls_symbolic_operand_type (rtx symbol) +{ + enum tls_model model; + + if (GET_CODE (symbol) != SYMBOL_REF) + return TLS_MODEL_NONE; + model = SYMBOL_REF_TLS_MODEL (symbol); + + /* Local-exec with a 64-bit size is the same code as initial-exec. */ + if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64) + model = TLS_MODEL_INITIAL_EXEC; + + return model; +} + +/* Return true if the function DECL will share the same GP as any + function in the current unit of translation. */ + +static bool +decl_has_samegp (const_tree decl) +{ + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (!(*targetm.binds_local_p) (decl)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT. */ + /* ??? Irritatingly, static functions not yet emitted are still + marked "external". Apply this to non-static functions only. 
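alpha_tablejump_best_label above settles on the jump-table label that occurs most often, using a simple quadratic duplicate count. The same selection idea on a plain integer array, as a hedged sketch with made-up data (most_frequent is not a GCC function):

#include <stdio.h>

/* Return the value that appears most often in v[0..n-1] (n > 0),
   counting later duplicates of each element, mirroring the selection
   loop in alpha_tablejump_best_label.  */
static int
most_frequent (const int *v, int n)
{
  int best = v[0], best_count = -1;
  int i, j;

  for (i = 0; i < n; i++)
    {
      int count = 1;

      for (j = i + 1; j < n; j++)
        if (v[i] == v[j])
          count++;

      if (count > best_count)
        best_count = count, best = v[i];
    }
  return best;
}

int
main (void)
{
  int labels[] = { 7, 3, 7, 9, 7, 3 };  /* hypothetical label numbers */

  printf ("%d\n", most_frequent (labels, 6));  /* prints 7 */
  return 0;
}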
*/ + return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl); +} + +/* Return true if EXP should be placed in the small data section. */ + +static bool +alpha_in_small_data_p (const_tree exp) +{ + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never in the small data area. Duh. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); + if (strcmp (section, ".sdata") == 0 + || strcmp (section, ".sbss") == 0) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in sdata because it might be too big when completed. */ + if (size > 0 && size <= g_switch_value) + return true; + } + + return false; +} + +#if TARGET_ABI_OPEN_VMS +static bool +vms_valid_pointer_mode (enum machine_mode mode) +{ + return (mode == SImode || mode == DImode); +} + +static bool +alpha_linkage_symbol_p (const char *symname) +{ + int symlen = strlen (symname); + + if (symlen > 4) + return strcmp (&symname [symlen - 4], "..lk") == 0; + + return false; +} + +#define LINKAGE_SYMBOL_REF_P(X) \ + ((GET_CODE (X) == SYMBOL_REF \ + && alpha_linkage_symbol_p (XSTR (X, 0))) \ + || (GET_CODE (X) == CONST \ + && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \ + && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0)))) +#endif + +/* legitimate_address_p recognizes an RTL expression that is a valid + memory address for an instruction. The MODE argument is the + machine mode for the MEM expression that wants to use this address. + + For Alpha, we have either a constant address or the sum of a + register and a constant address, or just a register. For DImode, + any of those forms can be surrounded with an AND that clear the + low-order three bits; this is an "unaligned" access. */ + +static bool +alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + /* If this is an ldq_u type address, discard the outer AND. */ + if (mode == DImode + && GET_CODE (x) == AND + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == -8) + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Unadorned general registers are valid. */ + if (REG_P (x) + && (strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x))) + return true; + + /* Constant addresses (i.e. +/- 32k) are valid. */ + if (CONSTANT_ADDRESS_P (x)) + return true; + +#if TARGET_ABI_OPEN_VMS + if (LINKAGE_SYMBOL_REF_P (x)) + return true; +#endif + + /* Register plus a small constant offset is valid. */ + if (GET_CODE (x) == PLUS) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + if (REG_P (x)) + { + if (! strict + && NONSTRICT_REG_OK_FP_BASE_P (x) + && CONST_INT_P (ofs)) + return true; + if ((strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)) + && CONSTANT_ADDRESS_P (ofs)) + return true; + } + } + + /* If we're managing explicit relocations, LO_SUM is valid, as are small + data symbols. 
Avoid explicit relocations of modes larger than word + mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */ + else if (TARGET_EXPLICIT_RELOCS + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) + { + if (small_symbolic_operand (x, Pmode)) + return true; + + if (GET_CODE (x) == LO_SUM) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Must have a valid base register. */ + if (! (REG_P (x) + && (strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)))) + return false; + + /* The symbol must be local. */ + if (local_symbolic_operand (ofs, Pmode) + || dtp32_symbolic_operand (ofs, Pmode) + || tp32_symbolic_operand (ofs, Pmode)) + return true; + } + } + + return false; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ + +static rtx +alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode) +{ + HOST_WIDE_INT addend; + + /* If the address is (plus reg const_int) and the CONST_INT is not a + valid offset, compute the high part of the constant and add it to + the register. Then our address is (plus temp low-part-const). */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && ! CONSTANT_ADDRESS_P (XEXP (x, 1))) + { + addend = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + goto split_addend; + } + + /* If the address is (const (plus FOO const_int)), find the low-order + part of the CONST_INT. Then load FOO plus any high-order part of the + CONST_INT into a register. Our address is (plus reg low-part-const). + This is done to reduce the number of GOT entries. */ + if (can_create_pseudo_p () + && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + addend = INTVAL (XEXP (XEXP (x, 0), 1)); + x = force_reg (Pmode, XEXP (XEXP (x, 0), 0)); + goto split_addend; + } + + /* If we have a (plus reg const), emit the load as in (2), then add + the two registers, and finally generate (plus reg low-part-const) as + our address. */ + if (can_create_pseudo_p () + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1))) + { + addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1)); + x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0), + XEXP (XEXP (XEXP (x, 1), 0), 0), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + goto split_addend; + } + + /* If this is a local symbol, split the address into HIGH/LO_SUM parts. + Avoid modes larger than word mode since i.e. $LC0+8($1) can fold + around +/- 32k offset. 
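The "+/- 32k" window that keeps recurring in these comments is a signed 16-bit displacement, which is what the lda/ldah memory forms can encode. A one-line range test for it, shown as a standalone sketch (fits_disp16 is a name invented for this example):

#include <stdio.h>

typedef long long hwi;  /* stand-in for HOST_WIDE_INT */

/* True if V fits the signed 16-bit displacement of an Alpha memory
   operand -- the "+/- 32k" window referred to above.  */
static int
fits_disp16 (hwi v)
{
  return (unsigned long long) (v + 0x8000) < 0x10000;
}

int
main (void)
{
  printf ("%d %d %d\n",
          fits_disp16 (32767),   /* 1 */
          fits_disp16 (-32768),  /* 1 */
          fits_disp16 (32768));  /* 0 */
  return 0;
}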
*/ + if (TARGET_EXPLICIT_RELOCS + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && symbolic_operand (x, Pmode)) + { + rtx r0, r16, eqv, tga, tp, insn, dest, seq; + + switch (tls_symbolic_operand_type (x)) + { + case TLS_MODEL_NONE: + break; + + case TLS_MODEL_GLOBAL_DYNAMIC: + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + dest = gen_reg_rtx (Pmode); + seq = GEN_INT (alpha_next_sequence_number++); + + emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq)); + insn = gen_call_value_osf_tlsgd (r0, tga, seq); + insn = emit_call_insn (insn); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + emit_libcall_block (insn, dest, r0, x); + return dest; + + case TLS_MODEL_LOCAL_DYNAMIC: + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + scratch = gen_reg_rtx (Pmode); + seq = GEN_INT (alpha_next_sequence_number++); + + emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq)); + insn = gen_call_value_osf_tlsldm (r0, tga, seq); + insn = emit_call_insn (insn); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLDM_CALL); + emit_libcall_block (insn, scratch, r0, eqv); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + + if (alpha_tls_size == 64) + { + dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, dest, eqv)); + emit_insn (gen_adddi3 (dest, dest, scratch)); + return dest; + } + if (alpha_tls_size == 32) + { + insn = gen_rtx_HIGH (Pmode, eqv); + insn = gen_rtx_PLUS (Pmode, scratch, insn); + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, insn)); + } + return gen_rtx_LO_SUM (Pmode, scratch, eqv); + + case TLS_MODEL_INITIAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + scratch = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + + emit_insn (gen_load_tp (tp)); + emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv)); + emit_insn (gen_adddi3 (dest, tp, scratch)); + return dest; + + case TLS_MODEL_LOCAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + + emit_insn (gen_load_tp (tp)); + if (alpha_tls_size == 32) + { + insn = gen_rtx_HIGH (Pmode, eqv); + insn = gen_rtx_PLUS (Pmode, tp, insn); + tp = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, tp, insn)); + } + return gen_rtx_LO_SUM (Pmode, tp, eqv); + + default: + gcc_unreachable (); + } + + if (local_symbolic_operand (x, Pmode)) + { + if (small_symbolic_operand (x, Pmode)) + return x; + else + { + if (can_create_pseudo_p ()) + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, + gen_rtx_HIGH (Pmode, x))); + return gen_rtx_LO_SUM (Pmode, scratch, x); + } + } + } + + return NULL; + + split_addend: + { + HOST_WIDE_INT low, high; + + low = ((addend & 0xffff) ^ 0x8000) - 0x8000; + addend -= low; + high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000; + addend -= high; + + if (addend) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend), + (!can_create_pseudo_p () ? 
scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + if (high) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + + return plus_constant (x, low); + } +} + + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. Return X or the new, valid address. */ + +static rtx +alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode); + return new_x ? new_x : x; +} + +/* Primarily this is required for TLS symbols, but given that our move + patterns *ought* to be able to handle any symbol at any time, we + should never be spilling symbolic operands to the constant pool, ever. */ + +static bool +alpha_cannot_force_const_mem (rtx x) +{ + enum rtx_code code = GET_CODE (x); + return code == SYMBOL_REF || code == LABEL_REF || code == CONST; +} + +/* We do not allow indirect calls to be optimized into sibling calls, nor + can we allow a call to a function with a different GP to be optimized + into a sibcall. */ + +static bool +alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + /* Can't do indirect tail calls, since we don't know if the target + uses the same GP. */ + if (!decl) + return false; + + /* Otherwise, we can make a tail call if the target function shares + the same GP. */ + return decl_has_samegp (decl); +} + +int +some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + return -1; + + return small_symbolic_operand (x, Pmode) != 0; +} + +static int +split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + return -1; + + if (small_symbolic_operand (x, Pmode)) + { + x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x); + *px = x; + return -1; + } + + return 0; +} + +rtx +split_small_symbolic_operand (rtx x) +{ + x = copy_insn (x); + for_each_rtx (&x, split_small_symbolic_operand_1, NULL); + return x; +} + +/* Indicate that INSN cannot be duplicated. This is true for any insn + that we've marked with gpdisp relocs, since those have to stay in + 1-1 correspondence with one another. + + Technically we could copy them if we could set up a mapping from one + sequence number to another, across the set of insns to be duplicated. + This seems overly complicated and error-prone since interblock motion + from sched-ebb could move one of the pair of insns to a different block. + + Also cannot allow jsr insns to be duplicated. If they throw exceptions, + then they'll be in a different block from their ldgp. Which could lead + the bb reorder code to think that it would be ok to copy just the block + containing the call and branch to the block containing the ldgp. */ + +static bool +alpha_cannot_copy_insn_p (rtx insn) +{ + if (!reload_completed || !TARGET_EXPLICIT_RELOCS) + return false; + if (recog_memoized (insn) >= 0) + return get_attr_cannot_copy (insn); + else + return false; +} + + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and return the new rtx. */ + +rtx +alpha_legitimize_reload_address (rtx x, + enum machine_mode mode ATTRIBUTE_UNUSED, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + /* We must recognize output that we have already generated ourselves. 
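The splitting done under split_addend above, and again for large displacements in alpha_legitimize_reload_address just below, rests on the identity that ((v & 0xffff) ^ 0x8000) - 0x8000 is the sign-extended low 16 bits of v, so the remainder is a multiple of 0x10000 that an ldah can supply. A short numeric check of the decomposition (sketch only; lo16 is not a GCC function):

#include <stdio.h>
#include <assert.h>

typedef long long hwi;  /* stand-in for HOST_WIDE_INT */

/* Sign-extended low 16 bits of V, exactly as split_addend computes it.  */
static hwi
lo16 (hwi v)
{
  return ((v & 0xffff) ^ 0x8000) - 0x8000;
}

int
main (void)
{
  hwi addend = 0x1234abcd;  /* arbitrary large displacement */
  hwi low = lo16 (addend);
  hwi rest = addend - low;
  hwi high = ((rest & 0xffffffff) ^ 0x80000000) - 0x80000000;

  /* The low part fits in 16 signed bits, and for a 32-bit addend the
     high step absorbs the remainder completely.  */
  assert (low >= -0x8000 && low < 0x8000);
  assert (low + high == addend);
  printf ("low=%lld high=%#llx\n", low, (unsigned long long) high);
  return 0;
}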
*/ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (x, 1))) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + /* We wish to handle large displacements off a base register by + splitting the addend across an ldah and the mem insn. This + cuts number of extra insns needed from 3 to 1. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER + && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0))) + && GET_CODE (XEXP (x, 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT high + = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; + + /* Check for 32-bit overflow. */ + if (high + low != val) + return NULL_RTX; + + /* Reload the high part into a base reg; leave the low part + in the mem directly. */ + x = gen_rtx_PLUS (GET_MODE (x), + gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), + GEN_INT (high)), + GEN_INT (low)); + + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + return NULL_RTX; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +alpha_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) +{ + enum machine_mode mode = GET_MODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + const struct alpha_rtx_cost_data *cost_data; + + if (!speed) + cost_data = &alpha_rtx_cost_size; + else + cost_data = &alpha_rtx_cost_data[alpha_tune]; + + switch (code) + { + case CONST_INT: + /* If this is an 8-bit constant, return zero since it can be used + nearly anywhere with no cost. If it is a valid operand for an + ADD or AND, likewise return 0 if we know it will be used in that + context. Otherwise, return 2 since it might be used there later. + All other constants take at least two insns. */ + if (INTVAL (x) >= 0 && INTVAL (x) < 256) + { + *total = 0; + return true; + } + /* FALLTHRU */ + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + *total = 0; + else if ((outer_code == PLUS && add_operand (x, VOIDmode)) + || (outer_code == AND && and_operand (x, VOIDmode))) + *total = 0; + else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode)) + *total = 2; + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (outer_code != MEM); + else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (1 + (outer_code != MEM)); + else if (tls_symbolic_operand_type (x)) + /* Estimate of cost for call_pal rduniq. */ + /* ??? How many insns do we emit here? More than one... */ + *total = COSTS_N_INSNS (15); + else + /* Otherwise we do a load from the GOT. */ + *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); + return true; + + case HIGH: + /* This is effectively an add_operand. 
*/ + *total = 2; + return true; + + case PLUS: + case MINUS: + if (float_mode_p) + *total = cost_data->fp_add; + else if (GET_CODE (XEXP (x, 0)) == MULT + && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total = (rtx_cost (XEXP (XEXP (x, 0), 0), + (enum rtx_code) outer_code, speed) + + rtx_cost (XEXP (x, 1), + (enum rtx_code) outer_code, speed) + + COSTS_N_INSNS (1)); + return true; + } + return false; + + case MULT: + if (float_mode_p) + *total = cost_data->fp_mult; + else if (mode == DImode) + *total = cost_data->int_mult_di; + else + *total = cost_data->int_mult_si; + return false; + + case ASHIFT: + if (CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) <= 3) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ASHIFTRT: + case LSHIFTRT: + *total = cost_data->int_shift; + return false; + + case IF_THEN_ELSE: + if (float_mode_p) + *total = cost_data->fp_add; + else + *total = cost_data->int_cmov; + return false; + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (!float_mode_p) + *total = cost_data->int_div; + else if (mode == SFmode) + *total = cost_data->fp_div_sf; + else + *total = cost_data->fp_div_df; + return false; + + case MEM: + *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); + return true; + + case NEG: + if (! float_mode_p) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ABS: + if (! float_mode_p) + { + *total = COSTS_N_INSNS (1) + cost_data->int_cmov; + return false; + } + /* FALLTHRU */ + + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case UNSIGNED_FIX: + case FLOAT_TRUNCATE: + *total = cost_data->fp_add; + return false; + + case FLOAT_EXTEND: + if (MEM_P (XEXP (x, 0))) + *total = 0; + else + *total = cost_data->fp_add; + return false; + + default: + return false; + } +} + +/* REF is an alignable memory location. Place an aligned SImode + reference into *PALIGNED_MEM and the number of bits to shift into + *PBITNUM. SCRATCH is a free register for use in reloading out + of range stack slots. */ + +void +get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum) +{ + rtx base; + HOST_WIDE_INT disp, offset; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress + && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0))) + { + base = find_replacement (&XEXP (ref, 0)); + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + else + disp = 0; + + /* Find the byte offset within an aligned word. If the memory itself is + claimed to be aligned, believe it. Otherwise, aligned_memory_operand + will have examined the base register and determined it is aligned, and + thus displacements from it are naturally alignable. */ + if (MEM_ALIGN (ref) >= 32) + offset = 0; + else + offset = disp & 3; + + /* The location should not cross aligned word boundary. */ + gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref)) + <= GET_MODE_SIZE (SImode)); + + /* Access the entire aligned word. */ + *paligned_mem = widen_memory_access (ref, SImode, -offset); + + /* Convert the byte offset within the word to a bit offset. */ + if (WORDS_BIG_ENDIAN) + offset = 32 - (GET_MODE_BITSIZE (GET_MODE (ref)) + offset * 8); + else + offset *= 8; + *pbitnum = GEN_INT (offset); +} + +/* Similar, but just get the address. Handle the two reload cases. + Add EXTRA_OFFSET to the address we return. 
*/ + +rtx +get_unaligned_address (rtx ref) +{ + rtx base; + HOST_WIDE_INT offset = 0; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress + && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0))) + { + base = find_replacement (&XEXP (ref, 0)); + + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + + return plus_constant (base, offset); +} + +/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7. + X is always returned in a register. */ + +rtx +get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs) +{ + if (GET_CODE (addr) == PLUS) + { + ofs += INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7), + NULL_RTX, 1, OPTAB_LIB_WIDEN); +} + +/* On the Alpha, all (non-symbolic) constants except zero go into + a floating-point register via memory. Note that we cannot + return anything that is not a subset of RCLASS, and that some + symbolic constants cannot be dropped to memory. */ + +enum reg_class +alpha_preferred_reload_class(rtx x, enum reg_class rclass) +{ + /* Zero is present in any register class. */ + if (x == CONST0_RTX (GET_MODE (x))) + return rclass; + + /* These sorts of constants we can easily drop to memory. */ + if (CONST_INT_P (x) + || GET_CODE (x) == CONST_DOUBLE + || GET_CODE (x) == CONST_VECTOR) + { + if (rclass == FLOAT_REGS) + return NO_REGS; + if (rclass == ALL_REGS) + return GENERAL_REGS; + return rclass; + } + + /* All other kinds of constants should not (and in the case of HIGH + cannot) be dropped to memory -- instead we use a GENERAL_REGS + secondary reload. */ + if (CONSTANT_P (x)) + return (rclass == ALL_REGS ? GENERAL_REGS : rclass); + + return rclass; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. */ + +static reg_class_t +alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + enum reg_class rclass = (enum reg_class) rclass_i; + + /* Loading and storing HImode or QImode values to and from memory + usually requires a scratch register. */ + if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode)) + { + if (any_memory_operand (x, mode)) + { + if (in_p) + { + if (!aligned_memory_operand (x, mode)) + sri->icode = direct_optab_handler (reload_in_optab, mode); + } + else + sri->icode = direct_optab_handler (reload_out_optab, mode); + return NO_REGS; + } + } + + /* We also cannot do integral arithmetic into FP regs, as might result + from register elimination into a DImode fp register. */ + if (rclass == FLOAT_REGS) + { + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + return GENERAL_REGS; + if (in_p && INTEGRAL_MODE_P (mode) + && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x)) + return GENERAL_REGS; + } + + return NO_REGS; +} + +/* Subfunction of the following function. Update the flags of any MEM + found in part of X. 
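get_unaligned_offset above only adds the low three bits of OFS, which suffices because OFS and (OFS & 7) are congruent modulo 8, so the result has the required X & 7 value. A quick check of that congruence with plain non-negative integers standing in for RTL (illustrative only):

#include <stdio.h>

int
main (void)
{
  unsigned long addr = 0x1000f;  /* arbitrary unaligned address */
  long ofs;

  /* Adding OFS or just its low three bits lands on the same byte
     within the enclosing aligned quadword.  */
  for (ofs = 0; ofs < 32; ofs++)
    if (((addr + ofs) & 7) != ((addr + (ofs & 7)) & 7))
      printf ("mismatch at ofs=%ld\n", ofs);

  printf ("all offsets agree modulo 8\n");
  return 0;
}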
*/ + +static int +alpha_set_memflags_1 (rtx *xp, void *data) +{ + rtx x = *xp, orig = (rtx) data; + + if (!MEM_P (x)) + return 0; + + MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig); + MEM_IN_STRUCT_P (x) = MEM_IN_STRUCT_P (orig); + MEM_SCALAR_P (x) = MEM_SCALAR_P (orig); + MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig); + MEM_READONLY_P (x) = MEM_READONLY_P (orig); + + /* Sadly, we cannot use alias sets because the extra aliasing + produced by the AND interferes. Given that two-byte quantities + are the only thing we would be able to differentiate anyway, + there does not seem to be any point in convoluting the early + out of the alias check. */ + + return -1; +} + +/* Given SEQ, which is an INSN list, look for any MEMs in either + a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and + volatile flags from REF into each of the MEMs found. If REF is not + a MEM, don't do anything. */ + +void +alpha_set_memflags (rtx seq, rtx ref) +{ + rtx insn; + + if (!MEM_P (ref)) + return; + + /* This is only called from alpha.md, after having had something + generated from one of the insn patterns. So if everything is + zero, the pattern is already up-to-date. */ + if (!MEM_VOLATILE_P (ref) + && !MEM_IN_STRUCT_P (ref) + && !MEM_SCALAR_P (ref) + && !MEM_NOTRAP_P (ref) + && !MEM_READONLY_P (ref)) + return; + + for (insn = seq; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref); + else + gcc_unreachable (); +} + +static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT, + int, bool); + +/* Internal routine for alpha_emit_set_const to check for N or below insns. + If NO_OUTPUT is true, then we only check to see if N insns are possible, + and return pc_rtx if successful. */ + +static rtx +alpha_emit_set_const_1 (rtx target, enum machine_mode mode, + HOST_WIDE_INT c, int n, bool no_output) +{ + HOST_WIDE_INT new_const; + int i, bits; + /* Use a pseudo if highly optimizing and still generating RTL. */ + rtx subtarget + = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target); + rtx temp, insn; + + /* If this is a sign-extended 32-bit constant, we can do this in at most + three insns, so do it if we have enough insns left. We always have + a sign-extended 32-bit constant when compiling on a narrow machine. */ + + if (HOST_BITS_PER_WIDE_INT != 64 + || c >> 31 == -1 || c >> 31 == 0) + { + HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT tmp1 = c - low; + HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT extra = 0; + + /* If HIGH will be interpreted as negative but the constant is + positive, we must adjust it to do two ldha insns. */ + + if ((high & 0x8000) != 0 && c >= 0) + { + extra = 0x4000; + tmp1 -= 0x40000000; + high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000); + } + + if (c == low || (low == 0 && extra == 0)) + { + /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode) + but that meant that we can't handle INT_MIN on 32-bit machines + (like NT/Alpha), because we recurse indefinitely through + emit_move_insn to gen_movdi. So instead, since we know exactly + what we want, create it explicitly. 
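The three-instruction path in alpha_emit_set_const_1 above splits a sign-extended 32-bit constant into a 16-bit low part for lda, a 16-bit high part for ldah, and possibly an extra 0x4000 ldah when the high part would otherwise be read as negative. The arithmetic can be checked in isolation; this sketch mirrors that computation under an invented name (split_const) and, for brevity, only uses non-negative samples:

#include <stdio.h>
#include <assert.h>

typedef long long hwi;  /* stand-in for a 64-bit HOST_WIDE_INT */

/* Decompose C the way alpha_emit_set_const_1's three-insn path does:
   C == low + (high << 16) + (extra << 16), with low and high fitting
   the 16-bit fields of lda/ldah and extra either 0 or 0x4000.  */
static void
split_const (hwi c, hwi *low, hwi *high, hwi *extra)
{
  hwi tmp;

  *low = ((c & 0xffff) ^ 0x8000) - 0x8000;
  tmp = c - *low;
  *high = (((tmp >> 16) & 0xffff) ^ 0x8000) - 0x8000;
  *extra = 0;

  /* If the high part would be read as negative while C is positive,
     move 0x4000 into a second ldah.  */
  if ((*high & 0x8000) != 0 && c >= 0)
    {
      *extra = 0x4000;
      tmp -= 0x40000000;
      *high = ((tmp >> 16) & 0xffff) - 2 * ((tmp >> 16) & 0x8000);
    }
}

int
main (void)
{
  hwi samples[] = { 0x12345678, 0xffff, 0x7fff8123 };
  int i;

  for (i = 0; i < 3; i++)
    {
      hwi low, high, extra;

      split_const (samples[i], &low, &high, &extra);
      assert (samples[i] == low + (high << 16) + (extra << 16));
      printf ("c=%#llx: ldah %lld (+%lld), lda %lld\n",
              (unsigned long long) samples[i], high, extra, low);
    }
  return 0;
}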
*/ + + if (no_output) + return pc_rtx; + if (target == NULL) + target = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c))); + return target; + } + else if (n >= 2 + (extra != 0)) + { + if (no_output) + return pc_rtx; + if (!can_create_pseudo_p ()) + { + emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16))); + temp = target; + } + else + temp = copy_to_suggested_reg (GEN_INT (high << 16), + subtarget, mode); + + /* As of 2002-02-23, addsi3 is only available when not optimizing. + This means that if we go through expand_binop, we'll try to + generate extensions, etc, which will require new pseudos, which + will fail during some split phases. The SImode add patterns + still exist, but are not named. So build the insns by hand. */ + + if (extra != 0) + { + if (! subtarget) + subtarget = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16)); + insn = gen_rtx_SET (VOIDmode, subtarget, insn); + emit_insn (insn); + temp = subtarget; + } + + if (target == NULL) + target = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (low)); + insn = gen_rtx_SET (VOIDmode, target, insn); + emit_insn (insn); + return target; + } + } + + /* If we couldn't do it that way, try some other methods. But if we have + no instructions left, don't bother. Likewise, if this is SImode and + we can't make pseudos, we can't do anything since the expand_binop + and expand_unop calls will widen and try to make pseudos. */ + + if (n == 1 || (mode == SImode && !can_create_pseudo_p ())) + return 0; + + /* Next, see if we can load a related constant and then shift and possibly + negate it to get the constant we want. Try this once each increasing + numbers of insns. */ + + for (i = 1; i < n; i++) + { + /* First, see if minus some low bits, we've an easy load of + high bits. */ + + new_const = ((c & 0xffff) ^ 0x8000) - 0x8000; + if (new_const != 0) + { + temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, add_optab, temp, GEN_INT (new_const), + target, 0, OPTAB_WIDEN); + } + } + + /* Next try complementing. */ + temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_unop (mode, one_cmpl_optab, temp, target, 0); + } + + /* Next try to form a constant and do a left shift. We can do this + if some low-order bits are zero; the exact_log2 call below tells + us that information. The bits we are shifting out could be any + value, but here we'll just try the 0- and sign-extended forms of + the constant. To try to increase the chance of having the same + constant in more than one insn, start at the highest number of + bits to shift, but try all possibilities in case a ZAPNOT will + be useful. */ + + bits = exact_log2 (c & -c); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c >> bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp && c < 0) + { + new_const = (unsigned HOST_WIDE_INT)c >> bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashl_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + + /* Now try high-order zero bits. Here we try the shifted-in bits as + all zero and all ones. Be careful to avoid shifting outside the + mode and to avoid shifting outside the host wide int size. 
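+
+     Example (editor's illustration, not upstream text): the
+     zero-extended constant 0xffffffff is not a sign-extended 32-bit
+     value, but shifting it left by 32 and filling the vacated bits
+     with ones gives -1, which loads in a single lda; a logical right
+     shift then recovers the constant in two insns total:
+
+         lda $t, -1($31)
+         srl $t, 32, $t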
*/ + /* On narrow hosts, don't shift a 1 into the high bit, since we'll + confuse the recursive call and set all of the high 32 bits. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64)); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1); + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, lshr_optab, temp, GEN_INT (bits), + target, 1, OPTAB_WIDEN); + } + } + + /* Now try high-order 1 bits. We get that with a sign-extension. + But one bit isn't enough here. Be careful to avoid shifting outside + the mode and to avoid shifting outside the host wide int size. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (~ c) - 2); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1); + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashr_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + } + +#if HOST_BITS_PER_WIDE_INT == 64 + /* Finally, see if can load a value into the target that is the same as the + constant except that all bytes that are 0 are changed to be 0xff. If we + can, then we can do a ZAPNOT to obtain the desired constant. */ + + new_const = c; + for (i = 0; i < 64; i += 8) + if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0) + new_const |= (HOST_WIDE_INT) 0xff << i; + + /* We are only called for SImode and DImode. If this is SImode, ensure that + we are sign extended to a full word. */ + + if (mode == SImode) + new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000; + + if (new_const != c) + { + temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const), + target, 0, OPTAB_WIDEN); + } + } +#endif + + return 0; +} + +/* Try to output insns to set TARGET equal to the constant C if it can be + done in less than N insns. Do all computations in MODE. Returns the place + where the output has been placed if it can be done and the insns have been + emitted. If it would take more than N insns, zero is returned and no + insns and emitted. */ + +static rtx +alpha_emit_set_const (rtx target, enum machine_mode mode, + HOST_WIDE_INT c, int n, bool no_output) +{ + enum machine_mode orig_mode = mode; + rtx orig_target = target; + rtx result = 0; + int i; + + /* If we can't make any pseudos, TARGET is an SImode hard register, we + can't load this constant in one insn, do this in DImode. */ + if (!can_create_pseudo_p () && mode == SImode + && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER) + { + result = alpha_emit_set_const_1 (target, mode, c, 1, no_output); + if (result) + return result; + + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + else if (mode == V8QImode || mode == V4HImode || mode == V2SImode) + { + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + + /* Try 1 insn, then 2, then up to N. 
*/ + for (i = 1; i <= n; i++) + { + result = alpha_emit_set_const_1 (target, mode, c, i, no_output); + if (result) + { + rtx insn, set; + + if (no_output) + return result; + + insn = get_last_insn (); + set = single_set (insn); + if (! CONSTANT_P (SET_SRC (set))) + set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c)); + break; + } + } + + /* Allow for the case where we changed the mode of TARGET. */ + if (result) + { + if (result == target) + result = orig_target; + else if (mode != orig_mode) + result = gen_lowpart (orig_mode, result); + } + + return result; +} + +/* Having failed to find a 3 insn sequence in alpha_emit_set_const, + fall back to a straight forward decomposition. We do this to avoid + exponential run times encountered when looking for longer sequences + with alpha_emit_set_const. */ + +static rtx +alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2) +{ + HOST_WIDE_INT d1, d2, d3, d4; + + /* Decompose the entire word */ +#if HOST_BITS_PER_WIDE_INT >= 64 + gcc_assert (c2 == -(c1 < 0)); + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + c1 = (c1 - d2) >> 32; + d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d3; + d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d4); +#else + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d2); + c2 += (d2 < 0); + d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000; + c2 -= d3; + d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c2 == d4); +#endif + + /* Construct the high word */ + if (d4) + { + emit_move_insn (target, GEN_INT (d4)); + if (d3) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3))); + } + else + emit_move_insn (target, GEN_INT (d3)); + + /* Shift it into place */ + emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32))); + + /* Add in the low bits. */ + if (d2) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); + if (d1) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); + + return target; +} + +/* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return + the low 64 bits. */ + +static void +alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1) +{ + HOST_WIDE_INT i0, i1; + + if (GET_CODE (x) == CONST_VECTOR) + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + + + if (CONST_INT_P (x)) + { + i0 = INTVAL (x); + i1 = -(i0 < 0); + } + else if (HOST_BITS_PER_WIDE_INT >= 64) + { + i0 = CONST_DOUBLE_LOW (x); + i1 = -(i0 < 0); + } + else + { + i0 = CONST_DOUBLE_LOW (x); + i1 = CONST_DOUBLE_HIGH (x); + } + + *p0 = i0; + *p1 = i1; +} + +/* Implement LEGITIMATE_CONSTANT_P. This is all constants for which we + are willing to load the value into a register via a move pattern. + Normally this is all symbolic constants, integral constants that + take three or fewer instructions, and floating-point zero. */ + +bool +alpha_legitimate_constant_p (rtx x) +{ + enum machine_mode mode = GET_MODE (x); + HOST_WIDE_INT i0, i1; + + switch (GET_CODE (x)) + { + case LABEL_REF: + case HIGH: + return true; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) + x = XEXP (XEXP (x, 0), 0); + else + return true; + + if (GET_CODE (x) != SYMBOL_REF) + return true; + + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS symbols are never valid. 
*/ + return SYMBOL_REF_TLS_MODEL (x) == 0; + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + return true; + if (FLOAT_MODE_P (mode)) + return false; + goto do_integer; + + case CONST_VECTOR: + if (x == CONST0_RTX (mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; + if (GET_MODE_SIZE (mode) != 8) + return false; + goto do_integer; + + case CONST_INT: + do_integer: + if (TARGET_BUILD_CONSTANTS) + return true; + alpha_extract_integer (x, &i0, &i1); + if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0)) + return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL; + return false; + + default: + return false; + } +} + +/* Operand 1 is known to be a constant, and should require more than one + instruction to load. Emit that multi-part load. */ + +bool +alpha_split_const_mov (enum machine_mode mode, rtx *operands) +{ + HOST_WIDE_INT i0, i1; + rtx temp = NULL_RTX; + + alpha_extract_integer (operands[1], &i0, &i1); + + if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0)) + temp = alpha_emit_set_const (operands[0], mode, i0, 3, false); + + if (!temp && TARGET_BUILD_CONSTANTS) + temp = alpha_emit_set_long_const (operands[0], i0, i1); + + if (temp) + { + if (!rtx_equal_p (operands[0], temp)) + emit_move_insn (operands[0], temp); + return true; + } + + return false; +} + +/* Expand a move instruction; return true if all work is done. + We don't handle non-bwx subword loads here. */ + +bool +alpha_expand_mov (enum machine_mode mode, rtx *operands) +{ + rtx tmp; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + /* Allow legitimize_address to perform some simplifications. */ + if (mode == Pmode && symbolic_operand (operands[1], mode)) + { + tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode); + if (tmp) + { + if (tmp == operands[0]) + return true; + operands[1] = tmp; + return false; + } + } + + /* Early out for non-constants and valid constants. */ + if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) + return false; + + /* Split large integers. */ + if (CONST_INT_P (operands[1]) + || GET_CODE (operands[1]) == CONST_DOUBLE + || GET_CODE (operands[1]) == CONST_VECTOR) + { + if (alpha_split_const_mov (mode, operands)) + return true; + } + + /* Otherwise we've nothing left but to drop the thing to memory. */ + tmp = force_const_mem (mode, operands[1]); + + if (tmp == NULL_RTX) + return false; + + if (reload_in_progress) + { + emit_move_insn (operands[0], XEXP (tmp, 0)); + operands[1] = replace_equiv_address (tmp, operands[0]); + } + else + operands[1] = validize_mem (tmp); + return false; +} + +/* Expand a non-bwx QImode or HImode move instruction; + return true if all work is done. */ + +bool +alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands) +{ + rtx seq; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + + /* Handle four memory cases, unaligned and aligned for either the input + or the output. The only case where we can be called during reload is + for aligned loads; all other cases require temporaries. 
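+
+     Roughly (editor's note, not upstream text): the aligned paths
+     operate on the enclosing aligned longword, extracting the byte or
+     word on loads and doing a read-modify-write with the insert and
+     mask patterns on stores, while the unaligned paths fall back to
+     the ldq_u based sequences described before
+     alpha_expand_unaligned_load further below.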
*/ + + if (any_memory_operand (operands[1], mode)) + { + if (aligned_memory_operand (operands[1], mode)) + { + if (reload_in_progress) + { + if (mode == QImode) + seq = gen_reload_inqi_aligned (operands[0], operands[1]); + else + seq = gen_reload_inhi_aligned (operands[0], operands[1]); + emit_insn (seq); + } + else + { + rtx aligned_mem, bitnum; + rtx scratch = gen_reg_rtx (SImode); + rtx subtarget; + bool copyout; + + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + if (mode == QImode) + seq = gen_aligned_loadqi (subtarget, aligned_mem, + bitnum, scratch); + else + seq = gen_aligned_loadhi (subtarget, aligned_mem, + bitnum, scratch); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + } + else + { + /* Don't pass these as parameters since that makes the generated + code depend on parameter evaluation order which will cause + bootstrap failures. */ + + rtx temp1, temp2, subtarget, ua; + bool copyout; + + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + ua = get_unaligned_address (operands[1]); + if (mode == QImode) + seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); + else + seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); + + alpha_set_memflags (seq, operands[1]); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + return true; + } + + if (any_memory_operand (operands[0], mode)) + { + if (aligned_memory_operand (operands[0], mode)) + { + rtx aligned_mem, bitnum; + rtx temp1 = gen_reg_rtx (SImode); + rtx temp2 = gen_reg_rtx (SImode); + + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + + emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, + temp1, temp2)); + } + else + { + rtx temp1 = gen_reg_rtx (DImode); + rtx temp2 = gen_reg_rtx (DImode); + rtx temp3 = gen_reg_rtx (DImode); + rtx ua = get_unaligned_address (operands[0]); + + if (mode == QImode) + seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3); + else + seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3); + + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + } + return true; + } + + return false; +} + +/* Implement the movmisalign patterns. One of the operands is a memory + that is not naturally aligned. Emit instructions to load it. */ + +void +alpha_expand_movmisalign (enum machine_mode mode, rtx *operands) +{ + /* Honor misaligned loads, for those we promised to do so. */ + if (MEM_P (operands[1])) + { + rtx tmp; + + if (register_operand (operands[0], mode)) + tmp = operands[0]; + else + tmp = gen_reg_rtx (mode); + + alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + } + else if (MEM_P (operands[0])) + { + if (!reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); + } + else + gcc_unreachable (); +} + +/* Generate an unsigned DImode to FP conversion. This is the same code + optabs would emit if we didn't have TFmode patterns. 
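+
+   In C terms the emitted code is roughly the following (editor's
+   sketch, not upstream text; the names are illustrative only):
+
+       if (x >= 0)
+         result = (FLOAT) x;
+       else
+         {
+           long t = (long) ((unsigned long) x >> 1) | (x & 1);
+           result = (FLOAT) t + (FLOAT) t;
+         }
+
+   i.e. halving brings the value into signed range, doubling restores
+   it, and OR-ing the shifted-out bit back in keeps the rounding
+   correct.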
+ + For SFmode, this is the only construction I've found that can pass + gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode + intermediates will work, because you'll get intermediate rounding + that ruins the end result. Some of this could be fixed by turning + on round-to-positive-infinity, but that requires diddling the fpsr, + which kills performance. I tried turning this around and converting + to a negative number, so that I could turn on /m, but either I did + it wrong or there's something else cause I wound up with the exact + same single-bit error. There is a branch-less form of this same code: + + srl $16,1,$1 + and $16,1,$2 + cmplt $16,0,$3 + or $1,$2,$2 + cmovge $16,$16,$2 + itoft $3,$f10 + itoft $2,$f11 + cvtqs $f11,$f11 + adds $f11,$f11,$f0 + fcmoveq $f10,$f11,$f0 + + I'm not using it because it's the same number of instructions as + this branch-full form, and it has more serialized long latency + instructions on the critical path. + + For DFmode, we can avoid rounding errors by breaking up the word + into two pieces, converting them separately, and adding them back: + + LC0: .long 0,0x5f800000 + + itoft $16,$f11 + lda $2,LC0 + cmplt $16,0,$1 + cpyse $f11,$f31,$f10 + cpyse $f31,$f11,$f11 + s4addq $1,$2,$1 + lds $f12,0($1) + cvtqt $f10,$f10 + cvtqt $f11,$f11 + addt $f12,$f10,$f0 + addt $f0,$f11,$f0 + + This doesn't seem to be a clear-cut win over the optabs form. + It probably all depends on the distribution of numbers being + converted -- in the optabs form, all but high-bit-set has a + much lower minimum execution time. */ + +void +alpha_emit_floatuns (rtx operands[2]) +{ + rtx neglab, donelab, i0, i1, f0, in, out; + enum machine_mode mode; + + out = operands[0]; + in = force_reg (DImode, operands[1]); + mode = GET_MODE (out); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i0 = gen_reg_rtx (DImode); + i1 = gen_reg_rtx (DImode); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); + + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); + emit_insn (gen_anddi3 (i1, in, const1_rtx)); + emit_insn (gen_iordi3 (i0, i0, i1)); + emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +/* Generate the comparison for a conditional branch. */ + +void +alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode) +{ + enum rtx_code cmp_code, branch_code; + enum machine_mode branch_mode = VOIDmode; + enum rtx_code code = GET_CODE (operands[0]); + rtx op0 = operands[1], op1 = operands[2]; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, branch_code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), branch_code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* For FP, we swap them, for INT, we reverse them. 
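+
+     For example (editor's note, not upstream text), a DFmode a > b is
+     emitted as cmptlt b,a and a branch on a nonzero result, whereas a
+     DImode a > b is emitted as cmple a,b and a branch on a zero result
+     (beq), since the hardware compares only exist in the eq/lt/le
+     (and unsigned ult/ule) forms.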
*/ + if (cmp_mode == DFmode) + { + cmp_code = swap_condition (code); + branch_code = NE; + tem = op0, op0 = op1, op1 = tem; + } + else + { + cmp_code = reverse_condition (code); + branch_code = EQ; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DFmode) + { + if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) + { + /* When we are not as concerned about non-finite values, and we + are comparing against zero, we can branch directly. */ + if (op1 == CONST0_RTX (DFmode)) + cmp_code = UNKNOWN, branch_code = code; + else if (op0 == CONST0_RTX (DFmode)) + { + /* Undo the swap we probably did just above. */ + tem = op0, op0 = op1, op1 = tem; + branch_code = swap_condition (cmp_code); + cmp_code = UNKNOWN; + } + } + else + { + /* ??? We mark the branch mode to be CCmode to prevent the + compare and branch from being combined, since the compare + insn follows IEEE rules that the branch does not. */ + branch_mode = CCmode; + } + } + else + { + /* The following optimizations are only for signed compares. */ + if (code != LEU && code != LTU && code != GEU && code != GTU) + { + /* Whee. Compare and branch against 0 directly. */ + if (op1 == const0_rtx) + cmp_code = UNKNOWN, branch_code = code; + + /* If the constants doesn't fit into an immediate, but can + be generated by lda/ldah, we adjust the argument and + compare against zero, so we can use beq/bne directly. */ + /* ??? Don't do this when comparing against symbols, otherwise + we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will + be declared false out of hand (at least for non-weak). */ + else if (CONST_INT_P (op1) + && (code == EQ || code == NE) + && !(symbolic_operand (op0, VOIDmode) + || (REG_P (op0) && REG_POINTER (op0)))) + { + rtx n_op1 = GEN_INT (-INTVAL (op1)); + + if (! satisfies_constraint_I (op1) + && (satisfies_constraint_K (n_op1) + || satisfies_constraint_L (n_op1))) + cmp_code = PLUS, branch_code = code, op1 = n_op1; + } + } + + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + tem = op0; + if (cmp_code != UNKNOWN) + { + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); + } + + /* Emit the branch instruction. */ + tem = gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (branch_code, + branch_mode, tem, + CONST0_RTX (cmp_mode)), + gen_rtx_LABEL_REF (VOIDmode, + operands[3]), + pc_rtx)); + emit_jump_insn (tem); +} + +/* Certain simplifications can be done to make invalid setcc operations + valid. Return the final comparison, or NULL if we can't work. */ + +bool +alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode) +{ + enum rtx_code cmp_code; + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = operands[2], op1 = operands[3]; + rtx tmp; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + if (cmp_mode == DFmode && !TARGET_FIX) + return 0; + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + + cmp_code = UNKNOWN; + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. 
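+
+     That is (editor's note, not upstream text), these map directly
+     onto the cmpeq/cmplt/cmple and cmpult/cmpule integer compares and
+     the cmpteq/cmptlt/cmptle/cmptun floating compares; the GE/GT
+     family below has no direct instruction and is obtained by
+     reversing or swapping first.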
*/ + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + break; + + case NE: + if (cmp_mode == DImode && op1 == const0_rtx) + break; + /* FALLTHRU */ + + case ORDERED: + cmp_code = reverse_condition (code); + code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + tmp = op0, op0 = op1, op1 = tmp; + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!register_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + if (cmp_code != UNKNOWN) + { + tmp = gen_reg_rtx (cmp_mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); + + op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; + op1 = const0_rtx; + } + + /* Emit the setcc instruction. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (code, DImode, op0, op1))); + return true; +} + + +/* Rewrite a comparison against zero CMP of the form + (CODE (cc0) (const_int 0)) so it can be written validly in + a conditional move (if_then_else CMP ...). + If both of the operands that set cc0 are nonzero we must emit + an insn to perform the compare (it can't be done within + the conditional move). */ + +rtx +alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) +{ + enum rtx_code code = GET_CODE (cmp); + enum rtx_code cmov_code = NE; + rtx op0 = XEXP (cmp, 0); + rtx op1 = XEXP (cmp, 1); + enum machine_mode cmp_mode + = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); + enum machine_mode cmov_mode = VOIDmode; + int local_fast_math = flag_unsafe_math_optimizations; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + gcc_assert (cmp_mode == DFmode || cmp_mode == DImode); + + if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) + { + enum rtx_code cmp_code; + + if (! TARGET_FIX) + return 0; + + /* If we have fp<->int register move instructions, do a cmov by + performing the comparison in fp registers, and move the + zero/nonzero value to integer registers, where we can then + use a normal cmov, or vice-versa. */ + + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + cmp_code = code, code = NE; + else + { + cmp_code = swap_condition (code); + code = NE; + tem = op0, op0 = op1, op1 = tem; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + tem = gen_reg_rtx (cmp_mode); + emit_insn (gen_rtx_SET (VOIDmode, tem, + gen_rtx_fmt_ee (cmp_code, cmp_mode, + op0, op1))); + + cmp_mode = cmp_mode == DImode ? 
DFmode : DImode; + op0 = gen_lowpart (cmp_mode, tem); + op1 = CONST0_RTX (cmp_mode); + local_fast_math = 1; + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* We may be able to use a conditional move directly. + This avoids emitting spurious compares. */ + if (signed_comparison_operator (cmp, VOIDmode) + && (cmp_mode == DImode || local_fast_math) + && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) + return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + + /* We can't put the comparison inside the conditional move; + emit a compare instruction and put that inside the + conditional move. Make sure we emit only comparisons we have; + swap or reverse as necessary. */ + + if (!can_create_pseudo_p ()) + return NULL_RTX; + + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares: */ + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + code = reverse_condition (code); + cmov_code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These must be swapped. */ + if (op1 != CONST0_RTX (cmp_mode)) + { + code = swap_condition (code); + tem = op0, op0 = op1, op1 = tem; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* ??? We mark the branch mode to be CCmode to prevent the compare + and cmov from being combined, since the compare insn follows IEEE + rules that the cmov does not. */ + if (cmp_mode == DFmode && !local_fast_math) + cmov_mode = CCmode; + + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); + return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); +} + +/* Simplify a conditional move of two constants into a setcc with + arithmetic. This is done with a splitter since combine would + just undo the work if done during code generation. It also catches + cases we wouldn't have before cse. */ + +int +alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, + rtx t_rtx, rtx f_rtx) +{ + HOST_WIDE_INT t, f, diff; + enum machine_mode mode; + rtx target, subtarget, tmp; + + mode = GET_MODE (dest); + t = INTVAL (t_rtx); + f = INTVAL (f_rtx); + diff = t - f; + + if (((code == NE || code == EQ) && diff < 0) + || (code == GE || code == GT)) + { + code = reverse_condition (code); + diff = t, t = f, f = diff; + diff = t - f; + } + + subtarget = target = dest; + if (mode != DImode) + { + target = gen_lowpart (DImode, dest); + if (can_create_pseudo_p ()) + subtarget = gen_reg_rtx (DImode); + else + subtarget = target; + } + /* Below, we must be careful to use copy_rtx on target and subtarget + in intermediate insns, as they may be a subreg rtx, which may not + be shared. */ + + if (f == 0 && exact_log2 (diff) > 0 + /* On EV6, we've got enough shifters to make non-arithmetic shifts + viable over a longer latency cmove. On EV5, the E0 slot is a + scarce resource, and on EV4 shift has the same latency as a cmove. 
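+
+     Example (editor's illustration, not upstream text): with t == 8
+     and f == 0, (cond ? 8 : 0) is emitted as the comparison itself,
+     which yields 0 or 1, followed by sll $t,3,$t, rather than as a
+     cmov.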
*/ + && (diff <= 8 || alpha_tune == PROCESSOR_EV6)) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), + GEN_INT (exact_log2 (t))); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else if (f == 0 && t == -1) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); + } + else if (diff == 1 || diff == 4 || diff == 8) + { + rtx add_op; + + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + if (diff == 1) + emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); + else + { + add_op = GEN_INT (f); + if (sext_add_operand (add_op, mode)) + { + tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), + GEN_INT (diff)); + tmp = gen_rtx_PLUS (DImode, tmp, add_op); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else + return 0; + } + } + else + return 0; + + return 1; +} + +/* Look up the function X_floating library function name for the + given operation. */ + +struct GTY(()) xfloating_op +{ + const enum rtx_code code; + const char *const GTY((skip)) osf_func; + const char *const GTY((skip)) vms_func; + rtx libcall; +}; + +static GTY(()) struct xfloating_op xfloating_ops[] = +{ + { PLUS, "_OtsAddX", "OTS$ADD_X", 0 }, + { MINUS, "_OtsSubX", "OTS$SUB_X", 0 }, + { MULT, "_OtsMulX", "OTS$MUL_X", 0 }, + { DIV, "_OtsDivX", "OTS$DIV_X", 0 }, + { EQ, "_OtsEqlX", "OTS$EQL_X", 0 }, + { NE, "_OtsNeqX", "OTS$NEQ_X", 0 }, + { LT, "_OtsLssX", "OTS$LSS_X", 0 }, + { LE, "_OtsLeqX", "OTS$LEQ_X", 0 }, + { GT, "_OtsGtrX", "OTS$GTR_X", 0 }, + { GE, "_OtsGeqX", "OTS$GEQ_X", 0 }, + { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 }, + { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 }, + { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 }, + { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 }, + { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 } +}; + +static GTY(()) struct xfloating_op vax_cvt_ops[] = +{ + { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 }, + { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 } +}; + +static rtx +alpha_lookup_xfloating_lib_func (enum rtx_code code) +{ + struct xfloating_op *ops = xfloating_ops; + long n = ARRAY_SIZE (xfloating_ops); + long i; + + gcc_assert (TARGET_HAS_XFLOATING_LIBS); + + /* How irritating. Nothing to key off for the main table. */ + if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) + { + ops = vax_cvt_ops; + n = ARRAY_SIZE (vax_cvt_ops); + } + + for (i = 0; i < n; ++i, ++ops) + if (ops->code == code) + { + rtx func = ops->libcall; + if (!func) + { + func = init_one_libfunc (TARGET_ABI_OPEN_VMS + ? ops->vms_func : ops->osf_func); + ops->libcall = func; + } + return func; + } + + gcc_unreachable (); +} + +/* Most X_floating operations take the rounding mode as an argument. + Compute that here. */ + +static int +alpha_compute_xfloating_mode_arg (enum rtx_code code, + enum alpha_fp_rounding_mode round) +{ + int mode; + + switch (round) + { + case ALPHA_FPRM_NORM: + mode = 2; + break; + case ALPHA_FPRM_MINF: + mode = 1; + break; + case ALPHA_FPRM_CHOP: + mode = 0; + break; + case ALPHA_FPRM_DYN: + mode = 4; + break; + default: + gcc_unreachable (); + + /* XXX For reference, round to +inf is mode = 3. 
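+
+     Editor's note, not upstream text: under the default
+     round-to-nearest setting (ALPHA_FPRM_NORM) the value passed to the
+     library routines is therefore 2, so an X_floating add is in effect
+     _OtsAddX (a, b, 2).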
*/ + } + + if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N) + mode |= 0x10000; + + return mode; +} + +/* Emit an X_floating library function call. + + Note that these functions do not follow normal calling conventions: + TFmode arguments are passed in two integer registers (as opposed to + indirect); TFmode return values appear in R16+R17. + + FUNC is the function to call. + TARGET is where the output belongs. + OPERANDS are the inputs. + NOPERANDS is the count of inputs. + EQUIV is the expression equivalent for the function. +*/ + +static void +alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], + int noperands, rtx equiv) +{ + rtx usage = NULL_RTX, tmp, reg; + int regno = 16, i; + + start_sequence (); + + for (i = 0; i < noperands; ++i) + { + switch (GET_MODE (operands[i])) + { + case TFmode: + reg = gen_rtx_REG (TFmode, regno); + regno += 2; + break; + + case DFmode: + reg = gen_rtx_REG (DFmode, regno + 32); + regno += 1; + break; + + case VOIDmode: + gcc_assert (CONST_INT_P (operands[i])); + /* FALLTHRU */ + case DImode: + reg = gen_rtx_REG (DImode, regno); + regno += 1; + break; + + default: + gcc_unreachable (); + } + + emit_move_insn (reg, operands[i]); + usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage); + } + + switch (GET_MODE (target)) + { + case TFmode: + reg = gen_rtx_REG (TFmode, 16); + break; + case DFmode: + reg = gen_rtx_REG (DFmode, 32); + break; + case DImode: + reg = gen_rtx_REG (DImode, 0); + break; + default: + gcc_unreachable (); + } + + tmp = gen_rtx_MEM (QImode, func); + tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx, + const0_rtx, const0_rtx)); + CALL_INSN_FUNCTION_USAGE (tmp) = usage; + RTL_CONST_CALL_P (tmp) = 1; + + tmp = get_insns (); + end_sequence (); + + emit_libcall_block (tmp, target, reg, equiv); +} + +/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ + +void +alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[]) +{ + rtx func; + int mode; + rtx out_operands[3]; + + func = alpha_lookup_xfloating_lib_func (code); + mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); + + out_operands[0] = operands[1]; + out_operands[1] = operands[2]; + out_operands[2] = GEN_INT (mode); + alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3, + gen_rtx_fmt_ee (code, TFmode, operands[1], + operands[2])); +} + +/* Emit an X_floating library function call for a comparison. */ + +static rtx +alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) +{ + enum rtx_code cmp_code, res_code; + rtx func, out, operands[2], note; + + /* X_floating library comparison functions return + -1 unordered + 0 false + 1 true + Convert the compare against the raw return value. */ + + cmp_code = *pcode; + switch (cmp_code) + { + case UNORDERED: + cmp_code = EQ; + res_code = LT; + break; + case ORDERED: + cmp_code = EQ; + res_code = GE; + break; + case NE: + res_code = NE; + break; + case EQ: + case LT: + case GT: + case LE: + case GE: + res_code = GT; + break; + default: + gcc_unreachable (); + } + *pcode = res_code; + + func = alpha_lookup_xfloating_lib_func (cmp_code); + + operands[0] = op0; + operands[1] = op1; + out = gen_reg_rtx (DImode); + + /* What's actually returned is -1,0,1, not a proper boolean value, + so use an EXPR_LIST as with a generic libcall instead of a + comparison type expression. 
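+
+     For example (editor's note, not upstream text), a TFmode a <= b
+     becomes a call to _OtsLeqX (OTS$LEQ_X on VMS) whose result is
+     tested with > 0, and UNORDERED becomes a call to _OtsEqlX tested
+     with < 0, since only an unordered pair makes the routine return
+     -1.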
*/ + note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX); + note = gen_rtx_EXPR_LIST (VOIDmode, op0, note); + note = gen_rtx_EXPR_LIST (VOIDmode, func, note); + alpha_emit_xfloating_libcall (func, out, operands, 2, note); + + return out; +} + +/* Emit an X_floating library function call for a conversion. */ + +void +alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) +{ + int noperands = 1, mode; + rtx out_operands[2]; + rtx func; + enum rtx_code code = orig_code; + + if (code == UNSIGNED_FIX) + code = FIX; + + func = alpha_lookup_xfloating_lib_func (code); + + out_operands[0] = operands[1]; + + switch (code) + { + case FIX: + mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + case FLOAT_TRUNCATE: + mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + default: + break; + } + + alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands, + gen_rtx_fmt_e (orig_code, + GET_MODE (operands[0]), + operands[1])); +} + +/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of + DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, + guarantee that the sequence + set (OP[0] OP[2]) + set (OP[1] OP[3]) + is valid. Naturally, output operand ordering is little-endian. + This is used by *movtf_internal and *movti_internal. */ + +void +alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode, + bool fixup_overlap) +{ + switch (GET_CODE (operands[1])) + { + case REG: + operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); + break; + + case MEM: + operands[3] = adjust_address (operands[1], DImode, 8); + operands[2] = adjust_address (operands[1], DImode, 0); + break; + + case CONST_INT: + case CONST_DOUBLE: + gcc_assert (operands[1] == CONST0_RTX (mode)); + operands[2] = operands[3] = const0_rtx; + break; + + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[0])) + { + case REG: + operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + break; + + case MEM: + operands[1] = adjust_address (operands[0], DImode, 8); + operands[0] = adjust_address (operands[0], DImode, 0); + break; + + default: + gcc_unreachable (); + } + + if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp; + tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; + tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; + } +} + +/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, + op2 is a register containing the sign bit, operation is the + logical operation to be performed. */ + +void +alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) +{ + rtx high_bit = operands[2]; + rtx scratch; + int move; + + alpha_split_tmode_pair (operands, TFmode, false); + + /* Detect three flavors of operand overlap. */ + move = 1; + if (rtx_equal_p (operands[0], operands[2])) + move = 0; + else if (rtx_equal_p (operands[1], operands[2])) + { + if (rtx_equal_p (operands[0], high_bit)) + move = 2; + else + move = -1; + } + + if (move < 0) + emit_move_insn (operands[0], operands[2]); + + /* ??? If the destination overlaps both source tf and high_bit, then + assume source tf is dead in its entirety and use the other half + for a scratch register. Otherwise "scratch" is just the proper + destination register. 
*/ + scratch = operands[move < 2 ? 1 : 3]; + + emit_insn ((*operation) (scratch, high_bit, operands[3])); + + if (move > 0) + { + emit_move_insn (operands[0], operands[2]); + if (move > 1) + emit_move_insn (operands[1], scratch); + } +} + +/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting + unaligned data: + + unsigned: signed: + word: ldq_u r1,X(r11) ldq_u r1,X(r11) + ldq_u r2,X+1(r11) ldq_u r2,X+1(r11) + lda r3,X(r11) lda r3,X+2(r11) + extwl r1,r3,r1 extql r1,r3,r1 + extwh r2,r3,r2 extqh r2,r3,r2 + or r1.r2.r1 or r1,r2,r1 + sra r1,48,r1 + + long: ldq_u r1,X(r11) ldq_u r1,X(r11) + ldq_u r2,X+3(r11) ldq_u r2,X+3(r11) + lda r3,X(r11) lda r3,X(r11) + extll r1,r3,r1 extll r1,r3,r1 + extlh r2,r3,r2 extlh r2,r3,r2 + or r1.r2.r1 addl r1,r2,r1 + + quad: ldq_u r1,X(r11) + ldq_u r2,X+7(r11) + lda r3,X(r11) + extql r1,r3,r1 + extqh r2,r3,r2 + or r1.r2.r1 +*/ + +void +alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size, + HOST_WIDE_INT ofs, int sign) +{ + rtx meml, memh, addr, extl, exth, tmp, mema; + enum machine_mode mode; + + if (TARGET_BWX && size == 2) + { + meml = adjust_address (mem, QImode, ofs); + memh = adjust_address (mem, QImode, ofs+1); + if (BYTES_BIG_ENDIAN) + tmp = meml, meml = memh, memh = tmp; + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendqidi2 (extl, meml)); + emit_insn (gen_zero_extendqidi2 (exth, memh)); + exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8), + NULL, 1, OPTAB_LIB_WIDEN); + addr = expand_simple_binop (DImode, IOR, extl, exth, + NULL, 1, OPTAB_LIB_WIDEN); + + if (sign && GET_MODE (tgt) != HImode) + { + addr = gen_lowpart (HImode, addr); + emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0)); + } + else + { + if (GET_MODE (tgt) != DImode) + addr = gen_lowpart (GET_MODE (tgt), addr); + emit_move_insn (tgt, addr); + } + return; + } + + meml = gen_reg_rtx (DImode); + memh = gen_reg_rtx (DImode); + addr = gen_reg_rtx (DImode); + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + + mema = XEXP (mem, 0); + if (GET_CODE (mema) == LO_SUM) + mema = force_reg (Pmode, mema); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + + tmp = change_address (mem, DImode, + gen_rtx_AND (DImode, + plus_constant (mema, ofs), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (meml, tmp); + + tmp = change_address (mem, DImode, + gen_rtx_AND (DImode, + plus_constant (mema, ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (memh, tmp); + + if (WORDS_BIG_ENDIAN && sign && (size == 2 || size == 4)) + { + emit_move_insn (addr, plus_constant (mema, -1)); + + emit_insn (gen_extqh_be (extl, meml, addr)); + emit_insn (gen_extxl_be (exth, memh, GEN_INT (64), addr)); + + addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); + addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (64 - size*8), + addr, 1, OPTAB_WIDEN); + } + else if (sign && size == 2) + { + emit_move_insn (addr, plus_constant (mema, ofs+2)); + + emit_insn (gen_extxl_le (extl, meml, GEN_INT (64), addr)); + emit_insn (gen_extqh_le (exth, memh, addr)); + + /* We must use tgt here for the target. Alpha-vms port fails if we use + addr for the target, because addr is marked as a pointer and combine + knows that pointers are always sign-extended 32-bit values. 
*/ + addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); + addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48), + addr, 1, OPTAB_WIDEN); + } + else + { + if (WORDS_BIG_ENDIAN) + { + emit_move_insn (addr, plus_constant (mema, ofs+size-1)); + switch ((int) size) + { + case 2: + emit_insn (gen_extwh_be (extl, meml, addr)); + mode = HImode; + break; + + case 4: + emit_insn (gen_extlh_be (extl, meml, addr)); + mode = SImode; + break; + + case 8: + emit_insn (gen_extqh_be (extl, meml, addr)); + mode = DImode; + break; + + default: + gcc_unreachable (); + } + emit_insn (gen_extxl_be (exth, memh, GEN_INT (size*8), addr)); + } + else + { + emit_move_insn (addr, plus_constant (mema, ofs)); + emit_insn (gen_extxl_le (extl, meml, GEN_INT (size*8), addr)); + switch ((int) size) + { + case 2: + emit_insn (gen_extwh_le (exth, memh, addr)); + mode = HImode; + break; + + case 4: + emit_insn (gen_extlh_le (exth, memh, addr)); + mode = SImode; + break; + + case 8: + emit_insn (gen_extqh_le (exth, memh, addr)); + mode = DImode; + break; + + default: + gcc_unreachable (); + } + } + + addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), + gen_lowpart (mode, exth), gen_lowpart (mode, tgt), + sign, OPTAB_WIDEN); + } + + if (addr != tgt) + emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); +} + +/* Similarly, use ins and msk instructions to perform unaligned stores. */ + +void +alpha_expand_unaligned_store (rtx dst, rtx src, + HOST_WIDE_INT size, HOST_WIDE_INT ofs) +{ + rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; + + if (TARGET_BWX && size == 2) + { + if (src != const0_rtx) + { + dstl = gen_lowpart (QImode, src); + dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), + NULL, 1, OPTAB_LIB_WIDEN); + dsth = gen_lowpart (QImode, dsth); + } + else + dstl = dsth = const0_rtx; + + meml = adjust_address (dst, QImode, ofs); + memh = adjust_address (dst, QImode, ofs+1); + if (BYTES_BIG_ENDIAN) + addr = meml, meml = memh, memh = addr; + + emit_move_insn (meml, dstl); + emit_move_insn (memh, dsth); + return; + } + + dstl = gen_reg_rtx (DImode); + dsth = gen_reg_rtx (DImode); + insl = gen_reg_rtx (DImode); + insh = gen_reg_rtx (DImode); + + dsta = XEXP (dst, 0); + if (GET_CODE (dsta) == LO_SUM) + dsta = force_reg (Pmode, dsta); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. 
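+
+     The overall shape (editor's note, not upstream text) mirrors the
+     unaligned load above: fetch the two quadwords covering the
+     destination with ldq_u, clear the affected bytes with the msk
+     patterns, position the new data with the ins patterns, OR the
+     pieces together, and store them back (stq_u), high quadword first
+     so that the degenerate aligned case still ends up with the right
+     data.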
*/ + + meml = change_address (dst, DImode, + gen_rtx_AND (DImode, + plus_constant (dsta, ofs), + GEN_INT (-8))); + set_mem_alias_set (meml, 0); + + memh = change_address (dst, DImode, + gen_rtx_AND (DImode, + plus_constant (dsta, ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (memh, 0); + + emit_move_insn (dsth, memh); + emit_move_insn (dstl, meml); + if (WORDS_BIG_ENDIAN) + { + addr = copy_addr_to_reg (plus_constant (dsta, ofs+size-1)); + + if (src != const0_rtx) + { + switch ((int) size) + { + case 2: + emit_insn (gen_inswl_be (insh, gen_lowpart (HImode,src), addr)); + break; + case 4: + emit_insn (gen_insll_be (insh, gen_lowpart (SImode,src), addr)); + break; + case 8: + emit_insn (gen_insql_be (insh, gen_lowpart (DImode,src), addr)); + break; + } + emit_insn (gen_insxh (insl, gen_lowpart (DImode, src), + GEN_INT (size*8), addr)); + } + + switch ((int) size) + { + case 2: + emit_insn (gen_mskxl_be (dsth, dsth, GEN_INT (0xffff), addr)); + break; + case 4: + { + rtx msk = immed_double_const (0xffffffff, 0, DImode); + emit_insn (gen_mskxl_be (dsth, dsth, msk, addr)); + break; + } + case 8: + emit_insn (gen_mskxl_be (dsth, dsth, constm1_rtx, addr)); + break; + } + + emit_insn (gen_mskxh (dstl, dstl, GEN_INT (size*8), addr)); + } + else + { + addr = copy_addr_to_reg (plus_constant (dsta, ofs)); + + if (src != CONST0_RTX (GET_MODE (src))) + { + emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), + GEN_INT (size*8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_inswl_le (insl, gen_lowpart (HImode, src), addr)); + break; + case 4: + emit_insn (gen_insll_le (insl, gen_lowpart (SImode, src), addr)); + break; + case 8: + emit_insn (gen_insql_le (insl, gen_lowpart (DImode, src), addr)); + break; + } + } + + emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_mskxl_le (dstl, dstl, GEN_INT (0xffff), addr)); + break; + case 4: + { + rtx msk = immed_double_const (0xffffffff, 0, DImode); + emit_insn (gen_mskxl_le (dstl, dstl, msk, addr)); + break; + } + case 8: + emit_insn (gen_mskxl_le (dstl, dstl, constm1_rtx, addr)); + break; + } + } + + if (src != CONST0_RTX (GET_MODE (src))) + { + dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); + dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); + } + + if (WORDS_BIG_ENDIAN) + { + emit_move_insn (meml, dstl); + emit_move_insn (memh, dsth); + } + else + { + /* Must store high before low for degenerate case of aligned. */ + emit_move_insn (memh, dsth); + emit_move_insn (meml, dstl); + } +} + +/* The block move code tries to maximize speed by separating loads and + stores at the expense of register pressure: we load all of the data + before we store it back out. There are two secondary effects worth + mentioning, that this speeds copying to/from aligned and unaligned + buffers, and that it makes the code significantly easier to write. */ + +#define MAX_MOVE_WORDS 8 + +/* Load an integral number of consecutive unaligned quadwords. */ + +static void +alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx const i64 = GEN_INT (64); + rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1]; + rtx sreg, areg, tmp, smema; + HOST_WIDE_INT i; + + smema = XEXP (smem, 0); + if (GET_CODE (smema) == LO_SUM) + smema = force_reg (Pmode, smema); + + /* Generate all the tmp registers we need. 
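+
+     Editor's note, not upstream text: fetching WORDS unaligned
+     quadwords takes WORDS + 1 aligned ldq_u loads, since the data can
+     straddle that many aligned quadwords; each result is then merged
+     from an extql/extqh pair, with a cmoveq forcing the extqh half to
+     zero when the address turns out to be aligned (see the comment
+     about extxh below).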
*/ + for (i = 0; i < words; ++i) + { + data_regs[i] = out_regs[i]; + ext_tmps[i] = gen_reg_rtx (DImode); + } + data_regs[words] = gen_reg_rtx (DImode); + + if (ofs != 0) + smem = adjust_address (smem, GET_MODE (smem), ofs); + + /* Load up all of the source data. */ + for (i = 0; i < words; ++i) + { + tmp = change_address (smem, DImode, + gen_rtx_AND (DImode, + plus_constant (smema, 8*i), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[i], tmp); + } + + tmp = change_address (smem, DImode, + gen_rtx_AND (DImode, + plus_constant (smema, 8*words - 1), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[words], tmp); + + /* Extract the half-word fragments. Unfortunately DEC decided to make + extxh with offset zero a noop instead of zeroing the register, so + we must take care of that edge condition ourselves with cmov. */ + + sreg = copy_addr_to_reg (smema); + areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, + 1, OPTAB_WIDEN); + if (WORDS_BIG_ENDIAN) + emit_move_insn (sreg, plus_constant (sreg, 7)); + for (i = 0; i < words; ++i) + { + if (WORDS_BIG_ENDIAN) + { + emit_insn (gen_extqh_be (data_regs[i], data_regs[i], sreg)); + emit_insn (gen_extxl_be (ext_tmps[i], data_regs[i+1], i64, sreg)); + } + else + { + emit_insn (gen_extxl_le (data_regs[i], data_regs[i], i64, sreg)); + emit_insn (gen_extqh_le (ext_tmps[i], data_regs[i+1], sreg)); + } + emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i], + gen_rtx_IF_THEN_ELSE (DImode, + gen_rtx_EQ (DImode, areg, + const0_rtx), + const0_rtx, ext_tmps[i]))); + } + + /* Merge the half-words into whole words. */ + for (i = 0; i < words; ++i) + { + out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], + ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN); + } +} + +/* Store an integral number of consecutive unaligned quadwords. DATA_REGS + may be NULL to store zeros. */ + +static void +alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx const i64 = GEN_INT (64); + rtx ins_tmps[MAX_MOVE_WORDS]; + rtx st_tmp_1, st_tmp_2, dreg; + rtx st_addr_1, st_addr_2, dmema; + HOST_WIDE_INT i; + + dmema = XEXP (dmem, 0); + if (GET_CODE (dmema) == LO_SUM) + dmema = force_reg (Pmode, dmema); + + /* Generate all the tmp registers we need. */ + if (data_regs != NULL) + for (i = 0; i < words; ++i) + ins_tmps[i] = gen_reg_rtx(DImode); + st_tmp_1 = gen_reg_rtx(DImode); + st_tmp_2 = gen_reg_rtx(DImode); + + if (ofs != 0) + dmem = adjust_address (dmem, GET_MODE (dmem), ofs); + + st_addr_2 = change_address (dmem, DImode, + gen_rtx_AND (DImode, + plus_constant (dmema, words*8 - 1), + im8)); + set_mem_alias_set (st_addr_2, 0); + + st_addr_1 = change_address (dmem, DImode, + gen_rtx_AND (DImode, dmema, im8)); + set_mem_alias_set (st_addr_1, 0); + + /* Load up the destination end bits. */ + emit_move_insn (st_tmp_2, st_addr_2); + emit_move_insn (st_tmp_1, st_addr_1); + + /* Shift the input data into place. 
*/ + dreg = copy_addr_to_reg (dmema); + if (WORDS_BIG_ENDIAN) + emit_move_insn (dreg, plus_constant (dreg, 7)); + if (data_regs != NULL) + { + for (i = words-1; i >= 0; --i) + { + if (WORDS_BIG_ENDIAN) + { + emit_insn (gen_insql_be (ins_tmps[i], data_regs[i], dreg)); + emit_insn (gen_insxh (data_regs[i], data_regs[i], i64, dreg)); + } + else + { + emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg)); + emit_insn (gen_insql_le (data_regs[i], data_regs[i], dreg)); + } + } + for (i = words-1; i > 0; --i) + { + ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i], + ins_tmps[i-1], ins_tmps[i-1], 1, + OPTAB_WIDEN); + } + } + + /* Split and merge the ends with the destination data. */ + if (WORDS_BIG_ENDIAN) + { + emit_insn (gen_mskxl_be (st_tmp_2, st_tmp_2, constm1_rtx, dreg)); + emit_insn (gen_mskxh (st_tmp_1, st_tmp_1, i64, dreg)); + } + else + { + emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg)); + emit_insn (gen_mskxl_le (st_tmp_1, st_tmp_1, constm1_rtx, dreg)); + } + + if (data_regs != NULL) + { + st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1], + st_tmp_2, 1, OPTAB_WIDEN); + st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], + st_tmp_1, 1, OPTAB_WIDEN); + } + + /* Store it all. */ + if (WORDS_BIG_ENDIAN) + emit_move_insn (st_addr_1, st_tmp_1); + else + emit_move_insn (st_addr_2, st_tmp_2); + for (i = words-1; i > 0; --i) + { + rtx tmp = change_address (dmem, DImode, + gen_rtx_AND (DImode, + plus_constant(dmema, + WORDS_BIG_ENDIAN ? i*8-1 : i*8), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx); + } + if (WORDS_BIG_ENDIAN) + emit_move_insn (st_addr_2, st_tmp_2); + else + emit_move_insn (st_addr_1, st_tmp_1); +} + + +/* Expand string/block move operations. + + operands[0] is the pointer to the destination. + operands[1] is the pointer to the source. + operands[2] is the number of bytes to move. + operands[3] is the alignment. */ + +int +alpha_expand_block_move (rtx operands[]) +{ + rtx bytes_rtx = operands[2]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT dst_align = src_align; + rtx orig_src = operands[1]; + rtx orig_dst = operands[0]; + rtx data_regs[2 * MAX_MOVE_WORDS + 16]; + rtx tmp; + unsigned int i, words, ofs, nregs = 0; + + if (orig_bytes <= 0) + return 1; + else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for additional alignment information from recorded register info. 
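+
+     For instance (editor's illustration, not upstream text), if the
+     source address is (plus (reg R) (const_int 4)) and R is known to
+     be 64-bit aligned, the offset of 4 only guarantees 32-bit
+     alignment, so src_align is raised to 32 rather than 64.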
*/ + + tmp = XEXP (orig_src, 0); + if (REG_P (tmp)) + src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > src_align) + { + if (a >= 64 && c % 8 == 0) + src_align = 64; + else if (a >= 32 && c % 4 == 0) + src_align = 32; + else if (a >= 16 && c % 2 == 0) + src_align = 16; + } + } + + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > dst_align) + { + if (a >= 64 && c % 8 == 0) + dst_align = 64; + else if (a >= 32 && c % 4 == 0) + dst_align = 32; + else if (a >= 16 && c % 2 == 0) + dst_align = 16; + } + } + + ofs = 0; + if (src_align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, DImode, ofs + i * 8)); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (src_align >= 32 && bytes >= 4) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (SImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + i * 4)); + + nregs += words; + bytes -= words * 4; + ofs += words * 4; + } + + if (bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words+1; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + alpha_expand_unaligned_load_words (data_regs + nregs, orig_src, + words, ofs); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (! TARGET_BWX && bytes >= 4) + { + data_regs[nregs++] = tmp = gen_reg_rtx (SImode); + alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (src_align >= 16) + { + do { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); + bytes -= 2; + ofs += 2; + } while (bytes >= 2); + } + else if (! TARGET_BWX) + { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + data_regs[nregs++] = tmp = gen_reg_rtx (QImode); + emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); + bytes -= 1; + ofs += 1; + } + + gcc_assert (nregs <= ARRAY_SIZE (data_regs)); + + /* Now save it back out again. */ + + i = 0, ofs = 0; + + /* Write out the data in whatever chunks reading the source allowed. */ + if (dst_align >= 64) + { + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + emit_move_insn (adjust_address (orig_dst, DImode, ofs), + data_regs[i]); + ofs += 8; + i++; + } + } + + if (dst_align >= 32) + { + /* If the source has remaining DImode regs, write them out in + two pieces. 
*/ + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + gen_lowpart (SImode, data_regs[i])); + emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), + gen_lowpart (SImode, tmp)); + ofs += 8; + i++; + } + + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + data_regs[i]); + ofs += 4; + i++; + } + } + + if (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + /* Write out a remaining block of words using unaligned methods. */ + + for (words = 1; i + words < nregs; words++) + if (GET_MODE (data_regs[i + words]) != DImode) + break; + + if (words == 1) + alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + else + alpha_expand_unaligned_store_words (data_regs + i, orig_dst, + words, ofs); + + i += words; + ofs += words * 8; + } + + /* Due to the above, this won't be aligned. */ + /* ??? If we have more than one of these, consider constructing full + words in registers and using alpha_expand_unaligned_store_words. */ + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); + ofs += 4; + i++; + } + + if (dst_align >= 16) + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); + i++; + ofs += 2; + } + else + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); + i++; + ofs += 2; + } + + /* The remainder must be byte copies. */ + while (i < nregs) + { + gcc_assert (GET_MODE (data_regs[i]) == QImode); + emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); + i++; + ofs += 1; + } + + return 1; +} + +int +alpha_expand_block_clear (rtx operands[]) +{ + rtx bytes_rtx = operands[1]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT alignofs = 0; + rtx orig_dst = operands[0]; + rtx tmp; + int i, words, ofs = 0; + + if (orig_bytes <= 0) + return 1; + if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for stricter alignment. */ + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > align) + { + if (a >= 64) + align = a, alignofs = 8 - c % 8; + else if (a >= 32) + align = a, alignofs = 4 - c % 4; + else if (a >= 16) + align = a, alignofs = 2 - c % 2; + } + } + + /* Handle an unaligned prefix first. */ + + if (alignofs > 0) + { +#if HOST_BITS_PER_WIDE_INT >= 64 + /* Given that alignofs is bounded by align, the only time BWX could + generate three stores is for a 7 byte fill. Prefer two individual + stores over a load/mask/store sequence. */ + if ((!TARGET_BWX || alignofs == 7) + && align >= 32 + && !(alignofs == 4 && bytes >= 4)) + { + enum machine_mode mode = (align >= 64 ? DImode : SImode); + int inv_alignofs = (align >= 64 ? 
8 : 4) - alignofs; + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); + set_mem_alias_set (mem, 0); + + mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8)); + if (bytes < alignofs) + { + mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8); + ofs += bytes; + bytes = 0; + } + else + { + bytes -= alignofs; + ofs += alignofs; + } + alignofs = 0; + + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + } +#endif + + if (TARGET_BWX && (alignofs & 1) && bytes >= 1) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + alignofs -= 1; + } + if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); + bytes -= 2; + ofs += 2; + alignofs -= 2; + } + if (alignofs == 4 && bytes >= 4) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + alignofs = 0; + } + + /* If we've not used the extra lead alignment information by now, + we won't be able to. Downgrade align to match what's left over. */ + if (alignofs > 0) + { + alignofs = alignofs & -alignofs; + align = MIN (align, alignofs * BITS_PER_UNIT); + } + } + + /* Handle a block of contiguous long-words. */ + + if (align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), + const0_rtx); + + bytes -= words * 8; + ofs += words * 8; + } + + /* If the block is large and appropriately aligned, emit a single + store followed by a sequence of stq_u insns. */ + + if (align >= 32 && bytes > 16) + { + rtx orig_dsta; + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + + orig_dsta = XEXP (orig_dst, 0); + if (GET_CODE (orig_dsta) == LO_SUM) + orig_dsta = force_reg (Pmode, orig_dsta); + + words = bytes / 8; + for (i = 0; i < words; ++i) + { + rtx mem + = change_address (orig_dst, DImode, + gen_rtx_AND (DImode, + plus_constant (orig_dsta, ofs + i*8), + GEN_INT (-8))); + set_mem_alias_set (mem, 0); + emit_move_insn (mem, const0_rtx); + } + + /* Depending on the alignment, the first stq_u may have overlapped + with the initial stl, which means that the last stq_u didn't + write as much as it would appear. Leave those questionable bytes + unaccounted for. */ + bytes -= words * 8 - 4; + ofs += words * 8 - 4; + } + + /* Handle a smaller block of aligned words. */ + + if ((align >= 64 && bytes == 4) + || (align == 32 && bytes >= 4)) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), + const0_rtx); + + bytes -= words * 4; + ofs += words * 4; + } + + /* An unaligned block uses stq_u stores for as many as possible. */ + + if (bytes >= 8) + { + words = bytes / 8; + + alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); + + bytes -= words * 8; + ofs += words * 8; + } + + /* Next clean up any trailing pieces. */ + +#if HOST_BITS_PER_WIDE_INT >= 64 + /* Count the number of bits in BYTES for which aligned stores could + be emitted. */ + words = 0; + for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1) + if (bytes & i) + words += 1; + + /* If we have appropriate alignment (and it wouldn't take too many + instructions otherwise), mask out the bytes we need. */ + if (TARGET_BWX ? 
words > 2 : bytes > 0) + { + if (align >= 64) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, DImode, ofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT)0 << (bytes * 8); + + tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + else if (align >= 32 && bytes < 4) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, SImode, ofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT)0 << (bytes * 8); + + tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + } +#endif + + if (!TARGET_BWX && bytes >= 4) + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (align >= 16) + { + do { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), + const0_rtx); + bytes -= 2; + ofs += 2; + } while (bytes >= 2); + } + else if (! TARGET_BWX) + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + } + + return 1; +} + +/* Returns a mask so that zap(x, value) == x & mask. */ + +rtx +alpha_expand_zap_mask (HOST_WIDE_INT value) +{ + rtx result; + int i; + + if (HOST_BITS_PER_WIDE_INT >= 64) + { + HOST_WIDE_INT mask = 0; + + for (i = 7; i >= 0; --i) + { + mask <<= 8; + if (!((value >> i) & 1)) + mask |= 0xff; + } + + result = gen_int_mode (mask, DImode); + } + else + { + HOST_WIDE_INT mask_lo = 0, mask_hi = 0; + + gcc_assert (HOST_BITS_PER_WIDE_INT == 32); + + for (i = 7; i >= 4; --i) + { + mask_hi <<= 8; + if (!((value >> i) & 1)) + mask_hi |= 0xff; + } + + for (i = 3; i >= 0; --i) + { + mask_lo <<= 8; + if (!((value >> i) & 1)) + mask_lo |= 0xff; + } + + result = immed_double_const (mask_lo, mask_hi, DImode); + } + + return result; +} + +void +alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), + enum machine_mode mode, + rtx op0, rtx op1, rtx op2) +{ + op0 = gen_lowpart (mode, op0); + + if (op1 == const0_rtx) + op1 = CONST0_RTX (mode); + else + op1 = gen_lowpart (mode, op1); + + if (op2 == const0_rtx) + op2 = CONST0_RTX (mode); + else + op2 = gen_lowpart (mode, op2); + + emit_insn ((*gen) (op0, op1, op2)); +} + +/* A subroutine of the atomic operation splitters. Jump to LABEL if + COND is true. Mark the jump as unlikely to be taken. */ + +static void +emit_unlikely_jump (rtx cond, rtx label) +{ + rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); + rtx x; + + x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); + x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); + add_reg_note (x, REG_BR_PROB, very_unlikely); +} + +/* A subroutine of the atomic operation splitters. Emit a load-locked + instruction in MODE. */ + +static void +emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) +{ + rtx (*fn) (rtx, rtx) = NULL; + if (mode == SImode) + fn = gen_load_locked_si; + else if (mode == DImode) + fn = gen_load_locked_di; + emit_insn (fn (reg, mem)); +} + +/* A subroutine of the atomic operation splitters. Emit a store-conditional + instruction in MODE. 
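   Together with the load-locked helper above, this ends up inside the
   retry loops built by the splitters below.  As an illustrative sketch
   only (register numbers arbitrary), a DImode atomic fetch-and-add
   expands to roughly:

	mb
     1:	ldq_l	$1,0($16)	old value
	addq	$1,$17,$2	apply the operation
	stq_c	$2,0($16)	$2 becomes the success flag
	beq	$2,1b		reservation lost, retry (unlikely branch)
	mb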
*/ + +static void +emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val) +{ + rtx (*fn) (rtx, rtx, rtx) = NULL; + if (mode == SImode) + fn = gen_store_conditional_si; + else if (mode == DImode) + fn = gen_store_conditional_di; + emit_insn (fn (res, mem, val)); +} + +/* A subroutine of the atomic operation splitters. Emit an insxl + instruction in MODE. */ + +static rtx +emit_insxl (enum machine_mode mode, rtx op1, rtx op2) +{ + rtx ret = gen_reg_rtx (DImode); + rtx (*fn) (rtx, rtx, rtx); + + if (WORDS_BIG_ENDIAN) + { + if (mode == QImode) + fn = gen_insbl_be; + else + fn = gen_inswl_be; + } + else + { + if (mode == QImode) + fn = gen_insbl_le; + else + fn = gen_inswl_le; + } + /* The insbl and inswl patterns require a register operand. */ + op1 = force_reg (mode, op1); + emit_insn (fn (ret, op1, op2)); + + return ret; +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before of after the operation. SCRATCH is + a scratch register. */ + +void +alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, + rtx before, rtx after, rtx scratch) +{ + enum machine_mode mode = GET_MODE (mem); + rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch)); + + emit_insn (gen_memory_barrier ()); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (DImode, label); + + if (before == NULL) + before = scratch; + emit_load_locked (mode, before, mem); + + if (code == NOT) + { + x = gen_rtx_AND (mode, before, val); + emit_insn (gen_rtx_SET (VOIDmode, val, x)); + + x = gen_rtx_NOT (mode, val); + } + else + x = gen_rtx_fmt_ee (code, mode, before, val); + if (after) + emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x))); + emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); + + emit_store_conditional (mode, cond, mem, scratch); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + emit_insn (gen_memory_barrier ()); +} + +/* Expand a compare and swap operation. 
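   The resulting sequence is again a load-locked/store-conditional retry
   loop, but one that bails out early when the current value does not
   match OLDVAL.  Rough DImode sketch (register numbers arbitrary):

	mb
     1:	ldq_l	$0,0($16)	RETVAL := current value
	cmpeq	$0,$17,$1	equal to OLDVAL?
	beq	$1,2f		no: exit without storing
	mov	$18,$1		yes: try to store NEWVAL
	stq_c	$1,0($16)
	beq	$1,1b		reservation lost, retry
	mb
     2: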
*/ + +void +alpha_split_compare_and_swap (rtx retval, rtx mem, rtx oldval, rtx newval, + rtx scratch) +{ + enum machine_mode mode = GET_MODE (mem); + rtx label1, label2, x, cond = gen_lowpart (DImode, scratch); + + emit_insn (gen_memory_barrier ()); + + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + + emit_load_locked (mode, retval, mem); + + x = gen_lowpart (DImode, retval); + if (oldval == const0_rtx) + x = gen_rtx_NE (DImode, x, const0_rtx); + else + { + x = gen_rtx_EQ (DImode, x, oldval); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + + emit_move_insn (scratch, newval); + emit_store_conditional (mode, cond, mem, scratch); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + + emit_insn (gen_memory_barrier ()); + emit_label (XEXP (label2, 0)); +} + +void +alpha_expand_compare_and_swap_12 (rtx dst, rtx mem, rtx oldval, rtx newval) +{ + enum machine_mode mode = GET_MODE (mem); + rtx addr, align, wdst; + rtx (*fn5) (rtx, rtx, rtx, rtx, rtx); + + addr = force_reg (DImode, XEXP (mem, 0)); + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), + NULL_RTX, 1, OPTAB_DIRECT); + + oldval = convert_modes (DImode, mode, oldval, 1); + newval = emit_insxl (mode, newval, addr); + + wdst = gen_reg_rtx (DImode); + if (mode == QImode) + fn5 = gen_sync_compare_and_swapqi_1; + else + fn5 = gen_sync_compare_and_swaphi_1; + emit_insn (fn5 (wdst, addr, oldval, newval, align)); + + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +alpha_split_compare_and_swap_12 (enum machine_mode mode, rtx dest, rtx addr, + rtx oldval, rtx newval, rtx align, + rtx scratch, rtx cond) +{ + rtx label1, label2, mem, width, mask, x; + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = 1; + + emit_insn (gen_memory_barrier ()); + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + + emit_load_locked (DImode, scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + if (WORDS_BIG_ENDIAN) + emit_insn (gen_extxl_be (dest, scratch, width, addr)); + else + emit_insn (gen_extxl_le (dest, scratch, width, addr)); + + if (oldval == const0_rtx) + x = gen_rtx_NE (DImode, dest, const0_rtx); + else + { + x = gen_rtx_EQ (DImode, dest, oldval); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + + if (WORDS_BIG_ENDIAN) + emit_insn (gen_mskxl_be (scratch, scratch, mask, addr)); + else + emit_insn (gen_mskxl_le (scratch, scratch, mask, addr)); + emit_insn (gen_iordi3 (scratch, scratch, newval)); + + emit_store_conditional (DImode, scratch, mem, scratch); + + x = gen_rtx_EQ (DImode, scratch, const0_rtx); + emit_unlikely_jump (x, label1); + + emit_insn (gen_memory_barrier ()); + emit_label (XEXP (label2, 0)); +} + +/* Expand an atomic exchange operation. 
*/ + +void +alpha_split_lock_test_and_set (rtx retval, rtx mem, rtx val, rtx scratch) +{ + enum machine_mode mode = GET_MODE (mem); + rtx label, x, cond = gen_lowpart (DImode, scratch); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_load_locked (mode, retval, mem); + emit_move_insn (scratch, val); + emit_store_conditional (mode, cond, mem, scratch); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + emit_insn (gen_memory_barrier ()); +} + +void +alpha_expand_lock_test_and_set_12 (rtx dst, rtx mem, rtx val) +{ + enum machine_mode mode = GET_MODE (mem); + rtx addr, align, wdst; + rtx (*fn4) (rtx, rtx, rtx, rtx); + + /* Force the address into a register. */ + addr = force_reg (DImode, XEXP (mem, 0)); + + /* Align it to a multiple of 8. */ + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), + NULL_RTX, 1, OPTAB_DIRECT); + + /* Insert val into the correct byte location within the word. */ + val = emit_insxl (mode, val, addr); + + wdst = gen_reg_rtx (DImode); + if (mode == QImode) + fn4 = gen_sync_lock_test_and_setqi_1; + else + fn4 = gen_sync_lock_test_and_sethi_1; + emit_insn (fn4 (wdst, addr, val, align)); + + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +alpha_split_lock_test_and_set_12 (enum machine_mode mode, rtx dest, rtx addr, + rtx val, rtx align, rtx scratch) +{ + rtx label, mem, width, mask, x; + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = 1; + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_load_locked (DImode, scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + if (WORDS_BIG_ENDIAN) + { + emit_insn (gen_extxl_be (dest, scratch, width, addr)); + emit_insn (gen_mskxl_be (scratch, scratch, mask, addr)); + } + else + { + emit_insn (gen_extxl_le (dest, scratch, width, addr)); + emit_insn (gen_mskxl_le (scratch, scratch, mask, addr)); + } + emit_insn (gen_iordi3 (scratch, scratch, val)); + + emit_store_conditional (DImode, scratch, mem, scratch); + + x = gen_rtx_EQ (DImode, scratch, const0_rtx); + emit_unlikely_jump (x, label); + + emit_insn (gen_memory_barrier ()); +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type dep_insn_type; + + /* If the dependence is an anti-dependence, there is no cost. For an + output dependence, there is sometimes a cost, but it doesn't seem + worth handling those few cases. */ + if (REG_NOTE_KIND (link) != 0) + return cost; + + /* If we can't recognize the insns, we can't really do anything. */ + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + /* Bring in the user-defined memory latency. */ + if (dep_insn_type == TYPE_ILD + || dep_insn_type == TYPE_FLD + || dep_insn_type == TYPE_LDSYM) + cost += alpha_memory_latency-1; + + /* Everything else handled in DFA bypasses now. */ + + return cost; +} + +/* The number of instructions that can be issued per cycle. */ + +static int +alpha_issue_rate (void) +{ + return (alpha_tune == PROCESSOR_EV4 ? 2 : 4); +} + +/* How many alternative schedules to try. This should be as wide as the + scheduling freedom in the DFA, but no wider. Making this value too + large results extra work for the scheduler. 
+ + For EV4, loads can be issued to either IB0 or IB1, thus we have 2 + alternative schedules. For EV5, we can choose between E0/E1 and + FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */ + +static int +alpha_multipass_dfa_lookahead (void) +{ + return (alpha_tune == PROCESSOR_EV6 ? 4 : 2); +} + +/* Machine-specific function data. */ + +struct GTY(()) machine_function +{ + /* For unicosmk. */ + /* List of call information words for calls from this function. */ + struct rtx_def *first_ciw; + struct rtx_def *last_ciw; + int ciw_count; + + /* List of deferred case vectors. */ + struct rtx_def *addr_list; + + /* For OSF. */ + const char *some_ld_name; + + /* For TARGET_LD_BUGGY_LDGP. */ + struct rtx_def *gp_save_rtx; + + /* For VMS condition handlers. */ + bool uses_condition_handler; +}; + +/* How to allocate a 'struct machine_function'. */ + +static struct machine_function * +alpha_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Support for frame based VMS condition handlers. */ + +/* A VMS condition handler may be established for a function with a call to + __builtin_establish_vms_condition_handler, and cancelled with a call to + __builtin_revert_vms_condition_handler. + + The VMS Condition Handling Facility knows about the existence of a handler + from the procedure descriptor .handler field. As the VMS native compilers, + we store the user specified handler's address at a fixed location in the + stack frame and point the procedure descriptor at a common wrapper which + fetches the real handler's address and issues an indirect call. + + The indirection wrapper is "__gcc_shell_handler", provided by libgcc. + + We force the procedure kind to PT_STACK, and the fixed frame location is + fp+8, just before the register save area. We use the handler_data field in + the procedure descriptor to state the fp offset at which the installed + handler address can be found. */ + +#define VMS_COND_HANDLER_FP_OFFSET 8 + +/* Expand code to store the currently installed user VMS condition handler + into TARGET and install HANDLER as the new condition handler. */ + +void +alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler) +{ + rtx handler_slot_address + = plus_constant (hard_frame_pointer_rtx, VMS_COND_HANDLER_FP_OFFSET); + + rtx handler_slot + = gen_rtx_MEM (DImode, handler_slot_address); + + emit_move_insn (target, handler_slot); + emit_move_insn (handler_slot, handler); + + /* Notify the start/prologue/epilogue emitters that the condition handler + slot is needed. In addition to reserving the slot space, this will force + the procedure kind to PT_STACK so ensure that the hard_frame_pointer_rtx + use above is correct. */ + cfun->machine->uses_condition_handler = true; +} + +/* Expand code to store the current VMS condition handler into TARGET and + nullify it. */ + +void +alpha_expand_builtin_revert_vms_condition_handler (rtx target) +{ + /* We implement this by establishing a null condition handler, with the tiny + side effect of setting uses_condition_handler. This is a little bit + pessimistic if no actual builtin_establish call is ever issued, which is + not a real problem and expected never to happen anyway. */ + + alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx); +} + +/* Functions to save and restore alpha_return_addr_rtx. */ + +/* Start the ball rolling with RETURN_ADDR_RTX. 
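   This is the expander behind __builtin_return_address; only the
   innermost frame is supported, so, as a usage sketch:

     void *own_ra     = __builtin_return_address (0);
     void *callers_ra = __builtin_return_address (1);

   own_ra reads the entry value of $26 (REG_RA), while callers_ra, having
   a non-zero count, simply folds to a null pointer.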
*/ + +rtx +alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RA); +} + +/* Return or create a memory slot containing the gp value for the current + function. Needed only if TARGET_LD_BUGGY_LDGP. */ + +rtx +alpha_gp_save_rtx (void) +{ + rtx seq, m = cfun->machine->gp_save_rtx; + + if (m == NULL) + { + start_sequence (); + + m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD); + m = validize_mem (m); + emit_move_insn (m, pic_offset_table_rtx); + + seq = get_insns (); + end_sequence (); + + /* We used to simply emit the sequence after entry_of_function. + However this breaks the CFG if the first instruction in the + first block is not the NOTE_INSN_BASIC_BLOCK, for example a + label. Emit the sequence properly on the edge. We are only + invoked from dw2_build_landing_pads and finish_eh_generation + will call commit_edge_insertions thanks to a kludge. */ + insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); + + cfun->machine->gp_save_rtx = m; + } + + return m; +} + +static void +alpha_instantiate_decls (void) +{ + if (cfun->machine->gp_save_rtx != NULL_RTX) + instantiate_decl_rtl (cfun->machine->gp_save_rtx); +} + +static int +alpha_ra_ever_killed (void) +{ + rtx top; + + if (!has_hard_reg_initial_val (Pmode, REG_RA)) + return (int)df_regs_ever_live_p (REG_RA); + + push_topmost_sequence (); + top = get_insns (); + pop_topmost_sequence (); + + return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX); +} + + +/* Return the trap mode suffix applicable to the current + instruction, or NULL. */ + +static const char * +get_trap_mode_suffix (void) +{ + enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); + + switch (s) + { + case TRAP_SUFFIX_NONE: + return NULL; + + case TRAP_SUFFIX_SU: + if (alpha_fptm >= ALPHA_FPTM_SU) + return "su"; + return NULL; + + case TRAP_SUFFIX_SUI: + if (alpha_fptm >= ALPHA_FPTM_SUI) + return "sui"; + return NULL; + + case TRAP_SUFFIX_V_SV: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "v"; + case ALPHA_FPTM_SU: + case ALPHA_FPTM_SUI: + return "sv"; + default: + gcc_unreachable (); + } + + case TRAP_SUFFIX_V_SV_SVI: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "v"; + case ALPHA_FPTM_SU: + return "sv"; + case ALPHA_FPTM_SUI: + return "svi"; + default: + gcc_unreachable (); + } + break; + + case TRAP_SUFFIX_U_SU_SUI: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "u"; + case ALPHA_FPTM_SU: + return "su"; + case ALPHA_FPTM_SUI: + return "sui"; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Return the rounding mode suffix applicable to the current + instruction, or NULL. 
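   Together with the trap-mode suffix above, this forms the qualifier
   that the '/' output code below appends to floating-point mnemonics
   (trap suffix first, then rounding suffix, with a leading slash when
   the assembler expects one).  For example, under -mfp-trap-mode=su
   -mfp-rounding-mode=d an addt is emitted roughly as

	addt/sud $f10,$f11,$f12

   where the register numbers are only illustrative.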
*/ + +static const char * +get_round_mode_suffix (void) +{ + enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); + + switch (s) + { + case ROUND_SUFFIX_NONE: + return NULL; + case ROUND_SUFFIX_NORMAL: + switch (alpha_fprm) + { + case ALPHA_FPRM_NORM: + return NULL; + case ALPHA_FPRM_MINF: + return "m"; + case ALPHA_FPRM_CHOP: + return "c"; + case ALPHA_FPRM_DYN: + return "d"; + default: + gcc_unreachable (); + } + break; + + case ROUND_SUFFIX_C: + return "c"; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in some movdi_er_tlsldm pattern. */ + +static int +get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + if (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) + { + cfun->machine->some_ld_name = XSTR (x, 0); + return 1; + } + + return 0; +} + +static const char * +get_some_local_dynamic_name (void) +{ + rtx insn; + + if (cfun->machine->some_ld_name) + return cfun->machine->some_ld_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) + return cfun->machine->some_ld_name; + + gcc_unreachable (); +} + +/* Print an operand. Recognize special options, documented below. */ + +void +print_operand (FILE *file, rtx x, int code) +{ + int i; + + switch (code) + { + case '~': + /* Print the assembler name of the current function. */ + assemble_name (file, alpha_fnname); + break; + + case '&': + assemble_name (file, get_some_local_dynamic_name ()); + break; + + case '/': + { + const char *trap = get_trap_mode_suffix (); + const char *round = get_round_mode_suffix (); + + if (trap || round) + fprintf (file, (TARGET_AS_SLASH_BEFORE_SUFFIX ? "/%s%s" : "%s%s"), + (trap ? trap : ""), (round ? round : "")); + break; + } + + case ',': + /* Generates single precision instruction suffix. */ + fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file); + break; + + case '-': + /* Generates double precision instruction suffix. */ + fputc ((TARGET_FLOAT_VAX ? 
'g' : 't'), file); + break; + + case '#': + if (alpha_this_literal_sequence_number == 0) + alpha_this_literal_sequence_number = alpha_next_sequence_number++; + fprintf (file, "%d", alpha_this_literal_sequence_number); + break; + + case '*': + if (alpha_this_gpdisp_sequence_number == 0) + alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++; + fprintf (file, "%d", alpha_this_gpdisp_sequence_number); + break; + + case 'H': + if (GET_CODE (x) == HIGH) + output_addr_const (file, XEXP (x, 0)); + else + output_operand_lossage ("invalid %%H value"); + break; + + case 'J': + { + const char *lituse; + + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsgd"; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsldm"; + } + else if (CONST_INT_P (x)) + lituse = "lituse_jsr"; + else + { + output_operand_lossage ("invalid %%J value"); + break; + } + + if (x != const0_rtx) + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + + case 'j': + { + const char *lituse; + +#ifdef HAVE_AS_JSRDIRECT_RELOCS + lituse = "lituse_jsrdirect"; +#else + lituse = "lituse_jsr"; +#endif + + gcc_assert (INTVAL (x) != 0); + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + case 'r': + /* If this operand is the constant zero, write it as "$31". */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$31"); + else + output_operand_lossage ("invalid %%r value"); + break; + + case 'R': + /* Similar, but for floating-point. */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$f31"); + else + output_operand_lossage ("invalid %%R value"); + break; + + case 'N': + /* Write the 1's complement of a constant. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%N value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); + break; + + case 'P': + /* Write 1 << C, for a constant C. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%P value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x)); + break; + + case 'h': + /* Write the high-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%h value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); + break; + + case 'L': + /* Write the low-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%L value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); + break; + + case 'm': + /* Write mask for ZAP insn. 
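   The printed value has bit I set for every non-zero byte I of the
   constant, i.e. the byte-select form that zapnot expects.  Worked
   example for the CONST_INT case handled below:

     x    = 0x00ff00ff00ff00ff
     mask = 0x55                 (bits 0, 2, 4 and 6; one per non-zero byte)

   so an AND with x can be emitted as "zapnot $1,0x55,$2", keeping those
   four bytes and clearing the rest.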
*/ + if (GET_CODE (x) == CONST_DOUBLE) + { + HOST_WIDE_INT mask = 0; + HOST_WIDE_INT value; + + value = CONST_DOUBLE_LOW (x); + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + value = CONST_DOUBLE_HIGH (x); + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if (value & 0xff) + mask |= (1 << (i + sizeof (int))); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff); + } + + else if (CONST_INT_P (x)) + { + HOST_WIDE_INT mask = 0, value = INTVAL (x); + + for (i = 0; i < 8; i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); + } + else + output_operand_lossage ("invalid %%m value"); + break; + + case 'M': + /* 'b', 'w', 'l', or 'q' as the value of the constant. */ + if (!CONST_INT_P (x) + || (INTVAL (x) != 8 && INTVAL (x) != 16 + && INTVAL (x) != 32 && INTVAL (x) != 64)) + output_operand_lossage ("invalid %%M value"); + + fprintf (file, "%s", + (INTVAL (x) == 8 ? "b" + : INTVAL (x) == 16 ? "w" + : INTVAL (x) == 32 ? "l" + : "q")); + break; + + case 'U': + /* Similar, except do it from the mask. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT value = INTVAL (x); + + if (value == 0xff) + { + fputc ('b', file); + break; + } + if (value == 0xffff) + { + fputc ('w', file); + break; + } + if (value == 0xffffffff) + { + fputc ('l', file); + break; + } + if (value == -1) + { + fputc ('q', file); + break; + } + } + else if (HOST_BITS_PER_WIDE_INT == 32 + && GET_CODE (x) == CONST_DOUBLE + && CONST_DOUBLE_LOW (x) == 0xffffffff + && CONST_DOUBLE_HIGH (x) == 0) + { + fputc ('l', file); + break; + } + output_operand_lossage ("invalid %%U value"); + break; + + case 's': + /* Write the constant value divided by 8 for little-endian mode or + (56 - value) / 8 for big-endian mode. */ + + if (!CONST_INT_P (x) + || (unsigned HOST_WIDE_INT) INTVAL (x) >= (WORDS_BIG_ENDIAN + ? 56 + : 64) + || (INTVAL (x) & 7) != 0) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + WORDS_BIG_ENDIAN + ? (56 - INTVAL (x)) / 8 + : INTVAL (x) / 8); + break; + + case 'S': + /* Same, except compute (64 - c) / 8 */ + + if (!CONST_INT_P (x) + && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 + && (INTVAL (x) & 7) != 8) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8); + break; + + case 't': + { + /* On Unicos/Mk systems: use a DEX expression if the symbol + clashes with a register name. */ + int dex = unicosmk_need_dex (x); + if (dex) + fprintf (file, "DEX(%d)", dex); + else + output_addr_const (file, x); + } + break; + + case 'C': case 'D': case 'c': case 'd': + /* Write out comparison name. */ + { + enum rtx_code c = GET_CODE (x); + + if (!COMPARISON_P (x)) + output_operand_lossage ("invalid %%C value"); + + else if (code == 'D') + c = reverse_condition (c); + else if (code == 'c') + c = swap_condition (c); + else if (code == 'd') + c = swap_condition (reverse_condition (c)); + + if (c == LEU) + fprintf (file, "ule"); + else if (c == LTU) + fprintf (file, "ult"); + else if (c == UNORDERED) + fprintf (file, "un"); + else + fprintf (file, "%s", GET_RTX_NAME (c)); + } + break; + + case 'E': + /* Write the divide or modulus operator. */ + switch (GET_CODE (x)) + { + case DIV: + fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q"); + break; + case UDIV: + fprintf (file, "div%su", GET_MODE (x) == SImode ? 
"l" : "q"); + break; + case MOD: + fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q"); + break; + case UMOD: + fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q"); + break; + default: + output_operand_lossage ("invalid %%E value"); + break; + } + break; + + case 'A': + /* Write "_u" for unaligned access. */ + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + fprintf (file, "_u"); + break; + + case 0: + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (MEM_P (x)) + output_address (XEXP (x, 0)); + else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) + { + switch (XINT (XEXP (x, 0), 1)) + { + case UNSPEC_DTPREL: + case UNSPEC_TPREL: + output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + break; + } + } + else + output_addr_const (file, x); + break; + + default: + output_operand_lossage ("invalid %%xn code"); + } +} + +void +print_operand_address (FILE *file, rtx addr) +{ + int basereg = 31; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (addr) == AND) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS + && CONST_INT_P (XEXP (addr, 1))) + { + offset = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) == LO_SUM) + { + const char *reloc16, *reloclo; + rtx op1 = XEXP (addr, 1); + + if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) + { + op1 = XEXP (op1, 0); + switch (XINT (op1, 1)) + { + case UNSPEC_DTPREL: + reloc16 = NULL; + reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello"); + break; + case UNSPEC_TPREL: + reloc16 = NULL; + reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello"); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + return; + } + + output_addr_const (file, XVECEXP (op1, 0, 0)); + } + else + { + reloc16 = "gprel"; + reloclo = "gprellow"; + output_addr_const (file, op1); + } + + if (offset) + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); + + addr = XEXP (addr, 0); + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + default: + gcc_unreachable (); + } + + fprintf (file, "($%d)\t\t!%s", basereg, + (basereg == 29 ? reloc16 : reloclo)); + return; + } + + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + case CONST_INT: + offset = INTVAL (addr); + break; + +#if TARGET_ABI_OPEN_VMS + case SYMBOL_REF: + fprintf (file, "%s", XSTR (addr, 0)); + return; + + case CONST: + gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); + fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, + XSTR (XEXP (XEXP (addr, 0), 0), 0), + INTVAL (XEXP (XEXP (addr, 0), 1))); + return; + +#endif + default: + gcc_unreachable (); + } + + fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx + for the static chain value for the function. 
*/ + +static void +alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, word1, word2; + + fnaddr = XEXP (DECL_RTL (fndecl), 0); + +#ifdef POINTERS_EXTEND_UNSIGNED + fnaddr = convert_memory_address (Pmode, fnaddr); + chain_value = convert_memory_address (Pmode, chain_value); +#endif + + if (TARGET_ABI_OPEN_VMS) + { + const char *fnname; + char *trname; + + /* Construct the name of the trampoline entry point. */ + fnname = XSTR (fnaddr, 0); + trname = (char *) alloca (strlen (fnname) + 5); + strcpy (trname, fnname); + strcat (trname, "..tr"); + fnname = ggc_alloc_string (trname, strlen (trname) + 1); + word2 = gen_rtx_SYMBOL_REF (Pmode, fnname); + + /* Trampoline (or "bounded") procedure descriptor is constructed from + the function's procedure descriptor with certain fields zeroed IAW + the VMS calling standard. This is stored in the first quadword. */ + word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr)); + word1 = expand_and (DImode, word1, GEN_INT (0xffff0fff0000fff0), NULL); + } + else + { + /* These 4 instructions are: + ldq $1,24($27) + ldq $27,16($27) + jmp $31,($27),0 + nop + We don't bother setting the HINT field of the jump; the nop + is merely there for padding. */ + word1 = GEN_INT (0xa77b0010a43b0018); + word2 = GEN_INT (0x47ff041f6bfb0000); + } + + /* Store the first two words, as computed above. */ + mem = adjust_address (m_tramp, DImode, 0); + emit_move_insn (mem, word1); + mem = adjust_address (m_tramp, DImode, 8); + emit_move_insn (mem, word2); + + /* Store function address and static chain value. */ + mem = adjust_address (m_tramp, Pmode, 16); + emit_move_insn (mem, fnaddr); + mem = adjust_address (m_tramp, Pmode, 24); + emit_move_insn (mem, chain_value); + + if (!TARGET_ABI_OPEN_VMS) + { + emit_insn (gen_imb ()); +#ifdef ENABLE_EXECUTE_STACK + emit_library_call (init_one_libfunc ("__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +#endif + } +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On Alpha the first 6 words of args are normally in registers + and the rest are pushed. */ + +static rtx +alpha_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int basereg; + int num_args; + + /* Don't get confused and pass small structures in FP registers. */ + if (type && AGGREGATE_TYPE_P (type)) + basereg = 16; + else + { +#ifdef ENABLE_CHECKING + /* With alpha_split_complex_arg, we shouldn't see any raw complex + values here. */ + gcc_assert (!COMPLEX_MODE_P (mode)); +#endif + + /* Set up defaults for FP operands passed in FP registers, and + integral operands passed in integer registers. */ + if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT) + basereg = 32 + 16; + else + basereg = 16; + } + + /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for + the two platforms, so we can't avoid conditional compilation. 
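   In both variants the register finally chosen below is simply
   num_args + basereg, i.e. argument slots are shared between the integer
   and FP banks.  Illustrative OSF example:

     void f (int a, double b, int c);

	a  ->  $16     slot 0, integer bank
	b  ->  $f17    slot 1, FP bank
	c  ->  $18     slot 2, integer bank

   Anything beyond the sixth slot is passed on the stack.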
*/ +#if TARGET_ABI_OPEN_VMS + { + if (mode == VOIDmode) + return alpha_arg_info_reg_val (*cum); + + num_args = cum->num_args; + if (num_args >= 6 + || targetm.calls.must_pass_in_stack (mode, type)) + return NULL_RTX; + } +#elif TARGET_ABI_OSF + { + if (*cum >= 6) + return NULL_RTX; + num_args = *cum; + + /* VOID is passed as a special flag for "last argument". */ + if (type == void_type_node) + basereg = 16; + else if (targetm.calls.must_pass_in_stack (mode, type)) + return NULL_RTX; + } +#else +#error Unhandled ABI +#endif + + return gen_rtx_REG (mode, num_args + basereg); +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +static void +alpha_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + bool onstack = targetm.calls.must_pass_in_stack (mode, type); + int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named); + +#if TARGET_ABI_OSF + *cum += increment; +#else + if (!onstack && cum->num_args < 6) + cum->atypes[cum->num_args] = alpha_arg_type (mode); + cum->num_args += increment; +#endif +} + +static int +alpha_arg_partial_bytes (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + int words = 0; + +#if TARGET_ABI_OPEN_VMS + if (cum->num_args < 6 + && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named)) + words = 6 - cum->num_args; +#elif TARGET_ABI_UNICOSMK + /* Never any split arguments. */ +#elif TARGET_ABI_OSF + if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named)) + words = 6 - *cum; +#else +#error Unhandled ABI +#endif + + return words * UNITS_PER_WORD; +} + + +/* Return true if TYPE must be returned in memory, instead of in registers. */ + +static bool +alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + enum machine_mode mode = VOIDmode; + int size; + + if (type) + { + mode = TYPE_MODE (type); + + /* All aggregates are returned in memory, except on OpenVMS where + records that fit 64 bits should be returned by immediate value + as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ + if (TARGET_ABI_OPEN_VMS + && TREE_CODE (type) != ARRAY_TYPE + && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8) + return false; + + if (AGGREGATE_TYPE_P (type)) + return true; + } + + size = GET_MODE_SIZE (mode); + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_FLOAT: + /* Pass all float vectors in memory, like an aggregate. */ + return true; + + case MODE_COMPLEX_FLOAT: + /* We judge complex floats on the size of their element, + not the size of the whole type. */ + size = GET_MODE_UNIT_SIZE (mode); + break; + + case MODE_INT: + case MODE_FLOAT: + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + break; + + default: + /* ??? We get called on all sorts of random stuff from + aggregate_value_p. We must return something, but it's not + clear what's safe to return. Pretend it's a struct I + guess. */ + return true; + } + + /* Otherwise types must fit in one register. */ + return size > UNITS_PER_WORD; +} + +/* Return true if TYPE should be passed by invisible reference. 
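   On Alpha only the 128-bit floating-point modes are affected; assuming
   long double is the 128-bit IEEE quad type (-mlong-double-128), e.g.:

     void g (long double x);             x is TFmode
     void h (_Complex long double z);    z is TCmode

   and in both cases the caller passes the address of a temporary copy in
   the argument slot instead of the value itself.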
*/ + +static bool +alpha_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + return mode == TFmode || mode == TCmode; +} + +/* Define how to find the value returned by a function. VALTYPE is the + data type of the value (as a tree). If the precise function being + called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. + MODE is set instead of VALTYPE for libcalls. + + On Alpha the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +rtx +function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + unsigned int regnum, dummy ATTRIBUTE_UNUSED; + enum mode_class mclass; + + gcc_assert (!valtype || !alpha_return_in_memory (valtype, func)); + + if (valtype) + mode = TYPE_MODE (valtype); + + mclass = GET_MODE_CLASS (mode); + switch (mclass) + { + case MODE_INT: + /* Do the same thing as PROMOTE_MODE except for libcalls on VMS, + where we have them returning both SImode and DImode. */ + if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype))) + PROMOTE_MODE (mode, dummy, valtype); + /* FALLTHRU */ + + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + regnum = 0; + break; + + case MODE_FLOAT: + regnum = 32; + break; + + case MODE_COMPLEX_FLOAT: + { + enum machine_mode cmode = GET_MODE_INNER (mode); + + return gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32), + const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33), + GEN_INT (GET_MODE_SIZE (cmode))))); + } + + case MODE_RANDOM: + /* We should only reach here for BLKmode on VMS. */ + gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode); + regnum = 0; + break; + + default: + gcc_unreachable (); + } + + return gen_rtx_REG (mode, regnum); +} + +/* TCmode complex values are passed by invisible reference. We + should not split these values. */ + +static bool +alpha_split_complex_arg (const_tree type) +{ + return TYPE_MODE (type) != TCmode; +} + +static tree +alpha_build_builtin_va_list (void) +{ + tree base, ofs, space, record, type_decl; + + if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK) + return ptr_type_node; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__va_list_tag"), record); + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + + /* C++? SET_IS_AGGR_TYPE (record, 1); */ + + /* Dummy field to prevent alignment warnings. */ + space = build_decl (BUILTINS_LOCATION, + FIELD_DECL, NULL_TREE, integer_type_node); + DECL_FIELD_CONTEXT (space) = record; + DECL_ARTIFICIAL (space) = 1; + DECL_IGNORED_P (space) = 1; + + ofs = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__offset"), + integer_type_node); + DECL_FIELD_CONTEXT (ofs) = record; + DECL_CHAIN (ofs) = space; + /* ??? This is a hack, __offset is marked volatile to prevent + DCE that confuses stdarg optimization and results in + gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */ + TREE_THIS_VOLATILE (ofs) = 1; + + base = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__base"), + ptr_type_node); + DECL_FIELD_CONTEXT (base) = record; + DECL_CHAIN (base) = ofs; + + TYPE_FIELDS (record) = base; + layout_type (record); + + va_list_gpr_counter_field = ofs; + return record; +} + +#if TARGET_ABI_OSF +/* Helper function for alpha_stdarg_optimize_hook. Skip over casts + and constant additions. 
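   I.e. given a chain like this pseudo-GIMPLE sketch (temporary names
   invented):

     t1 = ap.__offset;
     t2 = (long unsigned int) t1;
     t3 = t2 + 8;

   starting from t3 the walk steps back through the addition and the
   cast and returns the statement defining t1, whose right-hand side is
   the COMPONENT_REF the callers below look for.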
*/ + +static gimple +va_list_skip_additions (tree lhs) +{ + gimple stmt; + + for (;;) + { + enum tree_code code; + + stmt = SSA_NAME_DEF_STMT (lhs); + + if (gimple_code (stmt) == GIMPLE_PHI) + return stmt; + + if (!is_gimple_assign (stmt) + || gimple_assign_lhs (stmt) != lhs) + return NULL; + + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return stmt; + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) + && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST + || !host_integerp (gimple_assign_rhs2 (stmt), 1))) + return stmt; + + lhs = gimple_assign_rhs1 (stmt); + } +} + +/* Check if LHS = RHS statement is + LHS = *(ap.__base + ap.__offset + cst) + or + LHS = *(ap.__base + + ((ap.__offset + cst <= 47) + ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2). + If the former, indicate that GPR registers are needed, + if the latter, indicate that FPR registers are needed. + + Also look for LHS = (*ptr).field, where ptr is one of the forms + listed above. + + On alpha, cfun->va_list_gpr_size is used as size of the needed + regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR + registers are needed and bit 1 set if FPR registers are needed. + Return true if va_list references should not be scanned for the + current statement. */ + +static bool +alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) +{ + tree base, offset, rhs; + int offset_arg = 1; + gimple base_stmt; + + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) + != GIMPLE_SINGLE_RHS) + return false; + + rhs = gimple_assign_rhs1 (stmt); + while (handled_component_p (rhs)) + rhs = TREE_OPERAND (rhs, 0); + if (TREE_CODE (rhs) != MEM_REF + || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) + return false; + + stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); + if (stmt == NULL + || !is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) + return false; + + base = gimple_assign_rhs1 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + { + base = gimple_assign_rhs2 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + return false; + + offset_arg = 0; + } + + base = get_base_address (base); + if (TREE_CODE (base) != VAR_DECL + || !bitmap_bit_p (si->va_list_vars, DECL_UID (base))) + return false; + + offset = gimple_op (stmt, 1 + offset_arg); + if (TREE_CODE (offset) == SSA_NAME) + { + gimple offset_stmt = va_list_skip_additions (offset); + + if (offset_stmt + && gimple_code (offset_stmt) == GIMPLE_PHI) + { + HOST_WIDE_INT sub; + gimple arg1_stmt, arg2_stmt; + tree arg1, arg2; + enum tree_code code1, code2; + + if (gimple_phi_num_args (offset_stmt) != 2) + goto escapes; + + arg1_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); + arg2_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); + if (arg1_stmt == NULL + || !is_gimple_assign (arg1_stmt) + || arg2_stmt == 
NULL + || !is_gimple_assign (arg2_stmt)) + goto escapes; + + code1 = gimple_assign_rhs_code (arg1_stmt); + code2 = gimple_assign_rhs_code (arg2_stmt); + if (code1 == COMPONENT_REF + && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) + /* Do nothing. */; + else if (code2 == COMPONENT_REF + && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) + { + gimple tem = arg1_stmt; + code2 = code1; + arg1_stmt = arg2_stmt; + arg2_stmt = tem; + } + else + goto escapes; + + if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0)) + goto escapes; + + sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0); + if (code2 == MINUS_EXPR) + sub = -sub; + if (sub < -48 || sub > -32) + goto escapes; + + arg1 = gimple_assign_rhs1 (arg1_stmt); + arg2 = gimple_assign_rhs1 (arg2_stmt); + if (TREE_CODE (arg2) == SSA_NAME) + { + arg2_stmt = va_list_skip_additions (arg2); + if (arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt) + || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) + goto escapes; + arg2 = gimple_assign_rhs1 (arg2_stmt); + } + if (arg1 != arg2) + goto escapes; + + if (TREE_CODE (arg1) != COMPONENT_REF + || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field + || get_base_address (arg1) != base) + goto escapes; + + /* Need floating point regs. */ + cfun->va_list_fpr_size |= 2; + return false; + } + if (offset_stmt + && is_gimple_assign (offset_stmt) + && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) + offset = gimple_assign_rhs1 (offset_stmt); + } + if (TREE_CODE (offset) != COMPONENT_REF + || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field + || get_base_address (offset) != base) + goto escapes; + else + /* Need general regs. */ + cfun->va_list_fpr_size |= 1; + return false; + +escapes: + si->va_list_escapes = true; + return false; +} +#endif + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. */ + +static void +alpha_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + tree type, int *pretend_size, int no_rtl) +{ + CUMULATIVE_ARGS cum = *pcum; + + /* Skip the current argument. */ + targetm.calls.function_arg_advance (&cum, mode, type, true); + +#if TARGET_ABI_UNICOSMK + /* On Unicos/Mk, the standard subroutine __T3E_MISMATCH stores all register + arguments on the stack. Unfortunately, it doesn't always store the first + one (i.e. the one that arrives in $16 or $f16). This is not a problem + with stdargs as we always have at least one named argument there. */ + if (cum.num_reg_words < 6) + { + if (!no_rtl) + { + emit_insn (gen_umk_mismatch_args (GEN_INT (cum.num_reg_words))); + emit_insn (gen_arg_home_umk ()); + } + *pretend_size = 0; + } +#elif TARGET_ABI_OPEN_VMS + /* For VMS, we allocate space for all 6 arg registers plus a count. + + However, if NO registers need to be saved, don't allocate any space. + This is not only because we won't need the space, but because AP + includes the current_pretend_args_size and we don't want to mess up + any ap-relative addresses already made. */ + if (cum.num_args < 6) + { + if (!no_rtl) + { + emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx); + emit_insn (gen_arg_home ()); + } + *pretend_size = 7 * UNITS_PER_WORD; + } +#else + /* On OSF/1 and friends, we allocate space for all 12 arg registers, but + only push those that are remaining. However, if NO registers need to + be saved, don't allocate any space. 
This is not only because we won't + need the space, but because AP includes the current_pretend_args_size + and we don't want to mess up any ap-relative addresses already made. + + If we are not to use the floating-point registers, save the integer + registers where we would put the floating-point registers. This is + not the most efficient way to implement varargs with just one register + class, but it isn't worth doing anything more efficient in this rare + case. */ + if (cum >= 6) + return; + + if (!no_rtl) + { + int count; + alias_set_type set = get_varargs_alias_set (); + rtx tmp; + + count = cfun->va_list_gpr_size / UNITS_PER_WORD; + if (count > 6 - cum) + count = 6 - cum; + + /* Detect whether integer registers or floating-point registers + are needed by the detected va_arg statements. See above for + how these values are computed. Note that the "escape" value + is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of + these bits set. */ + gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); + + if (cfun->va_list_fpr_size & 1) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (virtual_incoming_args_rtx, + (cum + 6) * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum, tmp, count); + } + + if (cfun->va_list_fpr_size & 2) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (virtual_incoming_args_rtx, + cum * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count); + } + } + *pretend_size = 12 * UNITS_PER_WORD; +#endif +} + +static void +alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT offset; + tree t, offset_field, base_field; + + if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) + return; + + if (TARGET_ABI_UNICOSMK) + std_expand_builtin_va_start (valist, nextarg); + + /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base + up by 48, storing fp arg registers in the first 48 bytes, and the + integer arg registers in the next 48 bytes. This is only done, + however, if any integer registers need to be stored. + + If no integer registers need be stored, then we must subtract 48 + in order to account for the integer arg registers which are counted + in argsize above, but which are not actually stored on the stack. + Must further be careful here about structures straddling the last + integer argument register; that futzes with pretend_args_size, + which changes the meaning of AP. */ + + if (NUM_ARGS < 6) + offset = TARGET_ABI_OPEN_VMS ? 
UNITS_PER_WORD : 6 * UNITS_PER_WORD; + else + offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; + + if (TARGET_ABI_OPEN_VMS) + { + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, + size_int (offset + NUM_ARGS * UNITS_PER_WORD)); + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + else + { + base_field = TYPE_FIELDS (TREE_TYPE (valist)); + offset_field = DECL_CHAIN (base_field); + + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), + valist, base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), + valist, offset_field, NULL_TREE); + + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, + size_int (offset)); + t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } +} + +static tree +alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, + gimple_seq *pre_p) +{ + tree type_size, ptr_type, addend, t, addr; + gimple_seq internal_post; + + /* If the type could not be passed in registers, skip the block + reserved for the registers. */ + if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type)) + { + t = build_int_cst (TREE_TYPE (offset), 6*8); + gimplify_assign (offset, + build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + } + + addend = offset; + ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); + + if (TREE_CODE (type) == COMPLEX_TYPE) + { + tree real_part, imag_part, real_temp; + + real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, + offset, pre_p); + + /* Copy the value into a new temporary, lest the formal temporary + be reused out from under us. */ + real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, + offset, pre_p); + + return build2 (COMPLEX_EXPR, type, real_temp, imag_part); + } + else if (TREE_CODE (type) == REAL_TYPE) + { + tree fpaddend, cond, fourtyeight; + + fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8); + fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), + addend, fourtyeight); + cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); + addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, + fpaddend, addend); + } + + /* Build the final address and force that value into a temporary. */ + addr = build2 (POINTER_PLUS_EXPR, ptr_type, fold_convert (ptr_type, base), + fold_convert (sizetype, addend)); + internal_post = NULL; + gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); + gimple_seq_add_seq (pre_p, internal_post); + + /* Update the offset field. 
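   The amount added is the argument size rounded up to whole 8-byte
   argument slots; e.g. for a 12-byte structure

     (12 + 7) / 8 * 8  ==  16

   so the next va_arg picks up at the following slot boundary.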
*/ + type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); + if (type_size == NULL || TREE_OVERFLOW (type_size)) + t = size_zero_node; + else + { + t = size_binop (PLUS_EXPR, type_size, size_int (7)); + t = size_binop (TRUNC_DIV_EXPR, t, size_int (8)); + t = size_binop (MULT_EXPR, t, size_int (8)); + } + t = fold_convert (TREE_TYPE (offset), t); + gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + + return build_va_arg_indirect_ref (addr); +} + +static tree +alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree offset_field, base_field, offset, base, t, r; + bool indirect; + + if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK) + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + + base_field = TYPE_FIELDS (va_list_type_node); + offset_field = DECL_CHAIN (base_field); + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), + valist, base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), + valist, offset_field, NULL_TREE); + + /* Pull the fields of the structure out into temporaries. Since we never + modify the base field, we can use a formal temporary. Sign-extend the + offset field so that it's the proper width for pointer arithmetic. */ + base = get_formal_tmp_var (base_field, pre_p); + + t = fold_convert (lang_hooks.types.type_for_size (64, 0), offset_field); + offset = get_initialized_tmp_var (t, pre_p, NULL); + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect) + type = build_pointer_type_for_mode (type, ptr_mode, true); + + /* Find the value. Note that this will be a stable indirection, or + a composite of stable indirections in the case of complex. */ + r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p); + + /* Stuff the offset temporary back into its field. */ + gimplify_assign (unshare_expr (offset_field), + fold_convert (TREE_TYPE (offset_field), offset), pre_p); + + if (indirect) + r = build_va_arg_indirect_ref (r); + + return r; +} + +/* Builtins. 
*/ + +enum alpha_builtin +{ + ALPHA_BUILTIN_CMPBGE, + ALPHA_BUILTIN_EXTBL, + ALPHA_BUILTIN_EXTWL, + ALPHA_BUILTIN_EXTLL, + ALPHA_BUILTIN_EXTQL, + ALPHA_BUILTIN_EXTWH, + ALPHA_BUILTIN_EXTLH, + ALPHA_BUILTIN_EXTQH, + ALPHA_BUILTIN_INSBL, + ALPHA_BUILTIN_INSWL, + ALPHA_BUILTIN_INSLL, + ALPHA_BUILTIN_INSQL, + ALPHA_BUILTIN_INSWH, + ALPHA_BUILTIN_INSLH, + ALPHA_BUILTIN_INSQH, + ALPHA_BUILTIN_MSKBL, + ALPHA_BUILTIN_MSKWL, + ALPHA_BUILTIN_MSKLL, + ALPHA_BUILTIN_MSKQL, + ALPHA_BUILTIN_MSKWH, + ALPHA_BUILTIN_MSKLH, + ALPHA_BUILTIN_MSKQH, + ALPHA_BUILTIN_UMULH, + ALPHA_BUILTIN_ZAP, + ALPHA_BUILTIN_ZAPNOT, + ALPHA_BUILTIN_AMASK, + ALPHA_BUILTIN_IMPLVER, + ALPHA_BUILTIN_RPCC, + ALPHA_BUILTIN_THREAD_POINTER, + ALPHA_BUILTIN_SET_THREAD_POINTER, + ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, + ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, + + /* TARGET_MAX */ + ALPHA_BUILTIN_MINUB8, + ALPHA_BUILTIN_MINSB8, + ALPHA_BUILTIN_MINUW4, + ALPHA_BUILTIN_MINSW4, + ALPHA_BUILTIN_MAXUB8, + ALPHA_BUILTIN_MAXSB8, + ALPHA_BUILTIN_MAXUW4, + ALPHA_BUILTIN_MAXSW4, + ALPHA_BUILTIN_PERR, + ALPHA_BUILTIN_PKLB, + ALPHA_BUILTIN_PKWB, + ALPHA_BUILTIN_UNPKBL, + ALPHA_BUILTIN_UNPKBW, + + /* TARGET_CIX */ + ALPHA_BUILTIN_CTTZ, + ALPHA_BUILTIN_CTLZ, + ALPHA_BUILTIN_CTPOP, + + ALPHA_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = { + CODE_FOR_builtin_cmpbge, + CODE_FOR_builtin_extbl, + CODE_FOR_builtin_extwl, + CODE_FOR_builtin_extll, + CODE_FOR_builtin_extql, + CODE_FOR_builtin_extwh, + CODE_FOR_builtin_extlh, + CODE_FOR_builtin_extqh, + CODE_FOR_builtin_insbl, + CODE_FOR_builtin_inswl, + CODE_FOR_builtin_insll, + CODE_FOR_builtin_insql, + CODE_FOR_builtin_inswh, + CODE_FOR_builtin_inslh, + CODE_FOR_builtin_insqh, + CODE_FOR_builtin_mskbl, + CODE_FOR_builtin_mskwl, + CODE_FOR_builtin_mskll, + CODE_FOR_builtin_mskql, + CODE_FOR_builtin_mskwh, + CODE_FOR_builtin_msklh, + CODE_FOR_builtin_mskqh, + CODE_FOR_umuldi3_highpart, + CODE_FOR_builtin_zap, + CODE_FOR_builtin_zapnot, + CODE_FOR_builtin_amask, + CODE_FOR_builtin_implver, + CODE_FOR_builtin_rpcc, + CODE_FOR_load_tp, + CODE_FOR_set_tp, + CODE_FOR_builtin_establish_vms_condition_handler, + CODE_FOR_builtin_revert_vms_condition_handler, + + /* TARGET_MAX */ + CODE_FOR_builtin_minub8, + CODE_FOR_builtin_minsb8, + CODE_FOR_builtin_minuw4, + CODE_FOR_builtin_minsw4, + CODE_FOR_builtin_maxub8, + CODE_FOR_builtin_maxsb8, + CODE_FOR_builtin_maxuw4, + CODE_FOR_builtin_maxsw4, + CODE_FOR_builtin_perr, + CODE_FOR_builtin_pklb, + CODE_FOR_builtin_pkwb, + CODE_FOR_builtin_unpkbl, + CODE_FOR_builtin_unpkbw, + + /* TARGET_CIX */ + CODE_FOR_ctzdi2, + CODE_FOR_clzdi2, + CODE_FOR_popcountdi2 +}; + +struct alpha_builtin_def +{ + const char *name; + enum alpha_builtin code; + unsigned int target_mask; + bool is_const; +}; + +static struct alpha_builtin_def const zero_arg_builtins[] = { + { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true }, + { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false } +}; + +static struct alpha_builtin_def const one_arg_builtins[] = { + { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true }, + { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true }, + { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true }, + { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true }, + { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true }, + { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true }, + { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true }, + { "__builtin_alpha_ctpop", 
ALPHA_BUILTIN_CTPOP, MASK_CIX, true } +}; + +static struct alpha_builtin_def const two_arg_builtins[] = { + { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true }, + { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true }, + { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true }, + { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true }, + { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true }, + { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true }, + { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true }, + { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true }, + { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true }, + { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true }, + { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true }, + { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true }, + { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true }, + { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true }, + { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true }, + { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true }, + { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true }, + { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true }, + { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true }, + { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true }, + { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true }, + { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true }, + { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true }, + { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true }, + { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true }, + { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true }, + { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true }, + { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true }, + { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true }, + { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true }, + { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true }, + { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true }, + { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true }, + { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true } +}; + +static GTY(()) tree alpha_v8qi_u; +static GTY(()) tree alpha_v8qi_s; +static GTY(()) tree alpha_v4hi_u; +static GTY(()) tree alpha_v4hi_s; + +static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max]; + +/* Return the alpha builtin for CODE. */ + +static tree +alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= ALPHA_BUILTIN_max) + return error_mark_node; + return alpha_builtins[code]; +} + +/* Helper function of alpha_init_builtins. Add the built-in specified + by NAME, TYPE, CODE, and ECF. */ + +static void +alpha_builtin_function (const char *name, tree ftype, + enum alpha_builtin code, unsigned ecf) +{ + tree decl = add_builtin_function (name, ftype, (int) code, + BUILT_IN_MD, NULL, NULL_TREE); + + if (ecf & ECF_CONST) + TREE_READONLY (decl) = 1; + if (ecf & ECF_NOTHROW) + TREE_NOTHROW (decl) = 1; + + alpha_builtins [(int) code] = decl; +} + +/* Helper function of alpha_init_builtins. Add the COUNT built-in + functions pointed to by P, with function type FTYPE. 
*/ + +static void +alpha_add_builtins (const struct alpha_builtin_def *p, size_t count, + tree ftype) +{ + size_t i; + + for (i = 0; i < count; ++i, ++p) + if ((target_flags & p->target_mask) == p->target_mask) + alpha_builtin_function (p->name, ftype, p->code, + (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW); +} + +static void +alpha_init_builtins (void) +{ + tree dimode_integer_type_node; + tree ftype; + + dimode_integer_type_node = lang_hooks.types.type_for_mode (DImode, 0); + + /* Fwrite on VMS is non-standard. */ +#if TARGET_ABI_OPEN_VMS + implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE; + implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE; +#endif + + ftype = build_function_type (dimode_integer_type_node, void_list_node); + alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), + ftype); + + ftype = build_function_type_list (dimode_integer_type_node, + dimode_integer_type_node, NULL_TREE); + alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), + ftype); + + ftype = build_function_type_list (dimode_integer_type_node, + dimode_integer_type_node, + dimode_integer_type_node, NULL_TREE); + alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), + ftype); + + ftype = build_function_type (ptr_type_node, void_list_node); + alpha_builtin_function ("__builtin_thread_pointer", ftype, + ALPHA_BUILTIN_THREAD_POINTER, ECF_NOTHROW); + + ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + alpha_builtin_function ("__builtin_set_thread_pointer", ftype, + ALPHA_BUILTIN_SET_THREAD_POINTER, ECF_NOTHROW); + + if (TARGET_ABI_OPEN_VMS) + { + ftype = build_function_type_list (ptr_type_node, ptr_type_node, + NULL_TREE); + alpha_builtin_function ("__builtin_establish_vms_condition_handler", + ftype, + ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, + 0); + + ftype = build_function_type_list (ptr_type_node, void_type_node, + NULL_TREE); + alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype, + ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0); + } + + alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8); + alpha_v8qi_s = build_vector_type (intQI_type_node, 8); + alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4); + alpha_v4hi_s = build_vector_type (intHI_type_node, 4); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
*/ + +static rtx +alpha_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg; + call_expr_arg_iterator iter; + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + + if (fcode >= ALPHA_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity > MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + arity++; + } + + if (nonvoid) + { + enum machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + + +/* Several bits below assume HWI >= 64 bits. This should be enforced + by config.gcc. */ +#if HOST_BITS_PER_WIDE_INT < 64 +# error "HOST_WIDE_INT too small" +#endif + +/* Fold the builtin for the CMPBGE instruction. This is a vector comparison + with an 8-bit output vector. OPINT contains the integer operands; bit N + of OP_CONST is set if OPINT[N] is valid. */ + +static tree +alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) +{ + if (op_const == 3) + { + int i, val; + for (i = 0, val = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; + if (c0 >= c1) + val |= 1 << i; + } + return build_int_cst (long_integer_type_node, val); + } + else if (op_const == 2 && opint[1] == 0) + return build_int_cst (long_integer_type_node, 0xff); + return NULL; +} + +/* Fold the builtin for the ZAPNOT instruction. This is essentially a + specialized form of an AND operation. Other byte manipulation instructions + are defined in terms of this instruction, so this is also used as a + subroutine for other builtins. + + OP contains the tree operands; OPINT contains the extracted integer values. + Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only + OPINT may be considered. 
*/ + +static tree +alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT mask = 0; + int i; + + for (i = 0; i < 8; ++i) + if ((opint[1] >> i) & 1) + mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8); + + if (op_const & 1) + return build_int_cst (long_integer_type_node, opint[0] & mask); + + if (op) + return fold_build2 (BIT_AND_EXPR, long_integer_type_node, op[0], + build_int_cst (long_integer_type_node, mask)); + } + else if ((op_const & 1) && opint[0] == 0) + return build_int_cst (long_integer_type_node, 0); + return NULL; +} + +/* Fold the builtins for the EXT family of instructions. */ + +static tree +alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + long zap_const = 2; + tree *zap_op = NULL; + + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + if (BYTES_BIG_ENDIAN) + loc ^= 7; + loc *= 8; + + if (loc != 0) + { + if (op_const & 1) + { + unsigned HOST_WIDE_INT temp = opint[0]; + if (is_high) + temp <<= loc; + else + temp >>= loc; + opint[0] = temp; + zap_const = 3; + } + } + else + zap_op = op; + } + + opint[1] = bytemask; + return alpha_fold_builtin_zapnot (zap_op, opint, zap_const); +} + +/* Fold the builtins for the INS family of instructions. */ + +static tree +alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if ((op_const & 1) && opint[0] == 0) + return build_int_cst (long_integer_type_node, 0); + + if (op_const & 2) + { + unsigned HOST_WIDE_INT temp, loc, byteloc; + tree *zap_op = NULL; + + loc = opint[1] & 7; + if (BYTES_BIG_ENDIAN) + loc ^= 7; + bytemask <<= loc; + + temp = opint[0]; + if (is_high) + { + byteloc = (64 - (loc * 8)) & 0x3f; + if (byteloc == 0) + zap_op = op; + else + temp >>= byteloc; + bytemask >>= 8; + } + else + { + byteloc = loc * 8; + if (byteloc == 0) + zap_op = op; + else + temp <<= byteloc; + } + + opint[0] = temp; + opint[1] = bytemask; + return alpha_fold_builtin_zapnot (zap_op, opint, op_const); + } + + return NULL; +} + +static tree +alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + if (BYTES_BIG_ENDIAN) + loc ^= 7; + bytemask <<= loc; + + if (is_high) + bytemask >>= 8; + + opint[1] = bytemask ^ 0xff; + } + + return alpha_fold_builtin_zapnot (op, opint, op_const); +} + +static tree +alpha_fold_builtin_umulh (unsigned HOST_WIDE_INT opint[], long op_const) +{ + switch (op_const) + { + case 3: + { + unsigned HOST_WIDE_INT l; + HOST_WIDE_INT h; + + mul_double (opint[0], 0, opint[1], 0, &l, &h); + +#if HOST_BITS_PER_WIDE_INT > 64 +# error fixme +#endif + + return build_int_cst (long_integer_type_node, h); + } + + case 1: + opint[1] = opint[0]; + /* FALLTHRU */ + case 2: + /* Note that (X*1) >> 64 == 0. 
*/ + if (opint[1] == 0 || opint[1] == 1) + return build_int_cst (long_integer_type_node, 0); + break; + } + return NULL; +} + +static tree +alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) +{ + tree op0 = fold_convert (vtype, op[0]); + tree op1 = fold_convert (vtype, op[1]); + tree val = fold_build2 (code, vtype, op0, op1); + return fold_build1 (VIEW_CONVERT_EXPR, long_integer_type_node, val); +} + +static tree +alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp = 0; + int i; + + if (op_const != 3) + return NULL; + + for (i = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff; + if (a >= b) + temp += a - b; + else + temp += b - a; + } + + return build_int_cst (long_integer_type_node, temp); +} + +static tree +alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 24) & 0xff00; + + return build_int_cst (long_integer_type_node, temp); +} + +static tree +alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 8) & 0xff00; + temp |= (opint[0] >> 16) & 0xff0000; + temp |= (opint[0] >> 24) & 0xff000000; + + return build_int_cst (long_integer_type_node, temp); +} + +static tree +alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0xff00) << 24; + + return build_int_cst (long_integer_type_node, temp); +} + +static tree +alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0x0000ff00) << 8; + temp |= (opint[0] & 0x00ff0000) << 16; + temp |= (opint[0] & 0xff000000) << 24; + + return build_int_cst (long_integer_type_node, temp); +} + +static tree +alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = exact_log2 (opint[0] & -opint[0]); + + return build_int_cst (long_integer_type_node, temp); +} + +static tree +alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = 64 - floor_log2 (opint[0]) - 1; + + return build_int_cst (long_integer_type_node, temp); +} + +static tree +alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp, op; + + if (op_const == 0) + return NULL; + + op = opint[0]; + temp = 0; + while (op) + temp++, op &= op - 1; + + return build_int_cst (long_integer_type_node, temp); +} + +/* Fold one of our builtin functions. 
*/ + +static tree +alpha_fold_builtin (tree fndecl, int n_args, tree *op, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT opint[MAX_ARGS]; + long op_const = 0; + int i; + + if (n_args >= MAX_ARGS) + return NULL; + + for (i = 0; i < n_args; i++) + { + tree arg = op[i]; + if (arg == error_mark_node) + return NULL; + + opint[i] = 0; + if (TREE_CODE (arg) == INTEGER_CST) + { + op_const |= 1L << i; + opint[i] = int_cst_value (arg); + } + } + + switch (DECL_FUNCTION_CODE (fndecl)) + { + case ALPHA_BUILTIN_CMPBGE: + return alpha_fold_builtin_cmpbge (opint, op_const); + + case ALPHA_BUILTIN_EXTBL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_EXTWL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_EXTLL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_EXTQL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_EXTWH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_EXTLH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_EXTQH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_INSBL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_INSWL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_INSLL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_INSQL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_INSWH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_INSLH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_INSQH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_MSKBL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_MSKWL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_MSKLL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_MSKQL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_MSKWH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_MSKLH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_MSKQH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_UMULH: + return alpha_fold_builtin_umulh (opint, op_const); + + case ALPHA_BUILTIN_ZAP: + opint[1] ^= 0xff; + /* FALLTHRU */ + case ALPHA_BUILTIN_ZAPNOT: + return alpha_fold_builtin_zapnot (op, opint, op_const); + + case ALPHA_BUILTIN_MINUB8: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u); + case ALPHA_BUILTIN_MINSB8: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s); + case ALPHA_BUILTIN_MINUW4: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u); + case ALPHA_BUILTIN_MINSW4: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s); + case ALPHA_BUILTIN_MAXUB8: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u); + case ALPHA_BUILTIN_MAXSB8: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s); + case ALPHA_BUILTIN_MAXUW4: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u); + case ALPHA_BUILTIN_MAXSW4: + 
return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s); + + case ALPHA_BUILTIN_PERR: + return alpha_fold_builtin_perr (opint, op_const); + case ALPHA_BUILTIN_PKLB: + return alpha_fold_builtin_pklb (opint, op_const); + case ALPHA_BUILTIN_PKWB: + return alpha_fold_builtin_pkwb (opint, op_const); + case ALPHA_BUILTIN_UNPKBL: + return alpha_fold_builtin_unpkbl (opint, op_const); + case ALPHA_BUILTIN_UNPKBW: + return alpha_fold_builtin_unpkbw (opint, op_const); + + case ALPHA_BUILTIN_CTTZ: + return alpha_fold_builtin_cttz (opint, op_const); + case ALPHA_BUILTIN_CTLZ: + return alpha_fold_builtin_ctlz (opint, op_const); + case ALPHA_BUILTIN_CTPOP: + return alpha_fold_builtin_ctpop (opint, op_const); + + case ALPHA_BUILTIN_AMASK: + case ALPHA_BUILTIN_IMPLVER: + case ALPHA_BUILTIN_RPCC: + case ALPHA_BUILTIN_THREAD_POINTER: + case ALPHA_BUILTIN_SET_THREAD_POINTER: + /* None of these are foldable at compile-time. */ + default: + return NULL; + } +} + +/* This page contains routines that are used to determine what the function + prologue and epilogue code will do and write them out. */ + +/* Compute the size of the save area in the stack. */ + +/* These variables are used for communication between the following functions. + They indicate various things about the current function being compiled + that are used to tell what kind of prologue, epilogue and procedure + descriptor to generate. */ + +/* Nonzero if we need a stack procedure. */ +enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2}; +static enum alpha_procedure_types alpha_procedure_type; + +/* Register number (either FP or SP) that is used to unwind the frame. */ +static int vms_unwind_regno; + +/* Register number used to save FP. We need not have one for RA since + we don't modify it for register procedures. This is only defined + for register frame procedures. */ +static int vms_save_fp_regno; + +/* Register number used to reference objects off our PV. */ +static int vms_base_regno; + +/* Compute register masks for saved registers. */ + +static void +alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + unsigned int i; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. */ + if (cfun->is_thunk) + { + *imaskP = 0; + *fmaskP = 0; + return; + } + + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) + imask |= (1UL << HARD_FRAME_POINTER_REGNUM); + + /* One for every register we have to save. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (! fixed_regs[i] && ! call_used_regs[i] + && df_regs_ever_live_p (i) && i != REG_RA + && (!TARGET_ABI_UNICOSMK || i != HARD_FRAME_POINTER_REGNUM)) + { + if (i < 32) + imask |= (1UL << i); + else + fmask |= (1UL << (i - 32)); + } + + /* We need to restore these for the handler. */ + if (crtl->calls_eh_return) + { + for (i = 0; ; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + imask |= 1UL << regno; + } + } + + /* If any register spilled, then spill the return address also. */ + /* ??? This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. 
*/ + if (imask || fmask || alpha_ra_ever_killed ()) + imask |= (1UL << REG_RA); + + *imaskP = imask; + *fmaskP = fmask; +} + +int +alpha_sa_size (void) +{ + unsigned long mask[2]; + int sa_size = 0; + int i, j; + + alpha_sa_mask (&mask[0], &mask[1]); + + if (TARGET_ABI_UNICOSMK) + { + if (mask[0] || mask[1]) + sa_size = 14; + } + else + { + for (j = 0; j < 2; ++j) + for (i = 0; i < 32; ++i) + if ((mask[j] >> i) & 1) + sa_size++; + } + + if (TARGET_ABI_UNICOSMK) + { + /* We might not need to generate a frame if we don't make any calls + (including calls to __T3E_MISMATCH if this is a vararg function), + don't have any local variables which require stack slots, don't + use alloca and have not determined that we need a frame for other + reasons. */ + + alpha_procedure_type + = (sa_size || get_frame_size() != 0 + || crtl->outgoing_args_size + || cfun->stdarg || cfun->calls_alloca + || frame_pointer_needed) + ? PT_STACK : PT_REGISTER; + + /* Always reserve space for saving callee-saved registers if we + need a frame as required by the calling convention. */ + if (alpha_procedure_type == PT_STACK) + sa_size = 14; + } + else if (TARGET_ABI_OPEN_VMS) + { + /* Start with a stack procedure if we make any calls (REG_RA used), or + need a frame pointer, with a register procedure if we otherwise need + at least a slot, and with a null procedure in other cases. */ + if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed) + alpha_procedure_type = PT_STACK; + else if (get_frame_size() != 0) + alpha_procedure_type = PT_REGISTER; + else + alpha_procedure_type = PT_NULL; + + /* Don't reserve space for saving FP & RA yet. Do that later after we've + made the final decision on stack procedure vs register procedure. */ + if (alpha_procedure_type == PT_STACK) + sa_size -= 2; + + /* Decide whether to refer to objects off our PV via FP or PV. + If we need FP for something else or if we receive a nonlocal + goto (which expects PV to contain the value), we must use PV. + Otherwise, start by assuming we can use FP. */ + + vms_base_regno + = (frame_pointer_needed + || cfun->has_nonlocal_label + || alpha_procedure_type == PT_STACK + || crtl->outgoing_args_size) + ? REG_PV : HARD_FRAME_POINTER_REGNUM; + + /* If we want to copy PV into FP, we need to find some register + in which to save FP. */ + + vms_save_fp_regno = -1; + if (vms_base_regno == HARD_FRAME_POINTER_REGNUM) + for (i = 0; i < 32; i++) + if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i)) + vms_save_fp_regno = i; + + /* A VMS condition handler requires a stack procedure in our + implementation. (not required by the calling standard). */ + if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER) + || cfun->machine->uses_condition_handler) + vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK; + else if (alpha_procedure_type == PT_NULL) + vms_base_regno = REG_PV; + + /* Stack unwinding should be done via FP unless we use it for PV. */ + vms_unwind_regno = (vms_base_regno == REG_PV + ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM); + + /* If this is a stack procedure, allow space for saving FP, RA and + a condition handler slot if needed. */ + if (alpha_procedure_type == PT_STACK) + sa_size += 2 + cfun->machine->uses_condition_handler; + } + else + { + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + } + + return sa_size * 8; +} + +/* Define the offset between two registers, one to be eliminated, + and the other its replacement, at the start of a routine. 
*/ + +HOST_WIDE_INT +alpha_initial_elimination_offset (unsigned int from, + unsigned int to ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT ret; + + ret = alpha_sa_size (); + ret += ALPHA_ROUND (crtl->outgoing_args_size); + + switch (from) + { + case FRAME_POINTER_REGNUM: + break; + + case ARG_POINTER_REGNUM: + ret += (ALPHA_ROUND (get_frame_size () + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + + default: + gcc_unreachable (); + } + + return ret; +} + +#if TARGET_ABI_OPEN_VMS + +/* Worker function for TARGET_CAN_ELIMINATE. */ + +static bool +alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + /* We need the alpha_procedure_type to decide. Evaluate it now. */ + alpha_sa_size (); + + switch (alpha_procedure_type) + { + case PT_NULL: + /* NULL procedures have no frame of their own and we only + know how to resolve from the current stack pointer. */ + return to == STACK_POINTER_REGNUM; + + case PT_REGISTER: + case PT_STACK: + /* We always eliminate except to the stack pointer if there is no + usable frame pointer at hand. */ + return (to != STACK_POINTER_REGNUM + || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM); + } + + gcc_unreachable (); +} + +/* FROM is to be eliminated for TO. Return the offset so that TO+offset + designates the same location as FROM. */ + +HOST_WIDE_INT +alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to) +{ + /* The only possible attempts we ever expect are ARG or FRAME_PTR to + HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide + on the proper computations and will need the register save area size + in most cases. */ + + HOST_WIDE_INT sa_size = alpha_sa_size (); + + /* PT_NULL procedures have no frame of their own and we only allow + elimination to the stack pointer. This is the argument pointer and we + resolve the soft frame pointer to that as well. */ + + if (alpha_procedure_type == PT_NULL) + return 0; + + /* For a PT_STACK procedure the frame layout looks as follows + + -----> decreasing addresses + + < size rounded up to 16 | likewise > + --------------#------------------------------+++--------------+++-------# + incoming args # pretended args | "frame" | regs sa | PV | outgoing args # + --------------#---------------------------------------------------------# + ^ ^ ^ ^ + ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR + + + PT_REGISTER procedures are similar in that they may have a frame of their + own. They have no regs-sa/pv/outgoing-args area. + + We first compute offset to HARD_FRAME_PTR, then add what we need to get + to STACK_PTR if need be. */ + + { + HOST_WIDE_INT offset; + HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 
8 : 0; + + switch (from) + { + case FRAME_POINTER_REGNUM: + offset = ALPHA_ROUND (sa_size + pv_save_size); + break; + case ARG_POINTER_REGNUM: + offset = (ALPHA_ROUND (sa_size + pv_save_size + + get_frame_size () + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + default: + gcc_unreachable (); + } + + if (to == STACK_POINTER_REGNUM) + offset += ALPHA_ROUND (crtl->outgoing_args_size); + + return offset; + } +} + +#define COMMON_OBJECT "common_object" + +static tree +common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs ATTRIBUTE_UNUSED) +{ + tree decl = *node; + gcc_assert (DECL_P (decl)); + + DECL_COMMON (decl) = 1; + return NULL_TREE; +} + +static const struct attribute_spec vms_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler }, + { NULL, 0, 0, false, false, false, NULL } +}; + +void +vms_output_aligned_decl_common(FILE *file, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + tree attr = DECL_ATTRIBUTES (decl); + fprintf (file, "%s", COMMON_ASM_OP); + assemble_name (file, name); + fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size); + /* ??? Unlike on OSF/1, the alignment factor is not in log units. */ + fprintf (file, ",%u", align / BITS_PER_UNIT); + if (attr) + { + attr = lookup_attribute (COMMON_OBJECT, attr); + if (attr) + fprintf (file, ",%s", + IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr)))); + } + fputc ('\n', file); +} + +#undef COMMON_OBJECT + +#endif + +static int +find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx; +} + +int +alpha_find_lo_sum_using_gp (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0; +} + +static int +alpha_does_function_need_gp (void) +{ + rtx insn; + + /* The GP being variable is an OSF abi thing. */ + if (! TARGET_ABI_OSF) + return 0; + + /* We need the gp to load the address of __mcount. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + return 1; + + /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */ + if (cfun->is_thunk) + return 1; + + /* The nonlocal receiver pattern assumes that the gp is valid for + the nested function. Reasonable because it's almost always set + correctly already. For the cases where that's wrong, make sure + the nested function loads its gp on entry. */ + if (crtl->has_nonlocal_goto) + return 1; + + /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. + Even if we are a static function, we still need to do this in case + our address is taken and passed to something like qsort. */ + + push_topmost_sequence (); + insn = get_insns (); + pop_topmost_sequence (); + + for (; insn; insn = NEXT_INSN (insn)) + if (NONDEBUG_INSN_P (insn) + && ! JUMP_TABLE_DATA_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER + && get_attr_usegp (insn)) + return 1; + + return 0; +} + + +/* Helper function to set RTX_FRAME_RELATED_P on instructions, including + sequences. 
*/ + +static rtx +set_frame_related_p (void) +{ + rtx seq = get_insns (); + rtx insn; + + end_sequence (); + + if (!seq) + return NULL_RTX; + + if (INSN_P (seq)) + { + insn = seq; + while (insn != NULL_RTX) + { + RTX_FRAME_RELATED_P (insn) = 1; + insn = NEXT_INSN (insn); + } + seq = emit_insn (seq); + } + else + { + seq = emit_insn (seq); + RTX_FRAME_RELATED_P (seq) = 1; + } + return seq; +} + +#define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) + +/* Generates a store with the proper unwind info attached. VALUE is + stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG + contains SP+FRAME_BIAS, and that is the unwind info that should be + generated. If FRAME_REG != VALUE, then VALUE is being stored on + behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */ + +static void +emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs, rtx frame_reg) +{ + rtx addr, mem, insn; + + addr = plus_constant (base_reg, base_ofs); + mem = gen_frame_mem (DImode, addr); + + insn = emit_move_insn (mem, value); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_bias || value != frame_reg) + { + if (frame_bias) + { + addr = plus_constant (stack_pointer_rtx, frame_bias + base_ofs); + mem = gen_rtx_MEM (DImode, addr); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, mem, frame_reg)); + } +} + +static void +emit_frame_store (unsigned int regno, rtx base_reg, + HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs) +{ + rtx reg = gen_rtx_REG (DImode, regno); + emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); +} + +/* Compute the frame size. SIZE is the size of the "naked" frame + and SA_SIZE is the size of the register save area. */ + +static HOST_WIDE_INT +compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) +{ + if (TARGET_ABI_OPEN_VMS) + return ALPHA_ROUND (sa_size + + (alpha_procedure_type == PT_STACK ? 8 : 0) + + size + + crtl->args.pretend_args_size); + else if (TARGET_ABI_UNICOSMK) + /* We have to allocate space for the DSIB if we generate a frame. */ + return ALPHA_ROUND (sa_size + + (alpha_procedure_type == PT_STACK ? 48 : 0)) + + ALPHA_ROUND (size + + crtl->outgoing_args_size); + else + return ALPHA_ROUND (crtl->outgoing_args_size) + + sa_size + + ALPHA_ROUND (size + + crtl->args.pretend_args_size); +} + +/* Write function prologue. */ + +/* On vms we have two kinds of functions: + + - stack frame (PROC_STACK) + these are 'normal' functions with local vars and which are + calling other functions + - register frame (PROC_REGISTER) + keeps all data in registers, needs no stack + + We must pass this to the assembler so it can generate the + proper pdsc (procedure descriptor) + This is done with the '.pdesc' command. + + On not-vms, we don't really differentiate between the two, as we can + simply allocate stack without saving registers. */ + +void +alpha_expand_prologue (void) +{ + /* Registers to save. */ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size; + /* Probed stack size; it additionally includes the size of + the "reserve region" if any. */ + HOST_WIDE_INT probed_size; + /* Offset from base reg to register save area. 
*/ + HOST_WIDE_INT reg_offset; + rtx sa_reg; + int i; + + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (flag_stack_usage) + current_function_static_stack_size = frame_size; + + if (TARGET_ABI_OPEN_VMS) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + /* Emit an insn to reload GP, if needed. */ + if (TARGET_ABI_OSF) + { + alpha_function_needs_gp = alpha_does_function_need_gp (); + if (alpha_function_needs_gp) + emit_insn (gen_prologue_ldgp ()); + } + + /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert + the call to mcount ourselves, rather than having the linker do it + magically in response to -pg. Since _mcount has special linkage, + don't represent the call as a call. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + emit_insn (gen_prologue_mcount ()); + + if (TARGET_ABI_UNICOSMK) + unicosmk_gen_dsib (&imask); + + /* Adjust the stack by the frame size. If the frame size is > 4096 + bytes, we need to be sure we probe somewhere in the first and last + 4096 bytes (we can probably get away without the latter test) and + every 8192 bytes in between. If the frame size is > 32768, we + do this in a loop. Otherwise, we generate the explicit probe + instructions. + + Note that we are only allowed to adjust sp once in the prologue. */ + + probed_size = frame_size; + if (flag_stack_check) + probed_size += STACK_CHECK_PROTECT; + + if (probed_size <= 32768) + { + if (probed_size > 4096) + { + int probed; + + for (probed = 4096; probed < probed_size; probed += 8192) + emit_insn (gen_probe_stack (GEN_INT (TARGET_ABI_UNICOSMK + ? -probed + 64 + : -probed))); + + /* We only have to do this probe if we aren't saving registers or + if we are probing beyond the frame because of -fstack-check. */ + if ((sa_size == 0 && probed_size > probed - 4096) + || flag_stack_check) + emit_insn (gen_probe_stack (GEN_INT (-probed_size))); + } + + if (frame_size != 0) + FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (TARGET_ABI_UNICOSMK + ? -frame_size + 64 + : -frame_size)))); + } + else + { + /* Here we generate code to set R22 to SP + 4096 and set R23 to the + number of 8192 byte blocks to probe. We then probe each block + in the loop and then set SP to the proper location. If the + amount remaining is > 4096, we have to do one more probe if we + are not saving any registers or if we are probing beyond the + frame because of -fstack-check. */ + + HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; + HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; + rtx ptr = gen_rtx_REG (DImode, 22); + rtx count = gen_rtx_REG (DImode, 23); + rtx seq; + + emit_move_insn (count, GEN_INT (blocks)); + emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, + GEN_INT (TARGET_ABI_UNICOSMK ? 4096 - 64 : 4096))); + + /* Because of the difficulty in emitting a new basic block this + late in the compilation, generate the loop as a single insn. 
*/ + emit_insn (gen_prologue_stack_probe_loop (count, ptr)); + + if ((leftover > 4096 && sa_size == 0) || flag_stack_check) + { + rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover)); + MEM_VOLATILE_P (last) = 1; + emit_move_insn (last, const0_rtx); + } + + if (TARGET_ABI_WINDOWS_NT || flag_stack_check) + { + /* For NT stack unwind (done by 'reverse execution'), it's + not OK to take the result of a loop, even though the value + is already in ptr, so we reload it via a single operation + and subtract it to sp. + + Same if -fstack-check is specified, because the probed stack + size is not equal to the frame size. + + Yes, that's correct -- we have to reload the whole constant + into a temporary via ldah+lda then subtract from sp. */ + + HOST_WIDE_INT lo, hi; + lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + hi = frame_size - lo; + + emit_move_insn (ptr, GEN_INT (hi)); + emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); + seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, + ptr)); + } + else + { + seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr, + GEN_INT (-leftover))); + } + + /* This alternative is special, because the DWARF code cannot + possibly intuit through the loop above. So we invent this + note it looks at instead. */ + RTX_FRAME_RELATED_P (seq) = 1; + add_reg_note (seq, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (TARGET_ABI_UNICOSMK + ? -frame_size + 64 + : -frame_size)))); + } + + if (!TARGET_ABI_UNICOSMK) + { + HOST_WIDE_INT sa_bias = 0; + + /* Cope with very large offsets to the register save area. */ + sa_reg = stack_pointer_rtx; + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + rtx sa_bias_rtx; + + if (low + sa_size <= 0x8000) + sa_bias = reg_offset - low, reg_offset = low; + else + sa_bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 24); + sa_bias_rtx = GEN_INT (sa_bias); + + if (add_operand (sa_bias_rtx, DImode)) + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); + else + { + emit_move_insn (sa_reg, sa_bias_rtx); + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); + } + } + + /* Save regs in stack order. Beginning with VMS PV. */ + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) + emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0); + + /* Save register RA next. */ + if (imask & (1UL << REG_RA)) + { + emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset); + imask &= ~(1UL << REG_RA); + reg_offset += 8; + } + + /* Now save any other registers required to be saved. */ + for (i = 0; i < 31; i++) + if (imask & (1UL << i)) + { + emit_frame_store (i, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + } + + for (i = 0; i < 31; i++) + if (fmask & (1UL << i)) + { + emit_frame_store (i+32, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + } + } + else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK) + { + /* The standard frame on the T3E includes space for saving registers. + We just have to use it. We don't have to save the return address and + the old frame pointer here - they are saved in the DSIB. 
*/ + + reg_offset = -56; + for (i = 9; i < 15; i++) + if (imask & (1UL << i)) + { + emit_frame_store (i, hard_frame_pointer_rtx, 0, reg_offset); + reg_offset -= 8; + } + for (i = 2; i < 10; i++) + if (fmask & (1UL << i)) + { + emit_frame_store (i+32, hard_frame_pointer_rtx, 0, reg_offset); + reg_offset -= 8; + } + } + + if (TARGET_ABI_OPEN_VMS) + { + /* Register frame procedures save the fp. */ + if (alpha_procedure_type == PT_REGISTER) + { + rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno), + hard_frame_pointer_rtx); + add_reg_note (insn, REG_CFA_REGISTER, NULL); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV) + emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno), + gen_rtx_REG (DImode, REG_PV))); + + if (alpha_procedure_type != PT_NULL + && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM) + FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); + + /* If we have to allocate space for outgoing args, do it now. */ + if (crtl->outgoing_args_size != 0) + { + rtx seq + = emit_move_insn (stack_pointer_rtx, + plus_constant + (hard_frame_pointer_rtx, + - (ALPHA_ROUND + (crtl->outgoing_args_size)))); + + /* Only set FRAME_RELATED_P on the stack adjustment we just emitted + if ! frame_pointer_needed. Setting the bit will change the CFA + computation rule to use sp again, which would be wrong if we had + frame_pointer_needed, as this means sp might move unpredictably + later on. + + Also, note that + frame_pointer_needed + => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM + and + crtl->outgoing_args_size != 0 + => alpha_procedure_type != PT_NULL, + + so when we are not setting the bit here, we are guaranteed to + have emitted an FRP frame pointer update just before. */ + RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed; + } + } + else if (!TARGET_ABI_UNICOSMK) + { + /* If we need a frame pointer, set it from the stack pointer. */ + if (frame_pointer_needed) + { + if (TARGET_CAN_FAULT_IN_PROLOGUE) + FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); + else + /* This must always be the last instruction in the + prologue, thus we emit a special move + clobber. */ + FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx, + stack_pointer_rtx, sa_reg))); + } + } + + /* The ABIs for VMS and OSF/1 say that while we can schedule insns into + the prologue, for exception handling reasons, we cannot do this for + any insn that might fault. We could prevent this for mems with a + (clobber:BLK (scratch)), but this doesn't work for fp insns. So we + have to prevent all such scheduling with a blockage. + + Linux, on the other hand, never bothered to implement OSF/1's + exception handling, and so doesn't care about such things. Anyone + planning to use dwarf2 frame-unwind info can also omit the blockage. */ + + if (! TARGET_CAN_FAULT_IN_PROLOGUE) + emit_insn (gen_blockage ()); +} + +/* Count the number of .file directives, so that .loc is up to date. */ +int num_source_filenames = 0; + +/* Output the textual info surrounding the prologue. */ + +void +alpha_start_function (FILE *file, const char *fnname, + tree decl ATTRIBUTE_UNUSED) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size; + /* Complete stack size needed. */ + unsigned HOST_WIDE_INT frame_size; + /* The maximum debuggable frame size (512 Kbytes using Tru64 as). */ + unsigned HOST_WIDE_INT max_frame_size = TARGET_ABI_OSF && !TARGET_GAS + ? 
524288 + : 1UL << 31; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + char *entry_label = (char *) alloca (strlen (fnname) + 6); + char *tramp_label = (char *) alloca (strlen (fnname) + 6); + int i; + + /* Don't emit an extern directive for functions defined in the same file. */ + if (TARGET_ABI_UNICOSMK) + { + tree name_tree; + name_tree = get_identifier (fnname); + TREE_ASM_WRITTEN (name_tree) = 1; + } + +#if TARGET_ABI_OPEN_VMS + if (vms_debug_main + && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0) + { + targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER); + ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname); + switch_to_section (text_section); + vms_debug_main = NULL; + } +#endif + + alpha_fnname = fnname; + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (TARGET_ABI_OPEN_VMS) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + /* Ecoff can handle multiple .file directives, so put out file and lineno. + We have to do that before the .ent directive as we cannot switch + files within procedures with native ecoff because line numbers are + linked to procedure descriptors. + Outputting the lineno helps debugging of one line functions as they + would otherwise get no line number at all. Please note that we would + like to put out last_linenum from final.c, but it is not accessible. */ + + if (write_symbols == SDB_DEBUG) + { +#ifdef ASM_OUTPUT_SOURCE_FILENAME + ASM_OUTPUT_SOURCE_FILENAME (file, + DECL_SOURCE_FILE (current_function_decl)); +#endif +#ifdef SDB_OUTPUT_SOURCE_LINE + if (debug_info_level != DINFO_LEVEL_TERSE) + SDB_OUTPUT_SOURCE_LINE (file, + DECL_SOURCE_LINE (current_function_decl)); +#endif + } + + /* Issue function start and label. */ + if (TARGET_ABI_OPEN_VMS + || (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive)) + { + fputs ("\t.ent ", file); + assemble_name (file, fnname); + putc ('\n', file); + + /* If the function needs GP, we'll write the "..ng" label there. + Otherwise, do it here. */ + if (TARGET_ABI_OSF + && ! alpha_function_needs_gp + && ! cfun->is_thunk) + { + putc ('$', file); + assemble_name (file, fnname); + fputs ("..ng:\n", file); + } + } + /* Nested functions on VMS that are potentially called via trampoline + get a special transfer entry point that loads the called functions + procedure descriptor and static chain. */ + if (TARGET_ABI_OPEN_VMS + && !TREE_PUBLIC (decl) + && DECL_CONTEXT (decl) + && !TYPE_P (DECL_CONTEXT (decl))) + { + strcpy (tramp_label, fnname); + strcat (tramp_label, "..tr"); + ASM_OUTPUT_LABEL (file, tramp_label); + fprintf (file, "\tldq $1,24($27)\n"); + fprintf (file, "\tldq $27,16($27)\n"); + } + + strcpy (entry_label, fnname); + if (TARGET_ABI_OPEN_VMS) + strcat (entry_label, "..en"); + + /* For public functions, the label must be globalized by appending an + additional colon. */ + if (TARGET_ABI_UNICOSMK && TREE_PUBLIC (decl)) + strcat (entry_label, ":"); + + ASM_OUTPUT_LABEL (file, entry_label); + inside_function = TRUE; + + if (TARGET_ABI_OPEN_VMS) + fprintf (file, "\t.base $%d\n", vms_base_regno); + + if (!TARGET_ABI_OPEN_VMS && !TARGET_ABI_UNICOSMK && TARGET_IEEE_CONFORMANT + && !flag_inhibit_size_directive) + { + /* Set flags in procedure descriptor to request IEEE-conformant + math-library routines. The value we set it to is PDSC_EXC_IEEE + (/usr/include/pdsc.h). 
*/ + fputs ("\t.eflag 48\n", file); + } + + /* Set up offsets to alpha virtual arg/local debugging pointer. */ + alpha_auto_offset = -frame_size + crtl->args.pretend_args_size; + alpha_arg_offset = -frame_size + 48; + + /* Describe our frame. If the frame size is larger than an integer, + print it as zero to avoid an assembler error. We won't be + properly describing such a frame, but that's the best we can do. */ + if (TARGET_ABI_UNICOSMK) + ; + else if (TARGET_ABI_OPEN_VMS) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26," + HOST_WIDE_INT_PRINT_DEC "\n", + vms_unwind_regno, + frame_size >= (1UL << 31) ? 0 : frame_size, + reg_offset); + else if (!flag_inhibit_size_directive) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", + (frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM), + frame_size >= max_frame_size ? 0 : frame_size, + crtl->args.pretend_args_size); + + /* Describe which registers were spilled. */ + if (TARGET_ABI_UNICOSMK) + ; + else if (TARGET_ABI_OPEN_VMS) + { + if (imask) + /* ??? Does VMS care if mask contains ra? The old code didn't + set it, so I don't here. */ + fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA)); + if (fmask) + fprintf (file, "\t.fmask 0x%lx,0\n", fmask); + if (alpha_procedure_type == PT_REGISTER) + fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno); + } + else if (!flag_inhibit_size_directive) + { + if (imask) + { + fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + + for (i = 0; i < 32; ++i) + if (imask & (1UL << i)) + reg_offset += 8; + } + + if (fmask) + fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + } + +#if TARGET_ABI_OPEN_VMS + /* If a user condition handler has been installed at some point, emit + the procedure descriptor bits to point the Condition Handling Facility + at the indirection wrapper, and state the fp offset at which the user + handler may be found. */ + if (cfun->machine->uses_condition_handler) + { + fprintf (file, "\t.handler __gcc_shell_handler\n"); + fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET); + } + + /* Ifdef'ed cause link_section are only available then. */ + switch_to_section (readonly_data_section); + fprintf (file, "\t.align 3\n"); + assemble_name (file, fnname); fputs ("..na:\n", file); + fputs ("\t.ascii \"", file); + assemble_name (file, fnname); + fputs ("\\0\"\n", file); + alpha_need_linkage (fnname, 1); + switch_to_section (text_section); +#endif +} + +/* Emit the .prologue note at the scheduled end of the prologue. */ + +static void +alpha_output_function_end_prologue (FILE *file) +{ + if (TARGET_ABI_UNICOSMK) + ; + else if (TARGET_ABI_OPEN_VMS) + fputs ("\t.prologue\n", file); + else if (TARGET_ABI_WINDOWS_NT) + fputs ("\t.prologue 0\n", file); + else if (!flag_inhibit_size_directive) + fprintf (file, "\t.prologue %d\n", + alpha_function_needs_gp || cfun->is_thunk); +} + +/* Write function epilogue. */ + +void +alpha_expand_epilogue (void) +{ + /* Registers to save. */ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size; + /* Offset from base reg to register save area. 
*/ + HOST_WIDE_INT reg_offset; + int fp_is_frame_pointer, fp_offset; + rtx sa_reg, sa_reg_exp = NULL; + rtx sp_adj1, sp_adj2, mem, reg, insn; + rtx eh_ofs; + rtx cfa_restores = NULL_RTX; + int i; + + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (TARGET_ABI_OPEN_VMS) + { + if (alpha_procedure_type == PT_STACK) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = 0; + } + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + fp_is_frame_pointer + = ((TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) + || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed)); + fp_offset = 0; + sa_reg = stack_pointer_rtx; + + if (crtl->calls_eh_return) + eh_ofs = EH_RETURN_STACKADJ_RTX; + else + eh_ofs = NULL_RTX; + + if (!TARGET_ABI_UNICOSMK && sa_size) + { + /* If we have a frame pointer, restore SP from it. */ + if ((TARGET_ABI_OPEN_VMS + && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM) + || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed)) + emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + + /* Cope with very large offsets to the register save area. */ + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low + sa_size <= 0x8000) + bias = reg_offset - low, reg_offset = low; + else + bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 22); + sa_reg_exp = plus_constant (stack_pointer_rtx, bias); + + emit_move_insn (sa_reg, sa_reg_exp); + } + + /* Restore registers in order, excepting a true frame pointer. */ + + mem = gen_frame_mem (DImode, plus_constant (sa_reg, reg_offset)); + reg = gen_rtx_REG (DImode, REG_RA); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + + reg_offset += 8; + imask &= ~(1UL << REG_RA); + + for (i = 0; i < 31; ++i) + if (imask & (1UL << i)) + { + if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer) + fp_offset = reg_offset; + else + { + mem = gen_frame_mem (DImode, + plus_constant (sa_reg, reg_offset)); + reg = gen_rtx_REG (DImode, i); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, + cfa_restores); + } + reg_offset += 8; + } + + for (i = 0; i < 31; ++i) + if (fmask & (1UL << i)) + { + mem = gen_frame_mem (DFmode, plus_constant (sa_reg, reg_offset)); + reg = gen_rtx_REG (DFmode, i+32); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + reg_offset += 8; + } + } + else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK) + { + /* Restore callee-saved general-purpose registers. */ + + reg_offset = -56; + + for (i = 9; i < 15; i++) + if (imask & (1UL << i)) + { + mem = gen_frame_mem (DImode, + plus_constant (hard_frame_pointer_rtx, + reg_offset)); + reg = gen_rtx_REG (DImode, i); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + reg_offset -= 8; + } + + for (i = 2; i < 10; i++) + if (fmask & (1UL << i)) + { + mem = gen_frame_mem (DFmode, + plus_constant (hard_frame_pointer_rtx, + reg_offset)); + reg = gen_rtx_REG (DFmode, i+32); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + reg_offset -= 8; + } + + /* Restore the return address from the DSIB. 
*/ + mem = gen_frame_mem (DImode, plus_constant (hard_frame_pointer_rtx, -8)); + reg = gen_rtx_REG (DImode, REG_RA); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + } + + if (frame_size || eh_ofs) + { + sp_adj1 = stack_pointer_rtx; + + if (eh_ofs) + { + sp_adj1 = gen_rtx_REG (DImode, 23); + emit_move_insn (sp_adj1, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); + } + + /* If the stack size is large, begin computation into a temporary + register so as not to interfere with a potential fp restore, + which must be consecutive with an SP restore. */ + if (frame_size < 32768 + && ! (TARGET_ABI_UNICOSMK && cfun->calls_alloca)) + sp_adj2 = GEN_INT (frame_size); + else if (TARGET_ABI_UNICOSMK) + { + sp_adj1 = gen_rtx_REG (DImode, 23); + emit_move_insn (sp_adj1, hard_frame_pointer_rtx); + sp_adj2 = const0_rtx; + } + else if (frame_size < 0x40007fffL) + { + int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + + sp_adj2 = plus_constant (sp_adj1, frame_size - low); + if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) + sp_adj1 = sa_reg; + else + { + sp_adj1 = gen_rtx_REG (DImode, 23); + emit_move_insn (sp_adj1, sp_adj2); + } + sp_adj2 = GEN_INT (low); + } + else + { + rtx tmp = gen_rtx_REG (DImode, 23); + sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false); + if (!sp_adj2) + { + /* We can't drop new things to memory this late, afaik, + so build it up by pieces. */ + sp_adj2 = alpha_emit_set_long_const (tmp, frame_size, + -(frame_size < 0)); + gcc_assert (sp_adj2); + } + } + + /* From now on, things must be in order. So emit blockages. */ + + /* Restore the frame pointer. */ + if (TARGET_ABI_UNICOSMK) + { + emit_insn (gen_blockage ()); + mem = gen_frame_mem (DImode, + plus_constant (hard_frame_pointer_rtx, -16)); + emit_move_insn (hard_frame_pointer_rtx, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + hard_frame_pointer_rtx, cfa_restores); + } + else if (fp_is_frame_pointer) + { + emit_insn (gen_blockage ()); + mem = gen_frame_mem (DImode, plus_constant (sa_reg, fp_offset)); + emit_move_insn (hard_frame_pointer_rtx, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + hard_frame_pointer_rtx, cfa_restores); + } + else if (TARGET_ABI_OPEN_VMS) + { + emit_insn (gen_blockage ()); + emit_move_insn (hard_frame_pointer_rtx, + gen_rtx_REG (DImode, vms_save_fp_regno)); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + hard_frame_pointer_rtx, cfa_restores); + } + + /* Restore the stack pointer. */ + emit_insn (gen_blockage ()); + if (sp_adj2 == const0_rtx) + insn = emit_move_insn (stack_pointer_rtx, sp_adj1); + else + insn = emit_move_insn (stack_pointer_rtx, + gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)); + REG_NOTES (insn) = cfa_restores; + add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + gcc_assert (cfa_restores == NULL); + + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER) + { + emit_insn (gen_blockage ()); + insn = emit_move_insn (hard_frame_pointer_rtx, + gen_rtx_REG (DImode, vms_save_fp_regno)); + add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (TARGET_ABI_UNICOSMK && alpha_procedure_type != PT_STACK) + { + /* Decrement the frame pointer if the function does not have a + frame. */ + emit_insn (gen_blockage ()); + emit_insn (gen_adddi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, constm1_rtx)); + } + } +} + +/* Output the rest of the textual info surrounding the epilogue. 
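Both the save-area bias and the stack-pointer adjustment above rely on the idiom ((x & 0xffff) ^ 0x8000) - 0x8000, which reads the low 16 bits of x as a signed value so that the remainder can be carried by a high-part instruction while the low part fits a 16-bit lda-style displacement. A small self-contained check of that property, with arbitrary sample values:

    #include <assert.h>
    #include <stdio.h>

    int
    main (void)
    {
      long samples[] = { 0, 8, 0x7ff8, 0x8000, 0x12345, 0x40000000 };
      for (unsigned k = 0; k < sizeof samples / sizeof samples[0]; k++)
        {
          long x = samples[k];
          long low = ((x & 0xffff) ^ 0x8000) - 0x8000;

          /* LOW always fits a signed 16-bit displacement ...  */
          assert (low >= -0x8000 && low < 0x8000);
          /* ... and the original value is recovered exactly.  */
          assert ((x - low) + low == x);

          printf ("%#lx = %#lx + %ld\n", x, x - low, low);
        }
      return 0;
    }

When even the split form is out of range, the code falls back to materializing the constant in a temporary register, as the alpha_emit_set_const / alpha_emit_set_long_const path above shows.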
*/ + +void +alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) +{ + rtx insn; + + /* We output a nop after noreturn calls at the very end of the function to + ensure that the return address always remains in the caller's code range, + as not doing so might confuse unwinding engines. */ + insn = get_last_insn (); + if (!INSN_P (insn)) + insn = prev_active_insn (insn); + if (insn && CALL_P (insn)) + output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); + +#if TARGET_ABI_OPEN_VMS + alpha_write_linkage (file, fnname, decl); +#endif + + /* End the function. */ + if (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive) + { + fputs ("\t.end ", file); + assemble_name (file, fnname); + putc ('\n', file); + } + inside_function = FALSE; + + /* Output jump tables and the static subroutine information block. */ + if (TARGET_ABI_UNICOSMK) + { + unicosmk_output_ssib (file, fnname); + unicosmk_output_deferred_case_vectors (file); + } +} + +#if TARGET_ABI_OPEN_VMS +void avms_asm_output_external (FILE *file, tree decl ATTRIBUTE_UNUSED, const char *name) +{ +#ifdef DO_CRTL_NAMES + DO_CRTL_NAMES; +#endif +} +#endif + +#if TARGET_ABI_OSF +/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. + + In order to avoid the hordes of differences between generated code + with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating + lots of code loading up large constants, generate rtl and emit it + instead of going straight to text. + + Not sure why this idea hasn't been explored before... */ + +static void +alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + HOST_WIDE_INT hi, lo; + rtx this_rtx, insn, funexp; + + /* We always require a valid GP. */ + emit_insn (gen_prologue_ldgp ()); + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Find the "this" pointer. If the function returns a structure, + the structure return pointer is in $16. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, 17); + else + this_rtx = gen_rtx_REG (Pmode, 16); + + /* Add DELTA. When possible we use ldah+lda. Otherwise load the + entire constant for the add. */ + lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; + hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == delta) + { + if (hi) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); + if (lo) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); + } + else + { + rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), + delta, -(delta < 0)); + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Add a delta stored in the vtable at VCALL_OFFSET. */ + if (vcall_offset) + { + rtx tmp, tmp2; + + tmp = gen_rtx_REG (Pmode, 0); + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + + lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; + hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == vcall_offset) + { + if (hi) + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); + } + else + { + tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1), + vcall_offset, -(vcall_offset < 0)); + emit_insn (gen_adddi3 (tmp, tmp, tmp2)); + lo = 0; + } + if (lo) + tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); + else + tmp2 = tmp; + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); + + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (! 
TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. Note that use_thunk calls + assemble_start_function and assemble_end_function. */ + insn = get_insns (); + insn_locators_alloc (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); +} +#endif /* TARGET_ABI_OSF */ + +/* Debugging support. */ + +#include "gstab.h" + +/* Count the number of sdb related labels are generated (to find block + start and end boundaries). */ + +int sdb_label_count = 0; + +/* Name of the file containing the current function. */ + +static const char *current_function_file = ""; + +/* Offsets to alpha virtual arg/local debugging pointers. */ + +long alpha_arg_offset; +long alpha_auto_offset; + +/* Emit a new filename to a stream. */ + +void +alpha_output_filename (FILE *stream, const char *name) +{ + static int first_time = TRUE; + + if (first_time) + { + first_time = FALSE; + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + output_quoted_string (stream, name); + fprintf (stream, "\n"); + if (!TARGET_GAS && write_symbols == DBX_DEBUG) + fprintf (stream, "\t#@stabs\n"); + } + + else if (write_symbols == DBX_DEBUG) + /* dbxout.c will emit an appropriate .stabs directive. */ + return; + + else if (name != current_function_file + && strcmp (name, current_function_file) != 0) + { + if (inside_function && ! TARGET_GAS) + fprintf (stream, "\t#.file\t%d ", num_source_filenames); + else + { + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + } + + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } +} + +/* Structure to show the current status of registers and memory. */ + +struct shadow_summary +{ + struct { + unsigned int i : 31; /* Mask of int regs */ + unsigned int fp : 31; /* Mask of fp regs */ + unsigned int mem : 1; /* mem == imem | fpmem */ + } used, defd; +}; + +/* Summary the effects of expression X on the machine. Update SUM, a pointer + to the summary structure. SET is nonzero if the insn is setting the + object, otherwise zero. */ + +static void +summarize_insn (rtx x, struct shadow_summary *sum, int set) +{ + const char *format_ptr; + int i, j; + + if (x == 0) + return; + + switch (GET_CODE (x)) + { + /* ??? Note that this case would be incorrect if the Alpha had a + ZERO_EXTRACT in SET_DEST. 
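The DELTA and VCALL_OFFSET additions in the thunk above first test whether the constant is reachable with an ldah+lda pair and only then fall back to alpha_emit_set_long_const. A standalone sketch of that test, assuming a 64-bit long in place of HOST_WIDE_INT; the sample values are made up:

    #include <stdio.h>

    static int
    fits_ldah_lda (long delta, long *hi, long *lo)
    {
      *lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
      *hi = (((delta - *lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
      return *hi + *lo == delta;
    }

    int
    main (void)
    {
      long samples[] = { 0x10, 0x12340000, -0x7000, 0x123456789L };
      long hi, lo;

      for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
        printf ("%#lx: %s\n", samples[i],
                fits_ldah_lda (samples[i], &hi, &lo)
                ? "ldah+lda" : "build full constant");
      return 0;
    }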
*/ + case SET: + summarize_insn (SET_SRC (x), sum, 0); + summarize_insn (SET_DEST (x), sum, 1); + break; + + case CLOBBER: + summarize_insn (XEXP (x, 0), sum, 1); + break; + + case USE: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case ASM_OPERANDS: + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) + summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + summarize_insn (XVECEXP (x, 0, i), sum, 0); + break; + + case SUBREG: + summarize_insn (SUBREG_REG (x), sum, 0); + break; + + case REG: + { + int regno = REGNO (x); + unsigned long mask = ((unsigned long) 1) << (regno % 32); + + if (regno == 31 || regno == 63) + break; + + if (set) + { + if (regno < 32) + sum->defd.i |= mask; + else + sum->defd.fp |= mask; + } + else + { + if (regno < 32) + sum->used.i |= mask; + else + sum->used.fp |= mask; + } + } + break; + + case MEM: + if (set) + sum->defd.mem = 1; + else + sum->used.mem = 1; + + /* Find the regs used in memory address computation: */ + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case CONST_INT: case CONST_DOUBLE: + case SYMBOL_REF: case LABEL_REF: case CONST: + case SCRATCH: case ASM_INPUT: + break; + + /* Handle common unary and binary ops for efficiency. */ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + summarize_insn (XEXP (x, 0), sum, 0); + summarize_insn (XEXP (x, 1), sum, 0); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case 'e': + summarize_insn (XEXP (x, i), sum, 0); + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + summarize_insn (XVECEXP (x, i, j), sum, 0); + break; + + case 'i': + break; + + default: + gcc_unreachable (); + } + } +} + +/* Ensure a sufficient number of `trapb' insns are in the code when + the user requests code with a trap precision of functions or + instructions. + + In naive mode, when the user requests a trap-precision of + "instruction", a trapb is needed after every instruction that may + generate a trap. This ensures that the code is resumption safe but + it is also slow. + + When optimizations are turned on, we delay issuing a trapb as long + as possible. In this context, a trap shadow is the sequence of + instructions that starts with a (potentially) trap generating + instruction and extends to the next trapb or call_pal instruction + (but GCC never generates call_pal by itself). We can delay (and + therefore sometimes omit) a trapb subject to the following + conditions: + + (a) On entry to the trap shadow, if any Alpha register or memory + location contains a value that is used as an operand value by some + instruction in the trap shadow (live on entry), then no instruction + in the trap shadow may modify the register or memory location. 
+ + (b) Within the trap shadow, the computation of the base register + for a memory load or store instruction may not involve using the + result of an instruction that might generate an UNPREDICTABLE + result. + + (c) Within the trap shadow, no register may be used more than once + as a destination register. (This is to make life easier for the + trap-handler.) + + (d) The trap shadow may not include any branch instructions. */ + +static void +alpha_handle_trap_shadows (void) +{ + struct shadow_summary shadow; + int trap_pending, exception_nesting; + rtx i, n; + + trap_pending = 0; + exception_nesting = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + + for (i = get_insns (); i ; i = NEXT_INSN (i)) + { + if (NOTE_P (i)) + { + switch (NOTE_KIND (i)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EPILOGUE_BEG: + if (trap_pending && alpha_tp >= ALPHA_TP_FUNC) + goto close_shadow; + break; + } + } + else if (trap_pending) + { + if (alpha_tp == ALPHA_TP_FUNC) + { + if (JUMP_P (i) + && GET_CODE (PATTERN (i)) == RETURN) + goto close_shadow; + } + else if (alpha_tp == ALPHA_TP_INSN) + { + if (optimize > 0) + { + struct shadow_summary sum; + + sum.used.i = 0; + sum.used.fp = 0; + sum.used.mem = 0; + sum.defd = sum.used; + + switch (GET_CODE (i)) + { + case INSN: + /* Annoyingly, get_attr_trap will die on these. */ + if (GET_CODE (PATTERN (i)) == USE + || GET_CODE (PATTERN (i)) == CLOBBER) + break; + + summarize_insn (PATTERN (i), &sum, 0); + + if ((sum.defd.i & shadow.defd.i) + || (sum.defd.fp & shadow.defd.fp)) + { + /* (c) would be violated */ + goto close_shadow; + } + + /* Combine shadow with summary of current insn: */ + shadow.used.i |= sum.used.i; + shadow.used.fp |= sum.used.fp; + shadow.used.mem |= sum.used.mem; + shadow.defd.i |= sum.defd.i; + shadow.defd.fp |= sum.defd.fp; + shadow.defd.mem |= sum.defd.mem; + + if ((sum.defd.i & shadow.used.i) + || (sum.defd.fp & shadow.used.fp) + || (sum.defd.mem & shadow.used.mem)) + { + /* (a) would be violated (also takes care of (b)) */ + gcc_assert (get_attr_trap (i) != TRAP_YES + || (!(sum.defd.i & sum.used.i) + && !(sum.defd.fp & sum.used.fp))); + + goto close_shadow; + } + break; + + case JUMP_INSN: + case CALL_INSN: + case CODE_LABEL: + goto close_shadow; + + default: + gcc_unreachable (); + } + } + else + { + close_shadow: + n = emit_insn_before (gen_trapb (), i); + PUT_MODE (n, TImode); + PUT_MODE (i, TImode); + trap_pending = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + } + } + } + + if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC) + && NONJUMP_INSN_P (i) + && GET_CODE (PATTERN (i)) != USE + && GET_CODE (PATTERN (i)) != CLOBBER + && get_attr_trap (i) == TRAP_YES) + { + if (optimize && !trap_pending) + summarize_insn (PATTERN (i), &shadow, 0); + trap_pending = 1; + } + } +} + +/* Alpha can only issue instruction groups simultaneously if they are + suitably aligned. This is very processor-specific. */ +/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe + that are marked "fake". These instructions do not exist on that target, + but it is possible to see these insns with deranged combinations of + command-line options, such as "-mtune=ev4 -mmax". Instead of aborting, + choose a result at random. 
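The conflict tests in alpha_handle_trap_shadows reduce to bitmask intersections between the running shadow summary and the summary of the candidate insn. A compact model of just those two checks; the struct is a simplified mirror of shadow_summary (plain fields instead of bit-fields), and the main function uses invented register numbers:

    #include <stdio.h>

    struct summary
    {
      struct { unsigned int i, fp, mem; } used, defd;
    };

    /* Return nonzero if adding INSN to SHADOW would violate the rules above:
       (c) a destination register reused within the shadow, or (a)/(b) a value
       still live in the shadow being overwritten.  */
    static int
    must_close_shadow (const struct summary *shadow, const struct summary *insn)
    {
      if ((insn->defd.i & shadow->defd.i) || (insn->defd.fp & shadow->defd.fp))
        return 1;                                   /* condition (c) */
      if ((insn->defd.i & shadow->used.i)
          || (insn->defd.fp & shadow->used.fp)
          || (insn->defd.mem & shadow->used.mem))
        return 1;                                   /* condition (a) */
      return 0;
    }

    int
    main (void)
    {
      struct summary shadow = { { 1u << 3, 0, 0 }, { 1u << 2, 0, 0 } };
      struct summary insn   = { { 0, 0, 0 },       { 1u << 3, 0, 0 } };
      /* The insn defines $3, which the shadow still reads: close the shadow.  */
      printf ("%d\n", must_close_shadow (&shadow, &insn));
      return 0;
    }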
*/ + +enum alphaev4_pipe { + EV4_STOP = 0, + EV4_IB0 = 1, + EV4_IB1 = 2, + EV4_IBX = 4 +}; + +enum alphaev5_pipe { + EV5_STOP = 0, + EV5_NONE = 1, + EV5_E01 = 2, + EV5_E0 = 4, + EV5_E1 = 8, + EV5_FAM = 16, + EV5_FA = 32, + EV5_FM = 64 +}; + +static enum alphaev4_pipe +alphaev4_insn_pipe (rtx insn) +{ + if (recog_memoized (insn) < 0) + return EV4_STOP; + if (get_attr_length (insn) != 4) + return EV4_STOP; + + switch (get_attr_type (insn)) + { + case TYPE_ILD: + case TYPE_LDSYM: + case TYPE_FLD: + case TYPE_LD_L: + return EV4_IBX; + + case TYPE_IADD: + case TYPE_ILOG: + case TYPE_ICMOV: + case TYPE_ICMP: + case TYPE_FST: + case TYPE_SHIFT: + case TYPE_IMUL: + case TYPE_FBR: + case TYPE_MVI: /* fake */ + return EV4_IB0; + + case TYPE_IST: + case TYPE_MISC: + case TYPE_IBR: + case TYPE_JSR: + case TYPE_CALLPAL: + case TYPE_FCPYS: + case TYPE_FCMOV: + case TYPE_FADD: + case TYPE_FDIV: + case TYPE_FMUL: + case TYPE_ST_C: + case TYPE_MB: + case TYPE_FSQRT: /* fake */ + case TYPE_FTOI: /* fake */ + case TYPE_ITOF: /* fake */ + return EV4_IB1; + + default: + gcc_unreachable (); + } +} + +static enum alphaev5_pipe +alphaev5_insn_pipe (rtx insn) +{ + if (recog_memoized (insn) < 0) + return EV5_STOP; + if (get_attr_length (insn) != 4) + return EV5_STOP; + + switch (get_attr_type (insn)) + { + case TYPE_ILD: + case TYPE_FLD: + case TYPE_LDSYM: + case TYPE_IADD: + case TYPE_ILOG: + case TYPE_ICMOV: + case TYPE_ICMP: + return EV5_E01; + + case TYPE_IST: + case TYPE_FST: + case TYPE_SHIFT: + case TYPE_IMUL: + case TYPE_MISC: + case TYPE_MVI: + case TYPE_LD_L: + case TYPE_ST_C: + case TYPE_MB: + case TYPE_FTOI: /* fake */ + case TYPE_ITOF: /* fake */ + return EV5_E0; + + case TYPE_IBR: + case TYPE_JSR: + case TYPE_CALLPAL: + return EV5_E1; + + case TYPE_FCPYS: + return EV5_FAM; + + case TYPE_FBR: + case TYPE_FCMOV: + case TYPE_FADD: + case TYPE_FDIV: + case TYPE_FSQRT: /* fake */ + return EV5_FA; + + case TYPE_FMUL: + return EV5_FM; + + default: + gcc_unreachable (); + } +} + +/* IN_USE is a mask of the slots currently filled within the insn group. + The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then + the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1. + + LEN is, of course, the length of the group in bytes. */ + +static rtx +alphaev4_next_group (rtx insn, int *pin_use, int *plen) +{ + int len, in_use; + + len = in_use = 0; + + if (! INSN_P (insn) + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) + goto next_and_done; + + while (1) + { + enum alphaev4_pipe pipe; + + pipe = alphaev4_insn_pipe (insn); + switch (pipe) + { + case EV4_STOP: + /* Force complex instructions to start new groups. */ + if (in_use) + goto done; + + /* If this is a completely unrecognized insn, it's an asm. + We don't know how long it is, so record length as -1 to + signal a needed realignment. */ + if (recog_memoized (insn) < 0) + len = -1; + else + len = get_attr_length (insn); + goto next_and_done; + + case EV4_IBX: + if (in_use & EV4_IB0) + { + if (in_use & EV4_IB1) + goto done; + in_use |= EV4_IB1; + } + else + in_use |= EV4_IB0 | EV4_IBX; + break; + + case EV4_IB0: + if (in_use & EV4_IB0) + { + if (!(in_use & EV4_IBX) || (in_use & EV4_IB1)) + goto done; + in_use |= EV4_IB1; + } + in_use |= EV4_IB0; + break; + + case EV4_IB1: + if (in_use & EV4_IB1) + goto done; + in_use |= EV4_IB1; + break; + + default: + gcc_unreachable (); + } + len += 4; + + /* Haifa doesn't do well scheduling branches. 
*/ + if (JUMP_P (insn)) + goto next_and_done; + + next: + insn = next_nonnote_insn (insn); + + if (!insn || ! INSN_P (insn)) + goto done; + + /* Let Haifa tell us where it thinks insn group boundaries are. */ + if (GET_MODE (insn) == TImode) + goto done; + + if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) + goto next; + } + + next_and_done: + insn = next_nonnote_insn (insn); + + done: + *plen = len; + *pin_use = in_use; + return insn; +} + +/* IN_USE is a mask of the slots currently filled within the insn group. + The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then + the insn in EV5_E0 can be swapped by the hardware into EV5_E1. + + LEN is, of course, the length of the group in bytes. */ + +static rtx +alphaev5_next_group (rtx insn, int *pin_use, int *plen) +{ + int len, in_use; + + len = in_use = 0; + + if (! INSN_P (insn) + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) + goto next_and_done; + + while (1) + { + enum alphaev5_pipe pipe; + + pipe = alphaev5_insn_pipe (insn); + switch (pipe) + { + case EV5_STOP: + /* Force complex instructions to start new groups. */ + if (in_use) + goto done; + + /* If this is a completely unrecognized insn, it's an asm. + We don't know how long it is, so record length as -1 to + signal a needed realignment. */ + if (recog_memoized (insn) < 0) + len = -1; + else + len = get_attr_length (insn); + goto next_and_done; + + /* ??? Most of the places below, we would like to assert never + happen, as it would indicate an error either in Haifa, or + in the scheduling description. Unfortunately, Haifa never + schedules the last instruction of the BB, so we don't have + an accurate TI bit to go off. */ + case EV5_E01: + if (in_use & EV5_E0) + { + if (in_use & EV5_E1) + goto done; + in_use |= EV5_E1; + } + else + in_use |= EV5_E0 | EV5_E01; + break; + + case EV5_E0: + if (in_use & EV5_E0) + { + if (!(in_use & EV5_E01) || (in_use & EV5_E1)) + goto done; + in_use |= EV5_E1; + } + in_use |= EV5_E0; + break; + + case EV5_E1: + if (in_use & EV5_E1) + goto done; + in_use |= EV5_E1; + break; + + case EV5_FAM: + if (in_use & EV5_FA) + { + if (in_use & EV5_FM) + goto done; + in_use |= EV5_FM; + } + else + in_use |= EV5_FA | EV5_FAM; + break; + + case EV5_FA: + if (in_use & EV5_FA) + goto done; + in_use |= EV5_FA; + break; + + case EV5_FM: + if (in_use & EV5_FM) + goto done; + in_use |= EV5_FM; + break; + + case EV5_NONE: + break; + + default: + gcc_unreachable (); + } + len += 4; + + /* Haifa doesn't do well scheduling branches. */ + /* ??? If this is predicted not-taken, slotting continues, except + that no more IBR, FBR, or JSR insns may be slotted. */ + if (JUMP_P (insn)) + goto next_and_done; + + next: + insn = next_nonnote_insn (insn); + + if (!insn || ! INSN_P (insn)) + goto done; + + /* Let Haifa tell us where it thinks insn group boundaries are. 
*/ + if (GET_MODE (insn) == TImode) + goto done; + + if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) + goto next; + } + + next_and_done: + insn = next_nonnote_insn (insn); + + done: + *plen = len; + *pin_use = in_use; + return insn; +} + +static rtx +alphaev4_next_nop (int *pin_use) +{ + int in_use = *pin_use; + rtx nop; + + if (!(in_use & EV4_IB0)) + { + in_use |= EV4_IB0; + nop = gen_nop (); + } + else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX) + { + in_use |= EV4_IB1; + nop = gen_nop (); + } + else if (TARGET_FP && !(in_use & EV4_IB1)) + { + in_use |= EV4_IB1; + nop = gen_fnop (); + } + else + nop = gen_unop (); + + *pin_use = in_use; + return nop; +} + +static rtx +alphaev5_next_nop (int *pin_use) +{ + int in_use = *pin_use; + rtx nop; + + if (!(in_use & EV5_E1)) + { + in_use |= EV5_E1; + nop = gen_nop (); + } + else if (TARGET_FP && !(in_use & EV5_FA)) + { + in_use |= EV5_FA; + nop = gen_fnop (); + } + else if (TARGET_FP && !(in_use & EV5_FM)) + { + in_use |= EV5_FM; + nop = gen_fnop (); + } + else + nop = gen_unop (); + + *pin_use = in_use; + return nop; +} + +/* The instruction group alignment main loop. */ + +static void +alpha_align_insns (unsigned int max_align, + rtx (*next_group) (rtx, int *, int *), + rtx (*next_nop) (int *)) +{ + /* ALIGN is the known alignment for the insn group. */ + unsigned int align; + /* OFS is the offset of the current insn in the insn group. */ + int ofs; + int prev_in_use, in_use, len, ldgp; + rtx i, next; + + /* Let shorten branches care for assigning alignments to code labels. */ + shorten_branches (get_insns ()); + + if (align_functions < 4) + align = 4; + else if ((unsigned int) align_functions < max_align) + align = align_functions; + else + align = max_align; + + ofs = prev_in_use = 0; + i = get_insns (); + if (NOTE_P (i)) + i = next_nonnote_insn (i); + + ldgp = alpha_function_needs_gp ? 8 : 0; + + while (i) + { + next = (*next_group) (i, &in_use, &len); + + /* When we see a label, resync alignment etc. */ + if (LABEL_P (i)) + { + unsigned int new_align = 1 << label_to_alignment (i); + + if (new_align >= align) + { + align = new_align < max_align ? new_align : max_align; + ofs = 0; + } + + else if (ofs & (new_align-1)) + ofs = (ofs | (new_align-1)) + 1; + gcc_assert (!len); + } + + /* Handle complex instructions special. */ + else if (in_use == 0) + { + /* Asms will have length < 0. This is a signal that we have + lost alignment knowledge. Assume, however, that the asm + will not mis-align instructions. */ + if (len < 0) + { + ofs = 0; + align = 4; + len = 0; + } + } + + /* If the known alignment is smaller than the recognized insn group, + realign the output. */ + else if ((int) align < len) + { + unsigned int new_log_align = len > 8 ? 4 : 3; + rtx prev, where; + + where = prev = prev_nonnote_insn (i); + if (!where || !LABEL_P (where)) + where = i; + + /* Can't realign between a call and its gp reload. */ + if (! (TARGET_EXPLICIT_RELOCS + && prev && CALL_P (prev))) + { + emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); + align = 1 << new_log_align; + ofs = 0; + } + } + + /* We may not insert padding inside the initial ldgp sequence. */ + else if (ldgp > 0) + ldgp -= len; + + /* If the group won't fit in the same INT16 as the previous, + we need to add padding to keep the group together. Rather + than simply leaving the insn filling to the assembler, we + can make use of the knowledge of what sorts of instructions + were issued in the previous group to make sure that all of + the added nops are really free. 
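Two bit tricks carry the offset bookkeeping in alpha_align_insns: when a label requests a smaller power-of-two alignment and the offset is not already a multiple of it, (ofs | (new_align - 1)) + 1 rounds the offset up, and at the bottom of the loop (ofs + len) & (align - 1) keeps the offset reduced modulo the group alignment. A quick standalone check with arbitrary numbers:

    #include <assert.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned align = 16;          /* known group alignment (power of two) */
      unsigned new_align = 8;       /* alignment requested by a label       */
      unsigned ofs = 5;             /* current offset within the group      */

      /* Round OFS up to the next multiple of NEW_ALIGN; the loop above only
         takes this path when OFS is not already a multiple.  */
      if (ofs & (new_align - 1))
        ofs = (ofs | (new_align - 1)) + 1;
      assert (ofs == 8);

      /* Advance past a 12-byte group and reduce modulo the alignment.  */
      unsigned len = 12;
      ofs = (ofs + len) & (align - 1);
      assert (ofs == 4);

      printf ("ofs = %u\n", ofs);
      return 0;
    }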
*/ + else if (ofs + len > (int) align) + { + int nop_count = (align - ofs) / 4; + rtx where; + + /* Insert nops before labels, branches, and calls to truly merge + the execution of the nops with the previous instruction group. */ + where = prev_nonnote_insn (i); + if (where) + { + if (LABEL_P (where)) + { + rtx where2 = prev_nonnote_insn (where); + if (where2 && JUMP_P (where2)) + where = where2; + } + else if (NONJUMP_INSN_P (where)) + where = i; + } + else + where = i; + + do + emit_insn_before ((*next_nop)(&prev_in_use), where); + while (--nop_count); + ofs = 0; + } + + ofs = (ofs + len) & (align - 1); + prev_in_use = in_use; + i = next; + } +} + +/* Insert an unop between sibcall or noreturn function call and GP load. */ + +static void +alpha_pad_function_end (void) +{ + rtx insn, next; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (! (CALL_P (insn) + && (SIBLING_CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL_RTX)))) + continue; + + next = next_active_insn (insn); + + if (next) + { + rtx pat = PATTERN (next); + + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE + && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) + emit_insn_after (gen_unop (), insn); + } + } +} + +/* Machine dependent reorg pass. */ + +static void +alpha_reorg (void) +{ + /* Workaround for a linker error that triggers when an exception + handler immediatelly follows a sibcall or a noreturn function. + +In the sibcall case: + + The instruction stream from an object file: + + 1d8: 00 00 fb 6b jmp (t12) + 1dc: 00 00 ba 27 ldah gp,0(ra) + 1e0: 00 00 bd 23 lda gp,0(gp) + 1e4: 00 00 7d a7 ldq t12,0(gp) + 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec> + + was converted in the final link pass to: + + 12003aa88: 67 fa ff c3 br 120039428 <...> + 12003aa8c: 00 00 fe 2f unop + 12003aa90: 00 00 fe 2f unop + 12003aa94: 48 83 7d a7 ldq t12,-31928(gp) + 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec> + +And in the noreturn case: + + The instruction stream from an object file: + + 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58> + 58: 00 00 ba 27 ldah gp,0(ra) + 5c: 00 00 bd 23 lda gp,0(gp) + 60: 00 00 7d a7 ldq t12,0(gp) + 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68> + + was converted in the final link pass to: + + fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> + fdb28: 00 00 fe 2f unop + fdb2c: 00 00 fe 2f unop + fdb30: 30 82 7d a7 ldq t12,-32208(gp) + fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68> + + GP load instructions were wrongly cleared by the linker relaxation + pass. This workaround prevents removal of GP loads by inserting + an unop instruction between a sibcall or noreturn function call and + exception handler prologue. */ + + if (current_function_has_exception_handlers ()) + alpha_pad_function_end (); + + if (alpha_tp != ALPHA_TP_PROG || flag_exceptions) + alpha_handle_trap_shadows (); + + /* Due to the number of extra trapb insns, don't bother fixing up + alignment when trap precision is instruction. Moreover, we can + only do our job when sched2 is run. 
*/ + if (optimize && !optimize_size + && alpha_tp != ALPHA_TP_INSN + && flag_schedule_insns_after_reload) + { + if (alpha_tune == PROCESSOR_EV4) + alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop); + else if (alpha_tune == PROCESSOR_EV5) + alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop); + } +} + +#if !TARGET_ABI_UNICOSMK + +#ifdef HAVE_STAMP_H +#include +#endif + +static void +alpha_file_start (void) +{ +#ifdef OBJECT_FORMAT_ELF + /* If emitting dwarf2 debug information, we cannot generate a .file + directive to start the file, as it will conflict with dwarf2out + file numbers. So it's only useful when emitting mdebug output. */ + targetm.asm_file_start_file_directive = (write_symbols == DBX_DEBUG); +#endif + + default_file_start (); +#ifdef MS_STAMP + fprintf (asm_out_file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP); +#endif + + fputs ("\t.set noreorder\n", asm_out_file); + fputs ("\t.set volatile\n", asm_out_file); + if (!TARGET_ABI_OPEN_VMS) + fputs ("\t.set noat\n", asm_out_file); + if (TARGET_EXPLICIT_RELOCS) + fputs ("\t.set nomacro\n", asm_out_file); + if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX) + { + const char *arch; + + if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX) + arch = "ev6"; + else if (TARGET_MAX) + arch = "pca56"; + else if (TARGET_BWX) + arch = "ev56"; + else if (alpha_cpu == PROCESSOR_EV5) + arch = "ev5"; + else + arch = "ev4"; + + fprintf (asm_out_file, "\t.arch %s\n", arch); + } +} +#endif + +#ifdef OBJECT_FORMAT_ELF +/* Since we don't have a .dynbss section, we should not allow global + relocations in the .rodata section. */ + +static int +alpha_elf_reloc_rw_mask (void) +{ + return flag_pic ? 3 : 2; +} + +/* Return a section for X. The only special thing we do here is to + honor small data. */ + +static section * +alpha_elf_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) + /* ??? Consider using mergeable sdata sections. */ + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +static unsigned int +alpha_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = 0; + + if (strcmp (name, ".sdata") == 0 + || strncmp (name, ".sdata.", 7) == 0 + || strncmp (name, ".gnu.linkonce.s.", 16) == 0 + || strcmp (name, ".sbss") == 0 + || strncmp (name, ".sbss.", 6) == 0 + || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) + flags = SECTION_SMALL; + + flags |= default_section_type_flags (decl, name, reloc); + return flags; +} +#endif /* OBJECT_FORMAT_ELF */ + +/* Structure to collect function names for final output in link section. */ +/* Note that items marked with GTY can't be ifdef'ed out. */ + +enum links_kind {KIND_UNUSED, KIND_LOCAL, KIND_EXTERN}; +enum reloc_kind {KIND_LINKAGE, KIND_CODEADDR}; + +struct GTY(()) alpha_links +{ + int num; + const char *target; + rtx linkage; + enum links_kind lkind; + enum reloc_kind rkind; +}; + +struct GTY(()) alpha_funcs +{ + int num; + splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *))) + links; +}; + +static GTY ((param1_is (char *), param2_is (struct alpha_links *))) + splay_tree alpha_links_tree; +static GTY ((param1_is (tree), param2_is (struct alpha_funcs *))) + splay_tree alpha_funcs_tree; + +static GTY(()) int alpha_funcs_num; + +#if TARGET_ABI_OPEN_VMS + +/* Return the VMS argument type corresponding to MODE. 
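The small-data test above routes sufficiently small objects into .sdata so they remain reachable through 16-bit $gp-relative displacements; the threshold is the -G value (g_switch_value). A simplified, illustrative picker under that assumption (the code above only handles constant-pool entries; the .sbss case is shown for completeness):

    #include <stdio.h>

    /* Pick a section by the same size test used above: objects no larger
       than the -G threshold go to the small-data sections.  */
    static const char *
    pick_section (unsigned size, unsigned g_threshold, int initialized)
    {
      if (size <= g_threshold)
        return initialized ? ".sdata" : ".sbss";
      return initialized ? ".data" : ".bss";
    }

    int
    main (void)
    {
      printf ("%s %s\n",
              pick_section (4, 8, 1),     /* small, initialized:  .sdata */
              pick_section (64, 8, 0));   /* too large for -G 8:  .bss   */
      return 0;
    }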
*/ + +enum avms_arg_type +alpha_arg_type (enum machine_mode mode) +{ + switch (mode) + { + case SFmode: + return TARGET_FLOAT_VAX ? FF : FS; + case DFmode: + return TARGET_FLOAT_VAX ? FD : FT; + default: + return I64; + } +} + +/* Return an rtx for an integer representing the VMS Argument Information + register value. */ + +rtx +alpha_arg_info_reg_val (CUMULATIVE_ARGS cum) +{ + unsigned HOST_WIDE_INT regval = cum.num_args; + int i; + + for (i = 0; i < 6; i++) + regval |= ((int) cum.atypes[i]) << (i * 3 + 8); + + return GEN_INT (regval); +} + +/* Register the need for a (fake) .linkage entry for calls to function NAME. + IS_LOCAL is 1 if this is for a definition, 0 if this is for a real call. + Return a SYMBOL_REF suited to the call instruction. */ + +rtx +alpha_need_linkage (const char *name, int is_local) +{ + splay_tree_node node; + struct alpha_links *al; + const char *target; + tree id; + + if (name[0] == '*') + name++; + + if (is_local) + { + struct alpha_funcs *cfaf; + + if (!alpha_funcs_tree) + alpha_funcs_tree = splay_tree_new_ggc + (splay_tree_compare_pointers, + ggc_alloc_splay_tree_tree_node_tree_node_splay_tree_s, + ggc_alloc_splay_tree_tree_node_tree_node_splay_tree_node_s); + + + cfaf = ggc_alloc_alpha_funcs (); + + cfaf->links = 0; + cfaf->num = ++alpha_funcs_num; + + splay_tree_insert (alpha_funcs_tree, + (splay_tree_key) current_function_decl, + (splay_tree_value) cfaf); + } + + if (alpha_links_tree) + { + /* Is this name already defined? */ + + node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name); + if (node) + { + al = (struct alpha_links *) node->value; + if (is_local) + { + /* Defined here but external assumed. */ + if (al->lkind == KIND_EXTERN) + al->lkind = KIND_LOCAL; + } + else + { + /* Used here but unused assumed. */ + if (al->lkind == KIND_UNUSED) + al->lkind = KIND_LOCAL; + } + return al->linkage; + } + } + else + alpha_links_tree = splay_tree_new_ggc + ((splay_tree_compare_fn) strcmp, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_s, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s); + + al = ggc_alloc_alpha_links (); + name = ggc_strdup (name); + + /* Assume external if no definition. */ + al->lkind = (is_local ? KIND_UNUSED : KIND_EXTERN); + + /* Ensure we have an IDENTIFIER so assemble_name can mark it used + and find the ultimate alias target like assemble_name. */ + id = get_identifier (name); + target = NULL; + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + { + id = TREE_CHAIN (id); + target = IDENTIFIER_POINTER (id); + } + + al->target = target ? target : name; + al->linkage = gen_rtx_SYMBOL_REF (Pmode, name); + + splay_tree_insert (alpha_links_tree, (splay_tree_key) name, + (splay_tree_value) al); + + return al->linkage; +} + +/* Return a SYMBOL_REF representing the reference to the .linkage entry + of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if + this is the reference to the linkage pointer value, 0 if this is the + reference to the function entry value. RFLAG is 1 if this a reduced + reference (code address only), 0 if this is a full reference. 
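alpha_arg_info_reg_val above packs the argument count into the low bits of the Argument Information register and one 3-bit type code per argument starting at bit 8. A standalone round-trip of that packing; the numeric type codes are stand-ins for the avms_arg_type values:

    #include <stdio.h>

    int
    main (void)
    {
      unsigned long regval = 3;            /* three arguments              */
      int types[3] = { 4, 1, 1 };          /* stand-ins for avms_arg_type  */

      /* Pack, exactly as the loop above does.  */
      for (int i = 0; i < 3; i++)
        regval |= (unsigned long) types[i] << (i * 3 + 8);

      /* Unpack the 3-bit fields again.  */
      for (int i = 0; i < 3; i++)
        printf ("arg %d: type code %lu\n", i + 1, (regval >> (i * 3 + 8)) & 7);
      return 0;
    }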
*/ + +rtx +alpha_use_linkage (rtx func, tree cfundecl, int lflag, int rflag) +{ + splay_tree_node cfunnode; + struct alpha_funcs *cfaf; + struct alpha_links *al; + const char *name = XSTR (func, 0); + + cfaf = (struct alpha_funcs *) 0; + al = (struct alpha_links *) 0; + + cfunnode = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) cfundecl); + cfaf = (struct alpha_funcs *) cfunnode->value; + + if (cfaf->links) + { + splay_tree_node lnode; + + /* Is this name already defined? */ + + lnode = splay_tree_lookup (cfaf->links, (splay_tree_key) name); + if (lnode) + al = (struct alpha_links *) lnode->value; + } + else + cfaf->links = splay_tree_new_ggc + ((splay_tree_compare_fn) strcmp, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_s, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s); + + if (!al) + { + size_t name_len; + size_t buflen; + char *linksym; + splay_tree_node node = 0; + struct alpha_links *anl; + + if (name[0] == '*') + name++; + + name_len = strlen (name); + linksym = (char *) alloca (name_len + 50); + + al = ggc_alloc_alpha_links (); + al->num = cfaf->num; + al->target = NULL; + + node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name); + if (node) + { + anl = (struct alpha_links *) node->value; + al->lkind = anl->lkind; + name = anl->target; + } + + sprintf (linksym, "$%d..%s..lk", cfaf->num, name); + buflen = strlen (linksym); + + al->linkage = gen_rtx_SYMBOL_REF + (Pmode, ggc_alloc_string (linksym, buflen + 1)); + + splay_tree_insert (cfaf->links, (splay_tree_key) name, + (splay_tree_value) al); + } + + if (rflag) + al->rkind = KIND_CODEADDR; + else + al->rkind = KIND_LINKAGE; + + if (lflag) + return gen_rtx_MEM (Pmode, plus_constant (al->linkage, 8)); + else + return al->linkage; +} + +static int +alpha_write_one_linkage (splay_tree_node node, void *data) +{ + const char *const name = (const char *) node->key; + struct alpha_links *link = (struct alpha_links *) node->value; + FILE *stream = (FILE *) data; + + fprintf (stream, "$%d..%s..lk:\n", link->num, name); + if (link->rkind == KIND_CODEADDR) + { + if (link->lkind == KIND_LOCAL) + { + /* Local and used */ + fprintf (stream, "\t.quad %s..en\n", name); + } + else + { + /* External and used, request code address. */ + fprintf (stream, "\t.code_address %s\n", name); + } + } + else + { + if (link->lkind == KIND_LOCAL) + { + /* Local and used, build linkage pair. */ + fprintf (stream, "\t.quad %s..en\n", name); + fprintf (stream, "\t.quad %s\n", name); + } + else + { + /* External and used, request linkage pair. */ + fprintf (stream, "\t.linkage %s\n", name); + } + } + + return 0; +} + +static void +alpha_write_linkage (FILE *stream, const char *funname, tree fundecl) +{ + splay_tree_node node; + struct alpha_funcs *func; + + fprintf (stream, "\t.link\n"); + fprintf (stream, "\t.align 3\n"); + in_section = NULL; + + node = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) fundecl); + func = (struct alpha_funcs *) node->value; + + fputs ("\t.name ", stream); + assemble_name (stream, funname); + fputs ("..na\n", stream); + ASM_OUTPUT_LABEL (stream, funname); + fprintf (stream, "\t.pdesc "); + assemble_name (stream, funname); + fprintf (stream, "..en,%s\n", + alpha_procedure_type == PT_STACK ? "stack" + : alpha_procedure_type == PT_REGISTER ? "reg" : "null"); + + if (func->links) + { + splay_tree_foreach (func->links, alpha_write_one_linkage, stream); + /* splay_tree_delete (func->links); */ + } +} + +/* Switch to an arbitrary section NAME with attributes as specified + by FLAGS. 
ALIGN specifies any known alignment requirements for + the section; 0 if the default should be used. */ + +static void +vms_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + fputc ('\n', asm_out_file); + fprintf (asm_out_file, ".section\t%s", name); + + if (flags & SECTION_DEBUG) + fprintf (asm_out_file, ",NOWRT"); + + fputc ('\n', asm_out_file); +} + +/* Record an element in the table of global constructors. SYMBOL is + a SYMBOL_REF of the function to be called; PRIORITY is a number + between 0 and MAX_INIT_PRIORITY. + + Differs from default_ctors_section_asm_out_constructor in that the + width of the .ctors entry is always 64 bits, rather than the 32 bits + used by a normal pointer. */ + +static void +vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + switch_to_section (ctors_section); + assemble_align (BITS_PER_WORD); + assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); +} + +static void +vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + switch_to_section (dtors_section); + assemble_align (BITS_PER_WORD); + assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); +} +#else + +rtx +alpha_need_linkage (const char *name ATTRIBUTE_UNUSED, + int is_local ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + +rtx +alpha_use_linkage (rtx func ATTRIBUTE_UNUSED, + tree cfundecl ATTRIBUTE_UNUSED, + int lflag ATTRIBUTE_UNUSED, + int rflag ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + +#endif /* TARGET_ABI_OPEN_VMS */ + +#if TARGET_ABI_UNICOSMK + +/* This evaluates to true if we do not know how to pass TYPE solely in + registers. This is the case for all arguments that do not fit in two + registers. */ + +static bool +unicosmk_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + if (type == NULL) + return false; + + if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) + return true; + if (TREE_ADDRESSABLE (type)) + return true; + + return ALPHA_ARG_SIZE (mode, type, 0) > 2; +} + +/* Define the offset between two registers, one to be eliminated, and the + other its replacement, at the start of a routine. */ + +int +unicosmk_initial_elimination_offset (int from, int to) +{ + int fixed_size; + + fixed_size = alpha_sa_size(); + if (fixed_size != 0) + fixed_size += 48; + + if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return -fixed_size; + else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return 0; + else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return (ALPHA_ROUND (crtl->outgoing_args_size) + + ALPHA_ROUND (get_frame_size())); + else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return (ALPHA_ROUND (fixed_size) + + ALPHA_ROUND (get_frame_size() + + crtl->outgoing_args_size)); + else + gcc_unreachable (); +} + +/* Output the module name for .ident and .end directives. We have to strip + directories and add make sure that the module name starts with a letter + or '$'. */ + +static void +unicosmk_output_module_name (FILE *file) +{ + const char *name = lbasename (main_input_filename); + unsigned len = strlen (name); + char *clean_name = alloca (len + 2); + char *ptr = clean_name; + + /* CAM only accepts module names that start with a letter or '$'. We + prefix the module name with a '$' if necessary. */ + + if (!ISALPHA (*name)) + *ptr++ = '$'; + memcpy (ptr, name, len + 1); + clean_symbol_name (clean_name); + fputs (clean_name, file); +} + +/* Output the definition of a common variable. 
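unicosmk_output_module_name strips directories and prefixes '$' when the basename does not start with a letter, then runs the result through clean_symbol_name. A rough standalone equivalent; the character cleanup here is only a guess at what the real helper does, and just the '$'-prefix rule is taken from the code:

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    static void
    print_module_name (const char *path)
    {
      const char *name = strrchr (path, '/');
      name = name ? name + 1 : path;

      char buf[256];
      size_t n = 0;
      if (!isalpha ((unsigned char) *name))
        buf[n++] = '$';                       /* CAM wants a letter or '$' */
      for (; *name && n + 1 < sizeof buf; name++)
        buf[n++] = (isalnum ((unsigned char) *name)
                    || *name == '$' || *name == '_')
                   ? *name : '_';             /* stand-in for clean_symbol_name */
      buf[n] = '\0';
      puts (buf);
    }

    int
    main (void)
    {
      print_module_name ("src/3dmodel.c");   /* prints $3dmodel_c */
      print_module_name ("src/alpha.c");     /* prints alpha_c    */
      return 0;
    }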
*/ + +void +unicosmk_output_common (FILE *file, const char *name, int size, int align) +{ + tree name_tree; + printf ("T3E__: common %s\n", name); + + in_section = NULL; + fputs("\t.endp\n\n\t.psect ", file); + assemble_name(file, name); + fprintf(file, ",%d,common\n", floor_log2 (align / BITS_PER_UNIT)); + fprintf(file, "\t.byte\t0:%d\n", size); + + /* Mark the symbol as defined in this module. */ + name_tree = get_identifier (name); + TREE_ASM_WRITTEN (name_tree) = 1; +} + +#define SECTION_PUBLIC SECTION_MACH_DEP +#define SECTION_MAIN (SECTION_PUBLIC << 1) +static int current_section_align; + +/* A get_unnamed_section callback for switching to the text section. */ + +static void +unicosmk_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED) +{ + static int count = 0; + fprintf (asm_out_file, "\t.endp\n\n\t.psect\tgcc@text___%d,code\n", count++); +} + +/* A get_unnamed_section callback for switching to the data section. */ + +static void +unicosmk_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED) +{ + static int count = 1; + fprintf (asm_out_file, "\t.endp\n\n\t.psect\tgcc@data___%d,data\n", count++); +} + +/* Implement TARGET_ASM_INIT_SECTIONS. + + The Cray assembler is really weird with respect to sections. It has only + named sections and you can't reopen a section once it has been closed. + This means that we have to generate unique names whenever we want to + reenter the text or the data section. */ + +static void +unicosmk_init_sections (void) +{ + text_section = get_unnamed_section (SECTION_CODE, + unicosmk_output_text_section_asm_op, + NULL); + data_section = get_unnamed_section (SECTION_WRITE, + unicosmk_output_data_section_asm_op, + NULL); + readonly_data_section = data_section; +} + +static unsigned int +unicosmk_section_type_flags (tree decl, const char *name, + int reloc ATTRIBUTE_UNUSED) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + + if (!decl) + return flags; + + if (TREE_CODE (decl) == FUNCTION_DECL) + { + current_section_align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); + if (align_functions_log > current_section_align) + current_section_align = align_functions_log; + + if (! strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), "main")) + flags |= SECTION_MAIN; + } + else + current_section_align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT); + + if (TREE_PUBLIC (decl)) + flags |= SECTION_PUBLIC; + + return flags; +} + +/* Generate a section name for decl and associate it with the + declaration. */ + +static void +unicosmk_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED) +{ + const char *name; + int len; + + gcc_assert (decl); + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = default_strip_name_encoding (name); + len = strlen (name); + + if (TREE_CODE (decl) == FUNCTION_DECL) + { + char *string; + + /* It is essential that we prefix the section name here because + otherwise the section names generated for constructors and + destructors confuse collect2. */ + + string = alloca (len + 6); + sprintf (string, "code@%s", name); + DECL_SECTION_NAME (decl) = build_string (len + 5, string); + } + else if (TREE_PUBLIC (decl)) + DECL_SECTION_NAME (decl) = build_string (len, name); + else + { + char *string; + + string = alloca (len + 6); + sprintf (string, "data@%s", name); + DECL_SECTION_NAME (decl) = build_string (len + 5, string); + } +} + +/* Switch to an arbitrary section NAME with attributes as specified + by FLAGS. 
ALIGN specifies any known alignment requirements for + the section; 0 if the default should be used. */ + +static void +unicosmk_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + const char *kind; + + /* Close the previous section. */ + + fputs ("\t.endp\n\n", asm_out_file); + + /* Find out what kind of section we are opening. */ + + if (flags & SECTION_MAIN) + fputs ("\t.start\tmain\n", asm_out_file); + + if (flags & SECTION_CODE) + kind = "code"; + else if (flags & SECTION_PUBLIC) + kind = "common"; + else + kind = "data"; + + if (current_section_align != 0) + fprintf (asm_out_file, "\t.psect\t%s,%d,%s\n", name, + current_section_align, kind); + else + fprintf (asm_out_file, "\t.psect\t%s,%s\n", name, kind); +} + +static void +unicosmk_insert_attributes (tree decl, tree *attr_ptr ATTRIBUTE_UNUSED) +{ + if (DECL_P (decl) + && (TREE_PUBLIC (decl) || TREE_CODE (decl) == FUNCTION_DECL)) + unicosmk_unique_section (decl, 0); +} + +/* Output an alignment directive. We have to use the macro 'gcc@code@align' + in code sections because .align fill unused space with zeroes. */ + +void +unicosmk_output_align (FILE *file, int align) +{ + if (inside_function) + fprintf (file, "\tgcc@code@align\t%d\n", align); + else + fprintf (file, "\t.align\t%d\n", align); +} + +/* Add a case vector to the current function's list of deferred case + vectors. Case vectors have to be put into a separate section because CAM + does not allow data definitions in code sections. */ + +void +unicosmk_defer_case_vector (rtx lab, rtx vec) +{ + struct machine_function *machine = cfun->machine; + + vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec); + machine->addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, + machine->addr_list); +} + +/* Output a case vector. */ + +static void +unicosmk_output_addr_vec (FILE *file, rtx vec) +{ + rtx lab = XEXP (vec, 0); + rtx body = XEXP (vec, 1); + int vlen = XVECLEN (body, 0); + int idx; + + (*targetm.asm_out.internal_label) (file, "L", CODE_LABEL_NUMBER (lab)); + + for (idx = 0; idx < vlen; idx++) + { + ASM_OUTPUT_ADDR_VEC_ELT + (file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); + } +} + +/* Output current function's deferred case vectors. */ + +static void +unicosmk_output_deferred_case_vectors (FILE *file) +{ + struct machine_function *machine = cfun->machine; + rtx t; + + if (machine->addr_list == NULL_RTX) + return; + + switch_to_section (data_section); + for (t = machine->addr_list; t; t = XEXP (t, 1)) + unicosmk_output_addr_vec (file, XEXP (t, 0)); +} + +/* Generate the name of the SSIB section for the current function. */ + +#define SSIB_PREFIX "__SSIB_" +#define SSIB_PREFIX_LEN 7 + +static const char * +unicosmk_ssib_name (void) +{ + /* This is ok since CAM won't be able to deal with names longer than that + anyway. */ + + static char name[256]; + + rtx x; + const char *fnname; + int len; + + x = DECL_RTL (cfun->decl); + gcc_assert (MEM_P (x)); + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == SYMBOL_REF); + fnname = XSTR (x, 0); + + len = strlen (fnname); + if (len + SSIB_PREFIX_LEN > 255) + len = 255 - SSIB_PREFIX_LEN; + + strcpy (name, SSIB_PREFIX); + strncpy (name + SSIB_PREFIX_LEN, fnname, len); + name[len + SSIB_PREFIX_LEN] = 0; + + return name; +} + +/* Set up the dynamic subprogram information block (DSIB) and update the + frame pointer register ($15) for subroutines which have a frame. If the + subroutine doesn't have a frame, simply increment $15. 
*/ + +static void +unicosmk_gen_dsib (unsigned long *imaskP) +{ + if (alpha_procedure_type == PT_STACK) + { + const char *ssib_name; + rtx mem; + + /* Allocate 64 bytes for the DSIB. */ + + FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-64)))); + emit_insn (gen_blockage ()); + + /* Save the return address. */ + + mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 56)); + FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_RA))); + (*imaskP) &= ~(1UL << REG_RA); + + /* Save the old frame pointer. */ + + mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 48)); + FRP (emit_move_insn (mem, hard_frame_pointer_rtx)); + (*imaskP) &= ~(1UL << HARD_FRAME_POINTER_REGNUM); + + emit_insn (gen_blockage ()); + + /* Store the SSIB pointer. */ + + ssib_name = ggc_strdup (unicosmk_ssib_name ()); + mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 32)); + + FRP (emit_move_insn (gen_rtx_REG (DImode, 5), + gen_rtx_SYMBOL_REF (Pmode, ssib_name))); + FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 5))); + + /* Save the CIW index. */ + + mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 24)); + FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 25))); + + emit_insn (gen_blockage ()); + + /* Set the new frame pointer. */ + FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, GEN_INT (64)))); + } + else + { + /* Increment the frame pointer register to indicate that we do not + have a frame. */ + emit_insn (gen_adddi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, const1_rtx)); + } +} + +/* Output the static subroutine information block for the current + function. */ + +static void +unicosmk_output_ssib (FILE *file, const char *fnname) +{ + int len; + int i; + rtx x; + rtx ciw; + struct machine_function *machine = cfun->machine; + + in_section = NULL; + fprintf (file, "\t.endp\n\n\t.psect\t%s%s,data\n", user_label_prefix, + unicosmk_ssib_name ()); + + /* Some required stuff and the function name length. */ + + len = strlen (fnname); + fprintf (file, "\t.quad\t^X20008%2.2X28\n", len); + + /* Saved registers + ??? We don't do that yet. */ + + fputs ("\t.quad\t0\n", file); + + /* Function address. */ + + fputs ("\t.quad\t", file); + assemble_name (file, fnname); + putc ('\n', file); + + fputs ("\t.quad\t0\n", file); + fputs ("\t.quad\t0\n", file); + + /* Function name. + ??? We do it the same way Cray CC does it but this could be + simplified. */ + + for( i = 0; i < len; i++ ) + fprintf (file, "\t.byte\t%d\n", (int)(fnname[i])); + if( (len % 8) == 0 ) + fputs ("\t.quad\t0\n", file); + else + fprintf (file, "\t.bits\t%d : 0\n", (8 - (len % 8))*8); + + /* All call information words used in the function. */ + + for (x = machine->first_ciw; x; x = XEXP (x, 1)) + { + ciw = XEXP (x, 0); +#if HOST_BITS_PER_WIDE_INT == 32 + fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_DOUBLE_HEX "\n", + CONST_DOUBLE_HIGH (ciw), CONST_DOUBLE_LOW (ciw)); +#else + fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n", INTVAL (ciw)); +#endif + } +} + +/* Add a call information word (CIW) to the list of the current function's + CIWs and return its index. + + X is a CONST_INT or CONST_DOUBLE representing the CIW. 
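The stores in unicosmk_gen_dsib above imply the following layout for the 64-byte dynamic subprogram information block. Only the offsets come from the code; the field names are descriptive guesses, and an 8-byte unsigned long is assumed:

    #include <stddef.h>
    #include <stdio.h>

    struct dsib_layout            /* offsets relative to the decremented SP */
    {
      unsigned long reserved[3];  /*  0..23: not written here               */
      unsigned long ciw_index;    /* 24: call information word index ($25)  */
      unsigned long ssib_ptr;     /* 32: address of the static SIB          */
      unsigned long reserved2;    /* 40: not written here                   */
      unsigned long old_fp;       /* 48: caller's frame pointer ($15)       */
      unsigned long ret_addr;     /* 56: return address ($26)               */
    };

    int
    main (void)
    {
      printf ("size %zu, old_fp at %zu, ret_addr at %zu\n",
              sizeof (struct dsib_layout),
              offsetof (struct dsib_layout, old_fp),
              offsetof (struct dsib_layout, ret_addr));
      return 0;
    }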
*/ + +rtx +unicosmk_add_call_info_word (rtx x) +{ + rtx node; + struct machine_function *machine = cfun->machine; + + node = gen_rtx_EXPR_LIST (VOIDmode, x, NULL_RTX); + if (machine->first_ciw == NULL_RTX) + machine->first_ciw = node; + else + XEXP (machine->last_ciw, 1) = node; + + machine->last_ciw = node; + ++machine->ciw_count; + + return GEN_INT (machine->ciw_count + + strlen (current_function_name ())/8 + 5); +} + +/* The Cray assembler doesn't accept extern declarations for symbols which + are defined in the same file. We have to keep track of all global + symbols which are referenced and/or defined in a source file and output + extern declarations for those which are referenced but not defined at + the end of file. */ + +/* List of identifiers for which an extern declaration might have to be + emitted. */ +/* FIXME: needs to use GC, so it can be saved and restored for PCH. */ + +struct unicosmk_extern_list +{ + struct unicosmk_extern_list *next; + const char *name; +}; + +static struct unicosmk_extern_list *unicosmk_extern_head = 0; + +/* Output extern declarations which are required for every asm file. */ + +static void +unicosmk_output_default_externs (FILE *file) +{ + static const char *const externs[] = + { "__T3E_MISMATCH" }; + + int i; + int n; + + n = ARRAY_SIZE (externs); + + for (i = 0; i < n; i++) + fprintf (file, "\t.extern\t%s\n", externs[i]); +} + +/* Output extern declarations for global symbols which are have been + referenced but not defined. */ + +static void +unicosmk_output_externs (FILE *file) +{ + struct unicosmk_extern_list *p; + const char *real_name; + int len; + tree name_tree; + + len = strlen (user_label_prefix); + for (p = unicosmk_extern_head; p != 0; p = p->next) + { + /* We have to strip the encoding and possibly remove user_label_prefix + from the identifier in order to handle -fleading-underscore and + explicit asm names correctly (cf. gcc.dg/asm-names-1.c). */ + real_name = default_strip_name_encoding (p->name); + if (len && p->name[0] == '*' + && !memcmp (real_name, user_label_prefix, len)) + real_name += len; + + name_tree = get_identifier (real_name); + if (! TREE_ASM_WRITTEN (name_tree)) + { + TREE_ASM_WRITTEN (name_tree) = 1; + fputs ("\t.extern\t", file); + assemble_name (file, p->name); + putc ('\n', file); + } + } +} + +/* Record an extern. */ + +void +unicosmk_add_extern (const char *name) +{ + struct unicosmk_extern_list *p; + + p = (struct unicosmk_extern_list *) + xmalloc (sizeof (struct unicosmk_extern_list)); + p->next = unicosmk_extern_head; + p->name = name; + unicosmk_extern_head = p; +} + +/* The Cray assembler generates incorrect code if identifiers which + conflict with register names are used as instruction operands. We have + to replace such identifiers with DEX expressions. */ + +/* Structure to collect identifiers which have been replaced by DEX + expressions. */ +/* FIXME: needs to use GC, so it can be saved and restored for PCH. */ + +struct unicosmk_dex { + struct unicosmk_dex *next; + const char *name; +}; + +/* List of identifiers which have been replaced by DEX expressions. The DEX + number is determined by the position in the list. */ + +static struct unicosmk_dex *unicosmk_dex_list = NULL; + +/* The number of elements in the DEX list. */ + +static int unicosmk_dex_count = 0; + +/* Check if NAME must be replaced by a DEX expression. 
*/ + +static int +unicosmk_special_name (const char *name) +{ + if (name[0] == '*') + ++name; + + if (name[0] == '$') + ++name; + + if (name[0] != 'r' && name[0] != 'f' && name[0] != 'R' && name[0] != 'F') + return 0; + + switch (name[1]) + { + case '1': case '2': + return (name[2] == '\0' || (ISDIGIT (name[2]) && name[3] == '\0')); + + case '3': + return (name[2] == '\0' + || ((name[2] == '0' || name[2] == '1') && name[3] == '\0')); + + default: + return (ISDIGIT (name[1]) && name[2] == '\0'); + } +} + +/* Return the DEX number if X must be replaced by a DEX expression and 0 + otherwise. */ + +static int +unicosmk_need_dex (rtx x) +{ + struct unicosmk_dex *dex; + const char *name; + int i; + + if (GET_CODE (x) != SYMBOL_REF) + return 0; + + name = XSTR (x,0); + if (! unicosmk_special_name (name)) + return 0; + + i = unicosmk_dex_count; + for (dex = unicosmk_dex_list; dex; dex = dex->next) + { + if (! strcmp (name, dex->name)) + return i; + --i; + } + + dex = (struct unicosmk_dex *) xmalloc (sizeof (struct unicosmk_dex)); + dex->name = name; + dex->next = unicosmk_dex_list; + unicosmk_dex_list = dex; + + ++unicosmk_dex_count; + return unicosmk_dex_count; +} + +/* Output the DEX definitions for this file. */ + +static void +unicosmk_output_dex (FILE *file) +{ + struct unicosmk_dex *dex; + int i; + + if (unicosmk_dex_list == NULL) + return; + + fprintf (file, "\t.dexstart\n"); + + i = unicosmk_dex_count; + for (dex = unicosmk_dex_list; dex; dex = dex->next) + { + fprintf (file, "\tDEX (%d) = ", i); + assemble_name (file, dex->name); + putc ('\n', file); + --i; + } + + fprintf (file, "\t.dexend\n"); +} + +/* Output text that to appear at the beginning of an assembler file. */ + +static void +unicosmk_file_start (void) +{ + int i; + + fputs ("\t.ident\t", asm_out_file); + unicosmk_output_module_name (asm_out_file); + fputs ("\n\n", asm_out_file); + + /* The Unicos/Mk assembler uses different register names. Instead of trying + to support them, we simply use micro definitions. */ + + /* CAM has different register names: rN for the integer register N and fN + for the floating-point register N. Instead of trying to use these in + alpha.md, we define the symbols $N and $fN to refer to the appropriate + register. */ + + for (i = 0; i < 32; ++i) + fprintf (asm_out_file, "$%d <- r%d\n", i, i); + + for (i = 0; i < 32; ++i) + fprintf (asm_out_file, "$f%d <- f%d\n", i, i); + + putc ('\n', asm_out_file); + + /* The .align directive fill unused space with zeroes which does not work + in code sections. We define the macro 'gcc@code@align' which uses nops + instead. Note that it assumes that code sections always have the + biggest possible alignment since . refers to the current offset from + the beginning of the section. */ + + fputs ("\t.macro gcc@code@align n\n", asm_out_file); + fputs ("gcc@n@bytes = 1 << n\n", asm_out_file); + fputs ("gcc@here = . % gcc@n@bytes\n", asm_out_file); + fputs ("\t.if ne, gcc@here, 0\n", asm_out_file); + fputs ("\t.repeat (gcc@n@bytes - gcc@here) / 4\n", asm_out_file); + fputs ("\tbis r31,r31,r31\n", asm_out_file); + fputs ("\t.endr\n", asm_out_file); + fputs ("\t.endif\n", asm_out_file); + fputs ("\t.endm gcc@code@align\n\n", asm_out_file); + + /* Output extern declarations which should always be visible. */ + unicosmk_output_default_externs (asm_out_file); + + /* Open a dummy section. We always need to be inside a section for the + section-switching code to work correctly. + ??? This should be a module id or something like that. 
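The arithmetic behind the gcc@code@align macro emitted above is compact; the following standalone restatement (an illustrative sketch, not from the upstream sources) shows how many nop instructions (bis r31,r31,r31) it inserts for a requested alignment of 2**n at a given byte offset into a code section.

#include <stdio.h>

/* Same arithmetic as the emitted CAM macro: gcc@n@bytes = 1 << n,
   gcc@here = . % gcc@n@bytes, then (gcc@n@bytes - gcc@here) / 4 nops
   unless the offset is already aligned.  */
static int align_nops (unsigned int off, unsigned int n)
{
  unsigned int bytes = 1u << n;
  unsigned int here = off % bytes;
  return here ? (int) ((bytes - here) / 4) : 0;
}

int main (void)
{
  printf ("offset 4,  gcc@code@align 4 -> %d nop(s)\n", align_nops (4, 4));  /* 3 */
  printf ("offset 16, gcc@code@align 4 -> %d nop(s)\n", align_nops (16, 4)); /* 0 */
  return 0;
}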
I still have to + figure out what the rules for those are. */ + fputs ("\n\t.psect\t$SG00000,data\n", asm_out_file); +} + +/* Output text to appear at the end of an assembler file. This includes all + pending extern declarations and DEX expressions. */ + +static void +unicosmk_file_end (void) +{ + fputs ("\t.endp\n\n", asm_out_file); + + /* Output all pending externs. */ + + unicosmk_output_externs (asm_out_file); + + /* Output dex definitions used for functions whose names conflict with + register names. */ + + unicosmk_output_dex (asm_out_file); + + fputs ("\t.end\t", asm_out_file); + unicosmk_output_module_name (asm_out_file); + putc ('\n', asm_out_file); +} + +#else + +static void +unicosmk_output_deferred_case_vectors (FILE *file ATTRIBUTE_UNUSED) +{} + +static void +unicosmk_gen_dsib (unsigned long *imaskP ATTRIBUTE_UNUSED) +{} + +static void +unicosmk_output_ssib (FILE * file ATTRIBUTE_UNUSED, + const char * fnname ATTRIBUTE_UNUSED) +{} + +rtx +unicosmk_add_call_info_word (rtx x ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + +static int +unicosmk_need_dex (rtx x ATTRIBUTE_UNUSED) +{ + return 0; +} + +#endif /* TARGET_ABI_UNICOSMK */ + +static void +alpha_init_libfuncs (void) +{ + if (TARGET_ABI_UNICOSMK) + { + /* Prevent gcc from generating calls to __divsi3. */ + set_optab_libfunc (sdiv_optab, SImode, 0); + set_optab_libfunc (udiv_optab, SImode, 0); + + /* Use the functions provided by the system library + for DImode integer division. */ + set_optab_libfunc (sdiv_optab, DImode, "$sldiv"); + set_optab_libfunc (udiv_optab, DImode, "$uldiv"); + } + else if (TARGET_ABI_OPEN_VMS) + { + /* Use the VMS runtime library functions for division and + remainder. */ + set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); + set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); + set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); + set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); + set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); + set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); + set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); + set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); + abort_libfunc = init_one_libfunc ("decc$abort"); + memcmp_libfunc = init_one_libfunc ("decc$memcmp"); +#ifdef MEM_LIBFUNCS_INIT + MEM_LIBFUNCS_INIT; +#endif + } +} + +/* On the Alpha, we use this to disable the floating-point registers + when they don't exist. */ + +static void +alpha_conditional_register_usage (void) +{ + int i; + if (! TARGET_FPREGS) + for (i = 32; i < 63; i++) + fixed_regs[i] = call_used_regs[i] = 1; +} + +/* Initialize the GCC target structure. 
*/ +#if TARGET_ABI_OPEN_VMS +# undef TARGET_ATTRIBUTE_TABLE +# define TARGET_ATTRIBUTE_TABLE vms_attribute_table +# undef TARGET_CAN_ELIMINATE +# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate +#endif + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p + +#if TARGET_ABI_UNICOSMK +# undef TARGET_INSERT_ATTRIBUTES +# define TARGET_INSERT_ATTRIBUTES unicosmk_insert_attributes +# undef TARGET_SECTION_TYPE_FLAGS +# define TARGET_SECTION_TYPE_FLAGS unicosmk_section_type_flags +# undef TARGET_ASM_UNIQUE_SECTION +# define TARGET_ASM_UNIQUE_SECTION unicosmk_unique_section +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section +# undef TARGET_ASM_GLOBALIZE_LABEL +# define TARGET_ASM_GLOBALIZE_LABEL hook_void_FILEptr_constcharptr +# undef TARGET_MUST_PASS_IN_STACK +# define TARGET_MUST_PASS_IN_STACK unicosmk_must_pass_in_stack +#endif + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* Default unaligned ops are provided for ELF systems. To get unaligned + data for non-ELF systems, we have to turn off auto alignment. */ +#if !defined (OBJECT_FORMAT_ELF) || TARGET_ABI_OPEN_VMS +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t" +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t" +#endif + +#ifdef OBJECT_FORMAT_ELF +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags +#endif + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address + +#if TARGET_ABI_UNICOSMK +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START unicosmk_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END unicosmk_file_end +#else +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START alpha_file_start +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true +#endif + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + alpha_multipass_dfa_lookahead + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL alpha_builtin_decl +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS alpha_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN alpha_expand_builtin +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN alpha_fold_builtin + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define 
TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem + +#if TARGET_ABI_OSF +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true +#undef TARGET_STDARG_OPTIMIZE_HOOK +#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook +#endif + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS alpha_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED +#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG alpha_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init + +#undef TARGET_INSTANTIATE_DECLS +#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD alpha_secondary_reload + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start + +/* The Alpha architecture does not require sequential consistency. See + http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html + for an example of how it can be violated in practice. 
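The reordering referred to in the comment above is the classic Alpha dependent-load case. The fragment below is only a sketch of that pattern (the names writer, reader, shared and msg are made up for illustration): even though the writer orders its two stores with wmb, an Alpha reader that follows the pointer without its own barrier can still observe the pre-initialization payload.

struct msg { int payload; };
static struct msg *volatile shared;   /* starts out null */

void writer (struct msg *m)
{
  m->payload = 42;
  __asm__ __volatile__ ("wmb" ::: "memory"); /* order the payload store before the pointer store */
  shared = m;
}

int reader (void)
{
  struct msg *p = shared;
  if (p == 0)
    return -1;
  /* On Alpha a read barrier ("mb") is needed here before dereferencing p;
     without it the load below may still return the stale value even though
     the pointer itself is the new one.  */
  return p->payload;
}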
*/ +#undef TARGET_RELAXED_ORDERING +#define TARGET_RELAXED_ORDERING true + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS \ + (TARGET_DEFAULT | TARGET_CPU_DEFAULT | TARGET_DEFAULT_EXPLICIT_RELOCS) +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION alpha_handle_option + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE alpha_option_override + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE alpha_option_optimization_table + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE alpha_mangle_type +#endif + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage + +struct gcc_target targetm = TARGET_INITIALIZER; + + +#include "gt-alpha.h" diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h new file mode 100644 index 000000000..409915abc --- /dev/null +++ b/gcc/config/alpha/alpha.h @@ -0,0 +1,1344 @@ +/* Definitions of target machine for GNU compiler, for DEC Alpha. + Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__alpha"); \ + builtin_define ("__alpha__"); \ + builtin_assert ("cpu=alpha"); \ + builtin_assert ("machine=alpha"); \ + if (TARGET_CIX) \ + { \ + builtin_define ("__alpha_cix__"); \ + builtin_assert ("cpu=cix"); \ + } \ + if (TARGET_FIX) \ + { \ + builtin_define ("__alpha_fix__"); \ + builtin_assert ("cpu=fix"); \ + } \ + if (TARGET_BWX) \ + { \ + builtin_define ("__alpha_bwx__"); \ + builtin_assert ("cpu=bwx"); \ + } \ + if (TARGET_MAX) \ + { \ + builtin_define ("__alpha_max__"); \ + builtin_assert ("cpu=max"); \ + } \ + if (alpha_cpu == PROCESSOR_EV6) \ + { \ + builtin_define ("__alpha_ev6__"); \ + builtin_assert ("cpu=ev6"); \ + } \ + else if (alpha_cpu == PROCESSOR_EV5) \ + { \ + builtin_define ("__alpha_ev5__"); \ + builtin_assert ("cpu=ev5"); \ + } \ + else /* Presumably ev4. */ \ + { \ + builtin_define ("__alpha_ev4__"); \ + builtin_assert ("cpu=ev4"); \ + } \ + if (TARGET_IEEE || TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP"); \ + if (TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP_INEXACT"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + \ + /* Macros dependent on the C dialect. 
*/ \ + SUBTARGET_LANGUAGE_CPP_BUILTINS(); \ +} while (0) + +#ifndef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() \ + do \ + { \ + if (preprocessing_asm_p ()) \ + builtin_define_std ("LANGUAGE_ASSEMBLY"); \ + else if (c_dialect_cxx ()) \ + { \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \ + } \ + else \ + builtin_define_std ("LANGUAGE_C"); \ + if (c_dialect_objc ()) \ + { \ + builtin_define ("__LANGUAGE_OBJECTIVE_C"); \ + builtin_define ("__LANGUAGE_OBJECTIVE_C__"); \ + } \ + } \ + while (0) +#endif + +/* Print subsidiary information on the compiler version in use. */ +#define TARGET_VERSION + +/* Run-time compilation parameters selecting different hardware subsets. */ + +/* Which processor to schedule for. The cpu attribute defines a list that + mirrors this list, so changes to alpha.md must be made at the same time. */ + +enum processor_type +{ + PROCESSOR_EV4, /* 2106[46]{a,} */ + PROCESSOR_EV5, /* 21164{a,pc,} */ + PROCESSOR_EV6, /* 21264 */ + PROCESSOR_MAX +}; + +extern enum processor_type alpha_cpu; +extern enum processor_type alpha_tune; + +enum alpha_trap_precision +{ + ALPHA_TP_PROG, /* No precision (default). */ + ALPHA_TP_FUNC, /* Trap contained within originating function. */ + ALPHA_TP_INSN /* Instruction accuracy and code is resumption safe. */ +}; + +enum alpha_fp_rounding_mode +{ + ALPHA_FPRM_NORM, /* Normal rounding mode. */ + ALPHA_FPRM_MINF, /* Round towards minus-infinity. */ + ALPHA_FPRM_CHOP, /* Chopped rounding mode (towards 0). */ + ALPHA_FPRM_DYN /* Dynamic rounding mode. */ +}; + +enum alpha_fp_trap_mode +{ + ALPHA_FPTM_N, /* Normal trap mode. */ + ALPHA_FPTM_U, /* Underflow traps enabled. */ + ALPHA_FPTM_SU, /* Software completion, w/underflow traps */ + ALPHA_FPTM_SUI /* Software completion, w/underflow & inexact traps */ +}; + +extern enum alpha_trap_precision alpha_tp; +extern enum alpha_fp_rounding_mode alpha_fprm; +extern enum alpha_fp_trap_mode alpha_fptm; + +/* Invert the easy way to make options work. */ +#define TARGET_FP (!TARGET_SOFT_FP) + +/* These are for target os support and cannot be changed at runtime. */ +#define TARGET_ABI_WINDOWS_NT 0 +#define TARGET_ABI_OPEN_VMS 0 +#define TARGET_ABI_UNICOSMK 0 +#define TARGET_ABI_OSF (!TARGET_ABI_WINDOWS_NT \ + && !TARGET_ABI_OPEN_VMS \ + && !TARGET_ABI_UNICOSMK) + +#ifndef TARGET_AS_CAN_SUBTRACT_LABELS +#define TARGET_AS_CAN_SUBTRACT_LABELS TARGET_GAS +#endif +#ifndef TARGET_AS_SLASH_BEFORE_SUFFIX +#define TARGET_AS_SLASH_BEFORE_SUFFIX TARGET_GAS +#endif +#ifndef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 0 +#endif +#ifndef TARGET_HAS_XFLOATING_LIBS +#define TARGET_HAS_XFLOATING_LIBS TARGET_LONG_DOUBLE_128 +#endif +#ifndef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 0 +#endif +#ifndef TARGET_LD_BUGGY_LDGP +#define TARGET_LD_BUGGY_LDGP 0 +#endif +#ifndef TARGET_FIXUP_EV5_PREFETCH +#define TARGET_FIXUP_EV5_PREFETCH 0 +#endif +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + +#define TARGET_DEFAULT MASK_FPREGS + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +#ifndef TARGET_DEFAULT_EXPLICIT_RELOCS +#ifdef HAVE_AS_EXPLICIT_RELOCS +#define TARGET_DEFAULT_EXPLICIT_RELOCS MASK_EXPLICIT_RELOCS +#define TARGET_SUPPORT_ARCH 1 +#else +#define TARGET_DEFAULT_EXPLICIT_RELOCS 0 +#endif +#endif + +#ifndef TARGET_SUPPORT_ARCH +#define TARGET_SUPPORT_ARCH 0 +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. 
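A quick way to see the effect of the TARGET_CPU_CPP_BUILTINS block above is from the preprocessor side. The program below is a minimal sketch (not from the upstream sources) that keys off the macros defined there; on non-Alpha targets it simply falls through to the last branch.

#include <stdio.h>

int main (void)
{
#if defined (__alpha__) && defined (__alpha_bwx__)
  puts ("Alpha target with the byte/word extension (BWX)");
#elif defined (__alpha__)
  puts ("Alpha target without BWX");
#else
  puts ("not an Alpha target");
#endif
#ifdef _IEEE_FP
  puts ("built with IEEE-conformant FP (-mieee or -mieee-with-inexact)");
#endif
  return 0;
}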
+ --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" } + + +/* target machine storage layout */ + +/* Define the size of `int'. The default is the same as the word size. */ +#define INT_TYPE_SIZE 32 + +/* Define the size of `long long'. The default is the twice the word size. */ +#define LONG_LONG_TYPE_SIZE 64 + +/* The two floating-point formats we support are S-floating, which is + 4 bytes, and T-floating, which is 8 bytes. `float' is S and `double' + and `long double' are T. */ + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Define this to set long double type size to use in libgcc2.c, which can + not depend on target_flags. */ +#ifdef __LONG_DOUBLE_128__ +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 +#else +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif + +/* Work around target_flags dependency in ada/targtyps.c. */ +#define WIDEST_HARDWARE_FP_SIZE 64 + +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + For Alpha, we always store objects in a full register. 32-bit integers + are always sign-extended, but smaller objects retain their signedness. + + Note that small vector types can get mapped onto integer modes at the + whim of not appearing in alpha-modes.def. We never promoted these + values before; don't do so now that we've trimmed the set of modes to + those actually implemented in the backend. */ + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + { \ + if ((MODE) == SImode) \ + (UNSIGNEDP) = 0; \ + (MODE) = DImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. + + There are no such instructions on the Alpha, but the documentation + is little endian. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + This is false on the Alpha. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is lowest + numbered. + + For Alpha we can decide arbitrarily since there are no machine instructions + for them. Might as well be consistent with bytes. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 8 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE 64 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 64 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 128 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 64 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. 
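The type-size macros above pin down the basic C object layout. As a sketch (not from the upstream sources), a program compiled by an Alpha-targeted gcc would report the sizes implied by INT_TYPE_SIZE, LONG_LONG_TYPE_SIZE, DOUBLE_TYPE_SIZE and POINTER_SIZE:

#include <stdio.h>

int main (void)
{
  printf ("int       : %zu bytes\n", sizeof (int));        /* 4, INT_TYPE_SIZE 32       */
  printf ("long long : %zu bytes\n", sizeof (long long));  /* 8, LONG_LONG_TYPE_SIZE 64 */
  printf ("double    : %zu bytes\n", sizeof (double));     /* 8, DOUBLE_TYPE_SIZE 64    */
  printf ("void *    : %zu bytes\n", sizeof (void *));     /* 8, POINTER_SIZE 64        */
  return 0;
}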
*/ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 128 + +/* For atomic access to objects, must have at least 32-bit alignment + unless the machine has byte operations. */ +#define MINIMUM_ATOMIC_ALIGNMENT ((unsigned int) (TARGET_BWX ? 8 : 32)) + +/* Align all constants and variables to at least a word boundary so + we can pick up pieces of them faster. */ +/* ??? Only if block-move stuff knows about different source/destination + alignment. */ +#if 0 +#define CONSTANT_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#define DATA_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#endif + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. + + Since we get an error message when we do one, call them invalid. */ + +#define STRICT_ALIGNMENT 1 + +/* Set this nonzero if unaligned move instructions are extremely slow. + + On the Alpha, they trap. */ + +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + We define all 32 integer registers, even though $31 is always zero, + and all 32 floating-point registers, even though $f31 is also + always zero. We do not bother defining the FP status register and + there are no other registers. + + Since $31 is always zero, we will use register number 31 as the + argument pointer. It will never appear in the generated code + because we will always be eliminating it in favor of the stack + pointer or hardware frame pointer. + + Likewise, we use $f31 for the frame pointer, which will always + be eliminated in favor of the hardware frame pointer or the + stack pointer. */ + +#define FIRST_PSEUDO_REGISTER 64 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ + +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ + {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. 
*/ + +#define REG_ALLOC_ORDER { \ + 1, 2, 3, 4, 5, 6, 7, 8, /* nonsaved integer registers */ \ + 22, 23, 24, 25, 28, /* likewise */ \ + 0, /* likewise, but return value */ \ + 21, 20, 19, 18, 17, 16, /* likewise, but input args */ \ + 27, /* likewise, but OSF procedure value */ \ + \ + 42, 43, 44, 45, 46, 47, /* nonsaved floating-point registers */ \ + 54, 55, 56, 57, 58, 59, /* likewise */ \ + 60, 61, 62, /* likewise */ \ + 32, 33, /* likewise, but return values */ \ + 53, 52, 51, 50, 49, 48, /* likewise, but input args */ \ + \ + 9, 10, 11, 12, 13, 14, /* saved integer registers */ \ + 26, /* return address */ \ + 15, /* hard frame pointer */ \ + \ + 34, 35, 36, 37, 38, 39, /* saved floating-point registers */ \ + 40, 41, /* likewise */ \ + \ + 29, 30, 31, 63 /* gp, sp, ap, sfp */ \ +} + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + On Alpha, the integer registers can hold any mode. The floating-point + registers can hold 64-bit integers as well, but not smaller values. */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (IN_RANGE ((REGNO), 32, 62) \ + ? (MODE) == SFmode || (MODE) == DFmode || (MODE) == DImode \ + || (MODE) == SCmode || (MODE) == DCmode \ + : 1) + +/* A C expression that is nonzero if a value of mode + MODE1 is accessible in mode MODE2 without copying. + + This asymmetric test is true when MODE1 could be put + in an FP register but MODE2 could not. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (HARD_REGNO_MODE_OK (32, (MODE1)) \ + ? HARD_REGNO_MODE_OK (32, (MODE2)) \ + : 1) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Alpha pc isn't overloaded on a register that the compiler knows about. */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 30 + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 15 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 31 + +/* Base register for access to local variables of function. */ +#define FRAME_POINTER_REGNUM 63 + +/* Register in which static-chain is passed to a function. + + For the Alpha, this is based on an example; the calling sequence + doesn't seem to specify this. */ +#define STATIC_CHAIN_REGNUM 1 + +/* The register number of the register used to address a table of + static data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM 29 + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' + is clobbered by calls. */ +/* ??? It is and it isn't. It's required to be valid for a given + function when the function returns. It isn't clobbered by + current_file functions. Moreover, we do not expose the ldgp + until after reload, so we're probably safe. */ +/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. 
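HARD_REGNO_NREGS above is a round-up to whole 8-byte registers, and HARD_REGNO_MODE_OK restricts the FP bank to a handful of modes. The standalone restatement below is an illustrative sketch only (not from the upstream sources), with modes named by plain strings for readability.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

#define UNITS_PER_WORD 8

/* Mirror of HARD_REGNO_NREGS: how many 8-byte registers a mode of the
   given size occupies.  */
static int nregs (int size_in_bytes)
{
  return (size_in_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
}

/* Mirror of HARD_REGNO_MODE_OK: the FP bank (hard regs 32..62) accepts
   only SFmode, DFmode, DImode, SCmode and DCmode; integer registers
   accept any mode.  */
static bool regno_mode_ok (int regno, const char *mode)
{
  if (regno < 32 || regno > 62)
    return true;
  return !strcmp (mode, "SF") || !strcmp (mode, "DF") || !strcmp (mode, "DI")
         || !strcmp (mode, "SC") || !strcmp (mode, "DC");
}

int main (void)
{
  printf ("TImode (16 bytes) -> %d regs\n", nregs (16));                            /* 2   */
  printf ("QImode in $f0 (reg 32)? %s\n", regno_mode_ok (32, "QI") ? "yes" : "no"); /* no  */
  printf ("QImode in $3?           %s\n", regno_mode_ok (3, "QI") ? "yes" : "no");  /* yes */
  return 0;
}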
+ + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +enum reg_class { + NO_REGS, R0_REG, R24_REG, R25_REG, R27_REG, + GENERAL_REGS, FLOAT_REGS, ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ + {"NO_REGS", "R0_REG", "R24_REG", "R25_REG", "R27_REG", \ + "GENERAL_REGS", "FLOAT_REGS", "ALL_REGS" } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ {0x00000000, 0x00000000}, /* NO_REGS */ \ + {0x00000001, 0x00000000}, /* R0_REG */ \ + {0x01000000, 0x00000000}, /* R24_REG */ \ + {0x02000000, 0x00000000}, /* R25_REG */ \ + {0x08000000, 0x00000000}, /* R27_REG */ \ + {0xffffffff, 0x80000000}, /* GENERAL_REGS */ \ + {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \ + {0xffffffff, 0xffffffff} } + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FLOAT_REGS, LIM_REG_CLASSES \ +} + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == 0 ? R0_REG \ + : (REGNO) == 24 ? R24_REG \ + : (REGNO) == 25 ? R25_REG \ + : (REGNO) == 27 ? R27_REG \ + : IN_RANGE ((REGNO), 32, 62) ? FLOAT_REGS \ + : GENERAL_REGS) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS NO_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS alpha_preferred_reload_class + +/* If we are copying between general and FP registers, we need a memory + location unless the FIX extension is available. */ + +#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \ + (! TARGET_FIX && (((CLASS1) == FLOAT_REGS && (CLASS2) != FLOAT_REGS) \ + || ((CLASS2) == FLOAT_REGS && (CLASS1) != FLOAT_REGS))) + +/* Specify the mode to be used for memory when a secondary memory + location is needed. If MODE is floating-point, use it. Otherwise, + widen to a word like the default. This is needed because we always + store integers in FP registers in quadword format. This whole + area is very tricky! */ +#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \ + (GET_MODE_CLASS (MODE) == MODE_FLOAT ? (MODE) \ + : GET_MODE_SIZE (MODE) >= 4 ? 
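Each row of REG_CLASS_CONTENTS above is a 64-register bit set stored as two 32-bit words, bit N of the pair standing for hard register N. The decoder below is an illustrative sketch (not from the upstream sources); it shows that FLOAT_REGS covers hard registers 32..62 and GENERAL_REGS covers 0..31 plus 63, the soft frame pointer.

#include <stdio.h>

/* Print the hard register numbers contained in a {low word, high word}
   pair taken from REG_CLASS_CONTENTS.  */
static void print_class (const char *name, unsigned int lo, unsigned int hi)
{
  printf ("%s:", name);
  for (int r = 0; r < 64; r++)
    if (((r < 32 ? lo : hi) >> (r & 31)) & 1)
      printf (" %d", r);
  putchar ('\n');
}

int main (void)
{
  print_class ("FLOAT_REGS  ", 0x00000000, 0x7fffffff);  /* 32..62       */
  print_class ("GENERAL_REGS", 0xffffffff, 0x80000000);  /* 0..31 and 63 */
  return 0;
}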
(MODE) \ + : mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (MODE), 0)) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Return the class of registers that cannot change mode from FROM to TO. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? reg_classes_intersect_p (FLOAT_REGS, CLASS) : 0) + +/* Define the cost of moving between registers of various classes. Moving + between FLOAT_REGS and anything else except float regs is expensive. + In fact, we make it quite expensive because we really don't want to + do these moves unless it is clearly worth it. Optimizations may + reduce the impact of not being able to allocate a pseudo to a + hard register. */ + +#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \ + (((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) ? 2 \ + : TARGET_FIX ? ((CLASS1) == FLOAT_REGS ? 6 : 8) \ + : 4+2*alpha_memory_latency) + +/* A C expressions returning the cost of moving data of MODE from a register to + or from memory. + + On the Alpha, bump this up a bit. */ + +extern int alpha_memory_latency; +#define MEMORY_MOVE_COST(MODE,CLASS,IN) (2*alpha_memory_latency) + +/* Provide the cost of a branch. Exact meaning under development. */ +#define BRANCH_COST(speed_p, predictable_p) 5 + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +/* #define FRAME_GROWS_DOWNWARD 0 */ + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ + +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. + On Alpha, don't define this because there are no push insns. */ +/* #define PUSH_ROUNDING(BYTES) */ + +/* Define this to be nonzero if stack checking is built into the ABI. */ +#define STACK_CHECK_BUILTIN 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Definitions for register eliminations. + + We have two registers that can be eliminated on the Alpha. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. 
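The register and memory move costs above combine a fixed part with alpha_memory_latency. They are re-stated as plain functions below (an illustrative sketch, not from the upstream sources); the latency value 3 is assumed purely for the example, the real value being chosen from the CPU cost tables in alpha.c.

#include <stdio.h>
#include <stdbool.h>

static int alpha_memory_latency = 3;   /* assumed here for illustration only */

/* Mirror of REGISTER_MOVE_COST: 2 within one register bank; with the FIX
   extension a cross-bank move costs 6 (FP to integer) or 8 (integer to
   FP); without FIX the value reflects a bounce through memory.  */
static int register_move_cost (bool from_fp, bool to_fp, bool have_fix)
{
  if (from_fp == to_fp)
    return 2;
  if (have_fix)
    return from_fp ? 6 : 8;
  return 4 + 2 * alpha_memory_latency;
}

/* Mirror of MEMORY_MOVE_COST.  */
static int memory_move_cost (void)
{
  return 2 * alpha_memory_latency;
}

int main (void)
{
  printf ("GPR<->GPR          : %d\n", register_move_cost (false, false, false)); /* 2  */
  printf ("GPR->FPR, no FIX   : %d\n", register_move_cost (false, true, false));  /* 10 */
  printf ("FPR->GPR, with FIX : %d\n", register_move_cost (true, false, true));   /* 6  */
  printf ("register <-> memory: %d\n", memory_move_cost ());                      /* 6  */
  return 0;
}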
*/ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} + +/* Round up to a multiple of 16 bytes. */ +#define ALPHA_ROUND(X) (((X) + 15) & ~ 15) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = alpha_initial_elimination_offset(FROM, TO)) + +/* Define this if stack space is still allocated for a parameter passed + in a register. */ +/* #define REG_PARM_STACK_SPACE */ + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. + + On Alpha the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + function_value (VALTYPE, FUNC, VOIDmode) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +#define LIBCALL_VALUE(MODE) \ + function_value (NULL, NULL, MODE) + +/* 1 if N is a possible register number for a function value + as seen by the caller. */ + +#define FUNCTION_VALUE_REGNO_P(N) \ + ((N) == 0 || (N) == 1 || (N) == 32 || (N) == 33) + +/* 1 if N is a possible register number for function argument passing. + On Alpha, these are $16-$21 and $f16-$f21. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + (IN_RANGE ((N), 16, 21) || ((N) >= 16 + 32 && (N) <= 21 + 32)) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Alpha, this is a single integer, which is a number of words + of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* Define intermediate macro to compute the size (in registers) of an argument + for the Alpha. */ + +#define ALPHA_ARG_SIZE(MODE, TYPE, NAMED) \ + ((MODE) == TFmode || (MODE) == TCmode ? 1 \ + : (((MODE) == BLKmode ? int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \ + + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) + +/* Make (or fake) .linkage entry for function call. + IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ + +/* This macro defines the start of an assembly comment. */ + +#define ASM_COMMENT_START " #" + +/* This macro produces the initial definition of a function. */ + +#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \ + alpha_start_function(FILE,NAME,DECL); + +/* This macro closes up a function definition for the assembler. */ + +#define ASM_DECLARE_FUNCTION_SIZE(FILE,NAME,DECL) \ + alpha_end_function(FILE,NAME,DECL) + +/* Output any profiling code before the prologue. */ + +#define PROFILE_BEFORE_PROLOGUE 1 + +/* Never use profile counters. 
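ALPHA_ROUND above rounds frame amounts up to the 16-byte stack alignment required by STACK_BOUNDARY. A quick sketch of its behavior (not from the upstream sources):

#include <stdio.h>

#define ALPHA_ROUND(X) (((X) + 15) & ~15)

int main (void)
{
  printf ("ALPHA_ROUND (1)  = %d\n", ALPHA_ROUND (1));   /* 16 */
  printf ("ALPHA_ROUND (16) = %d\n", ALPHA_ROUND (16));  /* 16 */
  printf ("ALPHA_ROUND (17) = %d\n", ALPHA_ROUND (17));  /* 32 */
  return 0;
}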
*/ + +#define NO_PROFILE_COUNTERS 1 + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. Under OSF/1, profiling is enabled + by simply passing -pg to the assembler and linker. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define registers used by the epilogue and return instruction. */ + +#define EPILOGUE_USES(REGNO) ((REGNO) == 26) + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE 32 + +/* The alignment of a trampoline, in bits. */ + +#define TRAMPOLINE_ALIGNMENT 64 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. + FRAMEADDR is the frame pointer of the COUNT frame, or the frame pointer of + the COUNT-1 frame if RETURN_ADDR_IN_PREVIOUS_FRAME is defined. */ + +#define RETURN_ADDR_RTX alpha_return_addr + +/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders + can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same + as the default definition in dwarf2out.c. */ +#undef DWARF_FRAME_REGNUM +#define DWARF_FRAME_REGNUM(REG) DBX_REGISTER_NUMBER (REG) + +/* Before the prologue, RA lives in $26. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26) +#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64) +#define DWARF_ZERO_REG 31 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 28) +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, \ + crtl->outgoing_args_size)) + +/* Addressing modes, and classification of registers for them. */ + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) 0 +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32 \ + || (REGNO) == 63 || reg_renumber[REGNO] == 63) + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* Recognize any constant value that is a valid address. For the Alpha, + there are only constants none since we want to use LDA to load any + symbolic addresses into registers. */ + +#define CONSTANT_ADDRESS_P(X) \ + (CONST_INT_P (X) \ + && (unsigned HOST_WIDE_INT) (INTVAL (X) + 0x8000) < 0x10000) + +/* Include all constant integers and constant doubles, but not + floating-point, except for floating-point zero. */ + +#define LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. 
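CONSTANT_ADDRESS_P above uses the usual unsigned-compare trick to accept exactly the signed 16-bit constants, i.e. what fits in an lda displacement. An illustrative restatement on plain long values (not from the upstream sources):

#include <stdio.h>

/* Mirror of CONSTANT_ADDRESS_P: accept values in [-0x8000, 0x7fff].  */
static int constant_address_p (long v)
{
  return (unsigned long) (v + 0x8000) < 0x10000;
}

int main (void)
{
  long probe[] = { -0x8001L, -0x8000L, 0L, 0x7fffL, 0x8000L };
  for (unsigned int i = 0; i < sizeof probe / sizeof probe[0]; i++)
    printf ("%7ld -> %s\n", probe[i],
            constant_address_p (probe[i]) ? "accepted" : "rejected");
  return 0;
}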
+ The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) 0 + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define NONSTRICT_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) < 32 || REGNO (X) == 63 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* ??? Nonzero if X is the frame pointer, or some virtual register + that may eliminate to the frame pointer. These will be allowed to + have offsets greater than 32K. This is done because register + elimination offsets will change the hi/lo split, and if we split + before reload, we will require additional instructions. */ +#define NONSTRICT_REG_OK_FP_BASE_P(X) \ + (REGNO (X) == 31 || REGNO (X) == 63 \ + || (REGNO (X) >= FIRST_PSEUDO_REGISTER \ + && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER)) + +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P (X) +#else +#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P (X) +#endif + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \ +do { \ + rtx new_x = alpha_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + +/* Go to LABEL if ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. + On the Alpha this is true only for the unaligned modes. We can + simplify this test since we know that the address must be valid. */ + +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR,LABEL) \ +{ if (GET_CODE (ADDR) == AND) goto LABEL; } + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + + Do not define this if the table should contain absolute addresses. + On the Alpha, the table is really GP-relative, not relative to the PC + of the table, but we pretend that it is PC-relative; this should be OK, + but we should try to find some better way sometime. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Max number of bytes we can move to or from memory + in one reasonably fast instruction. */ + +#define MOVE_MAX 8 + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. + + Without byte/word accesses, we want no more than four instructions; + with, several single byte accesses are better. */ + +#define MOVE_RATIO(speed) (TARGET_BWX ? 
7 : 2) + +/* Largest number of bytes of an object that can be placed in a register. + On the Alpha we have plenty of registers, so use TImode. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* Nonzero if access to memory by bytes is no faster than for words. + Also nonzero if doing byte operations (specifically shifts) in registers + is undesirable. + + On the Alpha, we want to not use the byte operation and instead use + masking operations to access fields; these will save instructions. */ + +#define SLOW_BYTE_ACCESS 1 + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ((MODE) == SImode ? SIGN_EXTEND : ZERO_EXTEND) + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* The CIX ctlz and cttz instructions return 64 for zero. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX) + +/* Define the value returned by a floating-point comparison instruction. */ + +#define FLOAT_STORE_FLAG_VALUE(MODE) \ + REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE)) + +/* Canonicalize a comparison from one we don't have to one we do have. */ + +#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \ + do { \ + if (((CODE) == GE || (CODE) == GT || (CODE) == GEU || (CODE) == GTU) \ + && (REG_P (OP1) || (OP1) == const0_rtx)) \ + { \ + rtx tem = (OP0); \ + (OP0) = (OP1); \ + (OP1) = tem; \ + (CODE) = swap_condition (CODE); \ + } \ + if (((CODE) == LT || (CODE) == LTU) \ + && CONST_INT_P (OP1) && INTVAL (OP1) == 256) \ + { \ + (CODE) = (CODE) == LT ? LE : LEU; \ + (OP1) = GEN_INT (255); \ + } \ + } while (0) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode DImode + +/* Mode of a function address in a call instruction (for indexing purposes). */ + +#define FUNCTION_MODE Pmode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. + + We define this on the Alpha so that gen_call and gen_call_value + get to see the SYMBOL_REF (for the hint field of the jsr). It will + then copy it into a register, thus actually letting the address be + cse'ed. */ + +#define NO_FUNCTION_CSE + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Control the assembler format that we output. */ + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON (TARGET_EXPLICIT_RELOCS ? "\t.set\tmacro\n" : "") + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. 
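The second clause of CANONICALIZE_COMPARISON above rewrites "x < 256" as "x <= 255" so the constant fits the 8-bit literal field of the Alpha compare instructions; the first clause swaps GE/GT-style tests into their mirrored forms. A minimal sketch of the constant rewrite only (not from the upstream sources; LTU is handled the same way):

#include <stdio.h>

enum code { LT, LE, GT, GE };
static const char *const code_name[] = { "LT", "LE", "GT", "GE" };

/* Mirror of the "x < 256" -> "x <= 255" clause.  */
static void canonicalize (enum code *c, long *op1)
{
  if (*c == LT && *op1 == 256)
    {
      *c = LE;
      *op1 = 255;
    }
}

int main (void)
{
  enum code c = LT;
  long op1 = 256;
  canonicalize (&c, &op1);
  printf ("(x LT 256) becomes (x %s %ld)\n", code_name[c], op1);  /* (x LE 255) */
  return 0;
}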
*/ +#define ASM_APP_OFF (TARGET_EXPLICIT_RELOCS ? "\t.set\tnomacro\n" : "") + +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before read-only data. */ + +#define READONLY_DATA_SECTION_ASM_OP "\t.rdata" + +/* Output before writable data. */ + +#define DATA_SECTION_ASM_OP "\t.data" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +#define REGISTER_NAMES \ +{"$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", \ + "$9", "$10", "$11", "$12", "$13", "$14", "$15", \ + "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", \ + "$24", "$25", "$26", "$27", "$28", "$29", "$30", "AP", \ + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", \ + "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ + "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",\ + "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "FP"} + +/* Strip name encoding when emitting labels. */ + +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ +do { \ + const char *name_ = NAME; \ + if (*name_ == '@' || *name_ == '%') \ + name_ += 2; \ + if (*name_ == '*') \ + name_++; \ + else \ + fputs (user_label_prefix, STREAM); \ + fputs (name_, STREAM); \ +} while (0) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/* The prefix to add to user-visible assembler symbols. */ + +#define USER_LABEL_PREFIX "" + +/* This is how to output a label for a jump table. Arguments are the same as + for (*targetm.asm_out.internal_label), except the insn for the jump table is + passed. */ + +#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \ +{ ASM_OUTPUT_ALIGN (FILE, 2); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); } + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*$%s%ld", (PREFIX), (long)(NUM)) + +/* We use the default ASCII-output routine, except that we don't write more + than 50 characters since the assembler doesn't support very long lines. */ + +#define ASM_OUTPUT_ASCII(MYFILE, MYSTRING, MYLENGTH) \ + do { \ + FILE *_hide_asm_out_file = (MYFILE); \ + const unsigned char *_hide_p = (const unsigned char *) (MYSTRING); \ + int _hide_thissize = (MYLENGTH); \ + int _size_so_far = 0; \ + { \ + FILE *asm_out_file = _hide_asm_out_file; \ + const unsigned char *p = _hide_p; \ + int thissize = _hide_thissize; \ + int i; \ + fprintf (asm_out_file, "\t.ascii \""); \ + \ + for (i = 0; i < thissize; i++) \ + { \ + register int c = p[i]; \ + \ + if (_size_so_far ++ > 50 && i < thissize - 4) \ + _size_so_far = 0, fprintf (asm_out_file, "\"\n\t.ascii \""); \ + \ + if (c == '\"' || c == '\\') \ + putc ('\\', asm_out_file); \ + if (c >= ' ' && c < 0177) \ + putc (c, asm_out_file); \ + else \ + { \ + fprintf (asm_out_file, "\\%o", c); \ + /* After an octal-escape, if a digit follows, \ + terminate one string constant and start another. \ + The VAX assembler fails to stop reading the escape \ + after three digits, so this is the only way we \ + can get it to parse the data properly. */ \ + if (i < thissize - 1 && ISDIGIT (p[i + 1])) \ + _size_so_far = 0, fprintf (asm_out_file, "\"\n\t.ascii \""); \ + } \ + } \ + fprintf (asm_out_file, "\"\n"); \ + } \ + } \ + while (0) + +/* This is how to output an element of a case-vector that is relative. 
*/ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.%s $L%d\n", TARGET_ABI_WINDOWS_NT ? "long" : "gprel32", \ + (VALUE)) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* This is how to advance the location counter by SIZE bytes. */ + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ +( fputs ("\t.comm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))) + +/* This says how to output an assembler line + to define a local common symbol. */ + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE,ROUNDED) \ +( fputs ("\t.lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))) + + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) + +/* Determine which codes are valid without a following integer. These must + not be alphabetic. + + ~ Generates the name of the current function. + + / Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX + attributes are examined to determine what is appropriate. + + , Generates single precision suffix for floating point + instructions (s for IEEE, f for VAX) + + - Generates double precision suffix for floating point + instructions (t for IEEE, g for VAX) + */ + +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \ + ((CODE) == '/' || (CODE) == ',' || (CODE) == '-' || (CODE) == '~' \ + || (CODE) == '#' || (CODE) == '*' || (CODE) == '&') + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ + print_operand_address((FILE), (ADDR)) + +/* Tell collect that the object format is ECOFF. */ +#define OBJECT_FORMAT_COFF +#define EXTENDED_COFF + +/* If we use NM, pass -g to it so it only lists globals. */ +#define NM_FLAGS "-pg" + +/* Definitions for debugging. */ + +#define SDB_DEBUGGING_INFO 1 /* generate info for mips-tfile */ +#define DBX_DEBUGGING_INFO 1 /* generate embedded stabs */ +#define MIPS_DEBUGGING_INFO 1 /* MIPS specific debugging info */ + +#ifndef PREFERRED_DEBUGGING_TYPE /* assume SDB_DEBUGGING_INFO */ +#define PREFERRED_DEBUGGING_TYPE SDB_DEBUG +#endif + + +/* Correct the offset of automatic variables and arguments. Note that + the Alpha debug format wants all automatic variables and arguments + to be in terms of two different offsets from the virtual frame pointer, + which is the stack pointer before any adjustment in the function. + The offset for the argument pointer is fixed for the native compiler, + it is either zero (for the no arguments case) or large enough to hold + all argument registers. + The offset for the auto pointer is the fourth argument to the .frame + directive (local_offset). + To stay compatible with the native tools we use the same offsets + from the virtual frame pointer and adjust the debugger arg/auto offsets + accordingly. These debugger offsets are set up in output_prolog. 
*/ + +extern long alpha_arg_offset; +extern long alpha_auto_offset; +#define DEBUGGER_AUTO_OFFSET(X) \ + ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) + alpha_auto_offset) +#define DEBUGGER_ARG_OFFSET(OFFSET, X) (OFFSET + alpha_arg_offset) + +/* mips-tfile doesn't understand .stabd directives. */ +#define DBX_OUTPUT_SOURCE_LINE(STREAM, LINE, COUNTER) do { \ + dbxout_begin_stabn_sline (LINE); \ + dbxout_stab_value_internal_label ("LM", &COUNTER); \ +} while (0) + +/* We want to use MIPS-style .loc directives for SDB line numbers. */ +extern int num_source_filenames; +#define SDB_OUTPUT_SOURCE_LINE(STREAM, LINE) \ + fprintf (STREAM, "\t.loc\t%d %d\n", num_source_filenames, LINE) + +#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \ + alpha_output_filename (STREAM, NAME) + +/* mips-tfile.c limits us to strings of one page. We must underestimate this + number, because the real length runs past this up to the next + continuation point. This is really a dbxout.c bug. */ +#define DBX_CONTIN_LENGTH 3000 + +/* By default, turn on GDB extensions. */ +#define DEFAULT_GDB_EXTENSIONS 1 + +/* Stabs-in-ECOFF can't handle dbxout_function_end(). */ +#define NO_DBX_FUNCTION_END 1 + +/* If we are smuggling stabs through the ALPHA ECOFF object + format, put a comment in front of the .stab operation so + that the ALPHA assembler does not choke. The mips-tfile program + will correctly put the stab into the object file. */ + +#define ASM_STABS_OP ((TARGET_GAS) ? "\t.stabs\t" : " #.stabs\t") +#define ASM_STABN_OP ((TARGET_GAS) ? "\t.stabn\t" : " #.stabn\t") +#define ASM_STABD_OP ((TARGET_GAS) ? "\t.stabd\t" : " #.stabd\t") + +/* Forward references to tags are allowed. */ +#define SDB_ALLOW_FORWARD_REFERENCES + +/* Unknown tags are also allowed. */ +#define SDB_ALLOW_UNKNOWN_REFERENCES + +#define PUT_SDB_DEF(a) \ +do { \ + fprintf (asm_out_file, "\t%s.def\t", \ + (TARGET_GAS) ? "" : "#"); \ + ASM_OUTPUT_LABELREF (asm_out_file, a); \ + fputc (';', asm_out_file); \ +} while (0) + +#define PUT_SDB_PLAIN_DEF(a) \ +do { \ + fprintf (asm_out_file, "\t%s.def\t.%s;", \ + (TARGET_GAS) ? "" : "#", (a)); \ +} while (0) + +#define PUT_SDB_TYPE(a) \ +do { \ + fprintf (asm_out_file, "\t.type\t0x%x;", (a)); \ +} while (0) + +/* For block start and end, we create labels, so that + later we can figure out where the correct offset is. + The normal .ent/.end serve well enough for functions, + so those are just commented out. */ + +extern int sdb_label_count; /* block start/end next label # */ + +#define PUT_SDB_BLOCK_START(LINE) \ +do { \ + fprintf (asm_out_file, \ + "$Lb%d:\n\t%s.begin\t$Lb%d\t%d\n", \ + sdb_label_count, \ + (TARGET_GAS) ? "" : "#", \ + sdb_label_count, \ + (LINE)); \ + sdb_label_count++; \ +} while (0) + +#define PUT_SDB_BLOCK_END(LINE) \ +do { \ + fprintf (asm_out_file, \ + "$Le%d:\n\t%s.bend\t$Le%d\t%d\n", \ + sdb_label_count, \ + (TARGET_GAS) ? "" : "#", \ + sdb_label_count, \ + (LINE)); \ + sdb_label_count++; \ +} while (0) + +#define PUT_SDB_FUNCTION_START(LINE) + +#define PUT_SDB_FUNCTION_END(LINE) + +#define PUT_SDB_EPILOGUE_END(NAME) ((void)(NAME)) + +/* Macros for mips-tfile.c to encapsulate stabs in ECOFF, and for + mips-tdump.c to print them out. + + These must match the corresponding definitions in gdb/mipsread.c. + Unfortunately, gcc and gdb do not currently share any directories. 
*/ + +#define CODE_MASK 0x8F300 +#define MIPS_IS_STAB(sym) (((sym)->index & 0xFFF00) == CODE_MASK) +#define MIPS_MARK_STAB(code) ((code)+CODE_MASK) +#define MIPS_UNMARK_STAB(code) ((code)-CODE_MASK) + +/* Override some mips-tfile definitions. */ + +#define SHASH_SIZE 511 +#define THASH_SIZE 55 + +/* Align ecoff symbol tables to avoid OSF1/1.3 nm complaints. */ + +#define ALIGN_SYMTABLE_OFFSET(OFFSET) (((OFFSET) + 7) & ~7) + +/* The system headers under Alpha systems are generally C++-aware. */ +#define NO_IMPLICIT_EXTERN_C diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md new file mode 100644 index 000000000..d6fba76f7 --- /dev/null +++ b/gcc/config/alpha/alpha.md @@ -0,0 +1,7999 @@ +;; Machine description for DEC Alpha for GNU C compiler +;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +;; 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010 +;; Free Software Foundation, Inc. +;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; Uses of UNSPEC in this file: + +(define_constants + [(UNSPEC_ARG_HOME 0) + (UNSPEC_LDGP1 1) + (UNSPEC_INSXH 2) + (UNSPEC_MSKXH 3) + (UNSPEC_CVTQL 4) + (UNSPEC_CVTLQ 5) + (UNSPEC_UMK_LAUM 6) + (UNSPEC_UMK_LALM 7) + (UNSPEC_UMK_LAL 8) + (UNSPEC_UMK_LOAD_CIW 9) + (UNSPEC_LDGP2 10) + (UNSPEC_LITERAL 11) + (UNSPEC_LITUSE 12) + (UNSPEC_SIBCALL 13) + (UNSPEC_SYMBOL 14) + + ;; TLS Support + (UNSPEC_TLSGD_CALL 15) + (UNSPEC_TLSLDM_CALL 16) + (UNSPEC_TLSGD 17) + (UNSPEC_TLSLDM 18) + (UNSPEC_DTPREL 19) + (UNSPEC_TPREL 20) + (UNSPEC_TP 21) + + ;; Builtins + (UNSPEC_CMPBGE 22) + (UNSPEC_ZAP 23) + (UNSPEC_AMASK 24) + (UNSPEC_IMPLVER 25) + (UNSPEC_PERR 26) + (UNSPEC_COPYSIGN 27) + + ;; Atomic operations + (UNSPEC_MB 28) + (UNSPEC_ATOMIC 31) + (UNSPEC_CMPXCHG 32) + (UNSPEC_XCHG 33) + ]) + +;; UNSPEC_VOLATILE: + +(define_constants + [(UNSPECV_IMB 0) + (UNSPECV_BLOCKAGE 1) + (UNSPECV_SETJMPR 2) ; builtin_setjmp_receiver + (UNSPECV_LONGJMP 3) ; builtin_longjmp + (UNSPECV_TRAPB 4) + (UNSPECV_PSPL 5) ; prologue_stack_probe_loop + (UNSPECV_REALIGN 6) + (UNSPECV_EHR 7) ; exception_receiver + (UNSPECV_MCOUNT 8) + (UNSPECV_FORCE_MOV 9) + (UNSPECV_LDGP1 10) + (UNSPECV_PLDGP2 11) ; prologue ldgp + (UNSPECV_SET_TP 12) + (UNSPECV_RPCC 13) + (UNSPECV_SETJMPR_ER 14) ; builtin_setjmp_receiver fragment + (UNSPECV_LL 15) ; load-locked + (UNSPECV_SC 16) ; store-conditional + ]) + +;; On non-BWX targets, CQImode must be handled the similarly to HImode +;; when generating reloads. 
+(define_mode_iterator RELOAD12 [QI HI CQI]) +(define_mode_attr reloadmode [(QI "qi") (HI "hi") (CQI "hi")]) + +;; Other mode iterators +(define_mode_iterator I12MODE [QI HI]) +(define_mode_iterator I48MODE [SI DI]) +(define_mode_attr modesuffix [(SI "l") (DI "q")]) + +;; Where necessary, the suffixes _le and _be are used to distinguish between +;; little-endian and big-endian patterns. +;; +;; Note that the Unicos/Mk assembler does not support the following +;; opcodes: mov, fmov, nop, fnop, unop. + +;; Processor type -- this attribute must exactly match the processor_type +;; enumeration in alpha.h. + +(define_attr "tune" "ev4,ev5,ev6" + (const (symbol_ref "((enum attr_tune) alpha_tune)"))) + +;; Define an insn type attribute. This is used in function unit delay +;; computations, among other purposes. For the most part, we use the names +;; defined in the EV4 documentation, but add a few that we have to know about +;; separately. + +(define_attr "type" + "ild,fld,ldsym,ist,fst,ibr,callpal,fbr,jsr,iadd,ilog,shift,icmov,fcmov, + icmp,imul,fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,mb,ld_l,st_c, + multi,none" + (const_string "iadd")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "type" "multi")]) + +;; Define the operand size an insn operates on. Used primarily by mul +;; and div operations that have size dependent timings. + +(define_attr "opsize" "si,di,udi" + (const_string "di")) + +;; The TRAP attribute marks instructions that may generate traps +;; (which are imprecise and may need a trapb if software completion +;; is desired). + +(define_attr "trap" "no,yes" + (const_string "no")) + +;; The ROUND_SUFFIX attribute marks which instructions require a +;; rounding-mode suffix. The value NONE indicates no suffix, +;; the value NORMAL indicates a suffix controlled by alpha_fprm. + +(define_attr "round_suffix" "none,normal,c" + (const_string "none")) + +;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix: +;; NONE no suffix +;; SU accepts only /su (cmpt et al) +;; SUI accepts only /sui (cvtqt and cvtqs) +;; V_SV accepts /v and /sv (cvtql only) +;; V_SV_SVI accepts /v, /sv and /svi (cvttq only) +;; U_SU_SUI accepts /u, /su and /sui (most fp instructions) +;; +;; The actual suffix emitted is controlled by alpha_fptm. + +(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui" + (const_string "none")) + +;; The length of an instruction sequence in bytes. + +(define_attr "length" "" + (const_int 4)) + +;; The USEGP attribute marks instructions that have relocations that use +;; the GP. + +(define_attr "usegp" "no,yes" + (cond [(eq_attr "type" "ldsym,jsr") + (const_string "yes") + (eq_attr "type" "ild,fld,ist,fst") + (symbol_ref "((enum attr_usegp) alpha_find_lo_sum_using_gp (insn))") + ] + (const_string "no"))) + +;; The CANNOT_COPY attribute marks instructions with relocations that +;; cannot easily be duplicated. This includes insns with gpdisp relocs +;; since they have to stay in 1-1 correspondence with one another. This +;; also includes jsr insns, since they must stay in correspondence with +;; the immediately following gpdisp instructions. + +(define_attr "cannot_copy" "false,true" + (const_string "false")) + +;; Include scheduling descriptions. + +(include "ev4.md") +(include "ev5.md") +(include "ev6.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; First define the arithmetic insns. Note that the 32-bit forms also +;; sign-extend. 
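
A minimal C sketch of the rule stated in the comment above, illustrative only and not part of the imported tree: Alpha's 32-bit arithmetic instructions such as addl produce a 32-bit result that is sign-extended into the full 64-bit register, which is why the patterns that follow can describe addl both as an SImode add and as (sign_extend:DI (plus:SI ...)). The helper name alpha_addl is invented for the example.

    #include <assert.h>
    #include <stdint.h>

    /* Model of the Alpha addl instruction: the sum is computed in 32 bits
       and then sign-extended into the 64-bit destination register.  */
    static int64_t
    alpha_addl (int64_t ra, int64_t rb)
    {
      uint32_t sum = (uint32_t) ra + (uint32_t) rb;   /* low 32 bits only */
      return (sum & 0x80000000u                       /* manual sign-extension */
              ? (int64_t) sum - 0x100000000LL
              : (int64_t) sum);
    }

    int
    main (void)
    {
      /* 0x7fffffff + 1 wraps to INT32_MIN and is sign-extended.  */
      assert (alpha_addl (0x7fffffff, 1) == (int64_t) INT32_MIN);
      /* The upper 32 bits of the inputs do not participate.  */
      assert (alpha_addl (0x123456789LL, 0) == 0x23456789);
      return 0;
    }
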
+ +;; Handle 32-64 bit extension from memory to a floating point register +;; specially, since this occurs frequently in int->double conversions. +;; +;; Note that while we must retain the =f case in the insn for reload's +;; benefit, it should be eliminated after reload, so we should never emit +;; code for that case. But we don't reject the possibility. + +(define_expand "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] + "" + "") + +(define_insn "*cvtlq" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:SF 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTLQ))] + "" + "cvtlq %1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*extendsidi2_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,!*f") + (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))] + "" + "@ + addl $31,%1,%0 + ldl %0,%1 + lds %0,%1\;cvtlq %0,%0" + [(set_attr "type" "iadd,ild,fld") + (set_attr "length" "*,*,8")]) + +(define_split + [(set (match_operand:DI 0 "hard_fp_register_operand" "") + (sign_extend:DI (match_operand:SI 1 "memory_operand" "")))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (unspec:DI [(match_dup 2)] UNSPEC_CVTLQ))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +;; Optimize sign-extension of SImode loads. This shows up in the wake of +;; reload when converting fp->int. + +(define_peephole2 + [(set (match_operand:SI 0 "hard_int_register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_operand:DI 2 "hard_int_register_operand" "") + (sign_extend:DI (match_dup 0)))] + "true_regnum (operands[0]) == true_regnum (operands[2]) + || peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (sign_extend:DI (match_dup 1)))] + "") + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ,rJ") + (match_operand:SI 2 "add_operand" "rI,O,K,L")))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0 + lda %0,%2(%r1) + ldah %0,%h2(%r1)") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "! add_operand (operands[2], SImode)" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + + operands[3] = GEN_INT (rest); + operands[4] = GEN_INT (low); +}) + +(define_insn "*addsi_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "sext_add_operand" "rI,O"))))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0") + +(define_insn "*addsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (subreg:SI (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "sext_add_operand" "rI,O")) + 0)))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_not_elim_operand" "") + (match_operand:SI 2 "const_int_operand" "")))) + (clobber (match_operand:SI 3 "reg_not_elim_operand" ""))] + "! 
sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) % 4 == 0" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3) + (match_dup 5)) + (match_dup 1))))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]) / 4; + int mult = 4; + + if (val % 2 == 0) + val /= 2, mult = 8; + + operands[4] = GEN_INT (val); + operands[5] = GEN_INT (mult); +}) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI + (plus:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2 "" "") + (match_operand 3 "" "")]) + (match_operand:SI 4 "add_operand" "")))) + (clobber (match_operand:DI 5 "register_operand" ""))] + "" + [(set (match_dup 5) (match_dup 6)) + (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 7) (match_dup 4))))] +{ + operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[7] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn "addvsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "sext_add_operand" "rI,O"))) + (trap_if (ne (plus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2))) + (sign_extend:DI (plus:SI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "@ + addlv %r1,%2,%0 + sublv %r1,%n2,%0") + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "add_operand" "")))] + "" + "") + +(define_insn "*adddi_er_lo16_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp16_symbolic_operand" "")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!dtprel") + +(define_insn "*adddi_er_hi32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "dtp32_symbolic_operand" ""))))] + "HAVE_AS_TLS" + "ldah %0,%2(%1)\t\t!dtprelhi") + +(define_insn "*adddi_er_lo32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp32_symbolic_operand" "")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!dtprello") + +(define_insn "*adddi_er_lo16_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp16_symbolic_operand" "")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!tprel") + +(define_insn "*adddi_er_hi32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "tp32_symbolic_operand" ""))))] + "HAVE_AS_TLS" + "ldah %0,%2(%1)\t\t!tprelhi") + +(define_insn "*adddi_er_lo32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp32_symbolic_operand" "")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!tprello") + +(define_insn "*adddi_er_high_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "local_symbolic_operand" ""))))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + "ldah %0,%2(%1)\t\t!gprelhigh" + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (high:DI (match_operand:DI 1 "local_symbolic_operand" "")))] + 
"TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1))))] + "operands[2] = pic_offset_table_rtx;") + +;; We used to expend quite a lot of effort choosing addq/subq/lda. +;; With complications like +;; +;; The NT stack unwind code can't handle a subq to adjust the stack +;; (that's a bug, but not one we can do anything about). As of NT4.0 SP3, +;; the exception handling code will loop if a subq is used and an +;; exception occurs. +;; +;; The 19980616 change to emit prologues as RTL also confused some +;; versions of GDB, which also interprets prologues. This has been +;; fixed as of GDB 4.18, but it does not harm to unconditionally +;; use lda here. +;; +;; and the fact that the three insns schedule exactly the same, it's +;; just not worth the effort. + +(define_insn "*adddi_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r") + (match_operand:DI 2 "add_operand" "r,K,L")))] + "" + "@ + addq %1,%2,%0 + lda %0,%2(%1) + ldah %0,%h2(%1)") + +;; ??? Allow large constants when basing off the frame pointer or some +;; virtual register that may eliminate to the frame pointer. This is +;; done because register elimination offsets will change the hi/lo split, +;; and if we split before reload, we will require additional instructions. + +(define_insn "*adddi_fp_hack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "reg_no_subreg_operand" "r,r,r") + (match_operand:DI 2 "const_int_operand" "K,L,n")))] + "NONSTRICT_REG_OK_FP_BASE_P (operands[1]) + && INTVAL (operands[2]) >= 0 + /* This is the largest constant an lda+ldah pair can add, minus + an upper bound on the displacement between SP and AP during + register elimination. See INITIAL_ELIMINATION_OFFSET. */ + && INTVAL (operands[2]) + < (0x7fff8000 + - FIRST_PSEUDO_REGISTER * UNITS_PER_WORD + - ALPHA_ROUND(crtl->outgoing_args_size) + - (ALPHA_ROUND (get_frame_size () + + max_reg_num () * UNITS_PER_WORD + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size))" + "@ + lda %0,%2(%1) + ldah %0,%h2(%1) + #") + +;; Don't do this if we are adjusting SP since we don't want to do it +;; in two steps. Don't split FP sources for the reason listed above. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "const_int_operand" "")))] + "! 
add_operand (operands[2], DImode) + && operands[0] != stack_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[1] != arg_pointer_rtx" + [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + rtx rest_rtx = GEN_INT (rest); + + operands[4] = GEN_INT (low); + if (satisfies_constraint_L (rest_rtx)) + operands[3] = rest_rtx; + else if (can_create_pseudo_p ()) + { + operands[3] = gen_reg_rtx (DImode); + emit_move_insn (operands[3], operands[2]); + emit_insn (gen_adddi3 (operands[0], operands[1], operands[3])); + DONE; + } + else + FAIL; +}) + +(define_insn "*saddl" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2addl %1,%3,%0 + s%2subl %1,%n3,%0") + +(define_insn "*saddl_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O"))))] + "" + "@ + s%2addl %1,%3,%0 + s%2subl %1,%n3,%0") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI + (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2 "" "") + (match_operand 3 "" "")]) + (match_operand:SI 4 "const48_operand" "")) + (match_operand:SI 5 "sext_add_operand" "")))) + (clobber (match_operand:DI 6 "reg_not_elim_operand" ""))] + "" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4)) + (match_dup 5))))] +{ + operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[8] = gen_lowpart (SImode, operands[6]); +}) + +(define_insn "*saddq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const48_operand" "I,I")) + (match_operand:DI 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2addq %1,%3,%0 + s%2subq %1,%n3,%0") + +(define_insn "addvdi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "sext_add_operand" "rI,O"))) + (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (plus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "@ + addqv %r1,%2,%0 + subqv %r1,%n2,%0") + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))] + "" + "subl $31,%1,%0") + +(define_insn "*negsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (neg:SI + (match_operand:SI 1 "reg_or_8bit_operand" "rI"))))] + "" + "subl $31,%1,%0") + +(define_insn "negvsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "register_operand" "r"))) + (trap_if (ne (neg:DI (sign_extend:DI (match_dup 1))) + (sign_extend:DI (neg:SI (match_dup 1)))) + (const_int 0))] + "" + "sublv $31,%1,%0") + +(define_insn "negdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))] + "" + "subq $31,%1,%0") + 
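
The addsi3 and adddi3 splitters above break a constant that no single instruction can add into a sign-extended low 16-bit displacement (added with lda) plus a remainder that is a multiple of 65536 (added with ldah when it satisfies constraint L). The following self-contained C check of that decomposition is illustrative only; it reuses the splitters' own expressions, and the test values are arbitrary.

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      const int64_t vals[] = { 0x12345678, -0x7fff8000, 0x1ffff, -1, 0x8000 };
      unsigned int i;

      for (i = 0; i < sizeof vals / sizeof vals[0]; i++)
        {
          int64_t val = vals[i];
          /* Same arithmetic as the splitters: treat the low 16 bits of VAL
             as a signed 16-bit quantity.  */
          int64_t low = (val & 0xffff) - 2 * (val & 0x8000);
          int64_t rest = val - low;

          assert (low >= -0x8000 && low <= 0x7fff); /* fits lda's displacement */
          assert (rest % 0x10000 == 0);             /* ldah adds rest >> 16 */
          assert (low + rest == val);               /* decomposition is exact */
        }
      return 0;
    }
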
+(define_insn "negvdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r"))) + (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1))) + (sign_extend:TI (neg:DI (match_dup 1)))) + (const_int 0))] + "" + "subqv $31,%1,%0") + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI")))] + "" + "subl %r1,%2,%0") + +(define_insn "*subsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "" + "subl %r1,%2,%0") + +(define_insn "*subsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")) + 0)))] + "" + "subl %r1,%2,%0") + +(define_insn "subvsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (minus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2))) + (sign_extend:DI (minus:SI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "sublv %r1,%2,%0") + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")))] + "" + "subq %r1,%2,%0") + +(define_insn "*ssubl" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2subl %1,%3,%0") + +(define_insn "*ssubl_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] + "" + "s%2subl %1,%3,%0") + +(define_insn "*ssubq" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r") + (match_operand:DI 2 "const48_operand" "I")) + (match_operand:DI 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2subq %1,%3,%0") + +(define_insn "subvdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (minus:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "subqv %r1,%2,%0") + +;; The Unicos/Mk assembler doesn't support mull. 
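
The trap_if patterns above (addvsi3, addvdi3, negvsi2, negvdi2, subvsi3, subvdi3) all encode the same overflow test: the -v form of an instruction traps exactly when the operation carried out in a widened mode disagrees with the sign-extension of its narrow result. A hedged C sketch of that test for addlv follows; the function name is invented for illustration. (The Unicos/Mk remark just above concerns the mull-based multiply patterns that come next.)

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Does addl of A and B overflow, i.e. would addlv trap?  This mirrors
       (ne (plus:DI (sign_extend:DI a) (sign_extend:DI b))
           (sign_extend:DI (plus:SI a b))).  */
    static bool
    addlv_would_trap (int32_t a, int32_t b)
    {
      int64_t wide = (int64_t) a + (int64_t) b;     /* exact widened sum */
      uint32_t w32 = (uint32_t) wide;               /* wrapped 32-bit result */
      int64_t narrow = (w32 & 0x80000000u           /* sign-extend it again */
                        ? (int64_t) w32 - 0x100000000LL
                        : (int64_t) w32);
      return wide != narrow;
    }

    int
    main (void)
    {
      assert (!addlv_would_trap (1000, 2000));
      assert (addlv_would_trap (INT32_MAX, 1));
      assert (addlv_would_trap (INT32_MIN, -1));
      assert (!addlv_would_trap (INT32_MIN, 0));
      return 0;
    }
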
+ +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI")))] + "!TARGET_ABI_UNICOSMK" + "mull %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "si")]) + +(define_insn "*mulsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "!TARGET_ABI_UNICOSMK" + "mull %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "si")]) + +(define_insn "mulvsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (mult:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2))) + (sign_extend:DI (mult:SI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "!TARGET_ABI_UNICOSMK" + "mullv %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "si")]) + +(define_insn "muldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")))] + "" + "mulq %r1,%2,%0" + [(set_attr "type" "imul")]) + +(define_insn "mulvdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (mult:TI (sign_extend:TI (match_dup 1)) + (sign_extend:TI (match_dup 2))) + (sign_extend:TI (mult:DI (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "mulqv %r1,%2,%0" + [(set_attr "type" "imul")]) + +(define_expand "umuldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "")) + (match_operand:DI 2 "reg_or_8bit_operand" "")) + (const_int 64))))] + "" +{ + if (REG_P (operands[2])) + operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]); +}) + +(define_insn "*umuldi3_highpart_reg" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "r")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r"))) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_insn "*umuldi3_highpart_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) + (match_operand:TI 2 "cint8_operand" "I")) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +;; The divide and remainder operations take their inputs from r24 and +;; r25, put their output in r27, and clobber r23 and r28 on all +;; systems except Unicos/Mk. On Unicos, the standard library provides +;; subroutines which use the standard calling convention and work on +;; DImode operands. + +;; ??? Force sign-extension here because some versions of OSF/1 and +;; Interix/NT don't do the right thing if the inputs are not properly +;; sign-extended. But Linux, for instance, does not have this +;; problem. Is it worth the complication here to eliminate the sign +;; extension? 
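
The umuldi3_highpart patterns above expand to the umulh instruction, which returns the upper 64 bits of the full 128-bit unsigned product. A small C model of that operation, illustrative only; it assumes the unsigned __int128 extension that GCC itself provides on 64-bit hosts.

    #include <assert.h>
    #include <stdint.h>

    /* Model of Alpha's umulh ra,rb,rc: high half of the unsigned product.  */
    static uint64_t
    umulh (uint64_t a, uint64_t b)
    {
      return (uint64_t) (((unsigned __int128) a * b) >> 64);
    }

    int
    main (void)
    {
      /* Small products leave the high half zero.  */
      assert (umulh (12345, 67890) == 0);
      /* 2^32 * 2^32 = 2^64, whose high half is 1.  */
      assert (umulh (1ULL << 32, 1ULL << 32) == 1);
      /* (2^64 - 1)^2 = 2^128 - 2^65 + 1, whose high half is 2^64 - 2.  */
      assert (umulh (UINT64_MAX, UINT64_MAX) == UINT64_MAX - 1);
      return 0;
    }
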
+ +(define_expand "divsi3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" ""))) + (parallel [(set (match_dup 5) + (sign_extend:DI (div:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand" "") + (subreg:SI (match_dup 5) 0))] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "udivsi3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" ""))) + (parallel [(set (match_dup 5) + (sign_extend:DI (udiv:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand" "") + (subreg:SI (match_dup 5) 0))] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "modsi3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" ""))) + (parallel [(set (match_dup 5) + (sign_extend:DI (mod:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand" "") + (subreg:SI (match_dup 5) 0))] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "umodsi3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" ""))) + (parallel [(set (match_dup 5) + (sign_extend:DI (umod:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand" "") + (subreg:SI (match_dup 5) 0))] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "divdi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" + "") + +(define_expand "udivdi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (udiv:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "! TARGET_ABI_OPEN_VMS && ! 
TARGET_ABI_UNICOSMK" + "") + +(define_expand "moddi3" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "register_operand" ""))] + "!TARGET_ABI_OPEN_VMS" +{ + if (TARGET_ABI_UNICOSMK) + emit_insn (gen_moddi3_umk (operands[0], operands[1], operands[2])); + else + emit_insn (gen_moddi3_dft (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "moddi3_dft" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mod:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" + "") + +;; On Unicos/Mk, we do as the system's C compiler does: +;; compute the quotient, multiply and subtract. + +(define_expand "moddi3_umk" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "register_operand" ""))] + "TARGET_ABI_UNICOSMK" +{ + rtx div, mul = gen_reg_rtx (DImode); + + div = expand_binop (DImode, sdiv_optab, operands[1], operands[2], + NULL_RTX, 0, OPTAB_LIB); + div = force_reg (DImode, div); + emit_insn (gen_muldi3 (mul, operands[2], div)); + emit_insn (gen_subdi3 (operands[0], operands[1], mul)); + DONE; +}) + +(define_expand "umoddi3" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "register_operand" ""))] + "! TARGET_ABI_OPEN_VMS" +{ + if (TARGET_ABI_UNICOSMK) + emit_insn (gen_umoddi3_umk (operands[0], operands[1], operands[2])); + else + emit_insn (gen_umoddi3_dft (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "umoddi3_dft" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (umod:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" + "") + +(define_expand "umoddi3_umk" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "register_operand" ""))] + "TARGET_ABI_UNICOSMK" +{ + rtx div, mul = gen_reg_rtx (DImode); + + div = expand_binop (DImode, udiv_optab, operands[1], operands[2], + NULL_RTX, 1, OPTAB_LIB); + div = force_reg (DImode, div); + emit_insn (gen_muldi3 (mul, operands[2], div)); + emit_insn (gen_subdi3 (operands[0], operands[1], mul)); + DONE; +}) + +;; Lengths of 8 for ldq $t12,__divq($gp); jsr $t9,($t12),__divq as +;; expanded by the assembler. + +(define_insn_and_split "*divmodsi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && ! 
TARGET_ABI_OPEN_VMS" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (sign_extend:DI (match_dup 3))) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divl"; + break; + case UDIV: + str = "__divlu"; + break; + case MOD: + str = "__reml"; + break; + case UMOD: + str = "__remlu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); +} + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn "*divmodsi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand" "")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && ! TARGET_ABI_OPEN_VMS" + "jsr $23,($27),__%E3%j5" + [(set_attr "type" "jsr") + (set_attr "length" "4")]) + +(define_insn "*divmodsi_internal" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK" + "%E3 %1,%2,%0" + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn_and_split "*divmoddi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && ! TARGET_ABI_OPEN_VMS" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (match_dup 3)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divq"; + break; + case UDIV: + str = "__divqu"; + break; + case MOD: + str = "__remq"; + break; + case UMOD: + str = "__remqu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); +} + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn "*divmoddi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand" "")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && ! TARGET_ABI_OPEN_VMS" + "jsr $23,($27),__%E3%j5" + [(set_attr "type" "jsr") + (set_attr "length" "4")]) + +(define_insn "*divmoddi_internal" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "! TARGET_ABI_OPEN_VMS && ! 
TARGET_ABI_UNICOSMK" + "%E3 %1,%2,%0" + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +;; Next are the basic logical operations. We only expose the DImode operations +;; to the rtl expanders, but SImode versions exist for combine as well as for +;; the atomic operation splitters. + +(define_insn "*andsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:SI 2 "and_operand" "rI,N,MH")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +(define_insn "anddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (and:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:DI 2 "and_operand" "rI,N,MH")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +;; There are times when we can split an AND into two AND insns. This occurs +;; when we can first clear any bytes and then clear anything else. For +;; example "I & 0xffff07" is "(I & 0xffffff) & 0xffffffffffffff07". +;; Only do this when running on 64-bit host since the computations are +;; too messy otherwise. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (and:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "const_int_operand" "")))] + "HOST_BITS_PER_WIDE_INT == 64 && ! and_operand (operands[2], DImode)" + [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))] +{ + unsigned HOST_WIDE_INT mask1 = INTVAL (operands[2]); + unsigned HOST_WIDE_INT mask2 = mask1; + int i; + + /* For each byte that isn't all zeros, make it all ones. */ + for (i = 0; i < 64; i += 8) + if ((mask1 & ((HOST_WIDE_INT) 0xff << i)) != 0) + mask1 |= (HOST_WIDE_INT) 0xff << i; + + /* Now turn on any bits we've just turned off. */ + mask2 |= ~ mask1; + + operands[3] = GEN_INT (mask1); + operands[4] = GEN_INT (mask2); +}) + +(define_expand "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))] + "" +{ + if (! TARGET_BWX) + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*zero_extendqihi2_bwx" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_BWX" + "@ + and %1,0xff,%0 + ldbu %0,%1" + [(set_attr "type" "ilog,ild")]) + +(define_insn "*zero_extendqihi2_nobwx" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))] + "! TARGET_BWX" + "and %1,0xff,%0" + [(set_attr "type" "ilog")]) + +(define_expand "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))] + "" +{ + if (! TARGET_BWX) + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*zero_extendqisi2_bwx" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_BWX" + "@ + and %1,0xff,%0 + ldbu %0,%1" + [(set_attr "type" "ilog,ild")]) + +(define_insn "*zero_extendqisi2_nobwx" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))] + "! 
TARGET_BWX" + "and %1,0xff,%0" + [(set_attr "type" "ilog")]) + +(define_expand "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "")))] + "" +{ + if (! TARGET_BWX) + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*zero_extendqidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_BWX" + "@ + and %1,0xff,%0 + ldbu %0,%1" + [(set_attr "type" "ilog,ild")]) + +(define_insn "*zero_extendqidi2_nobwx" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "! TARGET_BWX" + "and %1,0xff,%0" + [(set_attr "type" "ilog")]) + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "" +{ + if (! TARGET_BWX) + operands[1] = force_reg (HImode, operands[1]); +}) + +(define_insn "*zero_extendhisi2_bwx" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_BWX" + "@ + zapnot %1,3,%0 + ldwu %0,%1" + [(set_attr "type" "shift,ild")]) + +(define_insn "*zero_extendhisi2_nobwx" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "! TARGET_BWX" + "zapnot %1,3,%0" + [(set_attr "type" "shift")]) + +(define_expand "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))] + "" +{ + if (! TARGET_BWX) + operands[1] = force_reg (HImode, operands[1]); +}) + +(define_insn "*zero_extendhidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_BWX" + "@ + zapnot %1,3,%0 + ldwu %0,%1" + [(set_attr "type" "shift,ild")]) + +(define_insn "*zero_extendhidi2_nobwx" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "zapnot %1,3,%0" + [(set_attr "type" "shift")]) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "zapnot %1,15,%0" + [(set_attr "type" "shift")]) + +(define_insn "*andnotsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")) + (match_operand:SI 2 "reg_or_0_operand" "rJ")))] + "" + "bic %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "andnotdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")) + (match_operand:DI 2 "reg_or_0_operand" "rJ")))] + "" + "bic %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ior:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*one_cmplsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI 
(match_operand:SI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornotsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")) + (match_operand:SI 2 "reg_or_0_operand" "rJ")))] + "" + "ornot %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornotdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")) + (match_operand:DI 2 "reg_or_0_operand" "rJ")))] + "" + "ornot %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (xor:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (xor:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornotsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (xor:SI (match_operand:SI 1 "register_operand" "%rJ") + (match_operand:SI 2 "register_operand" "rI"))))] + "" + "eqv %r1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornotdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (xor:DI (match_operand:DI 1 "register_operand" "%rJ") + (match_operand:DI 2 "register_operand" "rI"))))] + "" + "eqv %r1,%2,%0" + [(set_attr "type" "ilog")]) + +;; Handle FFS and related insns iff we support CIX. + +(define_expand "ffsdi2" + [(set (match_dup 2) + (ctz:DI (match_operand:DI 1 "register_operand" ""))) + (set (match_dup 3) + (plus:DI (match_dup 2) (const_int 1))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (eq (match_dup 1) (const_int 0)) + (const_int 0) (match_dup 3)))] + "TARGET_CIX" +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "clzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctlz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "cttz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctpop %1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "!optimize_size" +{ + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + emit_insn (gen_insxh (t0, gen_lowpart (DImode, operands[1]), + GEN_INT (32), GEN_INT (WORDS_BIG_ENDIAN ? 
0 : 7))); + emit_insn (gen_inswl_const (t1, gen_lowpart (HImode, operands[1]), + GEN_INT (24))); + emit_insn (gen_iordi3 (t1, t0, t1)); + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x5))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xa))); + emit_insn (gen_addsi3 (operands[0], gen_lowpart (SImode, t0), + gen_lowpart (SImode, t1))); + DONE; +}) + +(define_expand "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "") + (bswap:DI (match_operand:DI 1 "register_operand" "")))] + "!optimize_size" +{ + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + /* This method of shifting and masking is not specific to Alpha, but + is only profitable on Alpha because of our handy byte zap insn. */ + + emit_insn (gen_lshrdi3 (t0, operands[1], GEN_INT (32))); + emit_insn (gen_ashldi3 (t1, operands[1], GEN_INT (32))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xcc))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x33))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (8))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (8))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xaa))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x55))); + emit_insn (gen_iordi3 (operands[0], t0, t1)); + DONE; +}) + +;; Next come the shifts and the various extract and insert operations. + +(define_insn "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "P,rS")))] + "" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addq %r1,%r1,%0"; + else + return "s%P2addq %r1,0,%0"; + case 1: + return "sll %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift")]) + +(define_insn "*ashldi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "const_int_operand" "P")) + 0)))] + "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 3" +{ + if (operands[2] == const1_rtx) + return "addl %r1,%r1,%0"; + else + return "s%P2addl %r1,0,%0"; +} + [(set_attr "type" "iadd")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" + "srl %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" + "sra %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendqihi2" + [(set (match_dup 2) + (ashift:DI (match_operand:QI 1 "some_operand" "") + (const_int 56))) + (set (match_operand:HI 0 "register_operand" "") + (ashiftrt:DI (match_dup 2) + (const_int 56)))] + "" +{ + if (TARGET_BWX) + { + emit_insn (gen_extendqihi2x (operands[0], + force_reg (QImode, operands[1]))); + DONE; + } + + /* If we have an unaligned MEM, extend to DImode (which we do + specially) and then copy to the result. 
*/ + if (unaligned_memory_operand (operands[1], HImode)) + { + rtx temp = gen_reg_rtx (DImode); + + emit_insn (gen_extendqidi2 (temp, operands[1])); + emit_move_insn (operands[0], gen_lowpart (HImode, temp)); + DONE; + } + + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, force_reg (QImode, operands[1])); + operands[2] = gen_reg_rtx (DImode); +}) + +(define_insn "extendqidi2x" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendhidi2x" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextw %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendqisi2x" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendhisi2x" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextw %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendqihi2x" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendqisi2" + [(set (match_dup 2) + (ashift:DI (match_operand:QI 1 "some_operand" "") + (const_int 56))) + (set (match_operand:SI 0 "register_operand" "") + (ashiftrt:DI (match_dup 2) + (const_int 56)))] + "" +{ + if (TARGET_BWX) + { + emit_insn (gen_extendqisi2x (operands[0], + force_reg (QImode, operands[1]))); + DONE; + } + + /* If we have an unaligned MEM, extend to a DImode form of + the result (which we do specially). */ + if (unaligned_memory_operand (operands[1], QImode)) + { + rtx temp = gen_reg_rtx (DImode); + + emit_insn (gen_extendqidi2 (temp, operands[1])); + emit_move_insn (operands[0], gen_lowpart (SImode, temp)); + DONE; + } + + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, force_reg (QImode, operands[1])); + operands[2] = gen_reg_rtx (DImode); +}) + +(define_expand "extendqidi2" + [(set (match_dup 2) + (ashift:DI (match_operand:QI 1 "some_operand" "") + (const_int 56))) + (set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_dup 2) + (const_int 56)))] + "" +{ + if (TARGET_BWX) + { + emit_insn (gen_extendqidi2x (operands[0], + force_reg (QImode, operands[1]))); + DONE; + } + + if (unaligned_memory_operand (operands[1], QImode)) + { + rtx seq = gen_unaligned_extendqidi (operands[0], XEXP (operands[1], 0)); + alpha_set_memflags (seq, operands[1]); + emit_insn (seq); + DONE; + } + + operands[1] = gen_lowpart (DImode, force_reg (QImode, operands[1])); + operands[2] = gen_reg_rtx (DImode); +}) + +(define_expand "extendhisi2" + [(set (match_dup 2) + (ashift:DI (match_operand:HI 1 "some_operand" "") + (const_int 48))) + (set (match_operand:SI 0 "register_operand" "") + (ashiftrt:DI (match_dup 2) + (const_int 48)))] + "" +{ + if (TARGET_BWX) + { + emit_insn (gen_extendhisi2x (operands[0], + force_reg (HImode, operands[1]))); + DONE; + } + + /* If we have an unaligned MEM, extend to a DImode form of + the result (which we do specially). 
*/ + if (unaligned_memory_operand (operands[1], HImode)) + { + rtx temp = gen_reg_rtx (DImode); + + emit_insn (gen_extendhidi2 (temp, operands[1])); + emit_move_insn (operands[0], gen_lowpart (SImode, temp)); + DONE; + } + + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, force_reg (HImode, operands[1])); + operands[2] = gen_reg_rtx (DImode); +}) + +(define_expand "extendhidi2" + [(set (match_dup 2) + (ashift:DI (match_operand:HI 1 "some_operand" "") + (const_int 48))) + (set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_dup 2) + (const_int 48)))] + "" +{ + if (TARGET_BWX) + { + emit_insn (gen_extendhidi2x (operands[0], + force_reg (HImode, operands[1]))); + DONE; + } + + if (unaligned_memory_operand (operands[1], HImode)) + { + rtx seq = gen_unaligned_extendhidi (operands[0], XEXP (operands[1], 0)); + + alpha_set_memflags (seq, operands[1]); + emit_insn (seq); + DONE; + } + + operands[1] = gen_lowpart (DImode, force_reg (HImode, operands[1])); + operands[2] = gen_reg_rtx (DImode); +}) + +;; Here's how we sign extend an unaligned byte and halfword. Doing this +;; as a pattern saves one instruction. The code is similar to that for +;; the unaligned loads (see below). +;; +;; Operand 1 is the address, operand 0 is the result. +(define_expand "unaligned_extendqidi" + [(use (match_operand:QI 0 "register_operand" "")) + (use (match_operand:DI 1 "address_operand" ""))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + if (WORDS_BIG_ENDIAN) + emit_insn (gen_unaligned_extendqidi_be (operands[0], operands[1])); + else + emit_insn (gen_unaligned_extendqidi_le (operands[0], operands[1])); + DONE; +}) + +(define_expand "unaligned_extendqidi_le" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_dup 4) (const_int 56)))] + "! WORDS_BIG_ENDIAN" +{ + operands[2] = get_unaligned_offset (operands[1], 1); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_expand "unaligned_extendqidi_be" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (ashift:DI + (and:DI + (plus:DI (match_dup 2) (const_int 1)) + (const_int 7)) + (const_int 3)))) + (set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_dup 4) (const_int 56)))] + "WORDS_BIG_ENDIAN" +{ + operands[2] = get_unaligned_offset (operands[1], -1); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_expand "unaligned_extendhidi" + [(use (match_operand:QI 0 "register_operand" "")) + (use (match_operand:DI 1 "address_operand" ""))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + if (WORDS_BIG_ENDIAN) + emit_insn (gen_unaligned_extendhidi_be (operands[0], operands[1])); + else + emit_insn (gen_unaligned_extendhidi_le (operands[0], operands[1])); + DONE; +}) + +(define_expand "unaligned_extendhidi_le" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_dup 4) (const_int 48)))] + "! 
WORDS_BIG_ENDIAN" +{ + operands[2] = get_unaligned_offset (operands[1], 2); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_expand "unaligned_extendhidi_be" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (ashift:DI + (and:DI + (plus:DI (match_dup 2) (const_int 1)) + (const_int 7)) + (const_int 3)))) + (set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_dup 4) (const_int 48)))] + "WORDS_BIG_ENDIAN" +{ + operands[2] = get_unaligned_offset (operands[1], -1); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_insn "*extxl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "mul8_operand" "I")))] + "" + "ext%M2l %r1,%s3,%0" + [(set_attr "type" "shift")]) + +(define_insn "extxl_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (ashift:DI (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "! WORDS_BIG_ENDIAN" + "ext%M2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +(define_insn "extxl_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (minus:DI + (const_int 56) + (ashift:DI + (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "ext%M2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +;; Combine has some strange notion of preserving existing undefined behavior +;; in shifts larger than a word size. So capture these patterns that it +;; should have turned into zero_extracts. + +(define_insn "*extxl_1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))) + (match_operand:DI 3 "mode_mask_operand" "n")))] + "! WORDS_BIG_ENDIAN" + "ext%U3l %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "*extxl_1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (lshiftrt:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (minus:DI (const_int 56) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3)))) + (match_operand:DI 3 "mode_mask_operand" "n")))] + "WORDS_BIG_ENDIAN" + "ext%U3l %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "*extql_2_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "! 
WORDS_BIG_ENDIAN" + "extql %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "*extql_2_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (minus:DI (const_int 56) + (ashift:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "extql %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extqh_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "! WORDS_BIG_ENDIAN" + "extqh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extqh_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI + (and:DI + (plus:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 1)) + (const_int 7)) + (const_int 3))))] + "WORDS_BIG_ENDIAN" + "extqh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extlh_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 2147483647)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "! WORDS_BIG_ENDIAN" + "extlh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extlh_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI + (ashift:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI + (and:DI + (plus:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 1)) + (const_int 7)) + (const_int 3))) + (const_int 2147483647)))] + "WORDS_BIG_ENDIAN" + "extlh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extwh_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 65535)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "! WORDS_BIG_ENDIAN" + "extwh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extwh_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI + (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI + (and:DI + (plus:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 1)) + (const_int 7)) + (const_int 3))) + (const_int 65535)))] + "WORDS_BIG_ENDIAN" + "extwh %r1,%2,%0" + [(set_attr "type" "shift")]) + +;; This converts an extXl into an extXh with an appropriate adjustment +;; to the address calculation. 
+ +;;(define_split +;; [(set (match_operand:DI 0 "register_operand" "") +;; (ashift:DI (zero_extract:DI (match_operand:DI 1 "register_operand" "") +;; (match_operand:DI 2 "mode_width_operand" "") +;; (ashift:DI (match_operand:DI 3 "" "") +;; (const_int 3))) +;; (match_operand:DI 4 "const_int_operand" ""))) +;; (clobber (match_operand:DI 5 "register_operand" ""))] +;; "INTVAL (operands[4]) == 64 - INTVAL (operands[2])" +;; [(set (match_dup 5) (match_dup 6)) +;; (set (match_dup 0) +;; (ashift:DI (zero_extract:DI (match_dup 1) (match_dup 2) +;; (ashift:DI (plus:DI (match_dup 5) +;; (match_dup 7)) +;; (const_int 3))) +;; (match_dup 4)))] +;; " +;;{ +;; operands[6] = plus_constant (operands[3], +;; INTVAL (operands[2]) / BITS_PER_UNIT); +;; operands[7] = GEN_INT (- INTVAL (operands[2]) / BITS_PER_UNIT); +;;}") + +(define_insn "*insbl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "insbl %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "inswl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "inswl %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "*insll_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "insll %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insbl_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "! WORDS_BIG_ENDIAN" + "insbl %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insbl_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "r")) + (minus:DI (const_int 56) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "insbl %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "inswl_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "! WORDS_BIG_ENDIAN" + "inswl %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "inswl_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "r")) + (minus:DI (const_int 56) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "inswl %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insll_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "! 
WORDS_BIG_ENDIAN" + "insll %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insll_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) + (minus:DI (const_int 56) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "insll %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insql_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "! WORDS_BIG_ENDIAN" + "insql %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insql_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (minus:DI (const_int 56) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "insql %1,%2,%0" + [(set_attr "type" "shift")]) + +;; Combine has this sometimes habit of moving the and outside of the +;; shift, making life more interesting. + +(define_insn "*insxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mul8_operand" "I")) + (match_operand:DI 3 "immediate_operand" "i")))] + "HOST_BITS_PER_WIDE_INT == 64 + && CONST_INT_P (operands[3]) + && (((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])))" +{ +#if HOST_BITS_PER_WIDE_INT == 64 + if ((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "insbl %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inswl %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "insll %1,%s2,%0"; +#endif + gcc_unreachable (); +} + [(set_attr "type" "shift")]) + +;; We do not include the insXh insns because they are complex to express +;; and it does not appear that we would ever want to generate them. +;; +;; Since we need them for block moves, though, cop out and use unspec. + +(define_insn "insxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_INSXH))] + "" + "ins%M2h %1,%3,%0" + [(set_attr "type" "shift")]) + +(define_insn "mskxl_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (ashift:DI + (match_operand:DI 2 "mode_mask_operand" "n") + (ashift:DI + (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3)))) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "! 
WORDS_BIG_ENDIAN" + "msk%U2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +(define_insn "mskxl_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (ashift:DI + (match_operand:DI 2 "mode_mask_operand" "n") + (minus:DI (const_int 56) + (ashift:DI + (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3))))) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "WORDS_BIG_ENDIAN" + "msk%U2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +;; We do not include the mskXh insns because it does not appear we would +;; ever generate one. +;; +;; Again, we do for block moves and we use unspec again. + +(define_insn "mskxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_MSKXH))] + "" + "msk%M2h %1,%3,%0" + [(set_attr "type" "shift")]) + +;; Prefer AND + NE over LSHIFTRT + AND. + +(define_insn_and_split "*ze_and_ne" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (match_operand 2 "const_int_operand" "I")))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + "#" + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) + (ne:DI (match_dup 0) (const_int 0)))] + "operands[3] = GEN_INT (1 << INTVAL (operands[2]));") + +;; Floating-point operations. All the double-precision insns can extend +;; from single, so indicate that. The exception are the ones that simply +;; play with the sign bits; it's not clear what to do there. + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (abs:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpys $f31,%R1,%0" + [(set_attr "type" "fcpys")]) + +(define_insn "*nabssf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (abs:SF (match_operand:SF 1 "reg_or_0_operand" "fG"))))] + "TARGET_FP" + "cpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "absdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (abs:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpys $f31,%R1,%0" + [(set_attr "type" "fcpys")]) + +(define_insn "*nabsdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (abs:DF (match_operand:DF 1 "reg_or_0_operand" "fG"))))] + "TARGET_FP" + "cpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "abstf2" + [(parallel [(set (match_operand:TF 0 "register_operand" "") + (abs:TF (match_operand:TF 1 "reg_or_0_operand" ""))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" +{ +#if HOST_BITS_PER_WIDE_INT >= 64 + operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63)); +#else + operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode)); +#endif +}) + +(define_insn_and_split "*abstf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (abs:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "alpha_split_tfmode_frobsign (operands, gen_andnotdi3); DONE;") + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpysn %R1,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "negdf2" + [(set (match_operand:DF 0 
"register_operand" "=f") + (neg:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpysn %R1,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "negtf2" + [(parallel [(set (match_operand:TF 0 "register_operand" "") + (neg:TF (match_operand:TF 1 "reg_or_0_operand" ""))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" +{ +#if HOST_BITS_PER_WIDE_INT >= 64 + operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63)); +#else + operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode)); +#endif +}) + +(define_insn_and_split "*negtf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (neg:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "alpha_split_tfmode_frobsign (operands, gen_xordi3); DONE;") + +(define_insn "copysignsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "reg_or_0_operand" "fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN))] + "TARGET_FP" + "cpys %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*ncopysignsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (unspec:SF [(match_operand:SF 1 "reg_or_0_operand" "fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN)))] + "TARGET_FP" + "cpysn %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "copysigndf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "reg_or_0_operand" "fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN))] + "TARGET_FP" + "cpys %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*ncopysigndf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (unspec:DF [(match_operand:DF 1 "reg_or_0_operand" "fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN)))] + "TARGET_FP" + "cpysn %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*addsf_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (plus:SF (match_operand:SF 1 "reg_or_0_operand" "%fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "add%,%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (plus:SF (match_operand:SF 1 "reg_or_0_operand" "%fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "add%,%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (plus:DF (match_operand:DF 1 "reg_or_0_operand" "%fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "adddf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (match_operand:DF 1 "reg_or_0_operand" "%fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext1" + [(set 
(match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "addtf3" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (PLUS, operands); DONE;") + +;; Define conversion operators between DFmode and SImode, using the cvtql +;; instruction. To allow combine et al to do useful things, we keep the +;; operation as a unit until after reload, at which point we split the +;; instructions. +;; +;; Note that we (attempt to) only consider this optimization when the +;; ultimate destination is memory. If we will be doing further integer +;; processing, it is cheaper to do the truncation in the int regs. + +(define_insn "*cvtql" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTQL))] + "TARGET_FP" + "cvtql%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "v_sv")]) + +(define_insn_and_split "*fix_truncdfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn_and_split "*fix_truncdfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 3 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 3 [(match_dup 1)])) + (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 4))] +{ + operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncdfdi_ieee" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_insn "*fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f") + 
(match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]))] + "TARGET_FP" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_expand "fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "") + (fix:DI (match_operand:DF 1 "reg_or_0_operand" "")))] + "TARGET_FP" + "") + +(define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "") + (unsigned_fix:DI (match_operand:DF 1 "reg_or_0_operand" "")))] + "TARGET_FP" + "") + +;; Likewise between SFmode and SImode. + +(define_insn_and_split "*fix_truncsfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn_and_split "*fix_truncsfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 3 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 3 [(float_extend:DF (match_dup 1))])) + (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 4))] +{ + operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncsfdi_ieee" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_insn "*fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))] + "TARGET_FP" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_expand "fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "") + (fix:DI (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" ""))))] + "TARGET_FP" + "") + +(define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "") + (unsigned_fix:DI + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" ""))))] + "TARGET_FP" + "") + +(define_expand "fix_trunctfdi2" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:TF 1 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FIX, operands); DONE;") + +(define_expand "fixuns_trunctfdi2" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:TF 1 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + 
"alpha_emit_xfloating_cvt (UNSIGNED_FIX, operands); DONE;") + +(define_insn "*floatdisf_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtq%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP" + "cvtq%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn_and_split "*floatsisf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); +}) + +(define_insn_and_split "*floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_FP" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 0)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); +}) + +(define_insn "*floatdidf_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtq%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP" + "cvtq%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn_and_split "*floatsidf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); +}) + +(define_insn_and_split "*floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_FP" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[3] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +(define_expand "floatditf2" + [(use (match_operand:TF 0 "register_operand" "")) + (use (match_operand:DI 1 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + 
"alpha_emit_xfloating_cvt (FLOAT, operands); DONE;") + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "TARGET_FP" + "alpha_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "TARGET_FP" + "alpha_emit_floatuns (operands); DONE;") + +(define_expand "floatunsditf2" + [(use (match_operand:TF 0 "register_operand" "")) + (use (match_operand:DI 1 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (UNSIGNED_FLOAT, operands); DONE;") + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "")))] + "TARGET_FP" +{ + if (alpha_fptm >= ALPHA_FPTM_SU) + operands[1] = force_reg (SFmode, operands[1]); +}) + +;; The Unicos/Mk assembler doesn't support cvtst, but we've already +;; asserted that alpha_fptm == ALPHA_FPTM_N. + +(define_insn "*extendsfdf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtsts %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*extendsfdf2_internal" + [(set (match_operand:DF 0 "register_operand" "=f,f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + cpys %1,%1,%0 + ld%, %0,%1 + st%- %1,%0" + [(set_attr "type" "fcpys,fld,fst")]) + +;; Use register_operand for operand 1 to prevent compress_float_constant +;; from doing something silly. When optimizing we'll put things back +;; together anyway. 
+(define_expand "extendsftf2" + [(use (match_operand:TF 0 "register_operand" "")) + (use (match_operand:SF 1 "register_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddftf2 (operands[0], tmp)); + DONE; +}) + +(define_expand "extenddftf2" + [(use (match_operand:TF 0 "register_operand" "")) + (use (match_operand:DF 1 "register_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT_EXTEND, operands); DONE;") + +(define_insn "*truncdfsf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "trunctfdf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:TF 1 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT_TRUNCATE, operands); DONE;") + +(define_expand "trunctfsf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:TF 1 "general_operand" ""))] + "TARGET_FP && TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmpf, sticky, arg, lo, hi; + + tmpf = gen_reg_rtx (DFmode); + sticky = gen_reg_rtx (DImode); + arg = copy_to_mode_reg (TFmode, operands[1]); + lo = gen_lowpart (DImode, arg); + hi = gen_highpart (DImode, arg); + + /* Convert the low word of the TFmode value into a sticky rounding bit, + then or it into the low bit of the high word. This leaves the sticky + bit at bit 48 of the fraction, which is representable in DFmode, + which prevents rounding error in the final conversion to SFmode. 
*/ + + emit_insn (gen_rtx_SET (VOIDmode, sticky, + gen_rtx_NE (DImode, lo, const0_rtx))); + emit_insn (gen_iordi3 (hi, hi, sticky)); + emit_insn (gen_trunctfdf2 (tmpf, arg)); + emit_insn (gen_truncdfsf2 (operands[0], tmpf)); + DONE; +}) + +(define_insn "*divsf3_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (div:SF (match_operand:SF 1 "reg_or_0_operand" "fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "div%,%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "si") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "divsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (div:SF (match_operand:SF 1 "reg_or_0_operand" "fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "div%,%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "si") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf3_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "divdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "divtf3" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (DIV, operands); DONE;") + +(define_insn "*mulsf3_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (mult:SF (match_operand:SF 1 "reg_or_0_operand" "%fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "mul%,%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + 
(set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (mult:SF (match_operand:SF 1 "reg_or_0_operand" "%fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "mul%,%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf3_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (mult:DF (match_operand:DF 1 "reg_or_0_operand" "%fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "muldf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (match_operand:DF 1 "reg_or_0_operand" "%fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "multf3" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (MULT, operands); DONE;") + +(define_insn "*subsf3_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (minus:SF (match_operand:SF 1 "reg_or_0_operand" "fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "sub%,%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (minus:SF (match_operand:SF 1 "reg_or_0_operand" "fG") + (match_operand:SF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "sub%,%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf3_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "subdf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" 
"fG") + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "subtf3" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand 1 "general_operand" "")) + (use (match_operand 2 "general_operand" ""))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (MINUS, operands); DONE;") + +(define_insn "*sqrtsf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (sqrt:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX && alpha_fptm >= ALPHA_FPTM_SU" + "sqrt%,%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "si") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "sqrtsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX" + "sqrt%,%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "si") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*sqrtdf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (sqrt:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX && alpha_fptm >= ALPHA_FPTM_SU" + "sqrt%-%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "sqrtdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (sqrt:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX" + "sqrt%-%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +;; Next are all the integer comparisons, and conditional moves and branches +;; and some of the related define_expand's and define_split's. 
+ +(define_insn "*setcc_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "alpha_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%C1 %2,%3,%0" + [(set_attr "type" "icmp")]) + +;; Yes, we can technically support reg_or_8bit_operand in operand 2, +;; but that's non-canonical rtl and allowing that causes inefficiencies +;; from cse on. +(define_insn "*setcc_swapped_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "alpha_swapped_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_0_operand" "rJ")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%c1 %r3,%2,%0" + [(set_attr "type" "icmp")]) + +;; Use match_operator rather than ne directly so that we can match +;; multiple integer modes. +(define_insn "*setne_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_CODE (operands[1]) == NE + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmpult $31,%2,%0" + [(set_attr "type" "icmp")]) + +;; The mode folding trick can't be used with const_int operands, since +;; reload needs to know the proper mode. +;; +;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand +;; in order to create more pairs of constants. As long as we're allowing +;; two constants at the same time, and will have to reload one of them... 
+ +(define_insn "*movqicc_internal" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r") + (if_then_else:QI + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:QI 1 "add_operand" "rI,0,rI,0") + (match_operand:QI 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + cmov%C2 %r3,%1,%0 + cmov%D2 %r3,%5,%0 + cmov%c2 %r4,%1,%0 + cmov%d2 %r4,%5,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movhicc_internal" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") + (if_then_else:HI + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:HI 1 "add_operand" "rI,0,rI,0") + (match_operand:HI 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + cmov%C2 %r3,%1,%0 + cmov%D2 %r3,%5,%0 + cmov%c2 %r4,%1,%0 + cmov%d2 %r4,%5,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movsicc_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:SI 1 "add_operand" "rI,0,rI,0") + (match_operand:SI 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + cmov%C2 %r3,%1,%0 + cmov%D2 %r3,%5,%0 + cmov%c2 %r4,%1,%0 + cmov%d2 %r4,%5,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movdicc_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (if_then_else:DI + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:DI 1 "add_operand" "rI,0,rI,0") + (match_operand:DI 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + cmov%C2 %r3,%1,%0 + cmov%D2 %r3,%5,%0 + cmov%c2 %r4,%1,%0 + cmov%d2 %r4,%5,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movqicc_lbc" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:QI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:QI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbc %r2,%1,%0 + cmovlbs %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movhicc_lbc" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (if_then_else:HI + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:HI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:HI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbc %r2,%1,%0 + cmovlbs %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movsicc_lbc" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:SI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:SI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbc %r2,%1,%0 + cmovlbs %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movdicc_lbc" + [(set (match_operand:DI 0 "register_operand" "=r,r") + 
(if_then_else:DI + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:DI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:DI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbc %r2,%1,%0 + cmovlbs %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movqicc_lbs" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:QI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:QI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbs %r2,%1,%0 + cmovlbc %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movhicc_lbs" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (if_then_else:HI + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:HI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:HI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbs %r2,%1,%0 + cmovlbc %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movsicc_lbs" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:SI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:SI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbs %r2,%1,%0 + cmovlbc %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movdicc_lbs" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:DI 1 "reg_or_8bit_operand" "rI,0") + (match_operand:DI 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbs %r2,%1,%0 + cmovlbc %r2,%3,%0" + [(set_attr "type" "icmov")]) + +;; For ABS, we have two choices, depending on whether the input and output +;; registers are the same or not. +(define_expand "absdi2" + [(set (match_operand:DI 0 "register_operand" "") + (abs:DI (match_operand:DI 1 "register_operand" "")))] + "" +{ + if (rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_absdi2_same (operands[0], gen_reg_rtx (DImode))); + else + emit_insn (gen_absdi2_diff (operands[0], operands[1])); + DONE; +}) + +(define_expand "absdi2_same" + [(set (match_operand:DI 1 "register_operand" "") + (neg:DI (match_operand:DI 0 "register_operand" ""))) + (set (match_dup 0) + (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 1)))] + "" + "") + +(define_expand "absdi2_diff" + [(set (match_operand:DI 0 "register_operand" "") + (neg:DI (match_operand:DI 1 "register_operand" ""))) + (set (match_dup 0) + (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 1)))] + "" + "") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (abs:DI (match_dup 0))) + (clobber (match_operand:DI 1 "register_operand" ""))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))] + "") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (abs:DI (match_operand:DI 1 "register_operand" "")))] + "! 
rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))] + "") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (neg:DI (abs:DI (match_dup 0)))) + (clobber (match_operand:DI 1 "register_operand" ""))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (le (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))] + "") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (neg:DI (abs:DI (match_operand:DI 1 "register_operand" ""))))] + "! rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (gt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))] + "") + +(define_insn "sminqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (smin:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ") + (match_operand:QI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "minsb8 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "uminqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (umin:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ") + (match_operand:QI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "minub8 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "smaxqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (smax:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ") + (match_operand:QI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "maxsb8 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "umaxqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (umax:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ") + (match_operand:QI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "maxub8 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "sminhi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (smin:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ") + (match_operand:HI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "minsw4 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "uminhi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (umin:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ") + (match_operand:HI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "minuw4 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "smaxhi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (smax:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ") + (match_operand:HI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "maxsw4 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "umaxhi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (umax:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ") + (match_operand:HI 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + "maxuw4 %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "smaxdi3" + [(set (match_dup 3) + (le:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + { operands[3] = gen_reg_rtx (DImode); }) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (smax:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (clobber (match_operand:DI 3 "register_operand" ""))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (le:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) 
(if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "") + +(define_insn "*smax_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smax:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "cmovlt %0,0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "smindi3" + [(set (match_dup 3) + (lt:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + { operands[3] = gen_reg_rtx (DImode); }) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (smin:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (clobber (match_operand:DI 3 "register_operand" ""))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (lt:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "") + +(define_insn "*smin_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smin:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "cmovgt %0,0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "umaxdi3" + [(set (match_dup 3) + (leu:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (umax:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (clobber (match_operand:DI 3 "register_operand" ""))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (leu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "") + +(define_expand "umindi3" + [(set (match_dup 3) + (ltu:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (umin:DI (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" ""))) + (clobber (match_operand:DI 3 "register_operand" ""))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (ltu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "") + +(define_insn "*bcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "b%C1 %r2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*bcc_reverse" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]) + + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "b%c1 %2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbs_normal" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0 
"" "")) + (pc)))] + "" + "blbs %r1,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbc_normal" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "blbc %r1,%0" + [(set_attr "type" "ibr")]) + +(define_split + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "comparison_operator" + [(zero_extract:DI (match_operand:DI 2 "register_operand" "") + (const_int 1) + (match_operand:DI 3 "const_int_operand" "")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (match_operand:DI 4 "register_operand" ""))])] + "INTVAL (operands[3]) != 0" + [(set (match_dup 4) + (lshiftrt:DI (match_dup 2) (match_dup 3))) + (set (pc) + (if_then_else (match_op_dup 1 + [(zero_extract:DI (match_dup 4) + (const_int 1) + (const_int 0)) + (const_int 0)]) + (label_ref (match_dup 0)) + (pc)))] + "") + +;; The following are the corresponding floating-point insns. Recall +;; we need to have variants that expand the arguments from SFmode +;; to DFmode. + +(define_insn "*cmpdf_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_internal" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*movdfcc_internal" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:DF 1 "reg_or_0_operand" "fG,0") + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 
%R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movsfcc_internal" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (if_then_else:SF + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:SF 1 "reg_or_0_operand" "fG,0") + (match_operand:SF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext1" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:DF 1 "reg_or_0_operand" "fG,0") + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext3" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (if_then_else:SF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:SF 1 "reg_or_0_operand" "fG,0") + (match_operand:SF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext4" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_expand "smaxdf3" + [(set (match_dup 3) + (le:DF (match_operand:DF 1 "reg_or_0_operand" "") + (match_operand:DF 2 "reg_or_0_operand" ""))) + (set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smindf3" + [(set (match_dup 3) + (lt:DF (match_operand:DF 1 "reg_or_0_operand" "") + (match_operand:DF 2 "reg_or_0_operand" ""))) + (set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smaxsf3" + [(set (match_dup 3) + (le:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand" "")))) + (set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (eq (match_dup 3) 
(match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "sminsf3" + [(set (match_dup 3) + (lt:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand" "")))) + (set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_insn "*fbcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +(define_insn "*fbcc_ext_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +;; These are the main define_expand's used to make conditional branches +;; and compares. + +(define_expand "cbranchdf4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:DF 1 "reg_or_0_operand" "") + (match_operand:DF 2 "reg_or_0_operand" "")])) + (use (match_operand 3 ""))] + "TARGET_FP" + { alpha_emit_conditional_branch (operands, DFmode); DONE; }) + +(define_expand "cbranchtf4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:TF 1 "general_operand") + (match_operand:TF 2 "general_operand")])) + (use (match_operand 3 ""))] + "TARGET_HAS_XFLOATING_LIBS" + { alpha_emit_conditional_branch (operands, TFmode); DONE; }) + +(define_expand "cbranchdi4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:DI 1 "some_operand") + (match_operand:DI 2 "some_operand")])) + (use (match_operand 3 ""))] + "" + { alpha_emit_conditional_branch (operands, DImode); DONE; }) + +(define_expand "cstoredf4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:DF 2 "reg_or_0_operand") + (match_operand:DF 3 "reg_or_0_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_FP" + { if (!alpha_emit_setcc (operands, DFmode)) FAIL; else DONE; }) + +(define_expand "cstoretf4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:TF 2 "general_operand") + (match_operand:TF 3 "general_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + { if (!alpha_emit_setcc (operands, TFmode)) FAIL; else DONE; }) + +(define_expand "cstoredi4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:DI 2 "some_operand") + (match_operand:DI 3 "some_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "" + { if (!alpha_emit_setcc (operands, DImode)) FAIL; else DONE; }) + +;; These are the main define_expand's used to make conditional moves. 
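+;; A rough, illustrative sketch only (symbolic register names, not taken from
+;; this file): for source like
+;;     long r = (a < b) ? x : y;
+;; alpha_emit_conditional_move is relied on to reduce the test to a compare
+;; against zero so a single conditional move can pick an arm, roughly
+;;     cmplt  $a,$b,$t        # t = (a < b)
+;;     mov    $y,$r
+;;     cmovne $t,$x,$r        # if t is nonzero, r = x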
+ +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "reg_or_8bit_operand" "") + (match_operand:SI 3 "reg_or_8bit_operand" "")))] + "" +{ + if ((operands[1] = alpha_emit_conditional_move (operands[1], SImode)) == 0) + FAIL; +}) + +(define_expand "movdicc" + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "reg_or_8bit_operand" "") + (match_operand:DI 3 "reg_or_8bit_operand" "")))] + "" +{ + if ((operands[1] = alpha_emit_conditional_move (operands[1], DImode)) == 0) + FAIL; +}) + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (match_operand 1 "comparison_operator" "") + (match_operand:SF 2 "reg_or_8bit_operand" "") + (match_operand:SF 3 "reg_or_8bit_operand" "")))] + "" +{ + if ((operands[1] = alpha_emit_conditional_move (operands[1], SFmode)) == 0) + FAIL; +}) + +(define_expand "movdfcc" + [(set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (match_operand 1 "comparison_operator" "") + (match_operand:DF 2 "reg_or_8bit_operand" "") + (match_operand:DF 3 "reg_or_8bit_operand" "")))] + "" +{ + if ((operands[1] = alpha_emit_conditional_move (operands[1], DFmode)) == 0) + FAIL; +}) + +;; These define_split definitions are used in cases when comparisons have +;; not be stated in the correct way and we need to reverse the second +;; comparison. For example, x >= 7 has to be done as x < 6 with the +;; comparison that tests the result being reversed. We have one define_split +;; for each use of a comparison. They do not match valid insns and need +;; not generate valid insns. +;; +;; We can also handle equality comparisons (and inequality comparisons in +;; cases where the resulting add cannot overflow) by doing an add followed by +;; a comparison with zero. This is faster since the addition takes one +;; less cycle than a compare when feeding into a conditional move. +;; For this case, we also have an SImode pattern since we can merge the add +;; and sign extend and the order doesn't matter. +;; +;; We do not do this for floating-point, since it isn't clear how the "wrong" +;; operation could have been generated. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "") + (match_operand:DI 3 "reg_or_cint_operand" "")]) + (match_operand:DI 4 "reg_or_cint_operand" "") + (match_operand:DI 5 "reg_or_cint_operand" ""))) + (clobber (match_operand:DI 6 "register_operand" ""))] + "operands[3] != const0_rtx" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + + /* If we are comparing for equality with a constant and that constant + appears in the arm when the register equals the constant, use the + register since that is more likely to match (and to produce better code + if both would). 
*/ + + if (code == EQ && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[4], operands[3])) + operands[4] = operands[2]; + + else if (code == NE && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[5], operands[3])) + operands[5] = operands[2]; + + if (code == NE || code == EQ + || (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1)) + { + if (CONST_INT_P (operands[3])) + operands[7] = gen_rtx_PLUS (DImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + operands[7] = gen_rtx_MINUS (DImode, operands[2], operands[3]); + + operands[8] = gen_rtx_fmt_ee (code, VOIDmode, operands[6], const0_rtx); + } + + else if (code == EQ || code == LE || code == LT + || code == LEU || code == LTU) + { + operands[7] = gen_rtx_fmt_ee (code, DImode, operands[2], operands[3]); + operands[8] = gen_rtx_NE (VOIDmode, operands[6], const0_rtx); + } + else + { + operands[7] = gen_rtx_fmt_ee (reverse_condition (code), DImode, + operands[2], operands[3]); + operands[8] = gen_rtx_EQ (VOIDmode, operands[6], const0_rtx); + } +}) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:SI 2 "reg_or_0_operand" "") + (match_operand:SI 3 "reg_or_cint_operand" "")]) + (match_operand:DI 4 "reg_or_8bit_operand" "") + (match_operand:DI 5 "reg_or_8bit_operand" ""))) + (clobber (match_operand:DI 6 "register_operand" ""))] + "operands[3] != const0_rtx + && (GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + rtx tem; + + if ((code != NE && code != EQ + && ! (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1))) + FAIL; + + if (CONST_INT_P (operands[3])) + tem = gen_rtx_PLUS (SImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + tem = gen_rtx_MINUS (SImode, operands[2], operands[3]); + + operands[7] = gen_rtx_SIGN_EXTEND (DImode, tem); + operands[8] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, + operands[6], const0_rtx); +}) + +;; Prefer to use cmp and arithmetic when possible instead of a cmove. + +(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "") + (const_int 0)]) + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")))] + "" + [(const_int 0)] +{ + if (alpha_split_conditional_move (GET_CODE (operands[1]), operands[0], + operands[2], operands[3], operands[4])) + DONE; + else + FAIL; +}) + +;; ??? Why combine is allowed to create such non-canonical rtl, I don't know. +;; Oh well, we match it in movcc, so it must be partially our fault. 
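+;; Illustrative case (made-up operands): combine may hand us the constant on
+;; the left, e.g.
+;;   (if_then_else (lt (const_int 0) (reg:DI x)) (const_int 1) (const_int 0))
+;; and the split below recovers the usual orientation by applying
+;; swap_condition, treating the test as (gt (reg:DI x) (const_int 0)) before
+;; passing everything to alpha_split_conditional_move.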
+(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(const_int 0) + (match_operand:DI 2 "reg_or_0_operand" "")]) + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")))] + "" + [(const_int 0)] +{ + if (alpha_split_conditional_move (swap_condition (GET_CODE (operands[1])), + operands[0], operands[2], operands[3], + operands[4])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*cmp_sadd_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (if_then_else:DI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_sadd_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_sadd_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (if_then_else:DI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], 
operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_ssub_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +;; Here are the CALL and unconditional branch insns. Calls on NT and OSF +;; work differently, so we have different patterns for each. + +;; On Unicos/Mk a call information word (CIW) must be generated for each +;; call. The CIW contains information about arguments passed in registers +;; and is stored in the caller's SSIB. Its offset relative to the beginning +;; of the SSIB is passed in $25. Handling this properly is quite complicated +;; in the presence of inlining since the CIWs for calls performed by the +;; inlined function must be stored in the SSIB of the function it is inlined +;; into as well. We encode the CIW in an unspec and append it to the list +;; of the CIWs for the current function only when the instruction for loading +;; $25 is generated. + +(define_expand "call" + [(use (match_operand:DI 0 "" "")) + (use (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" ""))] + "" +{ + if (TARGET_ABI_WINDOWS_NT) + emit_call_insn (gen_call_nt (operands[0], operands[1])); + else if (TARGET_ABI_OPEN_VMS) + emit_call_insn (gen_call_vms (operands[0], operands[2])); + else if (TARGET_ABI_UNICOSMK) + emit_call_insn (gen_call_umk (operands[0], operands[2])); + else + emit_call_insn (gen_call_osf (operands[0], operands[1])); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (mem:DI (match_operand 0 "" "")) + (match_operand 1 "" "")) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "TARGET_ABI_OSF" +{ + gcc_assert (MEM_P (operands[0])); + operands[0] = XEXP (operands[0], 0); +}) + +(define_expand "call_osf" + [(parallel [(call (mem:DI (match_operand 0 "" "")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + if (! 
call_operand (operands[0], Pmode)) + operands[0] = copy_to_mode_reg (Pmode, operands[0]); +}) + +(define_expand "call_nt" + [(parallel [(call (mem:DI (match_operand 0 "" "")) + (match_operand 1 "" "")) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + if (GET_CODE (operands[0]) != SYMBOL_REF && !REG_P (operands[0])) + operands[0] = force_reg (DImode, operands[0]); +}) + +;; Calls on Unicos/Mk are always indirect. +;; op 0: symbol ref for called function +;; op 1: CIW for $25 represented by an unspec + +(define_expand "call_umk" + [(parallel [(call (mem:DI (match_operand 0 "" "")) + (match_operand 1 "" "")) + (use (reg:DI 25)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + /* Always load the address of the called function into a register; + load the CIW in $25. */ + + operands[0] = XEXP (operands[0], 0); + if (!REG_P (operands[0])) + operands[0] = force_reg (DImode, operands[0]); + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[1]); +}) + +;; +;; call openvms/alpha +;; op 0: symbol ref for called function +;; op 1: next_arg_reg (argument information value for R25) +;; +(define_expand "call_vms" + [(parallel [(call (mem:DI (match_operand 0 "" "")) + (match_operand 1 "" "")) + (use (match_dup 2)) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + + /* Always load AI with argument information, then handle symbolic and + indirect call differently. Load RA and set operands[2] to PV in + both cases. */ + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[1]); + if (GET_CODE (operands[0]) == SYMBOL_REF) + { + alpha_need_linkage (XSTR (operands[0], 0), 0); + + operands[2] = const0_rtx; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, 26), + gen_rtx_MEM (Pmode, plus_constant (operands[0], 8))); + operands[2] = operands[0]; + } + +}) + +(define_expand "call_value" + [(use (match_operand 0 "" "")) + (use (match_operand:DI 1 "" "")) + (use (match_operand 2 "" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" ""))] + "" +{ + if (TARGET_ABI_WINDOWS_NT) + emit_call_insn (gen_call_value_nt (operands[0], operands[1], operands[2])); + else if (TARGET_ABI_OPEN_VMS) + emit_call_insn (gen_call_value_vms (operands[0], operands[1], + operands[3])); + else if (TARGET_ABI_UNICOSMK) + emit_call_insn (gen_call_value_umk (operands[0], operands[1], + operands[3])); + else + emit_call_insn (gen_call_value_osf (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "TARGET_ABI_OSF" +{ + gcc_assert (MEM_P (operands[1])); + operands[1] = XEXP (operands[1], 0); +}) + +(define_expand "call_value_osf" + [(parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + if (! 
call_operand (operands[1], Pmode)) + operands[1] = copy_to_mode_reg (Pmode, operands[1]); +}) + +(define_expand "call_value_nt" + [(parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + if (GET_CODE (operands[1]) != SYMBOL_REF && !REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); +}) + +(define_expand "call_value_vms" + [(parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "" "")) + (match_operand 2 "" ""))) + (use (match_dup 3)) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + + /* Always load AI with argument information, then handle symbolic and + indirect call differently. Load RA and set operands[3] to PV in + both cases. */ + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[2]); + if (GET_CODE (operands[1]) == SYMBOL_REF) + { + alpha_need_linkage (XSTR (operands[1], 0), 0); + + operands[3] = const0_rtx; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, 26), + gen_rtx_MEM (Pmode, plus_constant (operands[1], 8))); + operands[3] = operands[1]; + } +}) + +(define_expand "call_value_umk" + [(parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (reg:DI 25)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + if (!REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[2]); +}) + +(define_insn "*call_osf_1_er_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,(%0),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar(); while (1); }. +(define_peephole2 + [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand" "")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! 
samegp_function_operand (operands[0], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(call (mem:DI (match_dup 2)) + (match_dup 1)) + (use (reg:DI 29)) + (use (match_dup 0)) + (use (match_dup 3)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[0])) + { + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, + operands[0], operands[3])); + } + else + { + operands[2] = operands[0]; + operands[0] = const0_rtx; + operands[3] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand" "")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[0], Pmode) + && ! (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(call (mem:DI (match_dup 2)) + (match_dup 1)) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP1)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[0])) + { + operands[2] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, + operands[0], operands[4])); + } + else + { + operands[2] = operands[0]; + operands[0] = const0_rtx; + operands[4] = const0_rtx; + } + operands[3] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +}) + +(define_insn "*call_osf_2_er_nogp" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (use (match_operand 2 "" "")) + (use (match_operand 3 "const_int_operand" "")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%0),%2%J3" + [(set_attr "type" "jsr")]) + +(define_insn "*call_osf_2_er" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1 "" "")) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand" "")] + UNSPEC_LDGP1)) + (use (match_operand 2 "" "")) + (use (match_operand 3 "const_int_operand" "")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%0),%2%J3\;ldah $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "jsr") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_1_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,$%0..ng + jsr $26,%0" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1 "" "")) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! 
TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,($27),0\;ldgp $29,0($26) + bsr $26,$%0..ng + jsr $26,%0\;ldgp $29,0($26)" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1 "" "")) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jmp $31,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +;; Note that the DEC assembler expands "jmp foo" with $at, which +;; doesn't do what we want. +(define_insn "*sibcall_osf_1" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1 "" "")) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,$%0..ng + lda $27,%0\;jmp $31,($27),%0" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +(define_insn "*call_nt_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "r,R,s")) + (match_operand 1 "" "")) + (clobber (reg:DI 26))] + "TARGET_ABI_WINDOWS_NT" + "@ + jsr $26,(%0) + bsr $26,%0 + jsr $26,%0" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,12")]) + +; GAS relies on the order and position of instructions output below in order +; to generate relocs for VMS link to potentially optimize the call. +; Please do not molest. +(define_insn "*call_vms_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "r,s")) + (match_operand 1 "" "")) + (use (match_operand:DI 2 "nonmemory_operand" "r,n")) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + switch (which_alternative) + { + case 0: + return "mov %2,$27\;jsr $26,0\;ldq $27,0($29)"; + case 1: + operands [2] = alpha_use_linkage (operands [0], cfun->decl, 1, 0); + operands [3] = alpha_use_linkage (operands [0], cfun->decl, 0, 0); + return "ldq $26,%3\;ldq $27,%2\;jsr $26,%0\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "jsr") + (set_attr "length" "12,16")]) + +(define_insn "*call_umk_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "r")) + (match_operand 1 "" "")) + (use (reg:DI 25)) + (clobber (reg:DI 26))] + "TARGET_ABI_UNICOSMK" + "jsr $26,(%0)" + [(set_attr "type" "jsr")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" +{ + int i; + + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. 
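+;; Sketch of the intent (an illustration, not an extra requirement): the
+;; untyped_call expander above finishes with
+;;     emit_insn (gen_blockage ());
+;; exactly because the volatile unspec below is treated as using and
+;; clobbering everything, so the copies out of the result block cannot be
+;; moved or deleted across it.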
+ +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "none")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "br $31,%l0" + [(set_attr "type" "ibr")]) + +(define_expand "return" + [(return)] + "direct_return ()" + "") + +(define_insn "*return_internal" + [(return)] + "reload_completed" + "ret $31,($26),1" + [(set_attr "type" "ibr")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "r"))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "tablejump" + [(parallel [(set (pc) + (match_operand 0 "register_operand" "")) + (use (label_ref:DI (match_operand 1 "" "")))])] + "" +{ + if (TARGET_ABI_WINDOWS_NT) + { + rtx dest = gen_reg_rtx (DImode); + emit_insn (gen_extendsidi2 (dest, operands[0])); + operands[0] = dest; + } + else if (TARGET_ABI_OSF) + { + rtx dest = gen_reg_rtx (DImode); + emit_insn (gen_extendsidi2 (dest, operands[0])); + emit_insn (gen_adddi3 (dest, pic_offset_table_rtx, dest)); + operands[0] = dest; + } +}) + +(define_insn "*tablejump_osf_nt_internal" + [(set (pc) + (match_operand:DI 0 "register_operand" "r")) + (use (label_ref:DI (match_operand 1 "" "")))] + "(TARGET_ABI_OSF || TARGET_ABI_WINDOWS_NT) + && alpha_tablejump_addr_vec (insn)" +{ + operands[2] = alpha_tablejump_best_label (insn); + return "jmp $31,(%0),%2"; +} + [(set_attr "type" "ibr")]) + +(define_insn "*tablejump_internal" + [(set (pc) + (match_operand:DI 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +;; Cache flush. Used by alpha_trampoline_init. 0x86 is PAL_imb, but we don't +;; want to have to include pal.h in our .s file. +(define_insn "imb" + [(unspec_volatile [(const_int 0)] UNSPECV_IMB)] + "" + "call_pal 0x86" + [(set_attr "type" "callpal")]) + +;; BUGCHK is documented common to OSF/1 and VMS PALcode. +;; NT does not document anything at 0x81 -- presumably it would generate +;; the equivalent of SIGILL, but this isn't that important. +;; ??? Presuming unicosmk uses either OSF/1 or VMS PALcode. +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "!TARGET_ABI_WINDOWS_NT" + "call_pal 0x81" + [(set_attr "type" "callpal")]) + +;; For userland, we load the thread pointer from the TCB. +;; For the kernel, we load the per-cpu private value. + +(define_insn "load_tp" + [(set (match_operand:DI 0 "register_operand" "=v") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "TARGET_ABI_OSF" +{ + if (TARGET_TLS_KERNEL) + return "call_pal 0x32"; + else + return "call_pal 0x9e"; +} + [(set_attr "type" "callpal")]) + +;; For completeness, and possibly a __builtin function, here's how to +;; set the thread pointer. Since we don't describe enough of this +;; quantity for CSE, we have to use a volatile unspec, and then there's +;; not much point in creating an R16_REG register class. + +(define_expand "set_tp" + [(set (reg:DI 16) (match_operand:DI 0 "input_operand" "")) + (unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "TARGET_ABI_OSF" + "") + +(define_insn "*set_tp" + [(unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "TARGET_ABI_OSF" +{ + if (TARGET_TLS_KERNEL) + return "call_pal 0x31"; + else + return "call_pal 0x9f"; +} + [(set_attr "type" "callpal")]) + +;; Special builtins for establishing and reverting VMS condition handlers. 
+ +(define_expand "builtin_establish_vms_condition_handler" + [(set (reg:DI 0) (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "address_operand" ""))] + "TARGET_ABI_OPEN_VMS" +{ + alpha_expand_builtin_establish_vms_condition_handler (operands[0], + operands[1]); +}) + +(define_expand "builtin_revert_vms_condition_handler" + [(set (reg:DI 0) (match_operand:DI 0 "register_operand" ""))] + "TARGET_ABI_OPEN_VMS" +{ + alpha_expand_builtin_revert_vms_condition_handler (operands[0]); +}) + +;; Finally, we have the basic data motion insns. The byte and word insns +;; are done via define_expand. Start with the floating-point insns, since +;; they are simpler. + +(define_insn "*movsf_nofix" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m") + (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r"))] + "TARGET_FPREGS && ! TARGET_FIX + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" + "@ + cpys %R1,%R1,%0 + ld%, %0,%1 + bis $31,%r1,%0 + ldl %0,%1 + st%, %R1,%0 + stl %r1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist")]) + +(define_insn "*movsf_fix" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "TARGET_FPREGS && TARGET_FIX + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" + "@ + cpys %R1,%R1,%0 + ld%, %0,%1 + bis $31,%r1,%0 + ldl %0,%1 + st%, %R1,%0 + stl %r1,%0 + itofs %1,%0 + ftois %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi")]) + +(define_insn "*movsf_nofp" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:SF 1 "input_operand" "rG,m,r"))] + "! TARGET_FPREGS + && (register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode))" + "@ + bis $31,%r1,%0 + ldl %0,%1 + stl %r1,%0" + [(set_attr "type" "ilog,ild,ist")]) + +(define_insn "*movdf_nofix" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m") + (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r"))] + "TARGET_FPREGS && ! TARGET_FIX + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + "@ + cpys %R1,%R1,%0 + ld%- %0,%1 + bis $31,%r1,%0 + ldq %0,%1 + st%- %R1,%0 + stq %r1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist")]) + +(define_insn "*movdf_fix" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "TARGET_FPREGS && TARGET_FIX + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + "@ + cpys %R1,%R1,%0 + ld%- %0,%1 + bis $31,%r1,%0 + ldq %0,%1 + st%- %R1,%0 + stq %r1,%0 + itoft %1,%0 + ftoit %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi")]) + +(define_insn "*movdf_nofp" + [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:DF 1 "input_operand" "rG,m,r"))] + "! TARGET_FPREGS + && (register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode))" + "@ + bis $31,%r1,%0 + ldq %0,%1 + stq %r1,%0" + [(set_attr "type" "ilog,ild,ist")]) + +;; Subregs suck for register allocation. Pretend we can move TFmode +;; data between general registers until after reload. 
+ +(define_insn_and_split "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o") + (match_operand:TF 1 "input_operand" "roG,rG"))] + "register_operand (operands[0], TFmode) + || reg_or_0_operand (operands[1], TFmode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] +{ + alpha_split_tmode_pair (operands, TFmode, true); +}) + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], SFmode)) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); +}) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], TFmode)) + operands[1] = force_reg (TFmode, operands[1]); +}) + +(define_insn "*movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,m") + (match_operand:SI 1 "input_operand" "rJ,K,L,n,m,rJ"))] + "(TARGET_ABI_OSF || TARGET_ABI_UNICOSMK) + && (register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode))" + "@ + bis $31,%r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + # + ldl %0,%1 + stl %r1,%0" + [(set_attr "type" "ilog,iadd,iadd,multi,ild,ist")]) + +(define_insn "*movsi_nt_vms" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,r,m") + (match_operand:SI 1 "input_operand" "rJ,K,L,s,n,m,rJ"))] + "(TARGET_ABI_WINDOWS_NT || TARGET_ABI_OPEN_VMS) + && (register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode))" + "@ + bis $31,%1,%0 + lda %0,%1 + ldah %0,%h1 + lda %0,%1 + # + ldl %0,%1 + stl %r1,%0" + [(set_attr "type" "ilog,iadd,iadd,ldsym,multi,ild,ist")]) + +(define_insn "*movhi_nobwx" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operand:HI 1 "input_operand" "rJ,n"))] + "! TARGET_BWX + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "@ + bis $31,%r1,%0 + lda %0,%L1($31)" + [(set_attr "type" "ilog,iadd")]) + +(define_insn "*movhi_bwx" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "input_operand" "rJ,n,m,rJ"))] + "TARGET_BWX + && (register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode))" + "@ + bis $31,%r1,%0 + lda %0,%L1($31) + ldwu %0,%1 + stw %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist")]) + +(define_insn "*movqi_nobwx" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (match_operand:QI 1 "input_operand" "rJ,n"))] + "! TARGET_BWX + && (register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + bis $31,%r1,%0 + lda %0,%L1($31)" + [(set_attr "type" "ilog,iadd")]) + +(define_insn "*movqi_bwx" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:QI 1 "input_operand" "rJ,n,m,rJ"))] + "TARGET_BWX + && (register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode))" + "@ + bis $31,%r1,%0 + lda %0,%L1($31) + ldbu %0,%1 + stb %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist")]) + +;; We do two major things here: handle mem->mem and construct long +;; constants. 
+ +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" +{ + if (alpha_expand_mov (SImode, operands)) + DONE; +}) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "non_add_const_operand" ""))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (SImode, operands)) + DONE; + else + FAIL; +}) + +;; Split the load of an address into a four-insn sequence on Unicos/Mk. +;; Always generate a REG_EQUAL note for the last instruction to facilitate +;; optimizations. If the symbolic operand is a label_ref, generate +;; REG_LABEL_OPERAND notes and update LABEL_NUSES because this is not done +;; automatically. Labels may be incorrectly deleted if we don't do this. +;; +;; Describing what the individual instructions do correctly is too complicated +;; so use UNSPECs for each of the three parts of an address. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "symbolic_operand" ""))] + "TARGET_ABI_UNICOSMK && reload_completed" + [(const_int 0)] +{ + rtx insn1, insn2, insn3; + + insn1 = emit_insn (gen_umk_laum (operands[0], operands[1])); + emit_insn (gen_ashldi3 (operands[0], operands[0], GEN_INT (32))); + insn2 = emit_insn (gen_umk_lalm (operands[0], operands[0], operands[1])); + insn3 = emit_insn (gen_umk_lal (operands[0], operands[0], operands[1])); + set_unique_reg_note (insn3, REG_EQUAL, operands[1]); + + if (GET_CODE (operands[1]) == LABEL_REF) + { + rtx label; + + label = XEXP (operands[1], 0); + add_reg_note (insn1, REG_LABEL_OPERAND, label); + add_reg_note (insn2, REG_LABEL_OPERAND, label); + add_reg_note (insn3, REG_LABEL_OPERAND, label); + LABEL_NUSES (label) += 3; + } + DONE; +}) + +;; Instructions for loading the three parts of an address on Unicos/Mk. + +(define_insn "umk_laum" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] + UNSPEC_UMK_LAUM))] + "TARGET_ABI_UNICOSMK" + "laum %r0,%t1($31)" + [(set_attr "type" "iadd")]) + +(define_insn "umk_lalm" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] + UNSPEC_UMK_LALM)))] + "TARGET_ABI_UNICOSMK" + "lalm %r0,%t2(%r1)" + [(set_attr "type" "iadd")]) + +(define_insn "umk_lal" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] + UNSPEC_UMK_LAL)))] + "TARGET_ABI_UNICOSMK" + "lal %r0,%t2(%r1)" + [(set_attr "type" "iadd")]) + +;; Add a new call information word to the current function's list of CIWs +;; and load its index into $25. Doing it here ensures that the CIW will be +;; associated with the correct function even in the presence of inlining. 
+ +(define_insn "*umk_load_ciw" + [(set (reg:DI 25) + (unspec:DI [(match_operand 0 "" "")] UNSPEC_UMK_LOAD_CIW))] + "TARGET_ABI_UNICOSMK" +{ + operands[0] = unicosmk_add_call_info_word (operands[0]); + return "lda $25,%0"; +} + [(set_attr "type" "iadd")]) + +(define_insn "*movdi_er_low_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "local_symbolic_operand" "")))] + "TARGET_EXPLICIT_RELOCS" +{ + if (true_regnum (operands[1]) == 29) + return "lda %0,%2(%1)\t\t!gprel"; + else + return "lda %0,%2(%1)\t\t!gprellow"; +} + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "small_symbolic_operand" ""))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (lo_sum:DI (match_dup 2) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "local_symbolic_operand" ""))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1)))) + (set (match_dup 0) + (lo_sum:DI (match_dup 0) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(match_operand 0 "some_small_symbolic_operand" "")] + "" + [(match_dup 0)] + "operands[0] = split_small_symbolic_operand (operands[0]);") + +;; Accepts any symbolic, not just global, since function calls that +;; don't go via bsr still use !literal in hopes of linker relaxation. +(define_insn "movdi_er_high_g" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand" "") + (match_operand 3 "const_int_operand" "")] + UNSPEC_LITERAL))] + "TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldq %0,%2(%1)\t\t!literal"; + else + return "ldq %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "global_symbolic_operand" ""))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1) + (const_int 0)] UNSPEC_LITERAL))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "movdi_er_tlsgd" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand" "") + (match_operand 3 "const_int_operand" "")] + UNSPEC_TLSGD))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "lda %0,%2(%1)\t\t!tlsgd"; + else + return "lda %0,%2(%1)\t\t!tlsgd!%3"; +}) + +(define_insn "movdi_er_tlsldm" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "")] + UNSPEC_TLSLDM))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[2]) == 0) + return "lda %0,%&(%1)\t\t!tlsldm"; + else + return "lda %0,%&(%1)\t\t!tlsldm!%2"; +}) + +(define_insn "*movdi_er_gotdtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand" "")] + UNSPEC_DTPREL))] + "HAVE_AS_TLS" + "ldq %0,%2(%1)\t\t!gotdtprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "gotdtp_symbolic_operand" ""))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI 
[(match_dup 2) + (match_dup 1)] UNSPEC_DTPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi_er_gottp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand" "")] + UNSPEC_TPREL))] + "HAVE_AS_TLS" + "ldq %0,%2(%1)\t\t!gottprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "gottp_symbolic_operand" ""))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_TPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi_er_nofix" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,r,r,r,m,*f,*f,Q") + (match_operand:DI 1 "input_operand" "rJ,K,L,T,s,n,m,rJ,*fJ,Q,*f"))] + "TARGET_EXPLICIT_RELOCS && ! TARGET_FIX + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + "@ + mov %r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + # + # + # + ldq%A1 %0,%1 + stq%A0 %r1,%0 + fmov %R1,%0 + ldt %0,%1 + stt %R1,%0" + [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ild,ist,fcpys,fld,fst") + (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*")]) + +;; The 'U' constraint matches symbolic operands on Unicos/Mk. Those should +;; have been split up by the rules above but we shouldn't reject the +;; possibility of them getting through. + +(define_insn "*movdi_nofix" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,r,r,r,m,*f,*f,Q") + (match_operand:DI 1 "input_operand" "rJ,K,L,U,s,n,m,rJ,*fJ,Q,*f"))] + "! TARGET_FIX + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + "@ + bis $31,%r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + laum %0,%t1($31)\;sll %0,32,%0\;lalm %0,%t1(%0)\;lal %0,%t1(%0) + lda %0,%1 + # + ldq%A1 %0,%1 + stq%A0 %r1,%0 + cpys %R1,%R1,%0 + ldt %0,%1 + stt %R1,%0" + [(set_attr "type" "ilog,iadd,iadd,ldsym,ldsym,multi,ild,ist,fcpys,fld,fst") + (set_attr "length" "*,*,*,16,*,*,*,*,*,*,*")]) + +(define_insn "*movdi_er_fix" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r,r,r,r,r,r, m, *f,*f, Q, r,*f") + (match_operand:DI 1 "input_operand" + "rJ,K,L,T,s,n,m,rJ,*fJ, Q,*f,*f, r"))] + "TARGET_EXPLICIT_RELOCS && TARGET_FIX + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + "@ + mov %r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + # + # + # + ldq%A1 %0,%1 + stq%A0 %r1,%0 + fmov %R1,%0 + ldt %0,%1 + stt %R1,%0 + ftoit %1,%0 + itoft %1,%0" + [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*,*")]) + +(define_insn "*movdi_fix" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,r,r,m,*f,*f,Q,r,*f") + (match_operand:DI 1 "input_operand" "rJ,K,L,s,n,m,rJ,*fJ,Q,*f,*f,r"))] + "! TARGET_EXPLICIT_RELOCS && TARGET_FIX + && (register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode))" + "@ + bis $31,%r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + lda %0,%1 + # + ldq%A1 %0,%1 + stq%A0 %r1,%0 + cpys %R1,%R1,%0 + ldt %0,%1 + stt %R1,%0 + ftoit %1,%0 + itoft %1,%0" + [(set_attr "type" "ilog,iadd,iadd,ldsym,multi,ild,ist,fcpys,fld,fst,ftoi,itof")]) + +;; VMS needs to set up "vms_base_regno" for unwinding. 
This move +;; often appears dead to the life analysis code, at which point we +;; die for emitting dead prologue instructions. Force this live. + +(define_insn "force_movdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")] + UNSPECV_FORCE_MOV))] + "" + "mov %1,%0" + [(set_attr "type" "ilog")]) + +;; We do three major things here: handle mem->mem, put 64-bit constants in +;; memory, and construct long 32-bit constants. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + if (alpha_expand_mov (DImode, operands)) + DONE; +}) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "non_add_const_operand" ""))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (DImode, operands)) + DONE; + else + FAIL; +}) + +;; We need to prevent reload from splitting TImode moves, because it +;; might decide to overwrite a pointer with the value it points to. +;; In that case we have to do the loads in the appropriate order so +;; that the pointer is not destroyed too early. + +(define_insn_and_split "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (match_operand:TI 1 "input_operand" "roJ,rJ"))] + "(register_operand (operands[0], TImode) + /* Prevent rematerialization of constants. */ + && ! CONSTANT_P (operands[1])) + || reg_or_0_operand (operands[1], TImode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] +{ + alpha_split_tmode_pair (operands, TImode, true); +}) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], TImode)) + operands[1] = force_reg (TImode, operands[1]); + + if (operands[1] == const0_rtx) + ; + /* We must put 64-bit constants in memory. We could keep the + 32-bit constants in TImode and rely on the splitter, but + this doesn't seem to be worth the pain. */ + else if (CONST_INT_P (operands[1]) + || GET_CODE (operands[1]) == CONST_DOUBLE) + { + rtx in[2], out[2], target; + + gcc_assert (can_create_pseudo_p ()); + + split_double (operands[1], &in[0], &in[1]); + + if (in[0] == const0_rtx) + out[0] = const0_rtx; + else + { + out[0] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[0], in[0])); + } + + if (in[1] == const0_rtx) + out[1] = const0_rtx; + else + { + out[1] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[1], in[1])); + } + + if (!REG_P (operands[0])) + target = gen_reg_rtx (TImode); + else + target = operands[0]; + + emit_insn (gen_movdi (operand_subword (target, 0, 0, TImode), out[0])); + emit_insn (gen_movdi (operand_subword (target, 1, 0, TImode), out[1])); + + if (target != operands[0]) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], target)); + + DONE; + } +}) + +;; These are the partial-word cases. +;; +;; First we have the code to load an aligned word. Operand 0 is the register +;; in which to place the result. It's mode is QImode or HImode. Operand 1 +;; is an SImode MEM at the low-order byte of the proper word. Operand 2 is the +;; number of bits within the word that the value is. Operand 3 is an SImode +;; scratch register. If operand 0 is a hard register, operand 3 may be the +;; same register. It is allowed to conflict with operand 1 as well. 
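;; A minimal illustrative C sketch (not upstream code; the helper name and
;; parameters are hypothetical) of what the aligned_loadqi/aligned_loadhi
;; patterns below compute: fetch the containing aligned longword, then pull
;; the byte or halfword out with a shift and mask, zero-extended to 64 bits.
;; The bit position argument plays the role of operand 2.
;;
;;   #include <stdint.h>
;;
;;   /* Extract WIDTH (8 or 16) bits starting at BITPOS from an aligned
;;      32-bit word, as the zero_extract following the SImode load does.  */
;;   static inline uint64_t
;;   aligned_extract (uint32_t word, unsigned bitpos, unsigned width)
;;   {
;;     return ((uint64_t) word >> bitpos) & ((UINT64_C (1) << width) - 1);
;;   }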
+ +(define_expand "aligned_loadqi" + [(set (match_operand:SI 3 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_operand:DI 0 "register_operand" "") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 8) + (match_operand:DI 2 "const_int_operand" "")))] + + "" + "") + +(define_expand "aligned_loadhi" + [(set (match_operand:SI 3 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_operand:DI 0 "register_operand" "") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 16) + (match_operand:DI 2 "const_int_operand" "")))] + + "" + "") + +;; Similar for unaligned loads, where we use the sequence from the +;; Alpha Architecture manual. We have to distinguish between little-endian +;; and big-endian systems as the sequences are different. +;; +;; Operand 1 is the address. Operands 2 and 3 are temporaries, where +;; operand 3 can overlap the input and output registers. + +(define_expand "unaligned_loadqi" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "address_operand" "")) + (use (match_operand:DI 2 "register_operand" "")) + (use (match_operand:DI 3 "register_operand" ""))] + "" +{ + if (WORDS_BIG_ENDIAN) + emit_insn (gen_unaligned_loadqi_be (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_unaligned_loadqi_le (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "unaligned_loadqi_le" + [(set (match_operand:DI 2 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 1 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand" "") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand" "") + (zero_extract:DI (match_dup 2) + (const_int 8) + (ashift:DI (match_dup 3) (const_int 3))))] + "! WORDS_BIG_ENDIAN" + "") + +(define_expand "unaligned_loadqi_be" + [(set (match_operand:DI 2 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 1 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand" "") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand" "") + (zero_extract:DI (match_dup 2) + (const_int 8) + (minus:DI + (const_int 56) + (ashift:DI (match_dup 3) (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "") + +(define_expand "unaligned_loadhi" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "address_operand" "")) + (use (match_operand:DI 2 "register_operand" "")) + (use (match_operand:DI 3 "register_operand" ""))] + "" +{ + if (WORDS_BIG_ENDIAN) + emit_insn (gen_unaligned_loadhi_be (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_unaligned_loadhi_le (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "unaligned_loadhi_le" + [(set (match_operand:DI 2 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 1 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand" "") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand" "") + (zero_extract:DI (match_dup 2) + (const_int 16) + (ashift:DI (match_dup 3) (const_int 3))))] + "! 
WORDS_BIG_ENDIAN" + "") + +(define_expand "unaligned_loadhi_be" + [(set (match_operand:DI 2 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 1 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand" "") + (plus:DI (match_dup 1) (const_int 1))) + (set (match_operand:DI 0 "register_operand" "") + (zero_extract:DI (match_dup 2) + (const_int 16) + (minus:DI + (const_int 56) + (ashift:DI (match_dup 3) (const_int 3)))))] + "WORDS_BIG_ENDIAN" + "") + +;; Storing an aligned byte or word requires two temporaries. Operand 0 is the +;; aligned SImode MEM. Operand 1 is the register containing the +;; byte or word to store. Operand 2 is the number of bits within the word that +;; the value should be placed. Operands 3 and 4 are SImode temporaries. + +(define_expand "aligned_store" + [(set (match_operand:SI 3 "register_operand" "") + (match_operand:SI 0 "memory_operand" "")) + (set (subreg:DI (match_dup 3) 0) + (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5))) + (set (subreg:DI (match_operand:SI 4 "register_operand" "") 0) + (ashift:DI (zero_extend:DI (match_operand 1 "register_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (set (subreg:DI (match_dup 4) 0) + (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0))) + (set (match_dup 0) (match_dup 4))] + "" +{ + operands[5] = GEN_INT (~ (GET_MODE_MASK (GET_MODE (operands[1])) + << INTVAL (operands[2]))); +}) + +;; For the unaligned byte and halfword cases, we use code similar to that +;; in the ;; Architecture book, but reordered to lower the number of registers +;; required. Operand 0 is the address. Operand 1 is the data to store. +;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may +;; be the same temporary, if desired. If the address is in a register, +;; operand 2 can be that register. + +(define_expand "unaligned_storeqi" + [(use (match_operand:DI 0 "address_operand" "")) + (use (match_operand:QI 1 "register_operand" "")) + (use (match_operand:DI 2 "register_operand" "")) + (use (match_operand:DI 3 "register_operand" "")) + (use (match_operand:DI 4 "register_operand" ""))] + "" +{ + if (WORDS_BIG_ENDIAN) + emit_insn (gen_unaligned_storeqi_be (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + else + emit_insn (gen_unaligned_storeqi_le (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "unaligned_storeqi_le" + [(set (match_operand:DI 3 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 0 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand" "") + (match_dup 0)) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI (const_int 255) + (ashift:DI (match_dup 2) (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand" "") + (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "")) + (ashift:DI (match_dup 2) (const_int 3)))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "! 
WORDS_BIG_ENDIAN" + "") + +(define_expand "unaligned_storeqi_be" + [(set (match_operand:DI 3 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 0 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand" "") + (match_dup 0)) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI (const_int 255) + (minus:DI (const_int 56) + (ashift:DI (match_dup 2) (const_int 3))))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand" "") + (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "")) + (minus:DI (const_int 56) + (ashift:DI (match_dup 2) (const_int 3))))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "WORDS_BIG_ENDIAN" + "") + +(define_expand "unaligned_storehi" + [(use (match_operand:DI 0 "address_operand" "")) + (use (match_operand:HI 1 "register_operand" "")) + (use (match_operand:DI 2 "register_operand" "")) + (use (match_operand:DI 3 "register_operand" "")) + (use (match_operand:DI 4 "register_operand" ""))] + "" +{ + if (WORDS_BIG_ENDIAN) + emit_insn (gen_unaligned_storehi_be (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + else + emit_insn (gen_unaligned_storehi_le (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "unaligned_storehi_le" + [(set (match_operand:DI 3 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 0 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand" "") + (match_dup 0)) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI (const_int 65535) + (ashift:DI (match_dup 2) (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand" "") + (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "")) + (ashift:DI (match_dup 2) (const_int 3)))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "! WORDS_BIG_ENDIAN" + "") + +(define_expand "unaligned_storehi_be" + [(set (match_operand:DI 3 "register_operand" "") + (mem:DI (and:DI (match_operand:DI 0 "address_operand" "") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand" "") + (plus:DI (match_dup 5) (const_int 1))) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI + (const_int 65535) + (minus:DI (const_int 56) + (ashift:DI (match_dup 2) (const_int 3))))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand" "") + (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "")) + (minus:DI (const_int 56) + (ashift:DI (match_dup 2) (const_int 3))))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "WORDS_BIG_ENDIAN" + "operands[5] = force_reg (DImode, operands[0]);") + +;; Here are the define_expand's for QI and HI moves that use the above +;; patterns. We have the normal sets, plus the ones that need scratch +;; registers for reload. + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" +{ + if (TARGET_BWX + ? alpha_expand_mov (QImode, operands) + : alpha_expand_mov_nobwx (QImode, operands)) + DONE; +}) + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" +{ + if (TARGET_BWX + ? 
alpha_expand_mov (HImode, operands) + : alpha_expand_mov_nobwx (HImode, operands)) + DONE; +}) + +;; We need to hook into the extra support that we have for HImode +;; reloads when BWX insns are not available. +(define_expand "movcqi" + [(set (match_operand:CQI 0 "nonimmediate_operand" "") + (match_operand:CQI 1 "general_operand" ""))] + "!TARGET_BWX" +{ + if (GET_CODE (operands[0]) == CONCAT || GET_CODE (operands[1]) == CONCAT) + ; + else if (!any_memory_operand (operands[0], CQImode)) + { + if (!any_memory_operand (operands[1], CQImode)) + { + emit_move_insn (gen_lowpart (HImode, operands[0]), + gen_lowpart (HImode, operands[1])); + DONE; + } + if (aligned_memory_operand (operands[1], CQImode)) + { + bool done; + do_aligned1: + operands[1] = gen_lowpart (HImode, operands[1]); + do_aligned2: + operands[0] = gen_lowpart (HImode, operands[0]); + done = alpha_expand_mov_nobwx (HImode, operands); + gcc_assert (done); + DONE; + } + } + else if (aligned_memory_operand (operands[0], CQImode)) + { + if (MEM_P (operands[1])) + { + rtx x = gen_reg_rtx (HImode); + emit_move_insn (gen_lowpart (CQImode, x), operands[1]); + operands[1] = x; + goto do_aligned2; + } + goto do_aligned1; + } + + gcc_assert (!reload_in_progress); + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +;; Here are the versions for reload. +;; +;; The aligned input case is recognized early in alpha_secondary_reload +;; in order to avoid allocating an unnecessary scratch register. +;; +;; Note that in the unaligned cases we know that the operand must not be +;; a pseudo-register because stack slots are always aligned references. + +(define_expand "reload_in" + [(parallel [(match_operand:RELOAD12 0 "register_operand" "=r") + (match_operand:RELOAD12 1 "any_memory_operand" "m") + (match_operand:TI 2 "register_operand" "=&r")])] + "!TARGET_BWX" +{ + rtx scratch, seq, addr; + unsigned regno = REGNO (operands[2]); + + /* It is possible that one of the registers we got for operands[2] + might coincide with that of operands[0] (which is why we made + it TImode). Pick the other one to use as our scratch. */ + if (regno == REGNO (operands[0])) + regno++; + scratch = gen_rtx_REG (DImode, regno); + + addr = get_unaligned_address (operands[1]); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + seq = gen_unaligned_load (operands[0], addr, + scratch, operands[0]); + alpha_set_memflags (seq, operands[1]); + + emit_insn (seq); + DONE; +}) + +(define_expand "reload_out" + [(parallel [(match_operand:RELOAD12 0 "any_memory_operand" "=m") + (match_operand:RELOAD12 1 "register_operand" "r") + (match_operand:TI 2 "register_operand" "=&r")])] + "! TARGET_BWX" +{ + unsigned regno = REGNO (operands[2]); + + if (mode == CQImode) + { + operands[0] = gen_lowpart (HImode, operands[0]); + operands[1] = gen_lowpart (HImode, operands[1]); + } + + if (aligned_memory_operand (operands[0], mode)) + { + emit_insn (gen_reload_out_aligned + (operands[0], operands[1], + gen_rtx_REG (SImode, regno), + gen_rtx_REG (SImode, regno + 1))); + } + else + { + rtx addr = get_unaligned_address (operands[0]); + rtx scratch1 = gen_rtx_REG (DImode, regno); + rtx scratch2 = gen_rtx_REG (DImode, regno + 1); + rtx scratch3 = scratch1; + rtx seq; + + if (REG_P (addr)) + scratch1 = addr; + + seq = gen_unaligned_store (addr, operands[1], scratch1, + scratch2, scratch3); + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + } + DONE; +}) + +;; Helpers for the above. 
The way reload is structured, we can't +;; always get a proper address for a stack slot during reload_foo +;; expansion, so we must delay our address manipulations until after. + +(define_insn_and_split "reload_in_aligned" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (match_operand:I12MODE 1 "memory_operand" "m"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + emit_insn (gen_aligned_load + (gen_lowpart (DImode, operands[0]), aligned_mem, bitnum, + gen_rtx_REG (SImode, REGNO (operands[0])))); + DONE; +}) + +(define_insn_and_split "reload_out_aligned" + [(set (match_operand:I12MODE 0 "memory_operand" "=m") + (match_operand:I12MODE 1 "register_operand" "r")) + (clobber (match_operand:SI 2 "register_operand" "=r")) + (clobber (match_operand:SI 3 "register_operand" "=r"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, + operands[2], operands[3])); + DONE; +}) + +;; Vector operations + +(define_mode_iterator VEC [V8QI V4HI V2SI]) + +(define_expand "mov" + [(set (match_operand:VEC 0 "nonimmediate_operand" "") + (match_operand:VEC 1 "general_operand" ""))] + "" +{ + if (alpha_expand_mov (mode, operands)) + DONE; +}) + +(define_split + [(set (match_operand:VEC 0 "register_operand" "") + (match_operand:VEC 1 "non_zero_const_operand" ""))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (mode, operands)) + DONE; + else + FAIL; +}) + + +(define_expand "movmisalign" + [(set (match_operand:VEC 0 "nonimmediate_operand" "") + (match_operand:VEC 1 "general_operand" ""))] + "" +{ + alpha_expand_movmisalign (mode, operands); + DONE; +}) + +(define_insn "*mov_fix" + [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m,r,*f") + (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f,*f,r"))] + "TARGET_FIX + && (register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode))" + "@ + bis $31,%r1,%0 + # + ldq %0,%1 + stq %r1,%0 + cpys %R1,%R1,%0 + ldt %0,%1 + stt %R1,%0 + ftoit %1,%0 + itoft %1,%0" + [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst,ftoi,itof")]) + +(define_insn "*mov_nofix" + [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m") + (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f"))] + "! 
TARGET_FIX + && (register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode))" + "@ + bis $31,%r1,%0 + # + ldq %0,%1 + stq %r1,%0 + cpys %R1,%R1,%0 + ldt %0,%1 + stt %R1,%0" + [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst")]) + +(define_insn "uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (umin:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (match_operand:V8QI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "minub8 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "sminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (smin:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (match_operand:V8QI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "minsb8 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "uminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (umin:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW") + (match_operand:V4HI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "minuw4 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (smin:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW") + (match_operand:V4HI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "minsw4 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "umaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (umax:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (match_operand:V8QI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "maxub8 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "smaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (smax:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (match_operand:V8QI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "maxsb8 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "umaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (umax:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW") + (match_operand:V4HI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "maxuw4 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (smax:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW") + (match_operand:V4HI 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + "maxsw4 %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "one_cmpl2" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (match_operand:VEC 1 "register_operand" "r")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "and3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "and %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*andnot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (not:VEC (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bic %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "ior3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bis %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (not:DI (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "ornot %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xor3" + [(set (match_operand:VEC 
0 "register_operand" "=r") + (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "xor %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r"))))] + "" + "eqv %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_expand "vec_shl_" + [(set (match_operand:VEC 0 "register_operand" "") + (ashift:DI (match_operand:VEC 1 "register_operand" "") + (match_operand:DI 2 "reg_or_6bit_operand" "")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +(define_expand "vec_shr_" + [(set (match_operand:VEC 0 "register_operand" "") + (lshiftrt:DI (match_operand:VEC 1 "register_operand" "") + (match_operand:DI 2 "reg_or_6bit_operand" "")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +;; Bit field extract patterns which use ext[wlq][lh] + +(define_expand "extv" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extract:DI (match_operand:QI 1 "memory_operand" "") + (match_operand:DI 2 "immediate_operand" "") + (match_operand:DI 3 "immediate_operand" "")))] + "" +{ + int ofs; + + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so we force it to be a MEM here. */ + if (!MEM_P (operands[1])) + FAIL; + + /* The bit number is relative to the mode of operand 1 which is + usually QImode (this might actually be a bug in expmed.c). Note + that the bit number is negative in big-endian mode in this case. + We have to convert that to the offset. */ + if (WORDS_BIG_ENDIAN) + ofs = GET_MODE_BITSIZE (GET_MODE (operands[1])) + - INTVAL (operands[2]) - INTVAL (operands[3]); + else + ofs = INTVAL (operands[3]); + + ofs = ofs / 8; + + alpha_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + ofs, 1); + DONE; +}) + +(define_expand "extzv" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extract:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "immediate_operand" "") + (match_operand:DI 3 "immediate_operand" "")))] + "" +{ + /* We can do 8, 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 8 + && INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + if (MEM_P (operands[1])) + { + int ofs; + + /* Fail 8-bit fields, falling back on a simple byte load. */ + if (INTVAL (operands[2]) == 8) + FAIL; + + /* The bit number is relative to the mode of operand 1 which is + usually QImode (this might actually be a bug in expmed.c). Note + that the bit number is negative in big-endian mode in this case. + We have to convert that to the offset. 
*/ + if (WORDS_BIG_ENDIAN) + ofs = GET_MODE_BITSIZE (GET_MODE (operands[1])) + - INTVAL (operands[2]) - INTVAL (operands[3]); + else + ofs = INTVAL (operands[3]); + + ofs = ofs / 8; + + alpha_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + ofs, 0); + DONE; + } +}) + +(define_expand "insv" + [(set (zero_extract:DI (match_operand:QI 0 "memory_operand" "") + (match_operand:DI 1 "immediate_operand" "") + (match_operand:DI 2 "immediate_operand" "")) + (match_operand:DI 3 "register_operand" ""))] + "" +{ + int ofs; + + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[2]) % 8 != 0 + || (INTVAL (operands[1]) != 16 + && INTVAL (operands[1]) != 32 + && INTVAL (operands[1]) != 64)) + FAIL; + + /* From mips.md: store_bit_field doesn't verify that our source + matches the predicate, so we force it to be a MEM here. */ + if (!MEM_P (operands[0])) + FAIL; + + /* The bit number is relative to the mode of operand 1 which is + usually QImode (this might actually be a bug in expmed.c). Note + that the bit number is negative in big-endian mode in this case. + We have to convert that to the offset. */ + if (WORDS_BIG_ENDIAN) + ofs = GET_MODE_BITSIZE (GET_MODE (operands[0])) + - INTVAL (operands[1]) - INTVAL (operands[2]); + else + ofs = INTVAL (operands[2]); + + ofs = ofs / 8; + + alpha_expand_unaligned_store (operands[0], operands[3], + INTVAL (operands[1]) / 8, ofs); + DONE; +}) + +;; Block move/clear, see alpha.c for more details. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "movmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:DI 2 "immediate_operand" "")) + (use (match_operand:DI 3 "immediate_operand" ""))])] + "" +{ + if (alpha_expand_block_move (operands)) + DONE; + else + FAIL; +}) + +(define_expand "movmemdi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:DI 2 "immediate_operand" "")) + (use (match_operand:DI 3 "immediate_operand" "")) + (use (match_dup 4)) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 18)) + (clobber (reg:DI 19)) + (clobber (reg:DI 20)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))])] + "TARGET_ABI_OPEN_VMS" +{ + operands[4] = alpha_need_linkage ("OTS$MOVE", 0); +}) + +(define_insn "*movmemdi_1" + [(set (match_operand:BLK 0 "memory_operand" "=m,=m") + (match_operand:BLK 1 "memory_operand" "m,m")) + (use (match_operand:DI 2 "nonmemory_operand" "r,i")) + (use (match_operand:DI 3 "immediate_operand" "")) + (use (match_operand:DI 4 "call_operand" "i,i")) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 18)) + (clobber (reg:DI 19)) + (clobber (reg:DI 20)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + operands [5] = alpha_use_linkage (operands [4], cfun->decl, 0, 1); + switch (which_alternative) + { + case 0: + return "lda $16,%0\;bis $31,%2,$17\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)"; + case 1: + return "lda $16,%0\;lda $17,%2($31)\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set_attr "length" "28")]) + +(define_expand "setmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand 2 
"const_int_operand" "")) + (use (match_operand:DI 1 "immediate_operand" "")) + (use (match_operand:DI 3 "immediate_operand" ""))])] + "" +{ + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (alpha_expand_block_clear (operands)) + DONE; + else + FAIL; +}) + +(define_expand "setmemdi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand 2 "const_int_operand" "")) + (use (match_operand:DI 1 "immediate_operand" "")) + (use (match_operand:DI 3 "immediate_operand" "")) + (use (match_dup 4)) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))])] + "TARGET_ABI_OPEN_VMS" +{ + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + operands[4] = alpha_need_linkage ("OTS$ZERO", 0); +}) + +(define_insn "*clrmemdi_1" + [(set (match_operand:BLK 0 "memory_operand" "=m,=m") + (const_int 0)) + (use (match_operand:DI 1 "nonmemory_operand" "r,i")) + (use (match_operand:DI 2 "immediate_operand" "")) + (use (match_operand:DI 3 "call_operand" "i,i")) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + operands [4] = alpha_use_linkage (operands [3], cfun->decl, 0, 1); + switch (which_alternative) + { + case 0: + return "lda $16,%0\;bis $31,%1,$17\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)"; + case 1: + return "lda $16,%0\;lda $17,%1($31)\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set_attr "length" "24")]) + + +;; Subroutine of stack space allocation. Perform a stack probe. +(define_expand "probe_stack" + [(set (match_dup 1) (match_operand:DI 0 "const_int_operand" ""))] + "" +{ + operands[1] = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx, + INTVAL (operands[0]))); + MEM_VOLATILE_P (operands[1]) = 1; + + operands[0] = const0_rtx; +}) + +;; This is how we allocate stack space. If we are allocating a +;; constant amount of space and we know it is less than 4096 +;; bytes, we need do nothing. +;; +;; If it is more than 4096 bytes, we need to probe the stack +;; periodically. +(define_expand "allocate_stack" + [(set (reg:DI 30) + (plus:DI (reg:DI 30) + (match_operand:DI 1 "reg_or_cint_operand" ""))) + (set (match_operand:DI 0 "register_operand" "=r") + (match_dup 2))] + "" +{ + if (CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= 4096) + { + /* We do this the same way as in the prologue and generate explicit + probes. Then we update the stack by the constant. 
*/ + + int probed = 4096; + + emit_insn (gen_probe_stack (GEN_INT (- probed))); + while (probed + 8192 < INTVAL (operands[1])) + emit_insn (gen_probe_stack (GEN_INT (- (probed += 8192)))); + + if (probed + 4096 < INTVAL (operands[1])) + emit_insn (gen_probe_stack (GEN_INT (- INTVAL(operands[1])))); + } + + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = virtual_stack_dynamic_rtx; + } + else + { + rtx out_label = 0; + rtx loop_label = gen_label_rtx (); + rtx want = gen_reg_rtx (Pmode); + rtx tmp = gen_reg_rtx (Pmode); + rtx memref, test; + + emit_insn (gen_subdi3 (want, stack_pointer_rtx, + force_reg (Pmode, operands[1]))); + + if (!CONST_INT_P (operands[1])) + { + rtx limit = GEN_INT (4096); + out_label = gen_label_rtx (); + test = gen_rtx_LTU (VOIDmode, operands[1], limit); + emit_jump_insn + (gen_cbranchdi4 (test, operands[1], limit, out_label)); + } + + emit_insn (gen_adddi3 (tmp, stack_pointer_rtx, GEN_INT (-4096))); + emit_label (loop_label); + memref = gen_rtx_MEM (DImode, tmp); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT(-8192))); + test = gen_rtx_GTU (VOIDmode, tmp, want); + emit_jump_insn (gen_cbranchdi4 (test, tmp, want, loop_label)); + + memref = gen_rtx_MEM (DImode, want); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + + if (out_label) + emit_label (out_label); + + emit_move_insn (stack_pointer_rtx, want); + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; + } +}) + +;; This is used by alpha_expand_prolog to do the same thing as above, +;; except we cannot at that time generate new basic blocks, so we hide +;; the loop in this one insn. + +(define_insn "prologue_stack_probe_loop" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "r")] + UNSPECV_PSPL)] + "" +{ + operands[2] = gen_label_rtx (); + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[2])); + + return "stq $31,-8192(%1)\;subq %0,1,%0\;lda %1,-8192(%1)\;bne %0,%l2"; +} + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +(define_expand "prologue" + [(clobber (const_int 0))] + "" +{ + alpha_expand_prologue (); + DONE; +}) + +;; These take care of emitting the ldgp insn in the prologue. This will be +;; an lda/ldah pair and we want to align them properly. So we have two +;; unspec_volatile insns, the first of which emits the ldgp assembler macro +;; and the second of which emits nothing. However, both are marked as type +;; IADD (the default) so the alignment code in alpha.c does the right thing +;; with them. + +(define_expand "prologue_ldgp" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] + "" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 27); + operands[2] = (TARGET_EXPLICIT_RELOCS + ? 
GEN_INT (alpha_next_sequence_number++) + : const0_rtx); +}) + +(define_insn "*ldgp_er_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "")] + UNSPECV_LDGP1))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "ldah %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "")] + UNSPEC_LDGP2))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "lda %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "")] + UNSPECV_PLDGP2))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "lda %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "")] + UNSPECV_LDGP1))] + "" + "ldgp %0,0(%1)\n$%~..ng:" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "")] + UNSPECV_PLDGP2))] + "" + "") + +;; The _mcount profiling hook has special calling conventions, and +;; does not clobber all the registers that a normal call would. So +;; hide the fact this is a call at all. + +(define_insn "prologue_mcount" + [(unspec_volatile [(const_int 0)] UNSPECV_MCOUNT)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + /* Note that we cannot use a lituse_jsr reloc, since _mcount + cannot be called via the PLT. */ + return "ldq $28,_mcount($29)\t\t!literal\;jsr $28,($28),_mcount"; + else + return "lda $28,_mcount\;jsr $28,($28),_mcount"; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "init_fp" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "r")) + (clobber (mem:BLK (match_operand:DI 2 "register_operand" "=r")))] + "" + "bis $31,%1,%0") + +(define_expand "epilogue" + [(return)] + "" +{ + alpha_expand_epilogue (); +}) + +(define_expand "sibcall_epilogue" + [(return)] + "TARGET_ABI_OSF" +{ + alpha_expand_epilogue (); + DONE; +}) + +(define_expand "builtin_longjmp" + [(use (match_operand:DI 0 "register_operand" "r"))] + "TARGET_ABI_OSF" +{ + /* The elements of the buffer are, in order: */ + rtx fp = gen_rtx_MEM (Pmode, operands[0]); + rtx lab = gen_rtx_MEM (Pmode, plus_constant (operands[0], 8)); + rtx stack = gen_rtx_MEM (Pmode, plus_constant (operands[0], 16)); + rtx pv = gen_rtx_REG (Pmode, 27); + + /* This bit is the same as expand_builtin_longjmp. */ + emit_move_insn (hard_frame_pointer_rtx, fp); + emit_move_insn (pv, lab); + emit_stack_restore (SAVE_NONLOCAL, stack); + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + /* Load the label we are jumping through into $27 so that we know + where to look for it when we get back to setjmp's function for + restoring the gp. 
*/ + emit_jump_insn (gen_builtin_longjmp_internal (pv)); + emit_barrier (); + DONE; +}) + +;; This is effectively a copy of indirect_jump, but constrained such +;; that register renaming cannot foil our cunning plan with $27. +(define_insn "builtin_longjmp_internal" + [(set (pc) + (unspec_volatile [(match_operand:DI 0 "register_operand" "c")] + UNSPECV_LONGJMP))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "builtin_setjmp_receiver" + [(unspec_volatile [(label_ref (match_operand 0 "" ""))] UNSPECV_SETJMPR)] + "TARGET_ABI_OSF" + "") + +(define_insn_and_split "*builtin_setjmp_receiver_1" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_SETJMPR)] + "TARGET_ABI_OSF" +{ + if (TARGET_EXPLICIT_RELOCS) + return "#"; + else + return "br $27,$LSJ%=\n$LSJ%=:\;ldgp $29,0($27)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 1) + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LDGP1)) + (set (match_dup 1) + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (prev_nonnote_insn (curr_insn) != XEXP (operands[0], 0)) + emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, operands[0]), + UNSPECV_SETJMPR_ER)); + operands[1] = pic_offset_table_rtx; + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (alpha_next_sequence_number++); +} + [(set_attr "length" "12") + (set_attr "type" "multi")]) + +(define_insn "*builtin_setjmp_receiver_er_sl_1" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_SETJMPR_ER)] + "TARGET_ABI_OSF && TARGET_EXPLICIT_RELOCS && TARGET_AS_CAN_SUBTRACT_LABELS" + "lda $27,$LSJ%=-%l0($27)\n$LSJ%=:") + +(define_insn "*builtin_setjmp_receiver_er_1" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_SETJMPR_ER)] + "TARGET_ABI_OSF && TARGET_EXPLICIT_RELOCS" + "br $27,$LSJ%=\n$LSJ%=:" + [(set_attr "type" "ibr")]) + +;; When flag_reorder_blocks_and_partition is in effect, compiler puts +;; exception landing pads in a cold section. To prevent inter-section offset +;; calculation, a jump to original landing pad is emitted in the place of the +;; original landing pad. Since landing pad is moved, RA-relative GP +;; calculation in the prologue of landing pad breaks. To solve this problem, +;; we use alternative GP load approach, as in the case of TARGET_LD_BUGGY_LDGP. 
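;; A rough C model (not upstream code; helper names are hypothetical) of the
;; two GP-recovery strategies used by the receivers below.  The normal OSF
;; receiver rebuilds $gp from the address in the return-address register plus
;; a link-time displacement (the !gpdisp pair); if the landing pad has been
;; replaced by a jump from the cold section, that address no longer matches
;; the one the displacement was computed against, so the prologue-saved value
;; is reloaded from the frame instead.
;;
;;   #include <stdint.h>
;;
;;   /* Normal case: gp = return address + link-time constant.  */
;;   static inline uintptr_t
;;   gp_from_ra (uintptr_t ra, intptr_t gpdisp)
;;   {
;;     return ra + gpdisp;
;;   }
;;
;;   /* Moved-landing-pad / buggy-ldgp case: reload the value the prologue
;;      stashed (what "ldq $29,%0" in *exception_receiver_2 does).  */
;;   static inline uintptr_t
;;   gp_from_save_slot (const uintptr_t *save_slot)
;;   {
;;     return *save_slot;
;;   }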
+ +(define_expand "exception_receiver" + [(unspec_volatile [(match_dup 0)] UNSPECV_EHR)] + "TARGET_ABI_OSF" +{ + if (TARGET_LD_BUGGY_LDGP || flag_reorder_blocks_and_partition) + operands[0] = alpha_gp_save_rtx (); + else + operands[0] = const0_rtx; +}) + +(define_insn "*exception_receiver_2" + [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_EHR)] + "TARGET_ABI_OSF + && (TARGET_LD_BUGGY_LDGP || flag_reorder_blocks_and_partition)" + "ldq $29,%0" + [(set_attr "type" "ild")]) + +(define_insn_and_split "*exception_receiver_1" + [(unspec_volatile [(const_int 0)] UNSPECV_EHR)] + "TARGET_ABI_OSF" +{ + if (TARGET_EXPLICIT_RELOCS) + return "ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"; + else + return "ldgp $29,0($26)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec:DI [(match_dup 0) (match_dup 2)] UNSPEC_LDGP2))] +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 26); + operands[2] = GEN_INT (alpha_next_sequence_number++); +} + [(set_attr "length" "8") + (set_attr "type" "multi")]) + +(define_expand "nonlocal_goto_receiver" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (set (reg:DI 27) (mem:DI (reg:DI 29))) + (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (use (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" + "") + +(define_insn "arg_home" + [(unspec [(const_int 0)] UNSPEC_ARG_HOME) + (use (reg:DI 1)) + (use (reg:DI 25)) + (use (reg:DI 16)) + (use (reg:DI 17)) + (use (reg:DI 18)) + (use (reg:DI 19)) + (use (reg:DI 20)) + (use (reg:DI 21)) + (use (reg:DI 48)) + (use (reg:DI 49)) + (use (reg:DI 50)) + (use (reg:DI 51)) + (use (reg:DI 52)) + (use (reg:DI 53)) + (clobber (mem:BLK (const_int 0))) + (clobber (reg:DI 24)) + (clobber (reg:DI 25)) + (clobber (reg:DI 0))] + "TARGET_ABI_OPEN_VMS" + "lda $0,OTS$HOME_ARGS\;ldq $0,8($0)\;jsr $0,OTS$HOME_ARGS" + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +;; Load the CIW into r2 for calling __T3E_MISMATCH + +(define_expand "umk_mismatch_args" + [(set (match_dup 1) (mem:DI (plus:DI (reg:DI 15) (const_int -16)))) + (set (match_dup 2) (mem:DI (plus:DI (match_dup 1) (const_int -32)))) + (set (reg:DI 1) (match_operand:DI 0 "const_int_operand" "")) + (set (match_dup 3) (plus:DI (mult:DI (reg:DI 25) + (const_int 8)) + (match_dup 2))) + (set (reg:DI 2) (mem:DI (match_dup 3)))] + "TARGET_ABI_UNICOSMK" +{ + operands[1] = gen_reg_rtx (DImode); + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "arg_home_umk" + [(unspec [(const_int 0)] UNSPEC_ARG_HOME) + (use (reg:DI 1)) + (use (reg:DI 2)) + (use (reg:DI 16)) + (use (reg:DI 17)) + (use (reg:DI 18)) + (use (reg:DI 19)) + (use (reg:DI 20)) + (use (reg:DI 21)) + (use (reg:DI 48)) + (use (reg:DI 49)) + (use (reg:DI 50)) + (use (reg:DI 51)) + (use (reg:DI 52)) + (use (reg:DI 53)) + (clobber (mem:BLK (const_int 0))) + (parallel [ + (clobber (reg:DI 22)) + (clobber (reg:DI 23)) + (clobber (reg:DI 24)) + (clobber (reg:DI 0)) + (clobber (reg:DI 1)) + (clobber (reg:DI 2)) + (clobber (reg:DI 3)) + (clobber (reg:DI 4)) + (clobber (reg:DI 5)) + (clobber (reg:DI 6)) + (clobber (reg:DI 7)) + (clobber (reg:DI 8))])] + "TARGET_ABI_UNICOSMK" + "laum $4,__T3E_MISMATCH($31)\;sll $4,32,$4\;lalm $4,__T3E_MISMATCH($4)\;lal $4,__T3E_MISMATCH($4)\;jsr $3,($4)" + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +;; Prefetch data. 
+;; +;; On EV4, these instructions are nops -- no load occurs. +;; +;; On EV5, these instructions act as a normal load, and thus can trap +;; if the address is invalid. The OS may (or may not) handle this in +;; the entMM fault handler and suppress the fault. If so, then this +;; has the effect of a read prefetch instruction. +;; +;; On EV6, these become official prefetch instructions. + +(define_insn "prefetch" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n"))] + "TARGET_FIXUP_EV5_PREFETCH || alpha_cpu == PROCESSOR_EV6" +{ + /* Interpret "no temporal locality" as this data should be evicted once + it is used. The "evict next" alternatives load the data into the cache + and leave the LRU eviction counter pointing to that block. */ + static const char * const alt[2][2] = { + { + "ldq $31,%a0", /* read, evict next */ + "ldl $31,%a0", /* read, evict last */ + }, + { + "ldt $f31,%a0", /* write, evict next */ + "lds $f31,%a0", /* write, evict last */ + } + }; + + bool write = INTVAL (operands[1]) != 0; + bool lru = INTVAL (operands[2]) != 0; + + return alt[write][lru]; +} + [(set_attr "type" "ild")]) + +;; Close the trap shadow of preceding instructions. This is generated +;; by alpha_reorg. + +(define_insn "trapb" + [(unspec_volatile [(const_int 0)] UNSPECV_TRAPB)] + "" + "trapb" + [(set_attr "type" "misc")]) + +;; No-op instructions used by machine-dependent reorg to preserve +;; alignment for instruction issue. +;; The Unicos/Mk assembler does not support these opcodes. + +(define_insn "nop" + [(const_int 0)] + "" + "bis $31,$31,$31" + [(set_attr "type" "ilog")]) + +(define_insn "fnop" + [(const_int 1)] + "TARGET_FP" + "cpys $f31,$f31,$f31" + [(set_attr "type" "fcpys")]) + +(define_insn "unop" + [(const_int 2)] + "" + "ldq_u $31,0($30)") + +;; On Unicos/Mk we use a macro for aligning code. + +(define_insn "realign" + [(unspec_volatile [(match_operand 0 "immediate_operand" "i")] + UNSPECV_REALIGN)] + "" +{ + if (TARGET_ABI_UNICOSMK) + return "gcc@code@align %0"; + else + return ".align %0 #realign"; +}) + +;; Instructions to be emitted from __builtins. + +(define_insn "builtin_cmpbge" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")] + UNSPEC_CMPBGE))] + "" + "cmpbge %r1,%2,%0" + ;; The EV6 data sheets list this as ILOG. OTOH, EV6 doesn't + ;; actually differentiate between ILOG and ICMP in the schedule. 
+ [(set_attr "type" "icmp")]) + +(define_expand "builtin_extbl" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_extxl_be; + else + gen = gen_extxl_le; + emit_insn ((*gen) (operands[0], operands[1], GEN_INT (8), operands[2])); + DONE; +}) + +(define_expand "builtin_extwl" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_extxl_be; + else + gen = gen_extxl_le; + emit_insn ((*gen) (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "builtin_extll" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_extxl_be; + else + gen = gen_extxl_le; + emit_insn ((*gen) (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "builtin_extql" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_extxl_be; + else + gen = gen_extxl_le; + emit_insn ((*gen) (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_extwh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_extwh_be; + else + gen = gen_extwh_le; + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_extlh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_extlh_be; + else + gen = gen_extlh_le; + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_extqh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_extqh_be; + else + gen = gen_extqh_le; + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_insbl" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_insbl_be; + else + gen = gen_insbl_le; + operands[1] = gen_lowpart (QImode, operands[1]); + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_inswl" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_inswl_be; + else + gen = gen_inswl_le; + operands[1] = gen_lowpart (HImode, operands[1]); + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_insll" + [(match_operand:DI 0 "register_operand" "") + 
(match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_insll_be; + else + gen = gen_insll_le; + operands[1] = gen_lowpart (SImode, operands[1]); + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_insql" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx); + if (WORDS_BIG_ENDIAN) + gen = gen_insql_be; + else + gen = gen_insql_le; + emit_insn ((*gen) (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_inswh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "builtin_inslh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "builtin_insqh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_mskbl" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx mask; + if (WORDS_BIG_ENDIAN) + gen = gen_mskxl_be; + else + gen = gen_mskxl_le; + mask = GEN_INT (0xff); + emit_insn ((*gen) (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "builtin_mskwl" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx mask; + if (WORDS_BIG_ENDIAN) + gen = gen_mskxl_be; + else + gen = gen_mskxl_le; + mask = GEN_INT (0xffff); + emit_insn ((*gen) (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "builtin_mskll" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx mask; + if (WORDS_BIG_ENDIAN) + gen = gen_mskxl_be; + else + gen = gen_mskxl_le; + mask = immed_double_const (0xffffffff, 0, DImode); + emit_insn ((*gen) (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "builtin_mskql" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx mask; + if (WORDS_BIG_ENDIAN) + gen = gen_mskxl_be; + else + gen = gen_mskxl_le; + mask = constm1_rtx; + emit_insn ((*gen) (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "builtin_mskwh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "builtin_msklh" + 
[(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "builtin_mskqh" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "reg_or_8bit_operand" "")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_zap" + [(set (match_operand:DI 0 "register_operand" "") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_cint_operand" "")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand" "")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = alpha_expand_zap_mask (INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zap_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (and:DI (unspec:DI + [(match_operand:QI 2 "reg_or_cint_operand" "n,n,r,r")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand" "n,r,J,r")))] + "" + "@ + # + # + bis $31,$31,%0 + zap %r1,%2,%0" + [(set_attr "type" "shift,shift,ilog,shift")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand" "")] + UNSPEC_ZAP) + (match_operand:DI 1 "const_int_operand" "")))] + "" + [(const_int 0)] +{ + rtx mask = alpha_expand_zap_mask (INTVAL (operands[2])); + if (HOST_BITS_PER_WIDE_INT >= 64 || CONST_INT_P (mask)) + operands[1] = gen_int_mode (INTVAL (operands[1]) & INTVAL (mask), DImode); + else + { + HOST_WIDE_INT c_lo = INTVAL (operands[1]); + HOST_WIDE_INT c_hi = (c_lo < 0 ? 
-1 : 0); + operands[1] = immed_double_const (c_lo & CONST_DOUBLE_LOW (mask), + c_hi & CONST_DOUBLE_HIGH (mask), + DImode); + } + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand" "")] + UNSPEC_ZAP) + (match_operand:DI 1 "register_operand" "")))] + "" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 2)))] +{ + operands[2] = alpha_expand_zap_mask (INTVAL (operands[2])); + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (operands[2] == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_expand "builtin_zapnot" + [(set (match_operand:DI 0 "register_operand" "") + (and:DI (unspec:DI + [(not:QI (match_operand:DI 2 "reg_or_cint_operand" ""))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand" "")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = alpha_expand_zap_mask (~ INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zapnot_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(not:QI (match_operand:QI 2 "register_operand" "r"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" + "zapnot %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "builtin_amask" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")] + UNSPEC_AMASK))] + "" + "amask %1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_implver" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_IMPLVER))] + "" + "implver %0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_rpcc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))] + "" + "rpcc %0" + [(set_attr "type" "ilog")]) + +(define_expand "builtin_minub8" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_uminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsb8" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_sminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minuw4" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_uminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsw4" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_sminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + 
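;; The __builtin_alpha_min* and __builtin_alpha_max* expanders above and
;; below all funnel into the V8QI/V4HI min/max insns through
;; alpha_expand_builtin_vector_binop.  As a reference for the semantics only
;; (not upstream code), byte-wise unsigned minimum -- the minub8 case --
;; expressed in plain C over a 64-bit value; the signed and halfword variants
;; differ only in element type and width.
;;
;;   #include <stdint.h>
;;
;;   static uint64_t
;;   minub8 (uint64_t a, uint64_t b)
;;   {
;;     uint64_t r = 0;
;;     for (int i = 0; i < 8; i++)
;;       {
;;         uint8_t x = a >> (i * 8), y = b >> (i * 8);
;;         r |= (uint64_t) (x < y ? x : y) << (i * 8);
;;       }
;;     return r;
;;   }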
+(define_expand "builtin_maxub8" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_umaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsb8" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_smaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxuw4" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_umaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsw4" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "reg_or_0_operand" "") + (match_operand:DI 2 "reg_or_0_operand" "")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_smaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_insn "builtin_perr" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rJ")] + UNSPEC_PERR))] + "TARGET_MAX" + "perr %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pklb" + [(set (match_operand:DI 0 "register_operand" "") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:DI 1 "register_operand" "")) + (match_dup 2)) + (match_dup 3)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V2SImode, operands[1]); + operands[2] = CONST0_RTX (V2QImode); + operands[3] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pklb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:V2SI 1 "register_operand" "r")) + (match_operand:V2QI 2 "const0_operand" "")) + (match_operand:V4QI 3 "const0_operand" "")))] + "TARGET_MAX" + "pklb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pkwb" + [(set (match_operand:DI 0 "register_operand" "") + (vec_concat:V8QI + (truncate:V4QI (match_operand:DI 1 "register_operand" "")) + (match_dup 2)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V4HImode, operands[1]); + operands[2] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pkwb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (truncate:V4QI (match_operand:V4HI 1 "register_operand" "r")) + (match_operand:V4QI 2 "const0_operand" "")))] + "TARGET_MAX" + "pkwb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbl" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:DI 1 "register_operand" "") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V2SImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbl" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" + "unpkbl %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbw" + [(set 
(match_operand:DI 0 "register_operand" "") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:DI 1 "register_operand" "") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V4HImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbw" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" + "unpkbw %r1,%0" + [(set_attr "type" "mvi")]) + +(include "sync.md") + +;; The call patterns are at the end of the file because their +;; wildcard operand0 interferes with nice recognition. + +(define_insn "*call_value_osf_1_er_noreturn" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_value_osf_1_er" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,(%1),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),0\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar(); while (1); }. +(define_peephole2 + [(parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (use (reg:DI 29)) + (use (match_dup 1)) + (use (match_dup 4)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[4])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[4] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && ! 
(peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (match_dup 5)) + (clobber (reg:DI 26))]) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[5] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[5])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[5] = const0_rtx; + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[6] = pic_offset_table_rtx; +}) + +(define_insn "*call_value_osf_2_er_nogp" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" "")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%1),%3%J4" + [(set_attr "type" "jsr")]) + +(define_insn "*call_value_osf_2_er" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2 "" ""))) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 5 "const_int_operand" "")] + UNSPEC_LDGP1)) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" "")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%1),%3%J4\;ldah $29,0($26)\t\t!gpdisp!%5" + [(set_attr "type" "jsr") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_value_osf_1_noreturn" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! 
TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,$%1..ng + jsr $26,%1" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn_and_split "call_value_osf_tlsgd" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand" "")] UNSPEC_TLSGD_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "#" + "&& reload_completed" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "call_value_osf_tlsldm" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand" "")] UNSPEC_TLSLDM_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "#" + "&& reload_completed" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +} + [(set_attr "type" "multi")]) + +(define_insn "*call_value_osf_1" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2 "" ""))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + jsr $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_value_osf_1_er" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2 "" ""))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jmp $31,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +(define_insn "*sibcall_value_osf_1" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2 "" ""))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! 
TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,$%1..ng + lda $27,%1\;jmp $31,($27),%1" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +(define_insn "*call_value_nt_1" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "r,R,s")) + (match_operand 2 "" ""))) + (clobber (reg:DI 26))] + "TARGET_ABI_WINDOWS_NT" + "@ + jsr $26,(%1) + bsr $26,%1 + jsr $26,%1" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,12")]) + +; GAS relies on the order and position of instructions output below in order +; to generate relocs for VMS link to potentially optimize the call. +; Please do not molest. +(define_insn "*call_value_vms_1" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "r,s")) + (match_operand 2 "" ""))) + (use (match_operand:DI 3 "nonmemory_operand" "r,n")) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + switch (which_alternative) + { + case 0: + return "mov %3,$27\;jsr $26,0\;ldq $27,0($29)"; + case 1: + operands [3] = alpha_use_linkage (operands [1], cfun->decl, 1, 0); + operands [4] = alpha_use_linkage (operands [1], cfun->decl, 0, 0); + return "ldq $26,%4\;ldq $27,%3\;jsr $26,%1\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "jsr") + (set_attr "length" "12,16")]) + +(define_insn "*call_value_umk" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "call_operand" "r")) + (match_operand 2 "" ""))) + (use (reg:DI 25)) + (clobber (reg:DI 26))] + "TARGET_ABI_UNICOSMK" + "jsr $26,(%1)" + [(set_attr "type" "jsr")]) diff --git a/gcc/config/alpha/alpha.opt b/gcc/config/alpha/alpha.opt new file mode 100644 index 000000000..fb7db3775 --- /dev/null +++ b/gcc/config/alpha/alpha.opt @@ -0,0 +1,134 @@ +; Options for the DEC Alpha port of the compiler +; +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
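[Editorial aside, not part of the imported alpha.opt.] For readers unfamiliar with GCC's .opt format, each record that follows is a blank-line-separated group: the option name as written on the command line (minus the leading dash), a line of properties, and the help string. As a worked example under that reading, the mbwx entry below declares -mbwx: Mask(BWX) allocates a bit in target_flags (from which the MASK_BWX and TARGET_BWX macros are generated) and the negative form -mno-bwx clears it again. Entries marked RejectNegative, such as mcpu=, accept no "no-" form, and Joined entries take their argument glued on (for example -mcpu=ev6, stored in alpha_cpu_string via Var).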
+ +msoft-float +Target Report Mask(SOFT_FP) +Do not use hardware fp + +mfp-regs +Target Report Mask(FPREGS) +Use fp registers + +mgas +Target RejectNegative Mask(GAS) +Assume GAS + +malpha-as +Target RejectNegative InverseMask(GAS) +Do not assume GAS + +mieee-conformant +Target RejectNegative Mask(IEEE_CONFORMANT) +Request IEEE-conformant math library routines (OSF/1) + +mieee +Target Report RejectNegative Mask(IEEE) +Emit IEEE-conformant code, without inexact exceptions + +mieee-with-inexact +Target Report RejectNegative Mask(IEEE_WITH_INEXACT) + +mbuild-constants +Target Report Mask(BUILD_CONSTANTS) +Do not emit complex integer constants to read-only memory + +mfloat-vax +Target Report RejectNegative Mask(FLOAT_VAX) +Use VAX fp + +mfloat-ieee +Target Report RejectNegative InverseMask(FLOAT_VAX) +Do not use VAX fp + +mbwx +Target Report Mask(BWX) +Emit code for the byte/word ISA extension + +mmax +Target Report Mask(MAX) +Emit code for the motion video ISA extension + +mfix +Target Report Mask(FIX) +Emit code for the fp move and sqrt ISA extension + +mcix +Target Report Mask(CIX) +Emit code for the counting ISA extension + +mexplicit-relocs +Target Report Mask(EXPLICIT_RELOCS) +Emit code using explicit relocation directives + +msmall-data +Target Report RejectNegative Mask(SMALL_DATA) +Emit 16-bit relocations to the small data areas + +mlarge-data +Target Report RejectNegative InverseMask(SMALL_DATA) +Emit 32-bit relocations to the small data areas + +msmall-text +Target Report RejectNegative Mask(SMALL_TEXT) +Emit direct branches to local functions + +mlarge-text +Target Report RejectNegative InverseMask(SMALL_TEXT) +Emit indirect branches to local functions + +mtls-kernel +Target Report Mask(TLS_KERNEL) +Emit rdval instead of rduniq for thread pointer + +mlong-double-128 +Target Report RejectNegative Mask(LONG_DOUBLE_128) +Use 128-bit long double + +mlong-double-64 +Target Report RejectNegative InverseMask(LONG_DOUBLE_128) +Use 64-bit long double + +mcpu= +Target RejectNegative Joined Var(alpha_cpu_string) +Use features of and schedule given CPU + +mtune= +Target RejectNegative Joined Var(alpha_tune_string) +Schedule given CPU + +mfp-rounding-mode= +Target RejectNegative Joined Var(alpha_fprm_string) +Control the generated fp rounding mode + +mfp-trap-mode= +Target RejectNegative Joined Var(alpha_fptm_string) +Control the IEEE trap mode + +mtrap-precision= +Target RejectNegative Joined Var(alpha_tp_string) +Control the precision given to fp exceptions + +mmemory-latency= +Target RejectNegative Joined Var(alpha_mlat_string) +Tune expected memory latency + +mtls-size= +Target RejectNegative Joined UInteger Var(alpha_tls_size) Init(32) +Specify bit size of immediate TLS offsets diff --git a/gcc/config/alpha/constraints.md b/gcc/config/alpha/constraints.md new file mode 100644 index 000000000..3e9a88776 --- /dev/null +++ b/gcc/config/alpha/constraints.md @@ -0,0 +1,121 @@ +;; Constraint definitions for DEC Alpha. +;; Copyright (C) 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;; Unused letters: +;;; ABCDEF V YZ +;;; de ghijklmnopq stu wxyz + +;; Integer register constraints. + +(define_register_constraint "a" "R24_REG" + "General register 24, input to division routine") + +(define_register_constraint "b" "R25_REG" + "General register 24, input to division routine") + +(define_register_constraint "c" "R27_REG" + "General register 27, function call address") + +(define_register_constraint "f" "FLOAT_REGS" + "Any floating-point register") + +(define_register_constraint "v" "R0_REG" + "General register 0, function value return address") + +;; Integer constant constraints. +(define_constraint "I" + "An unsigned 8 bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "J" + "The constant zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "K" + "Signed 16-bit integer constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 32767)"))) + +(define_constraint "L" + "A shifted signed 16-bit constant appropriate for LDAH" + (and (match_code "const_int") + (match_test "(ival & 0xffff) == 0 + && (ival >> 31 == -1 || ival >> 31 == 0)"))) + +(define_constraint "M" + "A valid operand of a ZAP insn" + (and (match_code "const_int") + (match_test "zap_mask (ival) != 0"))) + +(define_constraint "N" + "A complemented unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (~ival, 0, 255)"))) + +(define_constraint "O" + "A negated unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (-ival, 0, 255)"))) + +(define_constraint "P" + "The constant 1, 2 or 3" + (and (match_code "const_int") + (match_test "ival == 1 || ival == 2 || ival == 3"))) + +(define_constraint "H" + "A valid operand of a ZAP insn, when building with 32-bit HOST_WIDE_INT" + (and (match_code "const_double") + (match_test "mode == VOIDmode && zap_mask (hval) && zap_mask (lval)"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "The floating point zero constant" + (and (match_code "const_double") + (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT + && op == CONST0_RTX (mode)"))) + +;; "Extra" constraints. +(define_constraint "Q" + "@internal A normal_memory_operand" + (match_operand 0 "normal_memory_operand")) + +(define_constraint "R" + "@internal A direct_call_operand" + (match_operand:DI 0 "direct_call_operand")) + +(define_constraint "S" + "An unsigned 6-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 63)"))) + +(define_constraint "T" + "@internal A high-part symbol" + (match_code "high")) + +(define_constraint "U" + "@internal A UNICOSMK symbol" + (and (match_test "TARGET_ABI_UNICOSMK") + (match_operand 0 "symbolic_operand"))) + +(define_constraint "W" + "A vector zero constant" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (mode)"))) diff --git a/gcc/config/alpha/crtfastmath.c b/gcc/config/alpha/crtfastmath.c new file mode 100644 index 000000000..677b9edfa --- /dev/null +++ b/gcc/config/alpha/crtfastmath.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2001, 2009 Free Software Foundation, Inc. 
+ * Contributed by Richard Henderson (rth@redhat.com) + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * . + */ + +/* Assume OSF/1 compatible interfaces. */ + +extern void __ieee_set_fp_control (unsigned long int); + +#define IEEE_MAP_DMZ (1UL<<12) /* Map denorm inputs to zero */ +#define IEEE_MAP_UMZ (1UL<<13) /* Map underflowed outputs to zero */ + +static void __attribute__((constructor)) +set_fast_math (void) +{ + __ieee_set_fp_control (IEEE_MAP_DMZ | IEEE_MAP_UMZ); +} diff --git a/gcc/config/alpha/driver-alpha.c b/gcc/config/alpha/driver-alpha.c new file mode 100644 index 000000000..d787886d1 --- /dev/null +++ b/gcc/config/alpha/driver-alpha.c @@ -0,0 +1,100 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009 Free Software Foundation, Inc. + Contributed by Arthur Loiret + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "cpu" or "tune" as argument depending on if -mcpu=native + or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-mcpu=ev6" on an Alpha 21264 for + -mcpu=native. If the routine can't detect a known processor, + the -mcpu or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. 
*/ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + const char *cpu = NULL; + char buf[128]; + FILE *f; + + static const struct cpu_names { + const char *const name; + const char *const cpu; + } cpu_names[] = { + { "EV79", "ev67" }, + { "EV7", "ev67" }, + { "EV69", "ev67" }, + { "EV68CX", "ev67" }, + { "EV68CB", "ev67" }, + { "EV68AL", "ev67" }, + { "EV67", "ev67" }, + { "EV6", "ev6" }, + { "PCA57", "pca56" }, + { "PCA56", "pca56" }, + { "EV56", "ev56" }, + { "EV5", "ev5" }, + { "LCA45", "ev45" }, + { "EV45", "ev45" }, + { "LCA4", "ev4" }, + { "EV4", "ev4" }, +/* { "EV3", "ev3" }, */ + { 0, 0 } + }; + + int i; + + if (argc < 1) + return NULL; + + if (strcmp (argv[0], "cpu") && strcmp (argv[0], "tune")) + return NULL; + + f = fopen ("/proc/cpuinfo", "r"); + if (f == NULL) + return NULL; + + while (fgets (buf, sizeof (buf), f) != NULL) + if (strncmp (buf, "cpu model", sizeof ("cpu model") - 1) == 0) + { + for (i = 0; cpu_names [i].name; i++) + if (strstr (buf, cpu_names [i].name) != NULL) + { + cpu = cpu_names [i].cpu; + break; + } + break; + } + + fclose (f); + + if (cpu == NULL) + return NULL; + + return concat ("-m", argv[0], "=", cpu, NULL); +} diff --git a/gcc/config/alpha/elf.h b/gcc/config/alpha/elf.h new file mode 100644 index 000000000..0293f26ee --- /dev/null +++ b/gcc/config/alpha/elf.h @@ -0,0 +1,452 @@ +/* Definitions of target machine for GNU compiler, for DEC Alpha w/ELF. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008, + 2009, 2010 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef OBJECT_FORMAT_COFF +#undef EXTENDED_COFF +#define OBJECT_FORMAT_ELF + +/* ??? Move all SDB stuff from alpha.h to osf.h. */ +#undef SDB_DEBUGGING_INFO + +#define DBX_DEBUGGING_INFO 1 +#define DWARF2_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +#undef ASM_FINAL_SPEC + +/* alpha/ doesn't use elfos.h for some reason. */ +#define TARGET_OBJFMT_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__ELF__"); \ + } \ + while (0) + +#undef CC1_SPEC +#define CC1_SPEC "%{G*}" + +#undef ASM_SPEC +#define ASM_SPEC "%{G*} %{relax:-relax} %{!gstabs*:-no-mdebug}%{gstabs*:-mdebug}" + +#undef IDENT_ASM_OP +#define IDENT_ASM_OP "\t.ident\t" + +/* Output #ident as a .ident. */ +#undef ASM_OUTPUT_IDENT +#define ASM_OUTPUT_IDENT(FILE, NAME) \ + fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME); + +/* This is how to allocate empty space in some section. The .zero + pseudo-op is used for this on most svr4 assemblers. */ + +#undef SKIP_ASM_OP +#define SKIP_ASM_OP "\t.zero\t" + +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(FILE, SIZE) \ + fprintf (FILE, "%s"HOST_WIDE_INT_PRINT_UNSIGNED"\n", SKIP_ASM_OP, (SIZE)) + +/* Output the label which precedes a jumptable. 
Note that for all svr4 + systems where we actually generate jumptables (which is to say every + svr4 target except i386, where we use casesi instead) we put the jump- + tables into the .rodata section and since other stuff could have been + put into the .rodata section prior to any given jumptable, we have to + make sure that the location counter for the .rodata section gets pro- + perly re-aligned prior to the actual beginning of the jump table. */ + +#undef ALIGN_ASM_OP +#define ALIGN_ASM_OP "\t.align\t" + +#ifndef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN ((FILE), 2); +#endif + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \ + do { \ + ASM_OUTPUT_BEFORE_CASE_LABEL (FILE, PREFIX, NUM, JUMPTABLE) \ + (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \ + } while (0) + +/* The standard SVR4 assembler seems to require that certain builtin + library routines (e.g. .udiv) be explicitly declared as .globl + in each assembly file where they are referenced. */ + +#undef ASM_OUTPUT_EXTERNAL_LIBCALL +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ + (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#undef COMMON_ASM_OP +#define COMMON_ASM_OP "\t.comm\t" + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ +do { \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", (SIZE), (ALIGN) / BITS_PER_UNIT); \ +} while (0) + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ +do { \ + if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \ + switch_to_section (sbss_section); \ + else \ + switch_to_section (bss_section); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \ +} while (0) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \ +} while (0) + +/* The biggest alignment supported by ELF in bits. 32-bit ELF + supports section alignment up to (0x80000000 * 8), while + 64-bit ELF supports (0x8000000000000000 * 8). If this macro + is not defined, the default is the largest alignment supported + by 32-bit ELF and representable on a 32-bit host. Use this + macro to limit the alignment which can be specified using + the `__attribute__ ((aligned (N)))' construct. + + This value is really 2^63. Since gcc figures the alignment in bits, + we could only potentially get to 2^60 on suitable hosts. Due to other + considerations in varasm, we must restrict this to what fits in an int. 
*/ + +#undef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (((unsigned int) 1 << 28) * 8) + +/* This is the pseudo-op used to generate a contiguous sequence of byte + values from a double-quoted string WITHOUT HAVING A TERMINATING NUL + AUTOMATICALLY APPENDED. This is the same for most svr4 assemblers. */ + +#undef ASCII_DATA_ASM_OP +#define ASCII_DATA_ASM_OP "\t.ascii\t" + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" +#undef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#undef SBSS_SECTION_ASM_OP +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\"" +#undef SDATA_SECTION_ASM_OP +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\"" + +/* On svr4, we *do* have support for the .init and .fini sections, and we + can put stuff in there to be executed before and after `main'. We let + crtstuff.c and other files know this by defining the following symbols. + The definitions say how to change sections to the .init and .fini + sections. This is the same for all known svr4 assemblers. */ + +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +#ifdef HAVE_GAS_SUBSECTION_ORDERING + +#define ASM_SECTION_START_OP "\t.subsection\t-1" + +/* Output assembly directive to move to the beginning of current section. */ +#define ASM_OUTPUT_SECTION_START(FILE) \ + fprintf ((FILE), "%s\n", ASM_SECTION_START_OP) + +#endif + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section +#define TARGET_ASM_SELECT_SECTION default_elf_select_section + +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + +/* Define the strings used for the special svr4 .type and .size directives. + These strings generally do not vary from one system running svr4 to + another, but if a given system (e.g. m88k running svr) needs to use + different pseudo-op names for these, they may be overridden in the + file which includes this one. */ + +#undef TYPE_ASM_OP +#define TYPE_ASM_OP "\t.type\t" +#undef SIZE_ASM_OP +#define SIZE_ASM_OP "\t.size\t" + +/* This is how we tell the assembler that a symbol is weak. */ + +#undef ASM_WEAKEN_LABEL +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ + fputc ('\n', FILE); } while (0) + +/* This is how we tell the assembler that two symbols have the same value. */ + +#undef ASM_OUTPUT_DEF +#define ASM_OUTPUT_DEF(FILE, ALIAS, NAME) \ + do { \ + assemble_name(FILE, ALIAS); \ + fputs(" = ", FILE); \ + assemble_name(FILE, NAME); \ + fputc('\n', FILE); \ + } while (0) + +#undef ASM_OUTPUT_DEF_FROM_DECLS +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ + do { \ + const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + const char *name = IDENTIFIER_POINTER (TARGET); \ + if (TREE_CODE (DECL) == FUNCTION_DECL) \ + { \ + fputc ('$', FILE); \ + assemble_name (FILE, alias); \ + fputs ("..ng = $", FILE); \ + assemble_name (FILE, name); \ + fputs ("..ng\n", FILE); \ + } \ + assemble_name(FILE, alias); \ + fputs(" = ", FILE); \ + assemble_name(FILE, name); \ + fputc('\n', FILE); \ + } while (0) + +/* The following macro defines the format used to output the second + operand of the .type assembler directive. Different svr4 assemblers + expect various different forms for this operand. The one given here + is just a default. 
You may need to override it in your machine- + specific tm.h file (depending upon the particulars of your assembler). */ + +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "@%s" + +/* Write the extra assembler code needed to declare a function's result. + Most svr4 assemblers don't require any special declaration of the + result value, but there are exceptions. */ + +#ifndef ASM_DECLARE_RESULT +#define ASM_DECLARE_RESULT(FILE, RESULT) +#endif + +/* These macros generate the special .type and .size directives which + are used to set the corresponding fields of the linker symbol table + entries in an ELF object file under SVR4. These macros also output + the starting labels for the relevant functions/objects. */ + +/* Write the extra assembler code needed to declare an object properly. */ + +#ifdef HAVE_GAS_GNU_UNIQUE_OBJECT +#define USE_GNU_UNIQUE_OBJECT 1 +#else +#define USE_GNU_UNIQUE_OBJECT 0 +#endif + +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + do { \ + HOST_WIDE_INT size; \ + \ + /* For template static data member instantiations or \ + inline fn local statics and their guard variables, use \ + gnu_unique_object so that they will be combined even under \ + RTLD_LOCAL. Don't use gnu_unique_object for typeinfo, \ + vtables and other read-only artificial decls. */ \ + if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (DECL) \ + && (!DECL_ARTIFICIAL (DECL) || !TREE_READONLY (DECL))) \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "gnu_unique_object"); \ + else \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive \ + && (DECL) && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ + } \ + \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } while (0) + +/* Output the size directive for a decl in rest_of_decl_compilation + in the case where we did not do so before the initializer. + Once we find the error_mark_node, we know that the value of + size_directive_output was set + by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */ + +#undef ASM_FINISH_DECLARE_OBJECT +#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \ + do { \ + const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + HOST_WIDE_INT size; \ + if (!flag_inhibit_size_directive \ + && DECL_SIZE (DECL) \ + && ! AT_END && TOP_LEVEL \ + && DECL_INITIAL (DECL) == error_mark_node \ + && !size_directive_output \ + && (size = int_size_in_bytes (TREE_TYPE (DECL))) > 0) \ + { \ + size_directive_output = 1; \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ + } \ + } while (0) + +/* A table of bytes codes used by the ASM_OUTPUT_ASCII and + ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table + corresponds to a particular byte value [0..255]. For any + given byte value, if the value in the corresponding table + position is zero, the given character can be output directly. + If the table value is 1, the byte must be output as a \ooo + octal escape. If the tables value is anything else, then the + byte value should be output as a \ followed by the value + in the table. Note that we can use standard UN*X escape + sequences for many control characters, but we don't use + \a to represent BEL because some svr4 assemblers (e.g. on + the i386) don't know about that. Also, we don't use \v + since some versions of gas, such as 2.2 did not accept it. 
*/ + +#undef ESCAPES +#define ESCAPES \ +"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" + +/* Some svr4 assemblers have a limit on the number of characters which + can appear in the operand of a .string directive. If your assembler + has such a limitation, you should define STRING_LIMIT to reflect that + limit. Note that at least some svr4 assemblers have a limit on the + actual number of bytes in the double-quoted string, and that they + count each character in an escape sequence as one byte. Thus, an + escape sequence like \377 would count as four bytes. + + If your target assembler doesn't support the .string directive, you + should define this to zero. */ + +#undef STRING_LIMIT +#define STRING_LIMIT ((unsigned) 256) +#undef STRING_ASM_OP +#define STRING_ASM_OP "\t.string\t" + +/* GAS is the only Alpha/ELF assembler. */ +#undef TARGET_GAS +#define TARGET_GAS (1) + +/* Provide a STARTFILE_SPEC appropriate for ELF. Here we add the + (even more) magical crtbegin.o file which provides part of the + support for getting C++ file-scope static object constructed + before entering `main'. */ + +#undef STARTFILE_SPEC +#ifdef HAVE_LD_PIE +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#else +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#endif + +/* Provide a ENDFILE_SPEC appropriate for ELF. Here we tack on the + magical crtend.o file which provides part of the support for + getting C++ file-scope static object constructed before entering + `main', followed by a normal ELF "finalizer" file, `crtn.o'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + Since application size is already constrained to <2GB by the form of + the ldgp relocation, we can use a 32-bit pc-relative relocation to + static data. Dynamic data is accessed indirectly to allow for read + only EH sections. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) + +/* If defined, a C statement to be executed just prior to the output of + assembler code for INSN. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + (alpha_this_literal_sequence_number = 0, \ + alpha_this_gpdisp_sequence_number = 0) +extern int alpha_this_literal_sequence_number; +extern int alpha_this_gpdisp_sequence_number; + +/* Since the bits of the _init and _fini function is spread across + many object files, each potentially with its own GP, we must assume + we need to load our GP. 
Further, the .init/.fini section can + easily be more than 4MB away from the function to call so we can't + use bsr. */ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ +" br $29,1f\n" \ +"1: ldgp $29,0($29)\n" \ +" unop\n" \ +" jsr $26," USER_LABEL_PREFIX #FUNC "\n" \ +" .align 3\n" \ +" .previous"); + +/* If we have the capability create headers for efficient EH lookup. + As of Jan 2002, only glibc 2.2.4 can actually make use of this, but + I imagine that other systems will catch up. In the meantime, it + doesn't harm to make sure that the data exists to be used later. */ +#if defined(HAVE_LD_EH_FRAME_HDR) +#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " +#endif + +/* A C statement (sans semicolon) to output to the stdio stream STREAM + any text necessary for declaring the name of an external symbol + named NAME which is referenced in this compilation but not defined. + It is needed to properly support non-default visibility. */ + +#ifndef ASM_OUTPUT_EXTERNAL +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + default_elf_asm_output_external (FILE, DECL, NAME) +#endif diff --git a/gcc/config/alpha/elf.opt b/gcc/config/alpha/elf.opt new file mode 100644 index 000000000..edafd5d37 --- /dev/null +++ b/gcc/config/alpha/elf.opt @@ -0,0 +1,30 @@ +; Alpha ELF options. + +; Copyright (C) 2011 +; Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +relax +Driver + +; This comment is to ensure we retain the blank line above. diff --git a/gcc/config/alpha/ev4.md b/gcc/config/alpha/ev4.md new file mode 100644 index 000000000..5b1899fc7 --- /dev/null +++ b/gcc/config/alpha/ev4.md @@ -0,0 +1,161 @@ +;; Scheduling description for Alpha EV4. +;; Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; On EV4 there are two classes of resources to consider: resources needed +; to issue, and resources needed to execute. IBUS[01] are in the first +; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second. +; (There are a few other register-like resources, but ...) 
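[Editorial aside, not part of the imported ev4.md.] A reminder of how the DFA reservation strings in these scheduling descriptions are read: "+" claims several units in the same cycle, "," advances to the next cycle, "|" selects either alternative, and "unit*N" holds a unit for N consecutive cycles. As a worked example, the memory-barrier reservation below, "ev4_ib1+ev4_abox,ev4_abox*3", issues on IBUS1 together with the ABOX and then keeps the ABOX occupied for the following three cycles, which is the approximation its accompanying comment describes.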
+ +(define_automaton "ev4_0,ev4_1,ev4_2") +(define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0") +(define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1") +(define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2") +(define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev4_multi" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "multi")) + "ev4_ib0+ev4_ib1") + +; Loads from L0 completes in three cycles. adjust_cost still factors +; in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev4_ld" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ild,fld,ldsym,ld_l")) + "ev4_ib01+ev4_abox") + +; Stores can issue before the data (but not address) is ready. +(define_insn_reservation "ev4_ist" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ist")) + "ev4_ib1+ev4_abox") + +; ??? Separate from ev4_ist because store_data_bypass_p can't handle +; the patterns with multiple sets, like store-conditional. +(define_insn_reservation "ev4_ist_c" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "st_c")) + "ev4_ib1+ev4_abox") + +(define_insn_reservation "ev4_fst" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fst")) + "ev4_ib0+ev4_abox") + +; Memory barrier blocks ABOX insns until it's acknowledged by the external +; memory bus. This may be *quite* slow. Setting this to 4 cycles gets +; about all the benefit without making the DFA too large. +(define_insn_reservation "ev4_mb" 4 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "mb")) + "ev4_ib1+ev4_abox,ev4_abox*3") + +; Branches have no delay cost, but do tie up the unit for two cycles. +(define_insn_reservation "ev4_ibr" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ibr,jsr")) + "ev4_ib1+ev4_bbox,ev4_bbox") + +(define_insn_reservation "ev4_callpal" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "callpal")) + "ev4_ib1+ev4_bbox,ev4_bbox") + +(define_insn_reservation "ev4_fbr" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fbr")) + "ev4_ib0+ev4_bbox,ev4_bbox") + +; Arithmetic insns are normally have their results available after +; two cycles. There are a number of exceptions. + +(define_insn_reservation "ev4_iaddlog" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "iadd,ilog")) + "ev4_ib0+ev4_ebox") + +(define_bypass 1 + "ev4_iaddlog" + "ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi") + +(define_insn_reservation "ev4_shiftcm" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "shift,icmov")) + "ev4_ib0+ev4_ebox") + +(define_insn_reservation "ev4_icmp" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "icmp")) + "ev4_ib0+ev4_ebox") + +(define_bypass 1 "ev4_icmp" "ev4_ibr") + +(define_bypass 0 + "ev4_iaddlog,ev4_shiftcm,ev4_icmp" + "ev4_ist" + "store_data_bypass_p") + +; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can +; be issued exactly three cycles before an integer multiply completes". + +(define_insn_reservation "ev4_imulsi" 21 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "si"))) + "ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox") + +(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p") + +(define_insn_reservation "ev4_imuldi" 23 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "!si"))) + "ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox") + +(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p") + +; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in. 
+(define_insn_reservation "ev4_fpop" 6 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fadd,fmul,fcpys,fcmov")) + "ev4_ib1+ev4_fbox") + +(define_bypass 4 "ev4_fpop" "ev4_fpop") + +; The floating point divider is not pipelined. Also, "no FPOP insn can be +; issued exactly five or exactly six cycles before an fdiv insn completes". + +(define_insn_reservation "ev4_fdivsf" 34 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox") + +(define_insn_reservation "ev4_fdivdf" 63 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox") + +; Traps don't consume or produce data. +(define_insn_reservation "ev4_misc" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "misc")) + "ev4_ib1") diff --git a/gcc/config/alpha/ev5.md b/gcc/config/alpha/ev5.md new file mode 100644 index 000000000..f22d391a4 --- /dev/null +++ b/gcc/config/alpha/ev5.md @@ -0,0 +1,194 @@ +;; Scheduling description for Alpha EV5. +;; Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; EV5 has two asymmetric integer units, E0 and E1, plus separate +;; FP add and multiply units. + +(define_automaton "ev5_0,ev5_1") +(define_cpu_unit "ev5_e0,ev5_e1,ev5_fa,ev5_fm" "ev5_0") +(define_reservation "ev5_e01" "ev5_e0|ev5_e1") +(define_reservation "ev5_fam" "ev5_fa|ev5_fm") +(define_cpu_unit "ev5_imul" "ev5_0") +(define_cpu_unit "ev5_fdiv" "ev5_1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev5_multi" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "multi")) + "ev5_e0+ev5_e1+ev5_fa+ev5_fm") + +; Stores can only issue to E0, and may not issue with loads. +; Model this with some fake units. + +(define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0") +(define_reservation "ev5_ld" "ev5_l0|ev5_l1") +(exclusion_set "ev5_l0,ev5_l1" "ev5_st") + +(define_insn_reservation "ev5_st" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ist,fst,st_c,mb")) + "ev5_e0+ev5_st") + +; Loads from L0 complete in two cycles. adjust_cost still factors +; in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev5_ld" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ild,fld,ldsym")) + "ev5_e01+ev5_ld") + +(define_insn_reservation "ev5_ld_l" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ld_l")) + "ev5_e0+ev5_ld") + +; Integer branches slot only to E1. 
+(define_insn_reservation "ev5_ibr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ibr")) + "ev5_e1") + +(define_insn_reservation "ev5_callpal" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "callpal")) + "ev5_e1") + +(define_insn_reservation "ev5_jsr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "jsr")) + "ev5_e1") + +(define_insn_reservation "ev5_shift" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "shift")) + "ev5_e0") + +(define_insn_reservation "ev5_mvi" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "mvi")) + "ev5_e0") + +(define_insn_reservation "ev5_cmov" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "icmov")) + "ev5_e01") + +(define_insn_reservation "ev5_iadd" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "iadd")) + "ev5_e01") + +(define_insn_reservation "ev5_ilogcmp" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ilog,icmp")) + "ev5_e01") + +; Conditional move and branch can issue the same cycle as the test. +(define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p") + +; Multiplies use a non-pipelined imul unit. Also, "no insn can be issued +; to E0 exactly two cycles before an integer multiply completes". + +(define_insn_reservation "ev5_imull" 8 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "si"))) + "ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0") + +(define_insn_reservation "ev5_imulq" 12 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "di"))) + "ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0") + +(define_insn_reservation "ev5_imulh" 14 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "udi"))) + "ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0") + +; The multiplier is unable to receive data from Ebox bypass paths. The +; instruction issues at the expected time, but its latency is increased +; by the time it takes for the input data to become available to the +; multiplier. For example, an IMULL instruction issued one cycle later +; than an ADDL instruction, which produced one of its operands, has a +; latency of 10 (8 + 2). If the IMULL instruction is issued two cycles +; later than the ADDL instruction, the latency is 9 (8 + 1). +; +; Model this instead with increased latency on the input instruction. + +(define_bypass 3 + "ev5_ld,ev5_ld_l,ev5_shift,ev5_mvi,ev5_cmov,ev5_iadd,ev5_ilogcmp" + "ev5_imull,ev5_imulq,ev5_imulh") + +(define_bypass 9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh") +(define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh") +(define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh") + +; Similarly for the FPU we have two asymmetric units. + +(define_insn_reservation "ev5_fadd" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fadd,fcmov")) + "ev5_fa") + +(define_insn_reservation "ev5_fbr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fbr")) + "ev5_fa") + +(define_insn_reservation "ev5_fcpys" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fcpys")) + "ev5_fam") + +(define_insn_reservation "ev5_fmul" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fmul")) + "ev5_fm") + +; The floating point divider is not pipelined. Also, "no insn can be issued +; to FA exactly five before an fdiv insn completes". +; +; ??? Do not model this late reservation due to the enormously increased +; size of the resulting DFA. +; +; ??? 
Putting ev5_fa and ev5_fdiv alone into the same automata produces +; a DFA of acceptable size, but putting ev5_fm and ev5_fa into separate +; automata produces incorrect results for insns that can choose one or +; the other, i.e. ev5_fcpys. + +(define_insn_reservation "ev5_fdivsf" 15 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + ; "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4" + "ev5_fa+ev5_fdiv,ev5_fdiv*14") + +(define_insn_reservation "ev5_fdivdf" 22 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + ; "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4" + "ev5_fa+ev5_fdiv,ev5_fdiv*21") + +; Traps don't consume or produce data; rpcc is latency 2 if we ever add it. +(define_insn_reservation "ev5_misc" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "misc")) + "ev5_e0") diff --git a/gcc/config/alpha/ev6.md b/gcc/config/alpha/ev6.md new file mode 100644 index 000000000..adfe504bf --- /dev/null +++ b/gcc/config/alpha/ev6.md @@ -0,0 +1,177 @@ +;; Scheduling description for Alpha EV6. +;; Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; EV6 can issue 4 insns per clock. It's out-of-order, so this isn't +; expected to help over-much, but a precise description can be important +; for software pipelining. +; +; EV6 has two symmetric pairs ("clusters") of two asymmetric integer +; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. +; +; ??? The clusters have independent register files that are re-synced +; every cycle. Thus there is one additional cycle of latency between +; insns issued on different clusters. Possibly model that by duplicating +; all EBOX insn_reservations that can issue to either cluster, increasing +; all latencies by one, and adding bypasses within the cluster. +; +; ??? In addition, instruction order affects cluster issue. + +(define_automaton "ev6_0,ev6_1") +(define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0") +(define_reservation "ev6_u" "ev6_u0|ev6_u1") +(define_reservation "ev6_l" "ev6_l0|ev6_l1") +(define_reservation "ev6_ebox" "ev6_u|ev6_l") + +(define_cpu_unit "ev6_fa" "ev6_1") +(define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0") +(define_reservation "ev6_fst" "ev6_fst0|ev6_fst1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev6_multi" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "multi")) + "ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1") + +; Integer loads take at least 3 clocks, and only issue to lower units. +; adjust_cost still factors in user-specified memory latency, so return 1 here. 
+(define_insn_reservation "ev6_ild" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ild,ldsym,ld_l")) + "ev6_l") + +(define_insn_reservation "ev6_ist" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ist,st_c")) + "ev6_l") + +(define_insn_reservation "ev6_mb" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "mb")) + "ev6_l1") + +; FP loads take at least 4 clocks. adjust_cost still factors +; in user-specified memory latency, so return 2 here. +(define_insn_reservation "ev6_fld" 2 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fld")) + "ev6_l") + +; The FPU communicates with memory and the integer register file +; via two fp store units. We need a slot in the fst immediately, and +; a slot in LOW after the operand data is ready. At which point the +; data may be moved either to the store queue or the integer register +; file and the insn retired. + +(define_insn_reservation "ev6_fst" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fst")) + "ev6_fst,nothing,ev6_l") + +; Arithmetic goes anywhere. +(define_insn_reservation "ev6_arith" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "iadd,ilog,icmp")) + "ev6_ebox") + +; Motion video insns also issue only to U0, and take three ticks. +(define_insn_reservation "ev6_mvi" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "mvi")) + "ev6_u0") + +; Shifts issue to upper units. +(define_insn_reservation "ev6_shift" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "shift")) + "ev6_u") + +; Multiplies issue only to U1, and all take 7 ticks. +(define_insn_reservation "ev6_imul" 7 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "imul")) + "ev6_u1") + +; Conditional moves decompose into two independent primitives, each taking +; one cycle. Since ev6 is out-of-order, we can't see anything but two cycles. +(define_insn_reservation "ev6_icmov" 2 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "icmov")) + "ev6_ebox,ev6_ebox") + +; Integer branches issue to upper units +(define_insn_reservation "ev6_ibr" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ibr,callpal")) + "ev6_u") + +; Calls only issue to L0. +(define_insn_reservation "ev6_jsr" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "jsr")) + "ev6_l0") + +; Ftoi/itof only issue to lower pipes. 
+(define_insn_reservation "ev6_itof" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "itof")) + "ev6_l") + +(define_insn_reservation "ev6_ftoi" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ftoi")) + "ev6_fst,nothing,ev6_l") + +(define_insn_reservation "ev6_fmul" 4 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fmul")) + "ev6_fm") + +(define_insn_reservation "ev6_fadd" 4 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fadd,fcpys,fbr")) + "ev6_fa") + +(define_insn_reservation "ev6_fcmov" 8 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fcmov")) + "ev6_fa,nothing*3,ev6_fa") + +(define_insn_reservation "ev6_fdivsf" 12 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "ev6_fa*9") + +(define_insn_reservation "ev6_fdivdf" 15 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "ev6_fa*12") + +(define_insn_reservation "ev6_sqrtsf" 18 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "si"))) + "ev6_fa*15") + +(define_insn_reservation "ev6_sqrtdf" 33 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "di"))) + "ev6_fa*30") diff --git a/gcc/config/alpha/freebsd.h b/gcc/config/alpha/freebsd.h new file mode 100644 index 000000000..fbefde1fd --- /dev/null +++ b/gcc/config/alpha/freebsd.h @@ -0,0 +1,81 @@ +/* Definitions for DEC Alpha/AXP running FreeBSD using the ELF format + Copyright (C) 2000, 2002, 2004, 2005, 2007, 2010 + Free Software Foundation, Inc. + Contributed by David E. O'Brien and BSDi. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER } + +/* Provide a CPP_SPEC appropriate for FreeBSD/alpha -- dealing with + the GCC option `-posix'. */ + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" + +#define LINK_SPEC "%{G*} %{relax:-relax} \ + %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with . GCC defaults come from c-decl.c, + c-common.c, and config//.h. */ + +/* alpha.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (FreeBSD/alpha ELF)"); + +#define TARGET_ELF 1 + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS) + +#undef HAS_INIT_SECTION + +/* Show that we need a GP when profiling. 
*/ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* This is the char to use for continuation (in case we need to turn + continuation back on). */ + +#undef DBX_CONTIN_CHAR +#define DBX_CONTIN_CHAR '?' + +/* Don't default to pcc-struct-return, we want to retain compatibility with + older FreeBSD releases AND pcc-struct-return may not be reentrant. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 diff --git a/gcc/config/alpha/gnu.h b/gcc/config/alpha/gnu.h new file mode 100644 index 000000000..ca7198039 --- /dev/null +++ b/gcc/config/alpha/gnu.h @@ -0,0 +1,49 @@ +/* Configuration for an Alpha running GNU with ELF as the target machine. + +Copyright (C) 2002, 2003, 2004, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (Alpha GNU)"); + +#undef TARGET_OS_CPP_BUILTINS /* config.gcc includes alpha/linux.h. */ +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + builtin_define ("_LONGLONG"); \ + } while (0) + +#undef ELF_DYNAMIC_LINKER +#define ELF_DYNAMIC_LINKER "/lib/ld.so" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: \ + %{!static: \ + %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} %{!p:crt1.o%s}}} \ + %{static:crt0.o%s}} \ + crti.o%s \ + %{!static:%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}}" + +/* FIXME: Is a Hurd-specific fallback mechanism necessary? */ +#undef MD_UNWIND_SUPPORT diff --git a/gcc/config/alpha/host-osf.c b/gcc/config/alpha/host-osf.c new file mode 100644 index 000000000..0a554b3ec --- /dev/null +++ b/gcc/config/alpha/host-osf.c @@ -0,0 +1,147 @@ +/* Tru64 UNIX host-specific hook definitions. + Copyright (C) 2011 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include +/* Inhibit inclusion of , unnecessary and errors out due to + use of poisoned bcmp, bcopy. 
*/ +#define _SYS_MOUNT_H_ +#include +#include "hosthooks.h" +#include "hosthooks-def.h" + + +#undef HOST_HOOKS_GT_PCH_GET_ADDRESS +#define HOST_HOOKS_GT_PCH_GET_ADDRESS osf_gt_pch_get_address +#undef HOST_HOOKS_GT_PCH_USE_ADDRESS +#define HOST_HOOKS_GT_PCH_USE_ADDRESS osf_gt_pch_use_address + +/* The mmap ADDR parameter may be ignored without MAP_FIXED set. Before we + give up, check existing mappings with ioctl(PIOCMAP) to see if the space + is really free. */ + +static void * +mmap_fixed (void *addr, size_t len, int prot, int flags, int fd, off_t off) +{ + void *base; + + base = mmap ((caddr_t) addr, len, prot, flags, fd, off); + + if (base != addr) + { + /* PID_MAX is SHRT_MAX on Tru64 UNIX V4.0, but INT_MAX on V5.1. + Allow for both. "/proc/" + INT_MAX + '\0'. */ + char pname[6+10+1]; + int procfd, nmap; + prmap_t *pmap; + int i, overlap = 0; + + if (base != (void *) MAP_FAILED) + munmap ((caddr_t) base, len); + + /* Check if there's any mapping overlapping [addr, addr+len). */ + + snprintf (pname, sizeof (pname), "/proc/%d", getpid ()); + procfd = open (pname, O_RDONLY); + if (procfd == -1) + return ((void *) MAP_FAILED); + if (ioctl (procfd, PIOCNMAP, &nmap) == -1) + return ((void *) MAP_FAILED); + pmap = (prmap_t *) xmalloc (sizeof (*pmap) * (nmap+1)); + if (ioctl (procfd, PIOCMAP, pmap) == -1) + return ((void *) MAP_FAILED); + + /* It seems like pmap[] is sorted by address, but can we rely on + that? */ + for (i = 0; i < nmap; i++) + { + uintptr_t map_start = (uintptr_t) pmap[i].pr_vaddr; + uintptr_t map_end = map_start + pmap[i].pr_size; + + if ((uintptr_t) addr < map_end + && (uintptr_t) addr+len > map_start) + { + overlap = 1; + break; + } + } + free (pmap); + close (procfd); + + if (!overlap) + base = mmap ((caddr_t) addr, len, prot, flags | MAP_FIXED, fd, off); + else + base = mmap ((caddr_t) addr, len, prot, flags, fd, off); + } + + return base; +} + +/* For various ports, try to guess a fixed spot in the vm space that's + probably free. Take the middle between start of text segment and + dynamic loader space. See and Tru64 UNIX + Assembly Language Programmer's Guide, p.6-18, Figure 6-3: Default Layout + of Memory (User Program View). */ +#define TRY_EMPTY_VM_SPACE 0x20050000000 + +/* Determine a location where we might be able to reliably allocate + SIZE bytes. FD is the PCH file, though we should return with the + file unmapped. */ + +static void * +osf_gt_pch_get_address (size_t size, int fd) +{ + void *addr; + + addr = mmap_fixed ((caddr_t) TRY_EMPTY_VM_SPACE, size, + PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + + /* If we failed the map, that means there's *no* free space. */ + if (addr == (void *) MAP_FAILED) + return NULL; + /* Unmap the area before returning. */ + munmap ((caddr_t) addr, size); + + return addr; +} + +/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at + mapping the data at BASE, -1 if we couldn't. */ + +static int +osf_gt_pch_use_address (void *base, size_t size, int fd, size_t offset) +{ + void *addr; + + /* We're called with size == 0 if we're not planning to load a PCH + file at all. This allows the hook to free any static space that + we might have allocated at link time. */ + if (size == 0) + return -1; + + addr = mmap_fixed ((caddr_t) base, size, + PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, offset); + + return addr == base ? 
1 : -1; +} + + +const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER; diff --git a/gcc/config/alpha/libgcc-alpha-ldbl.ver b/gcc/config/alpha/libgcc-alpha-ldbl.ver new file mode 100644 index 000000000..8dc54a749 --- /dev/null +++ b/gcc/config/alpha/libgcc-alpha-ldbl.ver @@ -0,0 +1,50 @@ +# Copyright (C) 2006 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +%ifdef __LONG_DOUBLE_128__ + +# long double 128 bit support in libgcc_s.so.1 is only available +# when configured with --with-long-double-128. Make sure all the +# symbols are available at @@GCC_LDBL_* versions to make it clear +# there is a configurable symbol set. + +%exclude { + __fixtfdi + __fixunstfdi + __floatditf + + __divtc3 + __multc3 + __powitf2 +} + +%inherit GCC_LDBL_3.0 GCC_3.0 +GCC_LDBL_3.0 { + __fixtfdi + __fixunstfdi + __floatditf +} + +%inherit GCC_LDBL_4.0.0 GCC_4.0.0 +GCC_LDBL_4.0.0 { + __divtc3 + __multc3 + __powitf2 +} + +%endif diff --git a/gcc/config/alpha/linux-elf.h b/gcc/config/alpha/linux-elf.h new file mode 100644 index 000000000..e8eac2f91 --- /dev/null +++ b/gcc/config/alpha/linux-elf.h @@ -0,0 +1,57 @@ +/* Definitions of target machine for GNU compiler + for Alpha Linux-based GNU systems using ELF. + Copyright (C) 1996, 1997, 1998, 2001, 2002, 2003, 2006, 2007, 2010 + Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (Alpha GNU/Linux for ELF)"); + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ +{ "elf_dynamic_linker", ELF_DYNAMIC_LINKER }, + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" +#if DEFAULT_LIBC == LIBC_UCLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" +#elif DEFAULT_LIBC == LIBC_GLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}" +#else +#error "Unsupported DEFAULT_LIBC" +#endif +#define LINUX_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER) + +#define ELF_DYNAMIC_LINKER LINUX_DYNAMIC_LINKER + +#define LINK_SPEC "-m elf64alpha %{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(elf_dynamic_linker)} \ + %{static:-static}}" + +#undef LIB_SPEC +#define LIB_SPEC \ +"%{pthread:-lpthread} %{shared:-lc}%{!shared:%{profile:-lc_p}%{!profile:-lc}} " + +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack diff --git a/gcc/config/alpha/linux-unwind.h b/gcc/config/alpha/linux-unwind.h new file mode 100644 index 000000000..8c04b3b41 --- /dev/null +++ b/gcc/config/alpha/linux-unwind.h @@ -0,0 +1,99 @@ +/* DWARF2 EH unwinding support for Alpha Linux. + Copyright (C) 2004, 2005, 2009, 2011, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. 
*/ + +#include +#include + +#define MD_FALLBACK_FRAME_STATE_FOR alpha_fallback_frame_state + +static _Unwind_Reason_Code +alpha_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned int *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i; + + if (pc[0] != 0x47fe0410 /* mov $30,$16 */ + || pc[2] != 0x00000083) /* callsys */ + return _URC_END_OF_STACK; + if (context->cfa == 0) + return _URC_END_OF_STACK; + if (pc[1] == 0x201f0067) /* lda $0,NR_sigreturn */ + sc = context->cfa; + else if (pc[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */ + { + struct rt_sigframe { + siginfo_t info; + struct ucontext uc; + } *rt_ = context->cfa; + sc = &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[30]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + for (i = 0; i < 30; ++i) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (long) &sc->sc_regs[i] - new_cfa; + } + for (i = 0; i < 31; ++i) + { + fs->regs.reg[i+32].how = REG_SAVED_OFFSET; + fs->regs.reg[i+32].loc.offset + = (long) &sc->sc_fpregs[i] - new_cfa; + } + fs->regs.reg[64].how = REG_SAVED_OFFSET; + fs->regs.reg[64].loc.offset = (long)&sc->sc_pc - new_cfa; + fs->retaddr_column = 64; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#define MD_FROB_UPDATE_CONTEXT alpha_frob_update_context + +/* Fix up for signal handlers that don't have S flag set. */ + +static void +alpha_frob_update_context (struct _Unwind_Context *context, + _Unwind_FrameState *fs ATTRIBUTE_UNUSED) +{ + unsigned int *pc = context->ra; + + if (pc[0] == 0x47fe0410 /* mov $30,$16 */ + && pc[2] == 0x00000083 /* callsys */ + && (pc[1] == 0x201f0067 /* lda $0,NR_sigreturn */ + || pc[1] == 0x201f015f)) /* lda $0,NR_rt_sigreturn */ + _Unwind_SetSignalFrame (context, 1); +} diff --git a/gcc/config/alpha/linux.h b/gcc/config/alpha/linux.h new file mode 100644 index 000000000..a1881c816 --- /dev/null +++ b/gcc/config/alpha/linux.h @@ -0,0 +1,106 @@ +/* Definitions of target machine for GNU compiler, + for Alpha Linux-based GNU systems. + Copyright (C) 1996, 1997, 1998, 2002, 2003, 2004, 2005, 2006, 2007, 2009, + 2010 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS) + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__gnu_linux__"); \ + builtin_define ("_LONGLONG"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + /* The GNU C++ standard library requires this. 
*/ \ + if (c_dialect_cxx ()) \ + builtin_define ("_GNU_SOURCE"); \ + } while (0) + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared: %{profile:-lc_p}%{!profile:-lc}}" + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't care about faults in the prologue. */ +#undef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 1 + +/* OS fixes up EV5 data fault on prefetch. */ +#undef TARGET_FIXUP_EV5_PREFETCH +#define TARGET_FIXUP_EV5_PREFETCH 1 + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#ifdef SINGLE_LIBC +#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#else +#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#endif + +/* Determine whether the entire c99 runtime is present in the + runtime library. */ +#define TARGET_C99_FUNCTIONS (OPTION_GLIBC) + +/* Whether we have sincos that follows the GNU extension. */ +#define TARGET_HAS_SINCOS (OPTION_GLIBC) + +#define TARGET_POSIX_IO + +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +#define MD_UNWIND_SUPPORT "config/alpha/linux-unwind.h" + +/* Define if long doubles should be mangled as 'g'. */ +#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + +/* -mcpu=native handling only makes sense with compiler running on + an Alpha chip. */ +#if defined(__alpha__) || defined(__alpha) +extern const char *host_detect_local_cpu (int argc, const char **argv); +# define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, + +# define MCPU_MTUNE_NATIVE_SPECS \ + " %{mcpu=native:%. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS) + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } while (0) + + +/* NetBSD doesn't use the LANGUAGE* built-ins. */ +#undef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() /* nothing */ + + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + + +/* Provide a CPP_SPEC appropriate for NetBSD/alpha. We use + this to pull in CPP specs that all NetBSD configurations need. */ + +#undef CPP_SPEC +#define CPP_SPEC NETBSD_CPP_SPEC + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, \ + { "netbsd_endfile_spec", NETBSD_ENDFILE_SPEC }, + + +/* Provide a LINK_SPEC appropriate for a NetBSD/alpha ELF target. */ + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %(netbsd_link_spec)" + +#define NETBSD_ENTRY_POINT "__start" + + +/* Provide an ENDFILE_SPEC appropriate for NetBSD/alpha ELF. Here we + add crtend.o, which provides part of the support for getting + C++ file-scope static objects deconstructed after exiting "main". + + We also need to handle the GCC option `-ffast-math'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfm%O%s} \ + %(netbsd_endfile_spec)" + + +/* Attempt to enable execute permissions on the stack. 
*/ + +#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK + + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (NetBSD/alpha ELF)"); diff --git a/gcc/config/alpha/openbsd.h b/gcc/config/alpha/openbsd.h new file mode 100644 index 000000000..8efbaf5f4 --- /dev/null +++ b/gcc/config/alpha/openbsd.h @@ -0,0 +1,45 @@ +/* Configuration file for an alpha OpenBSD target. + Copyright (C) 1999, 2003, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Controlling the compilation driver. */ + +/* run-time target specifications */ +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + OPENBSD_OS_CPP_BUILTINS_ELF(); \ + OPENBSD_OS_CPP_BUILTINS_LP64(); \ + } while (0) + +/* Layout of source language data types. */ + +/* This must agree with */ +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +#define LOCAL_LABEL_PREFIX "." diff --git a/gcc/config/alpha/osf5-unwind.h b/gcc/config/alpha/osf5-unwind.h new file mode 100644 index 000000000..c64909934 --- /dev/null +++ b/gcc/config/alpha/osf5-unwind.h @@ -0,0 +1,329 @@ +/* DWARF2 EH unwinding support for Alpha Tru64. + Copyright (C) 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file implements the MD_FALLBACK_FRAME_STATE_FOR macro, triggered when + the GCC table based unwinding process hits a frame for which no unwind info + has been registered. This typically occurs when raising an exception from a + signal handler, because the handler is actually called from the OS kernel. + + The basic idea is to detect that we are indeed trying to unwind past a + signal handler and to fill out the GCC internal unwinding structures for + the OS kernel frame as if it had been directly called from the interrupted + context. + + This is all assuming that the code to set the handler asked the kernel to + pass a pointer to such context information. */ + +/* -------------------------------------------------------------------------- + -- Basic principles of operation: + -------------------------------------------------------------------------- + + 1/ We first need a way to detect if we are trying to unwind past a signal + handler. 
+ + The typical method that is used on most platforms is to look at the code + around the return address we have and check if it matches the OS code + calling a handler. To determine what this code is expected to be, get a + breakpoint into a real signal handler and look at the code around the + return address. Depending on the library versions the pattern of the + signal handler is different; this is the reason why we check against more + than one pattern. + + On this target, the return address is right after the call and every + instruction is 4 bytes long. For the simple case of a null dereference in + a single-threaded app, it went like: + + # Check that we indeed have something we expect: the instruction right + # before the return address is within a __sigtramp function and is a call. + + [... run gdb and break at the signal handler entry ...] + + (gdb) x /i $ra-4 + <__sigtramp+160>: jsr ra,(a3),0x3ff800d0ed4 <_fpdata+36468> + + # Look at the code around that return address, and eventually observe a + # significantly large chunk of *constant* code right before the call: + + (gdb) x /10i $ra-44 + <__sigtramp+120>: lda gp,-27988(gp) + <__sigtramp+124>: ldq at,-18968(gp) + <__sigtramp+128>: lda t0,-1 + <__sigtramp+132>: stq t0,0(at) + <__sigtramp+136>: ldq at,-18960(gp) + <__sigtramp+140>: ldl t1,8(at) + <__sigtramp+144>: ldq at,-18960(gp) + <__sigtramp+148>: stl t1,12(at) + <__sigtramp+152>: ldq at,-18960(gp) + <__sigtramp+156>: stl t0,8(at) + + # The hexadecimal equivalent that we will have to match is: + + (gdb) x /10x $ra-44 + <__sigtramp+120>: 0x23bd92ac 0xa79db5e8 0x203fffff 0xb43c0000 + <__sigtramp+136>: 0xa79db5f0 0xa05c0008 0xa79db5f0 0xb05c000c + <__sigtramp+152>: 0xa79db5f0 0xb03c0008 + + The problem observed on this target with this approach is that although + we found a constant set of instruction patterns there were some + gp-related offsets that made the machine code to differ from one + installation to another. This problem could have been overcome by masking + these offsets, but we found that it would be simpler and more efficient to + check whether the return address was part of a signal handler, by comparing + it against some expected code offset from __sigtramp. + + # Check that we indeed have something we expect: the instruction + # right before the return address is within a __sigtramp + # function and is a call. We also need to obtain the offset + # between the return address and the start address of __sigtramp. + + [... run gdb and break at the signal handler entry ...] + + (gdb) x /2i $ra-4 + <__sigtramp+160>: jsr ra,(a3),0x3ff800d0ed4 <_fpdata+36468> + <__sigtramp+164>: ldah gp,16381(ra) + + (gdb) p (long)$ra - (long)&__sigtramp + $2 = 164 + + -------------------------------------------------------------------------- + + 2/ Once we know we are going through a signal handler, we need a way to + retrieve information about the interrupted run-time context. + + On this platform, the third handler's argument is a pointer to a structure + describing this context (struct sigcontext *). We unfortunately have no + direct way to transfer this value here, so a couple of tricks are required + to compute it. + + As documented at least in some header files (e.g. sys/machine/context.h), + the structure the handler gets a pointer to is located on the stack. 
As of + today, while writing this macro, we have unfortunately not been able to + find a detailed description of the full stack layout at handler entry time, + so we'll have to resort to empiricism :) + + When unwinding here, we have the handler's CFA at hand, as part of the + current unwinding context which is one of our arguments. We presume that + for each call to a signal handler by the same kernel routine, the context's + structure location on the stack is always at the same offset from the + handler's CFA, and we compute that offset from bare observation: + + For the simple case of a bare null dereference in a single-threaded app, + computing the offset was done using GNAT like this: + + # Break on the first handler's instruction, before the prologue to have the + # CFA in $sp, and get there: + + (gdb) b *&__gnat_error_handler + Breakpoint 1 at 0x120016090: file init.c, line 378. + + (gdb) r + Program received signal SIGSEGV, Segmentation fault. + + (gdb) c + Breakpoint 1, __gnat_error_handler (sig=..., sip=..., context=...) + + # The displayed argument values are meaningless because we stopped before + # their final "homing". We know they are passed through $a0, $a1 and $a2 + # from the ABI, though, so ... + + # Observe that $sp and the context pointer are in the same (stack) area, + # and compute the offset: + + (gdb) p /x $sp + $2 = 0x11fffbc80 + + (gdb) p /x $a2 + $3 = 0x11fffbcf8 + + (gdb) p /x (long)$a2 - (long)$sp + $4 = 0x78 + + -------------------------------------------------------------------------- + + 3/ Once we know we are unwinding through a signal handler and have the + address of the structure describing the interrupted context at hand, we + have to fill the internal frame-state/unwind-context structures properly + to allow the unwinding process to proceed. + + Roughly, we are provided with an *unwinding* CONTEXT, describing the state + of some point P in the call chain we are unwinding through. The macro we + implement has to fill a "frame state" structure FS that describes P's + caller state, by way of *rules* to compute its CFA, return address, and + **saved** registers *locations*. + + For the case we are going to deal with, the caller is some kernel code + calling a signal handler, and: + + o The saved registers are all in the interrupted run-time context, + + o The CFA is the stack pointer value when the kernel code is entered, that + is, the stack pointer value at the interruption point, also part of the + interrupted run-time context. + + o We want the return address to appear as the address of the active + instruction at the interruption point, so that the unwinder proceeds as + if the interruption had been a regular call. This address is also part + of the interrupted run-time context. + + -- + + Also, note that there is an important difference between the return address + we need to claim for the kernel frame and the value of the return address + register at the interruption point. + + The latter might be required to be able to unwind past the interrupted + routine, for instance if it is interrupted before saving the incoming + register value in its own frame, which may typically happen during stack + probes for stack-checking purposes. + + It is then essential that the rules stated to locate the kernel frame + return address don't clobber the rules describing where the return address + register is saved at the interruption point, so some scratch register state + entry should be used for the former. 
We have DWARF_ALT_FRAME_RETURN_COLUMN + at hand exactly for that purpose. + + -------------------------------------------------------------------------- + + 4/ Depending on the context (single-threaded or multi-threaded app, ...), + the code calling the handler and the handler-cfa to interrupted-context + offset might change, so we use a simple generic data structure to track + the possible variants. */ + +/* This is the structure to wrap information about each possible sighandler + caller we may have to identify. */ + +typedef struct { + /* Expected return address when being called from a sighandler. */ + void *ra_value; + + /* Offset to get to the sigcontext structure from the handler's CFA + when the pattern matches. */ + int cfa_to_context_offset; + +} sighandler_call_t; + +/* Helper macro for MD_FALLBACK_FRAME_STATE_FOR below. + + Look at RA to see if it matches within a sighandler caller. + Set SIGCTX to the corresponding sigcontext structure (computed from + CFA) if it does, or to 0 otherwise. */ + +#define COMPUTE_SIGCONTEXT_FOR(RA,CFA,SIGCTX) \ +do { \ + /* Define and register the applicable patterns. */ \ + extern void __sigtramp (void); \ + \ + sighandler_call_t sighandler_calls [] = { \ + {__sigtramp + 164, 0x78} \ + }; \ + \ + int n_patterns_to_match \ + = sizeof (sighandler_calls) / sizeof (sighandler_call_t); \ + \ + int pn; /* pattern number */ \ + \ + int match = 0; /* Did last pattern match ? */ \ + \ + /* Try to match each pattern in turn. */ \ + for (pn = 0; !match && pn < n_patterns_to_match; pn ++) \ + match = ((RA) == sighandler_calls[pn].ra_value); \ + \ + (SIGCTX) = (struct sigcontext *) \ + (match ? ((CFA) + sighandler_calls[pn - 1].cfa_to_context_offset) : 0); \ +} while (0); + +#include + +#define REG_SP 30 /* hard reg for stack pointer */ +#define REG_RA 26 /* hard reg for return address */ + +#define MD_FALLBACK_FRAME_STATE_FOR alpha_fallback_frame_state + +static _Unwind_Reason_Code +alpha_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + /* Return address and CFA of the frame we're attempting to unwind through, + possibly a signal handler. */ + void *ctx_ra = (void *)context->ra; + void *ctx_cfa = (void *)context->cfa; + + /* CFA of the intermediate abstract kernel frame between the interrupted + code and the signal handler, if we're indeed unwinding through a signal + handler. */ + void *k_cfa; + + /* Pointer to the sigcontext structure pushed by the kernel when we're + unwinding through a signal handler. */ + struct sigcontext *sigctx; + int i; + + COMPUTE_SIGCONTEXT_FOR (ctx_ra, ctx_cfa, sigctx); + + if (sigctx == 0) + return _URC_END_OF_STACK; + + /* The kernel frame's CFA is exactly the stack pointer value at the + interruption point. */ + k_cfa = (void *) sigctx->sc_regs [REG_SP]; + + /* State the rules to compute the CFA we have the value of: use the + previous CFA and offset by the difference between the two. See + uw_update_context_1 for the supporting details. */ + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __builtin_dwarf_sp_column (); + fs->regs.cfa_offset = k_cfa - ctx_cfa; + + /* Fill the internal frame_state structure with information stating + where each register of interest in the saved context can be found + from the CFA. */ + + /* The general registers are in sigctx->sc_regs. Leave out r31, which + is read-as-zero. It makes no sense restoring it, and we are going to + use the state entry for the kernel return address rule below. 
+ + This loop must cover at least all the callee-saved registers, and + we just don't bother specializing the set here. */ + for (i = 0; i <= 30; i ++) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset + = (void *) &sigctx->sc_regs[i] - (void *) k_cfa; + } + + /* Ditto for the floating point registers in sigctx->sc_fpregs. */ + for (i = 0; i <= 31; i ++) + { + fs->regs.reg[32+i].how = REG_SAVED_OFFSET; + fs->regs.reg[32+i].loc.offset + = (void *) &sigctx->sc_fpregs[i] - (void *) k_cfa; + } + + /* State the rules to find the kernel's code "return address", which + is the address of the active instruction when the signal was caught, + in sigctx->sc_pc. Use DWARF_ALT_FRAME_RETURN_COLUMN since the return + address register is a general register and should be left alone. */ + fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN; + fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].how = REG_SAVED_OFFSET; + fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].loc.offset + = (void *) &sigctx->sc_pc - (void *) k_cfa; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} diff --git a/gcc/config/alpha/osf5.h b/gcc/config/alpha/osf5.h new file mode 100644 index 000000000..7713b7ee0 --- /dev/null +++ b/gcc/config/alpha/osf5.h @@ -0,0 +1,278 @@ +/* Definitions of target machine for GNU compiler, for DEC Alpha on + Tru64 UNIX V5.1. + Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* As of DEC OSF/1 V4.0, as can subtract adjacent labels. */ + +#undef TARGET_AS_CAN_SUBTRACT_LABELS +#define TARGET_AS_CAN_SUBTRACT_LABELS 1 + +/* The GEM libraries for X_float are present, though not used by C. */ + +#undef TARGET_HAS_XFLOATING_LIBS +#define TARGET_HAS_XFLOATING_LIBS 1 + +/* Tru64 UNIX V5.1 uses IEEE QUAD format. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS | MASK_LONG_DOUBLE_128) + +/* The linker appears to perform invalid code optimizations that result + in the ldgp emitted for the exception_receiver pattern being incorrectly + linked. */ +#undef TARGET_LD_BUGGY_LDGP +#define TARGET_LD_BUGGY_LDGP 1 + +/* Tru64 UNIX V5.1 has the float and long double forms of math functions. */ +#undef TARGET_C99_FUNCTIONS +#define TARGET_C99_FUNCTIONS 1 + +/* Names to predefine in the preprocessor for this target machine. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define_std ("unix"); \ + builtin_define_std ("SYSTYPE_BSD"); \ + builtin_define ("_SYSTYPE_BSD"); \ + builtin_define ("__osf__"); \ + builtin_define ("__digital__"); \ + builtin_define ("__arch64__"); \ + builtin_define ("_LONGLONG"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=xpg4"); \ + /* Tru64 UNIX V5 has a 16 byte long \ + double type and requires __X_FLOAT \ + to be defined for . 
*/ \ + if (LONG_DOUBLE_TYPE_SIZE == 128) \ + builtin_define ("__X_FLOAT"); \ + \ + /* Tru64 UNIX V4/V5 provide several ISO C94 \ + features protected by the corresponding \ + __STDC_VERSION__ macro. libstdc++ v3 \ + needs them as well. */ \ + if (c_dialect_cxx ()) \ + builtin_define ("__STDC_VERSION__=199409L"); \ + } while (0) + +/* Accept DEC C flags for multithreaded programs. We use _PTHREAD_USE_D4 + instead of PTHREAD_USE_D4 since both have the same effect and the former + doesn't invade the users' namespace. */ + +#undef CPP_SPEC +#define CPP_SPEC \ +"%{pthread|threads:-D_REENTRANT} %{threads:-D_PTHREAD_USE_D4}" + +/* Under DEC OSF/1 V4, -p and -pg require -lprof1, and -lprof1 requires + -lpdf. */ + +#define LIB_SPEC \ +"%{p|pg:-lprof1%{pthread|threads:_r} -lpdf} \ + %{threads: -lpthreads} %{pthread|threads: -lpthread -lmach -lexc} -lc" + +/* Pass "-G 8" to ld because Alpha's CC does. Pass -O3 if we are + optimizing, -O1 if we are not. Pass -S to silence `weak symbol + multiply defined' warnings. Pass -shared, -non_shared or + -call_shared as appropriate. Pass -hidden_symbol so that our + constructor and call-frame data structures are not accidentally + overridden. */ +#define LINK_SPEC \ + "-G 8 %{O*:-O3} %{!O*:-O1} -S %{static:-non_shared} \ + %{!static:%{shared:-shared -hidden_symbol _GLOBAL_*} \ + %{!shared:-call_shared}} %{pg} %{taso} %{rpath*}" + +#define STARTFILE_SPEC \ + "%{!shared:%{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}" + +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" + +#define MD_STARTFILE_PREFIX "/usr/lib/cmplrs/cc/" + +/* In Tru64 UNIX V5.1, Compaq introduced a new assembler + (/usr/lib/cmplrs/cc/adu) which currently (versions between 3.04.29 and + 3.04.32) breaks mips-tfile. Passing the undocumented -oldas flag reverts + to using the old assembler (/usr/lib/cmplrs/cc/as[01]). + + It is clearly not desirable to depend on this undocumented flag, and + Compaq wants -oldas to go away soon, but until they have released a + new adu that works with mips-tfile, this is the only option. + + In some versions of the DTK, the assembler driver invokes ld after + assembly. This has been fixed in current versions, but adding -c + works as expected for all versions. */ + +#define ASM_OLDAS_SPEC "-oldas -c" + +/* In OSF/1 v3.2c, the assembler by default does not output file names which + causes mips-tfile to fail. Passing -g to the assembler fixes this problem. + ??? Strictly speaking, we need -g only if the user specifies -g. Passing + it always means that we get slightly larger than necessary object files + if the user does not specify -g. If we don't pass -g, then mips-tfile + will need to be fixed to work in this case. Pass -O0 since some + optimization are broken and don't help us anyway. */ +#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GAS) != 0 +#define ASM_SPEC "%{malpha-as:-g " ASM_OLDAS_SPEC " %{pg} -O0}" +#else +#define ASM_SPEC "%{!mgas:-g " ASM_OLDAS_SPEC " %{pg} -O0}" +#endif + +/* Specify to run a post-processor, mips-tfile after the assembler + has run to stuff the ecoff debug information into the object file. + This is needed because the Alpha assembler provides no way + of specifying such information in the assembly file. 
*/ + +#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GAS) != 0 + +#define ASM_FINAL_SPEC "\ +%{malpha-as: %{!mno-mips-tfile: \ + \n mips-tfile %{v*: -v} \ + %{K: -I %b.o~} \ + %{!K: %{save-temps: -I %b.o~}} \ + %{c:%W{o*}%{!o*:-o %b.o}}%{!c:-o %U.o} \ + %{,assembler:%i;:%g.s}}}" + +#else +#define ASM_FINAL_SPEC "\ +%{!mgas: %{!mno-mips-tfile: \ + \n mips-tfile %{v*: -v} \ + %{K: -I %b.o~} \ + %{!K: %{save-temps: -I %b.o~}} \ + %{c:%W{o*}%{!o*:-o %b.o}}%{!c:-o %U.o} \ + %{,assembler:%i;:%g.s}}}" + +#endif + +/* Indicate that we have a stamp.h to use. */ +#ifndef CROSS_DIRECTORY_STRUCTURE +#define HAVE_STAMP_H 1 +#endif + +/* Attempt to turn on access permissions for the stack. */ + +#define ENABLE_EXECUTE_STACK \ +void \ +__enable_execute_stack (void *addr) \ +{ \ + extern int mprotect (const void *, size_t, int); \ + long size = getpagesize (); \ + long mask = ~(size-1); \ + char *page = (char *) (((long) addr) & mask); \ + char *end = (char *) ((((long) (addr + TRAMPOLINE_SIZE)) & mask) + size); \ + \ + /* 7 is PROT_READ | PROT_WRITE | PROT_EXEC */ \ + if (mprotect (page, end - page, 7) < 0) \ + perror ("mprotect of trampoline code"); \ +} + +/* Digital UNIX V4.0E (1091)/usr/include/sys/types.h 4.3.49.9 1997/08/14 */ +#define SIZE_TYPE "long unsigned int" +#define PTRDIFF_TYPE "long int" + +#define SIG_ATOMIC_TYPE "int" + +#define INT8_TYPE "signed char" +#define INT16_TYPE "short int" +#define INT32_TYPE "int" +#define INT64_TYPE "long int" +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE "short unsigned int" +#define UINT32_TYPE "unsigned int" +#define UINT64_TYPE "long unsigned int" + +#define INT_LEAST8_TYPE "signed char" +#define INT_LEAST16_TYPE "short int" +#define INT_LEAST32_TYPE "int" +#define INT_LEAST64_TYPE "long int" +#define UINT_LEAST8_TYPE "unsigned char" +#define UINT_LEAST16_TYPE "short unsigned int" +#define UINT_LEAST32_TYPE "unsigned int" +#define UINT_LEAST64_TYPE "long unsigned int" + +#define INT_FAST8_TYPE "signed char" +#define INT_FAST16_TYPE "int" +#define INT_FAST32_TYPE "int" +#define INT_FAST64_TYPE "long int" +#define UINT_FAST8_TYPE "unsigned char" +#define UINT_FAST16_TYPE "unsigned int" +#define UINT_FAST32_TYPE "unsigned int" +#define UINT_FAST64_TYPE "long unsigned int" + +#define INTPTR_TYPE "long int" +#define UINTPTR_TYPE "long unsigned int" + +/* The linker will stick __main into the .init section. */ +#define HAS_INIT_SECTION +#define LD_INIT_SWITCH "-init" +#define LD_FINI_SWITCH "-fini" + +/* From Tru64 UNIX Object File and Symbol Table Format Specification, + 2.3.5 Alignment, p.19. */ +#define MAX_OFILE_ALIGNMENT (64 * 1024 * BITS_PER_UNIT) + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + We really ought to be using the SREL32 relocations that ECOFF has, + but no version of the native assembler supports creating such things, + and Compaq has no plans to rectify this. Worse, the dynamic loader + cannot handle unaligned relocations, so we have to make sure that + things get padded appropriately. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (TARGET_GAS \ + ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \ + : DW_EH_PE_aligned) + +/* The Tru64 UNIX assembler warns on .lcomm with SIZE 0, so use 1 in that + case. 
*/ +#undef ASM_OUTPUT_LOCAL +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE,ROUNDED) \ +( fputs ("\t.lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE) ? (SIZE) : 1)) + +/* This is how we tell the assembler that a symbol is weak. */ + +#define ASM_OUTPUT_WEAK_ALIAS(FILE, NAME, VALUE) \ + do \ + { \ + (*targetm.asm_out.globalize_label) (FILE, NAME); \ + fputs ("\t.weakext\t", FILE); \ + assemble_name (FILE, NAME); \ + if (VALUE) \ + { \ + fputc (' ', FILE); \ + assemble_name (FILE, VALUE); \ + } \ + fputc ('\n', FILE); \ + } \ + while (0) + +#define ASM_WEAKEN_LABEL(FILE, NAME) ASM_OUTPUT_WEAK_ALIAS(FILE, NAME, 0) + +/* The native assembler doesn't understand parenthesis. */ +#define TARGET_ASM_OPEN_PAREN "" +#define TARGET_ASM_CLOSE_PAREN "" + +/* Handle #pragma extern_prefix. */ +#define TARGET_HANDLE_PRAGMA_EXTERN_PREFIX 1 + +#define MD_UNWIND_SUPPORT "config/alpha/osf5-unwind.h" diff --git a/gcc/config/alpha/osf5.opt b/gcc/config/alpha/osf5.opt new file mode 100644 index 000000000..ba70e7ce9 --- /dev/null +++ b/gcc/config/alpha/osf5.opt @@ -0,0 +1,42 @@ +; Tru64 UNIX options. + +; Copyright (C) 2011 +; Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +K +Driver + +mno-mips-tfile +Target RejectNegative + +pthread +Driver + +taso +Driver + +threads +Driver + +; This comment is to ensure we retain the blank line above. diff --git a/gcc/config/alpha/predicates.md b/gcc/config/alpha/predicates.md new file mode 100644 index 000000000..2f17fb9e3 --- /dev/null +++ b/gcc/config/alpha/predicates.md @@ -0,0 +1,621 @@ +;; Predicate definitions for DEC Alpha. +;; Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Returns true if OP is either the constant zero or a register. 
+(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Return 1 if OP is a constant in the range of 0-63 (for a shift) or +;; any register. +(define_predicate "reg_or_6bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 64") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is an 8-bit constant. +(define_predicate "cint8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256"))) + +;; Return 1 if OP is an 8-bit constant or any register. +(define_predicate "reg_or_8bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant or any register. +(define_predicate "reg_or_cint_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +;; Return 1 if the operand is a valid second operand to an add insn. +(define_predicate "add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_K (op) || satisfies_constraint_L (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a valid second operand to a +;; sign-extending add insn. +(define_predicate "sext_add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_I (op) || satisfies_constraint_O (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a non-symbolic constant operand that +;; does not satisfy add_operand. +(define_predicate "non_add_const_operand" + (and (match_code "const_int,const_double,const_vector") + (not (match_operand 0 "add_operand")))) + +;; Return 1 if the operand is a non-symbolic, nonzero constant operand. +(define_predicate "non_zero_const_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op != CONST0_RTX (mode)"))) + +;; Return 1 if OP is the constant 4 or 8. +(define_predicate "const48_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 4 || INTVAL (op) == 8"))) + +;; Return 1 if OP is a valid first operand to an AND insn. +(define_predicate "and_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100 + || zap_mask (INTVAL (op))") + (if_then_else (match_code "const_double") + (match_test "GET_MODE (op) == VOIDmode + && zap_mask (CONST_DOUBLE_LOW (op)) + && zap_mask (CONST_DOUBLE_HIGH (op))") + (match_operand 0 "register_operand")))) + +;; Return 1 if OP is a valid first operand to an IOR or XOR insn. +(define_predicate "or_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant that is the width, in bits, of an integral +;; mode not larger than DImode. +(define_predicate "mode_width_operand" + (match_code "const_int") +{ + HOST_WIDE_INT i = INTVAL (op); + return i == 8 || i == 16 || i == 32 || i == 64; +}) + +;; Return 1 if OP is a constant that is a mask of ones of width of an +;; integral machine mode not larger than DImode. 
+(define_predicate "mode_mask_operand" + (match_code "const_int,const_double") +{ + if (CONST_INT_P (op)) + { + HOST_WIDE_INT value = INTVAL (op); + + if (value == 0xff) + return 1; + if (value == 0xffff) + return 1; + if (value == 0xffffffff) + return 1; + if (value == -1) + return 1; + } + else if (HOST_BITS_PER_WIDE_INT == 32 && GET_CODE (op) == CONST_DOUBLE) + { + if (CONST_DOUBLE_LOW (op) == 0xffffffff && CONST_DOUBLE_HIGH (op) == 0) + return 1; + } + return 0; +}) + +;; Return 1 if OP is a multiple of 8 less than 64. +(define_predicate "mul8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT i = INTVAL (op); + return i < 64 && i % 8 == 0; +}) + +;; Return 1 if OP is a hard floating-point register. +(define_predicate "hard_fp_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS; +}) + +;; Return 1 if OP is a hard general register. +(define_predicate "hard_int_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS; +}) + +;; Return 1 if OP is something that can be reloaded into a register; +;; if it is a MEM, it need not be valid. +(define_predicate "some_operand" + (ior (match_code "reg,mem,const_int,const_double,const_vector, + label_ref,symbol_ref,const,high") + (and (match_code "subreg") + (match_test "some_operand (SUBREG_REG (op), VOIDmode)")))) + +;; Likewise, but don't accept constants. +(define_predicate "some_ni_operand" + (ior (match_code "reg,mem") + (and (match_code "subreg") + (match_test "some_ni_operand (SUBREG_REG (op), VOIDmode)")))) + +;; Return 1 if OP is a valid operand for the source of a move insn. +(define_predicate "input_operand" + (match_code "label_ref,symbol_ref,const,high,reg,subreg,mem, + const_double,const_vector,const_int") +{ + switch (GET_CODE (op)) + { + case LABEL_REF: + case SYMBOL_REF: + case CONST: + if (TARGET_EXPLICIT_RELOCS) + { + /* We don't split symbolic operands into something unintelligable + until after reload, but we do not wish non-small, non-global + symbolic operands to be reconstructed from their high/lo_sum + form. */ + return (small_symbolic_operand (op, mode) + || global_symbolic_operand (op, mode) + || gotdtp_symbolic_operand (op, mode) + || gottp_symbolic_operand (op, mode)); + } + + /* This handles both the Windows/NT and OSF cases. */ + return mode == ptr_mode || mode == DImode; + + case HIGH: + return (TARGET_EXPLICIT_RELOCS + && local_symbolic_operand (XEXP (op, 0), mode)); + + case REG: + return 1; + + case SUBREG: + if (register_operand (op, mode)) + return 1; + /* ... fall through ... */ + case MEM: + return ((TARGET_BWX || (mode != HImode && mode != QImode)) + && general_operand (op, mode)); + + case CONST_DOUBLE: + return op == CONST0_RTX (mode); + + case CONST_VECTOR: + if (reload_in_progress || reload_completed) + return alpha_legitimate_constant_p (op); + return op == CONST0_RTX (mode); + + case CONST_INT: + if (mode == QImode || mode == HImode) + return true; + if (reload_in_progress || reload_completed) + return alpha_legitimate_constant_p (op); + return add_operand (op, mode); + + default: + gcc_unreachable (); + } + return 0; +}) + +;; Return 1 if OP is a SYMBOL_REF for a function known to be in this +;; file, and in the same section as the current function. + +(define_predicate "samegp_function_operand" + (match_code "symbol_ref") +{ + /* Easy test for recursion. 
*/ + if (op == XEXP (DECL_RTL (current_function_decl), 0)) + return true; + + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (! SYMBOL_REF_LOCAL_P (op)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT, + and thus must share the same gp. */ + return ! SYMBOL_REF_EXTERNAL_P (op); +}) + +;; Return 1 if OP is a SYMBOL_REF for which we can make a call via bsr. +(define_predicate "direct_call_operand" + (match_operand 0 "samegp_function_operand") +{ + /* If profiling is implemented via linker tricks, we can't jump + to the nogp alternate entry point. Note that crtl->profile + would not be correct, since that doesn't indicate if the target + function uses profiling. */ + /* ??? TARGET_PROFILING_NEEDS_GP isn't really the right test, + but is approximately correct for the OSF ABIs. Don't know + what to do for VMS, NT, or UMK. */ + if (!TARGET_PROFILING_NEEDS_GP && profile_flag) + return false; + + /* Must be a function. In some cases folks create thunks in static + data structures and then make calls to them. If we allow the + direct call, we'll get an error from the linker about !samegp reloc + against a symbol without a .prologue directive. */ + if (!SYMBOL_REF_FUNCTION_P (op)) + return false; + + /* Must be "near" so that the branch is assumed to reach. With + -msmall-text, this is assumed true of all local symbols. Since + we've already checked samegp, locality is already assured. */ + if (TARGET_SMALL_TEXT) + return true; + + return false; +}) + +;; Return 1 if OP is a valid operand for the MEM of a CALL insn. +;; +;; For TARGET_ABI_OSF, we want to restrict to R27 or a pseudo. +;; For TARGET_ABI_UNICOSMK, we want to restrict to registers. + +(define_predicate "call_operand" + (if_then_else (match_code "reg") + (match_test "!TARGET_ABI_OSF + || REGNO (op) == 27 || REGNO (op) > LAST_VIRTUAL_REGISTER") + (and (match_test "!TARGET_ABI_UNICOSMK") + (match_code "symbol_ref")))) + +;; Return true if OP is a LABEL_REF, or SYMBOL_REF or CONST referencing +;; a (non-tls) variable known to be defined in this file. +(define_predicate "local_symbolic_operand" + (match_code "label_ref,const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) == LABEL_REF) + return 1; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return (SYMBOL_REF_LOCAL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; known to be defined in this file in the small data area. +(define_predicate "small_symbolic_operand" + (match_code "const,symbol_ref") +{ + if (! TARGET_SMALL_DATA) + return 0; + + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + /* ??? There's no encode_section_info equivalent for the rtl + constant pool, so SYMBOL_FLAG_SMALL never gets set. 
*/ + if (CONSTANT_POOL_ADDRESS_P (op)) + return GET_MODE_SIZE (get_pool_mode (op)) <= g_switch_value; + + return (SYMBOL_REF_LOCAL_P (op) + && SYMBOL_REF_SMALL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; not known (or known not) to be defined in this file. +(define_predicate "global_symbolic_operand" + (match_code "const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return ((!SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_WEAK (op)) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. +(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op,0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op,0), 0)) == LABEL_REF) + && CONST_INT_P (XEXP (XEXP (op,0), 1))")))) + +;; Return true if OP is valid for 16-bit DTP relative relocations. +(define_predicate "dtp16_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 32-bit DTP relative relocations. +(define_predicate "dtp32_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 64-bit DTP relative relocations. +(define_predicate "gotdtp_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 16-bit TP relative relocations. +(define_predicate "tp16_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_TPREL)"))) + +;; Return true if OP is valid for 32-bit TP relative relocations. +(define_predicate "tp32_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_TPREL)"))) + +;; Return true if OP is valid for 64-bit TP relative relocations. +(define_predicate "gottp_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_TPREL)"))) + +;; Return 1 if this memory address is a known aligned register plus +;; a constant. It must be a valid address. This means that we can do +;; this as an aligned reference plus some offset. +;; +;; Take into account what reload will do. Oh god this is awful. +;; The horrible comma-operator construct below is to prevent genrecog +;; from thinking that this predicate accepts REG and SUBREG. We don't +;; use recog during reload, so pretending these codes are accepted +;; pessimizes things a tad. + +(define_special_predicate "aligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 1; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! 
memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 0; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) >= 32); +}) + +;; Similar, but return 1 if OP is a MEM which is not alignable. + +(define_special_predicate "unaligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 0; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 1; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) < 32); +}) + +;; Return 1 if OP is any memory location. During reload a pseudo matches. +(define_special_predicate "any_memory_operand" + (match_code "mem,reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (MEM_P (op)) + return true; + if (reload_in_progress && REG_P (op)) + { + unsigned regno = REGNO (op); + if (HARD_REGISTER_NUM_P (regno)) + return false; + else + return reg_renumber[regno] < 0; + } + + return false; +}) + +;; Return 1 is OP is a memory location that is not a reference +;; (using an AND) to an unaligned location. Take into account +;; what reload will do. +(define_special_predicate "normal_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) != AND")))) + +;; Returns 1 if OP is not an eliminable register. +;; +;; This exists to cure a pathological failure in the s8addq (et al) patterns, +;; +;; long foo () { long t; bar(); return (long) &t * 26107; } +;; +;; which run afoul of a hack in reload to cure a (presumably) similar +;; problem with lea-type instructions on other targets. But there is +;; one of us and many of them, so work around the problem by selectively +;; preventing combine from making the optimization. + +(define_predicate "reg_not_elim_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return op != frame_pointer_rtx && op != arg_pointer_rtx; +}) + +;; Accept a register, but not a subreg of any kind. This allows us to +;; avoid pathological cases in reload wrt data movement common in +;; int->fp conversion. */ +(define_predicate "reg_no_subreg_operand" + (and (match_code "reg") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a valid Alpha comparison operator for "cbranch" +;; instructions. +(define_predicate "alpha_cbranch_operator" + (ior (match_operand 0 "ordered_comparison_operator") + (match_code "ordered,unordered"))) + +;; Return 1 if OP is a valid Alpha comparison operator for "cmp" style +;; instructions. +(define_predicate "alpha_comparison_operator" + (match_code "eq,le,lt,leu,ltu")) + +;; Similarly, but with swapped operands. 
+(define_predicate "alpha_swapped_comparison_operator" + (match_code "eq,ge,gt,gtu")) + +;; Return 1 if OP is a valid Alpha comparison operator against zero +;; for "bcc" style instructions. +(define_predicate "alpha_zero_comparison_operator" + (match_code "eq,ne,le,lt,leu,ltu")) + +;; Return 1 if OP is a signed comparison operation. +(define_predicate "signed_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt")) + +;; Return 1 if OP is a valid Alpha floating point comparison operator. +(define_predicate "alpha_fp_comparison_operator" + (match_code "eq,le,lt,unordered")) + +;; Return 1 if this is a divide or modulus operator. +(define_predicate "divmod_operator" + (match_code "div,mod,udiv,umod")) + +;; Return 1 if this is a float->int conversion operator. +(define_predicate "fix_operator" + (match_code "fix,unsigned_fix")) + +;; Recognize an addition operation that includes a constant. Used to +;; convince reload to canonize (plus (plus reg c1) c2) during register +;; elimination. + +(define_predicate "addition_operation" + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), mode) + && satisfies_constraint_K (XEXP (op, 1))"))) + +;; For TARGET_EXPLICIT_RELOCS, we don't obfuscate a SYMBOL_REF to a +;; small symbolic operand until after reload. At which point we need +;; to replace (mem (symbol_ref)) with (mem (lo_sum $29 symbol_ref)) +;; so that sched2 has the proper dependency information. */ +(define_predicate "some_small_symbolic_operand" + (match_code "set,parallel,prefetch,unspec,unspec_volatile") +{ + /* Avoid search unless necessary. */ + if (!TARGET_EXPLICIT_RELOCS || !reload_completed) + return false; + return for_each_rtx (&op, some_small_symbolic_operand_int, NULL); +}) diff --git a/gcc/config/alpha/qrnnd.asm b/gcc/config/alpha/qrnnd.asm new file mode 100644 index 000000000..51b13bce6 --- /dev/null +++ b/gcc/config/alpha/qrnnd.asm @@ -0,0 +1,163 @@ + # Alpha 21064 __udiv_qrnnd + # Copyright (C) 1992, 1994, 1995, 2000, 2009 Free Software Foundation, Inc. + + # This file is part of GCC. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by + # the Free Software Foundation; either version 3 of the License, or (at your + # option) any later version. + + # This file is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # Under Section 7 of GPL version 3, you are granted additional + # permissions described in the GCC Runtime Library Exception, version + # 3.1, as published by the Free Software Foundation. + + # You should have received a copy of the GNU General Public License and + # a copy of the GCC Runtime Library Exception along with this program; + # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + # . 
+ +#ifdef __ELF__ +.section .note.GNU-stack,"" +#endif + + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 + +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 +#define AT $at + + ldiq cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,$loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,$Odd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. r' in n1 */ + addq n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addq + subq n1,d,AT + addq n0,tmp,n0 + cmovne tmp,AT,n1 + + cmpult n1,d,tmp + addq n0,1,AT + cmoveq tmp,AT,n0 + subq n1,d,AT + cmoveq tmp,AT,n1 + + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd diff --git a/gcc/config/alpha/sync.md b/gcc/config/alpha/sync.md new file mode 100644 index 000000000..bb7210239 --- /dev/null +++ b/gcc/config/alpha/sync.md @@ -0,0 +1,308 @@ +;; GCC machine description for Alpha synchronization instructions. +;; Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
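+
+;; As a rough illustration of what the atomic patterns in this file
+;; turn into (a sketch only -- the register names and the DImode add
+;; are examples, not taken from the splitters themselves): the
+;; post-reload splitters below hand the operation to
+;; alpha_split_atomic_op, which emits a load-locked/store-conditional
+;; retry loop along these lines:
+;;
+;;   retry:  ldq_l   $1,0($2)        ; load-locked the memory word
+;;           addq    $1,$3,$1        ; perform the operation
+;;           stq_c   $1,0($2)        ; store-conditional; $1 = success
+;;           beq     $1,retry        ; retry if the store failed
+;;
+;; See the comment before the splitters below for why such sequences
+;; may only be created after the final basic-block reordering pass.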
+ +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "add_operand") (minus "reg_or_8bit_operand") + (ior "or_operand") (xor "or_operand") (and "and_operand")]) +(define_code_attr fetchop_constr + [(plus "rKL") (minus "rI") (ior "rIN") (xor "rIN") (and "riNHM")]) + + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" + "mb" + [(set_attr "type" "mb")]) + +(define_insn "load_locked_" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (unspec_volatile:I48MODE + [(match_operand:I48MODE 1 "memory_operand" "m")] + UNSPECV_LL))] + "" + "ld_l %0,%1" + [(set_attr "type" "ld_l")]) + +(define_insn "store_conditional_" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (match_operand:I48MODE 1 "memory_operand" "=m") + (match_operand:I48MODE 2 "reg_or_0_operand" "0"))] + "" + "st_c %0,%1" + [(set_attr "type" "st_c")]) + +;; The Alpha Architecture Handbook says that it is UNPREDICTABLE whether +;; the lock is cleared by a TAKEN branch. This means that we can not +;; expand a ll/sc sequence until after the final basic-block reordering pass. + +(define_insn_and_split "sync_" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 2 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[0], operands[1], + NULL, NULL, operands[2]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "sync_nand" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "register_operand" "r")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 2 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[0], operands[1], + NULL, NULL, operands[2]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "sync_old_" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[1], operands[2], + operands[0], NULL, operands[3]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "sync_old_nand" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "register_operand" "r")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[1], operands[2], + operands[0], NULL, operands[3]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "sync_new_" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + 
(FETCHOP:I48MODE + (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "" ""))) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) (match_dup 2))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[1], operands[2], + NULL, operands[0], operands[3]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "sync_new_nand" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (not:I48MODE + (and:I48MODE (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "register_operand" "r")))) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE (and:I48MODE (match_dup 1) (match_dup 2)))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[1], operands[2], + NULL, operands[0], operands[3]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "sync_compare_and_swap" + [(match_operand:I12MODE 0 "register_operand" "") + (match_operand:I12MODE 1 "memory_operand" "") + (match_operand:I12MODE 2 "register_operand" "") + (match_operand:I12MODE 3 "add_operand" "")] + "" +{ + alpha_expand_compare_and_swap_12 (operands[0], operands[1], + operands[2], operands[3]); + DONE; +}) + +(define_insn_and_split "sync_compare_and_swap_1" + [(set (match_operand:DI 0 "register_operand" "=&r,&r") + (zero_extend:DI + (mem:I12MODE (match_operand:DI 1 "register_operand" "r,r")))) + (set (mem:I12MODE (match_dup 1)) + (unspec:I12MODE + [(match_operand:DI 2 "reg_or_8bit_operand" "J,rI") + (match_operand:DI 3 "register_operand" "r,r") + (match_operand:DI 4 "register_operand" "r,r")] + UNSPEC_CMPXCHG)) + (clobber (match_scratch:DI 5 "=&r,&r")) + (clobber (match_scratch:DI 6 "=X,&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_compare_and_swap_12 (mode, operands[0], operands[1], + operands[2], operands[3], operands[4], + operands[5], operands[6]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "sync_compare_and_swap" + [(parallel + [(set (match_operand:I48MODE 0 "register_operand" "") + (match_operand:I48MODE 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:I48MODE + [(match_operand:I48MODE 2 "reg_or_8bit_operand" "") + (match_operand:I48MODE 3 "add_operand" "rKL")] + UNSPEC_CMPXCHG)) + (clobber (match_scratch:I48MODE 4 "=&r"))])] + "" +{ + if (mode == SImode) + operands[2] = convert_modes (DImode, SImode, operands[2], 0); +}) + +(define_insn_and_split "*sync_compare_and_swap" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(match_operand:DI 2 "reg_or_8bit_operand" "rI") + (match_operand:I48MODE 3 "add_operand" "rKL")] + UNSPEC_CMPXCHG)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_compare_and_swap (operands[0], operands[1], operands[2], + operands[3], operands[4]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "sync_lock_test_and_set" + [(match_operand:I12MODE 0 "register_operand" "") + (match_operand:I12MODE 1 "memory_operand" "") + (match_operand:I12MODE 2 "register_operand" "")] + "" +{ + alpha_expand_lock_test_and_set_12 (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn_and_split "sync_lock_test_and_set_1" + [(set (match_operand:DI 0 "register_operand" "=&r") + (zero_extend:DI + 
(mem:I12MODE (match_operand:DI 1 "register_operand" "r")))) + (set (mem:I12MODE (match_dup 1)) + (unspec:I12MODE + [(match_operand:DI 2 "reg_or_8bit_operand" "rI") + (match_operand:DI 3 "register_operand" "r")] + UNSPEC_XCHG)) + (clobber (match_scratch:DI 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_lock_test_and_set_12 (mode, operands[0], operands[1], + operands[2], operands[3], operands[4]); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "sync_lock_test_and_set" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(match_operand:I48MODE 2 "add_operand" "rKL")] + UNSPEC_XCHG)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_lock_test_and_set (operands[0], operands[1], + operands[2], operands[3]); + DONE; +} + [(set_attr "type" "multi")]) diff --git a/gcc/config/alpha/t-alpha b/gcc/config/alpha/t-alpha new file mode 100644 index 000000000..d0b58d69a --- /dev/null +++ b/gcc/config/alpha/t-alpha @@ -0,0 +1,2 @@ +# This is a support routine for longlong.h, used by libgcc2.c. +LIB2FUNCS_EXTRA = $(srcdir)/config/alpha/qrnnd.asm diff --git a/gcc/config/alpha/t-crtfm b/gcc/config/alpha/t-crtfm new file mode 100644 index 000000000..b4103834a --- /dev/null +++ b/gcc/config/alpha/t-crtfm @@ -0,0 +1,5 @@ +EXTRA_PARTS += crtfastmath.o + +$(T)crtfastmath.o: $(srcdir)/config/alpha/crtfastmath.c $(GCC_PASSES) + $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -frandom-seed=gcc-crtfastmath -c \ + -o $(T)crtfastmath.o $(srcdir)/config/alpha/crtfastmath.c diff --git a/gcc/config/alpha/t-ieee b/gcc/config/alpha/t-ieee new file mode 100644 index 000000000..fe549dfc9 --- /dev/null +++ b/gcc/config/alpha/t-ieee @@ -0,0 +1,2 @@ +# All alphas get an IEEE complaint set of libraries. +TARGET_LIBGCC2_CFLAGS += -mieee diff --git a/gcc/config/alpha/t-linux b/gcc/config/alpha/t-linux new file mode 100644 index 000000000..b96f1dfeb --- /dev/null +++ b/gcc/config/alpha/t-linux @@ -0,0 +1,2 @@ +MULTIARCH_DIRNAME = $(call if_multiarch,alpha-linux-gnu) +SHLIB_MAPFILES += $(srcdir)/config/alpha/libgcc-alpha-ldbl.ver diff --git a/gcc/config/alpha/t-osf-pthread b/gcc/config/alpha/t-osf-pthread new file mode 100644 index 000000000..968e65cce --- /dev/null +++ b/gcc/config/alpha/t-osf-pthread @@ -0,0 +1,5 @@ +# Provide dummy POSIX threads functions +LIB2FUNCS_EXTRA += $(srcdir)/gthr-posix.c + +# Compile libgcc2 with POSIX threads supports +TARGET_LIBGCC2_CFLAGS=-pthread diff --git a/gcc/config/alpha/t-osf5 b/gcc/config/alpha/t-osf5 new file mode 100644 index 000000000..eabf2728f --- /dev/null +++ b/gcc/config/alpha/t-osf5 @@ -0,0 +1,48 @@ +# Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Compile crtbeginS.o and crtendS.o with pic. 
+CRTSTUFF_T_CFLAGS_S = -fPIC + +# Compile libgcc2.a with pic. +TARGET_LIBGCC2_CFLAGS = -fPIC + +# Build a shared libgcc library. +SHLIB_EXT = .so +SHLIB_NAME = @shlib_base_name@.so +SHLIB_SONAME = @shlib_base_name@.so.1 +SHLIB_OBJS = @shlib_objs@ + +# Beware *not* to hide the POSIX threads related symbols provided by +# gthr-posix.c, as this would prevent their preemption by real symbols. +SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \ + -Wl,-msym -Wl,-set_version,gcc.1 -Wl,-soname,$(SHLIB_SONAME) \ + -o $(SHLIB_NAME).tmp @multilib_flags@ $(SHLIB_OBJS) -lc && \ + rm -f $(SHLIB_SONAME) && \ + if [ -f $(SHLIB_NAME) ]; then \ + mv -f $(SHLIB_NAME) $(SHLIB_NAME).backup; \ + else true; fi && \ + mv $(SHLIB_NAME).tmp $(SHLIB_NAME) && \ + $(LN_S) $(SHLIB_NAME) $(SHLIB_SONAME) +# $(slibdir) double quoted to protect it from expansion while building +# libgcc.mk. We want this delayed until actual install time. +SHLIB_INSTALL = \ + $$(mkinstalldirs) $$(DESTDIR)$$(slibdir); \ + $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_SONAME); \ + rm -f $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME); \ + $(LN_S) $(SHLIB_SONAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME) diff --git a/gcc/config/alpha/t-vms b/gcc/config/alpha/t-vms new file mode 100644 index 000000000..410e219ff --- /dev/null +++ b/gcc/config/alpha/t-vms @@ -0,0 +1,65 @@ +# Copyright (C) 1996, 1997, 1998, 2001, 2002, +# 2007, 2009 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB2FUNCS_EXTRA = $(srcdir)/config/alpha/vms-gcc_shell_handler.c + +EXTRA_PARTS = vms-dwarf2.o vms-dwarf2eh.o $(VMS_EXTRA_PARTS) \ + crtbegin.o crtbeginS.o crtend.o crtendS.o + +# This object must be linked with in order to make the executable debuggable. +# vms-ld handles it automatically when passed -g. 
+$(T)vms-dwarf2.o : $(srcdir)/config/alpha/vms-dwarf2.asm + $(GCC_FOR_TARGET) -c -x assembler $< -o $@ + +$(T)vms-dwarf2eh.o : $(srcdir)/config/alpha/vms-dwarf2eh.asm + $(GCC_FOR_TARGET) -c -x assembler $< -o $@ + +MULTILIB_OPTIONS = mcpu=ev6 +MULTILIB_DIRNAMES = ev6 +MULTILIB_OSDIRNAMES = ev6 +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib + +shlib_version:=$(shell echo $(BASEVER_c) | sed -e 's/\./,/' -e 's/\.//g') +SHLIB_EXT = .exe +SHLIB_OBJS = @shlib_objs@ +SHLIB_NAME = @shlib_base_name@.exe +SHLIB_MULTILIB = +SHLIB_INSTALL = $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(libsubdir)/$(SHLIB_NAME) +SHLIB_SYMVEC = \ + grep -F -e "\$$BSS\$$" -e "\$$DATA\$$" -e " sdata " -e " data.rel " -e " data.rel.ro " -e " sbss " \ + -e "\$$LINK\$$" -e "\$$READONLY\$$" | \ + sed -e "s/.*\$$LINK\$$ \(.*\)/SYMBOL_VECTOR=(\1=PROCEDURE)/" \ + -e "s/.*\$$DATA\$$ \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \ + -e "s/.* sbss \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \ + -e "s/.* sdata \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \ + -e "s/.* data.rel \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \ + -e "s/.* data.rel.ro \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \ + -e "s/.*\$$BSS\$$ \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \ + -e "s/.*\$$READONLY\$$ \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" +SHLIB_SYMVECX2 := $(subst $$,$$$$,$(SHLIB_SYMVEC)) +SHLIB_LINK = \ + echo "case_sensitive=yes" > SYMVEC_$$$$$$$$.opt; \ + objdump --syms $(SHLIB_OBJS) | \ + $(SHLIB_SYMVECX2) >> SYMVEC_$$$$$$$$.opt ; \ + echo "case_sensitive=NO" >> SYMVEC_$$$$$$$$.opt; \ + $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -nodefaultlibs \ + -shared --for-linker=/noinform -o $(SHLIB_NAME) $(SHLIB_OBJS) \ + --for-linker=SYMVEC_$$$$$$$$.opt \ + --for-linker=gsmatch=equal,$(shlib_version) diff --git a/gcc/config/alpha/va_list.h b/gcc/config/alpha/va_list.h new file mode 100644 index 000000000..26e9515b4 --- /dev/null +++ b/gcc/config/alpha/va_list.h @@ -0,0 +1,42 @@ +/* A replacement for Digital Unix's . + +Copyright (C) 1998, 1999 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef __GNUC_VA_LIST +#define __GNUC_VA_LIST +typedef __builtin_va_list __gnuc_va_list; +#endif + +#if !defined(_VA_LIST) && !defined(_HIDDEN_VA_LIST) +#define _VA_LIST +typedef __gnuc_va_list va_list; + +#elif defined(_HIDDEN_VA_LIST) && !defined(_HIDDEN_VA_LIST_DONE) +#define _HIDDEN_VA_LIST_DONE +typedef __gnuc_va_list __va_list; + +#elif defined(_HIDDEN_VA_LIST) && defined(_VA_LIST) +#undef _HIDDEN_VA_LIST + +#endif diff --git a/gcc/config/alpha/vms-dwarf2.asm b/gcc/config/alpha/vms-dwarf2.asm new file mode 100644 index 000000000..531c7aa99 --- /dev/null +++ b/gcc/config/alpha/vms-dwarf2.asm @@ -0,0 +1,77 @@ +/* VMS dwarf2 section sequentializer. 
+ Copyright (C) 2001, 2009 Free Software Foundation, Inc. + Contributed by Douglas B. Rupp (rupp@gnat.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Linking with this file forces Dwarf2 debug sections to be + sequentially loaded by the VMS linker, enabling GDB to read them. */ + +.section .debug_abbrev,NOWRT + .align 0 + .globl $dwarf2.debug_abbrev +$dwarf2.debug_abbrev: + +.section .debug_aranges,NOWRT + .align 0 + .globl $dwarf2.debug_aranges +$dwarf2.debug_aranges: + +.section .debug_frame,NOWRT + .align 0 + .globl $dwarf2.debug_frame +$dwarf2.debug_frame: + +.section .debug_info,NOWRT + .align 0 + .globl $dwarf2.debug_info +$dwarf2.debug_info: + +.section .debug_line,NOWRT + .align 0 + .globl $dwarf2.debug_line +$dwarf2.debug_line: + +.section .debug_loc,NOWRT + .align 0 + .globl $dwarf2.debug_loc +$dwarf2.debug_loc: + +.section .debug_macinfo,NOWRT + .align 0 + .globl $dwarf2.debug_macinfo +$dwarf2.debug_macinfo: + +.section .debug_pubnames,NOWRT + .align 0 + .globl $dwarf2.debug_pubnames +$dwarf2.debug_pubnames: + +.section .debug_str,NOWRT + .align 0 + .globl $dwarf2.debug_str +$dwarf2.debug_str: + +.section .debug_zzzzzz,NOWRT + .align 0 + .globl $dwarf2.debug_zzzzzz +$dwarf2.debug_zzzzzz: diff --git a/gcc/config/alpha/vms-dwarf2eh.asm b/gcc/config/alpha/vms-dwarf2eh.asm new file mode 100644 index 000000000..e0eaf9d37 --- /dev/null +++ b/gcc/config/alpha/vms-dwarf2eh.asm @@ -0,0 +1,30 @@ +/* VMS dwarf2 exception handling section sequentializer. + Copyright (C) 2002, 2009 Free Software Foundation, Inc. + Contributed by Douglas B. Rupp (rupp@gnat.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Linking with this file forces the Dwarf2 EH section to be + individually loaded by the VMS linker an the unwinder to read it. 
*/ + +.section .eh_frame,NOWRT + .align 0 diff --git a/gcc/config/alpha/vms-gcc_shell_handler.c b/gcc/config/alpha/vms-gcc_shell_handler.c new file mode 100644 index 000000000..67d0fe7f9 --- /dev/null +++ b/gcc/config/alpha/vms-gcc_shell_handler.c @@ -0,0 +1,124 @@ +/* Static condition handler for Alpha/VMS. + Copyright (C) 2005-2009 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This file implements __gcc_shell_handler, the static VMS condition handler + used as the indirection wrapper around user level handlers installed with + establish_vms_condition_handler GCC builtin. + + [ABI] in comments refers to the "HP OpenVMS calling standard" document + dated January 2005. */ + +#include +#include +#include + +typedef void * ADDR; +typedef unsigned long long REG; + +#define REG_AT(addr) (*(REG *)(addr)) + +/* Compute pointer to procedure descriptor (Procedure Value) from Frame + Pointer FP, according to the rules in [ABI-3.5.1 Current Procedure]. */ +#define PV_FOR(FP) \ + (((FP) != 0) \ + ? (((REG_AT (FP) & 0x7) == 0) ? *(PDSCDEF **)(FP) : (PDSCDEF *)(FP)) : 0) + +long +__gcc_shell_handler (struct chf$signal_array *sig_arr, + struct chf$mech_array *mech_arr); + +/* Helper for __gcc_shell_handler. Fetch the pointer to procedure currently + registered as the VMS condition handler for the live function with a frame + pointer FP. */ + +static ADDR +get_dyn_handler_pointer (REG fp) +{ + /* From the frame pointer we find the procedure descriptor, and fetch + the handler_data field from there. This field contains the offset + from FP at which the address of the currently installed handler is + to be found. */ + + PDSCDEF * pd = PV_FOR (fp); + /* Procedure descriptor pointer for the live subprogram with FP as the frame + pointer, and to which _gcc_shell_handler is attached as a condition + handler. */ + + REG handler_slot_offset; + /* Offset from FP at which the address of the currently established real + condition handler is to be found. This offset is available from the + handler_data field of the procedure descriptor. */ + + REG handler_data_offset; + /* The handler_data field position in the procedure descriptor, which + depends on the kind of procedure at hand. */ + + switch (pd->pdsc$w_flags & 0xf) + { + case PDSC$K_KIND_FP_STACK: /* [3.4.2 PD for stack frame procedures] */ + handler_data_offset = 40; + break; + + case PDSC$K_KIND_FP_REGISTER: /* [3.4.5 PD for reg frame procedures] */ + handler_data_offset = 32; + break; + + default: + handler_data_offset = 0; + break; + } + + /* If we couldn't determine the handler_data field position, give up. 
*/ + if (handler_data_offset == 0) + return 0; + + /* Otherwise, fetch the fp offset at which the real handler address is to be + found, then fetch and return the latter in turn. */ + + handler_slot_offset = REG_AT ((REG)pd + handler_data_offset); + + return (ADDR) REG_AT (fp + handler_slot_offset); +} + +/* The static VMS condition handler for GCC code. Fetch the address of the + currently established condition handler, then resignal if there is none or + call the handler with the VMS condition arguments. */ + +long +__gcc_shell_handler (struct chf$signal_array *sig_arr, + struct chf$mech_array *mech_arr) +{ + long ret; + long (*user_handler) (struct chf$signal_array *, struct chf$mech_array *); + + user_handler = get_dyn_handler_pointer (mech_arr->chf$q_mch_frame); + if (!user_handler) + ret = SS$_RESIGNAL; + else + ret = user_handler (sig_arr, mech_arr); + + return ret; +} + diff --git a/gcc/config/alpha/vms-unwind.h b/gcc/config/alpha/vms-unwind.h new file mode 100644 index 000000000..ea2c3a319 --- /dev/null +++ b/gcc/config/alpha/vms-unwind.h @@ -0,0 +1,293 @@ +/* Fallback frame unwinding for Alpha/VMS. + Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2009, 2010 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include +#include +#include +#include +#include +#include + +#define MD_FALLBACK_FRAME_STATE_FOR alpha_vms_fallback_frame_state + +typedef void * ADDR; +typedef unsigned long long REG; +typedef PDSCDEF * PV; + +#define REG_AT(addr) (*(REG *)(addr)) +#define ADDR_AT(addr) (*(ADDR *)(addr)) + +/* Compute pointer to procedure descriptor (Procedure Value) from Frame + Pointer FP, according to the rules in [ABI-3.5.1 Current Procedure]. */ +#define PV_FOR(FP) \ + (((FP) != 0) \ + ? (((REG_AT (FP) & 0x7) == 0) ? *(PDSCDEF **)(FP) : (PDSCDEF *)(FP)) : 0) + +extern int SYS$GL_CALL_HANDL; +/* This is actually defined as a "long", but in system code where longs + are always 4bytes while GCC longs might be 8bytes. 
*/ + +#define UPDATE_FS_FOR_CFA_GR(FS, GRN, LOC, CFA) \ +do { \ +(FS)->regs.reg[GRN].how = REG_SAVED_OFFSET; \ +(FS)->regs.reg[GRN].loc.offset = (_Unwind_Sword) ((REG) (LOC) - (REG) (CFA)); \ +} while (0); + +#define GIVEUP_ON_FAILURE(STATUS) \ + { if ((((STATUS) & 1) != 1)) return _URC_END_OF_STACK; } +#define DENOTES_EXC_DISPATCHER(PV) ((PV) == (ADDR) (REG) SYS$GL_CALL_HANDL) + +#define RA_COLUMN (DWARF_ALT_FRAME_RETURN_COLUMN) + +static int +alpha_vms_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + static int eh_debug = -1; + + /* Our goal is to update FS to reflect the state one step up CONTEXT, that + is: the CFA, return address and *saved* registers locations associated + with the function designated by CONTEXT->ra. We are called when the + libgcc unwinder has not found any dwarf FDE for this address, which + typically happens when trying to propagate a language exception through a + signal global vector or frame based handler. + + The CONTEXT->reg[] entries reflect the state/location of register saves + so designate values live at the CONTEXT->ra point. Of precious value to + us here is the frame pointer (r29), which gets us a procedure value. */ + + PV pv = (context->reg[29] != 0) ? PV_FOR (ADDR_AT (context->reg[29])) : 0; + + int pkind = pv ? pv->pdsc$w_flags & 0xf : 0; + /* VMS procedure kind, as indicated by the procedure descriptor. We only + know how to deal with FP_STACK or FP_REGISTER here. */ + + ADDR new_cfa = 0; + /* CFA we will establish for the caller, computed in different ways, + e.g. depending whether we cross an exception dispatcher frame. */ + + CHFCTX *chfctx = 0; + /* Pointer to the VMS CHF context associated with an exception dispatcher + frame, if we happen to come across one. */ + + int i,j; + + if (eh_debug == -1) + { + char * eh_debug_env = getenv ("EH_DEBUG"); + eh_debug = eh_debug_env ? atoi (eh_debug_env) : 0; + } + + if (eh_debug) + printf ("MD_FALLBACK running ...\n"); + + /* We only know how to deal with stack or reg frame procedures, so give + up if we're handed anything else. */ + if (pkind != PDSC$K_KIND_FP_STACK && pkind != PDSC$K_KIND_FP_REGISTER) + return _URC_END_OF_STACK; + + if (eh_debug) + printf ("FALLBACK: CTX FP = 0x%p, PV = 0x%p, EN = 0x%llx, RA = 0x%p\n", + ADDR_AT (context->reg[29]), pv, pv->pdsc$q_entry, context->ra); + + fs->retaddr_column = RA_COLUMN; + + /* If PV designates a VMS exception vector or condition handler, we need to + do as if the caller was the signaling point and estabish the state of the + intermediate VMS code (CFA, RA and saved register locations) as if it was + a single regular function. This requires special processing. + + The datastructures available from an condition dispatcher frame (signal + context) do not contain the values of most callee-saved registers, so + whathever PV designates, we need to account for the registers it saves. + + Besides, we need to express all the locations with respect to a + consistent CFA value, so we compute this first. */ + + if (DENOTES_EXC_DISPATCHER (pv)) + { + /* The CFA to establish is the signaling point's stack pointer. We + compute it using the system invocation context unwinding services and + save the CHF context data pointer along the way for later uses. 
*/ + + INVO_CONTEXT_BLK icb; + int status, invo_handle; + + if (eh_debug) + printf ("FALLBACK: SYS$HANDLER\n"); + + icb.libicb$q_ireg [29] = REG_AT (context->reg[29]); + icb.libicb$q_ireg [30] = 0; + invo_handle = LIB$GET_INVO_HANDLE (&icb); + + status = LIB$GET_INVO_CONTEXT (invo_handle, &icb); + GIVEUP_ON_FAILURE (status); + + chfctx = (CHFCTX *) icb.libicb$ph_chfctx_addr; + + status = LIB$GET_PREV_INVO_CONTEXT (&icb); + GIVEUP_ON_FAILURE (status); + + new_cfa = (ADDR) icb.libicb$q_ireg[30]; + } + else + { + /* The CFA to establish is the SP value on entry of the procedure + designated by PV, which we compute as the corresponding frame base + register value + frame size. Note that the frame base may differ + from CONTEXT->cfa, typically if the caller has performed dynamic + stack allocations. */ + + int base_reg = pv->pdsc$w_flags & PDSC$M_BASE_REG_IS_FP ? 29 : 30; + ADDR base_addr = ADDR_AT (context->reg[base_reg]); + + new_cfa = base_addr + pv->pdsc$l_size; + } + + /* State to compute the caller's CFA by adding an offset to the current + one in CONTEXT. */ + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __builtin_dwarf_sp_column (); + fs->regs.cfa_offset = new_cfa - context->cfa; + + /* Regular unwind first, accounting for the register saves performed by + the procedure designated by PV. */ + + switch (pkind) + { + case PDSC$K_KIND_FP_STACK: + { + /* The saved registers are all located in the Register Save Area, + except for the procedure value register (R27) found at the frame + base address. */ + + int base_reg = pv->pdsc$w_flags & PDSC$M_BASE_REG_IS_FP ? 29 : 30; + ADDR base_addr = ADDR_AT (context->reg[base_reg]); + ADDR rsa_addr = base_addr + pv->pdsc$w_rsa_offset; + + if (eh_debug) + printf ("FALLBACK: STACK frame procedure\n"); + + UPDATE_FS_FOR_CFA_GR (fs, 27, base_addr, new_cfa); + + /* The first RSA entry is for the return address register, R26. */ + + UPDATE_FS_FOR_CFA_GR (fs, 26, rsa_addr, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, RA_COLUMN, rsa_addr, new_cfa); + + /* The following entries are for registers marked as saved according + to ireg_mask. */ + for (i = 0, j = 0; i < 32; i++) + if ((1 << i) & pv->pdsc$l_ireg_mask) + UPDATE_FS_FOR_CFA_GR (fs, i, rsa_addr + 8 * ++j, new_cfa); + + /* ??? floating point registers ? */ + + break; + } + + case PDSC$K_KIND_FP_REGISTER: + { + if (eh_debug) + printf ("FALLBACK: REGISTER frame procedure\n"); + + fs->regs.reg[RA_COLUMN].how = REG_SAVED_REG; + fs->regs.reg[RA_COLUMN].loc.reg = pv->pdsc$b_save_ra; + + fs->regs.reg[29].how = REG_SAVED_REG; + fs->regs.reg[29].loc.reg = pv->pdsc$b_save_fp; + + break; + } + + default: + /* Should never reach here. */ + return _URC_END_OF_STACK; + } + + /* If PV designates an exception dispatcher, we have to adjust the return + address column to get at the signal occurrence point, and account for + for what the CHF context contains. */ + + if (DENOTES_EXC_DISPATCHER (pv)) + { + /* The PC of the instruction causing the condition is available from the + signal argument vector. Extra saved register values are available + from the mechargs array. */ + + CHF$SIGNAL_ARRAY *sigargs + = (CHF$SIGNAL_ARRAY *) chfctx->chfctx$q_sigarglst; + + CHF$MECH_ARRAY *mechargs + = (CHF$MECH_ARRAY *) chfctx->chfctx$q_mcharglst; + + ADDR condpc_addr + = &((int *)(&sigargs->chf$l_sig_name)) [sigargs->chf$is_sig_args-2]; + + ADDR rei_frame_addr = (void *) mechargs->chf$q_mch_esf_addr; + + /* Adjust the return address location. 
*/ + + UPDATE_FS_FOR_CFA_GR (fs, RA_COLUMN, condpc_addr, new_cfa); + + /* The frame pointer at the condition point is available from the + chf context directly. */ + + UPDATE_FS_FOR_CFA_GR (fs, 29, &chfctx->chfctx$q_expt_fp, new_cfa); + + /* Registers available from the mechargs array. */ + + UPDATE_FS_FOR_CFA_GR (fs, 0, &mechargs->chf$q_mch_savr0, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 1, &mechargs->chf$q_mch_savr1, new_cfa); + + UPDATE_FS_FOR_CFA_GR (fs, 16, &mechargs->chf$q_mch_savr16, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 17, &mechargs->chf$q_mch_savr17, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 18, &mechargs->chf$q_mch_savr18, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 19, &mechargs->chf$q_mch_savr19, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 20, &mechargs->chf$q_mch_savr20, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 21, &mechargs->chf$q_mch_savr21, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 22, &mechargs->chf$q_mch_savr22, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 23, &mechargs->chf$q_mch_savr23, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 24, &mechargs->chf$q_mch_savr24, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 25, &mechargs->chf$q_mch_savr25, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 26, &mechargs->chf$q_mch_savr26, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 27, &mechargs->chf$q_mch_savr27, new_cfa); + UPDATE_FS_FOR_CFA_GR (fs, 28, &mechargs->chf$q_mch_savr28, new_cfa); + + /* Registers R2 to R7 are available from the rei frame pointer. */ + + for (i = 2; i <= 7; i ++) + UPDATE_FS_FOR_CFA_GR (fs, i, rei_frame_addr+(i - 2)*8, new_cfa); + + /* ??? floating point registers ? */ + } + + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + + + diff --git a/gcc/config/alpha/vms.h b/gcc/config/alpha/vms.h new file mode 100644 index 000000000..fab0f2577 --- /dev/null +++ b/gcc/config/alpha/vms.h @@ -0,0 +1,360 @@ +/* Output variables, constants and external declarations, for GNU compiler. + Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2004, 2005, 2007, 2008, + 2009, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OBJECT_SUFFIX ".obj" +#define TARGET_EXECUTABLE_SUFFIX ".exe" + +/* Alpha/VMS object format is not really Elf, but this makes compiling + crtstuff.c and dealing with shared library initialization much easier. */ +#define OBJECT_FORMAT_ELF + +/* This enables certain macros in alpha.h, which will make an indirect + reference to an external symbol an invalid address. This needs to be + defined before we include alpha.h, since it determines which macros + are used for GO_IF_*. 
*/ + +#define NO_EXTERNAL_INDIRECT_ADDRESS + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define_std ("vms"); \ + builtin_define_std ("VMS"); \ + builtin_define ("__ALPHA"); \ + builtin_assert ("system=vms"); \ + if (TARGET_FLOAT_VAX) \ + builtin_define ("__G_FLOAT"); \ + else \ + builtin_define ("__IEEE_FLOAT"); \ + } while (0) + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS|MASK_GAS) +#undef TARGET_ABI_OPEN_VMS +#define TARGET_ABI_OPEN_VMS 1 + +#undef TARGET_NAME +#define TARGET_NAME "OpenVMS/Alpha" +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (%s)", TARGET_NAME); + +#define VMS_DEBUG_MAIN_POINTER "TRANSFER$BREAK$GO" + +#undef PCC_STATIC_STRUCT_RETURN + +/* "long" is 32 bits, but 64 bits for Ada. */ +#undef LONG_TYPE_SIZE +#define LONG_TYPE_SIZE 32 +#define ADA_LONG_TYPE_SIZE 64 + +/* Pointer is 32 bits but the hardware has 64-bit addresses, sign extended. */ +#undef POINTER_SIZE +#define POINTER_SIZE 32 +#define POINTERS_EXTEND_UNSIGNED 0 + +#define MAX_OFILE_ALIGNMENT 524288 /* 8 x 2^16 by DEC Ada Test CD40VRA */ + +/* The maximum alignment 'malloc' honors. */ +#undef MALLOC_ABI_ALIGNMENT +#define MALLOC_ABI_ALIGNMENT ((TARGET_MALLOC64 ? 16 : 8) * BITS_PER_UNIT) + +#undef FIXED_REGISTERS +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } + +#undef CALL_USED_REGISTERS +#define CALL_USED_REGISTERS \ + {1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. + + We allocate in the following order: + $f1 (nonsaved floating-point register) + $f10-$f15 (likewise) + $f22-$f30 (likewise) + $f21-$f16 (likewise, but input args) + $f0 (nonsaved, but return value) + $f2-$f9 (saved floating-point registers) + $1 (nonsaved integer registers) + $22-$25 (likewise) + $28 (likewise) + $0 (likewise, but return value) + $21-$16 (likewise, but input args) + $27 (procedure value in OSF, nonsaved in NT) + $2-$8 (saved integer registers) + $9-$14 (saved integer registers) + $26 (return PC) + $15 (frame pointer) + $29 (global pointer) + $30, $31, $f31 (stack pointer and always zero/ap & fp) */ + +#undef REG_ALLOC_ORDER +#define REG_ALLOC_ORDER \ + {33, \ + 42, 43, 44, 45, 46, 47, \ + 54, 55, 56, 57, 58, 59, 60, 61, 62, \ + 53, 52, 51, 50, 49, 48, \ + 32, \ + 34, 35, 36, 37, 38, 39, 40, 41, \ + 1, \ + 22, 23, 24, 25, \ + 28, \ + 0, \ + 21, 20, 19, 18, 17, 16, \ + 27, \ + 2, 3, 4, 5, 6, 7, 8, \ + 9, 10, 11, 12, 13, 14, \ + 26, \ + 15, \ + 29, \ + 30, 31, 63 } + +#undef HARD_FRAME_POINTER_REGNUM +#define HARD_FRAME_POINTER_REGNUM 29 + +/* Define registers used by the epilogue and return instruction. */ +#undef EPILOGUE_USES +#define EPILOGUE_USES(REGNO) ((REGNO) == 26 || (REGNO) == 29) + +#undef INITIAL_ELIMINATION_OFFSET +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = alpha_vms_initial_elimination_offset(FROM, TO)) + + +/* Define a data type for recording info about an argument list + during the scan of that argument list. 
This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Alpha/VMS, this is a structure that contains the number of + arguments and, for each argument, the datatype of that argument. + + The number of arguments is a number of words of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +enum avms_arg_type {I64, FF, FD, FG, FS, FT}; +typedef struct {int num_args; enum avms_arg_type atypes[6];} avms_arg_info; + +#undef CUMULATIVE_ARGS +#define CUMULATIVE_ARGS avms_arg_info + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#undef INIT_CUMULATIVE_ARGS +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM).num_args = 0; \ + (CUM).atypes[0] = (CUM).atypes[1] = (CUM).atypes[2] = I64; \ + (CUM).atypes[3] = (CUM).atypes[4] = (CUM).atypes[5] = I64; + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#undef ASM_WEAKEN_LABEL +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ + fputc ('\n', FILE); } while (0) + +#define READONLY_DATA_SECTION_ASM_OP "\t.rdata" +#define CTORS_SECTION_ASM_OP "\t.ctors" +#define DTORS_SECTION_ASM_OP "\t.dtors" +#define SDATA_SECTION_ASM_OP "\t.sdata" +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t.long " #FUNC"\n"); + +#undef ASM_OUTPUT_ADDR_DIFF_ELT +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) gcc_unreachable () + +#undef ASM_OUTPUT_ADDR_VEC_ELT +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + fprintf (FILE, "\t.quad $L%d\n", (VALUE)) + +#undef CASE_VECTOR_MODE +#define CASE_VECTOR_MODE DImode +#undef CASE_VECTOR_PC_RELATIVE + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \ +{ ASM_OUTPUT_ALIGN (FILE, 3); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); } + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#define COMMON_ASM_OP "\t.comm\t" + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + vms_output_aligned_decl_common (FILE, DECL, NAME, SIZE, ALIGN) + +/* Control how constructors and destructors are emitted. */ +#define TARGET_ASM_CONSTRUCTOR vms_asm_out_constructor +#define TARGET_ASM_DESTRUCTOR vms_asm_out_destructor + +#undef SDB_DEBUGGING_INFO +#undef MIPS_DEBUGGING_INFO +#undef DBX_DEBUGGING_INFO + +#define DWARF2_DEBUGGING_INFO 1 +#define VMS_DEBUGGING_INFO 1 + +#define DWARF2_UNWIND_INFO 1 + +#undef EH_RETURN_HANDLER_RTX +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, 8)) + +#define LINK_EH_SPEC "vms-dwarf2eh.o%s " +#define LINK_GCC_C_SEQUENCE_SPEC "%G" + +#ifdef IN_LIBGCC2 +/* Get the definition for MD_FALLBACK_FRAME_STATE_FOR from a separate + file. This avoids having to recompile the world instead of libgcc only + when changes to this macro are exercised. */ + +#define MD_UNWIND_SUPPORT "config/alpha/vms-unwind.h" +#endif + +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + avms_asm_output_external (FILE, DECL, NAME) + +typedef struct crtl_name_spec +{ + const char *const name; + const char *deccname; + int referenced; +} crtl_name_spec; + +#include "config/vms/vms-crtl.h" + +/* Alias CRTL names to 32/64bit DECCRTL functions. 
+ Fixme: This should do a binary search. */ +#define DO_CRTL_NAMES \ + do \ + { \ + int i; \ + static crtl_name_spec vms_crtl_names[] = CRTL_NAMES; \ + static int malloc64_init = 0; \ + \ + if ((malloc64_init == 0) && TARGET_MALLOC64) \ + { \ + for (i=0; vms_crtl_names [i].name; i++) \ + { \ + if (strcmp ("calloc", vms_crtl_names [i].name) == 0) \ + vms_crtl_names [i].deccname = "decc$_calloc64"; \ + else \ + if (strcmp ("malloc", vms_crtl_names [i].name) == 0) \ + vms_crtl_names [i].deccname = "decc$_malloc64"; \ + else \ + if (strcmp ("realloc", vms_crtl_names [i].name) == 0) \ + vms_crtl_names [i].deccname = "decc$_realloc64"; \ + else \ + if (strcmp ("strdup", vms_crtl_names [i].name) == 0) \ + vms_crtl_names [i].deccname = "decc$_strdup64"; \ + } \ + malloc64_init = 1; \ + } \ + for (i=0; vms_crtl_names [i].name; i++) \ + if (!vms_crtl_names [i].referenced && \ + (strcmp (name, vms_crtl_names [i].name) == 0)) \ + { \ + fprintf (file, "\t%s=%s\n", \ + name, vms_crtl_names [i].deccname); \ + vms_crtl_names [i].referenced = 1; \ + } \ + } while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#undef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION vms_asm_named_section + +#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ + do { fprintf ((FILE), "\t.literals\n"); \ + in_section = NULL; \ + fprintf ((FILE), "\t"); \ + assemble_name (FILE, LABEL1); \ + fprintf (FILE, " = "); \ + assemble_name (FILE, LABEL2); \ + fprintf (FILE, "\n"); \ + } while (0) + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE VMS_AND_DWARF2_DEBUG + +#define ASM_PN_FORMAT "%s___%lu" + +/* ??? VMS uses different linkage. */ +#undef TARGET_ASM_OUTPUT_MI_THUNK + +#undef ASM_SPEC +#undef ASM_FINAL_SPEC + +/* The VMS convention is to always provide minimal debug info + for a traceback unless specifically overridden. */ + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (write_symbols == NO_DEBUG \ + && debug_info_level == DINFO_LEVEL_NONE) \ + { \ + write_symbols = VMS_DEBUG; \ + debug_info_level = DINFO_LEVEL_TERSE; \ + } \ +} while (0) + +/* Link with vms-dwarf2.o if -g (except -g0). This causes the + VMS link to pull all the dwarf2 debug sections together. */ +#undef LINK_SPEC +#define LINK_SPEC "%{g:-g vms-dwarf2.o%s} %{g0} %{g1:-g1 vms-dwarf2.o%s} \ +%{g2:-g2 vms-dwarf2.o%s} %{g3:-g3 vms-dwarf2.o%s} %{shared} %{v} %{map}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ +"%{!shared:%{mvms-return-codes:vcrt0.o%s} %{!mvms-return-codes:pcrt0.o%s} \ + crtbegin.o%s} \ + %{!static:%{shared:crtbeginS.o%s}}" + +#define ENDFILE_SPEC \ +"%{!shared:crtend.o%s} %{!static:%{shared:crtendS.o%s}}" + +#define NAME__MAIN "__gccmain" +#define SYMBOL__MAIN __gccmain + +#define INIT_SECTION_ASM_OP "\t.section LIB$INITIALIZE,GBL,NOWRT" + +#define LONGLONG_STANDALONE 1 + +#undef TARGET_VALID_POINTER_MODE +#define TARGET_VALID_POINTER_MODE vms_valid_pointer_mode diff --git a/gcc/config/alpha/vms64.h b/gcc/config/alpha/vms64.h new file mode 100644 index 000000000..07424d0f2 --- /dev/null +++ b/gcc/config/alpha/vms64.h @@ -0,0 +1,53 @@ +/* Output variables, constants and external declarations, for GNU compiler. + Copyright (C) 2001, 2007, 2009 Free Software Foundation, Inc. + Contributed by Douglas Rupp (rupp@gnat.com). + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define_std ("vms"); \ + builtin_define_std ("VMS"); \ + builtin_define ("__ALPHA"); \ + builtin_assert ("system=vms"); \ + builtin_define ("__IEEE_FLOAT"); \ + builtin_define ("__LONG_POINTERS=1"); \ + } while (0) + +#undef SUBTARGET_SWITCHES +#define SUBTARGET_SWITCHES \ + { "malloc64", MASK_MALLOC64, "Malloc data into P2 space" }, + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS | MASK_MALLOC64) + +#undef LONG_TYPE_SIZE +#define LONG_TYPE_SIZE 64 + +#undef POINTER_SIZE +#define POINTER_SIZE 64 + +/* Eventhough pointers are 64bits, only 32bit ever remain significant in code + addresses. */ +#define MASK_RETURN_ADDR (GEN_INT (0xffffffff)) + +/* Defaults to "long int" */ +#undef SIZE_TYPE +#undef PTRDIFF_TYPE + +# include "config/vms/vms-crtl-64.h" diff --git a/gcc/config/alpha/x-alpha b/gcc/config/alpha/x-alpha new file mode 100644 index 000000000..ecca70424 --- /dev/null +++ b/gcc/config/alpha/x-alpha @@ -0,0 +1,3 @@ +driver-alpha.o: $(srcdir)/config/alpha/driver-alpha.c \ + $(CONFIG_H) $(SYSTEM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< diff --git a/gcc/config/alpha/x-osf b/gcc/config/alpha/x-osf new file mode 100644 index 000000000..5bb9c9098 --- /dev/null +++ b/gcc/config/alpha/x-osf @@ -0,0 +1,4 @@ +host-osf.o : $(srcdir)/config/alpha/host-osf.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h hosthooks.h hosthooks-def.h $(HOOKS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/alpha/host-osf.c diff --git a/gcc/config/arc/arc-modes.def b/gcc/config/arc/arc-modes.def new file mode 100644 index 000000000..c2d2ceaf8 --- /dev/null +++ b/gcc/config/arc/arc-modes.def @@ -0,0 +1,24 @@ +/* Definitions of target machine for GNU compiler, Argonaut ARC cpu. + Copyright (C) 2002, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Some insns set all condition code flags, some only set the ZNC flags, and + some only set the ZN flags. */ + +CC_MODE (CCZNC); +CC_MODE (CCZN); diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h new file mode 100644 index 000000000..5550ebfbe --- /dev/null +++ b/gcc/config/arc/arc-protos.h @@ -0,0 +1,63 @@ +/* Definitions of target machine for GNU compiler, Argonaut ARC cpu. 
+ Copyright (C) 2000, 2004, 2007, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifdef RTX_CODE +extern enum machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx); + +/* Define the function that build the compare insn for scc and bcc. */ +extern struct rtx_def *gen_compare_reg (enum rtx_code, rtx, rtx); +#endif + +/* Declarations for various fns used in the .md file. */ +extern const char *output_shift (rtx *); + +extern int symbolic_operand (rtx, enum machine_mode); +extern int arc_double_limm_p (rtx); +extern int arc_eligible_for_epilogue_delay (rtx, int); +extern void arc_initialize_trampoline (rtx, rtx, rtx); +extern void arc_print_operand (FILE *, rtx, int); +extern void arc_print_operand_address (FILE *, rtx); +extern void arc_final_prescan_insn (rtx, rtx *, int); +extern int call_address_operand (rtx, enum machine_mode); +extern int call_operand (rtx, enum machine_mode); +extern int symbolic_memory_operand (rtx, enum machine_mode); +extern int short_immediate_operand (rtx, enum machine_mode); +extern int long_immediate_operand (rtx, enum machine_mode); +extern int long_immediate_loadstore_operand (rtx, enum machine_mode); +extern int move_src_operand (rtx, enum machine_mode); +extern int move_double_src_operand (rtx, enum machine_mode); +extern int move_dest_operand (rtx, enum machine_mode); +extern int load_update_operand (rtx, enum machine_mode); +extern int store_update_operand (rtx, enum machine_mode); +extern int nonvol_nonimm_operand (rtx, enum machine_mode); +extern int const_sint32_operand (rtx, enum machine_mode); +extern int const_uint32_operand (rtx, enum machine_mode); +extern int proper_comparison_operator (rtx, enum machine_mode); +extern int shift_operator (rtx, enum machine_mode); + +extern enum arc_function_type arc_compute_function_type (tree); + + +extern unsigned int arc_compute_frame_size (int); +extern void arc_save_restore (FILE *, const char *, unsigned int, + unsigned int, const char *); +extern int arc_delay_slots_for_epilogue (void); +extern void arc_ccfsm_at_label (const char *, int); +extern int arc_ccfsm_branch_deleted_p (void); +extern void arc_ccfsm_record_branch_deleted (void); diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c new file mode 100644 index 000000000..f1afda20f --- /dev/null +++ b/gcc/config/arc/arc.c @@ -0,0 +1,2491 @@ +/* Subroutines used for code generation on the Argonaut ARC cpu. + Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, + 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* ??? This is an old port, and is undoubtedly suffering from bit rot. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" + +/* Which cpu we're compiling for. */ +int arc_cpu_type; + +/* Name of mangle string to add to symbols to separate code compiled for each + cpu (or NULL). */ +const char *arc_mangle_cpu; + +/* Name of text, data, and rodata sections used in varasm.c. */ +const char *arc_text_section; +const char *arc_data_section; +const char *arc_rodata_section; + +/* Array of valid operand punctuation characters. */ +char arc_punct_chars[256]; + +/* Variables used by arc_final_prescan_insn to implement conditional + execution. */ +static int arc_ccfsm_state; +static int arc_ccfsm_current_cc; +static rtx arc_ccfsm_target_insn; +static int arc_ccfsm_target_label; + +/* The maximum number of insns skipped which will be conditionalised if + possible. */ +#define MAX_INSNS_SKIPPED 3 + +/* A nop is needed between a 4 byte insn that sets the condition codes and + a branch that uses them (the same isn't true for an 8 byte insn that sets + the condition codes). Set by arc_final_prescan_insn. Used by + arc_print_operand. */ +static int last_insn_set_cc_p; +static int current_insn_set_cc_p; +static bool arc_handle_option (size_t, const char *, int); +static void record_cc_ref (rtx); +static void arc_init_reg_tables (void); +static int get_arc_condition_code (rtx); +static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *); +static bool arc_assemble_integer (rtx, unsigned int, int); +static void arc_output_function_prologue (FILE *, HOST_WIDE_INT); +static void arc_output_function_epilogue (FILE *, HOST_WIDE_INT); +static void arc_file_start (void); +static void arc_internal_label (FILE *, const char *, unsigned long); +static void arc_va_start (tree, rtx); +static void arc_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, + tree, int *, int); +static bool arc_rtx_costs (rtx, int, int, int *, bool); +static int arc_address_cost (rtx, bool); +static void arc_external_libcall (rtx); +static bool arc_return_in_memory (const_tree, const_tree); +static bool arc_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx arc_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static void arc_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static unsigned int arc_function_arg_boundary (enum machine_mode, const_tree); +static void arc_trampoline_init (rtx, tree, rtx); +static void arc_option_override (void); +static void arc_conditional_register_usage (void); + + +/* ARC specific attributs. 
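   The only machine attribute defined here is "interrupt"; its single
   string argument names the interrupt link register and must be either
   "ilink1" or "ilink2" (see arc_handle_interrupt_attribute below).  A
   usage sketch, with hypothetical handler names:

       void timer_isr (void) __attribute__ ((interrupt ("ilink1")));
       void fast_isr (void) __attribute__ ((interrupt ("ilink2")));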
*/ + +static const struct attribute_spec arc_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute }, + { NULL, 0, 0, false, false, false, NULL } +}; + +/* Initialize the GCC target structure. */ +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER arc_assemble_integer + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE arc_output_function_prologue +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE arc_output_function_epilogue +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START arc_file_start +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arc_attribute_table +#undef TARGET_ASM_INTERNAL_LABEL +#define TARGET_ASM_INTERNAL_LABEL arc_internal_label +#undef TARGET_ASM_EXTERNAL_LIBCALL +#define TARGET_ASM_EXTERNAL_LIBCALL arc_external_libcall + +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION arc_handle_option + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE arc_option_override + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS arc_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arc_address_cost + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arc_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG arc_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY arc_function_arg_boundary +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START arc_va_start + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT arc_trampoline_init + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +arc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + case OPT_mcpu_: + return strcmp (arg, "base") == 0 || ARC_EXTENSION_CPU (arg); + + default: + return true; + } +} + +/* Implement TARGET_OPTION_OVERRIDE. + These need to be done at start up. It's convenient to do them here. */ + +static void +arc_option_override (void) +{ + char *tmp; + + /* Set the pseudo-ops for the various standard sections. 
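   With the usual defaults the three strings built here come out looking
   roughly like "\t.section .text" and so on; this is only an
   illustration, since the exact ARC_SECTION_FORMAT template and the
   arc_text_string/arc_data_string/arc_rodata_string defaults are
   defined elsewhere in the port.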
*/ + arc_text_section = tmp = XNEWVEC (char, strlen (arc_text_string) + sizeof (ARC_SECTION_FORMAT) + 1); + sprintf (tmp, ARC_SECTION_FORMAT, arc_text_string); + arc_data_section = tmp = XNEWVEC (char, strlen (arc_data_string) + sizeof (ARC_SECTION_FORMAT) + 1); + sprintf (tmp, ARC_SECTION_FORMAT, arc_data_string); + arc_rodata_section = tmp = XNEWVEC (char, strlen (arc_rodata_string) + sizeof (ARC_SECTION_FORMAT) + 1); + sprintf (tmp, ARC_SECTION_FORMAT, arc_rodata_string); + + arc_init_reg_tables (); + + /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ + memset (arc_punct_chars, 0, sizeof (arc_punct_chars)); + arc_punct_chars['#'] = 1; + arc_punct_chars['*'] = 1; + arc_punct_chars['?'] = 1; + arc_punct_chars['!'] = 1; + arc_punct_chars['~'] = 1; +} + +/* The condition codes of the ARC, and the inverse function. */ +static const char *const arc_condition_codes[] = +{ + "al", 0, "eq", "ne", "p", "n", "c", "nc", "v", "nv", + "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0 +}; + +#define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1) + +/* Returns the index of the ARC condition code string in + `arc_condition_codes'. COMPARISON should be an rtx like + `(eq (...) (...))'. */ + +static int +get_arc_condition_code (rtx comparison) +{ + switch (GET_CODE (comparison)) + { + case EQ : return 2; + case NE : return 3; + case GT : return 10; + case LE : return 11; + case GE : return 12; + case LT : return 13; + case GTU : return 14; + case LEU : return 15; + case LTU : return 6; + case GEU : return 7; + default : gcc_unreachable (); + } + /*NOTREACHED*/ + return (42); +} + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ + +enum machine_mode +arc_select_cc_mode (enum rtx_code op, + rtx x ATTRIBUTE_UNUSED, + rtx y ATTRIBUTE_UNUSED) +{ + switch (op) + { + case EQ : + case NE : + return CCZNmode; + default : + switch (GET_CODE (x)) + { + case AND : + case IOR : + case XOR : + case SIGN_EXTEND : + case ZERO_EXTEND : + return CCZNmode; + case ASHIFT : + case ASHIFTRT : + case LSHIFTRT : + return CCZNCmode; + default: + break; + } + } + return CCmode; +} + +/* Vectors to keep interesting information about registers where it can easily + be got. We use to use the actual mode value as the bit number, but there + is (or may be) more than 32 modes now. Instead we use two tables: one + indexed by hard register number, and one indexed by mode. */ + +/* The purpose of arc_mode_class is to shrink the range of modes so that + they all fit (as bit numbers) in a 32-bit word (again). Each real mode is + mapped into one arc_mode_class mode. */ + +enum arc_mode_class { + C_MODE, + S_MODE, D_MODE, T_MODE, O_MODE, + SF_MODE, DF_MODE, TF_MODE, OF_MODE +}; + +/* Modes for condition codes. */ +#define C_MODES (1 << (int) C_MODE) + +/* Modes for single-word and smaller quantities. */ +#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) + +/* Modes for double-word and smaller quantities. */ +#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) + +/* Modes for quad-word and smaller quantities. */ +#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) + +/* Value is 1 if register/mode pair is acceptable on arc. 
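   A sketch of how the two tables are intended to be combined; the real
   test is the HARD_REGNO_MODE_OK macro in arc.h, so this is
   illustrative only:

       arc_hard_regno_mode_ok[REGNO] & arc_mode_class[MODE]

   i.e. a register accepts a mode exactly when the bit for that mode's
   arc_mode_class is set in the register's mask below.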
*/ + +const unsigned int arc_hard_regno_mode_ok[] = { + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES, + D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + + /* ??? Leave these as S_MODES for now. */ + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES +}; + +unsigned int arc_mode_class [NUM_MACHINE_MODES]; + +enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +static void +arc_init_reg_tables (void) +{ + int i; + + for (i = 0; i < NUM_MACHINE_MODES; i++) + { + switch (GET_MODE_CLASS (i)) + { + case MODE_INT: + case MODE_PARTIAL_INT: + case MODE_COMPLEX_INT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) S_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) D_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) T_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) O_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) SF_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) DF_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) TF_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) OF_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_CC: + arc_mode_class[i] = 1 << (int) C_MODE; + break; + default: + arc_mode_class[i] = 0; + break; + } + } + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (i < 60) + arc_regno_reg_class[i] = GENERAL_REGS; + else if (i == 60) + arc_regno_reg_class[i] = LPCOUNT_REG; + else if (i == 61) + arc_regno_reg_class[i] = NO_REGS /* CC_REG: must be NO_REGS */; + else + arc_regno_reg_class[i] = NO_REGS; + } +} + +/* ARC specific attribute support. + + The ARC has these attributes: + interrupt - for interrupt functions +*/ + +/* Handle an "interrupt" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +arc_handle_interrupt_attribute (tree *node ATTRIBUTE_UNUSED, + tree name, + tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", + name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (value), "ilink1") + && strcmp (TREE_STRING_POINTER (value), "ilink2")) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not \"ilink1\" or \"ilink2\"", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + + +/* Acceptable arguments to the call insn. */ + +int +call_address_operand (rtx op, enum machine_mode mode) +{ + return (symbolic_operand (op, mode) + || (GET_CODE (op) == CONST_INT && LEGITIMATE_CONSTANT_P (op)) + || (GET_CODE (op) == REG)); +} + +int +call_operand (rtx op, enum machine_mode mode) +{ + if (GET_CODE (op) != MEM) + return 0; + op = XEXP (op, 0); + return call_address_operand (op, mode); +} + +/* Returns 1 if OP is a symbol reference. 
*/ + +int +symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + case LABEL_REF: + case CONST : + return 1; + default: + return 0; + } +} + +/* Return truth value of statement that OP is a symbolic memory + operand of mode MODE. */ + +int +symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (GET_CODE (op) != MEM) + return 0; + op = XEXP (op, 0); + return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST + || GET_CODE (op) == LABEL_REF); +} + +/* Return true if OP is a short immediate (shimm) value. */ + +int +short_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (GET_CODE (op) != CONST_INT) + return 0; + return SMALL_INT (INTVAL (op)); +} + +/* Return true if OP will require a long immediate (limm) value. + This is currently only used when calculating length attributes. */ + +int +long_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + return !SMALL_INT (INTVAL (op)); + case CONST_DOUBLE : + /* These can happen because large unsigned 32-bit constants are + represented this way (the multiplication patterns can cause these + to be generated). They also occur for SFmode values. */ + return 1; + default: + break; + } + return 0; +} + +/* Return true if OP is a MEM that when used as a load or store address will + require an 8 byte insn. + Load and store instructions don't allow the same possibilities but they're + similar enough that this one function will do. + This is currently only used when calculating length attributes. */ + +int +long_immediate_loadstore_operand (rtx op, + enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (GET_CODE (op) != MEM) + return 0; + + op = XEXP (op, 0); + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + /* This must be handled as "st c,[limm]". Ditto for load. + Technically, the assembler could translate some possibilities to + "st c,[limm/2 + limm/2]" if limm/2 will fit in a shimm, but we don't + assume that it does. */ + return 1; + case CONST_DOUBLE : + /* These can happen because large unsigned 32-bit constants are + represented this way (the multiplication patterns can cause these + to be generated). They also occur for SFmode values. */ + return 1; + case REG : + return 0; + case PLUS : + if (GET_CODE (XEXP (op, 1)) == CONST_INT + && !SMALL_INT (INTVAL (XEXP (op, 1)))) + return 1; + return 0; + default: + break; + } + return 0; +} + +/* Return true if OP is an acceptable argument for a single word + move source. */ + +int +move_src_operand (rtx op, enum machine_mode mode) +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + return (LARGE_INT (INTVAL (op))); + case CONST_DOUBLE : + /* We can handle DImode integer constants in SImode if the value + (signed or unsigned) will fit in 32 bits. This is needed because + large unsigned 32-bit constants are represented as CONST_DOUBLEs. */ + if (mode == SImode) + return arc_double_limm_p (op); + /* We can handle 32-bit floating point constants. */ + if (mode == SFmode) + return GET_MODE (op) == SFmode; + return 0; + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) 
can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + default : + return 0; + } +} + +/* Return true if OP is an acceptable argument for a double word + move source. */ + +int +move_double_src_operand (rtx op, enum machine_mode mode) +{ + switch (GET_CODE (op)) + { + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return move_double_src_operand (SUBREG_REG (op), mode); + else + return register_operand (op, mode); + case MEM : + /* Disallow auto inc/dec for now. */ + if (GET_CODE (XEXP (op, 0)) == PRE_DEC + || GET_CODE (XEXP (op, 0)) == PRE_INC) + return 0; + return address_operand (XEXP (op, 0), mode); + case CONST_INT : + case CONST_DOUBLE : + return 1; + default : + return 0; + } +} + +/* Return true if OP is an acceptable argument for a move destination. */ + +int +move_dest_operand (rtx op, enum machine_mode mode) +{ + switch (GET_CODE (op)) + { + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + default : + return 0; + } +} + +/* Return true if OP is valid load with update operand. */ + +int +load_update_operand (rtx op, enum machine_mode mode) +{ + if (GET_CODE (op) != MEM + || GET_MODE (op) != mode) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_MODE (op) != Pmode + || !register_operand (XEXP (op, 0), Pmode) + || !nonmemory_operand (XEXP (op, 1), Pmode)) + return 0; + return 1; +} + +/* Return true if OP is valid store with update operand. */ + +int +store_update_operand (rtx op, enum machine_mode mode) +{ + if (GET_CODE (op) != MEM + || GET_MODE (op) != mode) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_MODE (op) != Pmode + || !register_operand (XEXP (op, 0), Pmode) + || !(GET_CODE (XEXP (op, 1)) == CONST_INT + && SMALL_INT (INTVAL (XEXP (op, 1))))) + return 0; + return 1; +} + +/* Return true if OP is a non-volatile non-immediate operand. + Volatile memory refs require a special "cache-bypass" instruction + and only the standard movXX patterns are set up to handle them. */ + +int +nonvol_nonimm_operand (rtx op, enum machine_mode mode) +{ + if (GET_CODE (op) == MEM && MEM_VOLATILE_P (op)) + return 0; + return nonimmediate_operand (op, mode); +} + +/* Accept integer operands in the range -0x80000000..0x7fffffff. We have + to check the range carefully since this predicate is used in DImode + contexts. */ + +int +const_sint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + /* All allowed constants will fit a CONST_INT. */ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) >= (-0x7fffffff - 1) && INTVAL (op) <= 0x7fffffff)); +} + +/* Accept integer operands in the range 0..0xffffffff. We have to check the + range carefully since this predicate is used in DImode contexts. Also, we + need some extra crud to make it work when hosted on 64-bit machines. 
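   As a concrete illustration: in a DImode context on a host whose
   HOST_WIDE_INT is 32 bits, a constant such as 0xdeadbeef cannot be a
   non-negative CONST_INT, so it reaches this predicate as a
   CONST_DOUBLE with a zero high word and is accepted by the second arm
   of the #else branch below.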
*/ + +int +const_uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ +#if HOST_BITS_PER_WIDE_INT > 32 + /* All allowed constants will fit a CONST_INT. */ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) >= 0 && INTVAL (op) <= 0xffffffffL)); +#else + return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0) + || (GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_HIGH (op) == 0)); +#endif +} + +/* Return 1 if OP is a comparison operator valid for the mode of CC. + This allows the use of MATCH_OPERATOR to recognize all the branch insns. + + Some insns only set a few bits in the condition code. So only allow those + comparisons that use the bits that are valid. */ + +int +proper_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + enum rtx_code code; + if (!COMPARISON_P (op)) + return 0; + + code = GET_CODE (op); + if (GET_MODE (XEXP (op, 0)) == CCZNmode) + return (code == EQ || code == NE); + if (GET_MODE (XEXP (op, 0)) == CCZNCmode) + return (code == EQ || code == NE + || code == LTU || code == GEU || code == GTU || code == LEU); + return 1; +} + +/* Misc. utilities. */ + +/* X and Y are two things to compare using CODE. Return the rtx + for the cc reg in the proper mode. */ + +rtx +gen_compare_reg (enum rtx_code code, rtx x, rtx y) +{ + enum machine_mode mode = SELECT_CC_MODE (code, x, y); + return gen_rtx_REG (mode, 61); +} + +/* Return 1 if VALUE, a const_double, will fit in a limm (4 byte number). + We assume the value can be either signed or unsigned. */ + +int +arc_double_limm_p (rtx value) +{ + HOST_WIDE_INT low, high; + + gcc_assert (GET_CODE (value) == CONST_DOUBLE); + + low = CONST_DOUBLE_LOW (value); + high = CONST_DOUBLE_HIGH (value); + + if (low & 0x80000000) + { + return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0) + || (((low & - (unsigned HOST_WIDE_INT) 0x80000000) + == - (unsigned HOST_WIDE_INT) 0x80000000) + && high == -1)); + } + else + { + return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0; + } +} + +/* Do any needed setup for a variadic function. For the ARC, we must + create a register parameter block, and then copy any anonymous arguments + in registers to memory. + + CUM has not been updated for the last named argument which has type TYPE + and mode MODE, and we rely on this fact. + + We do things a little weird here. We're supposed to only allocate space + for the anonymous arguments. However we need to keep the stack eight byte + aligned. So we round the space up if necessary, and leave it to va_start + to compensate. */ + +static void +arc_setup_incoming_varargs (CUMULATIVE_ARGS *cum, + enum machine_mode mode, + tree type ATTRIBUTE_UNUSED, + int *pretend_size, + int no_rtl) +{ + int first_anon_arg; + + /* All BLKmode values are passed by reference. */ + gcc_assert (mode != BLKmode); + + first_anon_arg = *cum + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + + if (first_anon_arg < MAX_ARC_PARM_REGS && !no_rtl) + { + /* Note that first_reg_offset < MAX_ARC_PARM_REGS. */ + int first_reg_offset = first_anon_arg; + /* Size in words to "pretend" allocate. */ + int size = MAX_ARC_PARM_REGS - first_reg_offset; + /* Extra slop to keep stack eight byte aligned. 
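   Worked example, assuming MAX_ARC_PARM_REGS is 8 and 4-byte words: if
   the named arguments occupy the first three argument registers,
   first_reg_offset is 3, size is 8 - 3 = 5 words, align_slop is
   5 & 1 = 1, and *pretend_size becomes (5 + 1) * UNITS_PER_WORD = 24
   bytes, so the eight byte alignment is preserved.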
*/ + int align_slop = size & 1; + rtx regblock; + + regblock = gen_rtx_MEM (BLKmode, + plus_constant (arg_pointer_rtx, + FIRST_PARM_OFFSET (0) + + align_slop * UNITS_PER_WORD)); + set_mem_alias_set (regblock, get_varargs_alias_set ()); + set_mem_align (regblock, BITS_PER_WORD); + move_block_from_reg (first_reg_offset, regblock, + MAX_ARC_PARM_REGS - first_reg_offset); + + *pretend_size = ((MAX_ARC_PARM_REGS - first_reg_offset + align_slop) + * UNITS_PER_WORD); + } +} + +/* Cost functions. */ + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, + bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + /* Small integers are as cheap as registers. 4 byte values can + be fetched as immediate constants - let's give that the cost + of an extra insn. */ + case CONST_INT: + if (SMALL_INT (INTVAL (x))) + { + *total = 0; + return true; + } + /* FALLTHRU */ + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (1); + return true; + + case CONST_DOUBLE: + { + rtx high, low; + split_double (x, &high, &low); + *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high)) + + !SMALL_INT (INTVAL (low))); + return true; + } + + /* Encourage synth_mult to find a synthetic multiply when reasonable. + If we need more than 12 insns to do a multiply, then go out-of-line, + since the call overhead will be < 10% of the cost of the multiply. */ + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (TARGET_SHIFTER) + *total = COSTS_N_INSNS (1); + else if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total = COSTS_N_INSNS (16); + else + *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1))); + return false; + + default: + return false; + } +} + + +/* Provide the costs of an addressing mode that contains ADDR. + If ADDR is not a valid address, its cost is irrelevant. */ + +static int +arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) +{ + switch (GET_CODE (addr)) + { + case REG : + return 1; + + case LABEL_REF : + case SYMBOL_REF : + case CONST : + return 2; + + case PLUS : + { + register rtx plus0 = XEXP (addr, 0); + register rtx plus1 = XEXP (addr, 1); + + if (GET_CODE (plus0) != REG) + break; + + switch (GET_CODE (plus1)) + { + case CONST_INT : + return SMALL_INT (INTVAL (plus1)) ? 1 : 2; + case CONST : + case SYMBOL_REF : + case LABEL_REF : + return 2; + default: + break; + } + break; + } + default: + break; + } + + return 4; +} + +/* Function prologue/epilogue handlers. */ + +/* ARC stack frames look like: + + Before call After call + +-----------------------+ +-----------------------+ + | | | | + high | local variables, | | local variables, | + mem | reg save area, etc. | | reg save area, etc. | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | arguments on stack. | | arguments on stack. 
| + | | | | + SP+16->+-----------------------+FP+48->+-----------------------+ + | 4 word save area for | | reg parm save area, | + | return addr, prev %fp | | only created for | + SP+0->+-----------------------+ | variable argument | + | functions | + FP+16->+-----------------------+ + | 4 word save area for | + | return addr, prev %fp | + FP+0->+-----------------------+ + | | + | local variables | + | | + +-----------------------+ + | | + | register save area | + | | + +-----------------------+ + | | + | alloca allocations | + | | + +-----------------------+ + | | + | arguments on stack | + | | + SP+16->+-----------------------+ + low | 4 word save area for | + memory | return addr, prev %fp | + SP+0->+-----------------------+ + +Notes: +1) The "reg parm save area" does not exist for non variable argument fns. + The "reg parm save area" can be eliminated completely if we created our + own va-arc.h, but that has tradeoffs as well (so it's not done). */ + +/* Structure to be filled in by arc_compute_frame_size with register + save masks, and offsets for the current function. */ +struct arc_frame_info +{ + unsigned int total_size; /* # bytes that the entire frame takes up. */ + unsigned int extra_size; /* # bytes of extra stuff. */ + unsigned int pretend_size; /* # bytes we push and pretend caller did. */ + unsigned int args_size; /* # bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # bytes needed to store regs. */ + unsigned int var_size; /* # bytes that variables take up. */ + unsigned int reg_offset; /* Offset from new sp to store regs. */ + unsigned int gmask; /* Mask of saved gp registers. */ + int initialized; /* Nonzero if frame size already calculated. */ +}; + +/* Current frame information calculated by arc_compute_frame_size. */ +static struct arc_frame_info current_frame_info; + +/* Zero structure to initialize current_frame_info. */ +static struct arc_frame_info zero_frame_info; + +/* Type of function DECL. + + The result is cached. To reset the cache at the end of a function, + call with DECL = NULL_TREE. */ + +enum arc_function_type +arc_compute_function_type (tree decl) +{ + tree a; + /* Cached value. */ + static enum arc_function_type fn_type = ARC_FUNCTION_UNKNOWN; + /* Last function we were called for. */ + static tree last_fn = NULL_TREE; + + /* Resetting the cached value? */ + if (decl == NULL_TREE) + { + fn_type = ARC_FUNCTION_UNKNOWN; + last_fn = NULL_TREE; + return fn_type; + } + + if (decl == last_fn && fn_type != ARC_FUNCTION_UNKNOWN) + return fn_type; + + /* Assume we have a normal function (not an interrupt handler). */ + fn_type = ARC_FUNCTION_NORMAL; + + /* Now see if this is an interrupt handler. */ + for (a = DECL_ATTRIBUTES (current_function_decl); + a; + a = TREE_CHAIN (a)) + { + tree name = TREE_PURPOSE (a), args = TREE_VALUE (a); + + if (name == get_identifier ("__interrupt__") + && list_length (args) == 1 + && TREE_CODE (TREE_VALUE (args)) == STRING_CST) + { + tree value = TREE_VALUE (args); + + if (!strcmp (TREE_STRING_POINTER (value), "ilink1")) + fn_type = ARC_FUNCTION_ILINK1; + else if (!strcmp (TREE_STRING_POINTER (value), "ilink2")) + fn_type = ARC_FUNCTION_ILINK2; + else + gcc_unreachable (); + break; + } + } + + last_fn = decl; + return fn_type; +} + +#define ILINK1_REGNUM 29 +#define ILINK2_REGNUM 30 +#define RETURN_ADDR_REGNUM 31 +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. 
+ The return address and frame pointer are treated separately. + Don't consider them here. */ +#define MUST_SAVE_REGISTER(regno, interrupt_p) \ +((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \ + && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) + +#define MUST_SAVE_RETURN_ADDR (df_regs_ever_live_p (RETURN_ADDR_REGNUM)) + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. + + SIZE is the size needed for local variables. */ + +unsigned int +arc_compute_frame_size (int size /* # of var. bytes allocated. */) +{ + int regno; + unsigned int total_size, var_size, args_size, pretend_size, extra_size; + unsigned int reg_size, reg_offset; + unsigned int gmask; + enum arc_function_type fn_type; + int interrupt_p; + + var_size = size; + args_size = crtl->outgoing_args_size; + pretend_size = crtl->args.pretend_args_size; + extra_size = FIRST_PARM_OFFSET (0); + total_size = extra_size + pretend_size + args_size + var_size; + reg_offset = FIRST_PARM_OFFSET(0) + crtl->outgoing_args_size; + reg_size = 0; + gmask = 0; + + /* See if this is an interrupt handler. Call used registers must be saved + for them too. */ + fn_type = arc_compute_function_type (current_function_decl); + interrupt_p = ARC_INTERRUPT_P (fn_type); + + /* Calculate space needed for registers. + ??? We ignore the extension registers for now. */ + + for (regno = 0; regno <= 31; regno++) + { + if (MUST_SAVE_REGISTER (regno, interrupt_p)) + { + reg_size += UNITS_PER_WORD; + gmask |= 1 << regno; + } + } + + total_size += reg_size; + + /* If the only space to allocate is the fp/blink save area this is an + empty frame. However, if we'll be making a function call we need to + allocate a stack frame for our callee's fp/blink save area. */ + if (total_size == extra_size + && !MUST_SAVE_RETURN_ADDR) + total_size = extra_size = 0; + + total_size = ARC_STACK_ALIGN (total_size); + + /* Save computed information. */ + current_frame_info.total_size = total_size; + current_frame_info.extra_size = extra_size; + current_frame_info.pretend_size = pretend_size; + current_frame_info.var_size = var_size; + current_frame_info.args_size = args_size; + current_frame_info.reg_size = reg_size; + current_frame_info.reg_offset = reg_offset; + current_frame_info.gmask = gmask; + current_frame_info.initialized = reload_completed; + + /* Ok, we're done. */ + return total_size; +} + +/* Common code to save/restore registers. */ + +void +arc_save_restore (FILE *file, + const char *base_reg, + unsigned int offset, + unsigned int gmask, + const char *op) +{ + int regno; + + if (gmask == 0) + return; + + for (regno = 0; regno <= 31; regno++) + { + if ((gmask & (1L << regno)) != 0) + { + fprintf (file, "\t%s %s,[%s,%d]\n", + op, reg_names[regno], base_reg, offset); + offset += UNITS_PER_WORD; + } + } +} + +/* Target hook to assemble an integer object. The ARC version needs to + emit a special directive for references to labels and function + symbols. */ + +static bool +arc_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + if (size == UNITS_PER_WORD && aligned_p + && ((GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (x)) + || GET_CODE (x) == LABEL_REF)) + { + fputs ("\t.word\t%st(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")\n", asm_out_file); + return true; + } + return default_assemble_integer (x, size, aligned_p); +} + +/* Set up the stack and frame pointer (if desired) for the function. 
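   For a non-variadic, non-interrupt function that needs the frame
   pointer, the text emitted below comes out roughly as follows (the
   frame size, offsets and the call-saved register shown are
   illustrative; the real register names are taken from reg_names[]):

       st   blink,[sp,4]            save return address
       st   fp,[sp]                 save caller's frame pointer
       mov  fp,sp
       sub  sp,sp,32                allocate the rest of the frame
       st   r13,[sp,16]             one store per register in gmask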
*/ + +static void +arc_output_function_prologue (FILE *file, HOST_WIDE_INT size) +{ + const char *sp_str = reg_names[STACK_POINTER_REGNUM]; + const char *fp_str = reg_names[FRAME_POINTER_REGNUM]; + unsigned int gmask = current_frame_info.gmask; + enum arc_function_type fn_type = arc_compute_function_type (current_function_decl); + + /* If this is an interrupt handler, set up our stack frame. + ??? Optimize later. */ + if (ARC_INTERRUPT_P (fn_type)) + { + fprintf (file, "\t%s interrupt handler\n", + ASM_COMMENT_START); + fprintf (file, "\tsub %s,%s,16\n", sp_str, sp_str); + } + + /* This is only for the human reader. */ + fprintf (file, "\t%s BEGIN PROLOGUE %s vars= %d, regs= %d, args= %d, extra= %d\n", + ASM_COMMENT_START, ASM_COMMENT_START, + current_frame_info.var_size, + current_frame_info.reg_size / 4, + current_frame_info.args_size, + current_frame_info.extra_size); + + size = ARC_STACK_ALIGN (size); + size = (! current_frame_info.initialized + ? arc_compute_frame_size (size) + : current_frame_info.total_size); + + /* These cases shouldn't happen. Catch them now. */ + gcc_assert (size || !gmask); + + /* Allocate space for register arguments if this is a variadic function. */ + if (current_frame_info.pretend_size != 0) + fprintf (file, "\tsub %s,%s,%d\n", + sp_str, sp_str, current_frame_info.pretend_size); + + /* The home-grown ABI says link register is saved first. */ + if (MUST_SAVE_RETURN_ADDR) + fprintf (file, "\tst %s,[%s,%d]\n", + reg_names[RETURN_ADDR_REGNUM], sp_str, UNITS_PER_WORD); + + /* Set up the previous frame pointer next (if we need to). */ + if (frame_pointer_needed) + { + fprintf (file, "\tst %s,[%s]\n", fp_str, sp_str); + fprintf (file, "\tmov %s,%s\n", fp_str, sp_str); + } + + /* ??? We don't handle the case where the saved regs are more than 252 + bytes away from sp. This can be handled by decrementing sp once, saving + the regs, and then decrementing it again. The epilogue doesn't have this + problem as the `ld' insn takes reg+limm values (though it would be more + efficient to avoid reg+limm). */ + + /* Allocate the stack frame. */ + if (size - current_frame_info.pretend_size > 0) + fprintf (file, "\tsub %s,%s," HOST_WIDE_INT_PRINT_DEC "\n", + sp_str, sp_str, size - current_frame_info.pretend_size); + + /* Save any needed call-saved regs (and call-used if this is an + interrupt handler). */ + arc_save_restore (file, sp_str, current_frame_info.reg_offset, + /* The zeroing of these two bits is unnecessary, + but leave this in for clarity. */ + gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), + "st"); + + fprintf (file, "\t%s END PROLOGUE\n", ASM_COMMENT_START); +} + +/* Do any necessary cleanup after a function to restore stack, frame, + and regs. */ + +static void +arc_output_function_epilogue (FILE *file, HOST_WIDE_INT size) +{ + rtx epilogue_delay = crtl->epilogue_delay_list; + int noepilogue = FALSE; + enum arc_function_type fn_type = arc_compute_function_type (current_function_decl); + + /* This is only for the human reader. */ + fprintf (file, "\t%s EPILOGUE\n", ASM_COMMENT_START); + + size = ARC_STACK_ALIGN (size); + size = (!current_frame_info.initialized + ? arc_compute_frame_size (size) + : current_frame_info.total_size); + + if (size == 0 && epilogue_delay == 0) + { + rtx insn = get_last_insn (); + + /* If the last insn was a BARRIER, we don't have to write any code + because a jump (aka return) was put there. 
*/ + if (GET_CODE (insn) == NOTE) + insn = prev_nonnote_insn (insn); + if (insn && GET_CODE (insn) == BARRIER) + noepilogue = TRUE; + } + + if (!noepilogue) + { + unsigned int pretend_size = current_frame_info.pretend_size; + unsigned int frame_size = size - pretend_size; + int restored, fp_restored_p; + int can_trust_sp_p = !cfun->calls_alloca; + const char *sp_str = reg_names[STACK_POINTER_REGNUM]; + const char *fp_str = reg_names[FRAME_POINTER_REGNUM]; + + /* ??? There are lots of optimizations that can be done here. + EG: Use fp to restore regs if it's closer. + Maybe in time we'll do them all. For now, always restore regs from + sp, but don't restore sp if we don't have to. */ + + if (!can_trust_sp_p) + { + gcc_assert (frame_pointer_needed); + fprintf (file,"\tsub %s,%s,%d\t\t%s sp not trusted here\n", + sp_str, fp_str, frame_size, ASM_COMMENT_START); + } + + /* Restore any saved registers. */ + arc_save_restore (file, sp_str, current_frame_info.reg_offset, + /* The zeroing of these two bits is unnecessary, + but leave this in for clarity. */ + current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), + "ld"); + + if (MUST_SAVE_RETURN_ADDR) + fprintf (file, "\tld %s,[%s,%d]\n", + reg_names[RETURN_ADDR_REGNUM], + frame_pointer_needed ? fp_str : sp_str, + UNITS_PER_WORD + (frame_pointer_needed ? 0 : frame_size)); + + /* Keep track of how much of the stack pointer we've restored. + It makes the following a lot more readable. */ + restored = 0; + fp_restored_p = 0; + + /* We try to emit the epilogue delay slot insn right after the load + of the return address register so that it can execute with the + stack intact. Secondly, loads are delayed. */ + /* ??? If stack intactness is important, always emit now. */ + if (MUST_SAVE_RETURN_ADDR && epilogue_delay != NULL_RTX) + { + final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL); + epilogue_delay = NULL_RTX; + } + + if (frame_pointer_needed) + { + /* Try to restore the frame pointer in the delay slot. We can't, + however, if any of these is true. */ + if (epilogue_delay != NULL_RTX + || !SMALL_INT (frame_size) + || pretend_size + || ARC_INTERRUPT_P (fn_type)) + { + /* Note that we restore fp and sp here! */ + fprintf (file, "\tld.a %s,[%s,%d]\n", fp_str, sp_str, frame_size); + restored += frame_size; + fp_restored_p = 1; + } + } + else if (!SMALL_INT (size /* frame_size + pretend_size */) + || ARC_INTERRUPT_P (fn_type)) + { + fprintf (file, "\tadd %s,%s,%d\n", sp_str, sp_str, frame_size); + restored += frame_size; + } + + /* These must be done before the return insn because the delay slot + does the final stack restore. */ + if (ARC_INTERRUPT_P (fn_type)) + { + if (epilogue_delay) + { + final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL); + } + } + + /* Emit the return instruction. */ + { + static const int regs[4] = { + 0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM + }; + + /* Update the flags, if returning from an interrupt handler. */ + if (ARC_INTERRUPT_P (fn_type)) + fprintf (file, "\tj.d.f %s\n", reg_names[regs[fn_type]]); + else + fprintf (file, "\tj.d %s\n", reg_names[regs[fn_type]]); + } + + /* If the only register saved is the return address, we need a + nop, unless we have an instruction to put into it. Otherwise + we don't since reloading multiple registers doesn't reference + the register being loaded. 
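   So for an ordinary function whose epilogue delay slot stays empty and
   whose stack has already been fully restored, the tail of the
   epilogue is simply (assuming blink is the printed name of
   RETURN_ADDR_REGNUM):

       j.d  blink
       nop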
*/ + + if (ARC_INTERRUPT_P (fn_type)) + fprintf (file, "\tadd %s,%s,16\n", sp_str, sp_str); + else if (epilogue_delay != NULL_RTX) + { + gcc_assert (!frame_pointer_needed || fp_restored_p); + gcc_assert (restored >= size); + final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL); + } + else if (frame_pointer_needed && !fp_restored_p) + { + gcc_assert (SMALL_INT (frame_size)); + /* Note that we restore fp and sp here! */ + fprintf (file, "\tld.a %s,[%s,%d]\n", fp_str, sp_str, frame_size); + } + else if (restored < size) + { + gcc_assert (SMALL_INT (size - restored)); + fprintf (file, "\tadd %s,%s," HOST_WIDE_INT_PRINT_DEC "\n", + sp_str, sp_str, size - restored); + } + else + fprintf (file, "\tnop\n"); + } + + /* Reset state info for each function. */ + current_frame_info = zero_frame_info; + arc_compute_function_type (NULL_TREE); +} + +/* Define the number of delay slots needed for the function epilogue. + + Interrupt handlers can't have any epilogue delay slots (it's always needed + for something else, I think). For normal functions, we have to worry about + using call-saved regs as they'll be restored before the delay slot insn. + Functions with non-empty frames already have enough choices for the epilogue + delay slot so for now we only consider functions with empty frames. */ + +int +arc_delay_slots_for_epilogue (void) +{ + if (arc_compute_function_type (current_function_decl) != ARC_FUNCTION_NORMAL) + return 0; + if (!current_frame_info.initialized) + (void) arc_compute_frame_size (get_frame_size ()); + if (current_frame_info.total_size == 0) + return 1; + return 0; +} + +/* Return true if TRIAL is a valid insn for the epilogue delay slot. + Any single length instruction which doesn't reference the stack or frame + pointer or any call-saved register is OK. SLOT will always be 0. */ + +int +arc_eligible_for_epilogue_delay (rtx trial, int slot) +{ + gcc_assert (!slot); + + if (get_attr_length (trial) == 1 + /* If registers where saved, presumably there's more than enough + possibilities for the delay slot. The alternative is something + more complicated (of course, if we expanded the epilogue as rtl + this problem would go away). */ + /* ??? Note that this will always be true since only functions with + empty frames have epilogue delay slots. See + arc_delay_slots_for_epilogue. */ + && current_frame_info.gmask == 0 + && ! reg_mentioned_p (stack_pointer_rtx, PATTERN (trial)) + && ! reg_mentioned_p (frame_pointer_rtx, PATTERN (trial))) + return 1; + return 0; +} + +/* Return true if OP is a shift operator. */ + +int +shift_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + switch (GET_CODE (op)) + { + case ASHIFTRT: + case LSHIFTRT: + case ASHIFT: + return 1; + default: + return 0; + } +} + +/* Output the assembler code for doing a shift. + We go to a bit of trouble to generate efficient code as the ARC only has + single bit shifts. This is taken from the h8300 port. We only have one + mode of shifting and can't access individual bytes like the h8300 can, so + this is greatly simplified (at the expense of not generating hyper- + efficient code). + + This function is not used if the variable shift insns are present. */ + +/* ??? We assume the output operand is the same as operand 1. + This can be optimized (deleted) in the case of 1 bit shifts. */ +/* ??? We use the loop register here. We don't use it elsewhere (yet) and + using it here will give us a chance to play with it. 
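   For a small constant count the code below just repeats the
   single-bit shift, e.g. an SImode arithmetic shift left by three of
   operand 0 is emitted as

       asl %0,%0
       asl %0,%0
       asl %0,%0

   while larger constant or variable counts fall through to the
   rotate/and special cases or to the lp_count loop further down.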
*/ + +const char * +output_shift (rtx *operands) +{ + rtx shift = operands[3]; + enum machine_mode mode = GET_MODE (shift); + enum rtx_code code = GET_CODE (shift); + const char *shift_one; + + gcc_assert (mode == SImode); + + switch (code) + { + case ASHIFT: shift_one = "asl %0,%0"; break; + case ASHIFTRT: shift_one = "asr %0,%0"; break; + case LSHIFTRT: shift_one = "lsr %0,%0"; break; + default: gcc_unreachable (); + } + + if (GET_CODE (operands[2]) != CONST_INT) + { + if (optimize) + { + output_asm_insn ("sub.f 0,%2,0", operands); + output_asm_insn ("mov lp_count,%2", operands); + output_asm_insn ("bz 2f", operands); + } + else + output_asm_insn ("mov %4,%2", operands); + goto shiftloop; + } + else + { + int n; + + /* If the count is negative, make it 0. */ + n = INTVAL (operands[2]); + if (n < 0) + n = 0; + /* If the count is too big, truncate it. + ANSI says shifts of GET_MODE_BITSIZE are undefined - we choose to + do the intuitive thing. */ + else if (n > GET_MODE_BITSIZE (mode)) + n = GET_MODE_BITSIZE (mode); + + /* First see if we can do them inline. */ + if (n <= 8) + { + while (--n >= 0) + output_asm_insn (shift_one, operands); + } + /* See if we can use a rotate/and. */ + else if (n == BITS_PER_WORD - 1) + { + switch (code) + { + case ASHIFT : + output_asm_insn ("and %0,%0,1\n\tror %0,%0", operands); + break; + case ASHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("asl.f 0,%0\n\tsbc %0,0,0", operands); + break; + case LSHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("asl.f 0,%0\n\tadc %0,0,0", operands); + break; + default: + break; + } + } + /* Must loop. */ + else + { + char buf[100]; + + if (optimize) + output_asm_insn ("mov lp_count,%c2", operands); + else + output_asm_insn ("mov %4,%c2", operands); + shiftloop: + if (optimize) + { + if (flag_pic) + sprintf (buf, "lr %%4,[status]\n\tadd %%4,%%4,6\t%s single insn loop start", + ASM_COMMENT_START); + else + sprintf (buf, "mov %%4,%%%%st(1f)\t%s (single insn loop start) >> 2", + ASM_COMMENT_START); + output_asm_insn (buf, operands); + output_asm_insn ("sr %4,[lp_start]", operands); + output_asm_insn ("add %4,%4,1", operands); + output_asm_insn ("sr %4,[lp_end]", operands); + output_asm_insn ("nop\n\tnop", operands); + if (flag_pic) + fprintf (asm_out_file, "\t%s single insn loop\n", + ASM_COMMENT_START); + else + fprintf (asm_out_file, "1:\t%s single insn loop\n", + ASM_COMMENT_START); + output_asm_insn (shift_one, operands); + fprintf (asm_out_file, "2:\t%s end single insn loop\n", + ASM_COMMENT_START); + } + else + { + fprintf (asm_out_file, "1:\t%s begin shift loop\n", + ASM_COMMENT_START); + output_asm_insn ("sub.f %4,%4,1", operands); + output_asm_insn ("nop", operands); + output_asm_insn ("bn.nd 2f", operands); + output_asm_insn (shift_one, operands); + output_asm_insn ("b.nd 1b", operands); + fprintf (asm_out_file, "2:\t%s end shift loop\n", + ASM_COMMENT_START); + } + } + } + + return ""; +} + +/* Nested function support. */ + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ + +void +arc_initialize_trampoline (rtx tramp ATTRIBUTE_UNUSED, + rtx fnaddr ATTRIBUTE_UNUSED, + rtx cxt ATTRIBUTE_UNUSED) +{ +} + +/* Set the cpu type and print out other fancy things, + at the top of the file. 
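   So, after the boilerplate from default_file_start, the assembly
   output begins with a directive such as

       .cpu base

   where "base" is only the presumed default -mcpu= setting; whatever
   string was actually selected is what arc_cpu_string holds.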
*/ + +static void +arc_file_start (void) +{ + default_file_start (); + fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string); +} + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +void +arc_print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case '#' : + /* Conditional branches. For now these are equivalent. */ + case '*' : + /* Unconditional branches. Output the appropriate delay slot suffix. */ + if (!final_sequence || XVECLEN (final_sequence, 0) == 1) + { + /* There's nothing in the delay slot. */ + fputs (".nd", file); + } + else + { + rtx jump = XVECEXP (final_sequence, 0, 0); + rtx delay = XVECEXP (final_sequence, 0, 1); + if (INSN_ANNULLED_BRANCH_P (jump)) + fputs (INSN_FROM_TARGET_P (delay) ? ".jd" : ".nd", file); + else + fputs (".d", file); + } + return; + case '?' : /* with leading "." */ + case '!' : /* without leading "." */ + /* This insn can be conditionally executed. See if the ccfsm machinery + says it should be conditionalized. */ + if (arc_ccfsm_state == 3 || arc_ccfsm_state == 4) + { + /* Is this insn in a delay slot? */ + if (final_sequence && XVECLEN (final_sequence, 0) == 2) + { + rtx insn = XVECEXP (final_sequence, 0, 1); + + /* If the insn is annulled and is from the target path, we need + to inverse the condition test. */ + if (INSN_ANNULLED_BRANCH_P (insn)) + { + if (INSN_FROM_TARGET_P (insn)) + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current_cc)]); + else + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current_cc]); + } + else + { + /* This insn is executed for either path, so don't + conditionalize it at all. */ + ; /* nothing to do */ + } + } + else + { + /* This insn isn't in a delay slot. */ + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current_cc]); + } + } + return; + case '~' : + /* Output a nop if we're between a set of the condition codes, + and a conditional branch. */ + if (last_insn_set_cc_p) + fputs ("nop\n\t", file); + return; + case 'd' : + fputs (arc_condition_codes[get_arc_condition_code (x)], file); + return; + case 'D' : + fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE + (get_arc_condition_code (x))], + file); + return; + case 'R' : + /* Write second word of DImode or DFmode reference, + register or memory. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)+1], file); + else if (GET_CODE (x) == MEM) + { + fputc ('[', file); + /* Handle possible auto-increment. Since it is pre-increment and + we have already done it, we can just use an offset of four. */ + /* ??? This is taken from rs6000.c I think. I don't think it is + currently necessary, but keep it around. 
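+
+   For illustration (a sketch, with r4 and fp as stand-in operands): for a
+   DImode value held in r4, plain %0 prints r4 while %R0 prints r5; for a
+   DImode memory operand at [fp], %R0 prints [fp,4].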
*/ + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC) + output_address (plus_constant (XEXP (XEXP (x, 0), 0), 4)); + else + output_address (plus_constant (XEXP (x, 0), 4)); + fputc (']', file); + } + else + output_operand_lossage ("invalid operand to %%R code"); + return; + case 'S' : + if ((GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (x)) + || GET_CODE (x) == LABEL_REF) + { + fprintf (file, "%%st("); + output_addr_const (file, x); + fprintf (file, ")"); + return; + } + break; + case 'H' : + case 'L' : + if (GET_CODE (x) == REG) + { + /* L = least significant word, H = most significant word */ + if ((TARGET_BIG_ENDIAN != 0) ^ (code == 'L')) + fputs (reg_names[REGNO (x)], file); + else + fputs (reg_names[REGNO (x)+1], file); + } + else if (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE) + { + rtx first, second; + + split_double (x, &first, &second); + fprintf (file, "0x%08lx", + (long)(code == 'L' ? INTVAL (first) : INTVAL (second))); + } + else + output_operand_lossage ("invalid operand to %%H/%%L code"); + return; + case 'A' : + { + char str[30]; + + gcc_assert (GET_CODE (x) == CONST_DOUBLE + && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT); + + real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1); + fprintf (file, "%s", str); + return; + } + case 'U' : + /* Output a load/store with update indicator if appropriate. */ + if (GET_CODE (x) == MEM) + { + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC) + fputs (".a", file); + } + else + output_operand_lossage ("invalid operand to %%U code"); + return; + case 'V' : + /* Output cache bypass indicator for a load/store insn. Volatile memory + refs are defined to use the cache bypass mechanism. */ + if (GET_CODE (x) == MEM) + { + if (MEM_VOLATILE_P (x)) + fputs (".di", file); + } + else + output_operand_lossage ("invalid operand to %%V code"); + return; + case 0 : + /* Do nothing special. */ + break; + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); + } + + switch (GET_CODE (x)) + { + case REG : + fputs (reg_names[REGNO (x)], file); + break; + case MEM : + fputc ('[', file); + if (GET_CODE (XEXP (x, 0)) == PRE_INC) + output_address (plus_constant (XEXP (XEXP (x, 0), 0), + GET_MODE_SIZE (GET_MODE (x)))); + else if (GET_CODE (XEXP (x, 0)) == PRE_DEC) + output_address (plus_constant (XEXP (XEXP (x, 0), 0), + - GET_MODE_SIZE (GET_MODE (x)))); + else + output_address (XEXP (x, 0)); + fputc (']', file); + break; + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "0x%08lx", l); + break; + } + /* Fall through. Let output_addr_const deal with it. */ + default : + output_addr_const (file, x); + break; + } +} + +/* Print a memory address as an operand to reference that memory location. 
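+   For example (register names are only placeholders): a bare register
+   address prints as fp, a reg-plus-offset address such as
+   (plus (reg fp) (const_int 4)) prints as fp,4, and a reg-plus-reg address
+   prints as fp,r2; the enclosing brackets come from the MEM case of
+   arc_print_operand above.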
*/ + +void +arc_print_operand_address (FILE *file, rtx addr) +{ + register rtx base, index = 0; + int offset = 0; + + switch (GET_CODE (addr)) + { + case REG : + fputs (reg_names[REGNO (addr)], file); + break; + case SYMBOL_REF : + if (/*???*/ 0 && SYMBOL_REF_FUNCTION_P (addr)) + { + fprintf (file, "%%st("); + output_addr_const (file, addr); + fprintf (file, ")"); + } + else + output_addr_const (file, addr); + break; + case PLUS : + if (GET_CODE (XEXP (addr, 0)) == CONST_INT) + offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1); + else if (GET_CODE (XEXP (addr, 1)) == CONST_INT) + offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0); + else + base = XEXP (addr, 0), index = XEXP (addr, 1); + gcc_assert (GET_CODE (base) == REG); + fputs (reg_names[REGNO (base)], file); + if (index == 0) + { + if (offset != 0) + fprintf (file, ",%d", offset); + } + else + { + switch (GET_CODE (index)) + { + case REG: + fprintf (file, ",%s", reg_names[REGNO (index)]); + break; + case SYMBOL_REF: + fputc (',', file), output_addr_const (file, index); + break; + default: + gcc_unreachable (); + } + } + break; + case PRE_INC : + case PRE_DEC : + /* We shouldn't get here as we've lost the mode of the memory object + (which says how much to inc/dec by. */ + gcc_unreachable (); + break; + default : + output_addr_const (file, addr); + break; + } +} + +/* Update compare/branch separation marker. */ + +static void +record_cc_ref (rtx insn) +{ + last_insn_set_cc_p = current_insn_set_cc_p; + + switch (get_attr_cond (insn)) + { + case COND_SET : + case COND_SET_ZN : + case COND_SET_ZNC : + if (get_attr_length (insn) == 1) + current_insn_set_cc_p = 1; + else + current_insn_set_cc_p = 0; + break; + default : + current_insn_set_cc_p = 0; + break; + } +} + +/* Conditional execution support. + + This is based on the ARM port but for now is much simpler. + + A finite state machine takes care of noticing whether or not instructions + can be conditionally executed, and thus decrease execution time and code + size by deleting branch instructions. The fsm is controlled by + final_prescan_insn, and controls the actions of PRINT_OPERAND. The patterns + in the .md file for the branch insns also have a hand in this. */ + +/* The state of the fsm controlling condition codes are: + 0: normal, do nothing special + 1: don't output this insn + 2: don't output this insn + 3: make insns conditional + 4: make insns conditional + + State transitions (state->state by whom, under what condition): + 0 -> 1 final_prescan_insn, if insn is conditional branch + 0 -> 2 final_prescan_insn, if the `target' is an unconditional branch + 1 -> 3 branch patterns, after having not output the conditional branch + 2 -> 4 branch patterns, after having not output the conditional branch + 3 -> 0 (*targetm.asm_out.internal_label), if the `target' label is reached + (the target label has CODE_LABEL_NUMBER equal to + arc_ccfsm_target_label). + 4 -> 0 final_prescan_insn, if `target' unconditional branch is reached + + If the jump clobbers the conditions then we use states 2 and 4. + + A similar thing can be done with conditional return insns. + + We also handle separating branches from sets of the condition code. + This is done here because knowledge of the ccfsm state is required, + we may not be outputting the branch. */ + +void +arc_final_prescan_insn (rtx insn, + rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + /* BODY will hold the body of INSN. 
*/ + register rtx body = PATTERN (insn); + + /* This will be 1 if trying to repeat the trick (i.e.: do the `else' part of + an if/then/else), and things need to be reversed. */ + int reverse = 0; + + /* If we start with a return insn, we only succeed if we find another one. */ + int seeking_return = 0; + + /* START_INSN will hold the insn from where we start looking. This is the + first insn after the following code_label if REVERSE is true. */ + rtx start_insn = insn; + + /* Update compare/branch separation marker. */ + record_cc_ref (insn); + + /* Allow -mdebug-ccfsm to turn this off so we can see how well it does. + We can't do this in macro FINAL_PRESCAN_INSN because its called from + final_scan_insn which has `optimize' as a local. */ + if (optimize < 2 || TARGET_NO_COND_EXEC) + return; + + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (arc_ccfsm_state == 4) + { + if (insn == arc_ccfsm_target_insn) + { + arc_ccfsm_target_insn = NULL; + arc_ccfsm_state = 0; + } + return; + } + + /* If in state 3, it is possible to repeat the trick, if this insn is an + unconditional branch to a label, and immediately following this branch + is the previous target label which is only used once, and the label this + branch jumps to is not too far off. Or in other words "we've done the + `then' part, see if we can do the `else' part." */ + if (arc_ccfsm_state == 3) + { + if (simplejump_p (insn)) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + { + /* ??? Isn't this always a barrier? */ + start_insn = next_nonnote_insn (start_insn); + } + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == arc_ccfsm_target_label + && LABEL_NUSES (start_insn) == 1) + reverse = TRUE; + else + return; + } + else if (GET_CODE (body) == RETURN) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == arc_ccfsm_target_label + && LABEL_NUSES (start_insn) == 1) + { + reverse = TRUE; + seeking_return = 1; + } + else + return; + } + else + return; + } + + if (GET_CODE (insn) != JUMP_INSN) + return; + + /* This jump might be paralleled with a clobber of the condition codes, + the jump should always come first. */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (reverse + || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) + { + int insns_skipped = 0, fail = FALSE, succeed = FALSE; + /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ + int then_not_else = TRUE; + /* Nonzero if next insn must be the target label. */ + int next_must_be_target_label_p; + rtx this_insn = start_insn, label = 0; + + /* Register the insn jumped to. 
*/ + if (reverse) + { + if (!seeking_return) + label = XEXP (SET_SRC (body), 0); + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) + label = XEXP (XEXP (SET_SRC (body), 1), 0); + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) + { + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN) + seeking_return = 1; + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN) + { + seeking_return = 1; + then_not_else = FALSE; + } + else + gcc_unreachable (); + + /* See how many insns this branch skips, and what kind of insns. If all + insns are okay, and the label or unconditional branch to the same + label is not too far away, succeed. */ + for (insns_skipped = 0, next_must_be_target_label_p = FALSE; + !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED; + insns_skipped++) + { + rtx scanbody; + + this_insn = next_nonnote_insn (this_insn); + if (!this_insn) + break; + + if (next_must_be_target_label_p) + { + if (GET_CODE (this_insn) == BARRIER) + continue; + if (GET_CODE (this_insn) == CODE_LABEL + && this_insn == label) + { + arc_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + } + + scanbody = PATTERN (this_insn); + + switch (GET_CODE (this_insn)) + { + case CODE_LABEL: + /* Succeed if it is the target label, otherwise fail since + control falls in from somewhere else. */ + if (this_insn == label) + { + arc_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case BARRIER: + /* Succeed if the following insn is the target label. + Otherwise fail. + If return insns are used then the last insn in a function + will be a barrier. */ + next_must_be_target_label_p = TRUE; + break; + + case CALL_INSN: + /* Can handle a call insn if there are no insns after it. + IE: The next "insn" is the target label. We don't have to + worry about delay slots as such insns are SEQUENCE's inside + INSN's. ??? It is possible to handle such insns though. */ + if (get_attr_cond (this_insn) == COND_CANUSE) + next_must_be_target_label_p = TRUE; + else + fail = TRUE; + break; + + case JUMP_INSN: + /* If this is an unconditional branch to the same label, succeed. + If it is to another label, do nothing. If it is conditional, + fail. */ + /* ??? Probably, the test for the SET and the PC are unnecessary. */ + + if (GET_CODE (scanbody) == SET + && GET_CODE (SET_DEST (scanbody)) == PC) + { + if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF + && XEXP (SET_SRC (scanbody), 0) == label && !reverse) + { + arc_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) + fail = TRUE; + } + else if (GET_CODE (scanbody) == RETURN + && seeking_return) + { + arc_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + break; + + case INSN: + /* We can only do this with insns that can use the condition + codes (and don't set them). */ + if (GET_CODE (scanbody) == SET + || GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + /* We can't handle other insns like sequences. 
*/ + else + fail = TRUE; + break; + + default: + break; + } + } + + if (succeed) + { + if ((!seeking_return) && (arc_ccfsm_state == 1 || reverse)) + arc_ccfsm_target_label = CODE_LABEL_NUMBER (label); + else + { + gcc_assert (seeking_return || arc_ccfsm_state == 2); + while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) + { + this_insn = next_nonnote_insn (this_insn); + gcc_assert (!this_insn + || (GET_CODE (this_insn) != BARRIER + && GET_CODE (this_insn) != CODE_LABEL)); + } + if (!this_insn) + { + /* Oh dear! we ran off the end, give up. */ + extract_insn_cached (insn); + arc_ccfsm_state = 0; + arc_ccfsm_target_insn = NULL; + return; + } + arc_ccfsm_target_insn = this_insn; + } + + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from + what it was. */ + if (!reverse) + arc_ccfsm_current_cc = get_arc_condition_code (XEXP (SET_SRC (body), + 0)); + + if (reverse || then_not_else) + arc_ccfsm_current_cc = ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current_cc); + } + + /* Restore recog_data. Getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call. */ + extract_insn_cached (insn); + } +} + +/* Record that we are currently outputting label NUM with prefix PREFIX. + It it's the label we're looking for, reset the ccfsm machinery. + + Called from (*targetm.asm_out.internal_label). */ + +void +arc_ccfsm_at_label (const char *prefix, int num) +{ + if (arc_ccfsm_state == 3 && arc_ccfsm_target_label == num + && !strcmp (prefix, "L")) + { + arc_ccfsm_state = 0; + arc_ccfsm_target_insn = NULL_RTX; + } +} + +/* See if the current insn, which is a conditional branch, is to be + deleted. */ + +int +arc_ccfsm_branch_deleted_p (void) +{ + if (arc_ccfsm_state == 1 || arc_ccfsm_state == 2) + return 1; + return 0; +} + +/* Record a branch isn't output because subsequent insns can be + conditionalized. */ + +void +arc_ccfsm_record_branch_deleted (void) +{ + /* Indicate we're conditionalizing insns now. */ + arc_ccfsm_state += 2; + + /* If the next insn is a subroutine call, we still need a nop between the + cc setter and user. We need to undo the effect of calling record_cc_ref + for the just deleted branch. */ + current_insn_set_cc_p = last_insn_set_cc_p; +} + +static void +arc_va_start (tree valist, rtx nextarg) +{ + /* See arc_setup_incoming_varargs for reasons for this oddity. */ + if (crtl->args.info < 8 + && (crtl->args.info & 1)) + nextarg = plus_constant (nextarg, UNITS_PER_WORD); + + std_expand_builtin_va_start (valist, nextarg); +} + +/* This is how to output a definition of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. */ + +static void +arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno) +{ + arc_ccfsm_at_label (prefix, labelno); + default_internal_label (stream, prefix, labelno); +} + +/* Worker function for TARGET_ASM_EXTERNAL_LIBCALL. */ + +static void +arc_external_libcall (rtx fun ATTRIBUTE_UNUSED) +{ +#if 0 +/* On the ARC we want to have libgcc's for multiple cpus in one binary. + We can't use `assemble_name' here as that will call ASM_OUTPUT_LABELREF + and we'll get another suffix added on if -mmangle-cpu. */ + if (TARGET_MANGLE_CPU_LIBGCC) + { + fprintf (FILE, "\t.rename\t_%s, _%s%s\n", + XSTR (SYMREF, 0), XSTR (SYMREF, 0), + arc_mangle_suffix); + } +#endif +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. 
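+   As a quick sketch of the rule implemented below: every aggregate, and any
+   scalar whose size is unknown or larger than 8 bytes, is returned in
+   memory; so a struct { char c; } goes through memory even though it would
+   fit in a register, while a long long still comes back in registers.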
*/ + +static bool +arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (AGGREGATE_TYPE_P (type)) + return true; + else + { + HOST_WIDE_INT size = int_size_in_bytes (type); + return (size == -1 || size > 8); + } +} + +/* For ARC, All aggregates and arguments greater than 8 bytes are + passed by reference. */ + +static bool +arc_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT size; + + if (type) + { + if (AGGREGATE_TYPE_P (type)) + return true; + size = int_size_in_bytes (type); + } + else + size = GET_MODE_SIZE (mode); + + return size > 8; +} + +/* Round SIZE up to a word boundary. */ +#define ROUND_ADVANCE(SIZE) \ +(((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round arg MODE/TYPE up to the next word boundary. */ +#define ROUND_ADVANCE_ARG(MODE, TYPE) \ +((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))) + +/* Round CUM up to the necessary point for argument MODE/TYPE. */ +#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \ +((((MODE) == BLKmode ? TYPE_ALIGN (TYPE) : GET_MODE_BITSIZE (MODE)) \ + > BITS_PER_WORD) \ + ? (((CUM) + 1) & ~1) \ + : (CUM)) + +/* Return boolean indicating arg of type TYPE and mode MODE will be passed in + a reg. This includes arguments that have to be passed by reference as the + pointer to them is passed in a reg if one is available (and that is what + we're given). */ +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ +((CUM) < MAX_ARC_PARM_REGS \ + && ((ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE)) \ + + ROUND_ADVANCE_ARG ((MODE), (TYPE)) \ + <= MAX_ARC_PARM_REGS))) + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ +/* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers + and the rest are pushed. */ + +static rtx +arc_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return (PASS_IN_REG_P (*cum, mode, type) + ? gen_rtx_REG (mode, ROUND_ADVANCE_CUM (*cum, mode, type)) + : NULL_RTX); +} + +/* Worker function for TARGET_FUNCTION_ARG_ADVANCE. */ + +static void +arc_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + *cum = (ROUND_ADVANCE_CUM (*cum, mode, type) + + ROUND_ADVANCE_ARG (mode, type)); +} + +/* Worker function for TARGET_FUNCTION_ARG_BOUNDARY. */ + +static unsigned int +arc_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + return (type != NULL_TREE + ? TYPE_ALIGN (type) + : (GET_MODE_BITSIZE (mode) <= PARM_BOUNDARY + ? PARM_BOUNDARY + : 2 * PARM_BOUNDARY)); +} + +/* Trampolines. */ +/* ??? This doesn't work yet because GCC will use as the address of a nested + function the address of the trampoline. We need to use that address + right shifted by 2. It looks like we'll need PSImode after all. :-( + + ??? 
The above comment sounds like it's doable via + TARGET_TRAMPOLINE_ADJUST_ADDRESS; no PSImode needed. + + On the ARC, the trampoline is quite simple as we have 32-bit immediate + constants. + + mov r24,STATIC + j.nd FUNCTION +*/ + +static void +arc_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx mem; + + mem = adjust_address (m_tramp, SImode, 0); + emit_move_insn (mem, GEN_INT (0x631f7c00)); + + mem = adjust_address (m_tramp, SImode, 4); + emit_move_insn (mem, chain_value); + + mem = adjust_address (m_tramp, SImode, 8); + emit_move_insn (mem, GEN_INT (0x381f0000)); + + mem = adjust_address (m_tramp, SImode, 12); + emit_move_insn (mem, fnaddr); + + emit_insn (gen_flush_icache (m_tramp)); +} + +/* Worker function for TARGET_CONDITIONAL_REGISTER_USAGE. */ + +static void +arc_conditional_register_usage (void) +{ + if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } +} + diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h new file mode 100644 index 000000000..0f2b99c19 --- /dev/null +++ b/gcc/config/arc/arc.h @@ -0,0 +1,935 @@ +/* Definitions of target machine for GNU compiler, Argonaut ARC cpu. + Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005, + 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* ??? This is an old port, and is undoubtedly suffering from bit rot. */ + +/* Things to do: + + - incscc, decscc? + - print active compiler options in assembler output +*/ + + +#undef ASM_SPEC +#undef LINK_SPEC +#undef LIB_SPEC +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC +#undef SIZE_TYPE +#undef PTRDIFF_TYPE +#undef WCHAR_TYPE +#undef WCHAR_TYPE_SIZE +#undef ASM_OUTPUT_LABELREF + +/* Print subsidiary information on the compiler version in use. */ +#define TARGET_VERSION fprintf (stderr, " (arc)") + +/* Names to predefine in the preprocessor for this target machine. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__arc__"); \ + if (TARGET_BIG_ENDIAN) \ + builtin_define ("__big_endian__"); \ + if (arc_cpu_type == 0) \ + builtin_define ("__base__"); \ + builtin_assert ("cpu=arc"); \ + builtin_assert ("machine=arc"); \ + } while (0) + +/* Pass -mmangle-cpu if we get -mcpu=*. + Doing it this way lets one have it on as default with -mcpu=*, + but also lets one turn it off with -mno-mangle-cpu. */ +#define CC1_SPEC "\ +%{mcpu=*:-mmangle-cpu} \ +%{EB:%{EL:%emay not use both -EB and -EL}} \ +%{EB:-mbig-endian} %{EL:-mlittle-endian} \ +" + +#define ASM_SPEC "%{EB} %{EL}" + +#define LINK_SPEC "%{v} %{EB} %{EL}" + +#define LIB_SPEC "-lc" + +#define STARTFILE_SPEC "%{!shared:crt0.o%s} crtinit.o%s" + +#define ENDFILE_SPEC "crtfini.o%s" + +/* Instruction set characteristics. + These are internal macros, set by the appropriate -mcpu= option. */ + +/* Nonzero means the cpu has a barrel shifter. 
*/ +#define TARGET_SHIFTER 0 + +/* Which cpu we're compiling for. */ +extern int arc_cpu_type; + +/* Check if CPU is an extension and set `arc_cpu_type' and `arc_mangle_cpu' + appropriately. The result should be nonzero if the cpu is recognized, + otherwise zero. This is intended to be redefined in a cover file. + This is used by arc_handle_option. */ +#define ARC_EXTENSION_CPU(cpu) 0 + + +/* Target machine storage layout. */ + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 1 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN) + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN) + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ +if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ +{ \ + (MODE) = SImode; \ +} + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 64 + +/* ALIGN FRAMES on word boundaries */ +#define ARC_STACK_ALIGN(LOC) (((LOC)+7) & ~7) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. */ +/* This is bigger than currently necessary for the ARC. If 8 byte floats are + ever added it's not clear whether they'll need such alignment or not. For + now we assume they will. We can always relax it if necessary but the + reverse isn't true. */ +#define BIGGEST_ALIGNMENT 64 + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT 32 + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Make arrays of chars word-aligned for the same reasons. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +/* On the ARC the lower address bits are masked to 0 as necessary. The chip + won't croak when given an unaligned address, but the insn will still fail + to produce the correct result. */ +#define STRICT_ALIGNMENT 1 + +/* Layout of source language data types. 
*/ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE "long unsigned int" +#define PTRDIFF_TYPE "long int" +#define WCHAR_TYPE "short unsigned int" +#define WCHAR_TYPE_SIZE 16 + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ +/* Registers 61, 62, and 63 are not really registers and we needn't treat + them as such. We still need a register for the condition code. */ +#define FIRST_PSEUDO_REGISTER 62 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + + 0-28 - general purpose registers + 29 - ilink1 (interrupt link register) + 30 - ilink2 (interrupt link register) + 31 - blink (branch link register) + 32-59 - reserved for extensions + 60 - LP_COUNT + 61 - condition code + + For doc purposes: + 61 - short immediate data indicator (setting flags) + 62 - long immediate data indicator + 63 - short immediate data indicator (not setting flags). + + The general purpose registers are further broken down into: + 0-7 - arguments/results + 8-15 - call used + 16-23 - call saved + 24 - call used, static chain pointer + 25 - call used, gptmp + 26 - global pointer + 27 - frame pointer + 28 - stack pointer + + By default, the extension registers are not available. */ + +#define FIXED_REGISTERS \ +{ 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 1, 1, 1, 1, 0, \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1 } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ +{ 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1 } + +/* If defined, an initializer for a vector of integers, containing the + numbers of hard registers in the order in which GCC should + prefer to use them (from most preferred to least). */ +#define REG_ALLOC_ORDER \ +{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, \ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 31, \ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \ + 27, 28, 29, 30 } + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. 
*/ +extern const unsigned int arc_hard_regno_mode_ok[]; +extern unsigned int arc_mode_class[]; +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ +((arc_hard_regno_mode_ok[REGNO] & arc_mode_class[MODE]) != 0) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. */ + +/* Tie QI/HI/SI modes together. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ +(GET_MODE_CLASS (MODE1) == MODE_INT \ + && GET_MODE_CLASS (MODE2) == MODE_INT \ + && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD) + +/* Register classes and constants. */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. + + It is important that any condition codes have class NO_REGS. + See `register_operand'. */ + +enum reg_class { + NO_REGS, LPCOUNT_REG, GENERAL_REGS, ALL_REGS, LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ "NO_REGS", "LPCOUNT_REG", "GENERAL_REGS", "ALL_REGS" } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ {0, 0}, {0, 0x10000000}, {0xffffffff, 0xfffffff}, \ + {0xffffffff, 0x1fffffff} } + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ +extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER]; +#define REGNO_REG_CLASS(REGNO) \ +(arc_regno_reg_class[REGNO]) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS GENERAL_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Get reg_class from a letter such as appears in the machine description. */ +#define REG_CLASS_FROM_LETTER(C) \ +((C) == 'l' ? LPCOUNT_REG /* ??? needed? */ \ + : NO_REGS) + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. 
*/ +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32) +#define REGNO_OK_FOR_INDEX_P(REGNO) \ +((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* The letters I, J, K, L, M, N, O, P in a register constraint string + can be used to stand for particular ranges of immediate operands. + This macro defines what the ranges are. + C is the letter, and VALUE is a constant value. + Return 1 if VALUE is in the range specified by C. */ +/* 'I' is used for short immediates (always signed). + 'J' is used for long immediates. + 'K' is used for any constant up to 64 bits (for 64x32 situations?). */ + +/* local to this file */ +#define SMALL_INT(X) ((unsigned) ((X) + 0x100) < 0x200) +/* local to this file */ +#define LARGE_INT(X) \ +((X) >= (-(HOST_WIDE_INT) 0x7fffffff - 1) \ + && (unsigned HOST_WIDE_INT)(X) <= (unsigned HOST_WIDE_INT) 0xffffffff) + +#define CONST_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'I' ? SMALL_INT (VALUE) \ + : (C) == 'J' ? LARGE_INT (VALUE) \ + : (C) == 'K' ? 1 \ + : 0) + +/* Similar, but for floating constants, and defining letters G and H. + Here VALUE is the CONST_DOUBLE rtx itself. */ +/* 'G' is used for integer values for the multiplication insns where the + operands are extended from 4 bytes to 8 bytes. + 'H' is used when any 64-bit constant is allowed. */ +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'G' ? arc_double_limm_p (VALUE) \ + : (C) == 'H' ? 1 \ + : 0) + +/* A C expression that defines the optional machine-dependent constraint + letters that can be used to segregate specific types of operands, + usually memory references, for the target machine. It should return 1 if + VALUE corresponds to the operand type represented by the constraint letter + C. If C is not defined as an extra constraint, the value returned should + be 0 regardless of VALUE. */ +/* ??? This currently isn't used. Waiting for PIC. */ +#if 0 +#define EXTRA_CONSTRAINT(VALUE, C) \ +((C) == 'R' ? (SYMBOL_REF_FUNCTION_P (VALUE) || GET_CODE (VALUE) == LABEL_REF) \ + : 0) +#endif + +/* Stack layout and stack pointer usage. */ + +/* Define this macro if pushing a word onto the stack moves the stack + pointer to a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET FIRST_PARM_OFFSET (0) + +/* Offset of first parameter from the argument pointer register value. */ +/* 4 bytes for each of previous fp, return address, and previous gp. + 4 byte reserved area for future considerations. */ +#define FIRST_PARM_OFFSET(FNDECL) 16 + +/* A C expression whose value is RTL representing the address in a + stack frame where the pointer to the caller's frame is stored. 
+ Assume that FRAMEADDR is an RTL expression for the address of the + stack frame itself. + + If you don't define this macro, the default is to return the value + of FRAMEADDR--that is, the stack frame address is also the address + of the stack word that points to the previous frame. */ +/* ??? unfinished */ +/*define DYNAMIC_CHAIN_ADDRESS (FRAMEADDR)*/ + +/* A C expression whose value is RTL representing the value of the + return address for the frame COUNT steps up from the current frame. + FRAMEADDR is the frame pointer of the COUNT frame, or the frame + pointer of the COUNT - 1 frame if `RETURN_ADDR_IN_PREVIOUS_FRAME' + is defined. */ +/* The current return address is in r31. The return address of anything + farther back is at [%fp,4]. */ +#if 0 /* The default value should work. */ +#define RETURN_ADDR_RTX(COUNT, FRAME) \ +(((COUNT) == -1) \ + ? gen_rtx_REG (Pmode, 31) \ + : copy_to_reg (gen_rtx_MEM (Pmode, \ + memory_address (Pmode, \ + plus_constant ((FRAME), \ + UNITS_PER_WORD))))) +#endif + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 28 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 27 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM FRAME_POINTER_REGNUM + +/* Register in which static-chain is passed to a function. This must + not be a register used by the prologue. */ +#define STATIC_CHAIN_REGNUM 24 + +/* C statement to store the difference between the frame pointer + and the stack pointer values immediately after the function prologue. */ +#define INITIAL_FRAME_POINTER_OFFSET(VAR) \ +((VAR) = arc_compute_frame_size (get_frame_size ())) + +/* Function argument passing. */ + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue should + increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. */ +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ +((CUM) = 0) + +/* The number of registers used for parameter passing. Local to this file. */ +#define MAX_ARC_PARM_REGS 8 + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(N) \ +((unsigned) (N) < MAX_ARC_PARM_REGS) + + +/* Function results. */ + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. */ +#define FUNCTION_VALUE(VALTYPE, FUNC) gen_rtx_REG (TYPE_MODE (VALTYPE), 0) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0) + +/* 1 if N is a possible register number for a function value + as seen by the caller. */ +/* ??? What about r1 in DI/DF values. 
*/ +#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0) + +/* Tell GCC to use TARGET_RETURN_IN_MEMORY. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ +#define EXIT_IGNORE_STACK 0 + +/* Epilogue delay slots. */ +#define DELAY_SLOTS_FOR_EPILOGUE arc_delay_slots_for_epilogue () + +#define ELIGIBLE_FOR_EPILOGUE_DELAY(TRIAL, SLOTS_FILLED) \ +arc_eligible_for_epilogue_delay (TRIAL, SLOTS_FILLED) + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ +#define FUNCTION_PROFILER(FILE, LABELNO) + +#define TRAMPOLINE_ALIGNMENT 32 +#define TRAMPOLINE_SIZE 16 + +/* Addressing modes, and classification of registers for them. */ + +/* Maximum number of registers that can appear in a valid memory address. */ +/* The `ld' insn allows 2, but the `st' insn only allows 1. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* We have pre inc/dec (load/store with update). */ +#define HAVE_PRE_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 + +/* Recognize any constant value that is a valid address. */ +#define CONSTANT_ADDRESS_P(X) \ +(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST) + +/* Nonzero if the constant value X is a legitimate general operand. + We can handle any 32- or 64-bit constant. */ +/* "1" should work since the largest constant should be a 64 bit critter. */ +/* ??? Not sure what to do for 64x32 compiler. */ +#define LEGITIMATE_CONSTANT_P(X) 1 + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +#ifndef REG_OK_STRICT + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) \ +((unsigned) REGNO (X) - 32 >= FIRST_PSEUDO_REGISTER - 32) +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define REG_OK_FOR_BASE_P(X) \ +((unsigned) REGNO (X) - 32 >= FIRST_PSEUDO_REGISTER - 32) + +#else + +/* Nonzero if X is a hard reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#endif + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. */ +/* The `ld' insn allows [reg],[reg+shimm],[reg+limm],[reg+reg],[limm] + but the `st' insn only allows [reg],[reg+shimm],[limm]. + The only thing we can do is only allow the most strict case `st' and hope + other parts optimize out the restrictions for `ld'. 
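+
+   As a concrete sketch of what the macros below accept (register names are
+   placeholders): [r13], [r13,8] (short-immediate offset), a bare symbol or
+   32-bit constant address, and pre-increment/decrement of a base register
+   all pass; a reg+reg form such as [r13,r14] is rejected even though `ld'
+   itself could encode it, because RTX_OK_FOR_INDEX_P is currently disabled.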
*/ + +/* local to this file */ +#define RTX_OK_FOR_BASE_P(X) \ +(REG_P (X) && REG_OK_FOR_BASE_P (X)) + +/* local to this file */ +#define RTX_OK_FOR_INDEX_P(X) \ +(0 && /*???*/ REG_P (X) && REG_OK_FOR_INDEX_P (X)) + +/* local to this file */ +/* ??? Loads can handle any constant, stores can only handle small ones. */ +#define RTX_OK_FOR_OFFSET_P(X) \ +(GET_CODE (X) == CONST_INT && SMALL_INT (INTVAL (X))) + +#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X) \ +(GET_CODE (X) == PLUS \ + && RTX_OK_FOR_BASE_P (XEXP (X, 0)) \ + && (RTX_OK_FOR_INDEX_P (XEXP (X, 1)) \ + || RTX_OK_FOR_OFFSET_P (XEXP (X, 1)))) + +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ +{ if (RTX_OK_FOR_BASE_P (X)) \ + goto ADDR; \ + if (LEGITIMATE_OFFSET_ADDRESS_P ((MODE), (X))) \ + goto ADDR; \ + if (GET_CODE (X) == CONST_INT && LARGE_INT (INTVAL (X))) \ + goto ADDR; \ + if (GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || GET_CODE (X) == CONST) \ + goto ADDR; \ + if ((GET_CODE (X) == PRE_DEC || GET_CODE (X) == PRE_INC) \ + /* We're restricted here by the `st' insn. */ \ + && RTX_OK_FOR_BASE_P (XEXP ((X), 0))) \ + goto ADDR; \ +} + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ +#define SELECT_CC_MODE(OP, X, Y) \ +arc_select_cc_mode (OP, X, Y) + +/* Return nonzero if SELECT_CC_MODE will never return MODE for a + floating point inequality comparison. */ +#define REVERSIBLE_CC_MODE(MODE) 1 /*???*/ + +/* Costs. */ + +/* Compute extra cost of moving data between one register class + and another. */ +#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) 2 + +/* Compute the cost of moving data between registers and memory. */ +/* Memory is 3 times as expensive as registers. + ??? Is that the right way to look at it? */ +#define MEMORY_MOVE_COST(MODE,CLASS,IN) \ +(GET_MODE_SIZE (MODE) <= UNITS_PER_WORD ? 6 : 12) + +/* The cost of a branch insn. */ +/* ??? What's the right value here? Branches are certainly more + expensive than reg->reg moves. */ +#define BRANCH_COST(speed_p, predictable_p) 2 + +/* Nonzero if access to memory by bytes is slow and undesirable. + For RISC chips, it means that access to memory by bytes is no + better than access by words when possible, so grab a whole word + and maybe make use of that. */ +#define SLOW_BYTE_ACCESS 1 + +/* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +/* On the ARC, calling through registers is slow. */ +#define NO_FUNCTION_CSE + +/* Section selection. */ +/* WARNING: These section names also appear in dwarfout.c. */ + +/* The names of the text, data, and readonly-data sections are runtime + selectable. */ + +#define ARC_SECTION_FORMAT "\t.section %s" +#define ARC_DEFAULT_TEXT_SECTION ".text" +#define ARC_DEFAULT_DATA_SECTION ".data" +#define ARC_DEFAULT_RODATA_SECTION ".rodata" + +extern const char *arc_text_section, *arc_data_section, *arc_rodata_section; + +/* initfini.c uses this in an asm. */ +#if defined (CRT_INIT) || defined (CRT_FINI) +#define TEXT_SECTION_ASM_OP "\t.section .text" +#else +#define TEXT_SECTION_ASM_OP arc_text_section +#endif +#define DATA_SECTION_ASM_OP arc_data_section + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP arc_rodata_section + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/* Define this macro if jump tables (for tablejump insns) should be + output in the text section, along with the assembler instructions. 
+ Otherwise, the readonly data section is used. + This macro is irrelevant if there is no separate readonly data section. */ +/*#define JUMP_TABLES_IN_TEXT_SECTION*/ + +/* For DWARF. Marginally different than default so output is "prettier" + (and consistent with above). */ +#define PUSHSECTION_ASM_OP "\t.section " + +/* Tell crtstuff.c we're using ELF. */ +#define OBJECT_FORMAT_ELF + +/* PIC */ + +/* The register number of the register used to address a table of static + data addresses in memory. In some cases this register is defined by a + processor's ``application binary interface'' (ABI). When this macro + is defined, RTL is generated for this register once, as with the stack + pointer and frame pointer registers. If this macro is not defined, it + is up to the machine-dependent files to allocate such a register (if + necessary). */ +#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? 26 : INVALID_REGNUM) + +/* Define this macro if the register defined by PIC_OFFSET_TABLE_REGNUM is + clobbered by calls. Do not define this macro if PIC_OFFSET_TABLE_REGNUM + is not defined. */ +/* This register is call-saved on the ARC. */ +/*#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED*/ + +/* A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent code. + You can assume that X satisfies CONSTANT_P, so you need not + check this. You can also assume `flag_pic' is true, so you need not + check it either. You need not define this macro if all constants + (including SYMBOL_REF) can be immediate operands when generating + position independent code. */ +/*#define LEGITIMATE_PIC_OPERAND_P(X)*/ + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will + end at the end of the line. */ +#define ASM_COMMENT_START ";" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF "" + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. */ +/* We mangle all user labels to provide protection from linking code + compiled for different cpus. */ +/* We work around a dwarfout.c deficiency by watching for labels from it and + not adding the '_' prefix nor the cpu suffix. There is a comment in + dwarfout.c that says it should be using (*targetm.asm_out.internal_label). */ +extern const char *arc_mangle_cpu; +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ +do { \ + if ((NAME)[0] == '.' && (NAME)[1] == 'L') \ + fprintf (FILE, "%s", NAME); \ + else \ + { \ + fputc ('_', FILE); \ + if (TARGET_MANGLE_CPU && arc_mangle_cpu != NULL) \ + fprintf (FILE, "%s_", arc_mangle_cpu); \ + fprintf (FILE, "%s", NAME); \ + } \ +} while (0) + +/* Assembler pseudo-op to equate one value with another. */ +/* ??? This is needed because dwarfout.c provides a default definition too + late for defaults.h (which contains the default definition of ASM_OUTPUT_DEF + that we use). */ +#define SET_ASM_OP "\t.set\t" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). 
*/ +#define REGISTER_NAMES \ +{"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "r26", "fp", "sp", "ilink1", "ilink2", "blink", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + "r56", "r57", "r58", "r59", "lp_count", "cc"} + +/* Entry to the insn conditionalizer. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ +arc_final_prescan_insn (INSN, OPVEC, NOPERANDS) + +/* A C expression which evaluates to true if CODE is a valid + punctuation character for use in the `PRINT_OPERAND' macro. */ +extern char arc_punct_chars[256]; +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ +arc_punct_chars[(unsigned char) (CHAR)] + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ +#define PRINT_OPERAND(FILE, X, CODE) \ +arc_print_operand (FILE, X, CODE) + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand that is a memory + reference whose address is ADDR. ADDR is an RTL expression. */ +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ +arc_print_operand_address (FILE, ADDR) + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + fprintf (FILE, "\t.word %%st("); \ + assemble_name (FILE, label); \ + fprintf (FILE, ")\n"); \ +} while (0) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + fprintf (FILE, "\t.word %%st("); \ + assemble_name (FILE, label); \ + fprintf (FILE, "-"); \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \ + assemble_name (FILE, label); \ + fprintf (FILE, ")\n"); \ +} while (0) + +/* The desired alignment for the location counter at the beginning + of a loop. */ +/* On the ARC, align loops to 32 byte boundaries (cache line size) + if -malign-loops. */ +#define LOOP_ALIGN(LABEL) (TARGET_ALIGN_LOOPS ? 5 : 0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ +do { if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); } while (0) + +/* Debugging information. */ + +/* Generate DBX and DWARF debugging information. */ +#define DBX_DEBUGGING_INFO 1 + +/* Prefer STABS (for now). */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + +/* Turn off splitting of long stabs. */ +#define DBX_CONTIN_LENGTH 0 + +/* Miscellaneous. */ + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE Pmode + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. 
The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +/* ??? The arc doesn't have full 32-bit pointers, but making this PSImode has + its own problems (you have to add extendpsisi2 and trucnsipsi2 but how does + one do it without getting excess code?). Try to avoid it. */ +#define Pmode SImode + +/* A function address in a call instruction. */ +#define FUNCTION_MODE SImode + +/* alloca should avoid clobbering the old register save area. */ +/* ??? Not defined in tm.texi. */ +#define SETJMP_VIA_SAVE_AREA + +/* ARC function types. */ +enum arc_function_type { + ARC_FUNCTION_UNKNOWN, ARC_FUNCTION_NORMAL, + /* These are interrupt handlers. The name corresponds to the register + name that contains the return address. */ + ARC_FUNCTION_ILINK1, ARC_FUNCTION_ILINK2 +}; +#define ARC_INTERRUPT_P(TYPE) \ +((TYPE) == ARC_FUNCTION_ILINK1 || (TYPE) == ARC_FUNCTION_ILINK2) +/* Compute the type of a function from its DECL. */ diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md new file mode 100644 index 000000000..09e47daf1 --- /dev/null +++ b/gcc/config/arc/arc.md @@ -0,0 +1,1376 @@ +;; Machine description of the Argonaut ARC cpu for GNU C compiler +;; Copyright (C) 1994, 1997, 1998, 1999, 2000, 2004, 2005, 2007, 2008 +;; Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; ??? This is an old port, and is undoubtedly suffering from bit rot. + +;; Insn type. Used to default other attribute values. + +(define_attr "type" + "move,load,store,cmove,unary,binary,compare,shift,mul,uncond_branch,branch,call,call_no_delay_slot,multi,misc" + (const_string "binary")) + +;; Length (in # of insns, long immediate constants counted too). +;; ??? There's a nasty interaction between the conditional execution fsm +;; and insn lengths: insns with shimm values cannot be conditionally executed. 
+(define_attr "length" "" + (cond [(eq_attr "type" "load") + (if_then_else (match_operand 1 "long_immediate_loadstore_operand" "") + (const_int 2) (const_int 1)) + + (eq_attr "type" "store") + (if_then_else (match_operand 0 "long_immediate_loadstore_operand" "") + (const_int 2) (const_int 1)) + + (eq_attr "type" "move,unary,compare") + (if_then_else (match_operand 1 "long_immediate_operand" "") + (const_int 2) (const_int 1)) + + (eq_attr "type" "binary,mul") + (if_then_else (match_operand 2 "long_immediate_operand" "") + (const_int 2) (const_int 1)) + + (eq_attr "type" "cmove") + (if_then_else (match_operand 2 "register_operand" "") + (const_int 1) (const_int 2)) + + (eq_attr "type" "multi") (const_int 2) + ] + + (const_int 1))) + +;; The length here is the length of a single asm. Unfortunately it might be +;; 1 or 2 so we must allow for 2. That's ok though. How often will users +;; lament asm's not being put in delay slots? +(define_asm_attributes + [(set_attr "length" "2") + (set_attr "type" "multi")]) + +;; Condition codes: this one is used by final_prescan_insn to speed up +;; conditionalizing instructions. It saves having to scan the rtl to see if +;; it uses or alters the condition codes. + +;; USE: This insn uses the condition codes (e.g.: a conditional branch). +;; CANUSE: This insn can use the condition codes (for conditional execution). +;; SET: All condition codes are set by this insn. +;; SET_ZN: the Z and N flags are set by this insn. +;; SET_ZNC: the Z, N, and C flags are set by this insn. +;; CLOB: The condition codes are set to unknown values by this insn. +;; NOCOND: This insn can't use and doesn't affect the condition codes. + +(define_attr "cond" "use,canuse,set,set_zn,set_znc,clob,nocond" + (cond [(and (eq_attr "type" "unary,binary,move") + (eq_attr "length" "1")) + (const_string "canuse") + + (eq_attr "type" "compare") + (const_string "set") + + (eq_attr "type" "cmove,branch") + (const_string "use") + + (eq_attr "type" "multi,misc") + (const_string "clob") + ] + + (const_string "nocond"))) + +;; Delay slots. + +(define_attr "in_delay_slot" "false,true" + (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,multi") + (const_string "false") + ] + + (if_then_else (eq_attr "length" "1") + (const_string "true") + (const_string "false")))) + +(define_delay (eq_attr "type" "call") + [(eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true")]) + +(define_delay (eq_attr "type" "branch,uncond_branch") + [(eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true")]) + +;; Scheduling description for the ARC + +(define_cpu_unit "branch") + +(define_insn_reservation "any_insn" 1 (eq_attr "type" "!load,compare,branch") + "nothing") + +;; 1) A conditional jump cannot immediately follow the insn setting the flags. +;; This isn't a complete solution as it doesn't come with guarantees. That +;; is done in the branch patterns and in arc_print_operand. This exists to +;; avoid inserting a nop when we can. + +(define_insn_reservation "compare" 1 (eq_attr "type" "compare") + "nothing,branch") + +(define_insn_reservation "branch" 1 (eq_attr "type" "branch") + "branch") + +;; 2) References to loaded registers should wait a cycle. + +;; Memory with load-delay of 1 (i.e., 2 cycle load). + +(define_insn_reservation "memory" 2 (eq_attr "type" "load") + "nothing") + +;; Move instructions. 
+ +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (QImode, operands[1]); +}") + +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "move_dest_operand" "=r,r,r,m") + (match_operand:QI 1 "move_src_operand" "rI,Ji,m,r"))] +;; ??? Needed? + "register_operand (operands[0], QImode) + || register_operand (operands[1], QImode)" + "@ + mov%? %0,%1 + mov%? %0,%1 + ldb%U1%V1 %0,%1 + stb%U0%V0 %1,%0" + [(set_attr "type" "move,move,load,store")]) + +;; ??? This may never match since there's no cmpqi insn. + +(define_insn "*movqi_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (sign_extend:SI (match_operand:QI 1 "move_src_operand" "rIJi")) + (const_int 0))) + (set (match_operand:QI 0 "move_dest_operand" "=r") + (match_dup 1))] + "" + "mov%?.f %0,%1" + [(set_attr "type" "move") + (set_attr "cond" "set_zn")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (HImode, operands[1]); +}") + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "move_dest_operand" "=r,r,r,m") + (match_operand:HI 1 "move_src_operand" "rI,Ji,m,r"))] + "register_operand (operands[0], HImode) + || register_operand (operands[1], HImode)" + "@ + mov%? %0,%1 + mov%? %0,%1 + ldw%U1%V1 %0,%1 + stw%U0%V0 %1,%0" + [(set_attr "type" "move,move,load,store")]) + +;; ??? Will this ever match? + +(define_insn "*movhi_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (sign_extend:SI (match_operand:HI 1 "move_src_operand" "rIJi")) + (const_int 0))) + (set (match_operand:HI 0 "move_dest_operand" "=r") + (match_dup 1))] +;; ??? Needed? + "register_operand (operands[0], HImode) + || register_operand (operands[1], HImode)" + "mov%?.f %0,%1" + [(set_attr "type" "move") + (set_attr "cond" "set_zn")]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (SImode, operands[1]); +}") + +(define_insn "*movsi_insn" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r,r,m") + (match_operand:SI 1 "move_src_operand" "rI,GJi,m,r"))] + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode)" + "@ + mov%? %0,%1 + mov%? %0,%S1 + ld%U1%V1 %0,%1 + st%U0%V0 %1,%0" + [(set_attr "type" "move,move,load,store")]) + +(define_insn "*movsi_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (match_operand:SI 1 "move_src_operand" "rIJi") + (const_int 0))) + (set (match_operand:SI 0 "move_dest_operand" "=r") + (match_dup 1))] + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode)" + "mov%?.f %0,%S1" + [(set_attr "type" "move") + (set_attr "cond" "set_zn")]) + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. 
*/ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (DImode, operands[1]); +}") + +(define_insn "*movdi_insn" + [(set (match_operand:DI 0 "move_dest_operand" "=r,r,r,m") + (match_operand:DI 1 "move_double_src_operand" "r,HK,m,r"))] + "register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)" + "* +{ + switch (which_alternative) + { + case 0 : + /* We normally copy the low-numbered register first. However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. */ + if (REGNO (operands[0]) == REGNO (operands[1]) + 1) + return \"mov %R0,%R1\;mov %0,%1\"; + else + return \"mov %0,%1\;mov %R0,%R1\"; + case 1 : + return \"mov %0,%L1\;mov %R0,%H1\"; + case 2 : + /* If the low-address word is used in the address, we must load it + last. Otherwise, load it first. Note that we cannot have + auto-increment in that case since the address register is known to be + dead. */ + if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, + operands [1], 0)) + return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\"; + else + return \"ld%V1 %0,%1\;ld%V1 %R0,%R1\"; + case 3 : + return \"st%V0 %1,%0\;st%V0 %R1,%R0\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "move,move,load,store") + ;; ??? The ld/st values could be 4 if it's [reg,bignum]. + (set_attr "length" "2,4,2,2")]) + +;(define_expand "movdi" +; [(set (match_operand:DI 0 "general_operand" "") +; (match_operand:DI 1 "general_operand" ""))] +; "" +; " +;{ +; /* Flow doesn't understand that this is effectively a DFmode move. +; It doesn't know that all of `operands[0]' is set. */ +; emit_clobber (operands[0]); +; +; /* Emit insns that movsi_insn can handle. */ +; emit_insn (gen_movsi (operand_subword (operands[0], 0, 0, DImode), +; operand_subword (operands[1], 0, 0, DImode))); +; emit_insn (gen_movsi (operand_subword (operands[0], 1, 0, DImode), +; operand_subword (operands[1], 1, 0, DImode))); +; DONE; +;}") + +;; Floating point move insns. + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (SFmode, operands[1]); +}") + +(define_insn "*movsf_insn" + [(set (match_operand:SF 0 "move_dest_operand" "=r,r,r,m") + (match_operand:SF 1 "move_src_operand" "r,E,m,r"))] + "register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)" + "@ + mov%? %0,%1 + mov%? %0,%1 ; %A1 + ld%U1%V1 %0,%1 + st%U0%V0 %1,%0" + [(set_attr "type" "move,move,load,store")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (DFmode, operands[1]); +}") + +(define_insn "*movdf_insn" + [(set (match_operand:DF 0 "move_dest_operand" "=r,r,r,m") + (match_operand:DF 1 "move_double_src_operand" "r,E,m,r"))] + "register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode)" + "* +{ + switch (which_alternative) + { + case 0 : + /* We normally copy the low-numbered register first. However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. 
*/ + if (REGNO (operands[0]) == REGNO (operands[1]) + 1) + return \"mov %R0,%R1\;mov %0,%1\"; + else + return \"mov %0,%1\;mov %R0,%R1\"; + case 1 : + return \"mov %0,%L1\;mov %R0,%H1 ; %A1\"; + case 2 : + /* If the low-address word is used in the address, we must load it + last. Otherwise, load it first. Note that we cannot have + auto-increment in that case since the address register is known to be + dead. */ + if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, + operands [1], 0)) + return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\"; + else + return \"ld%V1 %0,%1\;ld%V1 %R0,%R1\"; + case 3 : + return \"st%V0 %1,%0\;st%V0 %R1,%R0\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "move,move,load,store") + ;; ??? The ld/st values could be 4 if it's [reg,bignum]. + (set_attr "length" "2,4,2,2")]) + +;(define_expand "movdf" +; [(set (match_operand:DF 0 "general_operand" "") +; (match_operand:DF 1 "general_operand" ""))] +; "" +; " +;{ +; /* Flow doesn't understand that this is effectively a DFmode move. +; It doesn't know that all of `operands[0]' is set. */ +; emit_clobber (operands[0]); +; +; /* Emit insns that movsi_insn can handle. */ +; emit_insn (gen_movsi (operand_subword (operands[0], 0, 0, DFmode), +; operand_subword (operands[1], 0, 0, DFmode))); +; emit_insn (gen_movsi (operand_subword (operands[0], 1, 0, DFmode), +; operand_subword (operands[1], 1, 0, DFmode))); +; DONE; +;}") + +;; Load/Store with update instructions. +;; +;; Some of these we can get by using pre-decrement or pre-increment, but the +;; hardware can also do cases where the increment is not the size of the +;; object. +;; +;; In all these cases, we use operands 0 and 1 for the register being +;; incremented because those are the operands that local-alloc will +;; tie and these are the pair most likely to be tieable (and the ones +;; that will benefit the most). +;; +;; We use match_operator here because we need to know whether the memory +;; object is volatile or not. 
+ +(define_insn "*loadqi_update" + [(set (match_operand:QI 3 "register_operand" "=r,r") + (match_operator:QI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")])) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldb.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*load_zeroextendqisi_update" + [(set (match_operand:SI 3 "register_operand" "=r,r") + (zero_extend:SI (match_operator:QI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")]))) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldb.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*load_signextendqisi_update" + [(set (match_operand:SI 3 "register_operand" "=r,r") + (sign_extend:SI (match_operator:QI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")]))) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldb.x.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*storeqi_update" + [(set (match_operator:QI 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:QI 3 "register_operand" "r")) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "stb.a%V4 %3,[%0,%2]" + [(set_attr "type" "store") + (set_attr "length" "1")]) + +(define_insn "*loadhi_update" + [(set (match_operand:HI 3 "register_operand" "=r,r") + (match_operator:HI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")])) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldw.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*load_zeroextendhisi_update" + [(set (match_operand:SI 3 "register_operand" "=r,r") + (zero_extend:SI (match_operator:HI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")]))) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldw.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*load_signextendhisi_update" + [(set (match_operand:SI 3 "register_operand" "=r,r") + (sign_extend:SI (match_operator:HI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")]))) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ldw.x.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*storehi_update" + [(set (match_operator:HI 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:HI 3 "register_operand" "r")) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "stw.a%V4 %3,[%0,%2]" + [(set_attr "type" "store") + (set_attr "length" "1")]) + +(define_insn "*loadsi_update" + [(set (match_operand:SI 3 
"register_operand" "=r,r") + (match_operator:SI 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")])) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ld.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*storesi_update" + [(set (match_operator:SI 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:SI 3 "register_operand" "r")) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "st.a%V4 %3,[%0,%2]" + [(set_attr "type" "store") + (set_attr "length" "1")]) + +(define_insn "*loadsf_update" + [(set (match_operand:SF 3 "register_operand" "=r,r") + (match_operator:SF 4 "load_update_operand" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rI,J")])) + (set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "ld.a%V4 %3,[%0,%2]" + [(set_attr "type" "load,load") + (set_attr "length" "1,2")]) + +(define_insn "*storesf_update" + [(set (match_operator:SF 4 "store_update_operand" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "short_immediate_operand" "I")]) + (match_operand:SF 3 "register_operand" "r")) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "st.a%V4 %3,[%0,%2]" + [(set_attr "type" "store") + (set_attr "length" "1")]) + +;; Conditional move instructions. + +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "nonmemory_operand" "") + (match_operand:SI 3 "register_operand" "")))] + "" + " +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx cc_reg = gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx); +}") + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (match_operand 1 "comparison_operator" "") + (match_operand:SF 2 "nonmemory_operand" "") + (match_operand:SF 3 "register_operand" "")))] + "" + " +{ + enum rtx_code code = GET_CODE (operands[1]); + rtx cc_reg = gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx); +}") + +(define_insn "*movsicc_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "nonmemory_operand" "rJi") + (match_operand:SI 3 "register_operand" "0")))] + "" + "mov.%d1 %0,%S2" + [(set_attr "type" "cmove")]) + +(define_insn "*movsfcc_insn" + [(set (match_operand:SF 0 "register_operand" "=r,r") + (if_then_else:SF (match_operand 1 "comparison_operator" "") + (match_operand:SF 2 "nonmemory_operand" "r,E") + (match_operand:SF 3 "register_operand" "0,0")))] + "" + "@ + mov.%d1 %0,%2 + mov.%d1 %0,%2 ; %A2" + [(set_attr "type" "cmove,cmove")]) + + +;; Zero extension instructions. +;; ??? We don't support volatile memrefs here, but I'm not sure why. + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))] + "" + "@ + extb%? 
%0,%1 + ldb%U1 %0,%1" + [(set_attr "type" "unary,load")]) + +(define_insn "*zero_extendqihi2_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (zero_extend:SI (match_operand:QI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_dup 1)))] + "" + "extb%?.f %0,%1" + [(set_attr "type" "unary") + (set_attr "cond" "set_zn")]) + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))] + "" + "@ + extb%? %0,%1 + ldb%U1 %0,%1" + [(set_attr "type" "unary,load")]) + +(define_insn "*zero_extendqisi2_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (zero_extend:SI (match_operand:QI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_dup 1)))] + "" + "extb%?.f %0,%1" + [(set_attr "type" "unary") + (set_attr "cond" "set_zn")]) + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "r,m")))] + "" + "@ + extw%? %0,%1 + ldw%U1 %0,%1" + [(set_attr "type" "unary,load")]) + +(define_insn "*zero_extendhisi2_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_dup 1)))] + "" + "extw%?.f %0,%1" + [(set_attr "type" "unary") + (set_attr "cond" "set_zn")]) + +;; Sign extension instructions. + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))] + "" + "@ + sexb%? %0,%1 + ldb.x%U1 %0,%1" + [(set_attr "type" "unary,load")]) + +(define_insn "*extendqihi2_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (sign_extend:SI (match_operand:QI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (match_dup 1)))] + "" + "sexb%?.f %0,%1" + [(set_attr "type" "unary") + (set_attr "cond" "set_zn")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))] + "" + "@ + sexb%? %0,%1 + ldb.x%U1 %0,%1" + [(set_attr "type" "unary,load")]) + +(define_insn "*extendqisi2_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (sign_extend:SI (match_operand:QI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_dup 1)))] + "" + "sexb%?.f %0,%1" + [(set_attr "type" "unary") + (set_attr "cond" "set_zn")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "r,m")))] + "" + "@ + sexw%? %0,%1 + ldw.x%U1 %0,%1" + [(set_attr "type" "unary,load")]) + +(define_insn "*extendhisi2_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (sign_extend:SI (match_operand:HI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_dup 1)))] + "" + "sexw%?.f %0,%1" + [(set_attr "type" "unary") + (set_attr "cond" "set_zn")]) + +;; Arithmetic instructions. + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")))] + "" + "add%? 
%0,%1,%2") + +(define_insn "*addsi3_set_cc_insn" + [(set (reg:CC 61) (compare:CC + (plus:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "" + "add%?.f %0,%1,%2" + [(set_attr "cond" "set")]) + +(define_insn "adddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "nonmemory_operand" "%r") + (match_operand:DI 2 "nonmemory_operand" "ri"))) + (clobber (reg:CC 61))] + "" + "* +{ + rtx op2 = operands[2]; + + if (GET_CODE (op2) == CONST_INT) + { + int sign = INTVAL (op2); + if (sign < 0) + return \"add.f %L0,%L1,%2\;adc %H0,%H1,-1\"; + else + return \"add.f %L0,%L1,%2\;adc %H0,%H1,0\"; + } + else + return \"add.f %L0,%L1,%L2\;adc %H0,%H1,%H2\"; +}" + [(set_attr "length" "2")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")))] + "" + "sub%? %0,%1,%2") + +(define_insn "*subsi3_set_cc_insn" + [(set (reg:CC 61) (compare:CC + (minus:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_dup 1) + (match_dup 2)))] + "" + "sub%?.f %0,%1,%2" + [(set_attr "cond" "set")]) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (match_operand:DI 1 "nonmemory_operand" "r") + (match_operand:DI 2 "nonmemory_operand" "ri"))) + (clobber (reg:CC 61))] + "" + "* +{ + rtx op2 = operands[2]; + + if (GET_CODE (op2) == CONST_INT) + { + int sign = INTVAL (op2); + if (sign < 0) + return \"sub.f %L0,%L1,%2\;sbc %H0,%H1,-1\"; + else + return \"sub.f %L0,%L1,%2\;sbc %H0,%H1,0\"; + } + else + return \"sub.f %L0,%L1,%L2\;sbc %H0,%H1,%H2\"; +}" + [(set_attr "length" "2")]) + +;; Boolean instructions. +;; +;; We don't define the DImode versions as expand_binop does a good enough job. + +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")))] + "" + "and%? %0,%1,%2") + +(define_insn "*andsi3_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (and:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (and:SI (match_dup 1) + (match_dup 2)))] + "" + "and%?.f %0,%1,%2" + [(set_attr "cond" "set_zn")]) + +(define_insn "*bicsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (and:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J") + (not:SI (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r"))))] + "" + "bic%? %0,%1,%2" + [(set_attr "length" "1,2,1,2")]) + +(define_insn "*bicsi3_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (and:SI (match_operand:SI 1 "register_operand" "%r") + (not:SI (match_operand:SI 2 "nonmemory_operand" "rIJ"))) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (and:SI (match_dup 1) + (not:SI (match_dup 2))))] + "" + "bic%?.f %0,%1,%2" + [(set_attr "cond" "set_zn")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")))] + "" + "or%? 
%0,%1,%2") + +(define_insn "*iorsi3_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (ior:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (match_dup 1) + (match_dup 2)))] + "" + "or%?.f %0,%1,%2" + [(set_attr "cond" "set_zn")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")))] + "" + "xor%? %0,%1,%2") + +(define_insn "*xorsi3_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rIJ")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_dup 1) + (match_dup 2)))] + "" + "xor%?.f %0,%1,%2" + [(set_attr "cond" "set_zn")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "sub%? %0,0,%1" + [(set_attr "type" "unary")]) + +(define_insn "*negsi2_set_cc_insn" + [(set (reg:CC 61) (compare:CC + (neg:SI (match_operand:SI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_dup 1)))] + "" + "sub%?.f %0,0,%1" + [(set_attr "type" "unary") + (set_attr "cond" "set")]) + +(define_insn "negdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_operand:DI 1 "register_operand" "r"))) + (clobber (reg:SI 61))] + "" + "sub.f %L0,0,%L1\;sbc %H0,0,%H1" + [(set_attr "type" "unary") + (set_attr "length" "2")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "xor%? %0,%1,-1" + [(set_attr "type" "unary")]) + +(define_insn "*one_cmplsi2_set_cc_insn" + [(set (reg:CCZN 61) (compare:CCZN + (not:SI (match_operand:SI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_dup 1)))] + "" + "xor%?.f %0,%1,-1" + [(set_attr "type" "unary") + (set_attr "cond" "set_zn")]) + +;; Shift instructions. + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (! TARGET_SHIFTER) + { + emit_insn (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ASHIFT (SImode, operands[1], + operands[2])), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_SCRATCH (SImode))))); + DONE; + } +}") + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (! TARGET_SHIFTER) + { + emit_insn (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + operands[2])), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_SCRATCH (SImode))))); + DONE; + } +}") + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (! 
TARGET_SHIFTER) + { + emit_insn (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_LSHIFTRT (SImode, + operands[1], + operands[2])), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_SCRATCH (SImode))))); + DONE; + } +}") + +(define_insn "*ashlsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (ashift:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J") + (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))] + "TARGET_SHIFTER" + "asl%? %0,%1,%2" + [(set_attr "type" "shift") + (set_attr "length" "1,2,1,2")]) + +(define_insn "*ashrsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J") + (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))] + "TARGET_SHIFTER" + "asr%? %0,%1,%2" + [(set_attr "type" "shift") + (set_attr "length" "1,2,1,2")]) + +(define_insn "*lshrsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J") + (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))] + "TARGET_SHIFTER" + "lsr%? %0,%1,%2" + [(set_attr "type" "shift") + (set_attr "length" "1,2,1,2")]) + +(define_insn "*shift_si3" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "rIJ")])) + (clobber (match_scratch:SI 4 "=&r"))] + "! TARGET_SHIFTER" + "* return output_shift (operands);" + [(set_attr "type" "shift") + (set_attr "length" "8")]) + +;; Compare instructions. +;; This controls RTL generation and register allocation. + +;; ??? We may be able to relax this a bit by adding a new constant 'K' for 0. +;; This assumes sub.f 0,symbol,0 is a valid insn. +;; Note that "sub.f 0,r0,1" is an 8 byte insn. To avoid unnecessarily +;; creating 8 byte insns we duplicate %1 in the destination reg of the insn +;; if it's a small constant. + +(define_insn "*cmpsi_cc_insn" + [(set (reg:CC 61) + (compare:CC (match_operand:SI 0 "register_operand" "r,r,r") + (match_operand:SI 1 "nonmemory_operand" "r,I,J")))] + "" + "@ + sub.f 0,%0,%1 + sub.f %1,%0,%1 + sub.f 0,%0,%1" + [(set_attr "type" "compare,compare,compare")]) + +(define_insn "*cmpsi_cczn_insn" + [(set (reg:CCZN 61) + (compare:CCZN (match_operand:SI 0 "register_operand" "r,r,r") + (match_operand:SI 1 "nonmemory_operand" "r,I,J")))] + "" + "@ + sub.f 0,%0,%1 + sub.f %1,%0,%1 + sub.f 0,%0,%1" + [(set_attr "type" "compare,compare,compare")]) + +(define_insn "*cmpsi_ccznc_insn" + [(set (reg:CCZNC 61) + (compare:CCZNC (match_operand:SI 0 "register_operand" "r,r,r") + (match_operand:SI 1 "nonmemory_operand" "r,I,J")))] + "" + "@ + sub.f 0,%0,%1 + sub.f %1,%0,%1 + sub.f 0,%0,%1" + [(set_attr "type" "compare,compare,compare")]) + +;; Next come the scc insn and its expander. 
+ +(define_expand "cstoresi4" + [(set (match_dup 4) + (match_op_dup 5 + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "nonmemory_operand" "")])) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_dup 4) + (const_int 0)]))] + "" + " +{ + operands[4] = gen_compare_reg (GET_CODE (operands[1]), + operands[2], operands[3]); + operands[5] = gen_rtx_fmt_ee (COMPARE, + GET_MODE (operands[4]), + operands[2], operands[3]); +}") + +(define_insn "*scc_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "comparison_operator" [(reg 61) (const_int 0)]))] + "" + "mov %0,1\;sub.%D1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "2")]) + +;; ??? Look up negscc insn. See pa.md for example. +(define_insn "*neg_scc_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_operator:SI 1 "comparison_operator" + [(reg 61) (const_int 0)])))] + "" + "mov %0,-1\;sub.%D1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "2")]) + +(define_insn "*not_scc_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operator:SI 1 "comparison_operator" + [(reg 61) (const_int 0)])))] + "" + "mov %0,1\;sub.%d1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "2")]) + +;; These control RTL generation for conditional jump insns + +(define_expand "cbranchsi4" + [(set (match_dup 4) + (match_op_dup 5 + [(match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")])) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" + [(match_dup 4) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " +{ + operands[4] = gen_compare_reg (GET_CODE (operands[0]), + operands[1], operands[2]); + operands[5] = gen_rtx_fmt_ee (COMPARE, + GET_MODE (operands[4]), + operands[1], operands[2]); +}") + +;; Now match both normal and inverted jump. + +(define_insn "*branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg 61) (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + return \"%~b%d1%# %l0\"; +}" + [(set_attr "type" "branch")]) + +(define_insn "*rev_branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg 61) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "REVERSIBLE_CC_MODE (GET_MODE (XEXP (operands[1], 0)))" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + return \"%~b%D1%# %l0\"; +}" + [(set_attr "type" "branch")]) + +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "b%* %l0" + [(set_attr "type" "uncond_branch")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "address_operand" "p"))] + "" + "j%* %a0" + [(set_attr "type" "uncond_branch")]) + +;; Implement a switch statement. +;; This wouldn't be necessary in the non-pic case if we could distinguish +;; label refs of the jump table from other label refs. The problem is that +;; label refs are output as "%st(.LL42)" but we don't want the %st - we want +;; the real address since it's the address of the table. 
+ +(define_expand "casesi" + [(set (match_dup 5) + (minus:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "nonmemory_operand" ""))) + (set (reg:CC 61) + (compare:CC (match_dup 5) + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else (gtu (reg:CC 61) + (const_int 0)) + (label_ref (match_operand 4 "" "")) + (pc))) + (parallel + [(set (pc) + (mem:SI (plus:SI (mult:SI (match_dup 5) + (const_int 4)) + (label_ref (match_operand 3 "" ""))))) + (clobber (match_scratch:SI 6 "")) + (clobber (match_scratch:SI 7 ""))])] + "" + " +{ + operands[5] = gen_reg_rtx (SImode); +}") + +(define_insn "*casesi_insn" + [(set (pc) + (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r") + (const_int 4)) + (label_ref (match_operand 1 "" ""))))) + (clobber (match_scratch:SI 2 "=r")) + (clobber (match_scratch:SI 3 "=r"))] + "" + "* +{ + output_asm_insn (\"mov %2,%1\", operands); + if (TARGET_SHIFTER) + output_asm_insn (\"asl %3,%0,2\", operands); + else + output_asm_insn (\"asl %3,%0\;asl %3,%3\", operands); + output_asm_insn (\"ld %2,[%2,%3]\", operands); + output_asm_insn (\"j.nd %a2\", operands); + return \"\"; +}" + [(set_attr "type" "uncond_branch") + (set_attr "length" "6")]) + +(define_insn "tablejump" + [(set (pc) (match_operand:SI 0 "address_operand" "p")) + (use (label_ref (match_operand 1 "" "")))] + "0 /* disabled -> using casesi now */" + "j%* %a0" + [(set_attr "type" "uncond_branch")]) + +(define_expand "call" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI 31))])] + "" + "") + +(define_insn "*call_via_reg" + [(call (mem:SI (match_operand:SI 0 "register_operand" "r")) + (match_operand 1 "" "")) + (clobber (reg:SI 31))] + "" + "lr blink,[status]\;j.d %0\;add blink,blink,2" + [(set_attr "type" "call_no_delay_slot") + (set_attr "length" "3")]) + +(define_insn "*call_via_label" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "")) + (match_operand 1 "" "")) + (clobber (reg:SI 31))] + "" + ; The %~ is necessary in case this insn gets conditionalized and the previous + ; insn is the cc setter. + "%~bl%!%* %0" + [(set_attr "type" "call") + (set_attr "cond" "canuse")]) + +(define_expand "call_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "register_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI 31))])] + "" + "") + +(define_insn "*call_value_via_reg" + [(set (match_operand 0 "register_operand" "=r") + (call (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31))] + "" + "lr blink,[status]\;j.d %1\;add blink,blink,2" + [(set_attr "type" "call_no_delay_slot") + (set_attr "length" "3")]) + +(define_insn "*call_value_via_label" + [(set (match_operand 0 "register_operand" "=r") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31))] + "" + ; The %~ is necessary in case this insn gets conditionalized and the previous + ; insn is the cc setter. + "%~bl%!%* %1" + [(set_attr "type" "call") + (set_attr "cond" "canuse")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "type" "misc")]) + +;; Special pattern to flush the icache. +;; ??? Not sure what to do here. Some ARC's are known to support this. 
+ +(define_insn "flush_icache" + [(unspec_volatile [(match_operand 0 "memory_operand" "m")] 0)] + "" + "* return \"\";" + [(set_attr "type" "misc")]) + +;; Split up troublesome insns for better scheduling. + +;; Peepholes go at the end. diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt new file mode 100644 index 000000000..e5381f95a --- /dev/null +++ b/gcc/config/arc/arc.opt @@ -0,0 +1,60 @@ +; Options for the Argonaut ARC port of the compiler +; +; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +EB +Driver + +EL +Driver + +malign-loops +Target Undocumented Report Mask(ALIGN_LOOPS) + +mbig-endian +Target Undocumented Report RejectNegative Mask(BIG_ENDIAN) + +mlittle-endian +Target Undocumented Report RejectNegative InverseMask(BIG_ENDIAN) + +mmangle-cpu +Target Report Mask(MANGLE_CPU) +Prepend the name of the cpu to all public symbol names + +; mmangle-cpu-libgcc +; Target Undocumented Mask(MANGLE_CPU_LIBGC) + +mno-cond-exec +Target Undocumented Report RejectNegative Mask(NO_COND_EXEC) + +mcpu= +Target RejectNegative Joined Var(arc_cpu_string) Init("base") +-mcpu=CPU Compile code for ARC variant CPU + +mtext= +Target RejectNegative Joined Var(arc_text_string) Init(ARC_DEFAULT_TEXT_SECTION) +-mtext=SECTION Put functions in SECTION + +mdata= +Target RejectNegative Joined Var(arc_data_string) Init(ARC_DEFAULT_DATA_SECTION) +-mdata=SECTION Put data in SECTION + +mrodata= +Target RejectNegative Joined Var(arc_rodata_string) Init(ARC_DEFAULT_RODATA_SECTION) +-mrodata=SECTION Put read-only data in SECTION diff --git a/gcc/config/arc/initfini.c b/gcc/config/arc/initfini.c new file mode 100644 index 000000000..d7514133a --- /dev/null +++ b/gcc/config/arc/initfini.c @@ -0,0 +1,155 @@ +/* .init/.fini section handling + C++ global constructor/destructor handling. + This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm. + +Copyright (C) 1995, 1997, 1998, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Declare a pointer to void function type. 
*/ +typedef void (*func_ptr) (void); + +#ifdef CRT_INIT + +/* NOTE: In order to be able to support SVR4 shared libraries, we arrange + to have one set of symbols { __CTOR_LIST__, __DTOR_LIST__, __CTOR_END__, + __DTOR_END__ } per root executable and also one set of these symbols + per shared library. So in any given whole process image, we may have + multiple definitions of each of these symbols. In order to prevent + these definitions from conflicting with one another, and in order to + ensure that the proper lists are used for the initialization/finalization + of each individual shared library (respectively), we give these symbols + only internal (i.e. `static') linkage, and we also make it a point to + refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__ + symbol in crtinit.o, where they are defined. */ + +static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"))) + = { (func_ptr) (-1) }; + +static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"))) + = { (func_ptr) (-1) }; + +/* Run all the global destructors on exit from the program. */ + +/* Some systems place the number of pointers in the first word of the + table. On SVR4 however, that word is -1. In all cases, the table is + null-terminated. On SVR4, we start from the beginning of the list and + invoke each per-compilation-unit destructor routine in order + until we find that null. + + Note that this function MUST be static. There will be one of these + functions in each root executable and one in each shared library, but + although they all have the same code, each one is unique in that it + refers to one particular associated `__DTOR_LIST__' which belongs to the + same particular root executable or shared library file. */ + +static void __do_global_dtors (void) +asm ("__do_global_dtors") __attribute__ ((section (".text"))); + +static void +__do_global_dtors (void) +{ + func_ptr *p; + for (p = __DTOR_LIST__ + 1; *p; p++) + (*p) (); +} + +/* .init section start. + This must appear at the start of the .init section. */ + +asm ("\n\ + .section .init\n\ + .global init\n\ + .word 0\n\ +init:\n\ + st blink,[sp,4]\n\ + st fp,[sp]\n\ + mov fp,sp\n\ + sub sp,sp,16\n\ +"); + +/* .fini section start. + This must appear at the start of the .init section. */ + +asm ("\n\ + .section .fini\n\ + .global fini\n\ + .word 0\n\ +fini:\n\ + st blink,[sp,4]\n\ + st fp,[sp]\n\ + mov fp,sp\n\ + sub sp,sp,16\n\ + bl.nd __do_global_dtors\n\ +"); + +#endif /* CRT_INIT */ + +#ifdef CRT_FINI + +/* Put a word containing zero at the end of each of our two lists of function + addresses. Note that the words defined here go into the .ctors and .dtors + sections of the crtend.o file, and since that file is always linked in + last, these words naturally end up at the very ends of the two lists + contained in these two sections. */ + +static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"))) + = { (func_ptr) 0 }; + +static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"))) + = { (func_ptr) 0 }; + +/* Run all global constructors for the program. + Note that they are run in reverse order. */ + +static void __do_global_ctors (void) +asm ("__do_global_ctors") __attribute__ ((section (".text"))); + +static void +__do_global_ctors (void) +{ + func_ptr *p; + for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--) + (*p) (); +} + +/* .init section end. + This must live at the end of the .init section. 
*/ + +asm ("\n\ + .section .init\n\ + bl.nd __do_global_ctors\n\ + ld blink,[fp,4]\n\ + j.d blink\n\ + ld.a fp,[sp,16]\n\ +"); + +/* .fini section end. + This must live at the end of the .fini section. */ + +asm ("\n\ + .section .fini\n\ + ld blink,[fp,4]\n\ + j.d blink\n\ + ld.a fp,[sp,16]\n\ +"); + +#endif /* CRT_FINI */ diff --git a/gcc/config/arc/lib1funcs.asm b/gcc/config/arc/lib1funcs.asm new file mode 100644 index 000000000..c61f39a5c --- /dev/null +++ b/gcc/config/arc/lib1funcs.asm @@ -0,0 +1,266 @@ +; libgcc routines for ARC cpu. + +/* Copyright (C) 1995, 1997,2004, 2009 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifdef L_mulsi3 + .section .text + .align 4 + +#ifdef __base__ + .cpu base + .global ___mulsi3 +___mulsi3: + +/* This the simple version. + + while (a) + { + if (a & 1) + r += b; + a >>= 1; + b <<= 1; + } +*/ + mov r2,0 ; Accumulate result here. +.Lloop: + sub.f 0,r0,0 ; while (a) + nop + beq.nd .Ldone + and.f 0,r0,1 ; if (a & 1) + add.nz r2,r2,r1 ; r += b + lsr r0,r0 ; a >>= 1 + b.d .Lloop + lsl r1,r1 ; b <<= 1 +.Ldone: + j.d blink + mov r0,r2 +#endif + +#endif /* L_mulsi3 */ + +#ifdef L_umulsidi3 + .section .text + .align 4 + +#ifdef __base__ + .cpu base + .global ___umulsidi3 +___umulsidi3: + +/* This the simple version. + + while (a) + { + if (a & 1) + r += b; + a >>= 1; + b <<= 1; + } +*/ + mov r2,0 ; Top part of b. + mov r3,0 ; Accumulate result here. + mov r4,0 +.Lloop: + sub.f 0,r0,0 ; while (a) + nop + beq.nd .Ldone + and.f 0,r0,1 ; if (a & 1) + sub.f 0,r0,0 + nop + beq .Ldontadd + add.f r4,r4,r1 ; r += b + adc r3,r3,r2 +.Ldontadd: + lsr r0,r0 ; a >>= 1 + lsl.f r1,r1 ; b <<= 1 + b.d .Lloop + rlc r2,r2 +.Ldone: +#ifdef __big_endian__ + mov r1,r4 + j.d blink + mov r0,r3 +#else + mov r0,r4 + j.d blink + mov r1,r3 +#endif +#endif + +#endif /* L_umulsidi3 */ + +#ifdef L_divmod_tools + +; Utilities used by all routines. + + .section .text + .align 4 + +; inputs: r0 = numerator, r1 = denominator +; outputs: positive r0/r1, +; r6.bit1 = sign of numerator, r6.bit0 = sign of result + + .global ___divnorm +___divnorm: + mov r6,0 ; keep sign in r6 + sub.f 0,r0,0 ; is numerator -ve? + sub.lt r0,0,r0 ; negate numerator + mov.lt r6,3 ; sign is -ve + sub.f 0,r1,0 ; is denominator -ve? 
+ sub.lt r1,0,r1 ; negate denominator + xor.lt r6,r6,1 ; toggle sign + j.nd blink + +/* +unsigned long +udivmodsi4(int modwanted, unsigned long num, unsigned long den) +{ + unsigned long bit = 1; + unsigned long res = 0; + + while (den < num && bit && !(den & (1L<<31))) + { + den <<=1; + bit <<=1; + } + while (bit) + { + if (num >= den) + { + num -= den; + res |= bit; + } + bit >>=1; + den >>=1; + } + if (modwanted) return num; + return res; +} +*/ + +; inputs: r0 = numerator, r1 = denominator +; outputs: r0 = quotient, r1 = remainder, r2/r3 trashed + + .global ___udivmodsi4 +___udivmodsi4: + mov r2,1 ; bit = 1 + mov r3,0 ; res = 0 +.Lloop1: + sub.f 0,r1,r0 ; while (den < num + nop + bnc.nd .Lloop2 + sub.f 0,r2,0 ; && bit + nop + bz.nd .Lloop2 + lsl.f 0,r1 ; && !(den & (1<<31)) + nop + bc.nd .Lloop2 + lsl r1,r1 ; den <<= 1 + b.d .Lloop1 + lsl r2,r2 ; bit <<= 1 +.Lloop2: + sub.f 0,r2,0 ; while (bit) + nop + bz.nd .Ldivmodend + sub.f 0,r0,r1 ; if (num >= den) + nop + bc.nd .Lshiftdown + sub r0,r0,r1 ; num -= den + or r3,r3,r2 ; res |= bit +.Lshiftdown: + lsr r2,r2 ; bit >>= 1 + b.d .Lloop2 + lsr r1,r1 ; den >>= 1 +.Ldivmodend: + mov r1,r0 ; r1 = mod + j.d blink + mov r0,r3 ; r0 = res + +#endif + +#ifdef L_udivsi3 + .section .text + .align 4 + +#ifdef __base__ + .cpu base + .global ___udivsi3 +___udivsi3: + mov r7,blink + bl.nd ___udivmodsi4 + j.nd r7 +#endif + +#endif /* L_udivsi3 */ + +#ifdef L_divsi3 + .section .text + .align 4 + +#ifdef __base__ + .cpu base + .global ___divsi3 +___divsi3: + mov r7,blink + bl.nd ___divnorm + bl.nd ___udivmodsi4 + and.f 0,r6,1 + sub.nz r0,0,r0 ; cannot go in delay slot, has limm value + j.nd r7 +#endif + +#endif /* L_divsi3 */ + +#ifdef L_umodsi3 + .section .text + .align 4 + +#ifdef __base__ + .cpu base + .global ___umodsi3 +___umodsi3: + mov r7,blink + bl.nd ___udivmodsi4 + j.d r7 + mov r0,r1 +#endif + +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 + .section .text + .align 4 + +#ifdef __base__ + .cpu base + .global ___modsi3 +___modsi3: + mov r7,blink + bl.nd ___divnorm + bl.nd ___udivmodsi4 + and.f 0,r6,2 + sub.nz r1,0,r1 + j.d r7 + mov r0,r1 +#endif + +#endif /* L_modsi3 */ diff --git a/gcc/config/arc/t-arc b/gcc/config/arc/t-arc new file mode 100644 index 000000000..a923479ca --- /dev/null +++ b/gcc/config/arc/t-arc @@ -0,0 +1,60 @@ +# Copyright (C) 1997, 1998, 1999, 2001, 2002, 2003, +# 2004 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = arc/lib1funcs.asm +LIB1ASMFUNCS = _mulsi3 _umulsidi3 _udivsi3 _divsi3 _umodsi3 _modsi3 _divmod_tools + +# We need libgcc routines to be mangled according to which cpu they +# were compiled for. +# ??? -mmangle-cpu passed by default for now. +#LIBGCC2_CFLAGS = -g1 -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS) -mmangle-cpu + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. 
+FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifndef __big_endian__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifndef __big_endian__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +# .init/.fini section routines + +$(T)crtinit.o: $(srcdir)/config/arc/initfini.c $(GCC_PASSES) $(CONFIG_H) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(CRTSTUFF_T_CFLAGS) \ + $(MULTILIB_CFLAGS) -DCRT_INIT -finhibit-size-directive -fno-inline-functions \ + -g0 -c $(srcdir)/config/arc/initfini.c -o $(T)crtinit.o + +$(T)crtfini.o: $(srcdir)/config/arc/initfini.c $(GCC_PASSES) $(CONFIG_H) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(CRTSTUFF_T_CFLAGS) \ + -DCRT_FINI $(MULTILIB_CFLAGS) -finhibit-size-directive -fno-inline-functions \ + -g0 -c $(srcdir)/config/arc/initfini.c -o $(T)crtfini.o + +MULTILIB_OPTIONS = EB +MULTILIB_DIRNAMES = be +EXTRA_MULTILIB_PARTS = crtinit.o crtfini.o diff --git a/gcc/config/arm/README-interworking b/gcc/config/arm/README-interworking new file mode 100644 index 000000000..7f2eda83b --- /dev/null +++ b/gcc/config/arm/README-interworking @@ -0,0 +1,749 @@ + Arm / Thumb Interworking + ======================== + +The Cygnus GNU Pro Toolkit for the ARM7T processor supports function +calls between code compiled for the ARM instruction set and code +compiled for the Thumb instruction set and vice versa. This document +describes how that interworking support operates and explains the +command line switches that should be used in order to produce working +programs. + +Note: The Cygnus GNU Pro Toolkit does not support switching between +compiling for the ARM instruction set and the Thumb instruction set +on anything other than a per file basis. There are in fact two +completely separate compilers, one that produces ARM assembler +instructions and one that produces Thumb assembler instructions. The +two compilers share the same assembler, linker and so on. + + +1. Explicit interworking support for C and C++ files +==================================================== + +By default if a file is compiled without any special command line +switches then the code produced will not support interworking. +Provided that a program is made up entirely from object files and +libraries produced in this way and which contain either exclusively +ARM instructions or exclusively Thumb instructions then this will not +matter and a working executable will be created. If an attempt is +made to link together mixed ARM and Thumb object files and libraries, +then warning messages will be produced by the linker and a non-working +executable will be created. + +In order to produce code which does support interworking it should be +compiled with the + + -mthumb-interwork + +command line option. Provided that a program is made up entirely from +object files and libraries built with this command line switch a +working executable will be produced, even if both ARM and Thumb +instructions are used by the various components of the program. (No +warning messages will be produced by the linker either). + +Note that specifying -mthumb-interwork does result in slightly larger, +slower code being produced. This is why interworking support must be +specifically enabled by a switch. + + +2. 
Explicit interworking support for assembler files +==================================================== + +If assembler files are to be included into an interworking program +then the following rules must be obeyed: + + * Any externally visible functions must return by using the BX + instruction. + + * Normal function calls can just use the BL instruction. The + linker will automatically insert code to switch between ARM + and Thumb modes as necessary. + + * Calls via function pointers should use the BX instruction if + the call is made in ARM mode: + + .code 32 + mov lr, pc + bx rX + + This code sequence will not work in Thumb mode however, since + the mov instruction will not set the bottom bit of the lr + register. Instead a branch-and-link to the _call_via_rX + functions should be used instead: + + .code 16 + bl _call_via_rX + + where rX is replaced by the name of the register containing + the function address. + + * All externally visible functions which should be entered in + Thumb mode must have the .thumb_func pseudo op specified just + before their entry point. e.g.: + + .code 16 + .global function + .thumb_func + function: + ...start of function.... + + * All assembler files must be assembled with the switch + -mthumb-interwork specified on the command line. (If the file + is assembled by calling gcc it will automatically pass on the + -mthumb-interwork switch to the assembler, provided that it + was specified on the gcc command line in the first place.) + + +3. Support for old, non-interworking aware code. +================================================ + +If it is necessary to link together code produced by an older, +non-interworking aware compiler, or code produced by the new compiler +but without the -mthumb-interwork command line switch specified, then +there are two command line switches that can be used to support this. + +The switch + + -mcaller-super-interworking + +will allow calls via function pointers in Thumb mode to work, +regardless of whether the function pointer points to old, +non-interworking aware code or not. Specifying this switch does +produce slightly slower code however. + +Note: There is no switch to allow calls via function pointers in ARM +mode to be handled specially. Calls via function pointers from +interworking aware ARM code to non-interworking aware ARM code work +without any special considerations by the compiler. Calls via +function pointers from interworking aware ARM code to non-interworking +aware Thumb code however will not work. (Actually under some +circumstances they may work, but there are no guarantees). This is +because only the new compiler is able to produce Thumb code, and this +compiler already has a command line switch to produce interworking +aware code. + + +The switch + + -mcallee-super-interworking + +will allow non-interworking aware ARM or Thumb code to call Thumb +functions, either directly or via function pointers. Specifying this +switch does produce slightly larger, slower code however. + +Note: There is no switch to allow non-interworking aware ARM or Thumb +code to call ARM functions. There is no need for any special handling +of calls from non-interworking aware ARM code to interworking aware +ARM functions, they just work normally. Calls from non-interworking +aware Thumb functions to ARM code however, will not work. There is no +option to support this, since it is always possible to recompile the +Thumb code to be interworking aware. 
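+
+As a concrete illustration (this is only a sketch; the file name and the
+thumb-pe-gcc driver name are the same illustrative ones used in section
+10 below), a Thumb source file that both calls old ARM code through a
+function pointer and must itself remain callable from old ARM code could
+be compiled like this:
+
+        /* thumb_part.c - names are illustrative only. */
+        extern void (*old_arm_hook) (void);
+
+        void poke_old_code (void)
+        {
+          /* Needs -mcaller-super-interworking, since old_arm_hook may
+             point at non-interworking aware ARM code. */
+          old_arm_hook ();
+        }
+
+        thumb-pe-gcc -O2 -mthumb-interwork -mcaller-super-interworking \
+            -mcallee-super-interworking -c thumb_part.c
+
+Here -mcaller-super-interworking protects the call made through
+old_arm_hook, while -mcallee-super-interworking allows poke_old_code()
+itself to be entered by non-interworking aware callers.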
+ +As an alternative to the command line switch +-mcallee-super-interworking, which affects all externally visible +functions in a file, it is possible to specify an attribute or +declspec for individual functions, indicating that that particular +function should support being called by non-interworking aware code. +The function should be defined like this: + + int __attribute__((interfacearm)) function + { + ... body of function ... + } + +or + + int __declspec(interfacearm) function + { + ... body of function ... + } + + + +4. Interworking support in dlltool +================================== + +It is possible to create DLLs containing mixed ARM and Thumb code. It +is also possible to call Thumb code in a DLL from an ARM program and +vice versa. It is even possible to call ARM DLLs that have been compiled +without interworking support (say by an older version of the compiler), +from Thumb programs and still have things work properly. + + A version of the `dlltool' program which supports the `--interwork' +command line switch is needed, as well as the following special +considerations when building programs and DLLs: + +*Use `-mthumb-interwork'* + When compiling files for a DLL or a program the `-mthumb-interwork' + command line switch should be specified if calling between ARM and + Thumb code can happen. If a program is being compiled and the + mode of the DLLs that it uses is not known, then it should be + assumed that interworking might occur and the switch used. + +*Use `-m thumb'* + If the exported functions from a DLL are all Thumb encoded then the + `-m thumb' command line switch should be given to dlltool when + building the stubs. This will make dlltool create Thumb encoded + stubs, rather than its default of ARM encoded stubs. + + If the DLL consists of both exported Thumb functions and exported + ARM functions then the `-m thumb' switch should not be used. + Instead the Thumb functions in the DLL should be compiled with the + `-mcallee-super-interworking' switch, or with the `interfacearm' + attribute specified on their prototypes. In this way they will be + given ARM encoded prologues, which will work with the ARM encoded + stubs produced by dlltool. + +*Use `-mcaller-super-interworking'* + If it is possible for Thumb functions in a DLL to call + non-interworking aware code via a function pointer, then the Thumb + code must be compiled with the `-mcaller-super-interworking' + command line switch. This will force the function pointer calls + to use the _interwork_call_via_rX stub functions which will + correctly restore Thumb mode upon return from the called function. + +*Link with `libgcc.a'* + When the dll is built it may have to be linked with the GCC + library (`libgcc.a') in order to extract the _call_via_rX functions + or the _interwork_call_via_rX functions. This represents a partial + redundancy since the same functions *may* be present in the + application itself, but since they only take up 372 bytes this + should not be too much of a consideration. + +*Use `--support-old-code'* + When linking a program with an old DLL which does not support + interworking, the `--support-old-code' command line switch to the + linker should be used. This causes the linker to generate special + interworking stubs which can cope with old, non-interworking aware + ARM code, at the cost of generating bulkier code. The linker will + still generate a warning message along the lines of: + "Warning: input file XXX does not support interworking, whereas YYY does." 
+ but this can now be ignored because the --support-old-code switch + has been used. + + + +5. How interworking support works +================================= + +Switching between the ARM and Thumb instruction sets is accomplished +via the BX instruction which takes as an argument a register name. +Control is transfered to the address held in this register (with the +bottom bit masked out), and if the bottom bit is set, then Thumb +instruction processing is enabled, otherwise ARM instruction +processing is enabled. + +When the -mthumb-interwork command line switch is specified, gcc +arranges for all functions to return to their caller by using the BX +instruction. Thus provided that the return address has the bottom bit +correctly initialized to indicate the instruction set of the caller, +correct operation will ensue. + +When a function is called explicitly (rather than via a function +pointer), the compiler generates a BL instruction to do this. The +Thumb version of the BL instruction has the special property of +setting the bottom bit of the LR register after it has stored the +return address into it, so that a future BX instruction will correctly +return the instruction after the BL instruction, in Thumb mode. + +The BL instruction does not change modes itself however, so if an ARM +function is calling a Thumb function, or vice versa, it is necessary +to generate some extra instructions to handle this. This is done in +the linker when it is storing the address of the referenced function +into the BL instruction. If the BL instruction is an ARM style BL +instruction, but the referenced function is a Thumb function, then the +linker automatically generates a calling stub that converts from ARM +mode to Thumb mode, puts the address of this stub into the BL +instruction, and puts the address of the referenced function into the +stub. Similarly if the BL instruction is a Thumb BL instruction, and +the referenced function is an ARM function, the linker generates a +stub which converts from Thumb to ARM mode, puts the address of this +stub into the BL instruction, and the address of the referenced +function into the stub. + +This is why it is necessary to mark Thumb functions with the +.thumb_func pseudo op when creating assembler files. This pseudo op +allows the assembler to distinguish between ARM functions and Thumb +functions. (The Thumb version of GCC automatically generates these +pseudo ops for any Thumb functions that it generates). + +Calls via function pointers work differently. Whenever the address of +a function is taken, the linker examines the type of the function +being referenced. If the function is a Thumb function, then it sets +the bottom bit of the address. Technically this makes the address +incorrect, since it is now one byte into the start of the function, +but this is never a problem because: + + a. with interworking enabled all calls via function pointer + are done using the BX instruction and this ignores the + bottom bit when computing where to go to. + + b. the linker will always set the bottom bit when the address + of the function is taken, so it is never possible to take + the address of the function in two different places and + then compare them and find that they are not equal. + +As already mentioned any call via a function pointer will use the BX +instruction (provided that interworking is enabled). The only problem +with this is computing the return address for the return from the +called function. 
For ARM code this can easily be done by the code
+sequence:
+
+        mov lr, pc
+        bx rX
+
+(where rX is the name of the register containing the function
+pointer). This code does not work for the Thumb instruction set,
+since the MOV instruction will not set the bottom bit of the LR
+register, so that when the called function returns, it will return in
+ARM mode not Thumb mode. Instead the compiler generates this
+sequence:
+
+        bl _call_via_rX
+
+(again where rX is the name of the register containing the function
+pointer). The special call_via_rX functions look like this:
+
+        .thumb_func
+_call_via_r0:
+        bx r0
+        nop
+
+The BL instruction ensures that the correct return address is stored
+in the LR register and then the BX instruction jumps to the address
+stored in the function pointer, switching modes if necessary.
+
+
+6. How caller-super-interworking support works
+==============================================
+
+When the -mcaller-super-interworking command line switch is specified
+it changes the code produced by the Thumb compiler so that all calls
+via function pointers (including virtual function calls) now go via a
+different stub function. The code to call via a function pointer now
+looks like this:
+
+        bl _interwork_call_via_r0
+
+Note: The compiler does not insist that r0 be used to hold the
+function address. Any register will do, and there are a suite of stub
+functions, one for each possible register. The stub functions look
+like this:
+
+        .code 16
+        .thumb_func
+_interwork_call_via_r0:
+        bx pc
+        nop
+
+        .code 32
+        tst r0, #1
+        stmeqdb r13!, {lr}
+        adreq lr, _arm_return
+        bx r0
+
+The stub first switches to ARM mode, since it is a lot easier to
+perform the necessary operations using ARM instructions. It then
+tests the bottom bit of the register containing the address of the
+function to be called. If this bottom bit is set then the function
+being called uses Thumb instructions and the BX instruction to come
+will switch back into Thumb mode before calling this function. (Note
+that it does not matter how this called function chooses to return to
+its caller, since both the caller and callee are Thumb functions,
+and no mode switching is necessary). If the function being called is an
+ARM mode function however, the stub pushes the return address (with
+its bottom bit set) onto the stack, replaces the return address with
+the address of a piece of code called '_arm_return' and then
+performs a BX instruction to call the function.
+
+The '_arm_return' code looks like this:
+
+        .code 32
+_arm_return:
+        ldmia r13!, {r12}
+        bx r12
+        .code 16
+
+
+It simply retrieves the return address from the stack, and then
+performs a BX operation to return to the caller and switch back into
+Thumb mode.
+
+
+7. How callee-super-interworking support works
+==============================================
+
+When -mcallee-super-interworking is specified on the command line the
+Thumb compiler behaves as if every externally visible function that it
+compiles has had the (interfacearm) attribute specified for it. What
+this attribute does is to put a special, ARM mode header onto the
+function which forces a switch into Thumb mode:
+
+        without __attribute__((interfacearm)):
+
+                .code 16
+                .thumb_func
+        function:
+                ... start of function ...
+
+        with __attribute__((interfacearm)):
+
+                .code 32
+        function:
+                orr r12, pc, #1
+                bx r12
+
+                .code 16
+                .thumb_func
+        .real_start_of_function:
+
+                ... start of function ...
+
+Note that since the function now expects to be entered in ARM mode, it
+no longer has the .thumb_func pseudo op specified for its name.
+Instead the pseudo op is attached to a new label .real_start_of_<name>
+(where <name> is the name of the function) which indicates the start
+of the Thumb code. This does have the interesting side effect that
+if this function is now called from a Thumb mode piece of code
+outside of the current file, the linker will generate a calling stub
+to switch from Thumb mode into ARM mode, and then this is immediately
+overridden by the function's header which switches back into Thumb
+mode.
+
+In addition the (interfacearm) attribute also forces the function to
+return by using the BX instruction, even if it has not been compiled with
+the -mthumb-interwork command line flag, so that the correct mode will
+be restored upon exit from the function.
+
+
+8. Some examples
+================
+
+  Given these two test files:
+
+        int arm (void) { return 1 + thumb (); }
+
+        int thumb (void) { return 2 + arm (); }
+
+  The following pieces of assembler are produced by the ARM and Thumb
+versions of GCC depending upon the command line options used:
+
+  `-O2':
+        .code 32                        .code 16
+        .global _arm                    .global _thumb
+                                        .thumb_func
+  _arm:                                 _thumb:
+        mov ip, sp
+        stmfd sp!, {fp, ip, lr, pc}     push {lr}
+        sub fp, ip, #4
+        bl _thumb                       bl _arm
+        add r0, r0, #1                  add r0, r0, #2
+        ldmea fp, {fp, sp, pc}          pop {pc}
+
+  Note how the functions return without using the BX instruction. If
+these files were assembled and linked together they would fail to work
+because they do not change mode when returning to their caller.
+
+  `-O2 -mthumb-interwork':
+
+        .code 32                        .code 16
+        .global _arm                    .global _thumb
+                                        .thumb_func
+  _arm:                                 _thumb:
+        mov ip, sp
+        stmfd sp!, {fp, ip, lr, pc}     push {lr}
+        sub fp, ip, #4
+        bl _thumb                       bl _arm
+        add r0, r0, #1                  add r0, r0, #2
+        ldmea fp, {fp, sp, lr}          pop {r1}
+        bx lr                           bx r1
+
+  Now the functions use BX to return to their caller. They have grown by
+4 and 2 bytes respectively, but they can now successfully be linked
+together and be expected to work. The linker will replace the
+destinations of the two BL instructions with the addresses of calling
+stubs which convert to the correct mode before jumping to the called
+function.
+
+  `-O2 -mcallee-super-interworking':
+
+        .code 32                        .code 32
+        .global _arm                    .global _thumb
+  _arm:                                 _thumb:
+                                        orr r12, pc, #1
+                                        bx r12
+        mov ip, sp                      .code 16
+        stmfd sp!, {fp, ip, lr, pc}     push {lr}
+        sub fp, ip, #4
+        bl _thumb                       bl _arm
+        add r0, r0, #1                  add r0, r0, #2
+        ldmea fp, {fp, sp, lr}          pop {r1}
+        bx lr                           bx r1
+
+  The thumb function now has an ARM encoded prologue, and it no longer
+has the `.thumb_func' pseudo op attached to it. The linker will not
+generate a calling stub for the call from arm() to thumb(), but it will
+still have to generate a stub for the call from thumb() to arm(). Also
+note how specifying `-mcallee-super-interworking' automatically
+implies `-mthumb-interwork'.
+
+
+9. Some Function Pointer Examples
+=================================
+
+  Given this test file:
+
+        int func (void) { return 1; }
+
+        int call (int (* ptr)(void)) { return ptr (); }
+
+  The following varying pieces of assembler are produced by the Thumb
+version of GCC depending upon the command line options used:
+
+  `-O2':
+        .code 16
+        .globl _func
+        .thumb_func
+  _func:
+        mov r0, #1
+        bx lr
+
+        .globl _call
+        .thumb_func
+  _call:
+        push {lr}
+        bl __call_via_r0
+        pop {pc}
+
+  Note how the two functions have different exit sequences.
In +particular call() uses pop {pc} to return, which would not work if the +caller was in ARM mode. func() however, uses the BX instruction, even +though `-mthumb-interwork' has not been specified, as this is the most +efficient way to exit a function when the return address is held in the +link register. + + `-O2 -mthumb-interwork': + + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + This time both functions return by using the BX instruction. This +means that call() is now two bytes longer and several cycles slower +than the previous version. + + `-O2 -mcaller-super-interworking': + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __interwork_call_via_r0 + pop {pc} + + Very similar to the first (non-interworking) version, except that a +different stub is used to call via the function pointer. This new stub +will work even if the called function is not interworking aware, and +tries to return to call() in ARM mode. Note that the assembly code for +call() is still not interworking aware itself, and so should not be +called from ARM code. + + `-O2 -mcallee-super-interworking': + + .code 32 + .globl _func + _func: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_func + .thumb_func + .real_start_of_func: + mov r0, #1 + bx lr + + .code 32 + .globl _call + _call: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_call + .thumb_func + .real_start_of_call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + Now both functions have an ARM coded prologue, and both functions +return by using the BX instruction. These functions are interworking +aware therefore and can safely be called from ARM code. The code for +the call() function is now 10 bytes longer than the original, non +interworking aware version, an increase of over 200%. + + If a prototype for call() is added to the source code, and this +prototype includes the `interfacearm' attribute: + + int __attribute__((interfacearm)) call (int (* ptr)(void)); + + then this code is produced (with only -O2 specified on the command +line): + + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .code 32 + _call: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_call + .thumb_func + .real_start_of_call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + So now both call() and func() can be safely called via +non-interworking aware ARM code. If, when such a file is assembled, +the assembler detects the fact that call() is being called by another +function in the same file, it will automatically adjust the target of +the BL instruction to point to .real_start_of_call. In this way there +is no need for the linker to generate a Thumb-to-ARM calling stub so +that call can be entered in ARM mode. + + +10. 
How to use dlltool to build ARM/Thumb DLLs +============================================== + Given a program (`prog.c') like this: + + extern int func_in_dll (void); + + int main (void) { return func_in_dll(); } + + And a DLL source file (`dll.c') like this: + + int func_in_dll (void) { return 1; } + + Here is how to build the DLL and the program for a purely ARM based +environment: + +*Step One + Build a `.def' file describing the DLL: + + ; example.def + ; This file describes the contents of the DLL + LIBRARY example + HEAPSIZE 0x40000, 0x2000 + EXPORTS + func_in_dll 1 + +*Step Two + Compile the DLL source code: + + arm-pe-gcc -O2 -c dll.c + +*Step Three + Use `dlltool' to create an exports file and a library file: + + dlltool --def example.def --output-exp example.o --output-lib example.a + +*Step Four + Link together the complete DLL: + + arm-pe-ld dll.o example.o -o example.dll + +*Step Five + Compile the program's source code: + + arm-pe-gcc -O2 -c prog.c + +*Step Six + Link together the program and the DLL's library file: + + arm-pe-gcc prog.o example.a -o prog + + If instead this was a Thumb DLL being called from an ARM program, the +steps would look like this. (To save space only those steps that are +different from the previous version are shown): + +*Step Two + Compile the DLL source code (using the Thumb compiler): + + thumb-pe-gcc -O2 -c dll.c -mthumb-interwork + +*Step Three + Build the exports and library files (and support interworking): + + dlltool -d example.def -z example.o -l example.a --interwork -m thumb + +*Step Five + Compile the program's source code (and support interworking): + + arm-pe-gcc -O2 -c prog.c -mthumb-interwork + + If instead, the DLL was an old, ARM DLL which does not support +interworking, and which cannot be rebuilt, then these steps would be +used. + +*Step One + Skip. If you do not have access to the sources of a DLL, there is + no point in building a `.def' file for it. + +*Step Two + Skip. With no DLL sources there is nothing to compile. + +*Step Three + Skip. Without a `.def' file you cannot use dlltool to build an + exports file or a library file. + +*Step Four + Skip. Without a set of DLL object files you cannot build the DLL. + Besides it has already been built for you by somebody else. + +*Step Five + Compile the program's source code, this is the same as before: + + arm-pe-gcc -O2 -c prog.c + +*Step Six + Link together the program and the DLL's library file, passing the + `--support-old-code' option to the linker: + + arm-pe-gcc prog.o example.a -Wl,--support-old-code -o prog + + Ignore the warning message about the input file not supporting + interworking as the --support-old-code switch has taken care if this. + + +Copyright (C) 1998, 2002, 2003, 2004 Free Software Foundation, Inc. + +Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h new file mode 100644 index 000000000..f8e7367fd --- /dev/null +++ b/gcc/config/arm/aout.h @@ -0,0 +1,380 @@ +/* Definitions of target machine for GNU compiler, for ARM with a.out + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2004, 2007, 2008, 2010 + Free Software Foundation, Inc. + Contributed by Richard Earnshaw (rearnsha@armltd.co.uk). + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef ASM_APP_ON +#define ASM_APP_ON "" +#endif +#ifndef ASM_APP_OFF +#define ASM_APP_OFF "" +#endif + +/* Switch to the text or data segment. */ +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.bss" + +/* Note: If USER_LABEL_PREFIX or LOCAL_LABEL_PREFIX are changed, + make sure that this change is reflected in the function + coff_arm_is_local_label_name() in bfd/coff-arm.c. */ +#ifndef REGISTER_PREFIX +#define REGISTER_PREFIX "" +#endif + +#ifndef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" +#endif + +#ifndef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "" +#endif + +/* The assembler's names for the registers. Note that the ?xx registers are + there so that VFPv3/NEON registers D16-D31 have the same spacing as D0-D15 + (each of which is overlaid on two S registers), although there are no + actual single-precision registers which correspond to D16-D31. */ +#ifndef REGISTER_NAMES +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "sl", "fp", "ip", "sp", "lr", "pc", \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "cc", "sfp", "afp", \ + "mv0", "mv1", "mv2", "mv3", \ + "mv4", "mv5", "mv6", "mv7", \ + "mv8", "mv9", "mv10", "mv11", \ + "mv12", "mv13", "mv14", "mv15", \ + "wcgr0", "wcgr1", "wcgr2", "wcgr3", \ + "wr0", "wr1", "wr2", "wr3", \ + "wr4", "wr5", "wr6", "wr7", \ + "wr8", "wr9", "wr10", "wr11", \ + "wr12", "wr13", "wr14", "wr15", \ + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", \ + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", \ + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", \ + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", \ + "d16", "?16", "d17", "?17", "d18", "?18", "d19", "?19", \ + "d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \ + "d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \ + "d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \ + "vfpcc" \ +} +#endif + +#ifndef ADDITIONAL_REGISTER_NAMES +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + {"a1", 0}, \ + {"a2", 1}, \ + {"a3", 2}, \ + {"a4", 3}, \ + {"v1", 4}, \ + {"v2", 5}, \ + {"v3", 6}, \ + {"v4", 7}, \ + {"v5", 8}, \ + {"v6", 9}, \ + {"rfp", 9}, /* Gcc used to call it this */ \ + {"sb", 9}, \ + {"v7", 10}, \ + {"r10", 10}, /* sl */ \ + {"r11", 11}, /* fp */ \ + {"r12", 12}, /* ip */ \ + {"r13", 13}, /* sp */ \ + {"r14", 14}, /* lr */ \ + {"r15", 15}, /* pc */ \ + {"mvf0", 27}, \ + {"mvf1", 28}, \ + {"mvf2", 29}, \ + {"mvf3", 30}, \ + {"mvf4", 31}, \ + {"mvf5", 32}, \ + {"mvf6", 33}, \ + {"mvf7", 34}, \ + {"mvf8", 35}, \ + {"mvf9", 36}, \ + {"mvf10", 37}, \ + {"mvf11", 38}, \ + {"mvf12", 39}, \ + {"mvf13", 40}, \ + {"mvf14", 41}, \ + {"mvf15", 42}, \ + {"mvd0", 27}, \ + {"mvd1", 28}, \ + {"mvd2", 29}, \ + {"mvd3", 30}, \ + {"mvd4", 31}, \ + {"mvd5", 32}, \ + {"mvd6", 33}, \ + {"mvd7", 34}, \ + {"mvd8", 35}, \ + {"mvd9", 36}, \ + {"mvd10", 37}, \ + {"mvd11", 38}, \ + {"mvd12", 
39}, \ + {"mvd13", 40}, \ + {"mvd14", 41}, \ + {"mvd15", 42}, \ + {"mvfx0", 27}, \ + {"mvfx1", 28}, \ + {"mvfx2", 29}, \ + {"mvfx3", 30}, \ + {"mvfx4", 31}, \ + {"mvfx5", 32}, \ + {"mvfx6", 33}, \ + {"mvfx7", 34}, \ + {"mvfx8", 35}, \ + {"mvfx9", 36}, \ + {"mvfx10", 37}, \ + {"mvfx11", 38}, \ + {"mvfx12", 39}, \ + {"mvfx13", 40}, \ + {"mvfx14", 41}, \ + {"mvfx15", 42}, \ + {"mvdx0", 27}, \ + {"mvdx1", 28}, \ + {"mvdx2", 29}, \ + {"mvdx3", 30}, \ + {"mvdx4", 31}, \ + {"mvdx5", 32}, \ + {"mvdx6", 33}, \ + {"mvdx7", 34}, \ + {"mvdx8", 35}, \ + {"mvdx9", 36}, \ + {"mvdx10", 37}, \ + {"mvdx11", 38}, \ + {"mvdx12", 39}, \ + {"mvdx13", 40}, \ + {"mvdx14", 41}, \ + {"mvdx15", 42} \ +} +#endif + +#ifndef OVERLAPPING_REGISTER_NAMES +#define OVERLAPPING_REGISTER_NAMES \ +{ \ + {"d0", 63, 2}, \ + {"d1", 65, 2}, \ + {"d2", 67, 2}, \ + {"d3", 69, 2}, \ + {"d4", 71, 2}, \ + {"d5", 73, 2}, \ + {"d6", 75, 2}, \ + {"d7", 77, 2}, \ + {"d8", 79, 2}, \ + {"d9", 81, 2}, \ + {"d10", 83, 2}, \ + {"d11", 85, 2}, \ + {"d12", 87, 2}, \ + {"d13", 89, 2}, \ + {"d14", 91, 2}, \ + {"d15", 93, 2}, \ + {"q0", 63, 4}, \ + {"q1", 67, 4}, \ + {"q2", 71, 4}, \ + {"q3", 75, 4}, \ + {"q4", 79, 4}, \ + {"q5", 83, 4}, \ + {"q6", 87, 4}, \ + {"q7", 91, 4}, \ + {"q8", 95, 4}, \ + {"q9", 99, 4}, \ + {"q10", 103, 4}, \ + {"q11", 107, 4}, \ + {"q12", 111, 4}, \ + {"q13", 115, 4}, \ + {"q14", 119, 4}, \ + {"q15", 123, 4} \ +} +#endif + +#ifndef NO_DOLLAR_IN_LABEL +#define NO_DOLLAR_IN_LABEL 1 +#endif + +/* Generate DBX debugging information. riscix.h will undefine this because + the native assembler does not support stabs. */ +#define DBX_DEBUGGING_INFO 1 + +/* Acorn dbx moans about continuation chars, so don't use any. */ +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif + +/* Output a function label definition. */ +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ + do \ + { \ + ARM_DECLARE_FUNCTION_NAME (STREAM, NAME, DECL); \ + ASM_OUTPUT_LABEL (STREAM, NAME); \ + } \ + while (0) +#endif + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* Make an internal label into a string. */ +#ifndef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf (STRING, "*%s%s%u", LOCAL_LABEL_PREFIX, PREFIX, (unsigned int)(NUM)) +#endif + +/* Output an element of a dispatch table. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + do \ + { \ + gcc_assert (!TARGET_THUMB2); \ + asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \ + } \ + while (0) + + +/* Thumb-2 always uses addr_diff_elf so that the Table Branch instructions + can be used. For non-pic code where the offsets do not suitable for + TBB/TBH the elements are output as absolute labels. 
*/ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE); \ + else if (TARGET_THUMB1) \ + { \ + if (flag_pic || optimize_size) \ + { \ + switch (GET_MODE(body)) \ + { \ + case QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case HImode: /* TBH */ \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d\n", \ + VALUE, REL); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \ + } \ + else /* Thumb-2 */ \ + { \ + switch (GET_MODE(body)) \ + { \ + case QImode: /* TBB */ \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case HImode: /* TBH */ \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + if (flag_pic) \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1-%LL%d\n", VALUE, REL); \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } \ + } \ + while (0) + + +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII(STREAM, PTR, LEN) \ + output_ascii_pseudo_op (STREAM, (const unsigned char *) (PTR), LEN) + +/* Output a gap. In fact we fill it with nulls. */ +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(STREAM, NBYTES) \ + fprintf (STREAM, "\t.space\t%d\n", (int) (NBYTES)) + +/* Align output to a power of two. Horrible /bin/as. */ +#ifndef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do \ + { \ + register int amount = 1 << (POWER); \ + \ + if (amount == 2) \ + fprintf (STREAM, "\t.even\n"); \ + else if (amount != 1) \ + fprintf (STREAM, "\t.align\t%d\n", amount - 4); \ + } \ + while (0) +#endif + +/* Output a common block. */ +#ifndef ASM_OUTPUT_COMMON +#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \ + do \ + { \ + fprintf (STREAM, "\t.comm\t"); \ + assemble_name (STREAM, NAME); \ + asm_fprintf (STREAM, ", %d\t%@ %d\n", \ + (int)(ROUNDED), (int)(SIZE)); \ + } \ + while (0) +#endif + +/* Output a local common block. /bin/as can't do this, so hack a + `.space' into the bss segment. Note that this is *bad* practice, + which is guaranteed NOT to work since it doesn't define STATIC + COMMON space but merely STATIC BSS space. */ +#ifndef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \ + do \ + { \ + switch_to_section (bss_section); \ + ASM_OUTPUT_ALIGN (STREAM, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (STREAM, NAME); \ + fprintf (STREAM, "\t.space\t%d\n", (int)(SIZE)); \ + } \ + while (0) +#endif + +/* Output a zero-initialized block. */ +#ifndef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(STREAM, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (STREAM, DECL, NAME, SIZE, ALIGN) +#endif + +/* Output a #ident directive. */ +#ifndef ASM_OUTPUT_IDENT +#define ASM_OUTPUT_IDENT(STREAM,STRING) \ + asm_fprintf (STREAM, "%@ - - - ident %s\n", STRING) +#endif + +#ifndef ASM_COMMENT_START +#define ASM_COMMENT_START "@" +#endif + +/* This works for GAS and some other assemblers. */ +#define SET_ASM_OP "\t.set\t" diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c new file mode 100644 index 000000000..f9ad1c9e1 --- /dev/null +++ b/gcc/config/arm/arm-c.c @@ -0,0 +1,45 @@ +/* Copyright (C) 2007, 2010 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "tree.h" +#include "output.h" +#include "c-family/c-common.h" + + +/* Output C specific EABI object attributes. These can not be done in + arm.c because they require information from the C frontend. */ + +static void arm_output_c_attributes(void) +{ + /* Tag_ABI_PCS_wchar_t. */ + asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n", + (int)(TYPE_PRECISION (wchar_type_node) / BITS_PER_UNIT)); +} + + +/* Setup so that common code calls arm_output_c_attributes. */ + +void arm_lang_object_attributes_init(void) +{ + arm_lang_output_object_attributes_hook = arm_output_c_attributes; +} diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def new file mode 100644 index 000000000..0bb9aa3ee --- /dev/null +++ b/gcc/config/arm/arm-cores.def @@ -0,0 +1,136 @@ +/* ARM CPU Cores + Copyright (C) 2003, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Written by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + ARM_CORE(CORE_NAME, CORE_IDENT, ARCH, FLAGS, COSTS) + + The CORE_NAME is the name of the core, represented as a string constant. + The CORE_IDENT is the name of the core, represented as an identifier. + ARCH is the architecture revision implemented by the chip. + FLAGS are the bitwise-or of the traits that apply to that core. + This need not include flags implied by the architecture. + COSTS is the name of the rtx_costs routine to use. + + If you update this table, you must update the "tune" attribute in + arm.md. + + Some tools assume no whitespace up to the first "," in each entry. 
*/ + +/* V2/V2A Architecture Processors */ +ARM_CORE("arm2", arm2, 2, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm250", arm250, 2, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm3", arm3, 2, FL_CO_PROC | FL_MODE26, slowmul) + +/* V3 Architecture Processors */ +ARM_CORE("arm6", arm6, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm60", arm60, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm600", arm600, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm610", arm610, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm620", arm620, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7", arm7, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm7d", arm7d, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm7di", arm7di, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm70", arm70, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm700", arm700, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm700i", arm700i, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm710", arm710, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm720", arm720, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm710c", arm710c, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7100", arm7100, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7500", arm7500, 3, FL_MODE26 | FL_WBUF, slowmul) +/* Doesn't have an external co-proc, but does have embedded fpa. */ +ARM_CORE("arm7500fe", arm7500fe, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) + +/* V3M Architecture Processors */ +/* arm7m doesn't exist on its own, but only with D, ("and", and I), but + those don't alter the code, so arm7m is sometimes used. */ +ARM_CORE("arm7m", arm7m, 3M, FL_CO_PROC | FL_MODE26, fastmul) +ARM_CORE("arm7dm", arm7dm, 3M, FL_CO_PROC | FL_MODE26, fastmul) +ARM_CORE("arm7dmi", arm7dmi, 3M, FL_CO_PROC | FL_MODE26, fastmul) + +/* V4 Architecture Processors */ +ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) +ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) +ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) +ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul) +ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul) + +/* V4T Architecture Processors */ +ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC , fastmul) +ARM_CORE("arm7tdmi-s", arm7tdmis, 4T, FL_CO_PROC , fastmul) +ARM_CORE("arm710t", arm710t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm720t", arm720t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm740t", arm740t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm9", arm9, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm9tdmi", arm9tdmi, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm920", arm920, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm920t", arm920t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm922t", arm922t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm940t", arm940t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("ep9312", ep9312, 4T, FL_LDSCHED | FL_CIRRUS, fastmul) + +/* V5T Architecture Processors */ +ARM_CORE("arm10tdmi", arm10tdmi, 5T, FL_LDSCHED, fastmul) +ARM_CORE("arm1020t", arm1020t, 5T, FL_LDSCHED, fastmul) + +/* V5TE Architecture Processors */ +ARM_CORE("arm9e", arm9e, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm946e-s", arm946es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm966e-s", arm966es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm968e-s", arm968es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm10e", 
arm10e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("arm1020e", arm1020e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("arm1022e", arm1022e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale) +ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) +ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) +ARM_CORE("fa606te", fa606te, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fa626te", fa626te, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fmp626", fmp626, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fa726te", fa726te, 5TE, FL_LDSCHED, fa726te) + +/* V5TEJ Architecture Processors */ +ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e) +ARM_CORE("arm1026ej-s", arm1026ejs, 5TEJ, FL_LDSCHED, 9e) + +/* V6 Architecture Processors */ +ARM_CORE("arm1136j-s", arm1136js, 6J, FL_LDSCHED, 9e) +ARM_CORE("arm1136jf-s", arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("arm1176jz-s", arm1176jzs, 6ZK, FL_LDSCHED, 9e) +ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) +ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) +ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e) +ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) +ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) +ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e) +ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) +ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) +ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e) +ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) +ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e) +ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e) diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md new file mode 100644 index 000000000..44e758692 --- /dev/null +++ b/gcc/config/arm/arm-generic.md @@ -0,0 +1,153 @@ +;; Generic ARM Pipeline Description +;; Copyright (C) 2003, 2007, 2010 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +(define_automaton "arm") + +;; Write buffer +; +; Strictly, we should model a 4-deep write buffer for ARM7xx based chips +; +; The write buffer on some of the arm6 processors is hard to model exactly. +; There is room in the buffer for up to two addresses and up to eight words +; of memory, but the two needn't be split evenly. When writing the two +; addresses are fully pipelined. However, a read from memory that is not +; currently in the cache will block until the writes have completed. +; It is normally the case that FCLK and MCLK will be in the ratio 2:1, so +; writes will take 2 FCLK cycles per word, if FCLK and MCLK are asynchronous +; (they aren't allowed to be at present) then there is a startup cost of 1MCLK +; cycle to add as well. 
+(define_cpu_unit "write_buf" "arm") + +;; Write blockage unit +; +; The write_blockage unit models (partially), the fact that reads will stall +; until the write buffer empties. +; The f_mem_r and r_mem_f could also block, but they are to the stack, +; so we don't model them here +(define_cpu_unit "write_blockage" "arm") + +;; Core +; +(define_cpu_unit "core" "arm") + +(define_insn_reservation "r_mem_f_wbuf" 5 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "r_mem_f"))) + "core+write_buf*3") + +(define_insn_reservation "store_wbuf" 5 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store1"))) + "core+write_buf*3+write_blockage*5") + +(define_insn_reservation "store2_wbuf" 7 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store2"))) + "core+write_buf*4+write_blockage*7") + +(define_insn_reservation "store3_wbuf" 9 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store3"))) + "core+write_buf*5+write_blockage*9") + +(define_insn_reservation "store4_wbuf" 11 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store4"))) + "core+write_buf*6+write_blockage*11") + +(define_insn_reservation "store2" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store2"))) + "core*3") + +(define_insn_reservation "store3" 4 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store3"))) + "core*4") + +(define_insn_reservation "store4" 5 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store4"))) + "core*5") + +(define_insn_reservation "store_ldsched" 1 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (eq_attr "type" "store1"))) + "core") + +(define_insn_reservation "load_ldsched_xscale" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "tune" "xscale,iwmmxt,iwmmxt2")))) + "core") + +(define_insn_reservation "load_ldsched" 2 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2")))) + "core") + +(define_insn_reservation "load_or_store" 2 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "!yes") + (eq_attr "type" "load_byte,load1,load2,load3,load4,store1"))) + "core*2") + +(define_insn_reservation "mult" 16 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "no") (eq_attr "type" "mult"))) + "core*16") + +(define_insn_reservation "mult_ldsched_strongarm" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "tune" + "strongarm,strongarm110,strongarm1100,strongarm1110") + (eq_attr "type" "mult")))) + "core*2") + +(define_insn_reservation "mult_ldsched" 4 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "tune" + "!strongarm,strongarm110,strongarm1100,strongarm1110") + (eq_attr "type" "mult")))) + "core*4") + +(define_insn_reservation "multi_cycle" 32 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "core_cycles" "multi") + (eq_attr "type" "!mult,load_byte,load1,load2,load3,load4,store1,store2,store3,store4"))) + "core*32") + +(define_insn_reservation "single_cycle" 1 + (and (eq_attr "generic_sched" "yes") + (eq_attr "core_cycles" "single")) + "core") diff --git 
a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml new file mode 100644 index 000000000..221edd2aa --- /dev/null +++ b/gcc/config/arm/arm-ldmstm.ml @@ -0,0 +1,332 @@ +(* Auto-generate ARM ldm/stm patterns + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Run with: + ocaml arm-ldmstm.ml >/path/to/gcc/config/arm/ldmstm.md +*) + +type amode = IA | IB | DA | DB + +type optype = IN | OUT | INOUT + +let rec string_of_addrmode addrmode = + match addrmode with + IA -> "ia" | IB -> "ib" | DA -> "da" | DB -> "db" + +let rec initial_offset addrmode nregs = + match addrmode with + IA -> 0 + | IB -> 4 + | DA -> -4 * nregs + 4 + | DB -> -4 * nregs + +let rec final_offset addrmode nregs = + match addrmode with + IA -> nregs * 4 + | IB -> nregs * 4 + | DA -> -4 * nregs + | DB -> -4 * nregs + +let constr thumb = + if thumb then "l" else "rk" + +let inout_constr op_type = + match op_type with + OUT -> "=&" + | INOUT -> "+&" + | IN -> "" + +let destreg nregs first op_type thumb = + if not first then + Printf.sprintf "(match_dup %d)" (nregs + 1) + else + Printf.sprintf ("(match_operand:SI %d \"s_register_operand\" \"%s%s\")") + (nregs + 1) (inout_constr op_type) (constr thumb) + +let write_ldm_set thumb nregs offset opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (match_operand:SI %d \"arm_hard_register_operand\" \"\")\n" opnr; + Printf.printf "%s (mem:SI " indent; + begin if offset != 0 then Printf.printf "(plus:SI " end; + Printf.printf "%s" (destreg nregs first IN thumb); + begin if offset != 0 then Printf.printf "\n%s (const_int %d))" indent offset end; + Printf.printf "))" + +let write_stm_set thumb nregs offset opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (mem:SI "; + begin if offset != 0 then Printf.printf "(plus:SI " end; + Printf.printf "%s" (destreg nregs first IN thumb); + begin if offset != 0 then Printf.printf " (const_int %d))" offset end; + Printf.printf ")\n%s (match_operand:SI %d \"arm_hard_register_operand\" \"\"))" indent opnr + +let write_ldm_peep_set extra_indent nregs opnr first = + let indent = " " ^ extra_indent in + Printf.printf "%s" (if first then extra_indent ^ " [" else indent); + Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr; + Printf.printf "%s (match_operand:SI %d \"memory_operand\" \"\"))" indent (nregs + opnr) + +let write_stm_peep_set extra_indent nregs opnr first = + let indent = " " ^ extra_indent in + Printf.printf "%s" (if first then extra_indent ^ " [" else indent); + Printf.printf "(set (match_operand:SI %d \"memory_operand\" 
\"\")\n" (nregs + opnr); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\"))" indent opnr + +let write_any_load optype nregs opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr; + Printf.printf "%s (match_operand:SI %d \"%s\" \"\"))" indent (nregs * 2 + opnr) optype + +let write_const_store nregs opnr first = + let indent = " " in + Printf.printf "%s(set (match_operand:SI %d \"memory_operand\" \"\")\n" indent (nregs + opnr); + Printf.printf "%s (match_dup %d))" indent opnr + +let write_const_stm_peep_set nregs opnr first = + write_any_load "const_int_operand" nregs opnr first; + Printf.printf "\n"; + write_const_store nregs opnr false + + +let rec write_pat_sets func opnr offset first n_left = + func offset opnr first; + begin + if n_left > 1 then begin + Printf.printf "\n"; + write_pat_sets func (opnr + 1) (offset + 4) false (n_left - 1); + end else + Printf.printf "]" + end + +let rec write_peep_sets func opnr first n_left = + func opnr first; + begin + if n_left > 1 then begin + Printf.printf "\n"; + write_peep_sets func (opnr + 1) false (n_left - 1); + end + end + +let can_thumb addrmode update is_store = + match addrmode, update, is_store with + (* Thumb1 mode only supports IA with update. However, for LDMIA, + if the address register also appears in the list of loaded + registers, the loaded value is stored, hence the RTL pattern + to describe such an insn does not have an update. We check + in the match_parallel predicate that the condition described + above is met. *) + IA, _, false -> true + | IA, true, true -> true + | _ -> false + +let target addrmode thumb = + match addrmode, thumb with + IA, true -> "TARGET_THUMB1" + | IA, false -> "TARGET_32BIT" + | DB, false -> "TARGET_32BIT" + | _, false -> "TARGET_ARM" + +let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = + let astr = string_of_addrmode addrmode in + Printf.printf "(define_insn \"*%s%s%d_%s%s\"\n" + (if thumb then "thumb_" else "") name nregs astr + (if update then "_update" else ""); + Printf.printf " [(match_parallel 0 \"%s_multiple_operation\"\n" ls; + begin + if update then begin + Printf.printf " [(set %s\n (plus:SI %s" + (destreg nregs true INOUT thumb) (destreg nregs false IN thumb); + Printf.printf " (const_int %d)))\n" + (final_offset addrmode nregs) + end + end; + write_pat_sets + (write_set_fn thumb nregs) 1 + (initial_offset addrmode nregs) + (not update) nregs; + Printf.printf ")]\n \"%s && XVECLEN (operands[0], 0) == %d\"\n" + (target addrmode thumb) + (if update then nregs + 1 else nregs); + Printf.printf " \"%s%%(%s%%)\\t%%%d%s, {" + name astr (nregs + 1) (if update then "!" 
else ""); + for n = 1 to nregs; do + Printf.printf "%%%d%s" n (if n < nregs then ", " else "") + done; + Printf.printf "}\"\n"; + Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs; + begin if not thumb then + Printf.printf "\n (set_attr \"predicable\" \"yes\")"; + end; + Printf.printf "])\n\n" + +let write_ldm_pattern addrmode nregs update = + write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update false; + begin if can_thumb addrmode update false then + write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update true; + end + +let write_stm_pattern addrmode nregs update = + write_pattern_1 "stm" "store" addrmode nregs write_stm_set update false; + begin if can_thumb addrmode update true then + write_pattern_1 "stm" "store" addrmode nregs write_stm_set update true; + end + +let write_ldm_commutative_peephole thumb = + let nregs = 2 in + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs; + let indent = " " in + if thumb then begin + Printf.printf "\n%s(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2); + Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1); + Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))]\n" indent (nregs * 2 + 3) + end else begin + Printf.printf "\n%s(parallel\n" indent; + Printf.printf "%s [(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2); + Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1); + Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))\n" indent (nregs * 2 + 3); + Printf.printf "%s (clobber (reg:CC CC_REGNUM))])]\n" indent + end; + Printf.printf " \"(((operands[%d] == operands[0] && operands[%d] == operands[1])\n" (nregs * 2 + 2) (nregs * 2 + 3); + Printf.printf " || (operands[%d] == operands[0] && operands[%d] == operands[1]))\n" (nregs * 2 + 3) (nregs * 2 + 2); + Printf.printf " && peep2_reg_dead_p (%d, operands[0]) && peep2_reg_dead_p (%d, operands[1]))\"\n" (nregs + 1) (nregs + 1); + begin + if thumb then + Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))]\n" + (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3) + else begin + Printf.printf " [(parallel\n"; + Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))\n" + (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3); + Printf.printf " (clobber (reg:CC CC_REGNUM))])]\n" + end + end; + Printf.printf "{\n if (!gen_ldm_seq (operands, %d, true))\n FAIL;\n" nregs; + Printf.printf "})\n\n" + +let write_ldm_peephole nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let write_ldm_peephole_b nregs = + if nregs > 2 then begin + Printf.printf "(define_peephole2\n"; + write_ldm_peep_set "" nregs 0 true; + Printf.printf "\n (parallel\n"; + write_peep_sets (write_ldm_peep_set " " nregs) 1 true (nregs - 1); + Printf.printf "])]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n FAIL;\n})\n\n" nregs + end + +let write_stm_peephole 
nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_stm_peep_set "" nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let write_stm_peephole_b nregs = + if nregs > 2 then begin + Printf.printf "(define_peephole2\n"; + write_stm_peep_set "" nregs 0 true; + Printf.printf "\n (parallel\n"; + write_peep_sets (write_stm_peep_set "" nregs) 1 true (nregs - 1); + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + end + +let write_const_stm_peephole_a nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_const_stm_peep_set nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let write_const_stm_peephole_b nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_any_load "const_int_operand" nregs) 0 true nregs; + Printf.printf "\n"; + write_peep_sets (write_const_store nregs) 0 false nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let patterns () = + let addrmodes = [ IA; IB; DA; DB ] in + let sizes = [ 4; 3; 2] in + List.iter + (fun n -> + List.iter + (fun addrmode -> + write_ldm_pattern addrmode n false; + write_ldm_pattern addrmode n true; + write_stm_pattern addrmode n false; + write_stm_pattern addrmode n true) + addrmodes; + write_ldm_peephole n; + write_ldm_peephole_b n; + write_const_stm_peephole_a n; + write_const_stm_peephole_b n; + write_stm_peephole n;) + sizes; + write_ldm_commutative_peephole false; + write_ldm_commutative_peephole true + +let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) + +(* Do it. *) + +let _ = + print_lines [ +"/* ARM ldm/stm instruction patterns. This file was automatically generated"; +" using arm-ldmstm.ml. Please do not edit manually."; +""; +" Copyright (C) 2010 Free Software Foundation, Inc."; +" Contributed by CodeSourcery."; +""; +" This file is part of GCC."; +""; +" GCC is free software; you can redistribute it and/or modify it"; +" under the terms of the GNU General Public License as published"; +" by the Free Software Foundation; either version 3, or (at your"; +" option) any later version."; +""; +" GCC is distributed in the hope that it will be useful, but WITHOUT"; +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; +" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; +" License for more details."; +""; +" You should have received a copy of the GNU General Public License and"; +" a copy of the GCC Runtime Library Exception along with this program;"; +" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; +" . */"; +""]; + patterns (); diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def new file mode 100644 index 000000000..24e3d90a5 --- /dev/null +++ b/gcc/config/arm/arm-modes.def @@ -0,0 +1,78 @@ +/* Definitions of target machine for GNU compiler, for ARM. + Copyright (C) 2002, 2004, 2007, 2010 Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Extended precision floating point. + FIXME What format is this? */ +FLOAT_MODE (XF, 12, 0); + +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + ? &arm_half_format : &ieee_half_format)); + +/* CCFPEmode should be used with floating inequalities, + CCFPmode should be used with floating equalities. + CC_NOOVmode should be used with SImode integer equalities. + CC_Zmode should be used if only the Z flag is set correctly + CC_Cmode should be used if only the C flag is set correctly, after an + addition. + CC_Nmode should be used if only the N (sign) flag is set correctly + CC_CZmode should be used if only the C and Z flags are correct + (used for DImode unsigned comparisons). + CC_NCVmode should be used if only the N, C, and V flags are correct + (used for DImode signed comparisons). + CCmode should be used otherwise. */ + +CC_MODE (CC_NOOV); +CC_MODE (CC_Z); +CC_MODE (CC_CZ); +CC_MODE (CC_NCV); +CC_MODE (CC_SWP); +CC_MODE (CCFP); +CC_MODE (CCFPE); +CC_MODE (CC_DNE); +CC_MODE (CC_DEQ); +CC_MODE (CC_DLE); +CC_MODE (CC_DLT); +CC_MODE (CC_DGE); +CC_MODE (CC_DGT); +CC_MODE (CC_DLEU); +CC_MODE (CC_DLTU); +CC_MODE (CC_DGEU); +CC_MODE (CC_DGTU); +CC_MODE (CC_C); +CC_MODE (CC_N); + +/* Vector modes. */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ + +/* Opaque integer modes for 3, 4, 6 or 8 Neon double registers (2 is + TImode). */ +INT_MODE (EI, 24); +INT_MODE (OI, 32); +INT_MODE (CI, 48); +INT_MODE (XI, 64); diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h new file mode 100644 index 000000000..f037a456a --- /dev/null +++ b/gcc/config/arm/arm-protos.h @@ -0,0 +1,231 @@ +/* Prototypes for exported functions defined in arm.c and pe.c + Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, + 2009, 2010 Free Software Foundation, Inc. + Contributed by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef GCC_ARM_PROTOS_H +#define GCC_ARM_PROTOS_H + +extern int use_return_insn (int, rtx); +extern enum reg_class arm_regno_class (int); +extern void arm_load_pic_register (unsigned long); +extern int arm_volatile_func (void); +extern const char *arm_output_epilogue (rtx); +extern void arm_expand_prologue (void); +extern const char *arm_strip_name_encoding (const char *); +extern void arm_asm_output_labelref (FILE *, const char *); +extern void thumb2_asm_output_opcode (FILE *); +extern unsigned long arm_current_func_type (void); +extern HOST_WIDE_INT arm_compute_initial_elimination_offset (unsigned int, + unsigned int); +extern HOST_WIDE_INT thumb_compute_initial_elimination_offset (unsigned int, + unsigned int); +extern unsigned int arm_dbx_register_number (unsigned int); +extern void arm_output_fn_unwind (FILE *, bool); + + +#ifdef RTX_CODE +extern bool arm_vector_mode_supported_p (enum machine_mode); +extern bool arm_small_register_classes_for_mode_p (enum machine_mode); +extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); +extern int const_ok_for_arm (HOST_WIDE_INT); +extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int); +extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); +extern int legitimate_pic_operand_p (rtx); +extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); +extern rtx legitimize_tls_address (rtx, rtx); +extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int); +extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT); +extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); +extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); +extern int arm_const_double_rtx (rtx); +extern int neg_const_double_rtx_ok_for_fpa (rtx); +extern int vfp3_const_double_rtx (rtx); +extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); +extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, + int *); +extern char *neon_output_logic_immediate (const char *, rtx *, + enum machine_mode, int, int); +extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, + rtx (*) (rtx, rtx, rtx)); +extern rtx neon_make_constant (rtx); +extern void neon_expand_vector_init (rtx, rtx); +extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern HOST_WIDE_INT neon_element_bits (enum machine_mode); +extern void neon_reinterpret (rtx, rtx); +extern void neon_emit_pair_result_insn (enum machine_mode, + rtx (*) (rtx, rtx, rtx, rtx), + rtx, rtx, rtx); +extern void neon_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int); +extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, + bool); +extern bool arm_tls_referenced_p (rtx); +extern bool arm_cannot_force_const_mem (rtx); + +extern int cirrus_memory_offset (rtx); +extern int arm_coproc_mem_operand (rtx, bool); +extern int neon_vector_mem_operand (rtx, int); +extern int neon_struct_mem_operand (rtx); +extern int arm_no_early_store_addr_dep (rtx, rtx); +extern int arm_early_store_addr_dep (rtx, rtx); +extern int arm_early_load_addr_dep (rtx, rtx); +extern int arm_no_early_alu_shift_dep (rtx, rtx); +extern int arm_no_early_alu_shift_value_dep (rtx, rtx); +extern int arm_no_early_mul_dep (rtx, rtx); +extern int arm_mac_accumulator_is_mul_result (rtx, rtx); + +extern int tls_mentioned_p (rtx); +extern int 
symbol_mentioned_p (rtx); +extern int label_mentioned_p (rtx); +extern RTX_CODE minmax_code (rtx); +extern int adjacent_mem_locations (rtx, rtx); +extern bool gen_ldm_seq (rtx *, int, bool); +extern bool gen_stm_seq (rtx *, int); +extern bool gen_const_stm_seq (rtx *, int); +extern rtx arm_gen_load_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); +extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); +extern int arm_gen_movmemqi (rtx *); +extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); +extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx, + HOST_WIDE_INT); +extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx); +extern rtx arm_gen_return_addr_mask (void); +extern void arm_reload_in_hi (rtx *); +extern void arm_reload_out_hi (rtx *); +extern int arm_const_double_inline_cost (rtx); +extern bool arm_const_double_by_parts (rtx); +extern bool arm_const_double_by_immediates (rtx); +extern const char *fp_immediate_constant (rtx); +extern void arm_emit_call_insn (rtx, rtx); +extern const char *output_call (rtx *); +extern const char *output_call_mem (rtx *); +void arm_emit_movpair (rtx, rtx); +extern const char *output_mov_long_double_fpa_from_arm (rtx *); +extern const char *output_mov_long_double_arm_from_fpa (rtx *); +extern const char *output_mov_long_double_arm_from_arm (rtx *); +extern const char *output_mov_double_fpa_from_arm (rtx *); +extern const char *output_mov_double_arm_from_fpa (rtx *); +extern const char *output_move_double (rtx *); +extern const char *output_move_quad (rtx *); +extern const char *output_move_vfp (rtx *operands); +extern const char *output_move_neon (rtx *operands); +extern int arm_attr_length_move_neon (rtx); +extern int arm_address_offset_is_imm (rtx); +extern const char *output_add_immediate (rtx *); +extern const char *arithmetic_instr (rtx, int); +extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int); +extern const char *output_return_instruction (rtx, int, int); +extern void arm_poke_function_name (FILE *, const char *); +extern void arm_final_prescan_insn (rtx); +extern int arm_debugger_arg_offset (int, rtx); +extern bool arm_is_long_call_p (tree); +extern int arm_emit_vector_const (FILE *, rtx); +extern void arm_emit_fp16_const (rtx c); +extern const char * arm_output_load_gr (rtx *); +extern const char *vfp_output_fstmd (rtx *); +extern void arm_set_return_address (rtx, rtx); +extern int arm_eliminable_register (rtx); +extern const char *arm_output_shift(rtx *, int); +extern void arm_expand_sync (enum machine_mode, struct arm_sync_generator *, + rtx, rtx, rtx, rtx); +extern const char *arm_output_memory_barrier (rtx *); +extern const char *arm_output_sync_insn (rtx, rtx *); +extern unsigned int arm_sync_loop_insns (rtx , rtx *); + +#if defined TREE_CODE +extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); +extern bool arm_pad_arg_upward (enum machine_mode, const_tree); +extern bool arm_pad_reg_upward (enum machine_mode, tree, int); +#endif +extern int arm_apply_result_size (void); +extern rtx aapcs_libcall_value (enum machine_mode); + +#endif /* RTX_CODE */ + +extern int arm_float_words_big_endian (void); + +/* Thumb functions. 
*/ +extern void arm_init_expanders (void); +extern const char *thumb_unexpanded_epilogue (void); +extern void thumb1_expand_prologue (void); +extern void thumb1_expand_epilogue (void); +#ifdef TREE_CODE +extern int is_called_in_ARM_mode (tree); +#endif +extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); +#ifdef RTX_CODE +extern void thumb1_final_prescan_insn (rtx); +extern void thumb2_final_prescan_insn (rtx); +extern const char *thumb_load_double_from_address (rtx *); +extern const char *thumb_output_move_mem_multiple (int, rtx *); +extern const char *thumb_call_via_reg (rtx); +extern void thumb_expand_movmemqi (rtx *); +extern rtx arm_return_addr (int, rtx); +extern void thumb_reload_out_hi (rtx *); +extern void thumb_reload_in_hi (rtx *); +extern void thumb_set_return_address (rtx, rtx); +extern const char *thumb1_output_casesi (rtx *); +extern const char *thumb2_output_casesi (rtx *); +#endif + +/* Defined in pe.c. */ +extern int arm_dllexport_name_p (const char *); +extern int arm_dllimport_name_p (const char *); + +#ifdef TREE_CODE +extern void arm_pe_unique_section (tree, int); +extern void arm_pe_encode_section_info (tree, rtx, int); +extern int arm_dllexport_p (tree); +extern int arm_dllimport_p (tree); +extern void arm_mark_dllexport (tree); +extern void arm_mark_dllimport (tree); +#endif + +extern void arm_pr_long_calls (struct cpp_reader *); +extern void arm_pr_no_long_calls (struct cpp_reader *); +extern void arm_pr_long_calls_off (struct cpp_reader *); + +extern void arm_lang_object_attributes_init(void); + +extern const char *arm_mangle_type (const_tree); + +extern void arm_order_regs_for_local_alloc (void); + +#ifdef RTX_CODE +/* This needs to be here because we need RTX_CODE and similar. */ + +struct tune_params +{ + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); + int constant_limit; + int num_prefetch_slots; + int l1_cache_size; + int l1_cache_line_size; +}; + +extern const struct tune_params *current_tune; +#endif /* RTX_CODE */ + +#endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md new file mode 100644 index 000000000..9b664e7e6 --- /dev/null +++ b/gcc/config/arm/arm-tune.md @@ -0,0 +1,5 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically by gentune.sh from arm-cores.def +(define_attr "tune" + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c new file mode 100644 index 000000000..c3c5aa168 --- /dev/null +++ b/gcc/config/arm/arm.c @@ -0,0 +1,23712 @@ +/* Output routines for GCC for ARM. + Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. 
+ Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "obstack.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "reload.h" +#include "function.h" +#include "expr.h" +#include "optabs.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "cgraph.h" +#include "ggc.h" +#include "except.h" +#include "c-family/c-pragma.h" /* ??? */ +#include "integrate.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "debug.h" +#include "langhooks.h" +#include "df.h" +#include "intl.h" +#include "libfuncs.h" +#include "params.h" + +/* Forward definitions of types. */ +typedef struct minipool_node Mnode; +typedef struct minipool_fixup Mfix; + +void (*arm_lang_output_object_attributes_hook)(void); + +/* Forward function declarations. */ +static bool arm_needs_doubleword_align (enum machine_mode, const_tree); +static int arm_compute_static_chain_stack_bytes (void); +static arm_stack_offsets *arm_get_frame_offsets (void); +static void arm_add_gc_roots (void); +static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int, int); +static unsigned bit_count (unsigned long); +static int arm_address_register_rtx_p (rtx, int); +static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int); +static int thumb2_legitimate_index_p (enum machine_mode, rtx, int); +static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int); +static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); +static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); +inline static int thumb1_index_register_rtx_p (rtx, int); +static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); +static int thumb_far_jump_used_p (void); +static bool thumb_force_lr_save (void); +static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); +static rtx emit_sfm (int, int); +static unsigned arm_size_return_regs (void); +static bool arm_assemble_integer (rtx, unsigned int, int); +static void arm_print_operand (FILE *, rtx, int); +static void arm_print_operand_address (FILE *, rtx); +static bool arm_print_operand_punct_valid_p (unsigned char code); +static const char *fp_const_from_val (REAL_VALUE_TYPE *); +static arm_cc get_arm_condition_code (rtx); +static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); +static rtx is_jump_table (rtx); +static const char *output_multi_immediate (rtx *, const char *, const char *, + int, HOST_WIDE_INT); +static const char *shift_op (rtx, HOST_WIDE_INT *); +static struct machine_function *arm_init_machine_status (void); +static void thumb_exit (FILE *, int); 
+static rtx is_jump_table (rtx); +static HOST_WIDE_INT get_jump_table_size (rtx); +static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT); +static Mnode *add_minipool_forward_ref (Mfix *); +static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT); +static Mnode *add_minipool_backward_ref (Mfix *); +static void assign_minipool_offsets (Mfix *); +static void arm_print_value (FILE *, rtx); +static void dump_minipool (rtx); +static int arm_barrier_cost (rtx); +static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT); +static void push_minipool_barrier (rtx, HOST_WIDE_INT); +static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode, + rtx); +static void arm_reorg (void); +static bool note_invalid_constants (rtx, HOST_WIDE_INT, int); +static unsigned long arm_compute_save_reg0_reg12_mask (void); +static unsigned long arm_compute_save_reg_mask (void); +static unsigned long arm_isr_value (tree); +static unsigned long arm_compute_func_type (void); +static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); +static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); +static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); +#endif +static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT); +static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); +static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT); +static int arm_comp_type_attributes (const_tree, const_tree); +static void arm_set_default_type_attributes (tree); +static int arm_adjust_cost (rtx, rtx, rtx, int); +static int count_insns_for_constant (HOST_WIDE_INT, int); +static int arm_get_strip_length (int); +static bool arm_function_ok_for_sibcall (tree, tree); +static enum machine_mode arm_promote_function_mode (const_tree, + enum machine_mode, int *, + const_tree, int); +static bool arm_return_in_memory (const_tree, const_tree); +static rtx arm_function_value (const_tree, const_tree, bool); +static rtx arm_libcall_value (enum machine_mode, const_rtx); + +static void arm_internal_label (FILE *, const char *, unsigned long); +static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); +static bool arm_have_conditional_execution (void); +static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); +static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); +static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_rtx_costs (rtx, int, int, int *, bool); +static int arm_address_cost (rtx, bool); +static bool arm_memory_load_p (rtx); +static bool arm_cirrus_insn_p (rtx); +static void cirrus_reorg (rtx); +static void arm_init_builtins (void); +static void arm_init_iwmmxt_builtins (void); +static rtx safe_vector_operand (rtx, enum machine_mode); +static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); +static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); +static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +static void emit_constant_insn (rtx cond, rtx pattern); +static rtx emit_set_insn (rtx, rtx); +static int 
arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); +static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree); +static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, + const_tree); +static int aapcs_select_return_coproc (const_tree, const_tree); + +#ifdef OBJECT_FORMAT_ELF +static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; +static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; +#endif +#ifndef ARM_PE +static void arm_encode_section_info (tree, rtx, int); +#endif + +static void arm_file_end (void); +static void arm_file_start (void); + +static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, + tree, int *, int); +static bool arm_pass_by_reference (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static bool arm_promote_prototypes (const_tree); +static bool arm_default_short_enums (void); +static bool arm_align_anon_bitfield (void); +static bool arm_return_in_msb (const_tree); +static bool arm_must_pass_in_stack (enum machine_mode, const_tree); +static bool arm_return_in_memory (const_tree, const_tree); +#if ARM_UNWIND_INFO +static void arm_unwind_emit (FILE *, rtx); +static bool arm_output_ttype (rtx); +static void arm_asm_emit_except_personality (rtx); +static void arm_asm_init_sections (void); +#endif +static enum unwind_info_type arm_except_unwind_info (struct gcc_options *); +static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); +static rtx arm_dwarf_register_span (rtx); + +static tree arm_cxx_guard_type (void); +static bool arm_cxx_guard_mask_bit (void); +static tree arm_get_cookie_size (tree); +static bool arm_cookie_has_size (void); +static bool arm_cxx_cdtor_returns_this (void); +static bool arm_cxx_key_method_may_be_inline (void); +static void arm_cxx_determine_class_data_visibility (tree); +static bool arm_cxx_class_data_always_comdat (void); +static bool arm_cxx_use_aeabi_atexit (void); +static void arm_init_libfuncs (void); +static tree arm_build_builtin_va_list (void); +static void arm_expand_builtin_va_start (tree, rtx); +static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); +static void arm_option_override (void); +static bool arm_handle_option (size_t, const char *, int); +static void arm_target_help (void); +static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); +static bool arm_cannot_copy_insn_p (rtx); +static bool arm_tls_symbol_p (rtx x); +static int arm_issue_rate (void); +static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; +static bool arm_output_addr_const_extra (FILE *, rtx); +static bool arm_allocate_stack_slots_for_args (void); +static const char *arm_invalid_parameter_type (const_tree t); +static const char *arm_invalid_return_type (const_tree t); +static tree arm_promoted_type (const_tree t); +static tree arm_convert_to_type (tree type, tree expr); +static bool arm_scalar_mode_supported_p (enum machine_mode); +static bool arm_frame_pointer_required (void); +static bool arm_can_eliminate (const int, const int); +static void arm_asm_trampoline_template (FILE *); +static void arm_trampoline_init (rtx, tree, rtx); +static rtx arm_trampoline_adjust_address (rtx); +static rtx arm_pic_static_addr (rtx orig, rtx reg); +static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); +static 
bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); +static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *); +static enum machine_mode arm_preferred_simd_mode (enum machine_mode); +static bool arm_class_likely_spilled_p (reg_class_t); +static HOST_WIDE_INT arm_vector_alignment (const_tree type); +static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); +static bool arm_builtin_support_vector_misalignment (enum machine_mode mode, + const_tree type, + int misalignment, + bool is_packed); +static void arm_conditional_register_usage (void); +static reg_class_t arm_preferred_rename_class (reg_class_t rclass); + + +/* Table of machine attributes. */ +static const struct attribute_spec arm_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + /* Function calls made to this symbol must be done indirectly, because + it may lie outside of the 26 bit addressing range of a normal function + call. */ + { "long_call", 0, 0, false, true, true, NULL }, + /* Whereas these functions are always known to reside within the 26 bit + addressing range. */ + { "short_call", 0, 0, false, true, true, NULL }, + /* Specify the procedure call conventions for a function. */ + { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute }, + /* Interrupt Service Routines have special prologue and epilogue requirements. */ + { "isr", 0, 1, false, false, false, arm_handle_isr_attribute }, + { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute }, + { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute }, +#ifdef ARM_PE + /* ARM/PE has three new attributes: + interfacearm - ? + dllexport - for exporting a function/variable that will live in a dll + dllimport - for importing a function/variable from a dll + + Microsoft allows multiple declspecs in one __declspec, separating + them with spaces. We do NOT support this. Instead, use __declspec + multiple times. + */ + { "dllimport", 0, 0, true, false, false, NULL }, + { "dllexport", 0, 0, true, false, false, NULL }, + { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute }, +#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES + { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, + { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, + { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute }, +#endif + { NULL, 0, 0, false, false, false, NULL } +}; + +/* Set default optimization options. */ +static const struct default_options arm_option_optimization_table[] = + { + /* Enable section anchors by default at -O1 or higher. */ + { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Initialize the GCC target structure. 
*/ +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +#undef TARGET_MERGE_DECL_ATTRIBUTES +#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes +#endif + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arm_attribute_table + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START arm_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END arm_file_end + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP NULL +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER arm_assemble_integer + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND arm_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p + +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG) +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION arm_handle_option +#undef TARGET_HELP +#define TARGET_HELP arm_target_help +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE arm_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes + +#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES +#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST arm_adjust_cost + +#undef TARGET_ENCODE_SECTION_INFO +#ifdef ARM_PE +#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info +#else +#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info +#endif + +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding + +#undef TARGET_ASM_INTERNAL_LABEL +#define TARGET_ASM_INTERNAL_LABEL arm_internal_label + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arm_function_value + +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE arm_libcall_value + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS arm_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arm_address_cost + +#undef TARGET_SHIFT_TRUNCATION_MASK +#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS arm_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define 
TARGET_EXPAND_BUILTIN arm_expand_builtin + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS arm_init_libfuncs + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG arm_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs + +#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS +#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT arm_trampoline_init +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address + +#undef TARGET_DEFAULT_SHORT_ENUMS +#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums + +#undef TARGET_ALIGN_ANON_BITFIELD +#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield + +#undef TARGET_NARROW_VOLATILE_BITFIELD +#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false + +#undef TARGET_CXX_GUARD_TYPE +#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type + +#undef TARGET_CXX_GUARD_MASK_BIT +#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit + +#undef TARGET_CXX_GET_COOKIE_SIZE +#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size + +#undef TARGET_CXX_COOKIE_HAS_SIZE +#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size + +#undef TARGET_CXX_CDTOR_RETURNS_THIS +#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this + +#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE +#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline + +#undef TARGET_CXX_USE_AEABI_ATEXIT +#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit + +#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY +#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \ + arm_cxx_determine_class_data_visibility + +#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT +#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB arm_return_in_msb + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arm_return_in_memory + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack + +#if ARM_UNWIND_INFO +#undef TARGET_ASM_UNWIND_EMIT +#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit + +/* EABI unwinding tables use a different format for the typeinfo tables. 
*/ +#undef TARGET_ASM_TTYPE +#define TARGET_ASM_TTYPE arm_output_ttype + +#undef TARGET_ARM_EABI_UNWINDER +#define TARGET_ARM_EABI_UNWINDER true + +#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY +#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality + +#undef TARGET_ASM_INIT_SECTIONS +#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections +#endif /* ARM_UNWIND_INFO */ + +#undef TARGET_EXCEPT_UNWIND_INFO +#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info + +#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC +#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec + +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span + +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif + +#undef TARGET_HAVE_CONDITIONAL_EXECUTION +#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem + +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 4095 + +/* The minimum is set such that the total size of the block + for a particular anchor is -4088 + 1 + 4095 bytes, which is + divisible by eight, ensuring natural spacing of anchors. */ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -4088 + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE arm_issue_rate + +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE arm_mangle_type + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr + +#ifdef HAVE_AS_TLS +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel +#endif + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p + +#undef TARGET_INVALID_PARAMETER_TYPE +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type + +#undef TARGET_INVALID_RETURN_TYPE +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type + +#undef TARGET_PROMOTED_TYPE +#define TARGET_PROMOTED_TYPE arm_promoted_type + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE arm_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p + +#undef TARGET_VECTOR_ALIGNMENT +#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment + +#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + arm_vector_alignment_reachable + +#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT +#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + arm_builtin_support_vector_misalignment + +#undef TARGET_PREFERRED_RENAME_CLASS +#define TARGET_PREFERRED_RENAME_CLASS \ + arm_preferred_rename_class + +struct gcc_target targetm = TARGET_INITIALIZER; 
+ +/* Obstack for minipool constant handling. */ +static struct obstack minipool_obstack; +static char * minipool_startobj; + +/* The maximum number of insns skipped which + will be conditionalised if possible. */ +static int max_insns_skipped = 5; + +extern FILE * asm_out_file; + +/* True if we are currently building a constant table. */ +int making_const_table; + +/* The processor for which instructions should be scheduled. */ +enum processor_type arm_tune = arm_none; + +/* The current tuning set. */ +const struct tune_params *current_tune; + +/* Which floating point hardware to schedule for. */ +int arm_fpu_attr; + +/* Which floating popint hardware to use. */ +const struct arm_fpu_desc *arm_fpu_desc; + +/* Whether to use floating point hardware. */ +enum float_abi_type arm_float_abi; + +/* Which __fp16 format to use. */ +enum arm_fp16_format_type arm_fp16_format; + +/* Which ABI to use. */ +enum arm_abi_type arm_abi; + +/* Which thread pointer model to use. */ +enum arm_tp_type target_thread_pointer = TP_AUTO; + +/* Used to parse -mstructure_size_boundary command line option. */ +int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY; + +/* Used for Thumb call_via trampolines. */ +rtx thumb_call_via_label[14]; +static int thumb_call_reg_needed; + +/* Bit values used to identify processor capabilities. */ +#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */ +#define FL_ARCH3M (1 << 1) /* Extended multiply */ +#define FL_MODE26 (1 << 2) /* 26-bit mode support */ +#define FL_MODE32 (1 << 3) /* 32-bit mode support */ +#define FL_ARCH4 (1 << 4) /* Architecture rel 4 */ +#define FL_ARCH5 (1 << 5) /* Architecture rel 5 */ +#define FL_THUMB (1 << 6) /* Thumb aware */ +#define FL_LDSCHED (1 << 7) /* Load scheduling necessary */ +#define FL_STRONG (1 << 8) /* StrongARM */ +#define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */ +#define FL_XSCALE (1 << 10) /* XScale */ +#define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */ +#define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds + media instructions. */ +#define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */ +#define FL_WBUF (1 << 14) /* Schedule for write buffer ops. + Note: ARM6 & 7 derivatives only. */ +#define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */ +#define FL_THUMB2 (1 << 16) /* Thumb-2. */ +#define FL_NOTM (1 << 17) /* Instructions not present in the 'M' + profile. */ +#define FL_DIV (1 << 18) /* Hardware divide. */ +#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ +#define FL_NEON (1 << 20) /* Neon instructions. */ +#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M + architecture. */ +#define FL_ARCH7 (1 << 22) /* Architecture 7. */ + +#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ + +/* Flags that only effect tuning, not available instructions. 
*/ +#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ + | FL_CO_PROC) + +#define FL_FOR_ARCH2 FL_NOTM +#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) +#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) +#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) +#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB) +#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5) +#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB) +#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E) +#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB) +#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE +#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6) +#define FL_FOR_ARCH6J FL_FOR_ARCH6 +#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) +#define FL_FOR_ARCH6Z FL_FOR_ARCH6 +#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K +#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) +#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) +#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) +#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) +#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) +#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) +#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) + +/* The bits in this mask specify which + instructions we are allowed to generate. */ +static unsigned long insn_flags = 0; + +/* The bits in this mask specify which instruction scheduling options should + be used. */ +static unsigned long tune_flags = 0; + +/* The following are used in the arm.md file as equivalents to bits + in the above two flag variables. */ + +/* Nonzero if this chip supports the ARM Architecture 3M extensions. */ +int arm_arch3m = 0; + +/* Nonzero if this chip supports the ARM Architecture 4 extensions. */ +int arm_arch4 = 0; + +/* Nonzero if this chip supports the ARM Architecture 4t extensions. */ +int arm_arch4t = 0; + +/* Nonzero if this chip supports the ARM Architecture 5 extensions. */ +int arm_arch5 = 0; + +/* Nonzero if this chip supports the ARM Architecture 5E extensions. */ +int arm_arch5e = 0; + +/* Nonzero if this chip supports the ARM Architecture 6 extensions. */ +int arm_arch6 = 0; + +/* Nonzero if this chip supports the ARM 6K extensions. */ +int arm_arch6k = 0; + +/* Nonzero if this chip supports the ARM 7 extensions. */ +int arm_arch7 = 0; + +/* Nonzero if instructions not present in the 'M' profile can be used. */ +int arm_arch_notm = 0; + +/* Nonzero if instructions present in ARMv7E-M can be used. */ +int arm_arch7em = 0; + +/* Nonzero if this chip can benefit from load scheduling. */ +int arm_ld_sched = 0; + +/* Nonzero if this chip is a StrongARM. */ +int arm_tune_strongarm = 0; + +/* Nonzero if this chip is a Cirrus variant. */ +int arm_arch_cirrus = 0; + +/* Nonzero if this chip supports Intel Wireless MMX technology. */ +int arm_arch_iwmmxt = 0; + +/* Nonzero if this chip is an XScale. */ +int arm_arch_xscale = 0; + +/* Nonzero if tuning for XScale */ +int arm_tune_xscale = 0; + +/* Nonzero if we want to tune for stores that access the write-buffer. + This typically means an ARM6 or ARM7 with MMU or MPU. */ +int arm_tune_wbuf = 0; + +/* Nonzero if tuning for Cortex-A9. */ +int arm_tune_cortex_a9 = 0; + +/* Nonzero if generating Thumb instructions. */ +int thumb_code = 0; + +/* Nonzero if generating Thumb-1 instructions. */ +int thumb1_code = 0; + +/* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around + problems in GLD which doesn't understand that armv5t code is + interworking clean. 
*/ +int arm_cpp_interwork = 0; + +/* Nonzero if chip supports Thumb 2. */ +int arm_arch_thumb2; + +/* Nonzero if chip supports integer division instruction. */ +int arm_arch_hwdiv; + +/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, + we must report the mode of the memory reference from + TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ +enum machine_mode output_memory_reference_mode; + +/* The register number to be used for the PIC offset register. */ +unsigned arm_pic_register = INVALID_REGNUM; + +/* Set to 1 after arm_reorg has started. Reset to start at the start of + the next function. */ +static int after_arm_reorg = 0; + +enum arm_pcs arm_pcs_default; + +/* For an explanation of these variables, see final_prescan_insn below. */ +int arm_ccfsm_state; +/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ +enum arm_cond_code arm_current_cc; + +rtx arm_target_insn; +int arm_target_label; +/* The number of conditionally executed insns, including the current insn. */ +int arm_condexec_count = 0; +/* A bitmask specifying the patterns for the IT block. + Zero means do not output an IT block before this insn. */ +int arm_condexec_mask = 0; +/* The number of bits used in arm_condexec_mask. */ +int arm_condexec_masklen = 0; + +/* The condition codes of the ARM, and the inverse function. */ +static const char * const arm_condition_codes[] = +{ + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" +}; + +/* The register numbers in sequence, for passing to arm_gen_load_multiple. */ +int arm_regs_in_sequence[] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +}; + +#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") +#define streq(string1, string2) (strcmp (string1, string2) == 0) + +#define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ + | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ + | (1 << PIC_OFFSET_TABLE_REGNUM))) + +/* Initialization code. */ + +struct processors +{ + const char *const name; + enum processor_type core; + const char *arch; + const unsigned long flags; + const struct tune_params *const tune; +}; + + +#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 +#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ + prefetch_slots, \ + l1_size, \ + l1_line_size + +const struct tune_params arm_slowmul_tune = +{ + arm_slowmul_rtx_costs, + NULL, + 3, + ARM_PREFETCH_NOT_BENEFICIAL +}; + +const struct tune_params arm_fastmul_tune = +{ + arm_fastmul_rtx_costs, + NULL, + 1, + ARM_PREFETCH_NOT_BENEFICIAL +}; + +const struct tune_params arm_xscale_tune = +{ + arm_xscale_rtx_costs, + xscale_sched_adjust_cost, + 2, + ARM_PREFETCH_NOT_BENEFICIAL +}; + +const struct tune_params arm_9e_tune = +{ + arm_9e_rtx_costs, + NULL, + 1, + ARM_PREFETCH_NOT_BENEFICIAL +}; + +const struct tune_params arm_cortex_a9_tune = +{ + arm_9e_rtx_costs, + cortex_a9_sched_adjust_cost, + 1, + ARM_PREFETCH_BENEFICIAL(4,32,32) +}; + +const struct tune_params arm_fa726te_tune = +{ + arm_9e_rtx_costs, + fa726te_sched_adjust_cost, + 1, + ARM_PREFETCH_NOT_BENEFICIAL +}; + + +/* Not all of these give usefully different compilation alternatives, + but there is no simple way of generalizing them. 
*/ +static const struct processors all_cores[] = +{ + /* ARM Cores */ +#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ + {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, +#include "arm-cores.def" +#undef ARM_CORE + {NULL, arm_none, NULL, 0, NULL} +}; + +static const struct processors all_architectures[] = +{ + /* ARM Architectures */ + /* We don't specify tuning costs here as it will be figured out + from the core. */ + + {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, + {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, + {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL}, + {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL}, + {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL}, + /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no + implementations that support it, so we will leave it out for now. */ + {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL}, + {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL}, + {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL}, + {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL}, + {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL}, + {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL}, + {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL}, + {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL}, + {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL}, + {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL}, + {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL}, + {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL}, + {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL}, + {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, + {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, + {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, + {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, + {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, + {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, + {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, + {NULL, arm_none, NULL, 0 , NULL} +}; + + +/* These are populated as commandline arguments are processed, or NULL + if not specified. */ +static const struct processors *arm_selected_arch; +static const struct processors *arm_selected_cpu; +static const struct processors *arm_selected_tune; + +/* The name of the preprocessor macro to define for this architecture. */ + +char arm_arch_name[] = "__ARM_ARCH_0UNK__"; + +/* Available values for -mfpu=. 
*/ + +static const struct arm_fpu_desc all_fpus[] = +{ + {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false}, + {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false}, + {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false}, + {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false}, + {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false}, + {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, + {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true}, + {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false}, + {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true}, + {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false}, + {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true}, + {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false}, + {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true }, + {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true}, + {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true}, + {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true}, + {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true}, + /* Compatibility aliases. */ + {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, +}; + + +struct float_abi +{ + const char * name; + enum float_abi_type abi_type; +}; + + +/* Available values for -mfloat-abi=. */ + +static const struct float_abi all_float_abis[] = +{ + {"soft", ARM_FLOAT_ABI_SOFT}, + {"softfp", ARM_FLOAT_ABI_SOFTFP}, + {"hard", ARM_FLOAT_ABI_HARD} +}; + + +struct fp16_format +{ + const char *name; + enum arm_fp16_format_type fp16_format_type; +}; + + +/* Available values for -mfp16-format=. */ + +static const struct fp16_format all_fp16_formats[] = +{ + {"none", ARM_FP16_FORMAT_NONE}, + {"ieee", ARM_FP16_FORMAT_IEEE}, + {"alternative", ARM_FP16_FORMAT_ALTERNATIVE} +}; + + +struct abi_name +{ + const char *name; + enum arm_abi_type abi_type; +}; + + +/* Available values for -mabi=. */ + +static const struct abi_name arm_all_abis[] = +{ + {"apcs-gnu", ARM_ABI_APCS}, + {"atpcs", ARM_ABI_ATPCS}, + {"aapcs", ARM_ABI_AAPCS}, + {"iwmmxt", ARM_ABI_IWMMXT}, + {"aapcs-linux", ARM_ABI_AAPCS_LINUX} +}; + +/* Supported TLS relocations. */ + +enum tls_reloc { + TLS_GD32, + TLS_LDM32, + TLS_LDO32, + TLS_IE32, + TLS_LE32 +}; + +/* The maximum number of insns to be used when loading a constant. */ +inline static int +arm_constant_limit (bool size_p) +{ + return size_p ? 1 : current_tune->constant_limit; +} + +/* Emit an insn that's a simple single-set. Both the operands must be known + to be valid. */ +inline static rtx +emit_set_insn (rtx x, rtx y) +{ + return emit_insn (gen_rtx_SET (VOIDmode, x, y)); +} + +/* Return the number of bits set in VALUE. */ +static unsigned +bit_count (unsigned long value) +{ + unsigned long count = 0; + + while (value) + { + count++; + value &= value - 1; /* Clear the least-significant set bit. */ + } + + return count; +} + +/* Set up library functions unique to ARM. */ + +static void +arm_init_libfuncs (void) +{ + /* There are no special library functions unless we are using the + ARM BPABI. */ + if (!TARGET_BPABI) + return; + + /* The functions below are described in Section 4 of the "Run-Time + ABI for the ARM architecture", Version 1.0. */ + + /* Double-precision floating-point arithmetic. Table 2. 
*/ + set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd"); + set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv"); + set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul"); + set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg"); + set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub"); + + /* Double-precision comparisons. Table 3. */ + set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq"); + set_optab_libfunc (ne_optab, DFmode, NULL); + set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt"); + set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple"); + set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge"); + set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt"); + set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun"); + + /* Single-precision floating-point arithmetic. Table 4. */ + set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd"); + set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv"); + set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul"); + set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg"); + set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub"); + + /* Single-precision comparisons. Table 5. */ + set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq"); + set_optab_libfunc (ne_optab, SFmode, NULL); + set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt"); + set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple"); + set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge"); + set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt"); + set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun"); + + /* Floating-point to integer conversions. Table 6. */ + set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz"); + set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz"); + set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz"); + set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz"); + set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz"); + set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz"); + set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz"); + set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz"); + + /* Conversions between floating types. Table 7. */ + set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f"); + set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d"); + + /* Integer to floating-point conversions. Table 8. */ + set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d"); + set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d"); + set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d"); + set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d"); + set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f"); + set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f"); + set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f"); + set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f"); + + /* Long long. Table 9. */ + set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul"); + set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod"); + set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod"); + set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl"); + set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr"); + set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr"); + set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp"); + set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp"); + + /* Integer (32/32->32) division. \S 4.3.1. 
*/ + set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod"); + set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod"); + + /* The divmod functions are designed so that they can be used for + plain division, even though they return both the quotient and the + remainder. The quotient is returned in the usual location (i.e., + r0 for SImode, {r0, r1} for DImode), just as would be expected + for an ordinary division routine. Because the AAPCS calling + conventions specify that all of { r0, r1, r2, r3 } are + callee-saved registers, there is no need to tell the compiler + explicitly that those registers are clobbered by these + routines. */ + set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod"); + set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod"); + + /* For SImode division the ABI provides div-without-mod routines, + which are faster. */ + set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv"); + set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv"); + + /* We don't have mod libcalls. Fortunately gcc knows how to use the + divmod libcalls instead. */ + set_optab_libfunc (smod_optab, DImode, NULL); + set_optab_libfunc (umod_optab, DImode, NULL); + set_optab_libfunc (smod_optab, SImode, NULL); + set_optab_libfunc (umod_optab, SImode, NULL); + + /* Half-precision float operations. The compiler handles all operations + with NULL libfuncs by converting the SFmode. */ + switch (arm_fp16_format) + { + case ARM_FP16_FORMAT_IEEE: + case ARM_FP16_FORMAT_ALTERNATIVE: + + /* Conversions. */ + set_conv_libfunc (trunc_optab, HFmode, SFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_f2h_ieee" + : "__gnu_f2h_alternative")); + set_conv_libfunc (sext_optab, SFmode, HFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_h2f_ieee" + : "__gnu_h2f_alternative")); + + /* Arithmetic. */ + set_optab_libfunc (add_optab, HFmode, NULL); + set_optab_libfunc (sdiv_optab, HFmode, NULL); + set_optab_libfunc (smul_optab, HFmode, NULL); + set_optab_libfunc (neg_optab, HFmode, NULL); + set_optab_libfunc (sub_optab, HFmode, NULL); + + /* Comparisons. */ + set_optab_libfunc (eq_optab, HFmode, NULL); + set_optab_libfunc (ne_optab, HFmode, NULL); + set_optab_libfunc (lt_optab, HFmode, NULL); + set_optab_libfunc (le_optab, HFmode, NULL); + set_optab_libfunc (ge_optab, HFmode, NULL); + set_optab_libfunc (gt_optab, HFmode, NULL); + set_optab_libfunc (unord_optab, HFmode, NULL); + break; + + default: + break; + } + + if (TARGET_AAPCS_BASED) + synchronize_libfunc = init_one_libfunc ("__sync_synchronize"); +} + +/* On AAPCS systems, this is the "struct __va_list". */ +static GTY(()) tree va_list_type; + +/* Return the type to use as __builtin_va_list. */ +static tree +arm_build_builtin_va_list (void) +{ + tree va_list_name; + tree ap_field; + + if (!TARGET_AAPCS_BASED) + return std_build_builtin_va_list (); + + /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type + defined as: + + struct __va_list + { + void *__ap; + }; + + The C Library ABI further reinforces this definition in \S + 4.1. + + We must follow this definition exactly. The structure tag + name is visible in C++ mangled names, and thus forms a part + of the ABI. The field name may be used by people who + #include . */ + /* Create the type. */ + va_list_type = lang_hooks.types.make_type (RECORD_TYPE); + /* Give it the required name. 
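[Editorial aside, not part of the imported arm.c: the comment above explains that __aeabi_idivmod and __aeabi_ldivmod return quotient and remainder together, with the quotient in the ordinary result registers, so the same routine can back both / and %. A portable sketch of that calling pattern; divmod_result and my_divmod are hypothetical names, and this does not reproduce the r0/r1 register convention of the real helpers.]

#include <stdio.h>

/* One routine yields both quotient and remainder; a caller that only
   wants the quotient simply ignores the remainder field.  The real
   __aeabi_idivmod returns the pair in r0/r1, which plain C cannot
   express, so this only illustrates the idea.  */
struct divmod_result { int quot; int rem; };

static struct divmod_result
my_divmod (int n, int d)
{
  struct divmod_result r;

  r.quot = n / d;
  r.rem = n % d;
  return r;
}

int
main (void)
{
  struct divmod_result r = my_divmod (17, 5);

  printf ("17 / 5 = %d, 17 %% 5 = %d\n", r.quot, r.rem);
  return 0;
}
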
*/ + va_list_name = build_decl (BUILTINS_LOCATION, + TYPE_DECL, + get_identifier ("__va_list"), + va_list_type); + DECL_ARTIFICIAL (va_list_name) = 1; + TYPE_NAME (va_list_type) = va_list_name; + TYPE_STUB_DECL (va_list_type) = va_list_name; + /* Create the __ap field. */ + ap_field = build_decl (BUILTINS_LOCATION, + FIELD_DECL, + get_identifier ("__ap"), + ptr_type_node); + DECL_ARTIFICIAL (ap_field) = 1; + DECL_FIELD_CONTEXT (ap_field) = va_list_type; + TYPE_FIELDS (va_list_type) = ap_field; + /* Compute its layout. */ + layout_type (va_list_type); + + return va_list_type; +} + +/* Return an expression of type "void *" pointing to the next + available argument in a variable-argument list. VALIST is the + user-level va_list object, of type __builtin_va_list. */ +static tree +arm_extract_valist_ptr (tree valist) +{ + if (TREE_TYPE (valist) == error_mark_node) + return error_mark_node; + + /* On an AAPCS target, the pointer is stored within "struct + va_list". */ + if (TARGET_AAPCS_BASED) + { + tree ap_field = TYPE_FIELDS (TREE_TYPE (valist)); + valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field), + valist, ap_field, NULL_TREE); + } + + return valist; +} + +/* Implement TARGET_EXPAND_BUILTIN_VA_START. */ +static void +arm_expand_builtin_va_start (tree valist, rtx nextarg) +{ + valist = arm_extract_valist_ptr (valist); + std_expand_builtin_va_start (valist, nextarg); +} + +/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ +static tree +arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + valist = arm_extract_valist_ptr (valist); + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); +} + +/* Lookup NAME in SEL. */ + +static const struct processors * +arm_find_cpu (const char *name, const struct processors *sel, const char *desc) +{ + if (!(name && *name)) + return NULL; + + for (; sel->name != NULL; sel++) + { + if (streq (name, sel->name)) + return sel; + } + + error ("bad value (%s) for %s switch", name, desc); + return NULL; +} + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + case OPT_march_: + arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march"); + return true; + + case OPT_mcpu_: + arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu"); + return true; + + case OPT_mhard_float: + target_float_abi_name = "hard"; + return true; + + case OPT_msoft_float: + target_float_abi_name = "soft"; + return true; + + case OPT_mtune_: + arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune"); + return true; + + default: + return true; + } +} + +static void +arm_target_help (void) +{ + int i; + static int columns = 0; + int remaining; + + /* If we have not done so already, obtain the desired maximum width of + the output. Note - this is a duplication of the code at the start of + gcc/opts.c:print_specific_help() - the two copies should probably be + replaced by a single function. */ + if (columns == 0) + { + const char *p; + + p = getenv ("COLUMNS"); + if (p != NULL) + { + int value = atoi (p); + + if (value > 0) + columns = value; + } + + if (columns == 0) + /* Use a reasonable default. */ + columns = 80; + } + + printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n"); + + /* The - 2 is because we know that the last entry in the array is NULL. 
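[Editorial aside, not part of the imported arm.c: arm_build_builtin_va_list above makes __builtin_va_list the one-pointer struct __va_list required by AAPCS 7.1.4. From user code the type is only ever reached through <stdarg.h>; a minimal varargs example follows, with sum_ints being my own name.]

#include <stdarg.h>
#include <stdio.h>

/* Ordinary ISO C varargs code; on an AAPCS target the va_list object
   declared here is the struct __va_list { void *__ap; } built above.  */
static int
sum_ints (int count, ...)
{
  va_list ap;
  int i, total = 0;

  va_start (ap, count);
  for (i = 0; i < count; i++)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}

int
main (void)
{
  printf ("%d\n", sum_ints (3, 10, 20, 12));  /* prints 42 */
  return 0;
}
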
*/ + i = ARRAY_SIZE (all_cores) - 2; + gcc_assert (i > 0); + printf (" %s", all_cores[i].name); + remaining = columns - (strlen (all_cores[i].name) + 4); + gcc_assert (remaining >= 0); + + while (i--) + { + int len = strlen (all_cores[i].name); + + if (remaining > len + 2) + { + printf (", %s", all_cores[i].name); + remaining -= len + 2; + } + else + { + if (remaining > 0) + printf (","); + printf ("\n %s", all_cores[i].name); + remaining = columns - (len + 4); + } + } + + printf ("\n\n Known ARM architectures (for use with the -march= option):\n"); + + i = ARRAY_SIZE (all_architectures) - 2; + gcc_assert (i > 0); + + printf (" %s", all_architectures[i].name); + remaining = columns - (strlen (all_architectures[i].name) + 4); + gcc_assert (remaining >= 0); + + while (i--) + { + int len = strlen (all_architectures[i].name); + + if (remaining > len + 2) + { + printf (", %s", all_architectures[i].name); + remaining -= len + 2; + } + else + { + if (remaining > 0) + printf (","); + printf ("\n %s", all_architectures[i].name); + remaining = columns - (len + 4); + } + } + printf ("\n"); + +} + +/* Fix up any incompatible options that the user has specified. */ +static void +arm_option_override (void) +{ + unsigned i; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + if (arm_selected_arch) + { + if (arm_selected_cpu) + { + /* Check for conflict between mcpu and march. */ + if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE) + { + warning (0, "switch -mcpu=%s conflicts with -march=%s switch", + arm_selected_cpu->name, arm_selected_arch->name); + /* -march wins for code generation. + -mcpu wins for default tuning. */ + if (!arm_selected_tune) + arm_selected_tune = arm_selected_cpu; + + arm_selected_cpu = arm_selected_arch; + } + else + /* -mcpu wins. */ + arm_selected_arch = NULL; + } + else + /* Pick a CPU based on the architecture. */ + arm_selected_cpu = arm_selected_arch; + } + + /* If the user did not specify a processor, choose one for them. */ + if (!arm_selected_cpu) + { + const struct processors * sel; + unsigned int sought; + + arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; + if (!arm_selected_cpu->name) + { +#ifdef SUBTARGET_CPU_DEFAULT + /* Use the subtarget default CPU if none was specified by + configure. */ + arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT]; +#endif + /* Default to ARM6. */ + if (!arm_selected_cpu->name) + arm_selected_cpu = &all_cores[arm6]; + } + + sel = arm_selected_cpu; + insn_flags = sel->flags; + + /* Now check to see if the user has specified some command line + switch that require certain abilities from the cpu. */ + sought = 0; + + if (TARGET_INTERWORK || TARGET_THUMB) + { + sought |= (FL_THUMB | FL_MODE32); + + /* There are no ARM processors that support both APCS-26 and + interworking. Therefore we force FL_MODE26 to be removed + from insn_flags here (if it was set), so that the search + below will always be able to find a compatible processor. */ + insn_flags &= ~FL_MODE26; + } + + if (sought != 0 && ((sought & insn_flags) != sought)) + { + /* Try to locate a CPU type that supports all of the abilities + of the default CPU, plus the extra abilities requested by + the user. 
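[Editorial aside, not part of the imported arm.c: arm_target_help above wraps the known CPU and architecture names to the width read from COLUMNS (defaulting to 80). A standalone sketch of the same wrapping scheme; print_wrapped_list and the sample names are mine.]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Wrap a comma-separated name list to the width given by COLUMNS
   (default 80), mirroring the layout logic of arm_target_help.  */
static void
print_wrapped_list (const char *const *names, int n)
{
  int columns = 80;
  int remaining, i;
  const char *p = getenv ("COLUMNS");

  if (p != NULL && atoi (p) > 0)
    columns = atoi (p);

  printf ("    %s", names[0]);
  remaining = columns - ((int) strlen (names[0]) + 4);

  for (i = 1; i < n; i++)
    {
      int len = (int) strlen (names[i]);

      if (remaining > len + 2)
        {
          printf (", %s", names[i]);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", names[i]);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}

int
main (void)
{
  static const char *const cpus[] = { "arm7tdmi", "arm926ej-s", "xscale",
                                      "cortex-a8", "cortex-a9", "cortex-m3" };

  print_wrapped_list (cpus, 6);
  return 0;
}
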
*/ + for (sel = all_cores; sel->name != NULL; sel++) + if ((sel->flags & sought) == (sought | insn_flags)) + break; + + if (sel->name == NULL) + { + unsigned current_bit_count = 0; + const struct processors * best_fit = NULL; + + /* Ideally we would like to issue an error message here + saying that it was not possible to find a CPU compatible + with the default CPU, but which also supports the command + line options specified by the programmer, and so they + ought to use the -mcpu= command line option to + override the default CPU type. + + If we cannot find a cpu that has both the + characteristics of the default cpu and the given + command line options we scan the array again looking + for a best match. */ + for (sel = all_cores; sel->name != NULL; sel++) + if ((sel->flags & sought) == sought) + { + unsigned count; + + count = bit_count (sel->flags & insn_flags); + + if (count >= current_bit_count) + { + best_fit = sel; + current_bit_count = count; + } + } + + gcc_assert (best_fit); + sel = best_fit; + } + + arm_selected_cpu = sel; + } + } + + gcc_assert (arm_selected_cpu); + /* The selected cpu may be an architecture, so lookup tuning by core ID. */ + if (!arm_selected_tune) + arm_selected_tune = &all_cores[arm_selected_cpu->core]; + + sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch); + insn_flags = arm_selected_cpu->flags; + + arm_tune = arm_selected_tune->core; + tune_flags = arm_selected_tune->flags; + current_tune = arm_selected_tune->tune; + + if (target_fp16_format_name) + { + for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) + { + if (streq (all_fp16_formats[i].name, target_fp16_format_name)) + { + arm_fp16_format = all_fp16_formats[i].fp16_format_type; + break; + } + } + if (i == ARRAY_SIZE (all_fp16_formats)) + error ("invalid __fp16 format option: -mfp16-format=%s", + target_fp16_format_name); + } + else + arm_fp16_format = ARM_FP16_FORMAT_NONE; + + if (target_abi_name) + { + for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) + { + if (streq (arm_all_abis[i].name, target_abi_name)) + { + arm_abi = arm_all_abis[i].abi_type; + break; + } + } + if (i == ARRAY_SIZE (arm_all_abis)) + error ("invalid ABI option: -mabi=%s", target_abi_name); + } + else + arm_abi = ARM_DEFAULT_ABI; + + /* Make sure that the processor choice does not conflict with any of the + other command line choices. */ + if (TARGET_ARM && !(insn_flags & FL_NOTM)) + error ("target CPU does not support ARM mode"); + + /* BPABI targets use linker tricks to allow interworking on cores + without thumb support. */ + if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) + { + warning (0, "target CPU does not support interworking" ); + target_flags &= ~MASK_INTERWORK; + } + + if (TARGET_THUMB && !(insn_flags & FL_THUMB)) + { + warning (0, "target CPU does not support THUMB instructions"); + target_flags &= ~MASK_THUMB; + } + + if (TARGET_APCS_FRAME && TARGET_THUMB) + { + /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ + target_flags &= ~MASK_APCS_FRAME; + } + + /* Callee super interworking implies thumb interworking. Adding + this to the flags here simplifies the logic elsewhere. */ + if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) + target_flags |= MASK_INTERWORK; + + /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done + from here where no function is being compiled currently. 
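[Editorial aside, not part of the imported arm.c: when no listed CPU has both the default CPU's abilities and the extra abilities the user asked for, the code above picks the entry that has every sought bit and shares the most flag bits with the default, scored with bit_count. A hedged standalone sketch of that scoring loop; entry, pick_best_fit and the flag values are invented.]

#include <stdio.h>

struct entry { const char *name; unsigned long flags; };

/* Count set bits, as bit_count () does above.  */
static unsigned
bits (unsigned long v)
{
  unsigned n = 0;

  for (; v; v &= v - 1)
    n++;
  return n;
}

/* Among entries that have every bit in SOUGHT, pick the one sharing
   the most flag bits with DEFAULT_FLAGS -- the same "best match"
   scan arm_option_override performs over all_cores.  */
static const struct entry *
pick_best_fit (const struct entry *tab, unsigned long sought,
               unsigned long default_flags)
{
  const struct entry *best = NULL;
  unsigned best_count = 0;

  for (; tab->name != NULL; tab++)
    if ((tab->flags & sought) == sought)
      {
        unsigned count = bits (tab->flags & default_flags);

        if (count >= best_count)
          {
            best = tab;
            best_count = count;
          }
      }
  return best;
}

int
main (void)
{
  /* Purely invented flag values for illustration.  */
  static const struct entry tab[] = {
    { "alpha", 0x013 }, { "beta", 0x01f }, { "gamma", 0x113 }, { NULL, 0 }
  };
  const struct entry *e = pick_best_fit (tab, 0x100, 0x013);

  printf ("%s\n", e ? e->name : "(none)");   /* prints "gamma" */
  return 0;
}
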
*/ + if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) + warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); + + if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) + warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); + + if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) + { + warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); + target_flags |= MASK_APCS_FRAME; + } + + if (TARGET_POKE_FUNCTION_NAME) + target_flags |= MASK_APCS_FRAME; + + if (TARGET_APCS_REENT && flag_pic) + error ("-fpic and -mapcs-reent are incompatible"); + + if (TARGET_APCS_REENT) + warning (0, "APCS reentrant code not supported. Ignored"); + + /* If this target is normally configured to use APCS frames, warn if they + are turned off and debugging is turned on. */ + if (TARGET_ARM + && write_symbols != NO_DEBUG + && !TARGET_APCS_FRAME + && (TARGET_DEFAULT & MASK_APCS_FRAME)) + warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); + + if (TARGET_APCS_FLOAT) + warning (0, "passing floating point arguments in fp regs not yet supported"); + + /* Initialize boolean versions of the flags, for use in the arm.md file. */ + arm_arch3m = (insn_flags & FL_ARCH3M) != 0; + arm_arch4 = (insn_flags & FL_ARCH4) != 0; + arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0); + arm_arch5 = (insn_flags & FL_ARCH5) != 0; + arm_arch5e = (insn_flags & FL_ARCH5E) != 0; + arm_arch6 = (insn_flags & FL_ARCH6) != 0; + arm_arch6k = (insn_flags & FL_ARCH6K) != 0; + arm_arch_notm = (insn_flags & FL_NOTM) != 0; + arm_arch7 = (insn_flags & FL_ARCH7) != 0; + arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; + arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; + arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; + arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; + + arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; + arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; + thumb_code = TARGET_ARM == 0; + thumb1_code = TARGET_THUMB1 != 0; + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; + arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + + /* If we are not using the default (ARM mode) section anchor offset + ranges, then set the correct ranges now. */ + if (TARGET_THUMB1) + { + /* Thumb-1 LDR instructions cannot have negative offsets. + Permissible positive offset ranges are 5-bit (for byte loads), + 6-bit (for halfword loads), or 7-bit (for word loads). + Empirical results suggest a 7-bit anchor range gives the best + overall code size. */ + targetm.min_anchor_offset = 0; + targetm.max_anchor_offset = 127; + } + else if (TARGET_THUMB2) + { + /* The minimum is set such that the total size of the block + for a particular anchor is 248 + 1 + 4095 bytes, which is + divisible by eight, ensuring natural spacing of anchors. */ + targetm.min_anchor_offset = -248; + targetm.max_anchor_offset = 4095; + } + + /* V5 code we generate is completely interworking capable, so we turn off + TARGET_INTERWORK here to avoid many tests later on. */ + + /* XXX However, we must pass the right pre-processor defines to CPP + or GLD can get confused. This is a hack. 
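[Editorial aside, not part of the imported arm.c: the Thumb-2 comment above states the anchor block size 248 + 1 + 4095 is divisible by eight, and the Thumb-1 range 0..127 corresponds to a 7-bit offset. A tiny check of that arithmetic; nothing below is upstream code.]

#include <assert.h>
#include <stdio.h>

int
main (void)
{
  /* Thumb-2 section-anchor range quoted above: [-248, 4095].  */
  int block = 248 + 1 + 4095;

  assert (block == 4344 && block % 8 == 0);   /* 4344 == 8 * 543 */

  /* Thumb-1 range [0, 127] is exactly a 7-bit unsigned offset.  */
  assert (127 == (1 << 7) - 1);

  printf ("anchor ranges check out\n");
  return 0;
}
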
*/ + if (TARGET_INTERWORK) + arm_cpp_interwork = 1; + + if (arm_arch5) + target_flags &= ~MASK_INTERWORK; + + if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN) + error ("iwmmxt requires an AAPCS compatible ABI for proper operation"); + + if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) + error ("iwmmxt abi requires an iwmmxt capable cpu"); + + if (target_fpu_name == NULL && target_fpe_name != NULL) + { + if (streq (target_fpe_name, "2")) + target_fpu_name = "fpe2"; + else if (streq (target_fpe_name, "3")) + target_fpu_name = "fpe3"; + else + error ("invalid floating point emulation option: -mfpe=%s", + target_fpe_name); + } + + if (target_fpu_name == NULL) + { +#ifdef FPUTYPE_DEFAULT + target_fpu_name = FPUTYPE_DEFAULT; +#else + if (arm_arch_cirrus) + target_fpu_name = "maverick"; + else + target_fpu_name = "fpe2"; +#endif + } + + arm_fpu_desc = NULL; + for (i = 0; i < ARRAY_SIZE (all_fpus); i++) + { + if (streq (all_fpus[i].name, target_fpu_name)) + { + arm_fpu_desc = &all_fpus[i]; + break; + } + } + + if (!arm_fpu_desc) + { + error ("invalid floating point option: -mfpu=%s", target_fpu_name); + return; + } + + switch (arm_fpu_desc->model) + { + case ARM_FP_MODEL_FPA: + if (arm_fpu_desc->rev == 2) + arm_fpu_attr = FPU_FPE2; + else if (arm_fpu_desc->rev == 3) + arm_fpu_attr = FPU_FPE3; + else + arm_fpu_attr = FPU_FPA; + break; + + case ARM_FP_MODEL_MAVERICK: + arm_fpu_attr = FPU_MAVERICK; + break; + + case ARM_FP_MODEL_VFP: + arm_fpu_attr = FPU_VFP; + break; + + default: + gcc_unreachable(); + } + + if (target_float_abi_name != NULL) + { + /* The user specified a FP ABI. */ + for (i = 0; i < ARRAY_SIZE (all_float_abis); i++) + { + if (streq (all_float_abis[i].name, target_float_abi_name)) + { + arm_float_abi = all_float_abis[i].abi_type; + break; + } + } + if (i == ARRAY_SIZE (all_float_abis)) + error ("invalid floating point abi: -mfloat-abi=%s", + target_float_abi_name); + } + else + arm_float_abi = TARGET_DEFAULT_FLOAT_ABI; + + if (TARGET_AAPCS_BASED + && (arm_fpu_desc->model == ARM_FP_MODEL_FPA)) + error ("FPA is unsupported in the AAPCS"); + + if (TARGET_AAPCS_BASED) + { + if (TARGET_CALLER_INTERWORKING) + error ("AAPCS does not support -mcaller-super-interworking"); + else + if (TARGET_CALLEE_INTERWORKING) + error ("AAPCS does not support -mcallee-super-interworking"); + } + + /* FPA and iWMMXt are incompatible because the insn encodings overlap. + VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon + will ever exist. GCC makes no attempt to support this combination. */ + if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT) + sorry ("iWMMXt and hardware floating point"); + + /* ??? iWMMXt insn patterns need auditing for Thumb-2. */ + if (TARGET_THUMB2 && TARGET_IWMMXT) + sorry ("Thumb-2 iWMMXt"); + + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); + + /* If soft-float is specified then don't use FPU. 
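[Editorial aside, not part of the imported arm.c: -mfpu= and -mfloat-abi= are both resolved above by a linear scan of a name/enum table followed by an error on a miss. A minimal standalone version of that pattern; lookup_by_name, demo_kind and the table contents are mine.]

#include <stdio.h>
#include <string.h>

enum demo_kind { DEMO_SOFT, DEMO_SOFTFP, DEMO_HARD, DEMO_INVALID };

struct named_value { const char *name; enum demo_kind value; };

static const struct named_value table[] = {
  { "soft",   DEMO_SOFT   },
  { "softfp", DEMO_SOFTFP },
  { "hard",   DEMO_HARD   },
};

/* Linear name-to-enum lookup, the same shape as the -mfloat-abi=
   and -mfpu= scans in arm_option_override above.  */
static enum demo_kind
lookup_by_name (const char *name)
{
  size_t i;

  for (i = 0; i < sizeof table / sizeof table[0]; i++)
    if (strcmp (table[i].name, name) == 0)
      return table[i].value;

  fprintf (stderr, "invalid option value: %s\n", name);
  return DEMO_INVALID;
}

int
main (void)
{
  printf ("%d %d\n", lookup_by_name ("hard"), lookup_by_name ("bogus"));
  return 0;
}
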
*/ + if (TARGET_SOFT_FLOAT) + arm_fpu_attr = FPU_NONE; + + if (TARGET_AAPCS_BASED) + { + if (arm_abi == ARM_ABI_IWMMXT) + arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; + else if (arm_float_abi == ARM_FLOAT_ABI_HARD + && TARGET_HARD_FLOAT + && TARGET_VFP) + arm_pcs_default = ARM_PCS_AAPCS_VFP; + else + arm_pcs_default = ARM_PCS_AAPCS; + } + else + { + if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) + sorry ("-mfloat-abi=hard and VFP"); + + if (arm_abi == ARM_ABI_APCS) + arm_pcs_default = ARM_PCS_APCS; + else + arm_pcs_default = ARM_PCS_ATPCS; + } + + /* For arm2/3 there is no need to do any scheduling if there is only + a floating point emulator, or we are doing software floating-point. */ + if ((TARGET_SOFT_FLOAT + || (TARGET_FPA && arm_fpu_desc->rev)) + && (tune_flags & FL_MODE32) == 0) + flag_schedule_insns = flag_schedule_insns_after_reload = 0; + + if (target_thread_switch) + { + if (strcmp (target_thread_switch, "soft") == 0) + target_thread_pointer = TP_SOFT; + else if (strcmp (target_thread_switch, "auto") == 0) + target_thread_pointer = TP_AUTO; + else if (strcmp (target_thread_switch, "cp15") == 0) + target_thread_pointer = TP_CP15; + else + error ("invalid thread pointer option: -mtp=%s", target_thread_switch); + } + + /* Use the cp15 method if it is available. */ + if (target_thread_pointer == TP_AUTO) + { + if (arm_arch6k && !TARGET_THUMB1) + target_thread_pointer = TP_CP15; + else + target_thread_pointer = TP_SOFT; + } + + if (TARGET_HARD_TP && TARGET_THUMB1) + error ("can not use -mtp=cp15 with 16-bit Thumb"); + + /* Override the default structure alignment for AAPCS ABI. */ + if (TARGET_AAPCS_BASED) + arm_structure_size_boundary = 8; + + if (structure_size_string != NULL) + { + int size = strtol (structure_size_string, NULL, 0); + + if (size == 8 || size == 32 + || (ARM_DOUBLEWORD_ALIGN && size == 64)) + arm_structure_size_boundary = size; + else + warning (0, "structure size boundary can only be set to %s", + ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32"); + } + + if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic) + { + error ("RTP PIC is incompatible with Thumb"); + flag_pic = 0; + } + + /* If stack checking is disabled, we can use r10 as the PIC register, + which keeps r9 available. The EABI specifies r9 as the PIC register. */ + if (flag_pic && TARGET_SINGLE_PIC_BASE) + { + if (TARGET_VXWORKS_RTP) + warning (0, "RTP PIC is incompatible with -msingle-pic-base"); + arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10; + } + + if (flag_pic && TARGET_VXWORKS_RTP) + arm_pic_register = 9; + + if (arm_pic_register_string != NULL) + { + int pic_register = decode_reg_name (arm_pic_register_string); + + if (!flag_pic) + warning (0, "-mpic-register= is useless without -fpic"); + + /* Prevent the user from choosing an obviously stupid PIC register. */ + else if (pic_register < 0 || call_used_regs[pic_register] + || pic_register == HARD_FRAME_POINTER_REGNUM + || pic_register == STACK_POINTER_REGNUM + || pic_register >= PC_REGNUM + || (TARGET_VXWORKS_RTP + && (unsigned int) pic_register != arm_pic_register)) + error ("unable to use '%s' for PIC register", arm_pic_register_string); + else + arm_pic_register = pic_register; + } + + /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ + if (fix_cm3_ldrd == 2) + { + if (arm_selected_cpu->core == cortexm3) + fix_cm3_ldrd = 1; + else + fix_cm3_ldrd = 0; + } + + if (TARGET_THUMB1 && flag_schedule_insns) + { + /* Don't warn since it's on by default in -O2. 
*/ + flag_schedule_insns = 0; + } + + if (optimize_size) + { + /* If optimizing for size, bump the number of instructions that we + are prepared to conditionally execute (even on a StrongARM). */ + max_insns_skipped = 6; + } + else + { + /* StrongARM has early execution of branches, so a sequence + that is worth skipping is shorter. */ + if (arm_tune_strongarm) + max_insns_skipped = 3; + } + + /* Hot/Cold partitioning is not currently supported, since we can't + handle literal pool placement in that case. */ + if (flag_reorder_blocks_and_partition) + { + inform (input_location, + "-freorder-blocks-and-partition not supported on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + if (flag_pic) + /* Hoisting PIC address calculations more aggressively provides a small, + but measurable, size reduction for PIC code. Therefore, we decrease + the bar for unrestricted expression hoisting to the cost of PIC address + calculation, which is 2 instructions. */ + maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2, + global_options.x_param_values, + global_options_set.x_param_values); + + /* ARM EABI defaults to strict volatile bitfields. */ + if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0 + && abi_version_at_least(2)) + flag_strict_volatile_bitfields = 1; + + /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed + it beneficial (signified by setting num_prefetch_slots to 1 or more.) */ + if (flag_prefetch_loop_arrays < 0 + && HAVE_prefetch + && optimize >= 3 + && current_tune->num_prefetch_slots > 0) + flag_prefetch_loop_arrays = 1; + + /* Set up parameters to be used in prefetching algorithm. Do not override the + defaults unless we are tuning for a core we have researched values for. */ + if (current_tune->num_prefetch_slots > 0) + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, + current_tune->num_prefetch_slots, + global_options.x_param_values, + global_options_set.x_param_values); + if (current_tune->l1_cache_line_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, + current_tune->l1_cache_line_size, + global_options.x_param_values, + global_options_set.x_param_values); + if (current_tune->l1_cache_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, + current_tune->l1_cache_size, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Register global variables with the garbage collector. */ + arm_add_gc_roots (); +} + +static void +arm_add_gc_roots (void) +{ + gcc_obstack_init(&minipool_obstack); + minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0); +} + +/* A table of known ARM exception types. + For use with the interrupt function attribute. */ + +typedef struct +{ + const char *const arg; + const unsigned long return_value; +} +isr_attribute_arg; + +static const isr_attribute_arg isr_attribute_args [] = +{ + { "IRQ", ARM_FT_ISR }, + { "irq", ARM_FT_ISR }, + { "FIQ", ARM_FT_FIQ }, + { "fiq", ARM_FT_FIQ }, + { "ABORT", ARM_FT_ISR }, + { "abort", ARM_FT_ISR }, + { "ABORT", ARM_FT_ISR }, + { "abort", ARM_FT_ISR }, + { "UNDEF", ARM_FT_EXCEPTION }, + { "undef", ARM_FT_EXCEPTION }, + { "SWI", ARM_FT_EXCEPTION }, + { "swi", ARM_FT_EXCEPTION }, + { NULL, ARM_FT_NORMAL } +}; + +/* Returns the (interrupt) function type of the current + function, or ARM_FT_UNKNOWN if the type cannot be determined. 
*/ + +static unsigned long +arm_isr_value (tree argument) +{ + const isr_attribute_arg * ptr; + const char * arg; + + if (!arm_arch_notm) + return ARM_FT_NORMAL | ARM_FT_STACKALIGN; + + /* No argument - default to IRQ. */ + if (argument == NULL_TREE) + return ARM_FT_ISR; + + /* Get the value of the argument. */ + if (TREE_VALUE (argument) == NULL_TREE + || TREE_CODE (TREE_VALUE (argument)) != STRING_CST) + return ARM_FT_UNKNOWN; + + arg = TREE_STRING_POINTER (TREE_VALUE (argument)); + + /* Check it against the list of known arguments. */ + for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++) + if (streq (arg, ptr->arg)) + return ptr->return_value; + + /* An unrecognized interrupt type. */ + return ARM_FT_UNKNOWN; +} + +/* Computes the type of the current function. */ + +static unsigned long +arm_compute_func_type (void) +{ + unsigned long type = ARM_FT_UNKNOWN; + tree a; + tree attr; + + gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL); + + /* Decide if the current function is volatile. Such functions + never return, and many memory cycles can be saved by not storing + register values that will never be needed again. This optimization + was added to speed up context switching in a kernel application. */ + if (optimize > 0 + && (TREE_NOTHROW (current_function_decl) + || !(flag_unwind_tables + || (flag_exceptions + && arm_except_unwind_info (&global_options) != UI_SJLJ))) + && TREE_THIS_VOLATILE (current_function_decl)) + type |= ARM_FT_VOLATILE; + + if (cfun->static_chain_decl != NULL) + type |= ARM_FT_NESTED; + + attr = DECL_ATTRIBUTES (current_function_decl); + + a = lookup_attribute ("naked", attr); + if (a != NULL_TREE) + type |= ARM_FT_NAKED; + + a = lookup_attribute ("isr", attr); + if (a == NULL_TREE) + a = lookup_attribute ("interrupt", attr); + + if (a == NULL_TREE) + type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL; + else + type |= arm_isr_value (TREE_VALUE (a)); + + return type; +} + +/* Returns the type of the current function. */ + +unsigned long +arm_current_func_type (void) +{ + if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN) + cfun->machine->func_type = arm_compute_func_type (); + + return cfun->machine->func_type; +} + +bool +arm_allocate_stack_slots_for_args (void) +{ + /* Naked functions should not allocate stack slots for arguments. */ + return !IS_NAKED (arm_current_func_type ()); +} + + +/* Output assembler code for a block containing the constant parts + of a trampoline, leaving space for the variable parts. + + On the ARM, (if r8 is the static chain regnum, and remembering that + referencing pc adds an offset of 8) the trampoline looks like: + ldr r8, [pc, #0] + ldr pc, [pc] + .word static chain value + .word function's address + XXX FIXME: When the trampoline returns, r8 will be clobbered. */ + +static void +arm_asm_trampoline_template (FILE *f) +{ + if (TARGET_ARM) + { + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); + } + else if (TARGET_THUMB2) + { + /* The Thumb-2 trampoline is similar to the arm implementation. + Unlike 16-bit Thumb, we enter the stub in thumb mode. 
*/ + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", + STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM); + } + else + { + ASM_OUTPUT_ALIGN (f, 2); + fprintf (f, "\t.code\t16\n"); + fprintf (f, ".Ltrampoline_start:\n"); + asm_fprintf (f, "\tpush\t{r0, r1}\n"); + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); + asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM); + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); + asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM); + asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM); + } + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. */ + +static void +arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, a_tramp; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12); + emit_move_insn (mem, chain_value); + + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16); + fnaddr = XEXP (DECL_RTL (fndecl), 0); + emit_move_insn (mem, fnaddr); + + a_tramp = XEXP (m_tramp, 0); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), + LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode, + plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode); +} + +/* Thumb trampolines should be entered in thumb mode, so set + the bottom bit of the address. */ + +static rtx +arm_trampoline_adjust_address (rtx addr) +{ + if (TARGET_THUMB) + addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx, + NULL, 0, OPTAB_LIB_WIDEN); + return addr; +} + +/* Return 1 if it is possible to return using a single instruction. + If SIBLING is non-null, this is a test for a return before a sibling + call. SIBLING is the call insn, so we can examine its register usage. */ + +int +use_return_insn (int iscond, rtx sibling) +{ + int regno; + unsigned int func_type; + unsigned long saved_int_regs; + unsigned HOST_WIDE_INT stack_adjust; + arm_stack_offsets *offsets; + + /* Never use a return instruction before reload has run. */ + if (!reload_completed) + return 0; + + func_type = arm_current_func_type (); + + /* Naked, volatile and stack alignment functions need special + consideration. */ + if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN)) + return 0; + + /* So do interrupt functions that use the frame pointer and Thumb + interrupt functions. */ + if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB)) + return 0; + + offsets = arm_get_frame_offsets (); + stack_adjust = offsets->outgoing_args - offsets->saved_regs; + + /* As do variadic functions. */ + if (crtl->args.pretend_args_size + || cfun->machine->uses_anonymous_args + /* Or if the function calls __builtin_eh_return () */ + || crtl->calls_eh_return + /* Or if the function calls alloca */ + || cfun->calls_alloca + /* Or if there is a stack adjustment. However, if the stack pointer + is saved on the stack, we can use a pre-incrementing stack load. */ + || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed + && stack_adjust == 4))) + return 0; + + saved_int_regs = offsets->saved_regs_mask; + + /* Unfortunately, the insn + + ldmib sp, {..., sp, ...} + + triggers a bug on most SA-110 based devices, such that the stack + pointer won't be correctly restored if the instruction takes a + page fault. 
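[Editorial aside, not part of the imported arm.c: arm_trampoline_init above stores the static chain at offset 8 and the target address at offset 12 of the 32-bit trampoline (12 and 16 for 16-bit Thumb), and arm_trampoline_adjust_address ORs in bit 0 so Thumb trampolines are entered in Thumb state. A sketch of that layout; arm32_trampoline and thumb_entry_address are my names.]

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Layout of the 32-bit (ARM/Thumb-2) trampoline emitted above:
   two instruction words, then the static chain, then the target.  */
struct arm32_trampoline
{
  uint32_t insn[2];        /* the two ldr instructions      */
  uint32_t static_chain;   /* filled in at offset 8         */
  uint32_t target;         /* filled in at offset 12        */
};

/* Thumb code addresses carry the execution state in bit 0, which is
   what the IOR with 1 in arm_trampoline_adjust_address sets.  */
static uint32_t
thumb_entry_address (uint32_t addr)
{
  return addr | 1u;
}

int
main (void)
{
  assert (offsetof (struct arm32_trampoline, static_chain) == 8);
  assert (offsetof (struct arm32_trampoline, target) == 12);
  assert (thumb_entry_address (0x8000) == 0x8001);
  printf ("trampoline layout OK\n");
  return 0;
}
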
We work around this problem by popping r3 along with + the other registers, since that is never slower than executing + another instruction. + + We test for !arm_arch5 here, because code for any architecture + less than this could potentially be run on one of the buggy + chips. */ + if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM) + { + /* Validate that r3 is a call-clobbered register (always true in + the default abi) ... */ + if (!call_used_regs[3]) + return 0; + + /* ... that it isn't being used for a return value ... */ + if (arm_size_return_regs () >= (4 * UNITS_PER_WORD)) + return 0; + + /* ... or for a tail-call argument ... */ + if (sibling) + { + gcc_assert (GET_CODE (sibling) == CALL_INSN); + + if (find_regno_fusage (sibling, USE, 3)) + return 0; + } + + /* ... and that there are no call-saved registers in r0-r2 + (always true in the default ABI). */ + if (saved_int_regs & 0x7) + return 0; + } + + /* Can't be done if interworking with Thumb, and any registers have been + stacked. */ + if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type)) + return 0; + + /* On StrongARM, conditional returns are expensive if they aren't + taken and multiple registers have been stacked. */ + if (iscond && arm_tune_strongarm) + { + /* Conditional return when just the LR is stored is a simple + conditional-load instruction, that's not expensive. */ + if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM)) + return 0; + + if (flag_pic + && arm_pic_register != INVALID_REGNUM + && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + return 0; + } + + /* If there are saved registers but the LR isn't saved, then we need + two instructions for the return. */ + if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM))) + return 0; + + /* Can't be done if any of the FPA regs are pushed, + since this also requires an insn. */ + if (TARGET_HARD_FLOAT && TARGET_FPA) + for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) + return 0; + + /* Likewise VFP regs. */ + if (TARGET_HARD_FLOAT && TARGET_VFP) + for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) + return 0; + + if (TARGET_REALLY_IWMMXT) + for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) + return 0; + + return 1; +} + +/* Return TRUE if int I is a valid immediate ARM constant. */ + +int +const_ok_for_arm (HOST_WIDE_INT i) +{ + int lowbit; + + /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must + be all zero, or all one. */ + if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0 + && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) + != ((~(unsigned HOST_WIDE_INT) 0) + & ~(unsigned HOST_WIDE_INT) 0xffffffff))) + return FALSE; + + i &= (unsigned HOST_WIDE_INT) 0xffffffff; + + /* Fast return for 0 and small values. We must do this for zero, since + the code below can't handle that one case. */ + if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0) + return TRUE; + + /* Get the number of trailing zeros. */ + lowbit = ffs((int) i) - 1; + + /* Only even shifts are allowed in ARM mode so round down to the + nearest even number. */ + if (TARGET_ARM) + lowbit &= ~1; + + if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0) + return TRUE; + + if (TARGET_ARM) + { + /* Allow rotated constants in ARM mode. 
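[Editorial aside, not part of the imported arm.c: const_ok_for_arm above accepts ARM-mode immediates that are an 8-bit value rotated right by an even amount (Thumb-2 additionally allows the repeated-byte forms). A standalone checker for the classic ARM-mode rule only; rotl32 and is_arm_immediate are my names.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
rotl32 (uint32_t x, unsigned r)
{
  r &= 31;
  return r ? (x << r) | (x >> (32 - r)) : x;
}

/* An ARM data-processing immediate is an 8-bit value rotated right
   by an even amount; equivalently, some even left-rotation of the
   value fits in 8 bits.  This checks only that rule, not the extra
   Thumb-2 patterns handled by const_ok_for_arm.  */
static int
is_arm_immediate (uint32_t v)
{
  unsigned r;

  for (r = 0; r < 32; r += 2)
    if ((rotl32 (v, r) & ~0xffu) == 0)
      return 1;
  return 0;
}

int
main (void)
{
  assert (is_arm_immediate (0));            /* trivially encodable */
  assert (is_arm_immediate (0xff));         /* 8 bits, no rotation */
  assert (is_arm_immediate (0xff000000u));  /* 0xff ror 8          */
  assert (is_arm_immediate (0xf000000fu));  /* 0xff ror 4          */
  assert (!is_arm_immediate (0x101));       /* needs 9 bits        */
  assert (!is_arm_immediate (0x12345678u));
  printf ("immediate checks OK\n");
  return 0;
}
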
*/ + if (lowbit <= 4 + && ((i & ~0xc000003f) == 0 + || (i & ~0xf000000f) == 0 + || (i & ~0xfc000003) == 0)) + return TRUE; + } + else + { + HOST_WIDE_INT v; + + /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */ + v = i & 0xff; + v |= v << 16; + if (i == v || i == (v | (v << 8))) + return TRUE; + + /* Allow repeated pattern 0xXY00XY00. */ + v = i & 0xff00; + v |= v << 16; + if (i == v) + return TRUE; + } + + return FALSE; +} + +/* Return true if I is a valid constant for the operation CODE. */ +static int +const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) +{ + if (const_ok_for_arm (i)) + return 1; + + switch (code) + { + case PLUS: + case COMPARE: + case EQ: + case NE: + case GT: + case LE: + case LT: + case GE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: + return const_ok_for_arm (ARM_SIGN_EXTEND (-i)); + + case MINUS: /* Should only occur with (MINUS I reg) => rsb */ + case XOR: + return 0; + + case IOR: + if (TARGET_THUMB2) + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + return 0; + + case AND: + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + + default: + gcc_unreachable (); + } +} + +/* Emit a sequence of insns to handle a large constant. + CODE is the code of the operation required, it can be any of SET, PLUS, + IOR, AND, XOR, MINUS; + MODE is the mode in which the operation is being performed; + VAL is the integer to operate on; + SOURCE is the other operand (a register, or a null-pointer for SET); + SUBTARGETS means it is safe to create scratch registers if that will + either produce a simpler sequence, or we will want to cse the values. + Return value is the number of insns emitted. */ + +/* ??? Tweak this for thumb2. */ +int +arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, + HOST_WIDE_INT val, rtx target, rtx source, int subtargets) +{ + rtx cond; + + if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC) + cond = COND_EXEC_TEST (PATTERN (insn)); + else + cond = NULL_RTX; + + if (subtargets || code == SET + || (GET_CODE (target) == REG && GET_CODE (source) == REG + && REGNO (target) != REGNO (source))) + { + /* After arm_reorg has been called, we can't fix up expensive + constants by pushing them into memory so we must synthesize + them in-line, regardless of the cost. This is only likely to + be more costly on chips that have load delay slots and we are + compiling without running the scheduler (so no splitting + occurred before the final instruction emission). + + Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c + */ + if (!after_arm_reorg + && !cond + && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, + 1, 0) + > (arm_constant_limit (optimize_function_for_size_p (cfun)) + + (code != SET)))) + { + if (code == SET) + { + /* Currently SET is the only monadic value for CODE, all + the rest are diadic. */ + if (TARGET_USE_MOVT) + arm_emit_movpair (target, GEN_INT (val)); + else + emit_set_insn (target, GEN_INT (val)); + + return 1; + } + else + { + rtx temp = subtargets ? gen_reg_rtx (mode) : target; + + if (TARGET_USE_MOVT) + arm_emit_movpair (temp, GEN_INT (val)); + else + emit_set_insn (temp, GEN_INT (val)); + + /* For MINUS, the value is subtracted from, since we never + have subtraction of a constant. 
*/ + if (code == MINUS) + emit_set_insn (target, gen_rtx_MINUS (mode, temp, source)); + else + emit_set_insn (target, + gen_rtx_fmt_ee (code, mode, source, temp)); + return 2; + } + } + } + + return arm_gen_constant (code, mode, cond, val, target, source, subtargets, + 1); +} + +/* Return the number of instructions required to synthesize the given + constant, if we start emitting them from bit-position I. */ +static int +count_insns_for_constant (HOST_WIDE_INT remainder, int i) +{ + HOST_WIDE_INT temp1; + int step_size = TARGET_ARM ? 2 : 1; + int num_insns = 0; + + gcc_assert (TARGET_ARM || i == 0); + + do + { + int end; + + if (i <= 0) + i += 32; + if (remainder & (((1 << step_size) - 1) << (i - step_size))) + { + end = i - 8; + if (end < 0) + end += 32; + temp1 = remainder & ((0x0ff << end) + | ((i < end) ? (0xff >> (32 - end)) : 0)); + remainder &= ~temp1; + num_insns++; + i -= 8 - step_size; + } + i -= step_size; + } while (remainder); + return num_insns; +} + +static int +find_best_start (unsigned HOST_WIDE_INT remainder) +{ + int best_consecutive_zeros = 0; + int i; + int best_start = 0; + + /* If we aren't targetting ARM, the best place to start is always at + the bottom. */ + if (! TARGET_ARM) + return 0; + + for (i = 0; i < 32; i += 2) + { + int consecutive_zeros = 0; + + if (!(remainder & (3 << i))) + { + while ((i < 32) && !(remainder & (3 << i))) + { + consecutive_zeros += 2; + i += 2; + } + if (consecutive_zeros > best_consecutive_zeros) + { + best_consecutive_zeros = consecutive_zeros; + best_start = i - consecutive_zeros; + } + i -= 2; + } + } + + /* So long as it won't require any more insns to do so, it's + desirable to emit a small constant (in bits 0...9) in the last + insn. This way there is more chance that it can be combined with + a later addressing insn to form a pre-indexed load or store + operation. Consider: + + *((volatile int *)0xe0000100) = 1; + *((volatile int *)0xe0000110) = 2; + + We want this to wind up as: + + mov rA, #0xe0000000 + mov rB, #1 + str rB, [rA, #0x100] + mov rB, #2 + str rB, [rA, #0x110] + + rather than having to synthesize both large constants from scratch. + + Therefore, we calculate how many insns would be required to emit + the constant starting from `best_start', and also starting from + zero (i.e. with bit 31 first to be output). If `best_start' doesn't + yield a shorter sequence, we may as well use zero. */ + if (best_start != 0 + && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) + && (count_insns_for_constant (remainder, 0) <= + count_insns_for_constant (remainder, best_start))) + best_start = 0; + + return best_start; +} + +/* Emit an instruction with the indicated PATTERN. If COND is + non-NULL, conditionalize the execution of the instruction on COND + being true. */ + +static void +emit_constant_insn (rtx cond, rtx pattern) +{ + if (cond) + pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern); + emit_insn (pattern); +} + +/* As above, but extra parameter GENERATE which, if clear, suppresses + RTL generation. */ +/* ??? This needs more work for thumb2. 
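[Editorial aside, not part of the imported arm.c: count_insns_for_constant and find_best_start above estimate how many data-processing instructions a constant needs when it is synthesized from 8-bit chunks on even bit positions (ARM rotations are multiples of two). A simplified greedy estimate of the same quantity; greedy_chunks and rotl32 are mine, and since it skips the start-position search of find_best_start it can overestimate.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
rotl32 (uint32_t x, unsigned r)
{
  r &= 31;
  return r ? (x << r) | (x >> (32 - r)) : x;
}

/* Greedy count of 8-bit, even-aligned (possibly wrapping) chunks
   needed to cover the set bits of V.  */
static int
greedy_chunks (uint32_t v)
{
  int count = 0;

  while (v)
    {
      unsigned low = 0;

      while (!(v & (1u << low)))   /* find the lowest set bit ...   */
        low++;
      low &= ~1u;                  /* ... and align the window to 2 */
      v &= ~rotl32 (0xffu, low);   /* clear that 8-bit window       */
      count++;
    }
  return count;
}

int
main (void)
{
  assert (greedy_chunks (0) == 0);
  assert (greedy_chunks (0xff) == 1);
  assert (greedy_chunks (0xfff) == 2);        /* e.g. 0xff + 0xf00      */
  assert (greedy_chunks (0xe0000100u) == 2);  /* address used above     */
  printf ("chunk estimates OK\n");
  return 0;
}
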
*/ + +static int +arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, + HOST_WIDE_INT val, rtx target, rtx source, int subtargets, + int generate) +{ + int can_invert = 0; + int can_negate = 0; + int final_invert = 0; + int can_negate_initial = 0; + int i; + int num_bits_set = 0; + int set_sign_bit_copies = 0; + int clear_sign_bit_copies = 0; + int clear_zero_bit_copies = 0; + int set_zero_bit_copies = 0; + int insns = 0; + unsigned HOST_WIDE_INT temp1, temp2; + unsigned HOST_WIDE_INT remainder = val & 0xffffffff; + int step_size = TARGET_ARM ? 2 : 1; + + /* Find out which operations are safe for a given CODE. Also do a quick + check for degenerate cases; these can occur when DImode operations + are split. */ + switch (code) + { + case SET: + can_invert = 1; + can_negate = 1; + break; + + case PLUS: + can_negate = 1; + can_negate_initial = 1; + break; + + case IOR: + if (remainder == 0xffffffff) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + GEN_INT (ARM_SIGN_EXTEND (val)))); + return 1; + } + + if (remainder == 0) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + + if (TARGET_THUMB2) + can_invert = 1; + break; + + case AND: + if (remainder == 0) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, const0_rtx)); + return 1; + } + if (remainder == 0xffffffff) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + can_invert = 1; + break; + + case XOR: + if (remainder == 0) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + + if (remainder == 0xffffffff) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, source))); + return 1; + } + break; + + case MINUS: + /* We treat MINUS as (val - source), since (source - val) is always + passed as (source + (-val)). */ + if (remainder == 0) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NEG (mode, source))); + return 1; + } + if (const_ok_for_arm (val)) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_MINUS (mode, GEN_INT (val), + source))); + return 1; + } + can_negate = 1; + + break; + + default: + gcc_unreachable (); + } + + /* If we can do it in one insn get out quickly. */ + if (const_ok_for_arm (val) + || (can_negate_initial && const_ok_for_arm (-val)) + || (can_invert && const_ok_for_arm (~val))) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + (source + ? gen_rtx_fmt_ee (code, mode, source, + GEN_INT (val)) + : GEN_INT (val)))); + return 1; + } + + /* Calculate a few attributes that may be useful for specific + optimizations. */ + /* Count number of leading zeros. */ + for (i = 31; i >= 0; i--) + { + if ((remainder & (1 << i)) == 0) + clear_sign_bit_copies++; + else + break; + } + + /* Count number of leading 1's. */ + for (i = 31; i >= 0; i--) + { + if ((remainder & (1 << i)) != 0) + set_sign_bit_copies++; + else + break; + } + + /* Count number of trailing zero's. */ + for (i = 0; i <= 31; i++) + { + if ((remainder & (1 << i)) == 0) + clear_zero_bit_copies++; + else + break; + } + + /* Count number of trailing 1's. 
*/ + for (i = 0; i <= 31; i++) + { + if ((remainder & (1 << i)) != 0) + set_zero_bit_copies++; + else + break; + } + + switch (code) + { + case SET: + /* See if we can use movw. */ + if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0) + { + if (generate) + emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, + GEN_INT (val))); + return 1; + } + + /* See if we can do this by sign_extending a constant that is known + to be negative. This is a good, way of doing it, since the shift + may well merge into a subsequent insn. */ + if (set_sign_bit_copies > 1) + { + if (const_ok_for_arm + (temp1 = ARM_SIGN_EXTEND (remainder + << (set_sign_bit_copies - 1)))) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_ashrsi3 (target, new_src, + GEN_INT (set_sign_bit_copies - 1))); + } + return 2; + } + /* For an inverted constant, we will need to set the low bits, + these will be shifted out of harm's way. */ + temp1 |= (1 << (set_sign_bit_copies - 1)) - 1; + if (const_ok_for_arm (~temp1)) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_ashrsi3 (target, new_src, + GEN_INT (set_sign_bit_copies - 1))); + } + return 2; + } + } + + /* See if we can calculate the value as the difference between two + valid immediates. */ + if (clear_sign_bit_copies + clear_zero_bit_copies <= 16) + { + int topshift = clear_sign_bit_copies & ~1; + + temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift)) + & (0xff000000 >> topshift)); + + /* If temp1 is zero, then that means the 9 most significant + bits of remainder were 1 and we've caused it to overflow. + When topshift is 0 we don't need to do anything since we + can borrow from 'bit 32'. */ + if (temp1 == 0 && topshift != 0) + temp1 = 0x80000000 >> (topshift - 1); + + temp2 = ARM_SIGN_EXTEND (temp1 - remainder); + + if (const_ok_for_arm (temp2)) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_addsi3 (target, new_src, + GEN_INT (-temp2))); + } + + return 2; + } + } + + /* See if we can generate this by setting the bottom (or the top) + 16 bits, and then shifting these into the other half of the + word. We only look for the simplest cases, to do more would cost + too much. Be careful, however, not to generate this when the + alternative would take fewer insns. */ + if (val & 0xffff0000) + { + temp1 = remainder & 0xffff0000; + temp2 = remainder & 0x0000ffff; + + /* Overlaps outside this range are best done using other methods. */ + for (i = 9; i < 24; i++) + { + if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder) + && !const_ok_for_arm (temp2)) + { + rtx new_src = (subtargets + ? (generate ? gen_reg_rtx (mode) : NULL_RTX) + : target); + insns = arm_gen_constant (code, mode, cond, temp2, new_src, + source, subtargets, generate); + source = new_src; + if (generate) + emit_constant_insn + (cond, + gen_rtx_SET + (VOIDmode, target, + gen_rtx_IOR (mode, + gen_rtx_ASHIFT (mode, source, + GEN_INT (i)), + source))); + return insns + 1; + } + } + + /* Don't duplicate cases already considered. 
*/ + for (i = 17; i < 24; i++) + { + if (((temp1 | (temp1 >> i)) == remainder) + && !const_ok_for_arm (temp1)) + { + rtx new_src = (subtargets + ? (generate ? gen_reg_rtx (mode) : NULL_RTX) + : target); + insns = arm_gen_constant (code, mode, cond, temp1, new_src, + source, subtargets, generate); + source = new_src; + if (generate) + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_IOR + (mode, + gen_rtx_LSHIFTRT (mode, source, + GEN_INT (i)), + source))); + return insns + 1; + } + } + } + break; + + case IOR: + case XOR: + /* If we have IOR or XOR, and the constant can be loaded in a + single instruction, and we can find a temporary to put it in, + then this can be done in two instructions instead of 3-4. */ + if (subtargets + /* TARGET can't be NULL if SUBTARGETS is 0 */ + || (reload_completed && !reg_mentioned_p (target, source))) + { + if (const_ok_for_arm (ARM_SIGN_EXTEND (~val))) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + GEN_INT (val))); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_fmt_ee (code, mode, + source, sub))); + } + return 2; + } + } + + if (code == XOR) + break; + + /* Convert. + x = y | constant ( which is composed of set_sign_bit_copies of leading 1s + and the remainder 0s for e.g. 0xfff00000) + x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies) + + This can be done in 2 instructions by using shifts with mov or mvn. + e.g. for + x = x | 0xfff00000; + we generate. + mvn r0, r0, asl #12 + mvn r0, r0, lsr #12 */ + if (set_sign_bit_copies > 8 + && (val & (-1 << (32 - set_sign_bit_copies))) == val) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (set_sign_bit_copies); + + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, + gen_rtx_ASHIFT (mode, + source, + shift)))); + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, + gen_rtx_LSHIFTRT (mode, sub, + shift)))); + } + return 2; + } + + /* Convert + x = y | constant (which has set_zero_bit_copies number of trailing ones). + to + x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies). + + For eg. r0 = r0 | 0xfff + mvn r0, r0, lsr #12 + mvn r0, r0, asl #12 + + */ + if (set_zero_bit_copies > 8 + && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (set_zero_bit_copies); + + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, + gen_rtx_LSHIFTRT (mode, + source, + shift)))); + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, + gen_rtx_ASHIFT (mode, sub, + shift)))); + } + return 2; + } + + /* This will never be reached for Thumb2 because orn is a valid + instruction. This is for Thumb1 and the ARM 32 bit cases. + + x = y | constant (such that ~constant is a valid constant) + Transform this to + x = ~(~y & ~constant). + */ + if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val))) + { + if (generate) + { + rtx sub = subtargets ? 
gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, source))); + source = sub; + if (subtargets) + sub = gen_reg_rtx (mode); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_AND (mode, source, + GEN_INT (temp1)))); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, sub))); + } + return 3; + } + break; + + case AND: + /* See if two shifts will do 2 or more insn's worth of work. */ + if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24) + { + HOST_WIDE_INT shift_mask = ((0xffffffff + << (32 - clear_sign_bit_copies)) + & 0xffffffff); + + if ((remainder | shift_mask) != 0xffffffff) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + new_src, source, subtargets, 1); + source = new_src; + } + else + { + rtx targ = subtargets ? NULL_RTX : target; + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + targ, source, subtargets, 0); + } + } + + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (clear_sign_bit_copies); + + emit_insn (gen_ashlsi3 (new_src, source, shift)); + emit_insn (gen_lshrsi3 (target, new_src, shift)); + } + + return insns + 2; + } + + if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24) + { + HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1; + + if ((remainder | shift_mask) != 0xffffffff) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + new_src, source, subtargets, 1); + source = new_src; + } + else + { + rtx targ = subtargets ? NULL_RTX : target; + + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + targ, source, subtargets, 0); + } + } + + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (clear_zero_bit_copies); + + emit_insn (gen_lshrsi3 (new_src, source, shift)); + emit_insn (gen_ashlsi3 (target, new_src, shift)); + } + + return insns + 2; + } + + break; + + default: + break; + } + + for (i = 0; i < 32; i++) + if (remainder & (1 << i)) + num_bits_set++; + + if ((code == AND) + || (code != IOR && can_invert && num_bits_set > 16)) + remainder ^= 0xffffffff; + else if (code == PLUS && num_bits_set > 16) + remainder = (-remainder) & 0xffffffff; + + /* For XOR, if more than half the bits are set and there's a sequence + of more than 8 consecutive ones in the pattern then we can XOR by the + inverted constant and then invert the final result; this may save an + instruction and might also lead to the final mvn being merged with + some other operation. */ + else if (code == XOR && num_bits_set > 16 + && (count_insns_for_constant (remainder ^ 0xffffffff, + find_best_start + (remainder ^ 0xffffffff)) + < count_insns_for_constant (remainder, + find_best_start (remainder)))) + { + remainder ^= 0xffffffff; + final_invert = 1; + } + else + { + can_invert = 0; + can_negate = 0; + } + + /* Now try and find a way of doing the job in either two or three + instructions. + We start by looking for the largest block of zeros that are aligned on + a 2-bit boundary, we then fill up the temps, wrapping around to the + top of the word when we drop off the bottom. + In the worst case this code should produce no more than four insns. + Thumb-2 constants are shifted, not rotated, so the MSB is always the + best place to start. 
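[Editorial aside, not part of the imported arm.c: the IOR and AND cases above rely on two shift identities -- setting the top k bits of y can be written as ~(~(y << k) >> k) (two shifted MVNs), and clearing the top k bits as (y << k) >> k (two shifts). A small verification of both identities over sample values; everything below is mine, and unsigned (logical) shifts are assumed, matching the generated lsl/lsr.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Check, for a handful of values, the identities behind the
   shifted-MVN IOR trick and the double-shift AND trick above:
     y | top_k_ones  == ~(~(y << k) >> k)
     y & ~top_k_ones ==   (y << k) >> k
   Shifts are done on uint32_t so >> is a logical shift.  */
int
main (void)
{
  static const uint32_t samples[] =
    { 0x00000000u, 0x00000001u, 0x12345678u, 0xdeadbeefu, 0xffffffffu };
  unsigned i, k;

  for (i = 0; i < sizeof samples / sizeof samples[0]; i++)
    for (k = 1; k < 32; k++)
      {
        uint32_t y = samples[i];
        uint32_t top = 0xffffffffu << (32 - k);

        assert ((y | top) == (uint32_t) ~(~(y << k) >> k));
        assert ((y & ~top) == ((y << k) >> k));
      }

  printf ("shift identities hold\n");
  return 0;
}
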
*/ + + /* ??? Use thumb2 replicated constants when the high and low halfwords are + the same. */ + { + /* Now start emitting the insns. */ + i = find_best_start (remainder); + do + { + int end; + + if (i <= 0) + i += 32; + if (remainder & (3 << (i - 2))) + { + end = i - 8; + if (end < 0) + end += 32; + temp1 = remainder & ((0x0ff << end) + | ((i < end) ? (0xff >> (32 - end)) : 0)); + remainder &= ~temp1; + + if (generate) + { + rtx new_src, temp1_rtx; + + if (code == SET || code == MINUS) + { + new_src = (subtargets ? gen_reg_rtx (mode) : target); + if (can_invert && code != MINUS) + temp1 = ~temp1; + } + else + { + if ((final_invert || remainder) && subtargets) + new_src = gen_reg_rtx (mode); + else + new_src = target; + if (can_invert) + temp1 = ~temp1; + else if (can_negate) + temp1 = -temp1; + } + + temp1 = trunc_int_for_mode (temp1, mode); + temp1_rtx = GEN_INT (temp1); + + if (code == SET) + ; + else if (code == MINUS) + temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); + else + temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); + + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + temp1_rtx)); + source = new_src; + } + + if (code == SET) + { + can_invert = 0; + code = PLUS; + } + else if (code == MINUS) + code = PLUS; + + insns++; + i -= 8 - step_size; + } + /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary + shifts. */ + i -= step_size; + } + while (remainder); + } + + if (final_invert) + { + if (generate) + emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, source))); + insns++; + } + + return insns; +} + +/* Canonicalize a comparison so that we are more likely to recognize it. + This can be done for a few constant compares, where we can make the + immediate value easier to load. */ + +enum rtx_code +arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) +{ + enum machine_mode mode; + unsigned HOST_WIDE_INT i, maxval; + + mode = GET_MODE (*op0); + if (mode == VOIDmode) + mode = GET_MODE (*op1); + + maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; + + /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode + we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either + reversed or (for constant OP1) adjusted to GE/LT. Similarly + for GTU/LEU in Thumb mode. */ + if (mode == DImode) + { + rtx tem; + + /* To keep things simple, always use the Cirrus cfcmp64 if it is + available. */ + if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK) + return code; + + if (code == GT || code == LE + || (!TARGET_ARM && (code == GTU || code == LEU))) + { + /* Missing comparison. First try to use an available + comparison. */ + if (GET_CODE (*op1) == CONST_INT) + { + i = INTVAL (*op1); + switch (code) + { + case GT: + case LE: + if (i != maxval + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + return code == GT ? GE : LT; + } + break; + case GTU: + case LEU: + if (i != ~((unsigned HOST_WIDE_INT) 0) + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + return code == GTU ? GEU : LTU; + } + break; + default: + gcc_unreachable (); + } + } + + /* If that did not work, reverse the condition. */ + tem = *op0; + *op0 = *op1; + *op1 = tem; + return swap_condition (code); + } + + return code; + } + + /* Comparisons smaller than DImode. Only adjust comparisons against + an out-of-range constant. 
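[Editorial aside, not part of the imported arm.c: arm_canonicalize_comparison above turns GT/LE into GE/LT (and GTU/LEU into GEU/LTU) by nudging the constant by one when the adjusted constant is easier to load, guarding the boundary cases (i != maxval, i != 0, and so on) where the adjustment would overflow. A small spot-check of the underlying identities; the code below is mine.]

#include <assert.h>
#include <limits.h>
#include <stdio.h>

/* Spot-check the identities the canonicalization relies on:
   for i != INT_MAX,  x >  i  <=>  x >= i + 1
   for i != INT_MIN,  x <  i  <=>  x <= i - 1
   plus the analogous unsigned forms guarded in the code above.  */
int
main (void)
{
  static const int xs[] = { INT_MIN, -7, -1, 0, 1, 41, 42, 43, INT_MAX };
  static const int is[] = { -100, -1, 0, 41, 42, 1000 };
  unsigned a, b;

  for (a = 0; a < sizeof xs / sizeof xs[0]; a++)
    for (b = 0; b < sizeof is / sizeof is[0]; b++)
      {
        int x = xs[a], i = is[b];

        assert ((x > i) == (x >= i + 1));   /* i stays well below INT_MAX */
        assert ((x < i) == (x <= i - 1));   /* i stays well above INT_MIN */
      }

  assert ((5u > 3u) == (5u >= 4u));         /* unsigned flavour */
  printf ("comparison adjustments OK\n");
  return 0;
}
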
*/ + if (GET_CODE (*op1) != CONST_INT + || const_ok_for_arm (INTVAL (*op1)) + || const_ok_for_arm (- INTVAL (*op1))) + return code; + + i = INTVAL (*op1); + + switch (code) + { + case EQ: + case NE: + return code; + + case GT: + case LE: + if (i != maxval + && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) + { + *op1 = GEN_INT (i + 1); + return code == GT ? GE : LT; + } + break; + + case GE: + case LT: + if (i != ~maxval + && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) + { + *op1 = GEN_INT (i - 1); + return code == GE ? GT : LE; + } + break; + + case GTU: + case LEU: + if (i != ~((unsigned HOST_WIDE_INT) 0) + && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) + { + *op1 = GEN_INT (i + 1); + return code == GTU ? GEU : LTU; + } + break; + + case GEU: + case LTU: + if (i != 0 + && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) + { + *op1 = GEN_INT (i - 1); + return code == GEU ? GTU : LEU; + } + break; + + default: + gcc_unreachable (); + } + + return code; +} + + +/* Define how to find the value returned by a function. */ + +static rtx +arm_function_value(const_tree type, const_tree func, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + int unsignedp ATTRIBUTE_UNUSED; + rtx r ATTRIBUTE_UNUSED; + + mode = TYPE_MODE (type); + + if (TARGET_AAPCS_BASED) + return aapcs_allocate_return_reg (mode, type, func); + + /* Promote integer types. */ + if (INTEGRAL_TYPE_P (type)) + mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1); + + /* Promotes small structs returned in a register to full-word size + for big-endian AAPCS. */ + if (arm_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + return LIBCALL_VALUE (mode); +} + +static int +libcall_eq (const void *p1, const void *p2) +{ + return rtx_equal_p ((const_rtx) p1, (const_rtx) p2); +} + +static hashval_t +libcall_hash (const void *p1) +{ + return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE); +} + +static void +add_libcall (htab_t htab, rtx libcall) +{ + *htab_find_slot (htab, libcall, INSERT) = libcall; +} + +static bool +arm_libcall_uses_aapcs_base (const_rtx libcall) +{ + static bool init_done = false; + static htab_t libcall_htab; + + if (!init_done) + { + init_done = true; + + libcall_htab = htab_create (31, libcall_hash, libcall_eq, + NULL); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, SFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, DFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, SFmode, DImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, DFmode, DImode)); + + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, SFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, DFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, SFmode, DImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, DFmode, DImode)); + + add_libcall (libcall_htab, + convert_optab_libfunc (sext_optab, SFmode, HFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (trunc_optab, HFmode, SFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, SImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, SImode, DFmode)); + add_libcall 
(libcall_htab, + convert_optab_libfunc (sfix_optab, DImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, DImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, DImode, SFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, DImode, SFmode)); + + /* Values from double-precision helper functions are returned in core + registers if the selected core only supports single-precision + arithmetic, even if we are using the hard-float ABI. The same is + true for single-precision helpers, but we will never be using the + hard-float ABI on a CPU which doesn't support single-precision + operations in hardware. */ + add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode)); + add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode, + SFmode)); + add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode, + DFmode)); + } + + return libcall && htab_find (libcall_htab, libcall) != NULL; +} + +rtx +arm_libcall_value (enum machine_mode mode, const_rtx libcall) +{ + if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS + && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + /* The following libcalls return their result in integer registers, + even though they return a floating point value. */ + if (arm_libcall_uses_aapcs_base (libcall)) + return gen_rtx_REG (mode, ARG_REGISTER(1)); + + } + + return LIBCALL_VALUE (mode); +} + +/* Determine the amount of memory needed to store the possible return + registers of an untyped call. */ +int +arm_apply_result_size (void) +{ + int size = 16; + + if (TARGET_32BIT) + { + if (TARGET_HARD_FLOAT_ABI) + { + if (TARGET_VFP) + size += 32; + if (TARGET_FPA) + size += 12; + if (TARGET_MAVERICK) + size += 8; + } + if (TARGET_IWMMXT_ABI) + size += 8; + } + + return size; +} + +/* Decide whether TYPE should be returned in memory (true) + or in a register (false). FNTYPE is the type of the function making + the call. */ +static bool +arm_return_in_memory (const_tree type, const_tree fntype) +{ + HOST_WIDE_INT size; + + size = int_size_in_bytes (type); /* Negative if not fixed size. */ + + if (TARGET_AAPCS_BASED) + { + /* Simple, non-aggregate types (ie not including vectors and + complex) are always returned in a register (or registers). + We don't care about which register here, so we can short-cut + some of the detail. */ + if (!AGGREGATE_TYPE_P (type) + && TREE_CODE (type) != VECTOR_TYPE + && TREE_CODE (type) != COMPLEX_TYPE) + return false; + + /* Any return value that is no larger than one word can be + returned in r0. */ + if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) + return false; + + /* Check any available co-processors to see if they accept the + type as a register candidate (VFP, for example, can return + some aggregates in consecutive registers). These aren't + available if the call is variadic. 
*/ + if (aapcs_select_return_coproc (type, fntype) >= 0) + return false; + + /* Vector values should be returned using ARM registers, not + memory (unless they're over 16 bytes, which will break since + we only have four call-clobbered registers to play with). */ + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + /* The rest go in memory. */ + return true; + } + + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + if (!AGGREGATE_TYPE_P (type) && + (TREE_CODE (type) != VECTOR_TYPE)) + /* All simple types are returned in registers. */ + return false; + + if (arm_abi != ARM_ABI_APCS) + { + /* ATPCS and later return aggregate types in memory only if they are + larger than a word (or are variable size). */ + return (size < 0 || size > UNITS_PER_WORD); + } + + /* For the arm-wince targets we choose to be compatible with Microsoft's + ARM and Thumb compilers, which always return aggregates in memory. */ +#ifndef ARM_WINCE + /* All structures/unions bigger than one word are returned in memory. + Also catch the case where int_size_in_bytes returns -1. In this case + the aggregate is either huge or of variable size, and in either case + we will want to return it via memory and not in a register. */ + if (size < 0 || size > UNITS_PER_WORD) + return true; + + if (TREE_CODE (type) == RECORD_TYPE) + { + tree field; + + /* For a struct the APCS says that we only return in a register + if the type is 'integer like' and every addressable element + has an offset of zero. For practical purposes this means + that the structure can have at most one non bit-field element + and that this element must be the first one in the structure. */ + + /* Find the first field, ignoring non FIELD_DECL things which will + have been created by C++. */ + for (field = TYPE_FIELDS (type); + field && TREE_CODE (field) != FIELD_DECL; + field = DECL_CHAIN (field)) + continue; + + if (field == NULL) + return false; /* An empty structure. Allowed by an extension to ANSI C. */ + + /* Check that the first field is valid for returning in a register. */ + + /* ... Floats are not allowed */ + if (FLOAT_TYPE_P (TREE_TYPE (field))) + return true; + + /* ... Aggregates that are not themselves valid for returning in + a register are not allowed. */ + if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) + return true; + + /* Now check the remaining fields, if any. Only bitfields are allowed, + since they are not addressable. */ + for (field = DECL_CHAIN (field); + field; + field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + if (!DECL_BIT_FIELD_TYPE (field)) + return true; + } + + return false; + } + + if (TREE_CODE (type) == UNION_TYPE) + { + tree field; + + /* Unions can be returned in registers if every element is + integral, or can be returned in an integer register. */ + for (field = TYPE_FIELDS (type); + field; + field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + if (FLOAT_TYPE_P (TREE_TYPE (field))) + return true; + + if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) + return true; + } + + return false; + } +#endif /* not ARM_WINCE */ + + /* Return all other types in memory. */ + return true; +} + +/* Indicate whether or not words of a double are in big-endian order. */ + +int +arm_float_words_big_endian (void) +{ + if (TARGET_MAVERICK) + return 0; + + /* For FPA, float words are always big-endian. For VFP, floats words + follow the memory system mode. 
*/ + + if (TARGET_FPA) + { + return 1; + } + + if (TARGET_VFP) + return (TARGET_BIG_END ? 1 : 0); + + return 1; +} + +const struct pcs_attribute_arg +{ + const char *arg; + enum arm_pcs value; +} pcs_attribute_args[] = + { + {"aapcs", ARM_PCS_AAPCS}, + {"aapcs-vfp", ARM_PCS_AAPCS_VFP}, +#if 0 + /* We could recognize these, but changes would be needed elsewhere + * to implement them. */ + {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT}, + {"atpcs", ARM_PCS_ATPCS}, + {"apcs", ARM_PCS_APCS}, +#endif + {NULL, ARM_PCS_UNKNOWN} + }; + +static enum arm_pcs +arm_pcs_from_attribute (tree attr) +{ + const struct pcs_attribute_arg *ptr; + const char *arg; + + /* Get the value of the argument. */ + if (TREE_VALUE (attr) == NULL_TREE + || TREE_CODE (TREE_VALUE (attr)) != STRING_CST) + return ARM_PCS_UNKNOWN; + + arg = TREE_STRING_POINTER (TREE_VALUE (attr)); + + /* Check it against the list of known arguments. */ + for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++) + if (streq (arg, ptr->arg)) + return ptr->value; + + /* An unrecognized PCS name. */ + return ARM_PCS_UNKNOWN; +} + +/* Get the PCS variant to use for this call. TYPE is the function's type + specification, DECL is the specific declaration. DECL may be null if + the call could be indirect or if this is a library call. */ +static enum arm_pcs +arm_get_pcs_model (const_tree type, const_tree decl) +{ + bool user_convention = false; + enum arm_pcs user_pcs = arm_pcs_default; + tree attr; + + gcc_assert (type); + + attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type)); + if (attr) + { + user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr)); + user_convention = true; + } + + if (TARGET_AAPCS_BASED) + { + /* Detect varargs functions. These always use the base rules + (no argument is ever a candidate for a co-processor + register). */ + bool base_rules = stdarg_p (type); + + if (user_convention) + { + if (user_pcs > ARM_PCS_AAPCS_LOCAL) + sorry ("non-AAPCS derived PCS variant"); + else if (base_rules && user_pcs != ARM_PCS_AAPCS) + error ("variadic functions must use the base AAPCS variant"); + } + + if (base_rules) + return ARM_PCS_AAPCS; + else if (user_convention) + return user_pcs; + else if (decl && flag_unit_at_a_time) + { + /* Local functions never leak outside this compilation unit, + so we are free to use whatever conventions are + appropriate. */ + /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */ + struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); + if (i && i->local) + return ARM_PCS_AAPCS_LOCAL; + } + } + else if (user_convention && user_pcs != arm_pcs_default) + sorry ("PCS variant"); + + /* For everything else we use the target's default. */ + return arm_pcs_default; +} + + +static void +aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + rtx libcall ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED) +{ + /* Record the unallocated VFP registers. */ + pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; + pcum->aapcs_vfp_reg_alloc = 0; +} + +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + type. If a non-floating point type is found, or if a floating point + type that doesn't match a non-VOIDmode *MODEP is found, then return -1, + otherwise return the count in the sub-tree.
*/ +static int +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) +{ + enum machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + /* Use V2SImode and V4SImode as representatives of all 64-bit + and 128-bit vector types, whether or not those modes are + supported with the present options. */ + size = int_size_in_bytes (type); + switch (size) + { + case 8: + mode = V2SImode; + break; + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P(type)) + return -1; + + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !host_integerp (TYPE_MAX_VALUE (index), 1) + || !TYPE_MIN_VALUE (index) + || !host_integerp (TYPE_MIN_VALUE (index), 1) + || count < 0) + return -1; + + count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1) + - tree_low_cst (TYPE_MIN_VALUE (index), 1)); + + /* There must be no padding. */ + if (!host_integerp (TYPE_SIZE (type), 1) + || (tree_low_cst (TYPE_SIZE (type), 1) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P(type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (!host_integerp (TYPE_SIZE (type), 1) + || (tree_low_cst (TYPE_SIZE (type), 1) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P(type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. */ + if (!host_integerp (TYPE_SIZE (type), 1) + || (tree_low_cst (TYPE_SIZE (type), 1) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* Return true if PCS_VARIANT should use VFP registers. 
*/ +static bool +use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) +{ + if (pcs_variant == ARM_PCS_AAPCS_VFP) + { + static bool seen_thumb1_vfp = false; + + if (TARGET_THUMB1 && !seen_thumb1_vfp) + { + sorry ("Thumb-1 hard-float VFP ABI"); + /* sorry() is not immediately fatal, so only display this once. */ + seen_thumb1_vfp = true; + } + + return true; + } + + if (pcs_variant != ARM_PCS_AAPCS_LOCAL) + return false; + + return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && + (TARGET_VFP_DOUBLE || !is_double)); +} + +static bool +aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type, + enum machine_mode *base_mode, int *count) +{ + enum machine_mode new_mode = VOIDmode; + + if (GET_MODE_CLASS (mode) == MODE_FLOAT + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + *count = 1; + new_mode = mode; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + { + *count = 2; + new_mode = (mode == DCmode ? DFmode : SFmode); + } + else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE)) + { + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); + + if (ag_count > 0 && ag_count <= 4) + *count = ag_count; + else + return false; + } + else + return false; + + + if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) + return false; + + *base_mode = new_mode; + return true; +} + +static bool +aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type) +{ + int count ATTRIBUTE_UNUSED; + enum machine_mode ag_mode ATTRIBUTE_UNUSED; + + if (!use_vfp_abi (pcs_variant, false)) + return false; + return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); +} + +static bool +aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type) +{ + if (!use_vfp_abi (pcum->pcs_variant, false)) + return false; + + return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, + &pcum->aapcs_vfp_rmode, + &pcum->aapcs_vfp_rcount); +} + +static bool +aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); + unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; + int regno; + + for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) + if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) + { + pcum->aapcs_vfp_reg_alloc = mask << regno; + if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + { + int i; + int rcount = pcum->aapcs_vfp_rcount; + int rshift = shift; + enum machine_mode rmode = pcum->aapcs_vfp_rmode; + rtx par; + if (!TARGET_NEON) + { + /* Avoid using unsupported vector modes. 
*/ + if (rmode == V2SImode) + rmode = DImode; + else if (rmode == V4SImode) + { + rmode = DImode; + rcount *= 2; + rshift /= 2; + } + } + par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount)); + for (i = 0; i < rcount; i++) + { + rtx tmp = gen_rtx_REG (rmode, + FIRST_VFP_REGNUM + regno + i * rshift); + tmp = gen_rtx_EXPR_LIST + (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (rmode))); + XVECEXP (par, 0, i) = tmp; + } + + pcum->aapcs_reg = par; + } + else + pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno); + return true; + } + return false; +} + +static rtx +aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + if (!use_vfp_abi (pcs_variant, false)) + return false; + + if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + { + int count; + enum machine_mode ag_mode; + int i; + rtx par; + int shift; + + aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); + + if (!TARGET_NEON) + { + if (ag_mode == V2SImode) + ag_mode = DImode; + else if (ag_mode == V4SImode) + { + ag_mode = DImode; + count *= 2; + } + } + shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode); + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); + for (i = 0; i < count; i++) + { + rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (ag_mode))); + XVECEXP (par, 0, i) = tmp; + } + + return par; + } + + return gen_rtx_REG (mode, FIRST_VFP_REGNUM); +} + +static void +aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED) +{ + pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc; + pcum->aapcs_vfp_reg_alloc = 0; + return; +} + +#define AAPCS_CP(X) \ + { \ + aapcs_ ## X ## _cum_init, \ + aapcs_ ## X ## _is_call_candidate, \ + aapcs_ ## X ## _allocate, \ + aapcs_ ## X ## _is_return_candidate, \ + aapcs_ ## X ## _allocate_return_reg, \ + aapcs_ ## X ## _advance \ + } + +/* Table of co-processors that can be used to pass arguments in + registers. Ideally no argument should be a candidate for more than + one co-processor table entry, but the table is processed in order + and stops after the first match. If that entry then fails to put + the argument into a co-processor register, the argument will go on + the stack. */ +static struct +{ + /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */ + void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); + + /* Return true if an argument of mode MODE (or type TYPE if MODE is + BLKmode) is a candidate for this co-processor's registers; this + function should ignore any position-dependent state in + CUMULATIVE_ARGS and only use call-type dependent information. */ + bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if the argument does get a co-processor register; it + should set aapcs_reg to an RTX of the register allocated as is + required for a return from FUNCTION_ARG. */ + bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if a result of mode MODE (or type TYPE if MODE is + BLKmode) can be returned in this co-processor's registers.
*/ + bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree); + + /* Allocate and return an RTX element to hold the return type of a + call, this routine must not fail and will only be called if + is_return_candidate returned true with the same parameters. */ + rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree); + + /* Finish processing this argument and prepare to start processing + the next one. */ + void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); +} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = + { + AAPCS_CP(vfp) + }; + +#undef AAPCS_CP + +static int +aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type) +{ + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) + return i; + + return -1; +} + +static int +aapcs_select_return_coproc (const_tree type, const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, + TYPE_MODE (type), + type)) + return i; + } + return -1; +} + +static rtx +aapcs_allocate_return_reg (enum machine_mode mode, const_tree type, + const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + int unsignedp ATTRIBUTE_UNUSED; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + /* Promote integer types. */ + if (type && INTEGRAL_TYPE_P (type)) + mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1); + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, + type)) + return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, + mode, type); + } + + /* Promotes small structs returned in a register to full-word size + for big-endian AAPCS. */ + if (type && arm_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + return gen_rtx_REG (mode, R0_REGNUM); +} + +rtx +aapcs_libcall_value (enum machine_mode mode) +{ + return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); +} + +/* Lay out a function argument using the AAPCS rules. The rule + numbers referred to here are those in the AAPCS. */ +static void +aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type, bool named) +{ + int nregs, nregs2; + int ncrn; + + /* We only need to do this once per argument. 
*/ + if (pcum->aapcs_arg_processed) + return; + + pcum->aapcs_arg_processed = true; + + /* Special case: if named is false then we are handling an incoming + anonymous argument which is on the stack. */ + if (!named) + return; + + /* Is this a potential co-processor register candidate? */ + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int slot = aapcs_select_call_coproc (pcum, mode, type); + pcum->aapcs_cprc_slot = slot; + + /* We don't have to apply any of the rules from part B of the + preparation phase, these are handled elsewhere in the + compiler. */ + + if (slot >= 0) + { + /* A Co-processor register candidate goes either in its own + class of registers or on the stack. */ + if (!pcum->aapcs_cprc_failed[slot]) + { + /* C1.cp - Try to allocate the argument to co-processor + registers. */ + if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) + return; + + /* C2.cp - Put the argument on the stack and note that we + can't assign any more candidates in this slot. We also + need to note that we have allocated stack space, so that + we won't later try to split a non-cprc candidate between + core registers and the stack. */ + pcum->aapcs_cprc_failed[slot] = true; + pcum->can_split = false; + } + + /* We didn't get a register, so this argument goes on the + stack. */ + gcc_assert (pcum->can_split == false); + return; + } + } + + /* C3 - For double-word aligned arguments, round the NCRN up to the + next even number. */ + ncrn = pcum->aapcs_ncrn; + if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) + ncrn++; + + nregs = ARM_NUM_REGS2(mode, type); + + /* Sigh, this test should really assert that nregs > 0, but a GCC + extension allows empty structs and then gives them empty size; it + then allows such a structure to be passed by value. For some of + the code below we have to pretend that such an argument has + non-zero size so that we 'locate' it correctly either in + registers or on the stack. */ + gcc_assert (nregs >= 0); + + nregs2 = nregs ? nregs : 1; + + /* C4 - Argument fits entirely in core registers. */ + if (ncrn + nregs2 <= NUM_ARG_REGS) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = ncrn + nregs; + return; + } + + /* C5 - Some core registers left and there are no arguments already + on the stack: split this argument between the remaining core + registers and the stack. */ + if (ncrn < NUM_ARG_REGS && pcum->can_split) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; + return; + } + + /* C6 - NCRN is set to 4. */ + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + + /* C7,C8 - argument goes on the stack. We have nothing to do here. */ + return; +} + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is NULL. */ +void +arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, + rtx libname, + tree fndecl ATTRIBUTE_UNUSED) +{ + /* Long call handling.
*/ + if (fntype) + pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl); + else + pcum->pcs_variant = arm_pcs_default; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + if (arm_libcall_uses_aapcs_base (libname)) + pcum->pcs_variant = ARM_PCS_AAPCS; + + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + pcum->aapcs_arg_processed = false; + pcum->aapcs_cprc_slot = -1; + pcum->can_split = true; + + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + { + pcum->aapcs_cprc_failed[i] = false; + aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl); + } + } + return; + } + + /* Legacy ABIs */ + + /* On the ARM, the offset starts at 0. */ + pcum->nregs = 0; + pcum->iwmmxt_nregs = 0; + pcum->can_split = true; + + /* Varargs vectors are treated the same as long long. + named_count avoids having to change the way arm handles 'named' */ + pcum->named_count = 0; + pcum->nargs = 0; + + if (TARGET_REALLY_IWMMXT && fntype) + { + tree fn_arg; + + for (fn_arg = TYPE_ARG_TYPES (fntype); + fn_arg; + fn_arg = TREE_CHAIN (fn_arg)) + pcum->named_count += 1; + + if (! pcum->named_count) + pcum->named_count = INT_MAX; + } +} + + +/* Return true if mode/type need doubleword alignment. */ +static bool +arm_needs_doubleword_align (enum machine_mode mode, const_tree type) +{ + return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY + || (type && TYPE_ALIGN (type) > PARM_BOUNDARY)); +} + + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On the ARM, normally the first 16 bytes are passed in registers r0-r3; all + other arguments are passed on the stack. If (NAMED == 0) (which happens + only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is + defined), say it is passed in the stack (function_prologue will + indeed make it pass in the stack if necessary). */ + +static rtx +arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type, bool named) +{ + int nregs; + + /* Handle the special case quickly. Pick an arbitrary value for op2 of + a call insn (op3 of a call_value insn). */ + if (mode == VOIDmode) + return const0_rtx; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_reg; + } + + /* Varargs vectors are treated the same as long long. + named_count avoids having to change the way arm handles 'named' */ + if (TARGET_IWMMXT_ABI + && arm_vector_mode_supported_p (mode) + && pcum->named_count > pcum->nargs + 1) + { + if (pcum->iwmmxt_nregs <= 9) + return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM); + else + { + pcum->can_split = false; + return NULL_RTX; + } + } + + /* Put doubleword aligned quantities in even register pairs. */ + if (pcum->nregs & 1 + && ARM_DOUBLEWORD_ALIGN + && arm_needs_doubleword_align (mode, type)) + pcum->nregs++; + + /* Only allow splitting an arg between regs and memory if all preceding + args were allocated to regs. 
For args passed by reference we only count + the reference pointer. */ + if (pcum->can_split) + nregs = 1; + else + nregs = ARM_NUM_REGS2 (mode, type); + + if (!named || pcum->nregs + nregs > NUM_ARG_REGS) + return NULL_RTX; + + return gen_rtx_REG (mode, pcum->nregs); +} + +/* The AAPCS sets the maximum alignment of a vector to 64 bits. */ +static HOST_WIDE_INT +arm_vector_alignment (const_tree type) +{ + HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0); + + if (TARGET_AAPCS_BASED) + align = MIN (align, 64); + + return align; +} + +static unsigned int +arm_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type) + ? DOUBLEWORD_ALIGNMENT + : PARM_BOUNDARY); +} + +static int +arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + tree type, bool named) +{ + int nregs = pcum->nregs; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_partial; + } + + if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) + return 0; + + if (NUM_ARG_REGS > nregs + && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type)) + && pcum->can_split) + return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; + + return 0; +} + +/* Update the data in PCUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +static void +arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type, bool named) +{ + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + + if (pcum->aapcs_cprc_slot >= 0) + { + aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, + type); + pcum->aapcs_cprc_slot = -1; + } + + /* Generic stuff. */ + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + } + else + { + pcum->nargs += 1; + if (arm_vector_mode_supported_p (mode) + && pcum->named_count > pcum->nargs + && TARGET_IWMMXT_ABI) + pcum->iwmmxt_nregs += 1; + else + pcum->nregs += ARM_NUM_REGS2 (mode, type); + } +} + +/* Variable sized types are passed by reference. This is a GCC + extension to the ARM ABI. */ + +static bool +arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; +} + +/* Encode the current state of the #pragma [no_]long_calls. */ +typedef enum +{ + OFF, /* No #pragma [no_]long_calls is in effect. */ + LONG, /* #pragma long_calls is in effect. */ + SHORT /* #pragma no_long_calls is in effect. */ +} arm_pragma_enum; + +static arm_pragma_enum arm_pragma_long_calls = OFF; + +void +arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = LONG; +} + +void +arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = SHORT; +} + +void +arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = OFF; +} + +/* Handle an attribute requiring a FUNCTION_DECL; + arguments as in struct attribute_spec.handler. 
*/ +static tree +arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle an "interrupt" or "isr" attribute; + arguments as in struct attribute_spec.handler. */ +static tree +arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, + bool *no_add_attrs) +{ + if (DECL_P (*node)) + { + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + /* FIXME: the argument if any is checked for type attributes; + should it be checked for decl ones? */ + } + else + { + if (TREE_CODE (*node) == FUNCTION_TYPE + || TREE_CODE (*node) == METHOD_TYPE) + { + if (arm_isr_value (args) == ARM_FT_UNKNOWN) + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + } + else if (TREE_CODE (*node) == POINTER_TYPE + && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE + || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE) + && arm_isr_value (args) != ARM_FT_UNKNOWN) + { + *node = build_variant_type_copy (*node); + TREE_TYPE (*node) = build_type_attribute_variant + (TREE_TYPE (*node), + tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node)))); + *no_add_attrs = true; + } + else + { + /* Possibly pass this attribute on from the type to a decl. */ + if (flags & ((int) ATTR_FLAG_DECL_NEXT + | (int) ATTR_FLAG_FUNCTION_NEXT + | (int) ATTR_FLAG_ARRAY_NEXT)) + { + *no_add_attrs = true; + return tree_cons (name, args, NULL_TREE); + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + } + } + } + + return NULL_TREE; +} + +/* Handle a "pcs" attribute; arguments as in struct + attribute_spec.handler. */ +static tree +arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + return NULL_TREE; +} + +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +/* Handle the "notshared" attribute. This attribute is another way of + requesting hidden visibility. ARM's compiler supports + "__declspec(notshared)"; we support the same thing via an + attribute. */ + +static tree +arm_handle_notshared_attribute (tree *node, + tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree decl = TYPE_NAME (*node); + + if (decl) + { + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (decl) = 1; + *no_add_attrs = false; + } + return NULL_TREE; +} +#endif + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). */ +static int +arm_comp_type_attributes (const_tree type1, const_tree type2) +{ + int l1, l2, s1, s2; + + /* Check for mismatch of non-default calling convention. */ + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + /* Check for mismatched call attributes. 
*/ + l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; + s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; + s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; + + /* Only bother to check if an attribute is defined. */ + if (l1 | l2 | s1 | s2) + { + /* If one type has an attribute, the other must have the same attribute. */ + if ((l1 != l2) || (s1 != s2)) + return 0; + + /* Disallow mixed attributes. */ + if ((l1 & s2) || (l2 & s1)) + return 0; + } + + /* Check for mismatched ISR attribute. */ + l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL; + if (! l1) + l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL; + if (! l2) + l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL; + if (l1 != l2) + return 0; + + return 1; +} + +/* Assigns default attributes to newly defined type. This is used to + set short_call/long_call attributes for function types of + functions defined inside corresponding #pragma scopes. */ +static void +arm_set_default_type_attributes (tree type) +{ + /* Add __attribute__ ((long_call)) to all functions, when + inside #pragma long_calls or __attribute__ ((short_call)), + when inside #pragma no_long_calls. */ + if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) + { + tree type_attr_list, attr_name; + type_attr_list = TYPE_ATTRIBUTES (type); + + if (arm_pragma_long_calls == LONG) + attr_name = get_identifier ("long_call"); + else if (arm_pragma_long_calls == SHORT) + attr_name = get_identifier ("short_call"); + else + return; + + type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list); + TYPE_ATTRIBUTES (type) = type_attr_list; + } +} + +/* Return true if DECL is known to be linked into section SECTION. */ + +static bool +arm_function_in_section_p (tree decl, section *section) +{ + /* We can only be certain about functions defined in the same + compilation unit. */ + if (!TREE_STATIC (decl)) + return false; + + /* Make sure that SYMBOL always binds to the definition in this + compilation unit. */ + if (!targetm.binds_local_p (decl)) + return false; + + /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ + if (!DECL_SECTION_NAME (decl)) + { + /* Make sure that we will not create a unique section for DECL. */ + if (flag_function_sections || DECL_ONE_ONLY (decl)) + return false; + } + + return function_section (decl) == section; +} + +/* Return nonzero if a 32-bit "long_call" should be generated for + a call from the current function to DECL. We generate a long_call + if the function: + + a. has an __attribute__((long call)) + or b. is within the scope of a #pragma long_calls + or c. the -mlong-calls command line switch has been specified + + However we do not generate a long call if the function: + + d. has an __attribute__ ((short_call)) + or e. is inside the scope of a #pragma no_long_calls + or f. is defined in the same section as the current function. */ + +bool +arm_is_long_call_p (tree decl) +{ + tree attrs; + + if (!decl) + return TARGET_LONG_CALLS; + + attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + if (lookup_attribute ("short_call", attrs)) + return false; + + /* For "f", be conservative, and only cater for cases in which the + whole of the current function is placed in the same section. 
*/ + if (!flag_reorder_blocks_and_partition + && TREE_CODE (decl) == FUNCTION_DECL + && arm_function_in_section_p (decl, current_function_section ())) + return false; + + if (lookup_attribute ("long_call", attrs)) + return true; + + return TARGET_LONG_CALLS; +} + +/* Return nonzero if it is ok to make a tail-call to DECL. */ +static bool +arm_function_ok_for_sibcall (tree decl, tree exp) +{ + unsigned long func_type; + + if (cfun->machine->sibcall_blocked) + return false; + + /* Never tailcall something for which we have no decl, or if we + are generating code for Thumb-1. */ + if (decl == NULL || TARGET_THUMB1) + return false; + + /* The PIC register is live on entry to VxWorks PLT entries, so we + must make the call before restoring the PIC register. */ + if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) + return false; + + /* Cannot tail-call to long calls, since these are out of range of + a branch instruction. */ + if (arm_is_long_call_p (decl)) + return false; + + /* If we are interworking and the function is not declared static + then we can't tail-call it unless we know that it exists in this + compilation unit (since it might be a Thumb routine). */ + if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl)) + return false; + + func_type = arm_current_func_type (); + /* Never tailcall from an ISR routine - it needs a special exit sequence. */ + if (IS_INTERRUPT (func_type)) + return false; + + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + { + /* Check that the return value locations are the same. For + example that we aren't returning a value from the sibling in + a VFP register but then need to transfer it to a core + register. */ + rtx a, b; + + a = arm_function_value (TREE_TYPE (exp), decl, false); + b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl, false); + if (!rtx_equal_p (a, b)) + return false; + } + + /* Never tailcall if function may be called with a misaligned SP. */ + if (IS_STACKALIGN (func_type)) + return false; + + /* The AAPCS says that, on bare-metal, calls to unresolved weak + references should become a NOP. Don't convert such calls into + sibling calls. */ + if (TARGET_AAPCS_BASED + && arm_abi == ARM_ABI_AAPCS + && DECL_WEAK (decl)) + return false; + + /* Everything else is ok. */ + return true; +} + + +/* Addressing mode support functions. */ + +/* Return nonzero if X is a legitimate immediate operand when compiling + for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */ +int +legitimate_pic_operand_p (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) + return 0; + + return 1; +} + +/* Record that the current function needs a PIC register. Initialize + cfun->machine->pic_reg if we have not already done so. */ + +static void +require_pic_register (void) +{ + /* A lot of the logic here is made obscure by the fact that this + routine gets called as part of the rtx cost estimation process. + We don't want those calls to affect any assumptions about the real + function; and further, we can't call entry_of_function() until we + start the real expansion process. 
*/ + if (!crtl->uses_pic_offset_table) + { + gcc_assert (can_create_pseudo_p ()); + if (arm_pic_register != INVALID_REGNUM) + { + if (!cfun->machine->pic_reg) + cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register); + + /* Play games to avoid marking the function as needing pic + if we are being called as part of the cost-estimation + process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + crtl->uses_pic_offset_table = 1; + } + else + { + rtx seq, insn; + + if (!cfun->machine->pic_reg) + cfun->machine->pic_reg = gen_reg_rtx (Pmode); + + /* Play games to avoid marking the function as needing pic + if we are being called as part of the cost-estimation + process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + { + crtl->uses_pic_offset_table = 1; + start_sequence (); + + arm_load_pic_register (0UL); + + seq = get_insns (); + end_sequence (); + + for (insn = seq; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + INSN_LOCATOR (insn) = prologue_locator; + + /* We can be called during expansion of PHI nodes, where + we can't yet emit instructions directly in the final + insn stream. Queue the insns on the entry edge, they will + be committed after everything else is expanded. */ + insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); + } + } + } +} + +rtx +legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + if (GET_CODE (orig) == SYMBOL_REF + || GET_CODE (orig) == LABEL_REF) + { + rtx insn; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + /* VxWorks does not impose a fixed gap between segments; the run-time + gap can be different from the object-file gap. We therefore can't + use GOTOFF unless we are absolutely sure that the symbol is in the + same segment as the GOT. Unfortunately, the flexibility of linker + scripts means that we can't be sure of that in general, so assume + that GOTOFF is never valid on VxWorks. */ + if ((GET_CODE (orig) == LABEL_REF + || (GET_CODE (orig) == SYMBOL_REF && + SYMBOL_REF_LOCAL_P (orig))) + && NEED_GOT_RELOC + && !TARGET_VXWORKS_RTP) + insn = arm_pic_static_addr (orig, reg); + else + { + rtx pat; + rtx mem; + + /* If this function doesn't have a pic register, create one now. */ + require_pic_register (); + + pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); + + /* Make the MEM as close to a constant as possible. */ + mem = SET_SRC (pat); + gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); + MEM_READONLY_P (mem) = 1; + MEM_NOTRAP_P (mem) = 1; + + insn = emit_insn (pat); + } + + /* Put a REG_EQUAL note on this insn, so that it can be optimized + by loop. */ + set_unique_reg_note (insn, REG_EQUAL, orig); + + return reg; + } + else if (GET_CODE (orig) == CONST) + { + rtx base, offset; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg) + return orig; + + /* Handle the case where we have: const (UNSPEC_TLS). */ + if (GET_CODE (XEXP (orig, 0)) == UNSPEC + && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS) + return orig; + + /* Handle the case where we have: + const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a + CONST_INT. 
*/ + if (GET_CODE (XEXP (orig, 0)) == PLUS + && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC + && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS) + { + gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT); + return orig; + } + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + + if (GET_CODE (offset) == CONST_INT) + { + /* The base register doesn't really matter, we only want to + test the index for the appropriate mode. */ + if (!arm_legitimate_index_p (mode, offset, SET, 0)) + { + gcc_assert (can_create_pseudo_p ()); + offset = force_reg (Pmode, offset); + } + + if (GET_CODE (offset) == CONST_INT) + return plus_constant (base, INTVAL (offset)); + } + + if (GET_MODE_SIZE (mode) > 4 + && (GET_MODE_CLASS (mode) == MODE_INT + || TARGET_SOFT_FLOAT)) + { + emit_insn (gen_addsi3 (reg, base, offset)); + return reg; + } + + return gen_rtx_PLUS (Pmode, base, offset); + } + + return orig; +} + + +/* Find a spare register to use during the prolog of a function. */ + +static int +thumb_find_work_register (unsigned long pushed_regs_mask) +{ + int reg; + + /* Check the argument registers first as these are call-used. The + register allocation order means that sometimes r3 might be used + but earlier argument registers might not, so check them all. */ + for (reg = LAST_ARG_REGNUM; reg >= 0; reg --) + if (!df_regs_ever_live_p (reg)) + return reg; + + /* Before going on to check the call-saved registers we can try a couple + more ways of deducing that r3 is available. The first is when we are + pushing anonymous arguments onto the stack and we have less than 4 + registers worth of fixed arguments(*). In this case r3 will be part of + the variable argument list and so we can be sure that it will be + pushed right at the start of the function. Hence it will be available + for the rest of the prologue. + (*): ie crtl->args.pretend_args_size is greater than 0. */ + if (cfun->machine->uses_anonymous_args + && crtl->args.pretend_args_size > 0) + return LAST_ARG_REGNUM; + + /* The other case is when we have fixed arguments but less than 4 registers + worth. In this case r3 might be used in the body of the function, but + it is not being used to convey an argument into the function. In theory + we could just check crtl->args.size to see how many bytes are + being passed in argument registers, but it seems that it is unreliable. + Sometimes it will have the value 0 when in fact arguments are being + passed. (See testcase execute/20021111-1.c for an example). So we also + check the args_info.nregs field as well. The problem with this field is + that it makes no allowances for arguments that are passed to the + function but which are not used. Hence we could miss an opportunity + when a function has an unused argument in r3. But it is better to be + safe than to be sorry. */ + if (! cfun->machine->uses_anonymous_args + && crtl->args.size >= 0 + && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD) + && crtl->args.info.nregs < 4) + return LAST_ARG_REGNUM; + + /* Otherwise look for a call-saved register that is going to be pushed. */ + for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --) + if (pushed_regs_mask & (1 << reg)) + return reg; + + if (TARGET_THUMB2) + { + /* Thumb-2 can use high regs. 
*/ + for (reg = FIRST_HI_REGNUM; reg < 15; reg ++) + if (pushed_regs_mask & (1 << reg)) + return reg; + } + /* Something went wrong - thumb_compute_save_reg_mask() + should have arranged for a suitable register to be pushed. */ + gcc_unreachable (); +} + +static GTY(()) int pic_labelno; + +/* Generate code to load the PIC register. In thumb mode SCRATCH is a + low register. */ + +void +arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED) +{ + rtx l1, labelno, pic_tmp, pic_rtx, pic_reg; + + if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE) + return; + + gcc_assert (flag_pic); + + pic_reg = cfun->machine->pic_reg; + if (TARGET_VXWORKS_RTP) + { + pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); + emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); + + emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); + + pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); + emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); + } + else + { + /* We use an UNSPEC rather than a LABEL_REF because this label + never appears in the code stream. */ + + labelno = GEN_INT (pic_labelno++); + l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + l1 = gen_rtx_CONST (VOIDmode, l1); + + /* On the ARM the PC register contains 'dot + 8' at the time of the + addition, on the Thumb it is 'dot + 4'. */ + pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4); + pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), + UNSPEC_GOTSYM_OFF); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); + + if (TARGET_32BIT) + { + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + } + else /* TARGET_THUMB1 */ + { + if (arm_pic_register != INVALID_REGNUM + && REGNO (pic_reg) > LAST_LO_REGNUM) + { + /* We will have pushed the pic register, so we should always be + able to find a work register. */ + pic_tmp = gen_rtx_REG (SImode, + thumb_find_work_register (saved_regs)); + emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx)); + emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp)); + emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); + } + else + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + } + } + + /* Need to emit this whether or not we obey regdecls, + since setjmp/longjmp can cause life info to screw up. */ + emit_use (pic_reg); +} + +/* Generate code to load the address of a static var when flag_pic is set. */ +static rtx +arm_pic_static_addr (rtx orig, rtx reg) +{ + rtx l1, labelno, offset_rtx, insn; + + gcc_assert (flag_pic); + + /* We use an UNSPEC rather than a LABEL_REF because this label + never appears in the code stream. */ + labelno = GEN_INT (pic_labelno++); + l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + l1 = gen_rtx_CONST (VOIDmode, l1); + + /* On the ARM the PC register contains 'dot + 8' at the time of the + addition, on the Thumb it is 'dot + 4'. */ + offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4); + offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx), + UNSPEC_SYMBOL_OFFSET); + offset_rtx = gen_rtx_CONST (Pmode, offset_rtx); + + insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno)); + return insn; +} + +/* Return nonzero if X is valid as an ARM state addressing register. 
*/ +static int +arm_address_register_rtx_p (rtx x, int strict_p) +{ + int regno; + + if (GET_CODE (x) != REG) + return 0; + + regno = REGNO (x); + + if (strict_p) + return ARM_REGNO_OK_FOR_BASE_P (regno); + + return (regno <= LAST_ARM_REGNUM + || regno >= FIRST_PSEUDO_REGISTER + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM); +} + +/* Return TRUE if this rtx is the difference of a symbol and a label, + and will reduce to a PC-relative relocation in the object file. + Expressions like this can be left alone when generating PIC, rather + than forced through the GOT. */ +static int +pcrel_constant_p (rtx x) +{ + if (GET_CODE (x) == MINUS) + return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); + + return FALSE; +} + +/* Return true if X will surely end up in an index register after next + splitting pass. */ +static bool +will_be_in_index_register (const_rtx x) +{ + /* arm.md: calculate_pic_address will split this into a register. */ + return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM); +} + +/* Return nonzero if X is a valid ARM state address operand. */ +int +arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, + int strict_p) +{ + bool use_ldrd; + enum rtx_code code = GET_CODE (x); + + if (arm_address_register_rtx_p (x, strict_p)) + return 1; + + use_ldrd = (TARGET_LDRD + && (mode == DImode + || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); + + if (code == POST_INC || code == PRE_DEC + || ((code == PRE_INC || code == POST_DEC) + && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) + return arm_address_register_rtx_p (XEXP (x, 0), strict_p); + + else if ((code == POST_MODIFY || code == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (x, 0), strict_p) + && GET_CODE (XEXP (x, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) + { + rtx addend = XEXP (XEXP (x, 1), 1); + + /* Don't allow ldrd post increment by register because it's hard + to fixup invalid register choices. */ + if (use_ldrd + && GET_CODE (x) == POST_MODIFY + && GET_CODE (addend) == REG) + return 0; + + return ((use_ldrd || GET_MODE_SIZE (mode) <= 4) + && arm_legitimate_index_p (mode, addend, outer, strict_p)); + } + + /* After reload constants split into minipools will have addresses + from a LABEL_REF. */ + else if (reload_completed + && (code == LABEL_REF + || (code == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))) + return 1; + + else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) + return 0; + + else if (code == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) + && ((GET_CODE(xop1) == CONST_INT + && arm_legitimate_index_p (mode, xop1, outer, strict_p)) + || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && arm_legitimate_index_p (mode, xop0, outer, strict_p))); + } + +#if 0 + /* Reload currently can't handle MINUS, so disable this for now */ + else if (GET_CODE (x) == MINUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return (arm_address_register_rtx_p (xop0, strict_p) + && arm_legitimate_index_p (mode, xop1, outer, strict_p)); + } +#endif + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! 
pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if X is a valid Thumb-2 address operand. */ +static int +thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) +{ + bool use_ldrd; + enum rtx_code code = GET_CODE (x); + + if (arm_address_register_rtx_p (x, strict_p)) + return 1; + + use_ldrd = (TARGET_LDRD + && (mode == DImode + || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); + + if (code == POST_INC || code == PRE_DEC + || ((code == PRE_INC || code == POST_DEC) + && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) + return arm_address_register_rtx_p (XEXP (x, 0), strict_p); + + else if ((code == POST_MODIFY || code == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (x, 0), strict_p) + && GET_CODE (XEXP (x, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) + { + /* Thumb-2 only has autoincrement by constant. */ + rtx addend = XEXP (XEXP (x, 1), 1); + HOST_WIDE_INT offset; + + if (GET_CODE (addend) != CONST_INT) + return 0; + + offset = INTVAL(addend); + if (GET_MODE_SIZE (mode) <= 4) + return (offset > -256 && offset < 256); + + return (use_ldrd && offset > -1024 && offset < 1024 + && (offset & 3) == 0); + } + + /* After reload constants split into minipools will have addresses + from a LABEL_REF. */ + else if (reload_completed + && (code == LABEL_REF + || (code == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))) + return 1; + + else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) + return 0; + + else if (code == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) + && (thumb2_legitimate_index_p (mode, xop1, strict_p) + || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && thumb2_legitimate_index_p (mode, xop0, strict_p))); + } + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if INDEX is valid for an address index operand in + ARM state. */ +static int +arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, + int strict_p) +{ + HOST_WIDE_INT range; + enum rtx_code code = GET_CODE (index); + + /* Standard coprocessor addressing modes. */ + if (TARGET_HARD_FLOAT + && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK) + && (mode == SFmode || mode == DFmode + || (TARGET_MAVERICK && mode == DImode))) + return (code == CONST_INT && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We do this because for + quad mode moves, we will actually decompose them into two separate + double-mode reads or writes. INDEX must therefore be a valid + (double-mode) offset and so should INDEX+8. */ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1016 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* We have no such constraint on double mode offsets, so we permit the + full range of the instruction format. 
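+ For illustration, the check below therefore accepts any word-aligned
+ DImode/DFmode offset strictly inside (-1024, 1024), i.e. up to +/-1020,
+ matching the 8-bit word-scaled immediate of vldr/vstr, whereas the quad
+ check above stops at 1012 so that offset+8 is still within that window.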
*/ + if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (arm_address_register_rtx_p (index, strict_p) + && (GET_MODE_SIZE (mode) <= 4)) + return 1; + + if (mode == DImode || mode == DFmode) + { + if (code == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (index); + + if (TARGET_LDRD) + return val > -256 && val < 256; + else + return val > -4096 && val < 4092; + } + + return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); + } + + if (GET_MODE_SIZE (mode) <= 4 + && ! (arm_arch4 + && (mode == HImode + || mode == HFmode + || (mode == QImode && outer == SIGN_EXTEND)))) + { + if (code == MULT) + { + rtx xiop0 = XEXP (index, 0); + rtx xiop1 = XEXP (index, 1); + + return ((arm_address_register_rtx_p (xiop0, strict_p) + && power_of_two_operand (xiop1, SImode)) + || (arm_address_register_rtx_p (xiop1, strict_p) + && power_of_two_operand (xiop0, SImode))); + } + else if (code == LSHIFTRT || code == ASHIFTRT + || code == ASHIFT || code == ROTATERT) + { + rtx op = XEXP (index, 1); + + return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) + && GET_CODE (op) == CONST_INT + && INTVAL (op) > 0 + && INTVAL (op) <= 31); + } + } + + /* For ARM v4 we may be doing a sign-extend operation during the + load. */ + if (arm_arch4) + { + if (mode == HImode + || mode == HFmode + || (outer == SIGN_EXTEND && mode == QImode)) + range = 256; + else + range = 4096; + } + else + range = (mode == HImode || mode == HFmode) ? 4095 : 4096; + + return (code == CONST_INT + && INTVAL (index) < range + && INTVAL (index) > -range); +} + +/* Return true if OP is a valid index scaling factor for Thumb-2 address + index operand. i.e. 1, 2, 4 or 8. */ +static bool +thumb2_index_mul_operand (rtx op) +{ + HOST_WIDE_INT val; + + if (GET_CODE(op) != CONST_INT) + return false; + + val = INTVAL(op); + return (val == 1 || val == 2 || val == 4 || val == 8); +} + +/* Return nonzero if INDEX is a valid Thumb-2 address index operand. */ +static int +thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p) +{ + enum rtx_code code = GET_CODE (index); + + /* ??? Combine arm and thumb2 coprocessor addressing modes. */ + /* Standard coprocessor addressing modes. */ + if (TARGET_HARD_FLOAT + && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK) + && (mode == SFmode || mode == DFmode + || (TARGET_MAVERICK && mode == DImode))) + return (code == CONST_INT && INTVAL (index) < 1024 + /* Thumb-2 allows only > -256 index range for it's core register + load/stores. Since we allow SF/DF in core registers, we have + to use the intersection between -256~4096 (core) and -1024~1024 + (coprocessor). */ + && INTVAL (index) > -256 + && (INTVAL (index) & 3) == 0); + + if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) + { + /* For DImode assume values will usually live in core regs + and only allow LDRD addressing modes. */ + if (!TARGET_LDRD || mode != DImode) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + } + + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We do this because for + quad mode moves, we will actually decompose them into two separate + double-mode reads or writes. 
INDEX must therefore be a valid + (double-mode) offset and so should INDEX+8. */ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1016 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* We have no such constraint on double mode offsets, so we permit the + full range of the instruction format. */ + if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (arm_address_register_rtx_p (index, strict_p) + && (GET_MODE_SIZE (mode) <= 4)) + return 1; + + if (mode == DImode || mode == DFmode) + { + if (code == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (index); + /* ??? Can we assume ldrd for thumb2? */ + /* Thumb-2 ldrd only has reg+const addressing modes. */ + /* ldrd supports offsets of +-1020. + However the ldr fallback does not. */ + return val > -256 && val < 256 && (val & 3) == 0; + } + else + return 0; + } + + if (code == MULT) + { + rtx xiop0 = XEXP (index, 0); + rtx xiop1 = XEXP (index, 1); + + return ((arm_address_register_rtx_p (xiop0, strict_p) + && thumb2_index_mul_operand (xiop1)) + || (arm_address_register_rtx_p (xiop1, strict_p) + && thumb2_index_mul_operand (xiop0))); + } + else if (code == ASHIFT) + { + rtx op = XEXP (index, 1); + + return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) + && GET_CODE (op) == CONST_INT + && INTVAL (op) > 0 + && INTVAL (op) <= 3); + } + + return (code == CONST_INT + && INTVAL (index) < 4096 + && INTVAL (index) > -256); +} + +/* Return nonzero if X is valid as a 16-bit Thumb state base register. */ +static int +thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p) +{ + int regno; + + if (GET_CODE (x) != REG) + return 0; + + regno = REGNO (x); + + if (strict_p) + return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode); + + return (regno <= LAST_LO_REGNUM + || regno > LAST_VIRTUAL_REGISTER + || regno == FRAME_POINTER_REGNUM + || (GET_MODE_SIZE (mode) >= 4 + && (regno == STACK_POINTER_REGNUM + || regno >= FIRST_PSEUDO_REGISTER + || x == hard_frame_pointer_rtx + || x == arg_pointer_rtx))); +} + +/* Return nonzero if x is a legitimate index register. This is the case + for any base register that can access a QImode object. */ +inline static int +thumb1_index_register_rtx_p (rtx x, int strict_p) +{ + return thumb1_base_register_rtx_p (x, QImode, strict_p); +} + +/* Return nonzero if x is a legitimate 16-bit Thumb-state address. + + The AP may be eliminated to either the SP or the FP, so we use the + least common denominator, e.g. SImode, and offsets from 0 to 64. + + ??? Verify whether the above is the right approach. + + ??? Also, the FP may be eliminated to the SP, so perhaps that + needs special handling also. + + ??? Look at how the mips16 port solves this problem. It probably uses + better ways to solve some of these problems. + + Although it is not incorrect, we don't accept QImode and HImode + addresses based on the frame pointer or arg pointer until the + reload pass starts. This is so that eliminating such addresses + into stack based ones won't produce impossible code. */ +static int +thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) +{ + /* ??? Not clear if this is right. Experiment. 
*/ + if (GET_MODE_SIZE (mode) < 4 + && !(reload_in_progress || reload_completed) + && (reg_mentioned_p (frame_pointer_rtx, x) + || reg_mentioned_p (arg_pointer_rtx, x) + || reg_mentioned_p (virtual_incoming_args_rtx, x) + || reg_mentioned_p (virtual_outgoing_args_rtx, x) + || reg_mentioned_p (virtual_stack_dynamic_rtx, x) + || reg_mentioned_p (virtual_stack_vars_rtx, x))) + return 0; + + /* Accept any base register. SP only in SImode or larger. */ + else if (thumb1_base_register_rtx_p (x, mode, strict_p)) + return 1; + + /* This is PC relative data before arm_reorg runs. */ + else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x) + && GET_CODE (x) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic) + return 1; + + /* This is PC relative data after arm_reorg runs. */ + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) + && reload_completed + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))) + return 1; + + /* Post-inc indexing only supported for SImode and larger. */ + else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4 + && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)) + return 1; + + else if (GET_CODE (x) == PLUS) + { + /* REG+REG address can be any two index registers. */ + /* We disallow FRAME+REG addressing since we know that FRAME + will be replaced with STACK, and SP relative addressing only + permits SP+OFFSET. */ + if (GET_MODE_SIZE (mode) <= 4 + && XEXP (x, 0) != frame_pointer_rtx + && XEXP (x, 1) != frame_pointer_rtx + && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) + && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) + || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) + return 1; + + /* REG+const has 5-7 bit offset for non-SP registers. */ + else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) + || XEXP (x, 0) == arg_pointer_rtx) + && GET_CODE (XEXP (x, 1)) == CONST_INT + && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) + return 1; + + /* REG+const has 10-bit offset for SP, but only SImode and + larger is supported. */ + /* ??? Should probably check for DI/DFmode overflow here + just like GO_IF_LEGITIMATE_OFFSET does. */ + else if (GET_CODE (XEXP (x, 0)) == REG + && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM + && GET_MODE_SIZE (mode) >= 4 + && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) >= 0 + && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024 + && (INTVAL (XEXP (x, 1)) & 3) == 0) + return 1; + + else if (GET_CODE (XEXP (x, 0)) == REG + && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM + || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM + || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER + && REGNO (XEXP (x, 0)) + <= LAST_VIRTUAL_POINTER_REGISTER)) + && GET_MODE_SIZE (mode) >= 4 + && GET_CODE (XEXP (x, 1)) == CONST_INT + && (INTVAL (XEXP (x, 1)) & 3) == 0) + return 1; + } + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && GET_MODE_SIZE (mode) == 4 + && GET_CODE (x) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if VAL can be used as an offset in a Thumb-state address + instruction of mode MODE. 
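+ As a concrete reading of the ranges below: QImode accepts offsets 0-31,
+ HImode 0-62 in steps of 2, and word or larger modes any offset that keeps
+ VAL + GET_MODE_SIZE (mode) within 128 in steps of 4 (0-124 for SImode),
+ matching the scaled 5-bit immediates of the Thumb-1 load/store forms.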
*/ +int +thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val) +{ + switch (GET_MODE_SIZE (mode)) + { + case 1: + return val >= 0 && val < 32; + + case 2: + return val >= 0 && val < 64 && (val & 1) == 0; + + default: + return (val >= 0 + && (val + GET_MODE_SIZE (mode)) <= 128 + && (val & 3) == 0); + } +} + +bool +arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p) +{ + if (TARGET_ARM) + return arm_legitimate_address_outer_p (mode, x, SET, strict_p); + else if (TARGET_THUMB2) + return thumb2_legitimate_address_p (mode, x, strict_p); + else /* if (TARGET_THUMB1) */ + return thumb1_legitimate_address_p (mode, x, strict_p); +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +static rtx +arm_load_tp (rtx target) +{ + if (!target) + target = gen_reg_rtx (SImode); + + if (TARGET_HARD_TP) + { + /* Can return in any reg. */ + emit_insn (gen_load_tp_hard (target)); + } + else + { + /* Always returned in r0. Immediately copy the result into a pseudo, + otherwise other uses of r0 (e.g. setting up function arguments) may + clobber the value. */ + + rtx tmp; + + emit_insn (gen_load_tp_soft ()); + + tmp = gen_rtx_REG (SImode, 0); + emit_move_insn (target, tmp); + } + return target; +} + +static rtx +load_tls_operand (rtx x, rtx reg) +{ + rtx tmp; + + if (reg == NULL_RTX) + reg = gen_reg_rtx (SImode); + + tmp = gen_rtx_CONST (SImode, x); + + emit_move_insn (reg, tmp); + + return reg; +} + +static rtx +arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc) +{ + rtx insns, label, labelno, sum; + + start_sequence (); + + labelno = GEN_INT (pic_labelno++); + label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + label = gen_rtx_CONST (VOIDmode, label); + + sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (reloc), label, + GEN_INT (TARGET_ARM ? 8 : 4)), + UNSPEC_TLS); + reg = load_tls_operand (sum, reg); + + if (TARGET_ARM) + emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); + else if (TARGET_THUMB2) + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + else /* TARGET_THUMB1 */ + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + + *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */ + Pmode, 1, reg, Pmode); + + insns = get_insns (); + end_sequence (); + + return insns; +} + +rtx +legitimize_tls_address (rtx x, rtx reg) +{ + rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend; + unsigned int model = SYMBOL_REF_TLS_MODEL (x); + + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32); + dest = gen_reg_rtx (Pmode); + emit_libcall_block (insns, dest, ret, x); + return dest; + + case TLS_MODEL_LOCAL_DYNAMIC: + insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32); + + /* Attach a unique REG_EQUIV, to allow the RTL optimizers to + share the LDM result with other LD model accesses. */ + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), + UNSPEC_TLS); + dest = gen_reg_rtx (Pmode); + emit_libcall_block (insns, dest, ret, eqv); + + /* Load the addend. 
*/ + addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)), + UNSPEC_TLS); + addend = force_reg (SImode, gen_rtx_CONST (SImode, addend)); + return gen_rtx_PLUS (Pmode, dest, addend); + + case TLS_MODEL_INITIAL_EXEC: + labelno = GEN_INT (pic_labelno++); + label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + label = gen_rtx_CONST (VOIDmode, label); + sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (TLS_IE32), label, + GEN_INT (TARGET_ARM ? 8 : 4)), + UNSPEC_TLS); + reg = load_tls_operand (sum, reg); + + if (TARGET_ARM) + emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); + else if (TARGET_THUMB2) + emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno)); + else + { + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + emit_move_insn (reg, gen_const_mem (SImode, reg)); + } + + tp = arm_load_tp (NULL_RTX); + + return gen_rtx_PLUS (Pmode, tp, reg); + + case TLS_MODEL_LOCAL_EXEC: + tp = arm_load_tp (NULL_RTX); + + reg = gen_rtx_UNSPEC (Pmode, + gen_rtvec (2, x, GEN_INT (TLS_LE32)), + UNSPEC_TLS); + reg = force_reg (SImode, gen_rtx_CONST (SImode, reg)); + + return gen_rtx_PLUS (Pmode, tp, reg); + + default: + abort (); + } +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ +rtx +arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) +{ + if (!TARGET_ARM) + { + /* TODO: legitimize_address for Thumb2. */ + if (TARGET_THUMB2) + return x; + return thumb_legitimize_address (x, orig_x, mode); + } + + if (arm_tls_symbol_p (x)) + return legitimize_tls_address (x, NULL_RTX); + + if (GET_CODE (x) == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0)) + xop0 = force_reg (SImode, xop0); + + if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1)) + xop1 = force_reg (SImode, xop1); + + if (ARM_BASE_REGISTER_RTX_P (xop0) + && GET_CODE (xop1) == CONST_INT) + { + HOST_WIDE_INT n, low_n; + rtx base_reg, val; + n = INTVAL (xop1); + + /* VFP addressing modes actually allow greater offsets, but for + now we just stick with the lowest common denominator. */ + if (mode == DImode + || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode)) + { + low_n = n & 0x0f; + n &= ~0x0f; + if (low_n > 4) + { + n += 16; + low_n -= 16; + } + } + else + { + low_n = ((mode) == TImode ? 0 + : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff)); + n -= low_n; + } + + base_reg = gen_reg_rtx (SImode); + val = force_operand (plus_constant (xop0, n), NULL_RTX); + emit_move_insn (base_reg, val); + x = plus_constant (base_reg, low_n); + } + else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) + x = gen_rtx_PLUS (SImode, xop0, xop1); + } + + /* XXX We don't allow MINUS any more -- see comment in + arm_legitimate_address_outer_p (). */ + else if (GET_CODE (x) == MINUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + if (CONSTANT_P (xop0)) + xop0 = force_reg (SImode, xop0); + + if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1)) + xop1 = force_reg (SImode, xop1); + + if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) + x = gen_rtx_MINUS (SImode, xop0, xop1); + } + + /* Make sure to take full advantage of the pre-indexed addressing mode + with absolute addresses which often allows for the base register to + be factorized for multiple adjacent memory references, and it might + even allows for the mini pool to be avoided entirely. 
*/ + else if (GET_CODE (x) == CONST_INT && optimize > 0) + { + unsigned int bits; + HOST_WIDE_INT mask, base, index; + rtx base_reg; + + /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only + use a 8-bit index. So let's use a 12-bit index for SImode only and + hope that arm_gen_constant will enable ldrb to use more bits. */ + bits = (mode == SImode) ? 12 : 8; + mask = (1 << bits) - 1; + base = INTVAL (x) & ~mask; + index = INTVAL (x) & mask; + if (bit_count (base & 0xffffffff) > (32 - bits)/2) + { + /* It'll most probably be more efficient to generate the base + with more bits set and use a negative index instead. */ + base |= mask; + index -= mask; + } + base_reg = force_reg (SImode, GEN_INT (base)); + x = plus_constant (base_reg, index); + } + + if (flag_pic) + { + /* We need to find and carefully transform any SYMBOL and LABEL + references; so go back to the original address expression. */ + rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); + + if (new_x != orig_x) + x = new_x; + } + + return x; +} + + +/* Try machine-dependent ways of modifying an illegitimate Thumb address + to be legitimate. If we find one, return the new, valid address. */ +rtx +thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) +{ + if (arm_tls_symbol_p (x)) + return legitimize_tls_address (x, NULL_RTX); + + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == CONST_INT + && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode) + || INTVAL (XEXP (x, 1)) < 0)) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + HOST_WIDE_INT offset = INTVAL (xop1); + + /* Try and fold the offset into a biasing of the base register and + then offsetting that. Don't do this when optimizing for space + since it can cause too many CSEs. */ + if (optimize_size && offset >= 0 + && offset < 256 + 31 * GET_MODE_SIZE (mode)) + { + HOST_WIDE_INT delta; + + if (offset >= 256) + delta = offset - (256 - GET_MODE_SIZE (mode)); + else if (offset < 32 * GET_MODE_SIZE (mode) + 8) + delta = 31 * GET_MODE_SIZE (mode); + else + delta = offset & (~31 * GET_MODE_SIZE (mode)); + + xop0 = force_operand (plus_constant (xop0, offset - delta), + NULL_RTX); + x = plus_constant (xop0, delta); + } + else if (offset < 0 && offset > -256) + /* Small negative offsets are best done with a subtract before the + dereference, forcing these into a register normally takes two + instructions. */ + x = force_operand (x, NULL_RTX); + else + { + /* For the remaining cases, force the constant into a register. */ + xop1 = force_reg (SImode, xop1); + x = gen_rtx_PLUS (SImode, xop0, xop1); + } + } + else if (GET_CODE (x) == PLUS + && s_register_operand (XEXP (x, 1), SImode) + && !s_register_operand (XEXP (x, 0), SImode)) + { + rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX); + + x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1)); + } + + if (flag_pic) + { + /* We need to find and carefully transform any SYMBOL and LABEL + references; so go back to the original address expression. 
*/ + rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); + + if (new_x != orig_x) + x = new_x; + } + + return x; +} + +bool +arm_legitimize_reload_address (rtx *p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*p) == PLUS + && GET_CODE (XEXP (*p, 0)) == REG + && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) + && GET_CODE (XEXP (*p, 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); + HOST_WIDE_INT low, high; + + if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT)) + low = ((val & 0xf) ^ 0x8) - 0x8; + else if (TARGET_MAVERICK && TARGET_HARD_FLOAT) + /* Need to be careful, -256 is not a valid offset. */ + low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); + else if (mode == SImode + || (mode == SFmode && TARGET_SOFT_FLOAT) + || ((mode == HImode || mode == QImode) && ! arm_arch4)) + /* Need to be careful, -4096 is not a valid offset. */ + low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff); + else if ((mode == HImode || mode == QImode) && arm_arch4) + /* Need to be careful, -256 is not a valid offset. */ + low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); + else if (GET_MODE_CLASS (mode) == MODE_FLOAT + && TARGET_HARD_FLOAT && TARGET_FPA) + /* Need to be careful, -1024 is not a valid offset. */ + low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff); + else + return false; + + high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) + ^ (unsigned HOST_WIDE_INT) 0x80000000) + - (unsigned HOST_WIDE_INT) 0x80000000); + /* Check for overflow or zero */ + if (low == 0 || high == 0 || (high + low != val)) + return false; + + /* Reload the high part into a base reg; leave the low part + in the mem. */ + *p = gen_rtx_PLUS (GET_MODE (*p), + gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), + GEN_INT (high)), + GEN_INT (low)); + push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, + MODE_BASE_REG_CLASS (mode), GET_MODE (*p), + VOIDmode, 0, 0, opnum, (enum reload_type) type); + return true; + } + + return false; +} + +rtx +thumb_legitimize_reload_address (rtx *x_p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + rtx x = *x_p; + + if (GET_CODE (x) == PLUS + && GET_MODE_SIZE (mode) < 4 + && REG_P (XEXP (x, 0)) + && XEXP (x, 0) == stack_pointer_rtx + && GET_CODE (XEXP (x, 1)) == CONST_INT + && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) + { + rtx orig_x = x; + + x = copy_rtx (x); + push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); + return x; + } + + /* If both registers are hi-regs, then it's better to reload the + entire expression rather than each register individually. That + only requires one reload register rather than two. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REG_P (XEXP (x, 1)) + && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) + && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) + { + rtx orig_x = x; + + x = copy_rtx (x); + push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); + return x; + } + + return NULL; +} + +/* Test for various thread-local symbols. */ + +/* Return TRUE if X is a thread-local symbol. */ + +static bool +arm_tls_symbol_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + if (GET_CODE (x) != SYMBOL_REF) + return false; + + return SYMBOL_REF_TLS_MODEL (x) != 0; +} + +/* Helper for arm_tls_referenced_p. 
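+ (Returning -1 below tells for_each_rtx not to descend into that
+ subexpression, while a positive return value makes arm_tls_referenced_p
+ report that a real TLS symbol was found.)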
*/ + +static int +arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == SYMBOL_REF) + return SYMBOL_REF_TLS_MODEL (*x) != 0; + + /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are + TLS offsets, not real symbol references. */ + if (GET_CODE (*x) == UNSPEC + && XINT (*x, 1) == UNSPEC_TLS) + return -1; + + return 0; +} + +/* Return TRUE if X contains any TLS symbol references. */ + +bool +arm_tls_referenced_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, arm_tls_operand_p_1, NULL); +} + +/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + +bool +arm_cannot_force_const_mem (rtx x) +{ + rtx base, offset; + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + { + split_const (x, &base, &offset); + if (GET_CODE (base) == SYMBOL_REF + && !offset_within_block_p (base, INTVAL (offset))) + return true; + } + return arm_tls_referenced_p (x); +} + +#define REG_OR_SUBREG_REG(X) \ + (GET_CODE (X) == REG \ + || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG)) + +#define REG_OR_SUBREG_RTX(X) \ + (GET_CODE (X) == REG ? (X) : SUBREG_REG (X)) + +static inline int +thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) +{ + enum machine_mode mode = GET_MODE (x); + int total; + + switch (code) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + case PLUS: + case MINUS: + case COMPARE: + case NEG: + case NOT: + return COSTS_N_INSNS (1); + + case MULT: + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + int cycles = 0; + unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); + + while (i) + { + i >>= 2; + cycles++; + } + return COSTS_N_INSNS (2) + cycles; + } + return COSTS_N_INSNS (1) + 16; + + case SET: + return (COSTS_N_INSNS (1) + + 4 * ((GET_CODE (SET_SRC (x)) == MEM) + + GET_CODE (SET_DEST (x)) == MEM)); + + case CONST_INT: + if (outer == SET) + { + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) + return 0; + if (thumb_shiftable_const (INTVAL (x))) + return COSTS_N_INSNS (2); + return COSTS_N_INSNS (3); + } + else if ((outer == PLUS || outer == COMPARE) + && INTVAL (x) < 256 && INTVAL (x) > -256) + return 0; + else if ((outer == IOR || outer == XOR || outer == AND) + && INTVAL (x) < 256 && INTVAL (x) >= -256) + return COSTS_N_INSNS (1); + else if (outer == AND) + { + int i; + /* This duplicates the tests in the andsi3 expander. */ + for (i = 9; i <= 31; i++) + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) + return COSTS_N_INSNS (2); + } + else if (outer == ASHIFT || outer == ASHIFTRT + || outer == LSHIFTRT) + return 0; + return COSTS_N_INSNS (2); + + case CONST: + case CONST_DOUBLE: + case LABEL_REF: + case SYMBOL_REF: + return COSTS_N_INSNS (3); + + case UDIV: + case UMOD: + case DIV: + case MOD: + return 100; + + case TRUNCATE: + return 99; + + case AND: + case XOR: + case IOR: + /* XXX guess. */ + return 8; + + case MEM: + /* XXX another guess. */ + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + ? 4 : 0)); + + case IF_THEN_ELSE: + /* XXX a guess. */ + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + return 14; + return 2; + + case SIGN_EXTEND: + case ZERO_EXTEND: + total = mode == DImode ? 
COSTS_N_INSNS (1) : 0; + total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code); + + if (mode == SImode) + return total; + + if (arm_arch6) + return total + COSTS_N_INSNS (1); + + /* Assume a two-shift sequence. Increase the cost slightly so + we prefer actual shifts over an extend operation. */ + return total + 1 + COSTS_N_INSNS (2); + + default: + return 99; + } +} + +static inline bool +arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + enum rtx_code subcode; + rtx operand; + enum rtx_code code = GET_CODE (x); + *total = 0; + + switch (code) + { + case MEM: + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + if (TARGET_HARD_FLOAT && mode == SFmode) + *total = COSTS_N_INSNS (2); + else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) + *total = COSTS_N_INSNS (4); + else + *total = COSTS_N_INSNS (20); + return false; + + case ROTATE: + if (GET_CODE (XEXP (x, 1)) == REG) + *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */ + else if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total = rtx_cost (XEXP (x, 1), code, speed); + + /* Fall through */ + case ROTATERT: + if (mode != SImode) + { + *total += COSTS_N_INSNS (4); + return true; + } + + /* Fall through */ + case ASHIFT: case LSHIFTRT: case ASHIFTRT: + *total += rtx_cost (XEXP (x, 0), code, speed); + if (mode == DImode) + { + *total += COSTS_N_INSNS (3); + return true; + } + + *total += COSTS_N_INSNS (1); + /* Increase the cost of complex shifts because they aren't any faster, + and reduce dual issue opportunities. */ + if (arm_tune_cortex_a9 + && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT) + ++*total; + + return true; + + case MINUS: + if (mode == DImode) + { + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + if (GET_CODE (XEXP (x, 0)) == CONST_INT + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_arm (INTVAL (XEXP (x, 1)))) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE + && arm_const_double_rtx (XEXP (x, 0))) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == CONST_INT + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 1)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed); + return true; + } + + /* A shift as a part of RSB costs no more than RSB itself. 
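+ For instance, (reg2 << 2) - reg1 maps onto the single instruction
+ rsb rd, r1, r2, lsl #2, so only the operands' own costs are added on top
+ of the one-insn base.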
*/ + if (GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed); + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed); + return true; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); + if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG + && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + + return true; + } + + /* Fall through */ + + case PLUS: + if (code == PLUS && arm_arch6 && mode == SImode + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)), + speed); + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + } + + /* MLA: All arguments must be registers. We filter out + multiplication by a power of two, so that we fall down into + the code below. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + /* The cost comes from the cost of the multiply. */ + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + return false; + } + + *total = COSTS_N_INSNS (20); + return false; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed); + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG + && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + return true; + } + + /* Fall through */ + + case AND: case XOR: case IOR: + + /* Normally the frame registers will be spilt into reg+const during + reload, so it is a bad idea to combine them with other instructions, + since then they might not be moved outside of loops. As a compromise + we allow integration with ops that have a constant as their second + operand. 
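+ (Concretely, the check below charges one extra insn whenever operand 0
+ is a frame-related register and operand 1 is not a CONST_INT.)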
*/ + if (REG_OR_SUBREG_REG (XEXP (x, 0)) + && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) + && GET_CODE (XEXP (x, 1)) != CONST_INT) + *total = COSTS_N_INSNS (1); + + if (mode == DImode) + { + *total += COSTS_N_INSNS (2); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + return false; + } + + *total += COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + if (subcode == UMIN || subcode == UMAX + || subcode == SMIN || subcode == SMAX) + { + *total = COSTS_N_INSNS (3); + return true; + } + + return false; + + case MULT: + /* This should have been handled by the CPU specific routines. */ + gcc_unreachable (); + + case TRUNCATE: + if (arm_arch3m && mode == SImode + && GET_CODE (XEXP (x, 0)) == LSHIFTRT + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) + == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))) + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND)) + { + *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed); + return true; + } + *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */ + return false; + + case NEG: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (2); + return false; + } + + /* Fall through */ + case NOT: + *total = COSTS_N_INSNS (ARM_NUM_REGS(mode)); + if (mode == SImode && code == NOT) + { + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT + || (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + /* Register shifts cost an extra cycle. 
*/ + if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT) + *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1), + subcode, speed); + return true; + } + } + + return false; + + case IF_THEN_ELSE: + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + { + *total = COSTS_N_INSNS (4); + return true; + } + + operand = XEXP (x, 0); + + if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE) + && GET_CODE (XEXP (operand, 0)) == REG + && REGNO (XEXP (operand, 0)) == CC_REGNUM)) + *total += COSTS_N_INSNS (1); + *total += (rtx_cost (XEXP (x, 1), code, speed) + + rtx_cost (XEXP (x, 2), code, speed)); + return true; + + case NE: + if (mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); + return true; + } + goto scc_insn; + + case GE: + if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); + return true; + } + goto scc_insn; + + case LT: + if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); + return true; + } + goto scc_insn; + + case EQ: + case GT: + case LE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: + scc_insn: + /* SCC insns. In the case where the comparison has already been + performed, then they cost 2 instructions. Otherwise they need + an additional comparison before them. */ + *total = COSTS_N_INSNS (2); + if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + return true; + } + + /* Fall through */ + case COMPARE: + if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + *total = 0; + return true; + } + + *total += COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 1), code, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); + return true; + } + + return false; + + case UMIN: + case UMAX: + case SMIN: + case SMAX: + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed); + if (GET_CODE (XEXP (x, 1)) != CONST_INT + || !const_ok_for_arm (INTVAL (XEXP (x, 1)))) + *total += rtx_cost (XEXP (x, 1), code, speed); + return true; + + case ABS: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + *total = COSTS_N_INSNS (1); + if (mode == DImode) + *total += COSTS_N_INSNS (3); + return false; + + case SIGN_EXTEND: + case ZERO_EXTEND: + *total = 0; + if (GET_MODE_CLASS (mode) == MODE_INT) + { + rtx op = XEXP (x, 0); + enum machine_mode opmode = GET_MODE (op); + + if (mode == DImode) + 
*total += COSTS_N_INSNS (1); + + if (opmode != SImode) + { + if (MEM_P (op)) + { + /* If !arm_arch4, we use one of the extendhisi2_mem + or movhi_bytes patterns for HImode. For a QImode + sign extension, we first zero-extend from memory + and then perform a shift sequence. */ + if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND)) + *total += COSTS_N_INSNS (2); + } + else if (arm_arch6) + *total += COSTS_N_INSNS (1); + + /* We don't have the necessary insn, so we need to perform some + other operation. */ + else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode) + /* An and with constant 255. */ + *total += COSTS_N_INSNS (1); + else + /* A shift sequence. Increase costs slightly to avoid + combining two shifts into an extend operation. */ + *total += COSTS_N_INSNS (2) + 1; + } + + return false; + } + + switch (GET_MODE (XEXP (x, 0))) + { + case V8QImode: + case V4HImode: + case V2SImode: + case V4QImode: + case V2HImode: + *total = COSTS_N_INSNS (1); + return false; + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed); + return true; + + case CONST_INT: + if (const_ok_for_arm (INTVAL (x)) + || const_ok_for_arm (~INTVAL (x))) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX, + INTVAL (x), NULL_RTX, + NULL_RTX, 0, 0)); + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (3); + return true; + + case HIGH: + *total = COSTS_N_INSNS (1); + return true; + + case LO_SUM: + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + + case CONST_DOUBLE: + if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + + case UNSPEC: + /* We cost this as high as our memory costs to allow this to + be hoisted from loops. */ + if (XINT (x, 1) == UNSPEC_PIC_UNIFIED) + { + *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); + } + return true; + + default: + *total = COSTS_N_INSNS (4); + return false; + } +} + +/* Estimates the size cost of thumb1 instructions. + For now most of the code is copied from thumb1_rtx_costs. We need more + fine grain tuning when we have more related test cases. */ +static inline int +thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) +{ + enum machine_mode mode = GET_MODE (x); + + switch (code) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + case PLUS: + case MINUS: + case COMPARE: + case NEG: + case NOT: + return COSTS_N_INSNS (1); + + case MULT: + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + /* Thumb1 mul instruction can't operate on const. We must Load it + into a register first. */ + int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); + return COSTS_N_INSNS (1) + const_size; + } + return COSTS_N_INSNS (1); + + case SET: + return (COSTS_N_INSNS (1) + + 4 * ((GET_CODE (SET_SRC (x)) == MEM) + + GET_CODE (SET_DEST (x)) == MEM)); + + case CONST_INT: + if (outer == SET) + { + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) + return COSTS_N_INSNS (1); + /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ + if (INTVAL (x) >= -255 && INTVAL (x) <= -1) + return COSTS_N_INSNS (2); + /* See split "TARGET_THUMB1 && satisfies_constraint_K". 
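+ (For example, 0x1f00 is an 8-bit value shifted left, so it can be built
+ as a move of 0x1f followed by a left shift by 8, hence the two-insn cost
+ below.)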
*/ + if (thumb_shiftable_const (INTVAL (x))) + return COSTS_N_INSNS (2); + return COSTS_N_INSNS (3); + } + else if ((outer == PLUS || outer == COMPARE) + && INTVAL (x) < 256 && INTVAL (x) > -256) + return 0; + else if ((outer == IOR || outer == XOR || outer == AND) + && INTVAL (x) < 256 && INTVAL (x) >= -256) + return COSTS_N_INSNS (1); + else if (outer == AND) + { + int i; + /* This duplicates the tests in the andsi3 expander. */ + for (i = 9; i <= 31; i++) + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) + return COSTS_N_INSNS (2); + } + else if (outer == ASHIFT || outer == ASHIFTRT + || outer == LSHIFTRT) + return 0; + return COSTS_N_INSNS (2); + + case CONST: + case CONST_DOUBLE: + case LABEL_REF: + case SYMBOL_REF: + return COSTS_N_INSNS (3); + + case UDIV: + case UMOD: + case DIV: + case MOD: + return 100; + + case TRUNCATE: + return 99; + + case AND: + case XOR: + case IOR: + /* XXX guess. */ + return 8; + + case MEM: + /* XXX another guess. */ + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + ? 4 : 0)); + + case IF_THEN_ELSE: + /* XXX a guess. */ + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + return 14; + return 2; + + case ZERO_EXTEND: + /* XXX still guessing. */ + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: + return (1 + (mode == DImode ? 4 : 0) + + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + + case HImode: + return (4 + (mode == DImode ? 4 : 0) + + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + + case SImode: + return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + + default: + return 99; + } + + default: + return 99; + } +} + +/* RTX costs when optimizing for size. */ +static bool +arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total) +{ + enum machine_mode mode = GET_MODE (x); + if (TARGET_THUMB1) + { + *total = thumb1_size_rtx_costs (x, code, outer_code); + return true; + } + + /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ + switch (code) + { + case MEM: + /* A memory access costs 1 insn if the mode is small, or the address is + a single register, otherwise it costs one insn per word. */ + if (REG_P (XEXP (x, 0))) + *total = COSTS_N_INSNS (1); + else if (flag_pic + && GET_CODE (XEXP (x, 0)) == PLUS + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) + /* This will be split into two instructions. + See arm.md:calculate_pic_address. */ + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + /* Needs a libcall, so it costs about this. */ + *total = COSTS_N_INSNS (2); + return false; + + case ROTATE: + if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false); + return true; + } + /* Fall through */ + case ROTATERT: + case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: + if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT) + { + *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false); + return true; + } + else if (mode == SImode) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false); + /* Slightly disparage register shifts, but not by much. 
*/ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += 1 + rtx_cost (XEXP (x, 1), code, false); + return true; + } + + /* Needs a libcall. */ + *total = COSTS_N_INSNS (2); + return false; + + case MINUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + if (mode == SImode) + { + enum rtx_code subcode0 = GET_CODE (XEXP (x, 0)); + enum rtx_code subcode1 = GET_CODE (XEXP (x, 1)); + + if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT + || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT + || subcode1 == ROTATE || subcode1 == ROTATERT + || subcode1 == ASHIFT || subcode1 == LSHIFTRT + || subcode1 == ASHIFTRT) + { + /* It's just the cost of the two operands. */ + *total = 0; + return false; + } + + *total = COSTS_N_INSNS (1); + return false; + } + + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case PLUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + /* A shift as a part of ADD costs nothing. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false); + *total += rtx_cost (XEXP (x, 1), code, false); + return true; + } + + /* Fall through */ + case AND: case XOR: case IOR: + if (mode == SImode) + { + enum rtx_code subcode = GET_CODE (XEXP (x, 0)); + + if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT + || subcode == LSHIFTRT || subcode == ASHIFTRT + || (code == AND && subcode == NOT)) + { + /* It's just the cost of the two operands. */ + *total = 0; + return false; + } + } + + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case MULT: + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case NEG: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + /* Fall through */ + case NOT: + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + + return false; + + case IF_THEN_ELSE: + *total = 0; + return false; + + case COMPARE: + if (cc_register (XEXP (x, 0), VOIDmode)) + * total = 0; + else + *total = COSTS_N_INSNS (1); + return false; + + case ABS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); + return false; + + case SIGN_EXTEND: + case ZERO_EXTEND: + return arm_rtx_costs_1 (x, outer_code, total, 0); + + case CONST_INT: + if (const_ok_for_arm (INTVAL (x))) + /* A multiplication by a constant requires another instruction + to load the constant to a register. */ + *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT) + ? 1 : 0); + else if (const_ok_for_arm (~INTVAL (x))) + *total = COSTS_N_INSNS (outer_code == AND ? 
0 : 1); + else if (const_ok_for_arm (-INTVAL (x))) + { + if (outer_code == COMPARE || outer_code == PLUS + || outer_code == MINUS) + *total = 0; + else + *total = COSTS_N_INSNS (1); + } + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (2); + return true; + + case CONST_DOUBLE: + *total = COSTS_N_INSNS (4); + return true; + + case HIGH: + case LO_SUM: + /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the + cost of these slightly. */ + *total = COSTS_N_INSNS (1) + 1; + return true; + + default: + if (mode != VOIDmode) + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + else + *total = COSTS_N_INSNS (4); /* Who knows? */ + return false; + } +} + +/* RTX cost dispatcher: use the size costs when optimizing for size, + otherwise the per-core speed costs from the current tuning. */ +static bool +arm_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) +{ + if (!speed) + return arm_size_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, total); + else + return current_tune->rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, + total, speed); +} + +/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not + supported on any "slowmul" cores, so it can be ignored. */ + +static bool +arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case MULT: + if (GET_MODE_CLASS (mode) == MODE_FLOAT + || mode == DImode) + { + *total = COSTS_N_INSNS (20); + return false; + } + + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) + & (unsigned HOST_WIDE_INT) 0xffffffff); + int cost, const_ok = const_ok_for_arm (i); + int j, booth_unit_size; + + /* Tune as appropriate. */ + cost = const_ok ? 4 : 8; + booth_unit_size = 2; + for (j = 0; i && j < 32; j += booth_unit_size) + { + i >>= booth_unit_size; + cost++; + } + + *total = COSTS_N_INSNS (cost); + *total += rtx_cost (XEXP (x, 0), code, speed); + return true; + } + + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} + + +/* RTX cost for cores with a fast multiply unit (M variants). */ + +static bool +arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + /* ??? Should Thumb-2 use different costs? */ + switch (code) + { + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS(2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) + & (unsigned HOST_WIDE_INT) 0xffffffff); + int cost, const_ok = const_ok_for_arm (i); + int j, booth_unit_size; + + /* Tune as appropriate. */ + cost = const_ok ? 
4 : 8; + booth_unit_size = 8; + for (j = 0; i && j < 32; j += booth_unit_size) + { + i >>= booth_unit_size; + cost++; + } + + *total = COSTS_N_INSNS(cost); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (4); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + } + + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} + + +/* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores, + so it can be ignored. */ + +static bool +arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case COMPARE: + if (GET_CODE (XEXP (x, 0)) != MULT) + return arm_rtx_costs_1 (x, outer_code, total, speed); + + /* A COMPARE of a MULT is slow on XScale; the muls instruction + will stall until the multiplication is complete. */ + *total = COSTS_N_INSNS (3); + return false; + + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + /* If operand 1 is a constant we can more accurately + calculate the cost of the multiply. The multiplier can + retire 15 bits on the first cycle and a further 12 on the + second. We do, of course, have to load the constant into + a register first. */ + unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); + /* There's a general overhead of one cycle. */ + int cost = 1; + unsigned HOST_WIDE_INT masked_const; + + if (i & 0x80000000) + i = ~i; + + i &= (unsigned HOST_WIDE_INT) 0xffffffff; + + masked_const = i & 0xffff8000; + if (masked_const != 0) + { + cost++; + masked_const = i & 0xf8000000; + if (masked_const != 0) + cost++; + } + *total = COSTS_N_INSNS (cost); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (3); + return false; + } + + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} + + +/* RTX costs for 9e (and later) cores. */ + +static bool +arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + switch (code) + { + case MULT: + *total = COSTS_N_INSNS (3); + return true; + + default: + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + } + + switch (code) + { + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. 
*/ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (2); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + } + + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} +/* All address computations that can be done are free, but rtx cost returns + the same for practically all of them. So we weight the different types + of address here in the order (most pref first): + PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */ +static inline int +arm_arm_address_cost (rtx x) +{ + enum rtx_code c = GET_CODE (x); + + if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC) + return 0; + if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) + return 10; + + if (c == PLUS) + { + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + return 2; + + if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1))) + return 3; + + return 4; + } + + return 6; +} + +static inline int +arm_thumb_address_cost (rtx x) +{ + enum rtx_code c = GET_CODE (x); + + if (c == REG) + return 1; + if (c == PLUS + && GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == CONST_INT) + return 1; + + return 2; +} + +static int +arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) +{ + return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); +} + +/* Adjust cost hook for XScale. */ +static bool +xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) +{ + /* Some true dependencies can have a higher cost depending + on precisely how certain input operands are used. */ + if (REG_NOTE_KIND(link) == 0 + && recog_memoized (insn) >= 0 + && recog_memoized (dep) >= 0) + { + int shift_opnum = get_attr_shift (insn); + enum attr_type attr_type = get_attr_type (dep); + + /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted + operand for INSN. If we have a shifted input operand and the + instruction we depend on is another ALU instruction, then we may + have to account for an additional stall. */ + if (shift_opnum != 0 + && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG)) + { + rtx shifted_operand; + int opno; + + /* Get the shifted operand. */ + extract_insn (insn); + shifted_operand = recog_data.operand[shift_opnum]; + + /* Iterate over all the operands in DEP. If we write an operand + that overlaps with SHIFTED_OPERAND, then we have increase the + cost of this dependency. */ + extract_insn (dep); + preprocess_constraints (); + for (opno = 0; opno < recog_data.n_operands; opno++) + { + /* We can ignore strict inputs. */ + if (recog_data.operand_type[opno] == OP_IN) + continue; + + if (reg_overlap_mentioned_p (recog_data.operand[opno], + shifted_operand)) + { + *cost = 2; + return false; + } + } + } + } + return true; +} + +/* Adjust cost hook for Cortex A9. 
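+    A rough summary of what follows: anti-dependencies are made free, and
+    for floating-point instructions that write an overlapping destination
+    the cost is taken from the producing instruction's default latency
+    (FMACS/FMACD output dependencies may issue three cycles early, other
+    output dependencies pay one extra cycle).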
*/ +static bool +cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) +{ + switch (REG_NOTE_KIND (link)) + { + case REG_DEP_ANTI: + *cost = 0; + return false; + + case REG_DEP_TRUE: + case REG_DEP_OUTPUT: + if (recog_memoized (insn) >= 0 + && recog_memoized (dep) >= 0) + { + if (GET_CODE (PATTERN (insn)) == SET) + { + if (GET_MODE_CLASS + (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT + || GET_MODE_CLASS + (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT) + { + enum attr_type attr_type_insn = get_attr_type (insn); + enum attr_type attr_type_dep = get_attr_type (dep); + + /* By default all dependencies of the form + s0 = s0 s1 + s0 = s0 s2 + have an extra latency of 1 cycle because + of the input and output dependency in this + case. However this gets modeled as an true + dependency and hence all these checks. */ + if (REG_P (SET_DEST (PATTERN (insn))) + && REG_P (SET_DEST (PATTERN (dep))) + && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)), + SET_DEST (PATTERN (dep)))) + { + /* FMACS is a special case where the dependant + instruction can be issued 3 cycles before + the normal latency in case of an output + dependency. */ + if ((attr_type_insn == TYPE_FMACS + || attr_type_insn == TYPE_FMACD) + && (attr_type_dep == TYPE_FMACS + || attr_type_dep == TYPE_FMACD)) + { + if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) + *cost = insn_default_latency (dep) - 3; + else + *cost = insn_default_latency (dep); + return false; + } + else + { + if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) + *cost = insn_default_latency (dep) + 1; + else + *cost = insn_default_latency (dep); + } + return false; + } + } + } + } + break; + + default: + gcc_unreachable (); + } + + return true; +} + +/* Adjust cost hook for FA726TE. */ +static bool +fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) +{ + /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated) + have penalty of 3. */ + if (REG_NOTE_KIND (link) == REG_DEP_TRUE + && recog_memoized (insn) >= 0 + && recog_memoized (dep) >= 0 + && get_attr_conds (dep) == CONDS_SET) + { + /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */ + if (get_attr_conds (insn) == CONDS_USE + && get_attr_type (insn) != TYPE_BRANCH) + { + *cost = 3; + return false; + } + + if (GET_CODE (PATTERN (insn)) == COND_EXEC + || get_attr_conds (insn) == CONDS_USE) + { + *cost = 0; + return false; + } + } + + return true; +} + +/* This function implements the target macro TARGET_SCHED_ADJUST_COST. + It corrects the value of COST based on the relationship between + INSN and DEP through the dependence LINK. It returns the new + value. There is a per-core adjust_cost hook to adjust scheduler costs + and the per-core hook can choose to completely override the generic + adjust_cost function. Only put bits of code into arm_adjust_cost that + are common across all cores. */ +static int +arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) +{ + rtx i_pat, d_pat; + + /* When generating Thumb-1 code, we want to place flag-setting operations + close to a conditional branch which depends on them, so that we can + omit the comparison. */ + if (TARGET_THUMB1 + && REG_NOTE_KIND (link) == 0 + && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn + && recog_memoized (dep) >= 0 + && get_attr_conds (dep) == CONDS_SET) + return 0; + + if (current_tune->sched_adjust_cost != NULL) + { + if (!current_tune->sched_adjust_cost (insn, link, dep, &cost)) + return cost; + } + + /* XXX This is not strictly true for the FPA. 
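+    Anti- and output dependencies are given zero cost here; only true data
+    dependencies are assumed to be able to stall the pipeline.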
*/ + if (REG_NOTE_KIND (link) == REG_DEP_ANTI + || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) + return 0; + + /* Call insns don't incur a stall, even if they follow a load. */ + if (REG_NOTE_KIND (link) == 0 + && GET_CODE (insn) == CALL_INSN) + return 1; + + if ((i_pat = single_set (insn)) != NULL + && GET_CODE (SET_SRC (i_pat)) == MEM + && (d_pat = single_set (dep)) != NULL + && GET_CODE (SET_DEST (d_pat)) == MEM) + { + rtx src_mem = XEXP (SET_SRC (i_pat), 0); + /* This is a load after a store, there is no conflict if the load reads + from a cached area. Assume that loads from the stack, and from the + constant pool are cached, and that others will miss. This is a + hack. */ + + if ((GET_CODE (src_mem) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (src_mem)) + || reg_mentioned_p (stack_pointer_rtx, src_mem) + || reg_mentioned_p (frame_pointer_rtx, src_mem) + || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) + return 1; + } + + return cost; +} + +static int fp_consts_inited = 0; + +/* Only zero is valid for VFP. Other values are also valid for FPA. */ +static const char * const strings_fp[8] = +{ + "0", "1", "2", "3", + "4", "5", "0.5", "10" +}; + +static REAL_VALUE_TYPE values_fp[8]; + +static void +init_fp_table (void) +{ + int i; + REAL_VALUE_TYPE r; + + if (TARGET_VFP) + fp_consts_inited = 1; + else + fp_consts_inited = 8; + + for (i = 0; i < fp_consts_inited; i++) + { + r = REAL_VALUE_ATOF (strings_fp[i], DFmode); + values_fp[i] = r; + } +} + +/* Return TRUE if rtx X is a valid immediate FP constant. */ +int +arm_const_double_rtx (rtx x) +{ + REAL_VALUE_TYPE r; + int i; + + if (!fp_consts_inited) + init_fp_table (); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (REAL_VALUE_MINUS_ZERO (r)) + return 0; + + for (i = 0; i < fp_consts_inited; i++) + if (REAL_VALUES_EQUAL (r, values_fp[i])) + return 1; + + return 0; +} + +/* Return TRUE if rtx X is a valid immediate FPA constant. */ +int +neg_const_double_rtx_ok_for_fpa (rtx x) +{ + REAL_VALUE_TYPE r; + int i; + + if (!fp_consts_inited) + init_fp_table (); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + r = real_value_negate (&r); + if (REAL_VALUE_MINUS_ZERO (r)) + return 0; + + for (i = 0; i < 8; i++) + if (REAL_VALUES_EQUAL (r, values_fp[i])) + return 1; + + return 0; +} + + +/* VFPv3 has a fairly wide range of representable immediates, formed from + "quarter-precision" floating-point values. These can be evaluated using this + formula (with ^ for exponentiation): + + -1^s * n * 2^-r + + Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that + 16 <= n <= 31 and 0 <= r <= 7. + + These values are mapped onto an 8-bit integer ABCDEFGH s.t. + + - A (most-significant) is the sign bit. + - BCD are the exponent (encoded as r XOR 3). + - EFGH are the mantissa (encoded as n - 16). +*/ + +/* Return an integer index for a VFPv3 immediate operand X suitable for the + fconst[sd] instruction, or -1 if X isn't suitable. */ +static int +vfp3_const_double_index (rtx x) +{ + REAL_VALUE_TYPE r, m; + int sign, exponent; + unsigned HOST_WIDE_INT mantissa, mant_hi; + unsigned HOST_WIDE_INT mask; + HOST_WIDE_INT m1, m2; + int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; + + if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + /* We can't represent these things, so detect them first. */ + if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) + return -1; + + /* Extract sign, exponent and mantissa. */ + sign = REAL_VALUE_NEGATIVE (r) ? 
1 : 0; + r = real_value_abs (&r); + exponent = REAL_EXP (&r); + /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the + highest (sign) bit, with a fixed binary point at bit point_pos. + WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 + bits for the mantissa, this may fail (low bits would be lost). */ + real_ldexp (&m, &r, point_pos - exponent); + REAL_VALUE_TO_INT (&m1, &m2, m); + mantissa = m1; + mant_hi = m2; + + /* If there are bits set in the low part of the mantissa, we can't + represent this value. */ + if (mantissa != 0) + return -1; + + /* Now make it so that mantissa contains the most-significant bits, and move + the point_pos to indicate that the least-significant bits have been + discarded. */ + point_pos -= HOST_BITS_PER_WIDE_INT; + mantissa = mant_hi; + + /* We can permit four significant bits of mantissa only, plus a high bit + which is always 1. */ + mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; + if ((mantissa & mask) != 0) + return -1; + + /* Now we know the mantissa is in range, chop off the unneeded bits. */ + mantissa >>= point_pos - 5; + + /* The mantissa may be zero. Disallow that case. (It's possible to load the + floating-point immediate zero with Neon using an integer-zero load, but + that case is handled elsewhere.) */ + if (mantissa == 0) + return -1; + + gcc_assert (mantissa >= 16 && mantissa <= 31); + + /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where + normalized significands are in the range [1, 2). (Our mantissa is shifted + left 4 places at this point relative to normalized IEEE754 values). GCC + internally uses [0.5, 1) (see real.c), so the exponent returned from + REAL_EXP must be altered. */ + exponent = 5 - exponent; + + if (exponent < 0 || exponent > 7) + return -1; + + /* Sign, mantissa and exponent are now in the correct form to plug into the + formula described in the comment above. */ + return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16); +} + +/* Return TRUE if rtx X is a valid immediate VFPv3 constant. */ +int +vfp3_const_double_rtx (rtx x) +{ + if (!TARGET_VFP3) + return 0; + + return vfp3_const_double_index (x) != -1; +} + +/* Recognize immediates which can be used in various Neon instructions. Legal + immediates are described by the following table (for VMVN variants, the + bitwise inverse of the constant shown is recognized. In either case, VMOV + is output and the correct instruction to use for a given constant is chosen + by the assembler). The constant shown is replicated across all elements of + the destination vector. 
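+    As an illustrative example, the 32-bit constant 0x0000ff00 matches
+    variant 1 in the table below (abcdefgh = 11111111 placed in the
+    second-lowest byte), so it can be generated with a single vmov.i32.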
+ + insn elems variant constant (binary) + ---- ----- ------- ----------------- + vmov i32 0 00000000 00000000 00000000 abcdefgh + vmov i32 1 00000000 00000000 abcdefgh 00000000 + vmov i32 2 00000000 abcdefgh 00000000 00000000 + vmov i32 3 abcdefgh 00000000 00000000 00000000 + vmov i16 4 00000000 abcdefgh + vmov i16 5 abcdefgh 00000000 + vmvn i32 6 00000000 00000000 00000000 abcdefgh + vmvn i32 7 00000000 00000000 abcdefgh 00000000 + vmvn i32 8 00000000 abcdefgh 00000000 00000000 + vmvn i32 9 abcdefgh 00000000 00000000 00000000 + vmvn i16 10 00000000 abcdefgh + vmvn i16 11 abcdefgh 00000000 + vmov i32 12 00000000 00000000 abcdefgh 11111111 + vmvn i32 13 00000000 00000000 abcdefgh 11111111 + vmov i32 14 00000000 abcdefgh 11111111 11111111 + vmvn i32 15 00000000 abcdefgh 11111111 11111111 + vmov i8 16 abcdefgh + vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd + eeeeeeee ffffffff gggggggg hhhhhhhh + vmov f32 18 aBbbbbbc defgh000 00000000 00000000 + + For case 18, B = !b. Representable values are exactly those accepted by + vfp3_const_double_index, but are output as floating-point numbers rather + than indices. + + Variants 0-5 (inclusive) may also be used as immediates for the second + operand of VORR/VBIC instructions. + + The INVERSE argument causes the bitwise inverse of the given operand to be + recognized instead (used for recognizing legal immediates for the VAND/VORN + pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is + *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be + output, rather than the real insns vbic/vorr). + + INVERSE makes no difference to the recognition of float vectors. + + The return value is the variant of immediate as shown in the above table, or + -1 if the given value doesn't match any of the listed patterns. +*/ +static int +neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, + rtx *modconst, int *elementwidth) +{ +#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ + matches = 1; \ + for (i = 0; i < idx; i += (STRIDE)) \ + if (!(TEST)) \ + matches = 0; \ + if (matches) \ + { \ + immtype = (CLASS); \ + elsize = (ELSIZE); \ + break; \ + } + + unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); + unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); + unsigned char bytes[16]; + int immtype = -1, matches; + unsigned int invmask = inverse ? 0xff : 0; + + /* Vectors of float constants. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx el0 = CONST_VECTOR_ELT (op, 0); + REAL_VALUE_TYPE r0; + + if (!vfp3_const_double_rtx (el0)) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, el0); + + for (i = 1; i < n_elts; i++) + { + rtx elt = CONST_VECTOR_ELT (op, i); + REAL_VALUE_TYPE re; + + REAL_VALUE_FROM_CONST_DOUBLE (re, elt); + + if (!REAL_VALUES_EQUAL (r0, re)) + return -1; + } + + if (modconst) + *modconst = CONST_VECTOR_ELT (op, 0); + + if (elementwidth) + *elementwidth = 0; + + return 18; + } + + /* Splat vector constant out into a byte vector. 
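+    (Each element is written least-significant byte first; a V4HImode
+    vector of four 0x00ff elements, for instance, becomes the byte vector
+    ff 00 ff 00 ff 00 ff 00, which the CHECK loop below classifies as
+    variant 4.)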
*/ + for (i = 0; i < n_elts; i++) + { + rtx el = CONST_VECTOR_ELT (op, i); + unsigned HOST_WIDE_INT elpart; + unsigned int part, parts; + + if (GET_CODE (el) == CONST_INT) + { + elpart = INTVAL (el); + parts = 1; + } + else if (GET_CODE (el) == CONST_DOUBLE) + { + elpart = CONST_DOUBLE_LOW (el); + parts = 2; + } + else + gcc_unreachable (); + + for (part = 0; part < parts; part++) + { + unsigned int byte; + for (byte = 0; byte < innersize; byte++) + { + bytes[idx++] = (elpart & 0xff) ^ invmask; + elpart >>= BITS_PER_UNIT; + } + if (GET_CODE (el) == CONST_DOUBLE) + elpart = CONST_DOUBLE_HIGH (el); + } + } + + /* Sanity check. */ + gcc_assert (idx == GET_MODE_SIZE (mode)); + + do + { + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0); + + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff); + + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (1, 8, 16, bytes[i] == bytes[0]); + + CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) + && bytes[i] == bytes[(i + 8) % idx]); + } + while (0); + + if (immtype == -1) + return -1; + + if (elementwidth) + *elementwidth = elsize; + + if (modconst) + { + unsigned HOST_WIDE_INT imm = 0; + + /* Un-invert bytes of recognized vector, if necessary. */ + if (invmask != 0) + for (i = 0; i < idx; i++) + bytes[i] ^= invmask; + + if (immtype == 17) + { + /* FIXME: Broken on 32-bit H_W_I hosts. */ + gcc_assert (sizeof (HOST_WIDE_INT) == 8); + + for (i = 0; i < 8; i++) + imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) + << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + else + { + unsigned HOST_WIDE_INT imm = 0; + + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + } + + return immtype; +#undef CHECK +} + +/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly, + VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for + float elements), and a modified constant (whatever should be output for a + VMOV) in *MODCONST. 
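+    For instance, a V4SImode vector of four 0x000000ff elements is
+    recognized as variant 0, giving a nonzero return with *ELEMENTWIDTH
+    set to 32 and *MODCONST set to (const_int 255).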
*/ + +int +neon_immediate_valid_for_move (rtx op, enum machine_mode mode, + rtx *modconst, int *elementwidth) +{ + rtx tmpconst; + int tmpwidth; + int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth); + + if (retval == -1) + return 0; + + if (modconst) + *modconst = tmpconst; + + if (elementwidth) + *elementwidth = tmpwidth; + + return 1; +} + +/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If + the immediate is valid, write a constant suitable for using as an operand + to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to + *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */ + +int +neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse, + rtx *modconst, int *elementwidth) +{ + rtx tmpconst; + int tmpwidth; + int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth); + + if (retval < 0 || retval > 5) + return 0; + + if (modconst) + *modconst = tmpconst; + + if (elementwidth) + *elementwidth = tmpwidth; + + return 1; +} + +/* Return a string suitable for output of Neon immediate logic operation + MNEM. */ + +char * +neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode, + int inverse, int quad) +{ + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width); + + gcc_assert (is_valid != 0); + + if (quad) + sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width); + else + sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width); + + return templ; +} + +/* Output a sequence of pairwise operations to implement a reduction. + NOTE: We do "too much work" here, because pairwise operations work on two + registers-worth of operands in one go. Unfortunately we can't exploit those + extra calculations to do the full operation in fewer steps, I don't think. + Although all vector elements of the result but the first are ignored, we + actually calculate the same result in each of the elements. An alternative + such as initially loading a vector with zero to use as each of the second + operands would use up an additional register and take an extra instruction, + for no particular gain. */ + +void +neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, + rtx (*reduc) (rtx, rtx, rtx)) +{ + enum machine_mode inner = GET_MODE_INNER (mode); + unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner); + rtx tmpsum = op1; + + for (i = parts / 2; i >= 1; i /= 2) + { + rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode); + emit_insn (reduc (dest, tmpsum, tmpsum)); + tmpsum = dest; + } +} + +/* If VALS is a vector constant that can be loaded into a register + using VDUP, generate instructions to do so and return an RTX to + assign to the register. Otherwise return NULL_RTX. */ + +static rtx +neon_vdup_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + bool all_same = true; + rtx x; + int i; + + if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) + return NULL_RTX; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (!all_same) + /* The elements are not all the same. We could handle repeating + patterns of a mode larger than INNER_MODE here (e.g. int8x8_t + {0, C, 0, C, 0, C, 0, C} which can be loaded using + vdup.i16). 
*/ + return NULL_RTX; + + /* We can load this constant by using VDUP and a constant in a + single ARM register. This will be cheaper than a vector + load. */ + + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + return gen_rtx_VEC_DUPLICATE (mode, x); +} + +/* Generate code to load VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, efficiently into a + register. Returns an RTX to copy into the register, or NULL_RTX + for a PARALLEL that can not be converted into a CONST_VECTOR. */ + +rtx +neon_make_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + rtx target; + rtx const_vec = NULL_RTX; + int n_elts = GET_MODE_NUNITS (mode); + int n_const = 0; + int i; + + if (GET_CODE (vals) == CONST_VECTOR) + const_vec = vals; + else if (GET_CODE (vals) == PARALLEL) + { + /* A CONST_VECTOR must contain only CONST_INTs and + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). + Only store valid constants in a CONST_VECTOR. */ + for (i = 0; i < n_elts; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) + n_const++; + } + if (n_const == n_elts) + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + } + else + gcc_unreachable (); + + if (const_vec != NULL + && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) + /* Load using VMOV. On Cortex-A8 this takes one cycle. */ + return const_vec; + else if ((target = neon_vdup_constant (vals)) != NULL_RTX) + /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON + pipeline cycle; creating the constant takes one or two ARM + pipeline cycles. */ + return target; + else if (const_vec != NULL_RTX) + /* Load from constant pool. On Cortex-A8 this takes two cycles + (for either double or quad vectors). We can not take advantage + of single-cycle VLD1 because we need a PC-relative addressing + mode. */ + return const_vec; + else + /* A PARALLEL containing something not valid inside CONST_VECTOR. + We can not construct an initializer. */ + return NULL_RTX; +} + +/* Initialize vector TARGET to VALS. */ + +void +neon_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true; + rtx x, mem; + int i; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONSTANT_P (x)) + ++n_var, one_var = i; + + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (n_var == 0) + { + rtx constant = neon_make_constant (vals); + if (constant != NULL_RTX) + { + emit_move_insn (target, constant); + return; + } + } + + /* Splat a single non-constant element if we can. */ + if (all_same && GET_MODE_SIZE (inner_mode) <= 4) + { + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_VEC_DUPLICATE (mode, x))); + return; + } + + /* One field is non-constant. Load constant then overwrite varying + field. This is more efficient than using the stack. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + rtx index = GEN_INT (one_var); + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); + neon_expand_vector_init (target, copy); + + /* Insert variable. 
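+    For a vector such as {x, 1, 2, 3} with one non-constant element, the
+    code above first loads the constant {1, 1, 2, 3} (the varying slot is
+    filled with its neighbour) and the switch below then overwrites lane 0
+    with x via vset_lane.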
*/ + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); + switch (mode) + { + case V8QImode: + emit_insn (gen_neon_vset_lanev8qi (target, x, target, index)); + break; + case V16QImode: + emit_insn (gen_neon_vset_lanev16qi (target, x, target, index)); + break; + case V4HImode: + emit_insn (gen_neon_vset_lanev4hi (target, x, target, index)); + break; + case V8HImode: + emit_insn (gen_neon_vset_lanev8hi (target, x, target, index)); + break; + case V2SImode: + emit_insn (gen_neon_vset_lanev2si (target, x, target, index)); + break; + case V4SImode: + emit_insn (gen_neon_vset_lanev4si (target, x, target, index)); + break; + case V2SFmode: + emit_insn (gen_neon_vset_lanev2sf (target, x, target, index)); + break; + case V4SFmode: + emit_insn (gen_neon_vset_lanev4sf (target, x, target, index)); + break; + case V2DImode: + emit_insn (gen_neon_vset_lanev2di (target, x, target, index)); + break; + default: + gcc_unreachable (); + } + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. */ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); + emit_move_insn (target, mem); +} + +/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise + ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so + reported source locations are bogus. */ + +static void +bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, + const char *err) +{ + HOST_WIDE_INT lane; + + gcc_assert (GET_CODE (operand) == CONST_INT); + + lane = INTVAL (operand); + + if (lane < low || lane >= high) + error (err); +} + +/* Bounds-check lanes. */ + +void +neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + bounds_check (operand, low, high, "lane out of range"); +} + +/* Bounds-check constants. */ + +void +neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + bounds_check (operand, low, high, "constant out of range"); +} + +HOST_WIDE_INT +neon_element_bits (enum machine_mode mode) +{ + if (mode == DImode) + return GET_MODE_BITSIZE (mode); + else + return GET_MODE_BITSIZE (GET_MODE_INNER (mode)); +} + + +/* Predicates for `match_operand' and `match_operator'. */ + +/* Return nonzero if OP is a valid Cirrus memory address pattern. */ +int +cirrus_memory_offset (rtx op) +{ + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return 0; + + if (GET_CODE (op) == MEM) + { + rtx ind; + + ind = XEXP (op, 0); + + /* Match: (mem (reg)). */ + if (GET_CODE (ind) == REG) + return 1; + + /* Match: + (mem (plus (reg) + (const))). */ + if (GET_CODE (ind) == PLUS + && GET_CODE (XEXP (ind, 0)) == REG + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && GET_CODE (XEXP (ind, 1)) == CONST_INT) + return 1; + } + + return 0; +} + +/* Return TRUE if OP is a valid coprocessor memory address pattern. + WB is true if full writeback address modes are allowed and is false + if limited writeback address modes (POST_INC and PRE_DEC) are + allowed. 
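+    Besides the label-based constant-pool references handled below, the
+    accepted forms are (mem (reg)), the auto-increment forms described
+    below, and (mem (plus (reg) (const_int N))) where N is a multiple of 4
+    in the range [-1020, 1020].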
*/ + +int +arm_coproc_mem_operand (rtx op, bool wb) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. */ + if (GET_CODE (op) != MEM) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT))) + return TRUE; + + /* Match: (mem (reg)). */ + if (GET_CODE (ind) == REG) + return arm_address_register_rtx_p (ind, 0); + + /* Autoincremment addressing modes. POST_INC and PRE_DEC are + acceptable in any case (subject to verification by + arm_address_register_rtx_p). We need WB to be true to accept + PRE_INC and POST_DEC. */ + if (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC + || (wb + && (GET_CODE (ind) == PRE_INC + || GET_CODE (ind) == POST_DEC))) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + if (wb + && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (ind, 0), 0) + && GET_CODE (XEXP (ind, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) + ind = XEXP (ind, 1); + + /* Match: + (plus (reg) + (const)). */ + if (GET_CODE (ind) == PLUS + && GET_CODE (XEXP (ind, 0)) == REG + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && GET_CODE (XEXP (ind, 1)) == CONST_INT + && INTVAL (XEXP (ind, 1)) > -1024 + && INTVAL (XEXP (ind, 1)) < 1024 + && (INTVAL (XEXP (ind, 1)) & 3) == 0) + return TRUE; + + return FALSE; +} + +/* Return TRUE if OP is a memory operand which we can load or store a vector + to/from. TYPE is one of the following values: + 0 - Vector load/stor (vldr) + 1 - Core registers (ldm) + 2 - Element/structure loads (vld1) + */ +int +neon_vector_mem_operand (rtx op, int type) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. */ + if (GET_CODE (op) != MEM) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT))) + return TRUE; + + /* Match: (mem (reg)). */ + if (GET_CODE (ind) == REG) + return arm_address_register_rtx_p (ind, 0); + + /* Allow post-increment with Neon registers. */ + if ((type != 1 && GET_CODE (ind) == POST_INC) + || (type == 0 && GET_CODE (ind) == PRE_DEC)) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + /* FIXME: vld1 allows register post-modify. */ + + /* Match: + (plus (reg) + (const)). 
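+    This form is only accepted for type 0 (vldr/vstr), and only with a
+    word-aligned offset that fits the instruction's immediate field, as
+    checked below.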
*/ + if (type == 0 + && GET_CODE (ind) == PLUS + && GET_CODE (XEXP (ind, 0)) == REG + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && GET_CODE (XEXP (ind, 1)) == CONST_INT + && INTVAL (XEXP (ind, 1)) > -1024 + && INTVAL (XEXP (ind, 1)) < 1016 + && (INTVAL (XEXP (ind, 1)) & 3) == 0) + return TRUE; + + return FALSE; +} + +/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct + type. */ +int +neon_struct_mem_operand (rtx op) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. */ + if (GET_CODE (op) != MEM) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT))) + return TRUE; + + /* Match: (mem (reg)). */ + if (GET_CODE (ind) == REG) + return arm_address_register_rtx_p (ind, 0); + + return FALSE; +} + +/* Return true if X is a register that will be eliminated later on. */ +int +arm_eliminable_register (rtx x) +{ + return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM + || REGNO (x) == ARG_POINTER_REGNUM + || (REGNO (x) >= FIRST_VIRTUAL_REGISTER + && REGNO (x) <= LAST_VIRTUAL_REGISTER)); +} + +/* Return GENERAL_REGS if a scratch register required to reload x to/from + coprocessor registers. Otherwise return NO_REGS. */ + +enum reg_class +coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) +{ + if (mode == HFmode) + { + if (!TARGET_NEON_FP16) + return GENERAL_REGS; + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) + return NO_REGS; + return GENERAL_REGS; + } + + if (TARGET_NEON + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + && neon_vector_mem_operand (x, 0)) + return NO_REGS; + + if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) + return NO_REGS; + + return GENERAL_REGS; +} + +/* Values which must be returned in the most-significant end of the return + register. */ + +static bool +arm_return_in_msb (const_tree valtype) +{ + return (TARGET_AAPCS_BASED + && BYTES_BIG_ENDIAN + && (AGGREGATE_TYPE_P (valtype) + || TREE_CODE (valtype) == COMPLEX_TYPE)); +} + +/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction. + Use by the Cirrus Maverick code which has to workaround + a hardware bug triggered by such instructions. */ +static bool +arm_memory_load_p (rtx insn) +{ + rtx body, lhs, rhs;; + + if (insn == NULL_RTX || GET_CODE (insn) != INSN) + return false; + + body = PATTERN (insn); + + if (GET_CODE (body) != SET) + return false; + + lhs = XEXP (body, 0); + rhs = XEXP (body, 1); + + lhs = REG_OR_SUBREG_RTX (lhs); + + /* If the destination is not a general purpose + register we do not have to worry. */ + if (GET_CODE (lhs) != REG + || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS) + return false; + + /* As well as loads from memory we also have to react + to loads of invalid constants which will be turned + into loads from the minipool. 
*/ + return (GET_CODE (rhs) == MEM + || GET_CODE (rhs) == SYMBOL_REF + || note_invalid_constants (insn, -1, false)); +} + +/* Return TRUE if INSN is a Cirrus instruction. */ +static bool +arm_cirrus_insn_p (rtx insn) +{ + enum attr_cirrus attr; + + /* get_attr cannot accept USE or CLOBBER. */ + if (!insn + || GET_CODE (insn) != INSN + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return 0; + + attr = get_attr_cirrus (insn); + + return attr != CIRRUS_NOT; +} + +/* Cirrus reorg for invalid instruction combinations. */ +static void +cirrus_reorg (rtx first) +{ + enum attr_cirrus attr; + rtx body = PATTERN (first); + rtx t; + int nops; + + /* Any branch must be followed by 2 non Cirrus instructions. */ + if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN) + { + nops = 0; + t = next_nonnote_insn (first); + + if (arm_cirrus_insn_p (t)) + ++ nops; + + if (arm_cirrus_insn_p (next_nonnote_insn (t))) + ++ nops; + + while (nops --) + emit_insn_after (gen_nop (), first); + + return; + } + + /* (float (blah)) is in parallel with a clobber. */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (GET_CODE (body) == SET) + { + rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1); + + /* cfldrd, cfldr64, cfstrd, cfstr64 must + be followed by a non Cirrus insn. */ + if (get_attr_cirrus (first) == CIRRUS_DOUBLE) + { + if (arm_cirrus_insn_p (next_nonnote_insn (first))) + emit_insn_after (gen_nop (), first); + + return; + } + else if (arm_memory_load_p (first)) + { + unsigned int arm_regno; + + /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr, + ldr/cfmv64hr combination where the Rd field is the same + in both instructions must be split with a non Cirrus + insn. Example: + + ldr r0, blah + nop + cfmvsr mvf0, r0. */ + + /* Get Arm register number for ldr insn. */ + if (GET_CODE (lhs) == REG) + arm_regno = REGNO (lhs); + else + { + gcc_assert (GET_CODE (rhs) == REG); + arm_regno = REGNO (rhs); + } + + /* Next insn. */ + first = next_nonnote_insn (first); + + if (! arm_cirrus_insn_p (first)) + return; + + body = PATTERN (first); + + /* (float (blah)) is in parallel with a clobber. */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0)) + body = XVECEXP (body, 0, 0); + + if (GET_CODE (body) == FLOAT) + body = XEXP (body, 0); + + if (get_attr_cirrus (first) == CIRRUS_MOVE + && GET_CODE (XEXP (body, 1)) == REG + && arm_regno == REGNO (XEXP (body, 1))) + emit_insn_after (gen_nop (), first); + + return; + } + } + + /* get_attr cannot accept USE or CLOBBER. */ + if (!first + || GET_CODE (first) != INSN + || GET_CODE (PATTERN (first)) == USE + || GET_CODE (PATTERN (first)) == CLOBBER) + return; + + attr = get_attr_cirrus (first); + + /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...) + must be followed by a non-coprocessor instruction. */ + if (attr == CIRRUS_COMPARE) + { + nops = 0; + + t = next_nonnote_insn (first); + + if (arm_cirrus_insn_p (t)) + ++ nops; + + if (arm_cirrus_insn_p (next_nonnote_insn (t))) + ++ nops; + + while (nops --) + emit_insn_after (gen_nop (), first); + + return; + } +} + +/* Return TRUE if X references a SYMBOL_REF. */ +int +symbol_mentioned_p (rtx x) +{ + const char * fmt; + int i; + + if (GET_CODE (x) == SYMBOL_REF) + return 1; + + /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they + are constant offsets, not symbols. 
*/ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return 0; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (symbol_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +/* Return TRUE if X references a LABEL_REF. */ +int +label_mentioned_p (rtx x) +{ + const char * fmt; + int i; + + if (GET_CODE (x) == LABEL_REF) + return 1; + + /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing + instruction, but they are constant offsets, not symbols. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return 0; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (label_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +int +tls_mentioned_p (rtx x) +{ + switch (GET_CODE (x)) + { + case CONST: + return tls_mentioned_p (XEXP (x, 0)); + + case UNSPEC: + if (XINT (x, 1) == UNSPEC_TLS) + return 1; + + default: + return 0; + } +} + +/* Must not copy any rtx that uses a pc-relative address. */ + +static int +arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == UNSPEC + && (XINT (*x, 1) == UNSPEC_PIC_BASE + || XINT (*x, 1) == UNSPEC_PIC_UNIFIED)) + return 1; + return 0; +} + +static bool +arm_cannot_copy_insn_p (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL); +} + +enum rtx_code +minmax_code (rtx x) +{ + enum rtx_code code = GET_CODE (x); + + switch (code) + { + case SMAX: + return GE; + case SMIN: + return LE; + case UMIN: + return LEU; + case UMAX: + return GEU; + default: + gcc_unreachable (); + } +} + +/* Return 1 if memory locations are adjacent. */ +int +adjacent_mem_locations (rtx a, rtx b) +{ + /* We don't guarantee to preserve the order of these memory refs. */ + if (volatile_refs_p (a) || volatile_refs_p (b)) + return 0; + + if ((GET_CODE (XEXP (a, 0)) == REG + || (GET_CODE (XEXP (a, 0)) == PLUS + && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT)) + && (GET_CODE (XEXP (b, 0)) == REG + || (GET_CODE (XEXP (b, 0)) == PLUS + && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT))) + { + HOST_WIDE_INT val0 = 0, val1 = 0; + rtx reg0, reg1; + int val_diff; + + if (GET_CODE (XEXP (a, 0)) == PLUS) + { + reg0 = XEXP (XEXP (a, 0), 0); + val0 = INTVAL (XEXP (XEXP (a, 0), 1)); + } + else + reg0 = XEXP (a, 0); + + if (GET_CODE (XEXP (b, 0)) == PLUS) + { + reg1 = XEXP (XEXP (b, 0), 0); + val1 = INTVAL (XEXP (XEXP (b, 0), 1)); + } + else + reg1 = XEXP (b, 0); + + /* Don't accept any offset that will require multiple + instructions to handle, since this would cause the + arith_adjacentmem pattern to output an overlong sequence. */ + if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS)) + return 0; + + /* Don't allow an eliminable register: register elimination can make + the offset too large. */ + if (arm_eliminable_register (reg0)) + return 0; + + val_diff = val1 - val0; + + if (arm_ld_sched) + { + /* If the target has load delay slots, then there's no benefit + to using an ldm instruction unless the offset is zero and + we are optimizing for size. 
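+    (Turning, say, ldr rd1, [rbase]; ldr rd2, [rbase, #4] into
+    ldmia rbase, {rd1, rd2} saves code size but not cycles on such cores,
+    hence the optimize_size test below.)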
*/ + return (optimize_size && (REGNO (reg0) == REGNO (reg1)) + && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4) + && (val_diff == 4 || val_diff == -4)); + } + + return ((REGNO (reg0) == REGNO (reg1)) + && (val_diff == 4 || val_diff == -4)); + } + + return 0; +} + +/* Return true iff it would be profitable to turn a sequence of NOPS loads + or stores (depending on IS_STORE) into a load-multiple or store-multiple + instruction. ADD_OFFSET is nonzero if the base address register needs + to be modified with an add instruction before we can use it. */ + +static bool +multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED, + int nops, HOST_WIDE_INT add_offset) + { + /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm + if the offset isn't small enough. The reason 2 ldrs are faster + is because these ARMs are able to do more than one cache access + in a single cycle. The ARM9 and StrongARM have Harvard caches, + whilst the ARM8 has a double bandwidth cache. This means that + these cores can do both an instruction fetch and a data fetch in + a single cycle, so the trick of calculating the address into a + scratch register (one of the result regs) and then doing a load + multiple actually becomes slower (and no smaller in code size). + That is the transformation + + ldr rd1, [rbase + offset] + ldr rd2, [rbase + offset + 4] + + to + + add rd1, rbase, offset + ldmia rd1, {rd1, rd2} + + produces worse code -- '3 cycles + any stalls on rd2' instead of + '2 cycles + any stalls on rd2'. On ARMs with only one cache + access per cycle, the first sequence could never complete in less + than 6 cycles, whereas the ldm sequence would only take 5 and + would make better use of sequential accesses if not hitting the + cache. + + We cheat here and test 'arm_ld_sched' which we currently know to + only be true for the ARM8, ARM9 and StrongARM. If this ever + changes, then the test below needs to be reworked. */ + if (nops == 2 && arm_ld_sched && add_offset != 0) + return false; + + /* XScale has load-store double instructions, but they have stricter + alignment requirements than load-store multiple, so we cannot + use them. + + For XScale ldm requires 2 + NREGS cycles to complete and blocks + the pipeline until completion. + + NREGS CYCLES + 1 3 + 2 4 + 3 5 + 4 6 + + An ldr instruction takes 1-3 cycles, but does not block the + pipeline. + + NREGS CYCLES + 1 1-3 + 2 2-6 + 3 3-9 + 4 4-12 + + Best case ldr will always win. However, the more ldr instructions + we issue, the less likely we are to be able to schedule them well. + Using ldr instructions also increases code size. + + As a compromise, we use ldr for counts of 1 or 2 regs, and ldm + for counts of 3 or 4 regs. */ + if (nops <= 2 && arm_tune_xscale && !optimize_size) + return false; + return true; +} + +/* Subroutine of load_multiple_sequence and store_multiple_sequence. + Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute + an array ORDER which describes the sequence to use when accessing the + offsets that produces an ascending order. In this sequence, each + offset must be larger by exactly 4 than the previous one. ORDER[0] + must have been filled in with the lowest offset by the caller. + If UNSORTED_REGS is nonnull, it is an array of register numbers that + we use to verify that ORDER produces an ascending order of registers. + Return true if it was possible to construct such an order, false if + not. 
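+    For example, with UNSORTED_OFFSETS = {8, 4, 12, 0} and ORDER[0] = 3,
+    the resulting ORDER is {3, 1, 0, 2}, visiting the offsets as
+    0, 4, 8, 12.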
*/ + +static bool +compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order, + int *unsorted_regs) +{ + int i; + for (i = 1; i < nops; i++) + { + int j; + + order[i] = order[i - 1]; + for (j = 0; j < nops; j++) + if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4) + { + /* We must find exactly one offset that is higher than the + previous one by 4. */ + if (order[i] != order[i - 1]) + return false; + order[i] = j; + } + if (order[i] == order[i - 1]) + return false; + /* The register numbers must be ascending. */ + if (unsorted_regs != NULL + && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]]) + return false; + } + return true; +} + +/* Used to determine in a peephole whether a sequence of load + instructions can be changed into a load-multiple instruction. + NOPS is the number of separate load instructions we are examining. The + first NOPS entries in OPERANDS are the destination registers, the + next NOPS entries are memory operands. If this function is + successful, *BASE is set to the common base register of the memory + accesses; *LOAD_OFFSET is set to the first memory location's offset + from that base register. + REGS is an array filled in with the destination register numbers. + SAVED_ORDER (if nonnull), is an array filled in with an order that maps + insn numbers to to an ascending order of stores. If CHECK_REGS is true, + the sequence of registers in REGS matches the loads from ascending memory + locations, and the function verifies that the register numbers are + themselves ascending. If CHECK_REGS is false, the register numbers + are stored in the order they are found in the operands. */ +static int +load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order, + int *base, HOST_WIDE_INT *load_offset, bool check_regs) +{ + int unsorted_regs[MAX_LDM_STM_OPS]; + HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; + int order[MAX_LDM_STM_OPS]; + rtx base_reg_rtx = NULL; + int base_reg = -1; + int i, ldm_case; + + /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be + easily extended if required. */ + gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); + + memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); + + /* Loop over the operands and check that the memory references are + suitable (i.e. immediate offsets from the same base register). At + the same time, extract the target register, and the memory + offsets. */ + for (i = 0; i < nops; i++) + { + rtx reg; + rtx offset; + + /* Convert a subreg of a mem into the mem itself. */ + if (GET_CODE (operands[nops + i]) == SUBREG) + operands[nops + i] = alter_subreg (operands + (nops + i)); + + gcc_assert (GET_CODE (operands[nops + i]) == MEM); + + /* Don't reorder volatile memory references; it doesn't seem worth + looking for the case where the order is ok anyway. 
*/ + if (MEM_VOLATILE_P (operands[nops + i])) + return 0; + + offset = const0_rtx; + + if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG + || (GET_CODE (reg) == SUBREG + && GET_CODE (reg = SUBREG_REG (reg)) == REG)) + || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS + && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0)) + == REG) + || (GET_CODE (reg) == SUBREG + && GET_CODE (reg = SUBREG_REG (reg)) == REG)) + && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) + == CONST_INT))) + { + if (i == 0) + { + base_reg = REGNO (reg); + base_reg_rtx = reg; + if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) + return 0; + } + else if (base_reg != (int) REGNO (reg)) + /* Not addressed from the same base register. */ + return 0; + + unsorted_regs[i] = (GET_CODE (operands[i]) == REG + ? REGNO (operands[i]) + : REGNO (SUBREG_REG (operands[i]))); + + /* If it isn't an integer register, or if it overwrites the + base register but isn't the last insn in the list, then + we can't do this. */ + if (unsorted_regs[i] < 0 + || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) + || unsorted_regs[i] > 14 + || (i != nops - 1 && unsorted_regs[i] == base_reg)) + return 0; + + unsorted_offsets[i] = INTVAL (offset); + if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) + order[0] = i; + } + else + /* Not a suitable memory address. */ + return 0; + } + + /* All the useful information has now been extracted from the + operands into unsorted_regs and unsorted_offsets; additionally, + order[0] has been set to the lowest offset in the list. Sort + the offsets into order, verifying that they are adjacent, and + check that the register numbers are ascending. */ + if (!compute_offset_order (nops, unsorted_offsets, order, + check_regs ? unsorted_regs : NULL)) + return 0; + + if (saved_order) + memcpy (saved_order, order, sizeof order); + + if (base) + { + *base = base_reg; + + for (i = 0; i < nops; i++) + regs[i] = unsorted_regs[check_regs ? order[i] : i]; + + *load_offset = unsorted_offsets[order[0]]; + } + + if (TARGET_THUMB1 + && !peep2_reg_dead_p (nops, base_reg_rtx)) + return 0; + + if (unsorted_offsets[order[0]] == 0) + ldm_case = 1; /* ldmia */ + else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) + ldm_case = 2; /* ldmib */ + else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) + ldm_case = 3; /* ldmda */ + else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) + ldm_case = 4; /* ldmdb */ + else if (const_ok_for_arm (unsorted_offsets[order[0]]) + || const_ok_for_arm (-unsorted_offsets[order[0]])) + ldm_case = 5; + else + return 0; + + if (!multiple_operation_profitable_p (false, nops, + ldm_case == 5 + ? unsorted_offsets[order[0]] : 0)) + return 0; + + return ldm_case; +} + +/* Used to determine in a peephole whether a sequence of store instructions can + be changed into a store-multiple instruction. + NOPS is the number of separate store instructions we are examining. + NOPS_TOTAL is the total number of instructions recognized by the peephole + pattern. + The first NOPS entries in OPERANDS are the source registers, the next + NOPS entries are memory operands. If this function is successful, *BASE is + set to the common base register of the memory accesses; *LOAD_OFFSET is set + to the first memory location's offset from that base register. REGS is an + array filled in with the source register numbers, REG_RTXS (if nonnull) is + likewise filled with the corresponding rtx's. 
+ SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn + numbers to to an ascending order of stores. + If CHECK_REGS is true, the sequence of registers in *REGS matches the stores + from ascending memory locations, and the function verifies that the register + numbers are themselves ascending. If CHECK_REGS is false, the register + numbers are stored in the order they are found in the operands. */ +static int +store_multiple_sequence (rtx *operands, int nops, int nops_total, + int *regs, rtx *reg_rtxs, int *saved_order, int *base, + HOST_WIDE_INT *load_offset, bool check_regs) +{ + int unsorted_regs[MAX_LDM_STM_OPS]; + rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS]; + HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; + int order[MAX_LDM_STM_OPS]; + int base_reg = -1; + rtx base_reg_rtx = NULL; + int i, stm_case; + + /* Write back of base register is currently only supported for Thumb 1. */ + int base_writeback = TARGET_THUMB1; + + /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be + easily extended if required. */ + gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); + + memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); + + /* Loop over the operands and check that the memory references are + suitable (i.e. immediate offsets from the same base register). At + the same time, extract the target register, and the memory + offsets. */ + for (i = 0; i < nops; i++) + { + rtx reg; + rtx offset; + + /* Convert a subreg of a mem into the mem itself. */ + if (GET_CODE (operands[nops + i]) == SUBREG) + operands[nops + i] = alter_subreg (operands + (nops + i)); + + gcc_assert (GET_CODE (operands[nops + i]) == MEM); + + /* Don't reorder volatile memory references; it doesn't seem worth + looking for the case where the order is ok anyway. */ + if (MEM_VOLATILE_P (operands[nops + i])) + return 0; + + offset = const0_rtx; + + if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG + || (GET_CODE (reg) == SUBREG + && GET_CODE (reg = SUBREG_REG (reg)) == REG)) + || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS + && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0)) + == REG) + || (GET_CODE (reg) == SUBREG + && GET_CODE (reg = SUBREG_REG (reg)) == REG)) + && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) + == CONST_INT))) + { + unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG + ? operands[i] : SUBREG_REG (operands[i])); + unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]); + + if (i == 0) + { + base_reg = REGNO (reg); + base_reg_rtx = reg; + if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) + return 0; + } + else if (base_reg != (int) REGNO (reg)) + /* Not addressed from the same base register. */ + return 0; + + /* If it isn't an integer register, then we can't do this. */ + if (unsorted_regs[i] < 0 + || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) + /* The effects are unpredictable if the base register is + both updated and stored. */ + || (base_writeback && unsorted_regs[i] == base_reg) + || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM) + || unsorted_regs[i] > 14) + return 0; + + unsorted_offsets[i] = INTVAL (offset); + if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) + order[0] = i; + } + else + /* Not a suitable memory address. */ + return 0; + } + + /* All the useful information has now been extracted from the + operands into unsorted_regs and unsorted_offsets; additionally, + order[0] has been set to the lowest offset in the list. 
Sort + the offsets into order, verifying that they are adjacent, and + check that the register numbers are ascending. */ + if (!compute_offset_order (nops, unsorted_offsets, order, + check_regs ? unsorted_regs : NULL)) + return 0; + + if (saved_order) + memcpy (saved_order, order, sizeof order); + + if (base) + { + *base = base_reg; + + for (i = 0; i < nops; i++) + { + regs[i] = unsorted_regs[check_regs ? order[i] : i]; + if (reg_rtxs) + reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i]; + } + + *load_offset = unsorted_offsets[order[0]]; + } + + if (TARGET_THUMB1 + && !peep2_reg_dead_p (nops_total, base_reg_rtx)) + return 0; + + if (unsorted_offsets[order[0]] == 0) + stm_case = 1; /* stmia */ + else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) + stm_case = 2; /* stmib */ + else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) + stm_case = 3; /* stmda */ + else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) + stm_case = 4; /* stmdb */ + else + return 0; + + if (!multiple_operation_profitable_p (false, nops, 0)) + return 0; + + return stm_case; +} + +/* Routines for use in generating RTL. */ + +/* Generate a load-multiple instruction. COUNT is the number of loads in + the instruction; REGS and MEMS are arrays containing the operands. + BASEREG is the base register to be used in addressing the memory operands. + WBACK_OFFSET is nonzero if the instruction should update the base + register. */ + +static rtx +arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, + HOST_WIDE_INT wback_offset) +{ + int i = 0, j; + rtx result; + + if (!multiple_operation_profitable_p (false, count, 0)) + { + rtx seq; + + start_sequence (); + + for (i = 0; i < count; i++) + emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]); + + if (wback_offset != 0) + emit_move_insn (basereg, plus_constant (basereg, wback_offset)); + + seq = get_insns (); + end_sequence (); + + return seq; + } + + result = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (count + (wback_offset != 0 ? 1 : 0))); + if (wback_offset != 0) + { + XVECEXP (result, 0, 0) + = gen_rtx_SET (VOIDmode, basereg, + plus_constant (basereg, wback_offset)); + i = 1; + count++; + } + + for (j = 0; i < count; i++, j++) + XVECEXP (result, 0, i) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]); + + return result; +} + +/* Generate a store-multiple instruction. COUNT is the number of stores in + the instruction; REGS and MEMS are arrays containing the operands. + BASEREG is the base register to be used in addressing the memory operands. + WBACK_OFFSET is nonzero if the instruction should update the base + register. */ + +static rtx +arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, + HOST_WIDE_INT wback_offset) +{ + int i = 0, j; + rtx result; + + if (GET_CODE (basereg) == PLUS) + basereg = XEXP (basereg, 0); + + if (!multiple_operation_profitable_p (false, count, 0)) + { + rtx seq; + + start_sequence (); + + for (i = 0; i < count; i++) + emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i])); + + if (wback_offset != 0) + emit_move_insn (basereg, plus_constant (basereg, wback_offset)); + + seq = get_insns (); + end_sequence (); + + return seq; + } + + result = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (count + (wback_offset != 0 ? 
1 : 0))); + if (wback_offset != 0) + { + XVECEXP (result, 0, 0) + = gen_rtx_SET (VOIDmode, basereg, + plus_constant (basereg, wback_offset)); + i = 1; + count++; + } + + for (j = 0; i < count; i++, j++) + XVECEXP (result, 0, i) + = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j])); + + return result; +} + +/* Generate either a load-multiple or a store-multiple instruction. This + function can be used in situations where we can start with a single MEM + rtx and adjust its address upwards. + COUNT is the number of operations in the instruction, not counting a + possible update of the base register. REGS is an array containing the + register operands. + BASEREG is the base register to be used in addressing the memory operands, + which are constructed from BASEMEM. + WRITE_BACK specifies whether the generated instruction should include an + update of the base register. + OFFSETP is used to pass an offset to and from this function; this offset + is not used when constructing the address (instead BASEMEM should have an + appropriate offset in its address), it is used only for setting + MEM_OFFSET. It is updated only if WRITE_BACK is true.*/ + +static rtx +arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg, + bool write_back, rtx basemem, HOST_WIDE_INT *offsetp) +{ + rtx mems[MAX_LDM_STM_OPS]; + HOST_WIDE_INT offset = *offsetp; + int i; + + gcc_assert (count <= MAX_LDM_STM_OPS); + + if (GET_CODE (basereg) == PLUS) + basereg = XEXP (basereg, 0); + + for (i = 0; i < count; i++) + { + rtx addr = plus_constant (basereg, i * 4); + mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset); + offset += 4; + } + + if (write_back) + *offsetp = offset; + + if (is_load) + return arm_gen_load_multiple_1 (count, regs, mems, basereg, + write_back ? 4 * count : 0); + else + return arm_gen_store_multiple_1 (count, regs, mems, basereg, + write_back ? 4 * count : 0); +} + +rtx +arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back, + rtx basemem, HOST_WIDE_INT *offsetp) +{ + return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem, + offsetp); +} + +rtx +arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back, + rtx basemem, HOST_WIDE_INT *offsetp) +{ + return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem, + offsetp); +} + +/* Called from a peephole2 expander to turn a sequence of loads into an + LDM instruction. OPERANDS are the operands found by the peephole matcher; + NOPS indicates how many separate loads we are trying to combine. SORT_REGS + is true if we can reorder the registers because they are used commutatively + subsequently. + Returns true iff we could generate a new instruction. 
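+ As an illustrative example (added for exposition; not part of the upstream
+ comment), a pair of loads such as
+ ldr r0, [r4]
+ ldr r1, [r4, #4]
+ can be replaced by the single instruction
+ ldmia r4, {r0, r1}
+ provided the register numbers ascend along with the memory offsets, or
+ SORT_REGS lets us reorder them so that they do.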
*/ + +bool +gen_ldm_seq (rtx *operands, int nops, bool sort_regs) +{ + int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int i, j, base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int ldm_case; + rtx addr; + + ldm_case = load_multiple_sequence (operands, nops, regs, mem_order, + &base_reg, &offset, !sort_regs); + + if (ldm_case == 0) + return false; + + if (sort_regs) + for (i = 0; i < nops - 1; i++) + for (j = i + 1; j < nops; j++) + if (regs[i] > regs[j]) + { + int t = regs[i]; + regs[i] = regs[j]; + regs[j] = t; + } + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + if (TARGET_THUMB1) + { + gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx)); + gcc_assert (ldm_case == 1 || ldm_case == 5); + write_back = TRUE; + } + + if (ldm_case == 5) + { + rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]); + emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset))); + offset = 0; + if (!TARGET_THUMB1) + { + base_reg = regs[0]; + base_reg_rtx = newbase; + } + } + + for (i = 0; i < nops; i++) + { + addr = plus_constant (base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Called from a peephole2 expander to turn a sequence of stores into an + STM instruction. OPERANDS are the operands found by the peephole matcher; + NOPS indicates how many separate stores we are trying to combine. + Returns true iff we could generate a new instruction. */ + +bool +gen_stm_seq (rtx *operands, int nops) +{ + int i; + int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int stm_case; + rtx addr; + bool base_reg_dies; + + stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL, + mem_order, &base_reg, &offset, true); + + if (stm_case == 0) + return false; + + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx); + if (TARGET_THUMB1) + { + gcc_assert (base_reg_dies); + write_back = TRUE; + } + + if (stm_case == 5) + { + gcc_assert (base_reg_dies); + emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); + offset = 0; + } + + addr = plus_constant (base_reg_rtx, offset); + + for (i = 0; i < nops; i++) + { + addr = plus_constant (base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Called from a peephole2 expander to turn a sequence of stores that are + preceded by constant loads into an STM instruction. OPERANDS are the + operands found by the peephole matcher; NOPS indicates how many + separate stores we are trying to combine; there are 2 * NOPS + instructions in the peephole. + Returns true iff we could generate a new instruction. 
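+ As an illustrative example (added for exposition; not part of the upstream
+ comment), a peephole window of the form
+ mov r0, #1
+ mov r1, #2
+ str r0, [r4]
+ str r1, [r4, #4]
+ keeps the constant moves (possibly retargeted to free registers chosen so
+ that the register numbers ascend with the store offsets) while the two
+ stores collapse into
+ stmia r4, {r0, r1}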
*/ + +bool +gen_const_stm_seq (rtx *operands, int nops) +{ + int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS]; + int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int stm_case; + rtx addr; + bool base_reg_dies; + int i, j; + HARD_REG_SET allocated; + + stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs, + mem_order, &base_reg, &offset, false); + + if (stm_case == 0) + return false; + + memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs); + + /* If the same register is used more than once, try to find a free + register. */ + CLEAR_HARD_REG_SET (allocated); + for (i = 0; i < nops; i++) + { + for (j = i + 1; j < nops; j++) + if (regs[i] == regs[j]) + { + rtx t = peep2_find_free_register (0, nops * 2, + TARGET_THUMB1 ? "l" : "r", + SImode, &allocated); + if (t == NULL_RTX) + return false; + reg_rtxs[i] = t; + regs[i] = REGNO (t); + } + } + + /* Compute an ordering that maps the register numbers to an ascending + sequence. */ + reg_order[0] = 0; + for (i = 0; i < nops; i++) + if (regs[i] < regs[reg_order[0]]) + reg_order[0] = i; + + for (i = 1; i < nops; i++) + { + int this_order = reg_order[i - 1]; + for (j = 0; j < nops; j++) + if (regs[j] > regs[reg_order[i - 1]] + && (this_order == reg_order[i - 1] + || regs[j] < regs[this_order])) + this_order = j; + reg_order[i] = this_order; + } + + /* Ensure that registers that must be live after the instruction end + up with the correct value. */ + for (i = 0; i < nops; i++) + { + int this_order = reg_order[i]; + if ((this_order != mem_order[i] + || orig_reg_rtxs[this_order] != reg_rtxs[this_order]) + && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order])) + return false; + } + + /* Load the constants. */ + for (i = 0; i < nops; i++) + { + rtx op = operands[2 * nops + mem_order[i]]; + sorted_regs[i] = regs[reg_order[i]]; + emit_move_insn (reg_rtxs[reg_order[i]], op); + } + + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx); + if (TARGET_THUMB1) + { + gcc_assert (base_reg_dies); + write_back = TRUE; + } + + if (stm_case == 5) + { + gcc_assert (base_reg_dies); + emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); + offset = 0; + } + + addr = plus_constant (base_reg_rtx, offset); + + for (i = 0; i < nops; i++) + { + addr = plus_constant (base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx, + write_back ? 
offset + i * 4 : 0)); + return true; +} + +int +arm_gen_movmemqi (rtx *operands) +{ + HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes; + HOST_WIDE_INT srcoffset, dstoffset; + int i; + rtx src, dst, srcbase, dstbase; + rtx part_bytes_reg = NULL; + rtx mem; + + if (GET_CODE (operands[2]) != CONST_INT + || GET_CODE (operands[3]) != CONST_INT + || INTVAL (operands[2]) > 64 + || INTVAL (operands[3]) & 3) + return 0; + + dstbase = operands[0]; + srcbase = operands[1]; + + dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0)); + src = copy_to_mode_reg (SImode, XEXP (srcbase, 0)); + + in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2])); + out_words_to_go = INTVAL (operands[2]) / 4; + last_bytes = INTVAL (operands[2]) & 3; + dstoffset = srcoffset = 0; + + if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0) + part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); + + for (i = 0; in_words_to_go >= 2; i+=4) + { + if (in_words_to_go > 4) + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src, + TRUE, srcbase, &srcoffset)); + else + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go, + src, FALSE, srcbase, + &srcoffset)); + + if (out_words_to_go) + { + if (out_words_to_go > 4) + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst, + TRUE, dstbase, &dstoffset)); + else if (out_words_to_go != 1) + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, + out_words_to_go, dst, + (last_bytes == 0 + ? FALSE : TRUE), + dstbase, &dstoffset)); + else + { + mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); + emit_move_insn (mem, gen_rtx_REG (SImode, 0)); + if (last_bytes != 0) + { + emit_insn (gen_addsi3 (dst, dst, GEN_INT (4))); + dstoffset += 4; + } + } + } + + in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4; + out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4; + } + + /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */ + if (out_words_to_go) + { + rtx sreg; + + mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); + sreg = copy_to_reg (mem); + + mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); + emit_move_insn (mem, sreg); + in_words_to_go--; + + gcc_assert (!in_words_to_go); /* Sanity check */ + } + + if (in_words_to_go) + { + gcc_assert (in_words_to_go > 0); + + mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); + part_bytes_reg = copy_to_mode_reg (SImode, mem); + } + + gcc_assert (!last_bytes || part_bytes_reg); + + if (BYTES_BIG_ENDIAN && last_bytes) + { + rtx tmp = gen_reg_rtx (SImode); + + /* The bytes we want are in the top end of the word. 
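+ For instance (an illustrative example, not part of the original comment),
+ with last_bytes == 3 the useful bytes occupy bits [31:8], so we shift
+ right by 8 * (4 - 3) = 8 and then store the result out one byte at a
+ time, highest address first.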
*/ + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, + GEN_INT (8 * (4 - last_bytes)))); + part_bytes_reg = tmp; + + while (last_bytes) + { + mem = adjust_automodify_address (dstbase, QImode, + plus_constant (dst, last_bytes - 1), + dstoffset + last_bytes - 1); + emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); + + if (--last_bytes) + { + tmp = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8))); + part_bytes_reg = tmp; + } + } + + } + else + { + if (last_bytes > 1) + { + mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset); + emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg)); + last_bytes -= 2; + if (last_bytes) + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (dst, dst, const2_rtx)); + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16))); + part_bytes_reg = tmp; + dstoffset += 2; + } + } + + if (last_bytes) + { + mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset); + emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); + } + } + + return 1; +} + +/* Select a dominance comparison mode if possible for a test of the general + form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. + COND_OR == DOM_CC_X_AND_Y => (X && Y) + COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y) + COND_OR == DOM_CC_X_OR_Y => (X || Y) + In all cases OP will be either EQ or NE, but we don't need to know which + here. If we are unable to support a dominance comparison we return + CC mode. This will then fail to match for the RTL expressions that + generate this call. */ +enum machine_mode +arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) +{ + enum rtx_code cond1, cond2; + int swapped = 0; + + /* Currently we will probably get the wrong result if the individual + comparisons are not simple. This also ensures that it is safe to + reverse a comparison if necessary. */ + if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1)) + != CCmode) + || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1)) + != CCmode)) + return CCmode; + + /* The if_then_else variant of this tests the second condition if the + first passes, but is true if the first fails. Reverse the first + condition to get a true "inclusive-or" expression. */ + if (cond_or == DOM_CC_NX_OR_Y) + cond1 = reverse_condition (cond1); + + /* If the comparisons are not equal, and one doesn't dominate the other, + then we can't do this. 
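+ As an illustrative example (added for exposition; not part of the upstream
+ comment): for a test such as (x == y) || (x <= y), EQ dominates LE
+ (whenever the first comparison holds, the second holds too), so the pair
+ can be handled by the single CC_DLEmode comparison selected below; for a
+ pair such as LT and GT, where neither dominates the other, we fall back
+ to returning CCmode.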
*/ + if (cond1 != cond2 + && !comparison_dominates_p (cond1, cond2) + && (swapped = 1, !comparison_dominates_p (cond2, cond1))) + return CCmode; + + if (swapped) + { + enum rtx_code temp = cond1; + cond1 = cond2; + cond2 = temp; + } + + switch (cond1) + { + case EQ: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DEQmode; + + switch (cond2) + { + case EQ: return CC_DEQmode; + case LE: return CC_DLEmode; + case LEU: return CC_DLEUmode; + case GE: return CC_DGEmode; + case GEU: return CC_DGEUmode; + default: gcc_unreachable (); + } + + case LT: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DLTmode; + + switch (cond2) + { + case LT: + return CC_DLTmode; + case LE: + return CC_DLEmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case GT: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DGTmode; + + switch (cond2) + { + case GT: + return CC_DGTmode; + case GE: + return CC_DGEmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case LTU: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DLTUmode; + + switch (cond2) + { + case LTU: + return CC_DLTUmode; + case LEU: + return CC_DLEUmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case GTU: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DGTUmode; + + switch (cond2) + { + case GTU: + return CC_DGTUmode; + case GEU: + return CC_DGEUmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + /* The remaining cases only occur when both comparisons are the + same. */ + case NE: + gcc_assert (cond1 == cond2); + return CC_DNEmode; + + case LE: + gcc_assert (cond1 == cond2); + return CC_DLEmode; + + case GE: + gcc_assert (cond1 == cond2); + return CC_DGEmode; + + case LEU: + gcc_assert (cond1 == cond2); + return CC_DLEUmode; + + case GEU: + gcc_assert (cond1 == cond2); + return CC_DGEUmode; + + default: + gcc_unreachable (); + } +} + +enum machine_mode +arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) +{ + /* All floating point compares return CCFP if it is an equality + comparison, and CCFPE otherwise. */ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + switch (op) + { + case EQ: + case NE: + case UNORDERED: + case ORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case LTGT: + return CCFPmode; + + case LT: + case LE: + case GT: + case GE: + if (TARGET_HARD_FLOAT && TARGET_MAVERICK) + return CCFPmode; + return CCFPEmode; + + default: + gcc_unreachable (); + } + } + + /* A compare with a shifted operand. Because of canonicalization, the + comparison will have to be swapped when we emit the assembler. */ + if (GET_MODE (y) == SImode + && (REG_P (y) || (GET_CODE (y) == SUBREG)) + && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE + || GET_CODE (x) == ROTATERT)) + return CC_SWPmode; + + /* This operation is performed swapped, but since we only rely on the Z + flag we don't need an additional mode. */ + if (GET_MODE (y) == SImode + && (REG_P (y) || (GET_CODE (y) == SUBREG)) + && GET_CODE (x) == NEG + && (op == EQ || op == NE)) + return CC_Zmode; + + /* This is a special case that is used by combine to allow a + comparison of a shifted byte load to be split into a zero-extend + followed by a comparison of the shifted integer (only valid for + equalities and unsigned inequalities). 
*/ + if (GET_MODE (x) == SImode + && GET_CODE (x) == ASHIFT + && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24 + && GET_CODE (XEXP (x, 0)) == SUBREG + && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM + && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode + && (op == EQ || op == NE + || op == GEU || op == GTU || op == LTU || op == LEU) + && GET_CODE (y) == CONST_INT) + return CC_Zmode; + + /* A construct for a conditional compare, if the false arm contains + 0, then both conditions must be true, otherwise either condition + must be true. Not all conditions are possible, so CCmode is + returned if it can't be done. */ + if (GET_CODE (x) == IF_THEN_ELSE + && (XEXP (x, 2) == const0_rtx + || XEXP (x, 2) == const1_rtx) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + INTVAL (XEXP (x, 2))); + + /* Alternate canonicalizations of the above. These are somewhat cleaner. */ + if (GET_CODE (x) == AND + && (op == EQ || op == NE) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + DOM_CC_X_AND_Y); + + if (GET_CODE (x) == IOR + && (op == EQ || op == NE) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + DOM_CC_X_OR_Y); + + /* An operation (on Thumb) where we want to test for a single bit. + This is done by shifting that bit up into the top bit of a + scratch register; we can then branch on the sign bit. */ + if (TARGET_THUMB1 + && GET_MODE (x) == SImode + && (op == EQ || op == NE) + && GET_CODE (x) == ZERO_EXTRACT + && XEXP (x, 1) == const1_rtx) + return CC_Nmode; + + /* An operation that sets the condition codes as a side-effect, the + V flag is not set correctly, so we can only use comparisons where + this doesn't matter. (For LT and GE we can use "mi" and "pl" + instead.) */ + /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */ + if (GET_MODE (x) == SImode + && y == const0_rtx + && (op == EQ || op == NE || op == LT || op == GE) + && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS + || GET_CODE (x) == AND || GET_CODE (x) == IOR + || GET_CODE (x) == XOR || GET_CODE (x) == MULT + || GET_CODE (x) == NOT || GET_CODE (x) == NEG + || GET_CODE (x) == LSHIFTRT + || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == ROTATERT + || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT))) + return CC_NOOVmode; + + if (GET_MODE (x) == QImode && (op == EQ || op == NE)) + return CC_Zmode; + + if (GET_MODE (x) == SImode && (op == LTU || op == GEU) + && GET_CODE (x) == PLUS + && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) + return CC_Cmode; + + if (GET_MODE (x) == DImode || GET_MODE (y) == DImode) + { + /* To keep things simple, always use the Cirrus cfcmp64 if it is + available. */ + if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK) + return CCmode; + + switch (op) + { + case EQ: + case NE: + /* A DImode comparison against zero can be implemented by + or'ing the two halves together. */ + if (y == const0_rtx) + return CC_Zmode; + + /* We can do an equality test in three Thumb instructions. */ + if (!TARGET_ARM) + return CC_Zmode; + + /* FALLTHROUGH */ + + case LTU: + case LEU: + case GTU: + case GEU: + /* DImode unsigned comparisons can be implemented by cmp + + cmpeq without a scratch register. Not worth doing in + Thumb-2. 
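+ As an illustrative sketch (added for exposition; not part of the upstream
+ comment), such a comparison can be carried out as
+ cmp xhi, yhi
+ cmpeq xlo, ylo
+ where xhi/xlo and yhi/ylo stand for the high and low words of the two
+ operands; the combined flags then describe the full 64-bit unsigned
+ result, which is what the CC_CZmode returned below models.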
*/ + if (TARGET_ARM) + return CC_CZmode; + + /* FALLTHROUGH */ + + case LT: + case LE: + case GT: + case GE: + /* DImode signed and unsigned comparisons can be implemented + by cmp + sbcs with a scratch register, but that does not + set the Z flag - we must reverse GT/LE/GTU/LEU. */ + gcc_assert (op != EQ && op != NE); + return CC_NCVmode; + + default: + gcc_unreachable (); + } + } + + return CCmode; +} + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for register 0 in the proper mode. FP means this is a + floating point compare: I don't think that it is needed on the arm. */ +rtx +arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) +{ + enum machine_mode mode; + rtx cc_reg; + int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode; + + /* We might have X as a constant, Y as a register because of the predicates + used for cmpdi. If so, force X to a register here. */ + if (dimode_comparison && !REG_P (x)) + x = force_reg (DImode, x); + + mode = SELECT_CC_MODE (code, x, y); + cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + if (dimode_comparison + && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) + && mode != CC_CZmode) + { + rtx clobber, set; + + /* To compare two non-zero values for equality, XOR them and + then compare against zero. Not used for ARM mode; there + CC_CZmode is cheaper. */ + if (mode == CC_Zmode && y != const0_rtx) + { + x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN); + y = const0_rtx; + } + /* A scratch register is required. */ + clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)); + set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); + } + else + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + + return cc_reg; +} + +/* Generate a sequence of insns that will generate the correct return + address mask depending on the physical architecture that the program + is running on. */ +rtx +arm_gen_return_addr_mask (void) +{ + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_return_addr_mask (reg)); + return reg; +} + +void +arm_reload_in_hi (rtx *operands) +{ + rtx ref = operands[1]; + rtx base, scratch; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (ref) == SUBREG) + { + offset = SUBREG_BYTE (ref); + ref = SUBREG_REG (ref); + } + + if (GET_CODE (ref) == REG) + { + /* We have a pseudo which has been spilt onto the stack; there + are two cases here: the first where there is a simple + stack-slot replacement and a second where the stack-slot is + out of range, or is used as a subreg. */ + if (reg_equiv_mem[REGNO (ref)]) + { + ref = reg_equiv_mem[REGNO (ref)]; + base = find_replacement (&XEXP (ref, 0)); + } + else + /* The slot is out of range, or was dressed up in a SUBREG. */ + base = reg_equiv_address[REGNO (ref)]; + } + else + base = find_replacement (&XEXP (ref, 0)); + + /* Handle the case where the address is too complex to be offset by 1. */ + if (GET_CODE (base) == MINUS + || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT)) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + emit_set_insn (base_plus, base); + base = base_plus; + } + else if (GET_CODE (base) == PLUS) + { + /* The addend must be CONST_INT, or we would have dealt with it above. */ + HOST_WIDE_INT hi, lo; + + offset += INTVAL (XEXP (base, 1)); + base = XEXP (base, 0); + + /* Rework the address into a legal sequence of insns. */ + /* Valid range for lo is -4095 -> 4095 */ + lo = (offset >= 0 + ? 
(offset & 0xfff) + : -((-offset) & 0xfff)); + + /* Corner case, if lo is the max offset then we would be out of range + once we have added the additional 1 below, so bump the msb into the + pre-loading insn(s). */ + if (lo == 4095) + lo &= 0x7ff; + + hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) + ^ (HOST_WIDE_INT) 0x80000000) + - (HOST_WIDE_INT) 0x80000000); + + gcc_assert (hi + lo == offset); + + if (hi != 0) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Get the base address; addsi3 knows how to handle constants + that require more than one insn. */ + emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); + base = base_plus; + offset = lo; + } + } + + /* Operands[2] may overlap operands[0] (though it won't overlap + operands[1]), that's why we asked for a DImode reg -- so we can + use the bit that does not overlap. */ + if (REGNO (operands[2]) == REGNO (operands[0])) + scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + else + scratch = gen_rtx_REG (SImode, REGNO (operands[2])); + + emit_insn (gen_zero_extendqisi2 (scratch, + gen_rtx_MEM (QImode, + plus_constant (base, + offset)))); + emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_MEM (QImode, + plus_constant (base, + offset + 1)))); + if (!BYTES_BIG_ENDIAN) + emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_IOR (SImode, + gen_rtx_ASHIFT + (SImode, + gen_rtx_SUBREG (SImode, operands[0], 0), + GEN_INT (8)), + scratch)); + else + emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_IOR (SImode, + gen_rtx_ASHIFT (SImode, scratch, + GEN_INT (8)), + gen_rtx_SUBREG (SImode, operands[0], 0))); +} + +/* Handle storing a half-word to memory during reload by synthesizing as two + byte stores. Take care not to clobber the input values until after we + have moved them somewhere safe. This code assumes that if the DImode + scratch in operands[2] overlaps either the input value or output address + in some way, then that value must die in this insn (we absolutely need + two scratch registers for some corner cases). */ +void +arm_reload_out_hi (rtx *operands) +{ + rtx ref = operands[0]; + rtx outval = operands[1]; + rtx base, scratch; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (ref) == SUBREG) + { + offset = SUBREG_BYTE (ref); + ref = SUBREG_REG (ref); + } + + if (GET_CODE (ref) == REG) + { + /* We have a pseudo which has been spilt onto the stack; there + are two cases here: the first where there is a simple + stack-slot replacement and a second where the stack-slot is + out of range, or is used as a subreg. */ + if (reg_equiv_mem[REGNO (ref)]) + { + ref = reg_equiv_mem[REGNO (ref)]; + base = find_replacement (&XEXP (ref, 0)); + } + else + /* The slot is out of range, or was dressed up in a SUBREG. */ + base = reg_equiv_address[REGNO (ref)]; + } + else + base = find_replacement (&XEXP (ref, 0)); + + scratch = gen_rtx_REG (SImode, REGNO (operands[2])); + + /* Handle the case where the address is too complex to be offset by 1. */ + if (GET_CODE (base) == MINUS + || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT)) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Be careful not to destroy OUTVAL. */ + if (reg_overlap_mentioned_p (base_plus, outval)) + { + /* Updating base_plus might destroy outval, see if we can + swap the scratch and base_plus. 
*/ + if (!reg_overlap_mentioned_p (scratch, outval)) + { + rtx tmp = scratch; + scratch = base_plus; + base_plus = tmp; + } + else + { + rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); + + /* Be conservative and copy OUTVAL into the scratch now, + this should only be necessary if outval is a subreg + of something larger than a word. */ + /* XXX Might this clobber base? I can't see how it can, + since scratch is known to overlap with OUTVAL, and + must be wider than a word. */ + emit_insn (gen_movhi (scratch_hi, outval)); + outval = scratch_hi; + } + } + + emit_set_insn (base_plus, base); + base = base_plus; + } + else if (GET_CODE (base) == PLUS) + { + /* The addend must be CONST_INT, or we would have dealt with it above. */ + HOST_WIDE_INT hi, lo; + + offset += INTVAL (XEXP (base, 1)); + base = XEXP (base, 0); + + /* Rework the address into a legal sequence of insns. */ + /* Valid range for lo is -4095 -> 4095 */ + lo = (offset >= 0 + ? (offset & 0xfff) + : -((-offset) & 0xfff)); + + /* Corner case, if lo is the max offset then we would be out of range + once we have added the additional 1 below, so bump the msb into the + pre-loading insn(s). */ + if (lo == 4095) + lo &= 0x7ff; + + hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) + ^ (HOST_WIDE_INT) 0x80000000) + - (HOST_WIDE_INT) 0x80000000); + + gcc_assert (hi + lo == offset); + + if (hi != 0) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Be careful not to destroy OUTVAL. */ + if (reg_overlap_mentioned_p (base_plus, outval)) + { + /* Updating base_plus might destroy outval, see if we + can swap the scratch and base_plus. */ + if (!reg_overlap_mentioned_p (scratch, outval)) + { + rtx tmp = scratch; + scratch = base_plus; + base_plus = tmp; + } + else + { + rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); + + /* Be conservative and copy outval into scratch now, + this should only be necessary if outval is a + subreg of something larger than a word. */ + /* XXX Might this clobber base? I can't see how it + can, since scratch is known to overlap with + outval. */ + emit_insn (gen_movhi (scratch_hi, outval)); + outval = scratch_hi; + } + } + + /* Get the base address; addsi3 knows how to handle constants + that require more than one insn. */ + emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); + base = base_plus; + offset = lo; + } + } + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_movqi (gen_rtx_MEM (QImode, + plus_constant (base, offset + 1)), + gen_lowpart (QImode, outval))); + emit_insn (gen_lshrsi3 (scratch, + gen_rtx_SUBREG (SImode, outval, 0), + GEN_INT (8))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)), + gen_lowpart (QImode, scratch))); + } + else + { + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)), + gen_lowpart (QImode, outval))); + emit_insn (gen_lshrsi3 (scratch, + gen_rtx_SUBREG (SImode, outval, 0), + GEN_INT (8))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, + plus_constant (base, offset + 1)), + gen_lowpart (QImode, scratch))); + } +} + +/* Return true if a type must be passed in memory. For AAPCS, small aggregates + (padded to the size of a word) should be passed in a register. */ + +static bool +arm_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + if (TARGET_AAPCS_BASED) + return must_pass_in_stack_var_size (mode, type); + else + return must_pass_in_stack_var_size_or_pad (mode, type); +} + + +/* For use by FUNCTION_ARG_PADDING (MODE, TYPE). 
+ Return true if an argument passed on the stack should be padded upwards, + i.e. if the least-significant byte has useful data. + For legacy APCS ABIs we use the default. For AAPCS based ABIs small + aggregate types are placed in the lowest memory address. */ + +bool +arm_pad_arg_upward (enum machine_mode mode, const_tree type) +{ + if (!TARGET_AAPCS_BASED) + return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward; + + if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type)) + return false; + + return true; +} + + +/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST). + For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant + byte of the register has useful data, and return the opposite if the + most significant byte does. + For AAPCS, small aggregates and small complex types are always padded + upwards. */ + +bool +arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, + tree type, int first ATTRIBUTE_UNUSED) +{ + if (TARGET_AAPCS_BASED + && BYTES_BIG_ENDIAN + && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE) + && int_size_in_bytes (type) <= 4) + return true; + + /* Otherwise, use default padding. */ + return !BYTES_BIG_ENDIAN; +} + + +/* Print a symbolic form of X to the debug file, F. */ +static void +arm_print_value (FILE *f, rtx x) +{ + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x)); + return; + + case CONST_DOUBLE: + fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3)); + return; + + case CONST_VECTOR: + { + int i; + + fprintf (f, "<"); + for (i = 0; i < CONST_VECTOR_NUNITS (x); i++) + { + fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i))); + if (i < (CONST_VECTOR_NUNITS (x) - 1)) + fputc (',', f); + } + fprintf (f, ">"); + } + return; + + case CONST_STRING: + fprintf (f, "\"%s\"", XSTR (x, 0)); + return; + + case SYMBOL_REF: + fprintf (f, "`%s'", XSTR (x, 0)); + return; + + case LABEL_REF: + fprintf (f, "L%d", INSN_UID (XEXP (x, 0))); + return; + + case CONST: + arm_print_value (f, XEXP (x, 0)); + return; + + case PLUS: + arm_print_value (f, XEXP (x, 0)); + fprintf (f, "+"); + arm_print_value (f, XEXP (x, 1)); + return; + + case PC: + fprintf (f, "pc"); + return; + + default: + fprintf (f, "????"); + return; + } +} + +/* Routines for manipulation of the constant pool. */ + +/* Arm instructions cannot load a large constant directly into a + register; they have to come from a pc relative load. The constant + must therefore be placed in the addressable range of the pc + relative load. Depending on the precise pc relative load + instruction the range is somewhere between 256 bytes and 4k. This + means that we often have to dump a constant inside a function, and + generate code to branch around it. + + It is important to minimize this, since the branches will slow + things down and make the code larger. + + Normally we can hide the table after an existing unconditional + branch so that there is no interruption of the flow, but in the + worst case the code looks like this: + + ldr rn, L1 + ... + b L2 + align + L1: .long value + L2: + ... + + ldr rn, L3 + ... + b L4 + align + L3: .long value + L4: + ... + + We fix this by performing a scan after scheduling, which notices + which instructions need to have their operands fetched from the + constant table and builds the table. 
+ + The algorithm starts by building a table of all the constants that + need fixing up and all the natural barriers in the function (places + where a constant table can be dropped without breaking the flow). + For each fixup we note how far the pc-relative replacement will be + able to reach and the offset of the instruction into the function. + + Having built the table we then group the fixes together to form + tables that are as large as possible (subject to addressing + constraints) and emit each table of constants after the last + barrier that is within range of all the instructions in the group. + If a group does not contain a barrier, then we forcibly create one + by inserting a jump instruction into the flow. Once the table has + been inserted, the insns are then modified to reference the + relevant entry in the pool. + + Possible enhancements to the algorithm (not implemented) are: + + 1) For some processors and object formats, there may be benefit in + aligning the pools to the start of cache lines; this alignment + would need to be taken into account when calculating addressability + of a pool. */ + +/* These typedefs are located at the start of this file, so that + they can be used in the prototypes there. This comment is to + remind readers of that fact so that the following structures + can be understood more easily. + + typedef struct minipool_node Mnode; + typedef struct minipool_fixup Mfix; */ + +struct minipool_node +{ + /* Doubly linked chain of entries. */ + Mnode * next; + Mnode * prev; + /* The maximum offset into the code that this entry can be placed. While + pushing fixes for forward references, all entries are sorted in order + of increasing max_address. */ + HOST_WIDE_INT max_address; + /* Similarly for an entry inserted for a backwards ref. */ + HOST_WIDE_INT min_address; + /* The number of fixes referencing this entry. This can become zero + if we "unpush" an entry. In this case we ignore the entry when we + come to emit the code. */ + int refcount; + /* The offset from the start of the minipool. */ + HOST_WIDE_INT offset; + /* The value in table. */ + rtx value; + /* The mode of value. */ + enum machine_mode mode; + /* The size of the value. With iWMMXt enabled + sizes > 4 also imply an alignment of 8-bytes. */ + int fix_size; +}; + +struct minipool_fixup +{ + Mfix * next; + rtx insn; + HOST_WIDE_INT address; + rtx * loc; + enum machine_mode mode; + int fix_size; + rtx value; + Mnode * minipool; + HOST_WIDE_INT forwards; + HOST_WIDE_INT backwards; +}; + +/* Fixes less than a word need padding out to a word boundary. */ +#define MINIPOOL_FIX_SIZE(mode) \ + (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4) + +static Mnode * minipool_vector_head; +static Mnode * minipool_vector_tail; +static rtx minipool_vector_label; +static int minipool_pad; + +/* The linked list of all minipool fixes required for this function. */ +Mfix * minipool_fix_head; +Mfix * minipool_fix_tail; +/* The fix entry for the current minipool, once it has been placed. */ +Mfix * minipool_barrier; + +/* Determines if INSN is the start of a jump table. Returns the end + of the TABLE or NULL_RTX. 
*/ +static rtx +is_jump_table (rtx insn) +{ + rtx table; + + if (GET_CODE (insn) == JUMP_INSN + && JUMP_LABEL (insn) != NULL + && ((table = next_real_insn (JUMP_LABEL (insn))) + == next_real_insn (insn)) + && table != NULL + && GET_CODE (table) == JUMP_INSN + && (GET_CODE (PATTERN (table)) == ADDR_VEC + || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC)) + return table; + + return NULL_RTX; +} + +#ifndef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION 0 +#endif + +static HOST_WIDE_INT +get_jump_table_size (rtx insn) +{ + /* ADDR_VECs only take room if read-only data goes into the text + section. */ + if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section) + { + rtx body = PATTERN (insn); + int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0; + HOST_WIDE_INT size; + HOST_WIDE_INT modesize; + + modesize = GET_MODE_SIZE (GET_MODE (body)); + size = modesize * XVECLEN (body, elt); + switch (modesize) + { + case 1: + /* Round up size of TBB table to a halfword boundary. */ + size = (size + 1) & ~(HOST_WIDE_INT)1; + break; + case 2: + /* No padding necessary for TBH. */ + break; + case 4: + /* Add two bytes for alignment on Thumb. */ + if (TARGET_THUMB) + size += 2; + break; + default: + gcc_unreachable (); + } + return size; + } + + return 0; +} + +/* Move a minipool fix MP from its current location to before MAX_MP. + If MAX_MP is NULL, then MP doesn't need moving, but the addressing + constraints may need updating. */ +static Mnode * +move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp, + HOST_WIDE_INT max_address) +{ + /* The code below assumes these are different. */ + gcc_assert (mp != max_mp); + + if (max_mp == NULL) + { + if (max_address < mp->max_address) + mp->max_address = max_address; + } + else + { + if (max_address > max_mp->max_address - mp->fix_size) + mp->max_address = max_mp->max_address - mp->fix_size; + else + mp->max_address = max_address; + + /* Unlink MP from its current position. Since max_mp is non-null, + mp->prev must be non-null. */ + mp->prev->next = mp->next; + if (mp->next != NULL) + mp->next->prev = mp->prev; + else + minipool_vector_tail = mp->prev; + + /* Re-insert it before MAX_MP. */ + mp->next = max_mp; + mp->prev = max_mp->prev; + max_mp->prev = mp; + + if (mp->prev != NULL) + mp->prev->next = mp; + else + minipool_vector_head = mp; + } + + /* Save the new entry. */ + max_mp = mp; + + /* Scan over the preceding entries and adjust their addresses as + required. */ + while (mp->prev != NULL + && mp->prev->max_address > mp->max_address - mp->prev->fix_size) + { + mp->prev->max_address = mp->max_address - mp->prev->fix_size; + mp = mp->prev; + } + + return max_mp; +} + +/* Add a constant to the minipool for a forward reference. Returns the + node added or NULL if the constant will not fit in this pool. */ +static Mnode * +add_minipool_forward_ref (Mfix *fix) +{ + /* If set, max_mp is the first pool_entry that has a lower + constraint than the one we are trying to add. */ + Mnode * max_mp = NULL; + HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad; + Mnode * mp; + + /* If the minipool starts before the end of FIX->INSN then this FIX + cannot be placed into the current pool. Furthermore, adding the + new constant pool entry may cause the pool to start FIX_SIZE bytes + earlier.
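+ For intuition, an illustrative example (not from the upstream comment):
+ with minipool_pad == 4, a fixup at address 0x1000 in an insn whose
+ forward pool range is 4096 gets max_address = 0x1000 + 4096 - 4, so the
+ pool entry created for it must be emitted before that address.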
*/ + if (minipool_vector_head && + (fix->address + get_attr_length (fix->insn) + >= minipool_vector_head->max_address - fix->fix_size)) + return NULL; + + /* Scan the pool to see if a constant with the same value has + already been added. While we are doing this, also note the + location where we must insert the constant if it doesn't already + exist. */ + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + if (GET_CODE (fix->value) == GET_CODE (mp->value) + && fix->mode == mp->mode + && (GET_CODE (fix->value) != CODE_LABEL + || (CODE_LABEL_NUMBER (fix->value) + == CODE_LABEL_NUMBER (mp->value))) + && rtx_equal_p (fix->value, mp->value)) + { + /* More than one fix references this entry. */ + mp->refcount++; + return move_minipool_fix_forward_ref (mp, max_mp, max_address); + } + + /* Note the insertion point if necessary. */ + if (max_mp == NULL + && mp->max_address > max_address) + max_mp = mp; + + /* If we are inserting an 8-bytes aligned quantity and + we have not already found an insertion point, then + make sure that all such 8-byte aligned quantities are + placed at the start of the pool. */ + if (ARM_DOUBLEWORD_ALIGN + && max_mp == NULL + && fix->fix_size >= 8 + && mp->fix_size < 8) + { + max_mp = mp; + max_address = mp->max_address; + } + } + + /* The value is not currently in the minipool, so we need to create + a new entry for it. If MAX_MP is NULL, the entry will be put on + the end of the list since the placement is less constrained than + any existing entry. Otherwise, we insert the new fix before + MAX_MP and, if necessary, adjust the constraints on the other + entries. */ + mp = XNEW (Mnode); + mp->fix_size = fix->fix_size; + mp->mode = fix->mode; + mp->value = fix->value; + mp->refcount = 1; + /* Not yet required for a backwards ref. */ + mp->min_address = -65536; + + if (max_mp == NULL) + { + mp->max_address = max_address; + mp->next = NULL; + mp->prev = minipool_vector_tail; + + if (mp->prev == NULL) + { + minipool_vector_head = mp; + minipool_vector_label = gen_label_rtx (); + } + else + mp->prev->next = mp; + + minipool_vector_tail = mp; + } + else + { + if (max_address > max_mp->max_address - mp->fix_size) + mp->max_address = max_mp->max_address - mp->fix_size; + else + mp->max_address = max_address; + + mp->next = max_mp; + mp->prev = max_mp->prev; + max_mp->prev = mp; + if (mp->prev != NULL) + mp->prev->next = mp; + else + minipool_vector_head = mp; + } + + /* Save the new entry. */ + max_mp = mp; + + /* Scan over the preceding entries and adjust their addresses as + required. */ + while (mp->prev != NULL + && mp->prev->max_address > mp->max_address - mp->prev->fix_size) + { + mp->prev->max_address = mp->max_address - mp->prev->fix_size; + mp = mp->prev; + } + + return max_mp; +} + +static Mnode * +move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp, + HOST_WIDE_INT min_address) +{ + HOST_WIDE_INT offset; + + /* The code below assumes these are different. */ + gcc_assert (mp != min_mp); + + if (min_mp == NULL) + { + if (min_address > mp->min_address) + mp->min_address = min_address; + } + else + { + /* We will adjust this below if it is too loose. */ + mp->min_address = min_address; + + /* Unlink MP from its current position. Since min_mp is non-null, + mp->next must be non-null. */ + mp->next->prev = mp->prev; + if (mp->prev != NULL) + mp->prev->next = mp->next; + else + minipool_vector_head = mp->next; + + /* Reinsert it after MIN_MP. 
*/ + mp->prev = min_mp; + mp->next = min_mp->next; + min_mp->next = mp; + if (mp->next != NULL) + mp->next->prev = mp; + else + minipool_vector_tail = mp; + } + + min_mp = mp; + + offset = 0; + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + mp->offset = offset; + if (mp->refcount > 0) + offset += mp->fix_size; + + if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size) + mp->next->min_address = mp->min_address + mp->fix_size; + } + + return min_mp; +} + +/* Add a constant to the minipool for a backward reference. Returns the + node added or NULL if the constant will not fit in this pool. + + Note that the code for insertion for a backwards reference can be + somewhat confusing because the calculated offsets for each fix do + not take into account the size of the pool (which is still under + construction. */ +static Mnode * +add_minipool_backward_ref (Mfix *fix) +{ + /* If set, min_mp is the last pool_entry that has a lower constraint + than the one we are trying to add. */ + Mnode *min_mp = NULL; + /* This can be negative, since it is only a constraint. */ + HOST_WIDE_INT min_address = fix->address - fix->backwards; + Mnode *mp; + + /* If we can't reach the current pool from this insn, or if we can't + insert this entry at the end of the pool without pushing other + fixes out of range, then we don't try. This ensures that we + can't fail later on. */ + if (min_address >= minipool_barrier->address + || (minipool_vector_tail->min_address + fix->fix_size + >= minipool_barrier->address)) + return NULL; + + /* Scan the pool to see if a constant with the same value has + already been added. While we are doing this, also note the + location where we must insert the constant if it doesn't already + exist. */ + for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev) + { + if (GET_CODE (fix->value) == GET_CODE (mp->value) + && fix->mode == mp->mode + && (GET_CODE (fix->value) != CODE_LABEL + || (CODE_LABEL_NUMBER (fix->value) + == CODE_LABEL_NUMBER (mp->value))) + && rtx_equal_p (fix->value, mp->value) + /* Check that there is enough slack to move this entry to the + end of the table (this is conservative). */ + && (mp->max_address + > (minipool_barrier->address + + minipool_vector_tail->offset + + minipool_vector_tail->fix_size))) + { + mp->refcount++; + return move_minipool_fix_backward_ref (mp, min_mp, min_address); + } + + if (min_mp != NULL) + mp->min_address += fix->fix_size; + else + { + /* Note the insertion point if necessary. */ + if (mp->min_address < min_address) + { + /* For now, we do not allow the insertion of 8-byte alignment + requiring nodes anywhere but at the start of the pool. */ + if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size >= 8 && mp->fix_size < 8) + return NULL; + else + min_mp = mp; + } + else if (mp->max_address + < minipool_barrier->address + mp->offset + fix->fix_size) + { + /* Inserting before this entry would push the fix beyond + its maximum address (which can happen if we have + re-located a forwards fix); force the new fix to come + after it. */ + if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size >= 8 && mp->fix_size < 8) + return NULL; + else + { + min_mp = mp; + min_address = mp->min_address + fix->fix_size; + } + } + /* Do not insert a non-8-byte aligned quantity before 8-byte + aligned quantities. */ + else if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size < 8 + && mp->fix_size >= 8) + { + min_mp = mp; + min_address = mp->min_address + fix->fix_size; + } + } + } + + /* We need to create a new entry. 
*/ + mp = XNEW (Mnode); + mp->fix_size = fix->fix_size; + mp->mode = fix->mode; + mp->value = fix->value; + mp->refcount = 1; + mp->max_address = minipool_barrier->address + 65536; + + mp->min_address = min_address; + + if (min_mp == NULL) + { + mp->prev = NULL; + mp->next = minipool_vector_head; + + if (mp->next == NULL) + { + minipool_vector_tail = mp; + minipool_vector_label = gen_label_rtx (); + } + else + mp->next->prev = mp; + + minipool_vector_head = mp; + } + else + { + mp->next = min_mp->next; + mp->prev = min_mp; + min_mp->next = mp; + + if (mp->next != NULL) + mp->next->prev = mp; + else + minipool_vector_tail = mp; + } + + /* Save the new entry. */ + min_mp = mp; + + if (mp->prev) + mp = mp->prev; + else + mp->offset = 0; + + /* Scan over the following entries and adjust their offsets. */ + while (mp->next != NULL) + { + if (mp->next->min_address < mp->min_address + mp->fix_size) + mp->next->min_address = mp->min_address + mp->fix_size; + + if (mp->refcount) + mp->next->offset = mp->offset + mp->fix_size; + else + mp->next->offset = mp->offset; + + mp = mp->next; + } + + return min_mp; +} + +static void +assign_minipool_offsets (Mfix *barrier) +{ + HOST_WIDE_INT offset = 0; + Mnode *mp; + + minipool_barrier = barrier; + + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + mp->offset = offset; + + if (mp->refcount > 0) + offset += mp->fix_size; + } +} + +/* Output the literal table */ +static void +dump_minipool (rtx scan) +{ + Mnode * mp; + Mnode * nmp; + int align64 = 0; + + if (ARM_DOUBLEWORD_ALIGN) + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + if (mp->refcount > 0 && mp->fix_size >= 8) + { + align64 = 1; + break; + } + + if (dump_file) + fprintf (dump_file, + ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n", + INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4); + + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan); + scan = emit_label_after (minipool_vector_label, scan); + + for (mp = minipool_vector_head; mp != NULL; mp = nmp) + { + if (mp->refcount > 0) + { + if (dump_file) + { + fprintf (dump_file, + ";; Offset %u, min %ld, max %ld ", + (unsigned) mp->offset, (unsigned long) mp->min_address, + (unsigned long) mp->max_address); + arm_print_value (dump_file, mp->value); + fputc ('\n', dump_file); + } + + switch (mp->fix_size) + { +#ifdef HAVE_consttable_1 + case 1: + scan = emit_insn_after (gen_consttable_1 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_2 + case 2: + scan = emit_insn_after (gen_consttable_2 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_4 + case 4: + scan = emit_insn_after (gen_consttable_4 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_8 + case 8: + scan = emit_insn_after (gen_consttable_8 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_16 + case 16: + scan = emit_insn_after (gen_consttable_16 (mp->value), scan); + break; + +#endif + default: + gcc_unreachable (); + } + } + + nmp = mp->next; + free (mp); + } + + minipool_vector_head = minipool_vector_tail = NULL; + scan = emit_insn_after (gen_consttable_end (), scan); + scan = emit_barrier_after (scan); +} + +/* Return the cost of forcibly inserting a barrier after INSN. */ +static int +arm_barrier_cost (rtx insn) +{ + /* Basing the location of the pool on the loop depth is preferable, + but at the moment, the basic block information seems to be + corrupt by this stage of the compilation. 
*/ + int base_cost = 50; + rtx next = next_nonnote_insn (insn); + + if (next != NULL && GET_CODE (next) == CODE_LABEL) + base_cost -= 20; + + switch (GET_CODE (insn)) + { + case CODE_LABEL: + /* It will always be better to place the table before the label, rather + than after it. */ + return 50; + + case INSN: + case CALL_INSN: + return base_cost; + + case JUMP_INSN: + return base_cost - 10; + + default: + return base_cost + 10; + } +} + +/* Find the best place in the insn stream in the range + (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier. + Create the barrier by inserting a jump and add a new fix entry for + it. */ +static Mfix * +create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address) +{ + HOST_WIDE_INT count = 0; + rtx barrier; + rtx from = fix->insn; + /* The instruction after which we will insert the jump. */ + rtx selected = NULL; + int selected_cost; + /* The address at which the jump instruction will be placed. */ + HOST_WIDE_INT selected_address; + Mfix * new_fix; + HOST_WIDE_INT max_count = max_address - fix->address; + rtx label = gen_label_rtx (); + + selected_cost = arm_barrier_cost (from); + selected_address = fix->address; + + while (from && count < max_count) + { + rtx tmp; + int new_cost; + + /* This code shouldn't have been called if there was a natural barrier + within range. */ + gcc_assert (GET_CODE (from) != BARRIER); + + /* Count the length of this insn. */ + count += get_attr_length (from); + + /* If there is a jump table, add its length. */ + tmp = is_jump_table (from); + if (tmp != NULL) + { + count += get_jump_table_size (tmp); + + /* Jump tables aren't in a basic block, so base the cost on + the dispatch insn. If we select this location, we will + still put the pool after the table. */ + new_cost = arm_barrier_cost (from); + + if (count < max_count + && (!selected || new_cost <= selected_cost)) + { + selected = tmp; + selected_cost = new_cost; + selected_address = fix->address + count; + } + + /* Continue after the dispatch table. */ + from = NEXT_INSN (tmp); + continue; + } + + new_cost = arm_barrier_cost (from); + + if (count < max_count + && (!selected || new_cost <= selected_cost)) + { + selected = from; + selected_cost = new_cost; + selected_address = fix->address + count; + } + + from = NEXT_INSN (from); + } + + /* Make sure that we found a place to insert the jump. */ + gcc_assert (selected); + + /* Create a new JUMP_INSN that branches around a barrier. */ + from = emit_jump_insn_after (gen_jump (label), selected); + JUMP_LABEL (from) = label; + barrier = emit_barrier_after (from); + emit_label_after (label, barrier); + + /* Create a minipool barrier entry for the new barrier. */ + new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix)); + new_fix->insn = barrier; + new_fix->address = selected_address; + new_fix->next = fix->next; + fix->next = new_fix; + + return new_fix; +} + +/* Record that there is a natural barrier in the insn stream at + ADDRESS. */ +static void +push_minipool_barrier (rtx insn, HOST_WIDE_INT address) +{ + Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); + + fix->insn = insn; + fix->address = address; + + fix->next = NULL; + if (minipool_fix_head != NULL) + minipool_fix_tail->next = fix; + else + minipool_fix_head = fix; + + minipool_fix_tail = fix; +} + +/* Record INSN, which will need fixing up to load a value from the + minipool. 
ADDRESS is the offset of the insn since the start of the + function; LOC is a pointer to the part of the insn which requires + fixing; VALUE is the constant that must be loaded, which is of type + MODE. */ +static void +push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc, + enum machine_mode mode, rtx value) +{ + Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); + + fix->insn = insn; + fix->address = address; + fix->loc = loc; + fix->mode = mode; + fix->fix_size = MINIPOOL_FIX_SIZE (mode); + fix->value = value; + fix->forwards = get_attr_pool_range (insn); + fix->backwards = get_attr_neg_pool_range (insn); + fix->minipool = NULL; + + /* If an insn doesn't have a range defined for it, then it isn't + expecting to be reworked by this code. Better to stop now than + to generate duff assembly code. */ + gcc_assert (fix->forwards || fix->backwards); + + /* If an entry requires 8-byte alignment then assume all constant pools + require 4 bytes of padding. Trying to do this later on a per-pool + basis is awkward because existing pool entries have to be modified. */ + if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8) + minipool_pad = 4; + + if (dump_file) + { + fprintf (dump_file, + ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ", + GET_MODE_NAME (mode), + INSN_UID (insn), (unsigned long) address, + -1 * (long)fix->backwards, (long)fix->forwards); + arm_print_value (dump_file, fix->value); + fprintf (dump_file, "\n"); + } + + /* Add it to the chain of fixes. */ + fix->next = NULL; + + if (minipool_fix_head != NULL) + minipool_fix_tail->next = fix; + else + minipool_fix_head = fix; + + minipool_fix_tail = fix; +} + +/* Return the cost of synthesizing a 64-bit constant VAL inline. + Returns the number of insns needed, or 99 if we don't know how to + do it. */ +int +arm_const_double_inline_cost (rtx val) +{ + rtx lowpart, highpart; + enum machine_mode mode; + + mode = GET_MODE (val); + + if (mode == VOIDmode) + mode = DImode; + + gcc_assert (GET_MODE_SIZE (mode) == 8); + + lowpart = gen_lowpart (SImode, val); + highpart = gen_highpart_mode (SImode, mode, val); + + gcc_assert (GET_CODE (lowpart) == CONST_INT); + gcc_assert (GET_CODE (highpart) == CONST_INT); + + return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart), + NULL_RTX, NULL_RTX, 0, 0) + + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart), + NULL_RTX, NULL_RTX, 0, 0)); +} + +/* Return true if it is worthwhile to split a 64-bit constant into two + 32-bit operations. This is the case if optimizing for size, or + if we have load delay slots, or if one 32-bit part can be done with + a single data operation. */ +bool +arm_const_double_by_parts (rtx val) +{ + enum machine_mode mode = GET_MODE (val); + rtx part; + + if (optimize_size || arm_ld_sched) + return true; + + if (mode == VOIDmode) + mode = DImode; + + part = gen_highpart_mode (SImode, mode, val); + + gcc_assert (GET_CODE (part) == CONST_INT); + + if (const_ok_for_arm (INTVAL (part)) + || const_ok_for_arm (~INTVAL (part))) + return true; + + part = gen_lowpart (SImode, val); + + gcc_assert (GET_CODE (part) == CONST_INT); + + if (const_ok_for_arm (INTVAL (part)) + || const_ok_for_arm (~INTVAL (part))) + return true; + + return false; +} + +/* Return true if it is possible to inline both the high and low parts + of a 64-bit constant into 32-bit data processing instructions. 
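+ For example (illustrative, not from the upstream comment), the DImode
+ constant 0x000000ff000000ff qualifies, because each 32-bit half is 0xff
+ and is directly encodable as an immediate, whereas a half such as
+ 0x00012345 is not a valid ARM immediate and would disqualify the constant.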
*/ +bool +arm_const_double_by_immediates (rtx val) +{ + enum machine_mode mode = GET_MODE (val); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + + part = gen_highpart_mode (SImode, mode, val); + + gcc_assert (GET_CODE (part) == CONST_INT); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + part = gen_lowpart (SImode, val); + + gcc_assert (GET_CODE (part) == CONST_INT); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + return true; +} + +/* Scan INSN and note any of its operands that need fixing. + If DO_PUSHES is false we do not actually push any of the fixups + needed. The function returns TRUE if any fixups were needed/pushed. + This is used by arm_memory_load_p() which needs to know about loads + of constants that will be converted into minipool loads. */ +static bool +note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes) +{ + bool result = false; + int opno; + + extract_insn (insn); + + if (!constrain_operands (1)) + fatal_insn_not_found (insn); + + if (recog_data.n_alternatives == 0) + return false; + + /* Fill in recog_op_alt with information about the constraints of + this insn. */ + preprocess_constraints (); + + for (opno = 0; opno < recog_data.n_operands; opno++) + { + /* Things we need to fix can only occur in inputs. */ + if (recog_data.operand_type[opno] != OP_IN) + continue; + + /* If this alternative is a memory reference, then any mention + of constants in this alternative is really to fool reload + into allowing us to accept one there. We need to fix them up + now so that we output the right code. */ + if (recog_op_alt[opno][which_alternative].memory_ok) + { + rtx op = recog_data.operand[opno]; + + if (CONSTANT_P (op)) + { + if (do_pushes) + push_minipool_fix (insn, address, recog_data.operand_loc[opno], + recog_data.operand_mode[opno], op); + result = true; + } + else if (GET_CODE (op) == MEM + && GET_CODE (XEXP (op, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0))) + { + if (do_pushes) + { + rtx cop = avoid_constant_pool_reference (op); + + /* Casting the address of something to a mode narrower + than a word can cause avoid_constant_pool_reference() + to return the pool reference itself. That's no good to + us here. Lets just hope that we can use the + constant pool value directly. */ + if (op == cop) + cop = get_pool_constant (XEXP (op, 0)); + + push_minipool_fix (insn, address, + recog_data.operand_loc[opno], + recog_data.operand_mode[opno], cop); + } + + result = true; + } + } + } + + return result; +} + +/* Convert instructions to their cc-clobbering variant if possible, since + that allows us to use smaller encodings. */ + +static void +thumb2_reorg (void) +{ + basic_block bb; + regset_head live; + + INIT_REG_SET (&live); + + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. 
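+     (For reference, the rewrite below turns e.g. the 32-bit
+     "add r0, r0, r1" into the flag-setting "adds r0, r0, r1", which has
+     a 16-bit encoding, when the condition flags are dead at that point.)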
*/ + compute_bb_for_insn (); + df_analyze (); + + FOR_EACH_BB (bb) + { + rtx insn; + + COPY_REG_SET (&live, DF_LR_OUT (bb)); + df_simulate_initialize_backwards (bb, &live); + FOR_BB_INSNS_REVERSE (bb, insn) + { + if (NONJUMP_INSN_P (insn) + && !REGNO_REG_SET_P (&live, CC_REGNUM)) + { + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == SET + && low_register_operand (XEXP (pat, 0), SImode) + && thumb_16bit_operator (XEXP (pat, 1), SImode) + && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode) + && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode)) + { + rtx dst = XEXP (pat, 0); + rtx src = XEXP (pat, 1); + rtx op0 = XEXP (src, 0); + rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH + ? XEXP (src, 1) : NULL); + + if (rtx_equal_p (dst, op0) + || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS) + { + rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); + rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); + rtvec vec = gen_rtvec (2, pat, clobber); + + PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); + INSN_CODE (insn) = -1; + } + /* We can also handle a commutative operation where the + second operand matches the destination. */ + else if (op1 && rtx_equal_p (dst, op1)) + { + rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); + rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); + rtvec vec; + + src = copy_rtx (src); + XEXP (src, 0) = op1; + XEXP (src, 1) = op0; + pat = gen_rtx_SET (VOIDmode, dst, src); + vec = gen_rtvec (2, pat, clobber); + PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); + INSN_CODE (insn) = -1; + } + } + } + + if (NONDEBUG_INSN_P (insn)) + df_simulate_one_insn_backwards (bb, insn, &live); + } + } + + CLEAR_REG_SET (&live); +} + +/* Gcc puts the pool in the wrong place for ARM, since we can only + load addresses a limited distance around the pc. We do some + special munging to move the constant pool values to the correct + point in the code. */ +static void +arm_reorg (void) +{ + rtx insn; + HOST_WIDE_INT address = 0; + Mfix * fix; + + if (TARGET_THUMB2) + thumb2_reorg (); + + minipool_fix_head = minipool_fix_tail = NULL; + + /* The first insn must always be a note, or the code below won't + scan it properly. */ + insn = get_insns (); + gcc_assert (GET_CODE (insn) == NOTE); + minipool_pad = 0; + + /* Scan all the insns and record the operands that will need fixing. */ + for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn)) + { + if (TARGET_CIRRUS_FIX_INVALID_INSNS + && (arm_cirrus_insn_p (insn) + || GET_CODE (insn) == JUMP_INSN + || arm_memory_load_p (insn))) + cirrus_reorg (insn); + + if (GET_CODE (insn) == BARRIER) + push_minipool_barrier (insn, address); + else if (INSN_P (insn)) + { + rtx table; + + note_invalid_constants (insn, address, true); + address += get_attr_length (insn); + + /* If the insn is a vector jump, add the size of the table + and skip the table. */ + if ((table = is_jump_table (insn)) != NULL) + { + address += get_jump_table_size (table); + insn = table; + } + } + } + + fix = minipool_fix_head; + + /* Now scan the fixups and perform the required changes. */ + while (fix) + { + Mfix * ftmp; + Mfix * fdel; + Mfix * last_added_fix; + Mfix * last_barrier = NULL; + Mfix * this_fix; + + /* Skip any further barriers before the next fix. */ + while (fix && GET_CODE (fix->insn) == BARRIER) + fix = fix->next; + + /* No more fixes. 
*/ + if (fix == NULL) + break; + + last_added_fix = NULL; + + for (ftmp = fix; ftmp; ftmp = ftmp->next) + { + if (GET_CODE (ftmp->insn) == BARRIER) + { + if (ftmp->address >= minipool_vector_head->max_address) + break; + + last_barrier = ftmp; + } + else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL) + break; + + last_added_fix = ftmp; /* Keep track of the last fix added. */ + } + + /* If we found a barrier, drop back to that; any fixes that we + could have reached but come after the barrier will now go in + the next mini-pool. */ + if (last_barrier != NULL) + { + /* Reduce the refcount for those fixes that won't go into this + pool after all. */ + for (fdel = last_barrier->next; + fdel && fdel != ftmp; + fdel = fdel->next) + { + fdel->minipool->refcount--; + fdel->minipool = NULL; + } + + ftmp = last_barrier; + } + else + { + /* ftmp is first fix that we can't fit into this pool and + there no natural barriers that we could use. Insert a + new barrier in the code somewhere between the previous + fix and this one, and arrange to jump around it. */ + HOST_WIDE_INT max_address; + + /* The last item on the list of fixes must be a barrier, so + we can never run off the end of the list of fixes without + last_barrier being set. */ + gcc_assert (ftmp); + + max_address = minipool_vector_head->max_address; + /* Check that there isn't another fix that is in range that + we couldn't fit into this pool because the pool was + already too large: we need to put the pool before such an + instruction. The pool itself may come just after the + fix because create_fix_barrier also allows space for a + jump instruction. */ + if (ftmp->address < max_address) + max_address = ftmp->address + 1; + + last_barrier = create_fix_barrier (last_added_fix, max_address); + } + + assign_minipool_offsets (last_barrier); + + while (ftmp) + { + if (GET_CODE (ftmp->insn) != BARRIER + && ((ftmp->minipool = add_minipool_backward_ref (ftmp)) + == NULL)) + break; + + ftmp = ftmp->next; + } + + /* Scan over the fixes we have identified for this pool, fixing them + up and adding the constants to the pool itself. */ + for (this_fix = fix; this_fix && ftmp != this_fix; + this_fix = this_fix->next) + if (GET_CODE (this_fix->insn) != BARRIER) + { + rtx addr + = plus_constant (gen_rtx_LABEL_REF (VOIDmode, + minipool_vector_label), + this_fix->minipool->offset); + *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr); + } + + dump_minipool (last_barrier->insn); + fix = ftmp; + } + + /* From now on we must synthesize any constants that we can't handle + directly. This can happen if the RTL gets split during final + instruction generation. */ + after_arm_reorg = 1; + + /* Free the minipool memory. */ + obstack_free (&minipool_obstack, minipool_startobj); +} + +/* Routines to output assembly language. */ + +/* If the rtx is the correct value then return the string of the number. + In this way we can ensure that valid double constants are generated even + when cross compiling. */ +const char * +fp_immediate_constant (rtx x) +{ + REAL_VALUE_TYPE r; + int i; + + if (!fp_consts_inited) + init_fp_table (); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + for (i = 0; i < 8; i++) + if (REAL_VALUES_EQUAL (r, values_fp[i])) + return strings_fp[i]; + + gcc_unreachable (); +} + +/* As for fp_immediate_constant, but value is passed directly, not in rtx. 
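+   (The eight candidate values are the classic FPA immediates 0.0, 1.0,
+   2.0, 3.0, 4.0, 5.0, 0.5 and 10.0; any other constant has to be loaded
+   from memory.)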
*/ +static const char * +fp_const_from_val (REAL_VALUE_TYPE *r) +{ + int i; + + if (!fp_consts_inited) + init_fp_table (); + + for (i = 0; i < 8; i++) + if (REAL_VALUES_EQUAL (*r, values_fp[i])) + return strings_fp[i]; + + gcc_unreachable (); +} + +/* Output the operands of a LDM/STM instruction to STREAM. + MASK is the ARM register set mask of which only bits 0-15 are important. + REG is the base register, either the frame pointer or the stack pointer, + INSTR is the possibly suffixed load or store instruction. + RFE is nonzero if the instruction should also copy spsr to cpsr. */ + +static void +print_multi_reg (FILE *stream, const char *instr, unsigned reg, + unsigned long mask, int rfe) +{ + unsigned i; + bool not_first = FALSE; + + gcc_assert (!rfe || (mask & (1 << PC_REGNUM))); + fputc ('\t', stream); + asm_fprintf (stream, instr, reg); + fputc ('{', stream); + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (mask & (1 << i)) + { + if (not_first) + fprintf (stream, ", "); + + asm_fprintf (stream, "%r", i); + not_first = TRUE; + } + + if (rfe) + fprintf (stream, "}^\n"); + else + fprintf (stream, "}\n"); +} + + +/* Output a FLDMD instruction to STREAM. + BASE if the register containing the address. + REG and COUNT specify the register range. + Extra registers may be added to avoid hardware bugs. + + We output FLDMD even for ARMv5 VFP implementations. Although + FLDMD is technically not supported until ARMv6, it is believed + that all VFP implementations support its use in this context. */ + +static void +vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count) +{ + int i; + + /* Workaround ARM10 VFPr1 bug. */ + if (count == 2 && !arm_arch6) + { + if (reg == 15) + reg--; + count++; + } + + /* FLDMD may not load more than 16 doubleword registers at a time. Split the + load into multiple parts if we have to handle more than 16 registers. */ + if (count > 16) + { + vfp_output_fldmd (stream, base, reg, 16); + vfp_output_fldmd (stream, base, reg + 16, count - 16); + return; + } + + fputc ('\t', stream); + asm_fprintf (stream, "fldmfdd\t%r!, {", base); + + for (i = reg; i < reg + count; i++) + { + if (i > reg) + fputs (", ", stream); + asm_fprintf (stream, "d%d", i); + } + fputs ("}\n", stream); + +} + + +/* Output the assembly for a store multiple. */ + +const char * +vfp_output_fstmd (rtx * operands) +{ + char pattern[100]; + int p; + int base; + int i; + + strcpy (pattern, "fstmfdd\t%m0!, {%P1"); + p = strlen (pattern); + + gcc_assert (GET_CODE (operands[1]) == REG); + + base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2; + for (i = 1; i < XVECLEN (operands[2], 0); i++) + { + p += sprintf (&pattern[p], ", d%d", base + i); + } + strcpy (&pattern[p], "}"); + + output_asm_insn (pattern, operands); + return ""; +} + + +/* Emit RTL to save block of VFP register pairs to the stack. Returns the + number of bytes pushed. */ + +static int +vfp_emit_fstmd (int base_reg, int count) +{ + rtx par; + rtx dwarf; + rtx tmp, reg; + int i; + + /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two + register pairs are stored by a store multiple insn. We avoid this + by pushing an extra pair. */ + if (count == 2 && !arm_arch6) + { + if (base_reg == LAST_VFP_REGNUM - 3) + base_reg -= 2; + count++; + } + + /* FSTMD may not store more than 16 doubleword registers at once. Split + larger stores into multiple parts (up to a maximum of two, in + practice). */ + if (count > 16) + { + int saved; + /* NOTE: base_reg is an internal register number, so each D register + counts as 2. 
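+     For instance, splitting a 17-register block starting at d0 stores
+     d16 first (base_reg + 32) and then d0-d15, matching the order of the
+     two recursive calls below.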
*/ + saved = vfp_emit_fstmd (base_reg + 32, count - 16); + saved += vfp_emit_fstmd (base_reg, 16); + return saved; + } + + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); + + reg = gen_rtx_REG (DFmode, base_reg); + base_reg += 2; + + XVECEXP (par, 0, 0) + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (BLKmode, + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + plus_constant + (stack_pointer_rtx, + - (count * 8))) + ), + gen_rtx_UNSPEC (BLKmode, + gen_rtvec (1, reg), + UNSPEC_PUSH_MULT)); + + tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (stack_pointer_rtx, -(count * 8))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (DFmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 1) = tmp; + + for (i = 1; i < count; i++) + { + reg = gen_rtx_REG (DFmode, base_reg); + base_reg += 2; + XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg); + + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (DFmode, + plus_constant (stack_pointer_rtx, + i * 8)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp; + } + + par = emit_insn (par); + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (par) = 1; + + return count * 8; +} + +/* Emit a call instruction with pattern PAT. ADDR is the address of + the call target. */ + +void +arm_emit_call_insn (rtx pat, rtx addr) +{ + rtx insn; + + insn = emit_call_insn (pat); + + /* The PIC register is live on entry to VxWorks PIC PLT entries. + If the call might use such an entry, add a use of the PIC register + to the instruction's CALL_INSN_FUNCTION_USAGE. */ + if (TARGET_VXWORKS_RTP + && flag_pic + && GET_CODE (addr) == SYMBOL_REF + && (SYMBOL_REF_DECL (addr) + ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) + : !SYMBOL_REF_LOCAL_P (addr))) + { + require_pic_register (); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); + } +} + +/* Output a 'call' insn. */ +const char * +output_call (rtx *operands) +{ + gcc_assert (!arm_arch5); /* Patterns should call blx directly. */ + + /* Handle calls to lr using ip (which may be clobbered in subr anyway). */ + if (REGNO (operands[0]) == LR_REGNUM) + { + operands[0] = gen_rtx_REG (SImode, IP_REGNUM); + output_asm_insn ("mov%?\t%0, %|lr", operands); + } + + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + + if (TARGET_INTERWORK || arm_arch4t) + output_asm_insn ("bx%?\t%0", operands); + else + output_asm_insn ("mov%?\t%|pc, %0", operands); + + return ""; +} + +/* Output a 'call' insn that is a reference in memory. This is + disabled for ARMv5 and we prefer a blx instead because otherwise + there's a significant performance overhead. */ +const char * +output_call_mem (rtx *operands) +{ + gcc_assert (!arm_arch5); + if (TARGET_INTERWORK) + { + output_asm_insn ("ldr%?\t%|ip, %0", operands); + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + output_asm_insn ("bx%?\t%|ip", operands); + } + else if (regno_use_in (LR_REGNUM, operands[0])) + { + /* LR is used in the memory address. We load the address in the + first instruction. It's safe to use IP as the target of the + load since the call will kill it anyway. 
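+      The sequence emitted below is roughly
+         ldr   ip, <addr>
+         mov   lr, pc
+         bx    ip          (or "mov pc, ip" before ARMv4T)
+      so the address is consumed before LR is overwritten with the
+      return address.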
*/ + output_asm_insn ("ldr%?\t%|ip, %0", operands); + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + if (arm_arch4t) + output_asm_insn ("bx%?\t%|ip", operands); + else + output_asm_insn ("mov%?\t%|pc, %|ip", operands); + } + else + { + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + output_asm_insn ("ldr%?\t%|pc, %0", operands); + } + + return ""; +} + + +/* Output a move from arm registers to an fpa registers. + OPERANDS[0] is an fpa register. + OPERANDS[1] is the first registers of an arm register pair. */ +const char * +output_mov_long_double_fpa_from_arm (rtx *operands) +{ + int arm_reg0 = REGNO (operands[1]); + rtx ops[3]; + + gcc_assert (arm_reg0 != IP_REGNUM); + + ops[0] = gen_rtx_REG (SImode, arm_reg0); + ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); + ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0); + + output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops); + output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands); + + return ""; +} + +/* Output a move from an fpa register to arm registers. + OPERANDS[0] is the first registers of an arm register pair. + OPERANDS[1] is an fpa register. */ +const char * +output_mov_long_double_arm_from_fpa (rtx *operands) +{ + int arm_reg0 = REGNO (operands[0]); + rtx ops[3]; + + gcc_assert (arm_reg0 != IP_REGNUM); + + ops[0] = gen_rtx_REG (SImode, arm_reg0); + ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); + ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0); + + output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands); + output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops); + return ""; +} + +/* Output a move from arm registers to arm registers of a long double + OPERANDS[0] is the destination. + OPERANDS[1] is the source. */ +const char * +output_mov_long_double_arm_from_arm (rtx *operands) +{ + /* We have to be careful here because the two might overlap. */ + int dest_start = REGNO (operands[0]); + int src_start = REGNO (operands[1]); + rtx ops[2]; + int i; + + if (dest_start < src_start) + { + for (i = 0; i < 3; i++) + { + ops[0] = gen_rtx_REG (SImode, dest_start + i); + ops[1] = gen_rtx_REG (SImode, src_start + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + else + { + for (i = 2; i >= 0; i--) + { + ops[0] = gen_rtx_REG (SImode, dest_start + i); + ops[1] = gen_rtx_REG (SImode, src_start + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + + return ""; +} + +void +arm_emit_movpair (rtx dest, rtx src) + { + /* If the src is an immediate, simplify it. */ + if (CONST_INT_P (src)) + { + HOST_WIDE_INT val = INTVAL (src); + emit_set_insn (dest, GEN_INT (val & 0x0000ffff)); + if ((val >> 16) & 0x0000ffff) + emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), + GEN_INT (16)), + GEN_INT ((val >> 16) & 0x0000ffff)); + return; + } + emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); + } + +/* Output a move from arm registers to an fpa registers. + OPERANDS[0] is an fpa register. + OPERANDS[1] is the first registers of an arm register pair. */ +const char * +output_mov_double_fpa_from_arm (rtx *operands) +{ + int arm_reg0 = REGNO (operands[1]); + rtx ops[2]; + + gcc_assert (arm_reg0 != IP_REGNUM); + + ops[0] = gen_rtx_REG (SImode, arm_reg0); + ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); + output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops); + output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands); + return ""; +} + +/* Output a move from an fpa register to arm registers. + OPERANDS[0] is the first registers of an arm register pair. + OPERANDS[1] is an fpa register. 
*/ +const char * +output_mov_double_arm_from_fpa (rtx *operands) +{ + int arm_reg0 = REGNO (operands[0]); + rtx ops[2]; + + gcc_assert (arm_reg0 != IP_REGNUM); + + ops[0] = gen_rtx_REG (SImode, arm_reg0); + ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0); + output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands); + output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops); + return ""; +} + +/* Output a move between double words. It must be REG<-MEM + or MEM<-REG. */ +const char * +output_move_double (rtx *operands) +{ + enum rtx_code code0 = GET_CODE (operands[0]); + enum rtx_code code1 = GET_CODE (operands[1]); + rtx otherops[3]; + + if (code0 == REG) + { + unsigned int reg0 = REGNO (operands[0]); + + otherops[0] = gen_rtx_REG (SImode, 1 + reg0); + + gcc_assert (code1 == MEM); /* Constraints should ensure this. */ + + switch (GET_CODE (XEXP (operands[1], 0))) + { + case REG: + if (TARGET_LDRD + && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0)))) + output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands); + else + output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); + break; + + case PRE_INC: + gcc_assert (TARGET_LDRD); + output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands); + break; + + case PRE_DEC: + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands); + else + output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands); + break; + + case POST_INC: + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands); + else + output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands); + break; + + case POST_DEC: + gcc_assert (TARGET_LDRD); + output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands); + break; + + case PRE_MODIFY: + case POST_MODIFY: + /* Autoicrement addressing modes should never have overlapping + base and destination registers, and overlapping index registers + are already prohibited, so this doesn't need to worry about + fix_cm3_ldrd. */ + otherops[0] = operands[0]; + otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0); + otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1); + + if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY) + { + if (reg_overlap_mentioned_p (otherops[0], otherops[2])) + { + /* Registers overlap so split out the increment. */ + output_asm_insn ("add%?\t%1, %1, %2", otherops); + output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops); + } + else + { + /* Use a single insn if we can. + FIXME: IWMMXT allows offsets larger than ldrd can + handle, fix these up with a pair of ldr. */ + if (TARGET_THUMB2 + || GET_CODE (otherops[2]) != CONST_INT + || (INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256)) + output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops); + else + { + output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops); + output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); + } + } + } + else + { + /* Use a single insn if we can. + FIXME: IWMMXT allows offsets larger than ldrd can handle, + fix these up with a pair of ldr. */ + if (TARGET_THUMB2 + || GET_CODE (otherops[2]) != CONST_INT + || (INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256)) + output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops); + else + { + output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); + output_asm_insn ("ldr%?\t%0, [%1], %2", otherops); + } + } + break; + + case LABEL_REF: + case CONST: + /* We might be able to use ldrd %0, %1 here. However the range is + different to ldr/adr, and it is broken on some ARMv7-M + implementations. */ + /* Use the second register of the pair to avoid problematic + overlap. 
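+	     For a load into {r0, r1} from a label this emits, roughly,
+	        adr    r1, <label>
+	        ldrd   r0, [r1]
+	     (or "ldmia r1, {r0, r1}" without LDRD), so the address register
+	     is also the word that gets overwritten last.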
*/ + otherops[1] = operands[1]; + output_asm_insn ("adr%?\t%0, %1", otherops); + operands[1] = otherops[0]; + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); + else + output_asm_insn ("ldm%(ia%)\t%1, %M0", operands); + break; + + /* ??? This needs checking for thumb2. */ + default: + if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1), + GET_MODE (XEXP (XEXP (operands[1], 0), 1)))) + { + otherops[0] = operands[0]; + otherops[1] = XEXP (XEXP (operands[1], 0), 0); + otherops[2] = XEXP (XEXP (operands[1], 0), 1); + + if (GET_CODE (XEXP (operands[1], 0)) == PLUS) + { + if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD) + { + switch ((int) INTVAL (otherops[2])) + { + case -8: + output_asm_insn ("ldm%(db%)\t%1, %M0", otherops); + return ""; + case -4: + if (TARGET_THUMB2) + break; + output_asm_insn ("ldm%(da%)\t%1, %M0", otherops); + return ""; + case 4: + if (TARGET_THUMB2) + break; + output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops); + return ""; + } + } + otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1); + operands[1] = otherops[0]; + if (TARGET_LDRD + && (GET_CODE (otherops[2]) == REG + || TARGET_THUMB2 + || (GET_CODE (otherops[2]) == CONST_INT + && INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256))) + { + if (reg_overlap_mentioned_p (operands[0], + otherops[2])) + { + rtx tmp; + /* Swap base and index registers over to + avoid a conflict. */ + tmp = otherops[1]; + otherops[1] = otherops[2]; + otherops[2] = tmp; + } + /* If both registers conflict, it will usually + have been fixed by a splitter. */ + if (reg_overlap_mentioned_p (operands[0], otherops[2]) + || (fix_cm3_ldrd && reg0 == REGNO (otherops[1]))) + { + output_asm_insn ("add%?\t%0, %1, %2", otherops); + output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); + } + else + { + otherops[0] = operands[0]; + output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops); + } + return ""; + } + + if (GET_CODE (otherops[2]) == CONST_INT) + { + if (!(const_ok_for_arm (INTVAL (otherops[2])))) + output_asm_insn ("sub%?\t%0, %1, #%n2", otherops); + else + output_asm_insn ("add%?\t%0, %1, %2", otherops); + } + else + output_asm_insn ("add%?\t%0, %1, %2", otherops); + } + else + output_asm_insn ("sub%?\t%0, %1, %2", otherops); + + if (TARGET_LDRD) + return "ldr%(d%)\t%0, [%1]"; + + return "ldm%(ia%)\t%1, %M0"; + } + else + { + otherops[1] = adjust_address (operands[1], SImode, 4); + /* Take care of overlapping base/data reg. */ + if (reg_mentioned_p (operands[0], operands[1])) + { + output_asm_insn ("ldr%?\t%0, %1", otherops); + output_asm_insn ("ldr%?\t%0, %1", operands); + } + else + { + output_asm_insn ("ldr%?\t%0, %1", operands); + output_asm_insn ("ldr%?\t%0, %1", otherops); + } + } + } + } + else + { + /* Constraints should ensure this. 
*/ + gcc_assert (code0 == MEM && code1 == REG); + gcc_assert (REGNO (operands[1]) != IP_REGNUM); + + switch (GET_CODE (XEXP (operands[0], 0))) + { + case REG: + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0]", operands); + else + output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); + break; + + case PRE_INC: + gcc_assert (TARGET_LDRD); + output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands); + break; + + case PRE_DEC: + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands); + else + output_asm_insn ("stm%(db%)\t%m0!, %M1", operands); + break; + + case POST_INC: + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands); + else + output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands); + break; + + case POST_DEC: + gcc_assert (TARGET_LDRD); + output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands); + break; + + case PRE_MODIFY: + case POST_MODIFY: + otherops[0] = operands[1]; + otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0); + otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1); + + /* IWMMXT allows offsets larger than ldrd can handle, + fix these up with a pair of ldr. */ + if (!TARGET_THUMB2 + && GET_CODE (otherops[2]) == CONST_INT + && (INTVAL(otherops[2]) <= -256 + || INTVAL(otherops[2]) >= 256)) + { + if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) + { + output_asm_insn ("str%?\t%0, [%1, %2]!", otherops); + output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); + } + else + { + output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); + output_asm_insn ("str%?\t%0, [%1], %2", otherops); + } + } + else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) + output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops); + else + output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops); + break; + + case PLUS: + otherops[2] = XEXP (XEXP (operands[0], 0), 1); + if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD) + { + switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1))) + { + case -8: + output_asm_insn ("stm%(db%)\t%m0, %M1", operands); + return ""; + + case -4: + if (TARGET_THUMB2) + break; + output_asm_insn ("stm%(da%)\t%m0, %M1", operands); + return ""; + + case 4: + if (TARGET_THUMB2) + break; + output_asm_insn ("stm%(ib%)\t%m0, %M1", operands); + return ""; + } + } + if (TARGET_LDRD + && (GET_CODE (otherops[2]) == REG + || TARGET_THUMB2 + || (GET_CODE (otherops[2]) == CONST_INT + && INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256))) + { + otherops[0] = operands[1]; + otherops[1] = XEXP (XEXP (operands[0], 0), 0); + output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops); + return ""; + } + /* Fall through */ + + default: + otherops[0] = adjust_address (operands[0], SImode, 4); + otherops[1] = operands[1]; + output_asm_insn ("str%?\t%1, %0", operands); + output_asm_insn ("str%?\t%H1, %0", otherops); + } + } + + return ""; +} + +/* Output a move, load or store for quad-word vectors in ARM registers. Only + handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */ + +const char * +output_move_quad (rtx *operands) +{ + if (REG_P (operands[0])) + { + /* Load, or reg->reg move. 
*/ + + if (MEM_P (operands[1])) + { + switch (GET_CODE (XEXP (operands[1], 0))) + { + case REG: + output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); + break; + + case LABEL_REF: + case CONST: + output_asm_insn ("adr%?\t%0, %1", operands); + output_asm_insn ("ldm%(ia%)\t%0, %M0", operands); + break; + + default: + gcc_unreachable (); + } + } + else + { + rtx ops[2]; + int dest, src, i; + + gcc_assert (REG_P (operands[1])); + + dest = REGNO (operands[0]); + src = REGNO (operands[1]); + + /* This seems pretty dumb, but hopefully GCC won't try to do it + very often. */ + if (dest < src) + for (i = 0; i < 4; i++) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + else + for (i = 3; i >= 0; i--) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + } + else + { + gcc_assert (MEM_P (operands[0])); + gcc_assert (REG_P (operands[1])); + gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0])); + + switch (GET_CODE (XEXP (operands[0], 0))) + { + case REG: + output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); + break; + + default: + gcc_unreachable (); + } + } + + return ""; +} + +/* Output a VFP load or store instruction. */ + +const char * +output_move_vfp (rtx *operands) +{ + rtx reg, mem, addr, ops[2]; + int load = REG_P (operands[0]); + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; + const char *templ; + char buff[50]; + enum machine_mode mode; + + reg = operands[!load]; + mem = operands[load]; + + mode = GET_MODE (reg); + + gcc_assert (REG_P (reg)); + gcc_assert (IS_VFP_REGNUM (REGNO (reg))); + gcc_assert (mode == SFmode + || mode == DFmode + || mode == SImode + || mode == DImode + || (TARGET_NEON && VALID_NEON_DREG_MODE (mode))); + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + switch (GET_CODE (addr)) + { + case PRE_DEC: + templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case POST_INC: + templ = "f%smia%c%%?\t%%0!, {%%%s1}%s"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + default: + templ = "f%s%c%%?\t%%%s0, %%1%s"; + ops[0] = reg; + ops[1] = mem; + break; + } + + sprintf (buff, templ, + load ? "ld" : "st", + dp ? 'd' : 's', + dp ? "P" : "", + integer_p ? "\t%@ int" : ""); + output_asm_insn (buff, ops); + + return ""; +} + +/* Output a Neon quad-word load or store, or a load or store for + larger structure modes. + + WARNING: The ordering of elements is weird in big-endian mode, + because we use VSTM, as required by the EABI. GCC RTL defines + element ordering based on in-memory order. This can be differ + from the architectural ordering of elements within a NEON register. + The intrinsics defined in arm_neon.h use the NEON register element + ordering, not the GCC RTL element ordering. + + For example, the in-memory ordering of a big-endian a quadword + vector with 16-bit elements when stored from register pair {d0,d1} + will be (lowest address first, d0[N] is NEON register element N): + + [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]] + + When necessary, quadword registers (dN, dN+1) are moved to ARM + registers from rN in the order: + + dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2) + + So that STM/LDM can be used on vectors in ARM registers, and the + same memory layout will result as if VSTM/VLDM were used. 
*/ + +const char * +output_move_neon (rtx *operands) +{ + rtx reg, mem, addr, ops[2]; + int regno, load = REG_P (operands[0]); + const char *templ; + char buff[50]; + enum machine_mode mode; + + reg = operands[!load]; + mem = operands[load]; + + mode = GET_MODE (reg); + + gcc_assert (REG_P (reg)); + regno = REGNO (reg); + gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno) + || NEON_REGNO_OK_FOR_QUAD (regno)); + gcc_assert (VALID_NEON_DREG_MODE (mode) + || VALID_NEON_QREG_MODE (mode) + || VALID_NEON_STRUCT_MODE (mode)); + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (plus (...))). */ + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) + addr = XEXP (addr, 0); + + switch (GET_CODE (addr)) + { + case POST_INC: + templ = "v%smia%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case PRE_DEC: + /* FIXME: We should be using vld1/vst1 here in BE mode? */ + templ = "v%smdb%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case POST_MODIFY: + /* FIXME: Not currently enabled in neon_vector_mem_operand. */ + gcc_unreachable (); + + case LABEL_REF: + case PLUS: + { + int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; + int i; + int overlap = -1; + for (i = 0; i < nregs; i++) + { + /* We're only using DImode here because it's a convenient size. */ + ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); + ops[1] = adjust_address (mem, DImode, 8 * i); + if (reg_overlap_mentioned_p (ops[0], mem)) + { + gcc_assert (overlap == -1); + overlap = i; + } + else + { + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + } + } + if (overlap != -1) + { + ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap); + ops[1] = adjust_address (mem, SImode, 8 * overlap); + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + } + + return ""; + } + + default: + templ = "v%smia%%?\t%%m0, %%h1"; + ops[0] = mem; + ops[1] = reg; + } + + sprintf (buff, templ, load ? "ld" : "st"); + output_asm_insn (buff, ops); + + return ""; +} + +/* Compute and return the length of neon_mov, where is + one of VSTRUCT modes: EI, OI, CI or XI. */ +int +arm_attr_length_move_neon (rtx insn) +{ + rtx reg, mem, addr; + int load; + enum machine_mode mode; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) + { + mode = GET_MODE (recog_data.operand[0]); + switch (mode) + { + case EImode: + case OImode: + return 8; + case CImode: + return 12; + case XImode: + return 16; + default: + gcc_unreachable (); + } + } + + load = REG_P (recog_data.operand[0]); + reg = recog_data.operand[!load]; + mem = recog_data.operand[load]; + + gcc_assert (MEM_P (mem)); + + mode = GET_MODE (reg); + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (plus (...))). */ + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS) + { + int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; + return insns * 4; + } + else + return 4; +} + +/* Return nonzero if the offset in the address is an immediate. Otherwise, + return zero. 
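+   That is, addresses of the form [rN] or [rN, #imm] return 1, while an
+   address with a register offset such as [rN, rM] returns 0.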
*/ + +int +arm_address_offset_is_imm (rtx insn) +{ + rtx mem, addr; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0])) + return 0; + + mem = recog_data.operand[0]; + + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + if (GET_CODE (addr) == REG + || (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == REG + && GET_CODE (XEXP (addr, 1)) == CONST_INT)) + return 1; + else + return 0; +} + +/* Output an ADD r, s, #n where n may be too big for one instruction. + If adding zero to one register, output nothing. */ +const char * +output_add_immediate (rtx *operands) +{ + HOST_WIDE_INT n = INTVAL (operands[2]); + + if (n != 0 || REGNO (operands[0]) != REGNO (operands[1])) + { + if (n < 0) + output_multi_immediate (operands, + "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2, + -n); + else + output_multi_immediate (operands, + "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2, + n); + } + + return ""; +} + +/* Output a multiple immediate operation. + OPERANDS is the vector of operands referred to in the output patterns. + INSTR1 is the output pattern to use for the first constant. + INSTR2 is the output pattern to use for subsequent constants. + IMMED_OP is the index of the constant slot in OPERANDS. + N is the constant value. */ +static const char * +output_multi_immediate (rtx *operands, const char *instr1, const char *instr2, + int immed_op, HOST_WIDE_INT n) +{ +#if HOST_BITS_PER_WIDE_INT > 32 + n &= 0xffffffff; +#endif + + if (n == 0) + { + /* Quick and easy output. */ + operands[immed_op] = const0_rtx; + output_asm_insn (instr1, operands); + } + else + { + int i; + const char * instr = instr1; + + /* Note that n is never zero here (which would give no output). */ + for (i = 0; i < 32; i += 2) + { + if (n & (3 << i)) + { + operands[immed_op] = GEN_INT (n & (255 << i)); + output_asm_insn (instr, operands); + instr = instr2; + i += 6; + } + } + } + + return ""; +} + +/* Return the name of a shifter operation. */ +static const char * +arm_shift_nmem(enum rtx_code code) +{ + switch (code) + { + case ASHIFT: + return ARM_LSL_NAME; + + case ASHIFTRT: + return "asr"; + + case LSHIFTRT: + return "lsr"; + + case ROTATERT: + return "ror"; + + default: + abort(); + } +} + +/* Return the appropriate ARM instruction for the operation code. + The returned result should not be overwritten. OP is the rtx of the + operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator + was shifted. */ +const char * +arithmetic_instr (rtx op, int shift_first_arg) +{ + switch (GET_CODE (op)) + { + case PLUS: + return "add"; + + case MINUS: + return shift_first_arg ? "rsb" : "sub"; + + case IOR: + return "orr"; + + case XOR: + return "eor"; + + case AND: + return "and"; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + return arm_shift_nmem(GET_CODE(op)); + + default: + gcc_unreachable (); + } +} + +/* Ensure valid constant shifts and return the appropriate shift mnemonic + for the operation code. The returned result should not be overwritten. + OP is the rtx code of the shift. + On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant + shift. 
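+   For example, (ashiftrt x (const_int 3)) yields "asr" with *AMOUNTP set
+   to 3, while (mult x (const_int 8)) is treated as a shift and returns
+   ARM_LSL_NAME with *AMOUNTP set to int_log2 (8), i.e. 3.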
*/ +static const char * +shift_op (rtx op, HOST_WIDE_INT *amountp) +{ + const char * mnem; + enum rtx_code code = GET_CODE (op); + + switch (GET_CODE (XEXP (op, 1))) + { + case REG: + case SUBREG: + *amountp = -1; + break; + + case CONST_INT: + *amountp = INTVAL (XEXP (op, 1)); + break; + + default: + gcc_unreachable (); + } + + switch (code) + { + case ROTATE: + gcc_assert (*amountp != -1); + *amountp = 32 - *amountp; + code = ROTATERT; + + /* Fall through. */ + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + mnem = arm_shift_nmem(code); + break; + + case MULT: + /* We never have to worry about the amount being other than a + power of 2, since this case can never be reloaded from a reg. */ + gcc_assert (*amountp != -1); + *amountp = int_log2 (*amountp); + return ARM_LSL_NAME; + + default: + gcc_unreachable (); + } + + if (*amountp != -1) + { + /* This is not 100% correct, but follows from the desire to merge + multiplication by a power of 2 with the recognizer for a + shift. >=32 is not a valid shift for "lsl", so we must try and + output a shift that produces the correct arithmetical result. + Using lsr #32 is identical except for the fact that the carry bit + is not set correctly if we set the flags; but we never use the + carry bit from such an operation, so we can ignore that. */ + if (code == ROTATERT) + /* Rotate is just modulo 32. */ + *amountp &= 31; + else if (*amountp != (*amountp & 31)) + { + if (code == ASHIFT) + mnem = "lsr"; + *amountp = 32; + } + + /* Shifts of 0 are no-ops. */ + if (*amountp == 0) + return NULL; + } + + return mnem; +} + +/* Obtain the shift from the POWER of two. */ + +static HOST_WIDE_INT +int_log2 (HOST_WIDE_INT power) +{ + HOST_WIDE_INT shift = 0; + + while ((((HOST_WIDE_INT) 1 << shift) & power) == 0) + { + gcc_assert (shift <= 31); + shift++; + } + + return shift; +} + +/* Output a .ascii pseudo-op, keeping track of lengths. This is + because /bin/as is horribly restrictive. The judgement about + whether or not each character is 'printable' (and can be output as + is) or not (and must be printed with an octal escape) must be made + with reference to the *host* character set -- the situation is + similar to that discussed in the comments above pp_c_char in + c-pretty-print.c. */ + +#define MAX_ASCII_LEN 51 + +void +output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len) +{ + int i; + int len_so_far = 0; + + fputs ("\t.ascii\t\"", stream); + + for (i = 0; i < len; i++) + { + int c = p[i]; + + if (len_so_far >= MAX_ASCII_LEN) + { + fputs ("\"\n\t.ascii\t\"", stream); + len_so_far = 0; + } + + if (ISPRINT (c)) + { + if (c == '\\' || c == '\"') + { + putc ('\\', stream); + len_so_far++; + } + putc (c, stream); + len_so_far++; + } + else + { + fprintf (stream, "\\%03o", c); + len_so_far += 4; + } + } + + fputs ("\"\n", stream); +} + +/* Compute the register save mask for registers 0 through 12 + inclusive. This code is used by arm_compute_save_reg_mask. */ + +static unsigned long +arm_compute_save_reg0_reg12_mask (void) +{ + unsigned long func_type = arm_current_func_type (); + unsigned long save_reg_mask = 0; + unsigned int reg; + + if (IS_INTERRUPT (func_type)) + { + unsigned int max_reg; + /* Interrupt functions must not corrupt any registers, + even call clobbered ones. If this is a leaf function + we can just examine the registers used by the RTL, but + otherwise we have to assume that whatever function is + called might clobber anything, and so we have to save + all the call-clobbered registers as well. 
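+	 For a non-leaf IRQ handler this typically adds at least r0-r3 and
+	 ip to the mask, on top of whatever the handler body itself uses.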
*/ + if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ) + /* FIQ handlers have registers r8 - r12 banked, so + we only need to check r0 - r7, Normal ISRs only + bank r14 and r15, so we must check up to r12. + r13 is the stack pointer which is always preserved, + so we do not need to consider it here. */ + max_reg = 7; + else + max_reg = 12; + + for (reg = 0; reg <= max_reg; reg++) + if (df_regs_ever_live_p (reg) + || (! current_function_is_leaf && call_used_regs[reg])) + save_reg_mask |= (1 << reg); + + /* Also save the pic base register if necessary. */ + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && crtl->uses_pic_offset_table) + save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + } + else if (IS_VOLATILE(func_type)) + { + /* For noreturn functions we historically omitted register saves + altogether. However this really messes up debugging. As a + compromise save just the frame pointers. Combined with the link + register saved elsewhere this should be sufficient to get + a backtrace. */ + if (frame_pointer_needed) + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; + if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM)) + save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; + if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM)) + save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM; + } + else + { + /* In the normal case we only need to save those registers + which are call saved and which are used by this function. */ + for (reg = 0; reg <= 11; reg++) + if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) + save_reg_mask |= (1 << reg); + + /* Handle the frame pointer as a special case. */ + if (frame_pointer_needed) + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; + + /* If we aren't loading the PIC register, + don't stack it even though it may be live. */ + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM) + || crtl->uses_pic_offset_table)) + save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + + /* The prologue will copy SP into R0, so save it. */ + if (IS_STACKALIGN (func_type)) + save_reg_mask |= 1; + } + + /* Save registers so the exception handler can modify them. */ + if (crtl->calls_eh_return) + { + unsigned int i; + + for (i = 0; ; i++) + { + reg = EH_RETURN_DATA_REGNO (i); + if (reg == INVALID_REGNUM) + break; + save_reg_mask |= 1 << reg; + } + } + + return save_reg_mask; +} + + +/* Compute the number of bytes used to store the static chain register on the + stack, above the stack frame. We need to know this accurately to get the + alignment of the rest of the stack frame correct. */ + +static int arm_compute_static_chain_stack_bytes (void) +{ + unsigned long func_type = arm_current_func_type (); + int static_chain_stack_bytes = 0; + + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM && + IS_NESTED (func_type) && + df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0) + static_chain_stack_bytes = 4; + + return static_chain_stack_bytes; +} + + +/* Compute a bit mask of which registers need to be + saved on the stack for the current function. + This is used by arm_get_frame_offsets, which may add extra registers. */ + +static unsigned long +arm_compute_save_reg_mask (void) +{ + unsigned int save_reg_mask = 0; + unsigned long func_type = arm_current_func_type (); + unsigned int reg; + + if (IS_NAKED (func_type)) + /* This should never really happen. 
*/ + return 0; + + /* If we are creating a stack frame, then we must save the frame pointer, + IP (which will hold the old stack pointer), LR and the PC. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + save_reg_mask |= + (1 << ARM_HARD_FRAME_POINTER_REGNUM) + | (1 << IP_REGNUM) + | (1 << LR_REGNUM) + | (1 << PC_REGNUM); + + save_reg_mask |= arm_compute_save_reg0_reg12_mask (); + + /* Decide if we need to save the link register. + Interrupt routines have their own banked link register, + so they never need to save it. + Otherwise if we do not use the link register we do not need to save + it. If we are pushing other registers onto the stack however, we + can save an instruction in the epilogue by pushing the link register + now and then popping it back into the PC. This incurs extra memory + accesses though, so we only do it when optimizing for size, and only + if we know that we will not need a fancy return sequence. */ + if (df_regs_ever_live_p (LR_REGNUM) + || (save_reg_mask + && optimize_size + && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->calls_eh_return)) + save_reg_mask |= 1 << LR_REGNUM; + + if (cfun->machine->lr_save_eliminated) + save_reg_mask &= ~ (1 << LR_REGNUM); + + if (TARGET_REALLY_IWMMXT + && ((bit_count (save_reg_mask) + + ARM_NUM_INTS (crtl->args.pretend_args_size + + arm_compute_static_chain_stack_bytes()) + ) % 2) != 0) + { + /* The total number of registers that are going to be pushed + onto the stack is odd. We need to ensure that the stack + is 64-bit aligned before we start to save iWMMXt registers, + and also before we start to create locals. (A local variable + might be a double or long long which we will load/store using + an iWMMXt instruction). Therefore we need to push another + ARM register, so that the stack will be 64-bit aligned. We + try to avoid using the arg registers (r0 -r3) as they might be + used to pass values in a tail call. */ + for (reg = 4; reg <= 12; reg++) + if ((save_reg_mask & (1 << reg)) == 0) + break; + + if (reg <= 12) + save_reg_mask |= (1 << reg); + else + { + cfun->machine->sibcall_blocked = 1; + save_reg_mask |= (1 << 3); + } + } + + /* We may need to push an additional register for use initializing the + PIC base register. */ + if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic + && (save_reg_mask & THUMB2_WORK_REGS) == 0) + { + reg = thumb_find_work_register (1 << 4); + if (!call_used_regs[reg]) + save_reg_mask |= (1 << reg); + } + + return save_reg_mask; +} + + +/* Compute a bit mask of which registers need to be + saved on the stack for the current function. */ +static unsigned long +thumb1_compute_save_reg_mask (void) +{ + unsigned long mask; + unsigned reg; + + mask = 0; + for (reg = 0; reg < 12; reg ++) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + mask |= 1 << reg; + + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && crtl->uses_pic_offset_table) + mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + + /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */ + if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) + mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; + + /* LR will also be pushed if any lo regs are pushed. */ + if (mask & 0xff || thumb_force_lr_save ()) + mask |= (1 << LR_REGNUM); + + /* Make sure we have a low work register if we need one. + We will need one if we are going to push a high register, + but we are not currently intending to push a low register. 
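+     (The 16-bit PUSH encoding can only name r0-r7 and lr, so a high
+     register must first be copied into a low register before it can be
+     saved, hence the need for a spare low register here.)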
*/ + if ((mask & 0xff) == 0 + && ((mask & 0x0f00) || TARGET_BACKTRACE)) + { + /* Use thumb_find_work_register to choose which register + we will use. If the register is live then we will + have to push it. Use LAST_LO_REGNUM as our fallback + choice for the register to select. */ + reg = thumb_find_work_register (1 << LAST_LO_REGNUM); + /* Make sure the register returned by thumb_find_work_register is + not part of the return value. */ + if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ()) + reg = LAST_LO_REGNUM; + + if (! call_used_regs[reg]) + mask |= 1 << reg; + } + + /* The 504 below is 8 bytes less than 512 because there are two possible + alignment words. We can't tell here if they will be present or not so we + have to play it safe and assume that they are. */ + if ((CALLER_INTERWORKING_SLOT_SIZE + + ROUND_UP_WORD (get_frame_size ()) + + crtl->outgoing_args_size) >= 504) + { + /* This is the same as the code in thumb1_expand_prologue() which + determines which register to use for stack decrement. */ + for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++) + if (mask & (1 << reg)) + break; + + if (reg > LAST_LO_REGNUM) + { + /* Make sure we have a register available for stack decrement. */ + mask |= 1 << LAST_LO_REGNUM; + } + } + + return mask; +} + + +/* Return the number of bytes required to save VFP registers. */ +static int +arm_get_vfp_saved_size (void) +{ + unsigned int regno; + int count; + int saved; + + saved = 0; + /* Space for saved VFP registers. */ + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + count = 0; + for (regno = FIRST_VFP_REGNUM; + regno < LAST_VFP_REGNUM; + regno += 2) + { + if ((!df_regs_ever_live_p (regno) || call_used_regs[regno]) + && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1])) + { + if (count > 0) + { + /* Workaround ARM10 VFPr1 bug. */ + if (count == 2 && !arm_arch6) + count++; + saved += count * 8; + } + count = 0; + } + else + count++; + } + if (count > 0) + { + if (count == 2 && !arm_arch6) + count++; + saved += count * 8; + } + } + return saved; +} + + +/* Generate a function exit sequence. If REALLY_RETURN is false, then do + everything bar the final return instruction. */ +const char * +output_return_instruction (rtx operand, int really_return, int reverse) +{ + char conditional[10]; + char instr[100]; + unsigned reg; + unsigned long live_regs_mask; + unsigned long func_type; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + if (IS_NAKED (func_type)) + return ""; + + if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN) + { + /* If this function was declared non-returning, and we have + found a tail call, then we have to trust that the called + function won't return. */ + if (really_return) + { + rtx ops[2]; + + /* Otherwise, trap an attempted return by aborting. */ + ops[0] = operand; + ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" + : "abort"); + assemble_external_libcall (ops[1]); + output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops); + } + + return ""; + } + + gcc_assert (!cfun->calls_alloca || really_return); + + sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd'); + + cfun->machine->return_used_this_function = 1; + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + if (live_regs_mask) + { + const char * return_reg; + + /* If we do not have any special requirements for function exit + (e.g. interworking) then we can load the return address + directly into the PC. Otherwise we must load it into LR. 
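+	 In the common case the epilogue therefore ends in something like
+	    ldmfd sp!, {r4, r5, pc}
+	 whereas an interworking return pops into LR and finishes with a
+	 separate "bx lr".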
*/ + if (really_return + && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK)) + return_reg = reg_names[PC_REGNUM]; + else + return_reg = reg_names[LR_REGNUM]; + + if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM)) + { + /* There are three possible reasons for the IP register + being saved. 1) a stack frame was created, in which case + IP contains the old stack pointer, or 2) an ISR routine + corrupted it, or 3) it was saved to align the stack on + iWMMXt. In case 1, restore IP into SP, otherwise just + restore IP. */ + if (frame_pointer_needed) + { + live_regs_mask &= ~ (1 << IP_REGNUM); + live_regs_mask |= (1 << SP_REGNUM); + } + else + gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT); + } + + /* On some ARM architectures it is faster to use LDR rather than + LDM to load a single register. On other architectures, the + cost is the same. In 26 bit mode, or for exception handlers, + we have to use LDM to load the PC so that the CPSR is also + restored. */ + for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) + if (live_regs_mask == (1U << reg)) + break; + + if (reg <= LAST_ARM_REGNUM + && (reg != LR_REGNUM + || ! really_return + || ! IS_INTERRUPT (func_type))) + { + sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional, + (reg == LR_REGNUM) ? return_reg : reg_names[reg]); + } + else + { + char *p; + int first = 1; + + /* Generate the load multiple instruction to restore the + registers. Note we can get here, even if + frame_pointer_needed is true, but only if sp already + points to the base of the saved core registers. */ + if (live_regs_mask & (1 << SP_REGNUM)) + { + unsigned HOST_WIDE_INT stack_adjust; + + stack_adjust = offsets->outgoing_args - offsets->saved_regs; + gcc_assert (stack_adjust == 0 || stack_adjust == 4); + + if (stack_adjust && arm_arch5 && TARGET_ARM) + if (TARGET_UNIFIED_ASM) + sprintf (instr, "ldmib%s\t%%|sp, {", conditional); + else + sprintf (instr, "ldm%sib\t%%|sp, {", conditional); + else + { + /* If we can't use ldmib (SA110 bug), + then try to pop r3 instead. */ + if (stack_adjust) + live_regs_mask |= 1 << 3; + + if (TARGET_UNIFIED_ASM) + sprintf (instr, "ldmfd%s\t%%|sp, {", conditional); + else + sprintf (instr, "ldm%sfd\t%%|sp, {", conditional); + } + } + else + if (TARGET_UNIFIED_ASM) + sprintf (instr, "pop%s\t{", conditional); + else + sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional); + + p = instr + strlen (instr); + + for (reg = 0; reg <= SP_REGNUM; reg++) + if (live_regs_mask & (1 << reg)) + { + int l = strlen (reg_names[reg]); + + if (first) + first = 0; + else + { + memcpy (p, ", ", 2); + p += 2; + } + + memcpy (p, "%|", 2); + memcpy (p + 2, reg_names[reg], l); + p += l + 2; + } + + if (live_regs_mask & (1 << LR_REGNUM)) + { + sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg); + /* If returning from an interrupt, restore the CPSR. */ + if (IS_INTERRUPT (func_type)) + strcat (p, "^"); + } + else + strcpy (p, "}"); + } + + output_asm_insn (instr, & operand); + + /* See if we need to generate an extra instruction to + perform the actual function return. */ + if (really_return + && func_type != ARM_FT_INTERWORKED + && (live_regs_mask & (1 << LR_REGNUM)) != 0) + { + /* The return has already been handled + by loading the LR into the PC. */ + really_return = 0; + } + } + + if (really_return) + { + switch ((int) ARM_FUNC_TYPE (func_type)) + { + case ARM_FT_ISR: + case ARM_FT_FIQ: + /* ??? This is wrong for unified assembly syntax. 
*/ + sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional); + break; + + case ARM_FT_INTERWORKED: + sprintf (instr, "bx%s\t%%|lr", conditional); + break; + + case ARM_FT_EXCEPTION: + /* ??? This is wrong for unified assembly syntax. */ + sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional); + break; + + default: + /* Use bx if it's available. */ + if (arm_arch5 || arm_arch4t) + sprintf (instr, "bx%s\t%%|lr", conditional); + else + sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional); + break; + } + + output_asm_insn (instr, & operand); + } + + return ""; +} + +/* Write the function name into the code section, directly preceding + the function prologue. + + Code will be output similar to this: + t0 + .ascii "arm_poke_function_name", 0 + .align + t1 + .word 0xff000000 + (t1 - t0) + arm_poke_function_name + mov ip, sp + stmfd sp!, {fp, ip, lr, pc} + sub fp, ip, #4 + + When performing a stack backtrace, code can inspect the value + of 'pc' stored at 'fp' + 0. If the trace function then looks + at location pc - 12 and the top 8 bits are set, then we know + that there is a function name embedded immediately preceding this + location and has length ((pc[-3]) & 0xff000000). + + We assume that pc is declared as a pointer to an unsigned long. + + It is of no benefit to output the function name if we are assembling + a leaf function. These function types will not contain a stack + backtrace structure, therefore it is not possible to determine the + function name. */ +void +arm_poke_function_name (FILE *stream, const char *name) +{ + unsigned long alignlength; + unsigned long length; + rtx x; + + length = strlen (name) + 1; + alignlength = ROUND_UP_WORD (length); + + ASM_OUTPUT_ASCII (stream, name, length); + ASM_OUTPUT_ALIGN (stream, 2); + x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength); + assemble_aligned_integer (UNITS_PER_WORD, x); +} + +/* Place some comments into the assembler stream + describing the current function. */ +static void +arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) +{ + unsigned long func_type; + + if (TARGET_THUMB1) + { + thumb1_output_function_prologue (f, frame_size); + return; + } + + /* Sanity check. 
*/ + gcc_assert (!arm_ccfsm_state && !arm_target_insn); + + func_type = arm_current_func_type (); + + switch ((int) ARM_FUNC_TYPE (func_type)) + { + default: + case ARM_FT_NORMAL: + break; + case ARM_FT_INTERWORKED: + asm_fprintf (f, "\t%@ Function supports interworking.\n"); + break; + case ARM_FT_ISR: + asm_fprintf (f, "\t%@ Interrupt Service Routine.\n"); + break; + case ARM_FT_FIQ: + asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n"); + break; + case ARM_FT_EXCEPTION: + asm_fprintf (f, "\t%@ ARM Exception Handler.\n"); + break; + } + + if (IS_NAKED (func_type)) + asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n"); + + if (IS_VOLATILE (func_type)) + asm_fprintf (f, "\t%@ Volatile: function does not return.\n"); + + if (IS_NESTED (func_type)) + asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n"); + if (IS_STACKALIGN (func_type)) + asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n"); + + asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n", + crtl->args.size, + crtl->args.pretend_args_size, frame_size); + + asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n", + frame_pointer_needed, + cfun->machine->uses_anonymous_args); + + if (cfun->machine->lr_save_eliminated) + asm_fprintf (f, "\t%@ link register save eliminated.\n"); + + if (crtl->calls_eh_return) + asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n"); + +} + +const char * +arm_output_epilogue (rtx sibling) +{ + int reg; + unsigned long saved_regs_mask; + unsigned long func_type; + /* Floats_offset is the offset from the "virtual" frame. In an APCS + frame that is $fp + 4 for a non-variadic function. */ + int floats_offset = 0; + rtx operands[3]; + FILE * f = asm_out_file; + unsigned int lrm_count = 0; + int really_return = (sibling == NULL); + int start_reg; + arm_stack_offsets *offsets; + + /* If we have already generated the return instruction + then it is futile to generate anything else. */ + if (use_return_insn (FALSE, sibling) && + (cfun->machine->return_used_this_function != 0)) + return ""; + + func_type = arm_current_func_type (); + + if (IS_NAKED (func_type)) + /* Naked functions don't have epilogues. */ + return ""; + + if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN) + { + rtx op; + + /* A volatile function should never return. Call abort. */ + op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort"); + assemble_external_libcall (op); + output_asm_insn ("bl\t%a0", &op); + + return ""; + } + + /* If we are throwing an exception, then we really must be doing a + return, so we can't tail-call. */ + gcc_assert (!crtl->calls_eh_return || really_return); + + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + + if (TARGET_IWMMXT) + lrm_count = bit_count (saved_regs_mask); + + floats_offset = offsets->saved_args; + /* Compute how far away the floats will be. */ + for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) + if (saved_regs_mask & (1 << reg)) + floats_offset += 4; + + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + { + /* This variable is for the Virtual Frame Pointer, not VFP regs. 
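The epilogue code here and below sizes several of its adjustments with bit_count (saved_regs_mask), for example lrm_count and the later subtraction of 4 * bit_count (...) from the frame pointer. bit_count itself is defined elsewhere in arm.c; the following is only a reference sketch of what it computes.

    /* Population count over a register mask: one set bit per saved core
       register, hence four bytes of stack each.  A reference sketch only;
       arm.c has its own bit_count helper.  */
    static unsigned int
    mask_bit_count (unsigned long mask)
    {
      unsigned int count = 0;

      while (mask)
        {
          mask &= mask - 1;        /* clear the lowest set bit */
          count++;
        }
      return count;
    }

    /* Example: saving r4-r11 and lr gives mask 0x4ff0, which has nine
       bits set, so the register dump occupies 36 bytes of stack.  */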
*/ + int vfp_offset = offsets->frame; + + if (TARGET_FPA_EMU2) + { + for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + { + floats_offset += 12; + asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n", + reg, FP_REGNUM, floats_offset - vfp_offset); + } + } + else + { + start_reg = LAST_FPA_REGNUM; + + for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) + { + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + { + floats_offset += 12; + + /* We can't unstack more than four registers at once. */ + if (start_reg - reg == 3) + { + asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n", + reg, FP_REGNUM, floats_offset - vfp_offset); + start_reg = reg - 1; + } + } + else + { + if (reg != start_reg) + asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n", + reg + 1, start_reg - reg, + FP_REGNUM, floats_offset - vfp_offset); + start_reg = reg - 1; + } + } + + /* Just in case the last register checked also needs unstacking. */ + if (reg != start_reg) + asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n", + reg + 1, start_reg - reg, + FP_REGNUM, floats_offset - vfp_offset); + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + int saved_size; + + /* The fldmd insns do not have base+offset addressing + modes, so we use IP to hold the address. */ + saved_size = arm_get_vfp_saved_size (); + + if (saved_size > 0) + { + floats_offset += saved_size; + asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM, + FP_REGNUM, floats_offset - vfp_offset); + } + start_reg = FIRST_VFP_REGNUM; + for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) + { + if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) + && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) + { + if (start_reg != reg) + vfp_output_fldmd (f, IP_REGNUM, + (start_reg - FIRST_VFP_REGNUM) / 2, + (reg - start_reg) / 2); + start_reg = reg + 2; + } + } + if (start_reg != reg) + vfp_output_fldmd (f, IP_REGNUM, + (start_reg - FIRST_VFP_REGNUM) / 2, + (reg - start_reg) / 2); + } + + if (TARGET_IWMMXT) + { + /* The frame pointer is guaranteed to be non-double-word aligned. + This is because it is set to (old_stack_pointer - 4) and the + old_stack_pointer was double word aligned. Thus the offset to + the iWMMXt registers to be loaded must also be non-double-word + sized, so that the resultant address *is* double-word aligned. + We can ignore floats_offset since that was already included in + the live_regs_mask. */ + lrm_count += (lrm_count % 2 ? 2 : 1); + + for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + { + asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n", + reg, FP_REGNUM, lrm_count * 4); + lrm_count += 2; + } + } + + /* saved_regs_mask should contain the IP, which at the time of stack + frame generation actually contains the old stack pointer. So a + quick way to unwind the stack is just pop the IP register directly + into the stack pointer. */ + gcc_assert (saved_regs_mask & (1 << IP_REGNUM)); + saved_regs_mask &= ~ (1 << IP_REGNUM); + saved_regs_mask |= (1 << SP_REGNUM); + + /* There are two registers left in saved_regs_mask - LR and PC. We + only need to restore the LR register (the return address), but to + save time we can load it directly into the PC, unless we need a + special function exit sequence, or we are not really returning. 
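Both the FPA and VFP restore loops above coalesce runs of live call-saved registers into single multi-register loads, with FPA groups capped at four registers (lfm) and VFP registers kept in even-aligned pairs (fldmd). The sketch below repeats the same downward scan in isolation, assuming a simple live[] array and printing the groups instead of emitting assembly; the register range and names are illustrative.

    #include <stdbool.h>
    #include <stdio.h>

    #define FIRST_REG 0    /* illustrative range, not arm.c's numbering */
    #define LAST_REG  7
    #define MAX_GROUP 4    /* lfm/sfm move at most four registers at once */

    /* Walk the register file downwards, exactly like the FPA restore
       loop above, and emit one multi-register load per run of live
       call-saved registers, never more than MAX_GROUP at a time.  */
    static void
    emit_groups (const bool live[LAST_REG + 1])
    {
      int start_reg = LAST_REG;
      int reg;

      for (reg = LAST_REG; reg >= FIRST_REG; reg--)
        {
          if (live[reg])
            {
              if (start_reg - reg == MAX_GROUP - 1)
                {
                  printf ("load f%d-f%d\n", reg, start_reg);
                  start_reg = reg - 1;
                }
            }
          else
            {
              if (reg != start_reg)
                printf ("load f%d-f%d\n", reg + 1, start_reg);
              start_reg = reg - 1;
            }
        }

      /* Just in case the last register checked also needs unstacking.  */
      if (reg != start_reg)
        printf ("load f%d-f%d\n", reg + 1, start_reg);
    }

    /* Example: with f0, f1 and f3-f7 live (f2 dead) this prints
       "load f4-f7", "load f3-f3" and "load f0-f1".  */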
*/ + if (really_return + && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->calls_eh_return) + /* Delete the LR from the register mask, so that the LR on + the stack is loaded into the PC in the register mask. */ + saved_regs_mask &= ~ (1 << LR_REGNUM); + else + saved_regs_mask &= ~ (1 << PC_REGNUM); + + /* We must use SP as the base register, because SP is one of the + registers being restored. If an interrupt or page fault + happens in the ldm instruction, the SP might or might not + have been restored. That would be bad, as then SP will no + longer indicate the safe area of stack, and we can get stack + corruption. Using SP as the base register means that it will + be reset correctly to the original value, should an interrupt + occur. If the stack pointer already points at the right + place, then omit the subtraction. */ + if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask)) + || cfun->calls_alloca) + asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM, + 4 * bit_count (saved_regs_mask)); + print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0); + + if (IS_INTERRUPT (func_type)) + /* Interrupt handlers will have pushed the + IP onto the stack, so restore it now. */ + print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0); + } + else + { + /* This branch is executed for ARM mode (non-apcs frames) and + Thumb-2 mode. Frame layout is essentially the same for those + cases, except that in ARM mode frame pointer points to the + first saved register, while in Thumb-2 mode the frame pointer points + to the last saved register. + + It is possible to make frame pointer point to last saved + register in both cases, and remove some conditionals below. + That means that fp setup in prologue would be just "mov fp, sp" + and sp restore in epilogue would be just "mov sp, fp", whereas + now we have to use add/sub in those cases. However, the value + of that would be marginal, as both mov and add/sub are 32-bit + in ARM mode, and it would require extra conditionals + in arm_expand_prologue to distingish ARM-apcs-frame case + (where frame pointer is required to point at first register) + and ARM-non-apcs-frame. Therefore, such change is postponed + until real need arise. */ + unsigned HOST_WIDE_INT amount; + int rfe; + /* Restore stack pointer if necessary. */ + if (TARGET_ARM && frame_pointer_needed) + { + operands[0] = stack_pointer_rtx; + operands[1] = hard_frame_pointer_rtx; + + operands[2] = GEN_INT (offsets->frame - offsets->saved_regs); + output_add_immediate (operands); + } + else + { + if (frame_pointer_needed) + { + /* For Thumb-2 restore sp from the frame pointer. + Operand restrictions mean we have to incrememnt FP, then copy + to SP. */ + amount = offsets->locals_base - offsets->saved_regs; + operands[0] = hard_frame_pointer_rtx; + } + else + { + unsigned long count; + operands[0] = stack_pointer_rtx; + amount = offsets->outgoing_args - offsets->saved_regs; + /* pop call clobbered registers if it avoids a + separate stack adjustment. */ + count = offsets->saved_regs - offsets->saved_args; + if (optimize_size + && count != 0 + && !crtl->calls_eh_return + && bit_count(saved_regs_mask) * 4 == count + && !IS_INTERRUPT (func_type) + && !crtl->tail_call_emit) + { + unsigned long mask; + /* Preserve return values, of any size. 
*/ + mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1; + mask ^= 0xf; + mask &= ~saved_regs_mask; + reg = 0; + while (bit_count (mask) * 4 > amount) + { + while ((mask & (1 << reg)) == 0) + reg++; + mask &= ~(1 << reg); + } + if (bit_count (mask) * 4 == amount) { + amount = 0; + saved_regs_mask |= mask; + } + } + } + + if (amount) + { + operands[1] = operands[0]; + operands[2] = GEN_INT (amount); + output_add_immediate (operands); + } + if (frame_pointer_needed) + asm_fprintf (f, "\tmov\t%r, %r\n", + SP_REGNUM, HARD_FRAME_POINTER_REGNUM); + } + + if (TARGET_FPA_EMU2) + { + for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + asm_fprintf (f, "\tldfe\t%r, [%r], #12\n", + reg, SP_REGNUM); + } + else + { + start_reg = FIRST_FPA_REGNUM; + + for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) + { + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + { + if (reg - start_reg == 3) + { + asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n", + start_reg, SP_REGNUM); + start_reg = reg + 1; + } + } + else + { + if (reg != start_reg) + asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n", + start_reg, reg - start_reg, + SP_REGNUM); + + start_reg = reg + 1; + } + } + + /* Just in case the last register checked also needs unstacking. */ + if (reg != start_reg) + asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n", + start_reg, reg - start_reg, SP_REGNUM); + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + int end_reg = LAST_VFP_REGNUM + 1; + + /* Scan the registers in reverse order. We need to match + any groupings made in the prologue and generate matching + pop operations. */ + for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2) + { + if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) + && (!df_regs_ever_live_p (reg + 1) + || call_used_regs[reg + 1])) + { + if (end_reg > reg + 2) + vfp_output_fldmd (f, SP_REGNUM, + (reg + 2 - FIRST_VFP_REGNUM) / 2, + (end_reg - (reg + 2)) / 2); + end_reg = reg; + } + } + if (end_reg > reg + 2) + vfp_output_fldmd (f, SP_REGNUM, 0, + (end_reg - (reg + 2)) / 2); + } + + if (TARGET_IWMMXT) + for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM); + + /* If we can, restore the LR into the PC. */ + if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED + && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL) + && !IS_STACKALIGN (func_type) + && really_return + && crtl->args.pretend_args_size == 0 + && saved_regs_mask & (1 << LR_REGNUM) + && !crtl->calls_eh_return) + { + saved_regs_mask &= ~ (1 << LR_REGNUM); + saved_regs_mask |= (1 << PC_REGNUM); + rfe = IS_INTERRUPT (func_type); + } + else + rfe = 0; + + /* Load the registers off the stack. If we only have one register + to load use the LDR instruction - it is faster. For Thumb-2 + always use pop and the assembler will pick the best instruction.*/ + if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM) + && !IS_INTERRUPT(func_type)) + { + asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM); + } + else if (saved_regs_mask) + { + if (saved_regs_mask & (1 << SP_REGNUM)) + /* Note - write back to the stack register is not enabled + (i.e. "ldmfd sp!..."). We know that the stack pointer is + in the list of registers and if we add writeback the + instruction becomes UNPREDICTABLE. 
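The optimize_size branch just above folds the final "add sp, sp, #amount" into the register pop by adding otherwise unused call-clobbered registers to the pop mask, provided they do not hold the return value and the byte counts match exactly. A sketch of that mask computation, with __builtin_popcountl standing in for arm.c's bit_count and the other preconditions (size optimization, no EH return, not an interrupt, no sibcalls) assumed to have been checked by the caller.

    /* Return the extra call-clobbered registers that can absorb the
       remaining stack adjustment of AMOUNT bytes, or 0 if the byte
       counts cannot be matched exactly.  RETURN_SIZE is the byte size
       of the function's return value, SAVED_REGS_MASK the registers
       already being popped.  */
    static unsigned long
    extra_pop_mask (unsigned int amount, unsigned int return_size,
                    unsigned long saved_regs_mask)
    {
      unsigned long mask;
      int reg = 0;

      /* r0..r(n-1) carry the return value; the rest of r0-r3 is free.  */
      mask = (1ul << ((return_size + 3) / 4)) - 1;
      mask ^= 0xf;
      mask &= ~saved_regs_mask;

      /* Drop low registers until popping them releases no more than
         AMOUNT bytes.  */
      while ((unsigned int) __builtin_popcountl (mask) * 4 > amount)
        {
          while ((mask & (1ul << reg)) == 0)
            reg++;
          mask &= ~(1ul << reg);
        }

      /* Only worthwhile when the match is exact.  */
      return ((unsigned int) __builtin_popcountl (mask) * 4 == amount)
             ? mask : 0;
    }

For an 8-byte (long long) return value and amount == 8 this yields the mask for r2 and r3, so the epilogue can use "pop {r2, r3, r4-r7, lr}" instead of an add followed by "pop {r4-r7, lr}".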
*/ + print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, + rfe); + else if (TARGET_ARM) + print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask, + rfe); + else + print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0); + } + + if (crtl->args.pretend_args_size) + { + /* Unwind the pre-pushed regs. */ + operands[0] = operands[1] = stack_pointer_rtx; + operands[2] = GEN_INT (crtl->args.pretend_args_size); + output_add_immediate (operands); + } + } + + /* We may have already restored PC directly from the stack. */ + if (!really_return || saved_regs_mask & (1 << PC_REGNUM)) + return ""; + + /* Stack adjustment for exception handler. */ + if (crtl->calls_eh_return) + asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM, + ARM_EH_STACKADJ_REGNUM); + + /* Generate the return instruction. */ + switch ((int) ARM_FUNC_TYPE (func_type)) + { + case ARM_FT_ISR: + case ARM_FT_FIQ: + asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM); + break; + + case ARM_FT_EXCEPTION: + asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM); + break; + + case ARM_FT_INTERWORKED: + asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM); + break; + + default: + if (IS_STACKALIGN (func_type)) + { + /* See comment in arm_expand_prologue. */ + asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0); + } + if (arm_arch5 || arm_arch4t) + asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM); + else + asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM); + break; + } + + return ""; +} + +static void +arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, + HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED) +{ + arm_stack_offsets *offsets; + + if (TARGET_THUMB1) + { + int regno; + + /* Emit any call-via-reg trampolines that are needed for v4t support + of call_reg and call_value_reg type insns. */ + for (regno = 0; regno < LR_REGNUM; regno++) + { + rtx label = cfun->machine->call_via[regno]; + + if (label != NULL) + { + switch_to_section (function_section (current_function_decl)); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (label)); + asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); + } + } + + /* ??? Probably not safe to set this here, since it assumes that a + function will be emitted as assembly immediately after we generate + RTL for it. This does not happen for inline functions. */ + cfun->machine->return_used_this_function = 0; + } + else /* TARGET_32BIT */ + { + /* We need to take into account any stack-frame rounding. */ + offsets = arm_get_frame_offsets (); + + gcc_assert (!use_return_insn (FALSE, NULL) + || (cfun->machine->return_used_this_function != 0) + || offsets->saved_regs == offsets->outgoing_args + || frame_pointer_needed); + + /* Reset the ARM-specific per-function variables. */ + after_arm_reorg = 0; + } +} + +/* Generate and emit an insn that we will recognize as a push_multi. + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. */ +static rtx +emit_multi_reg_push (unsigned long mask) +{ + int num_regs = 0; + int num_dwarf_regs; + int i, j; + rtx par; + rtx dwarf; + int dwarf_par_index; + rtx tmp, reg; + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (mask & (1 << i)) + num_regs++; + + gcc_assert (num_regs && num_regs <= 16); + + /* We don't record the PC in the dwarf frame information. 
*/ + num_dwarf_regs = num_regs; + if (mask & (1 << PC_REGNUM)) + num_dwarf_regs--; + + /* For the body of the insn we are going to generate an UNSPEC in + parallel with several USEs. This allows the insn to be recognized + by the push_multi pattern in the arm.md file. + + The body of the insn looks something like this: + + (parallel [ + (set (mem:BLK (pre_modify:SI (reg:SI sp) + (const_int:SI ))) + (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) + (use (reg:SI XX)) + (use (reg:SI YY)) + ... + ]) + + For the frame note however, we try to be more explicit and actually + show each register being stored into the stack frame, plus a (single) + decrement of the stack pointer. We do it this way in order to be + friendly to the stack unwinding code, which only wants to see a single + stack decrement per instruction. The RTL we generate for the note looks + something like this: + + (sequence [ + (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) + (set (mem:SI (reg:SI sp)) (reg:SI r4)) + (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX)) + (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY)) + ... + ]) + + FIXME:: In an ideal world the PRE_MODIFY would not exist and + instead we'd have a parallel expression detailing all + the stores to the various memory addresses so that debug + information is more up-to-date. Remember however while writing + this to take care of the constraints with the push instruction. + + Note also that this has to be taken care of for the VFP registers. + + For more see PR43399. */ + + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); + dwarf_par_index = 1; + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + { + if (mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + + XVECEXP (par, 0, 0) + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (BLKmode, + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + plus_constant + (stack_pointer_rtx, + -4 * num_regs)) + ), + gen_rtx_UNSPEC (BLKmode, + gen_rtvec (1, reg), + UNSPEC_PUSH_MULT)); + + if (i != PC_REGNUM) + { + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (SImode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index) = tmp; + dwarf_par_index++; + } + + break; + } + } + + for (j = 1, i++; j < num_regs; i++) + { + if (mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + + XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg); + + if (i != PC_REGNUM) + { + tmp + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (SImode, + plus_constant (stack_pointer_rtx, + 4 * j)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; + } + + j++; + } + } + + par = emit_insn (par); + + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); + + return par; +} + +/* Calculate the size of the return value that is passed in registers. 
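The REG_FRAME_RELATED_EXPR sequence constructed above describes the push to the unwinder as one stack decrement plus individual word stores, even though the instruction itself is a single store-multiple. The sketch below prints that view for a given mask; the register numbering is the usual ARM one and the function is illustrative only.

    #include <stdio.h>

    #define PC_REGNUM 15

    /* Print the unwinder-facing view built above for a push of MASK:
       a single stack decrement followed by one word store per register,
       lowest register at the lowest address, with the PC (if pushed)
       occupying a slot but getting no note entry of its own.  */
    static void
    print_push_note (unsigned long mask)
    {
      int num_regs = 0;
      int offset = 0;
      int i;

      for (i = 0; i <= 15; i++)
        if (mask & (1ul << i))
          num_regs++;

      printf ("sp = sp - %d\n", 4 * num_regs);

      for (i = 0; i <= 15; i++)
        if (mask & (1ul << i))
          {
            if (i != PC_REGNUM)
              printf ("[sp + %d] = r%d\n", offset, i);
            offset += 4;
          }
    }

    /* Example: a push of {r4, r5, lr} prints
         sp = sp - 12
         [sp + 0] = r4
         [sp + 4] = r5
         [sp + 8] = r14  */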
*/ +static unsigned +arm_size_return_regs (void) +{ + enum machine_mode mode; + + if (crtl->return_rtx != 0) + mode = GET_MODE (crtl->return_rtx); + else + mode = DECL_MODE (DECL_RESULT (current_function_decl)); + + return GET_MODE_SIZE (mode); +} + +static rtx +emit_sfm (int base_reg, int count) +{ + rtx par; + rtx dwarf; + rtx tmp, reg; + int i; + + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); + + reg = gen_rtx_REG (XFmode, base_reg++); + + XVECEXP (par, 0, 0) + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (BLKmode, + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + plus_constant + (stack_pointer_rtx, + -12 * count)) + ), + gen_rtx_UNSPEC (BLKmode, + gen_rtvec (1, reg), + UNSPEC_PUSH_MULT)); + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (XFmode, stack_pointer_rtx), reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 1) = tmp; + + for (i = 1; i < count; i++) + { + reg = gen_rtx_REG (XFmode, base_reg++); + XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg); + + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (XFmode, + plus_constant (stack_pointer_rtx, + i * 12)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp; + } + + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, -12 * count)); + + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + par = emit_insn (par); + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); + + return par; +} + + +/* Return true if the current function needs to save/restore LR. */ + +static bool +thumb_force_lr_save (void) +{ + return !cfun->machine->lr_save_eliminated + && (!leaf_function_p () + || thumb_far_jump_used_p () + || df_regs_ever_live_p (LR_REGNUM)); +} + + +/* Return true if r3 is used by any of the tail call insns in the + current function. */ + +static bool +any_sibcall_uses_r3 (void) +{ + edge_iterator ei; + edge e; + + if (!crtl->tail_call_emit) + return false; + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + if (e->flags & EDGE_SIBCALL) + { + rtx call = BB_END (e->src); + if (!CALL_P (call)) + call = prev_nonnote_nondebug_insn (call); + gcc_assert (CALL_P (call) && SIBLING_CALL_P (call)); + if (find_regno_fusage (call, USE, 3)) + return true; + } + return false; +} + + +/* Compute the distance from register FROM to register TO. + These can be the arg pointer (26), the soft frame pointer (25), + the stack pointer (13) or the hard frame pointer (11). + In thumb mode r7 is used as the soft frame pointer, if needed. + Typical stack layout looks like this: + + old stack pointer -> | | + ---- + | | \ + | | saved arguments for + | | vararg functions + | | / + -- + hard FP & arg pointer -> | | \ + | | stack + | | frame + | | / + -- + | | \ + | | call saved + | | registers + soft frame pointer -> | | / + -- + | | \ + | | local + | | variables + locals base pointer -> | | / + -- + | | \ + | | outgoing + | | arguments + current stack pointer -> | | / + -- + + For a given function some or all of these stack components + may not be needed, giving rise to the possibility of + eliminating some of the registers. + + The values returned by this function must reflect the behavior + of arm_expand_prologue() and arm_compute_save_reg_mask(). + + The sign of the number returned reflects the direction of stack + growth, so the values are positive for all eliminations except + from the soft frame pointer to the hard frame pointer. + + SFP may point just inside the local variables block to ensure correct + alignment. 
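The two functions that follow turn the layout described above into byte offsets and elimination distances. The sketch below works through one concrete frame under stated assumptions (no pretend arguments, no static chain or interworking slot, five core registers saved, 13 bytes of locals, 8 bytes of outgoing arguments, doubleword alignment); the rounding macro is defined locally for the sketch and the numbers are only an example.

    #include <assert.h>

    #define ROUND_UP_WORD(x)  (((x) + 3) & ~3)

    /* Worked example of the offsets computed by arm_get_frame_offsets
       below, and of the elimination distances derived from them.  */
    static void
    example_frame (void)
    {
      int saved_args, saved_regs, soft_frame, locals_base, outgoing_args;
      int frame_size = ROUND_UP_WORD (13);     /* locals round up to 16 */

      saved_args = 0;
      saved_regs = saved_args + 5 * 4;         /* 20-byte register dump */
      soft_frame = saved_regs;
      if (soft_frame & 7)                      /* keep SFP 8-byte aligned */
        soft_frame += 4;                       /* -> 24 */
      locals_base = soft_frame + frame_size;   /* -> 40 */
      outgoing_args = locals_base + 8;         /* -> 48 */
      if (outgoing_args & 7)                   /* keep SP 8-byte aligned */
        outgoing_args += 4;
      assert (outgoing_args == 48 && (outgoing_args & 7) == 0);

      /* Elimination distances as returned by
         arm_compute_initial_elimination_offset below.  */
      assert (soft_frame - saved_args == 24);           /* AP -> soft FP */
      assert (outgoing_args - soft_frame == 24);        /* soft FP -> SP */
      assert (outgoing_args - (saved_args + 4) == 44);  /* AP -> SP      */
      /* Had nothing at all been pushed, the AP -> SP distance would be
         -4, exactly as the comment in the code notes.  */
    }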
*/ + + +/* Calculate stack offsets. These are used to calculate register elimination + offsets and in prologue/epilogue code. Also calculates which registers + should be saved. */ + +static arm_stack_offsets * +arm_get_frame_offsets (void) +{ + struct arm_stack_offsets *offsets; + unsigned long func_type; + int leaf; + int saved; + int core_saved; + HOST_WIDE_INT frame_size; + int i; + + offsets = &cfun->machine->stack_offsets; + + /* We need to know if we are a leaf function. Unfortunately, it + is possible to be called after start_sequence has been called, + which causes get_insns to return the insns for the sequence, + not the function, which will cause leaf_function_p to return + the incorrect result. + + to know about leaf functions once reload has completed, and the + frame size cannot be changed after that time, so we can safely + use the cached value. */ + + if (reload_completed) + return offsets; + + /* Initially this is the size of the local variables. It will translated + into an offset once we have determined the size of preceding data. */ + frame_size = ROUND_UP_WORD (get_frame_size ()); + + leaf = leaf_function_p (); + + /* Space for variadic functions. */ + offsets->saved_args = crtl->args.pretend_args_size; + + /* In Thumb mode this is incorrect, but never used. */ + offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) + + arm_compute_static_chain_stack_bytes(); + + if (TARGET_32BIT) + { + unsigned int regno; + + offsets->saved_regs_mask = arm_compute_save_reg_mask (); + core_saved = bit_count (offsets->saved_regs_mask) * 4; + saved = core_saved; + + /* We know that SP will be doubleword aligned on entry, and we must + preserve that condition at any subroutine call. We also require the + soft frame pointer to be doubleword aligned. */ + + if (TARGET_REALLY_IWMMXT) + { + /* Check for the call-saved iWMMXt registers. */ + for (regno = FIRST_IWMMXT_REGNUM; + regno <= LAST_IWMMXT_REGNUM; + regno++) + if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) + saved += 8; + } + + func_type = arm_current_func_type (); + if (! IS_VOLATILE (func_type)) + { + /* Space for saved FPA registers. */ + for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) + saved += 12; + + /* Space for saved VFP registers. */ + if (TARGET_HARD_FLOAT && TARGET_VFP) + saved += arm_get_vfp_saved_size (); + } + } + else /* TARGET_THUMB1 */ + { + offsets->saved_regs_mask = thumb1_compute_save_reg_mask (); + core_saved = bit_count (offsets->saved_regs_mask) * 4; + saved = core_saved; + if (TARGET_BACKTRACE) + saved += 16; + } + + /* Saved registers include the stack frame. */ + offsets->saved_regs = offsets->saved_args + saved + + arm_compute_static_chain_stack_bytes(); + offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE; + /* A leaf function does not need any stack alignment if it has nothing + on the stack. */ + if (leaf && frame_size == 0) + { + offsets->outgoing_args = offsets->soft_frame; + offsets->locals_base = offsets->soft_frame; + return offsets; + } + + /* Ensure SFP has the correct alignment. */ + if (ARM_DOUBLEWORD_ALIGN + && (offsets->soft_frame & 7)) + { + offsets->soft_frame += 4; + /* Try to align stack by pushing an extra reg. Don't bother doing this + when there is a stack frame as the alignment will be rolled into + the normal stack adjustment. */ + if (frame_size + crtl->outgoing_args_size == 0) + { + int reg = -1; + + /* If it is safe to use r3, then do so. 
This sometimes + generates better code on Thumb-2 by avoiding the need to + use 32-bit push/pop instructions. */ + if (! any_sibcall_uses_r3 () + && arm_size_return_regs () <= 12 + && (offsets->saved_regs_mask & (1 << 3)) == 0) + { + reg = 3; + } + else + for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) + { + if ((offsets->saved_regs_mask & (1 << i)) == 0) + { + reg = i; + break; + } + } + + if (reg != -1) + { + offsets->saved_regs += 4; + offsets->saved_regs_mask |= (1 << reg); + } + } + } + + offsets->locals_base = offsets->soft_frame + frame_size; + offsets->outgoing_args = (offsets->locals_base + + crtl->outgoing_args_size); + + if (ARM_DOUBLEWORD_ALIGN) + { + /* Ensure SP remains doubleword aligned. */ + if (offsets->outgoing_args & 7) + offsets->outgoing_args += 4; + gcc_assert (!(offsets->outgoing_args & 7)); + } + + return offsets; +} + + +/* Calculate the relative offsets for the different stack pointers. Positive + offsets are in the direction of stack growth. */ + +HOST_WIDE_INT +arm_compute_initial_elimination_offset (unsigned int from, unsigned int to) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + + /* OK, now we have enough information to compute the distances. + There must be an entry in these switch tables for each pair + of registers in ELIMINABLE_REGS, even if some of the entries + seem to be redundant or useless. */ + switch (from) + { + case ARG_POINTER_REGNUM: + switch (to) + { + case THUMB_HARD_FRAME_POINTER_REGNUM: + return 0; + + case FRAME_POINTER_REGNUM: + /* This is the reverse of the soft frame pointer + to hard frame pointer elimination below. */ + return offsets->soft_frame - offsets->saved_args; + + case ARM_HARD_FRAME_POINTER_REGNUM: + /* This is only non-zero in the case where the static chain register + is stored above the frame. */ + return offsets->frame - offsets->saved_args - 4; + + case STACK_POINTER_REGNUM: + /* If nothing has been pushed on the stack at all + then this will return -4. This *is* correct! */ + return offsets->outgoing_args - (offsets->saved_args + 4); + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + case FRAME_POINTER_REGNUM: + switch (to) + { + case THUMB_HARD_FRAME_POINTER_REGNUM: + return 0; + + case ARM_HARD_FRAME_POINTER_REGNUM: + /* The hard frame pointer points to the top entry in the + stack frame. The soft frame pointer to the bottom entry + in the stack frame. If there is no stack frame at all, + then they are identical. */ + + return offsets->frame - offsets->soft_frame; + + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->soft_frame; + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + default: + /* You cannot eliminate from the stack pointer. + In theory you could eliminate from the hard frame + pointer to the stack pointer, but this will never + happen, since if a stack frame is not needed the + hard frame pointer will never be used. */ + gcc_unreachable (); + } +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. + + All eliminations are permissible. Note that ARG_POINTER_REGNUM and + HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame + pointer, we must eliminate FRAME_POINTER_REGNUM into + HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or + ARG_POINTER_REGNUM. */ + +bool +arm_can_eliminate (const int from, const int to) +{ + return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? 
false : + (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false : + (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false : + (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false : + true); +} + +/* Emit RTL to save coprocessor registers on function entry. Returns the + number of bytes pushed. */ + +static int +arm_save_coproc_regs(void) +{ + int saved_size = 0; + unsigned reg; + unsigned start_reg; + rtx insn; + + for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) + { + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + insn = gen_rtx_MEM (V2SImode, insn); + insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); + RTX_FRAME_RELATED_P (insn) = 1; + saved_size += 8; + } + + /* Save any floating point call-saved registers used by this + function. */ + if (TARGET_FPA_EMU2) + { + for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + { + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + insn = gen_rtx_MEM (XFmode, insn); + insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg)); + RTX_FRAME_RELATED_P (insn) = 1; + saved_size += 12; + } + } + else + { + start_reg = LAST_FPA_REGNUM; + + for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) + { + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + { + if (start_reg - reg == 3) + { + insn = emit_sfm (reg, 4); + RTX_FRAME_RELATED_P (insn) = 1; + saved_size += 48; + start_reg = reg - 1; + } + } + else + { + if (start_reg != reg) + { + insn = emit_sfm (reg + 1, start_reg - reg); + RTX_FRAME_RELATED_P (insn) = 1; + saved_size += (start_reg - reg) * 12; + } + start_reg = reg - 1; + } + } + + if (start_reg != reg) + { + insn = emit_sfm (reg + 1, start_reg - reg); + saved_size += (start_reg - reg) * 12; + RTX_FRAME_RELATED_P (insn) = 1; + } + } + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + start_reg = FIRST_VFP_REGNUM; + + for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) + { + if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) + && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) + { + if (start_reg != reg) + saved_size += vfp_emit_fstmd (start_reg, + (reg - start_reg) / 2); + start_reg = reg + 2; + } + } + if (start_reg != reg) + saved_size += vfp_emit_fstmd (start_reg, + (reg - start_reg) / 2); + } + return saved_size; +} + + +/* Set the Thumb frame pointer from the stack pointer. */ + +static void +thumb_set_frame_pointer (arm_stack_offsets *offsets) +{ + HOST_WIDE_INT amount; + rtx insn, dwarf; + + amount = offsets->outgoing_args - offsets->locals_base; + if (amount < 1024) + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, GEN_INT (amount))); + else + { + emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount))); + /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1 + expects the first two operands to be the same. */ + if (TARGET_THUMB2) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + stack_pointer_rtx)); + } + dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, + plus_constant (stack_pointer_rtx, amount)); + RTX_FRAME_RELATED_P (dwarf) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate the prologue instructions for entry into an ARM or Thumb-2 + function. 
*/ +void +arm_expand_prologue (void) +{ + rtx amount; + rtx insn; + rtx ip_rtx; + unsigned long live_regs_mask; + unsigned long func_type; + int fp_offset = 0; + int saved_pretend_args = 0; + int saved_regs = 0; + unsigned HOST_WIDE_INT args_to_push; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + /* Naked functions don't have prologues. */ + if (IS_NAKED (func_type)) + return; + + /* Make a copy of c_f_p_a_s as we may need to modify it locally. */ + args_to_push = crtl->args.pretend_args_size; + + /* Compute which register we will have to save onto the stack. */ + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + + if (IS_STACKALIGN (func_type)) + { + rtx dwarf; + rtx r0; + rtx r1; + /* Handle a word-aligned stack pointer. We generate the following: + + mov r0, sp + bic r1, r0, #7 + mov sp, r1 + + mov sp, r0 + bx lr + + The unwinder doesn't need to know about the stack realignment. + Just tell it we saved SP in r0. */ + gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0); + + r0 = gen_rtx_REG (SImode, 0); + r1 = gen_rtx_REG (SImode, 1); + /* Use a real rtvec rather than NULL_RTVEC so the rest of the + compiler won't choke. */ + dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN); + dwarf = gen_rtx_SET (VOIDmode, r0, dwarf); + insn = gen_movsi (r0, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + emit_insn (insn); + emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7))); + emit_insn (gen_movsi (stack_pointer_rtx, r1)); + } + + /* For APCS frames, if IP register is clobbered + when creating frame, save that register in a special + way. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + { + if (IS_INTERRUPT (func_type)) + { + /* Interrupt functions must not corrupt any registers. + Creating a frame pointer however, corrupts the IP + register, so we must push it first. */ + insn = emit_multi_reg_push (1 << IP_REGNUM); + + /* Do not set RTX_FRAME_RELATED_P on this insn. + The dwarf stack unwinding code only wants to see one + stack decrement per function, and this is not it. If + this instruction is labeled as being part of the frame + creation sequence then dwarf2out_frame_debug_expr will + die when it encounters the assignment of IP to FP + later on, since the use of SP here establishes SP as + the CFA register and not IP. + + Anyway this instruction is not really part of the stack + frame creation although it is part of the prologue. */ + } + else if (IS_NESTED (func_type)) + { + /* The Static chain register is the same as the IP register + used as a scratch register during stack frame creation. + To get around this need to find somewhere to store IP + whilst the frame is being created. We try the following + places in order: + + 1. The last argument register. + 2. A slot on the stack above the frame. (This only + works if the function is not a varargs function). + 3. Register r3, after pushing the argument registers + onto the stack. + + Note - we only need to tell the dwarf2 backend about the SP + adjustment in the second variant; the static chain register + doesn't need to be unwound, as it doesn't contain a value + inherited from the caller. 
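The comment above lists the three places the prologue can park IP (the static chain) while an APCS frame is being built. A small sketch of that decision, with r3_live standing in for df_regs_ever_live_p (3); the enum and names are illustrative only.

    #include <stdbool.h>

    enum ip_save_place
    {
      IP_IN_R3,            /* r3 is dead: just copy IP into it           */
      IP_ON_STACK,         /* push IP into a slot just above the frame   */
      IP_IN_R3_AFTER_PUSH  /* push the argument registers first, then r3 */
    };

    /* Mirror of the IS_NESTED decision above.  R3_LIVE says whether r3
       already carries a value, ARGS_TO_PUSH is the pretend-args size.  */
    static enum ip_save_place
    choose_ip_save (bool r3_live, unsigned int args_to_push)
    {
      if (!r3_live)
        return IP_IN_R3;
      if (args_to_push == 0)
        return IP_ON_STACK;          /* needs only a 4-byte SP note        */
      return IP_IN_R3_AFTER_PUSH;    /* r3 is free once the args are saved */
    }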
*/ + + if (df_regs_ever_live_p (3) == false) + insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); + else if (args_to_push == 0) + { + rtx dwarf; + + gcc_assert(arm_compute_static_chain_stack_bytes() == 4); + saved_regs += 4; + + insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx); + insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx); + fp_offset = 4; + + /* Just tell the dwarf backend that we adjusted SP. */ + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -fp_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + /* Store the args on the stack. */ + if (cfun->machine->uses_anonymous_args) + insn = emit_multi_reg_push + ((0xf0 >> (args_to_push / 4)) & 0xf); + else + insn = emit_insn + (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (- args_to_push))); + + RTX_FRAME_RELATED_P (insn) = 1; + + saved_pretend_args = 1; + fp_offset = args_to_push; + args_to_push = 0; + + /* Now reuse r3 to preserve IP. */ + emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); + } + } + + insn = emit_set_insn (ip_rtx, + plus_constant (stack_pointer_rtx, fp_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (args_to_push) + { + /* Push the argument registers, or reserve space for them. */ + if (cfun->machine->uses_anonymous_args) + insn = emit_multi_reg_push + ((0xf0 >> (args_to_push / 4)) & 0xf); + else + insn = emit_insn + (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (- args_to_push))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If this is an interrupt service routine, and the link register + is going to be pushed, and we're not generating extra + push of IP (needed when frame is needed and frame layout if apcs), + subtracting four from LR now will mean that the function return + can be done with a single instruction. */ + if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ) + && (live_regs_mask & (1 << LR_REGNUM)) != 0 + && !(frame_pointer_needed && TARGET_APCS_FRAME) + && TARGET_ARM) + { + rtx lr = gen_rtx_REG (SImode, LR_REGNUM); + + emit_set_insn (lr, plus_constant (lr, -4)); + } + + if (live_regs_mask) + { + saved_regs += bit_count (live_regs_mask) * 4; + if (optimize_size && !frame_pointer_needed + && saved_regs == offsets->saved_regs - offsets->saved_args) + { + /* If no coprocessor registers are being pushed and we don't have + to worry about a frame pointer then push extra registers to + create the stack frame. This is done is a way that does not + alter the frame layout, so is independent of the epilogue. */ + int n; + int frame; + n = 0; + while (n < 8 && (live_regs_mask & (1 << n)) == 0) + n++; + frame = offsets->outgoing_args - (offsets->saved_args + saved_regs); + if (frame && n * 4 >= frame) + { + n = frame / 4; + live_regs_mask |= (1 << n) - 1; + saved_regs += frame; + } + } + insn = emit_multi_reg_push (live_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (! IS_VOLATILE (func_type)) + saved_regs += arm_save_coproc_regs (); + + if (frame_pointer_needed && TARGET_ARM) + { + /* Create the new frame pointer. */ + if (TARGET_APCS_FRAME) + { + insn = GEN_INT (-(4 + args_to_push + fp_offset)); + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn)); + RTX_FRAME_RELATED_P (insn) = 1; + + if (IS_NESTED (func_type)) + { + /* Recover the static chain register. 
*/ + if (!df_regs_ever_live_p (3) + || saved_pretend_args) + insn = gen_rtx_REG (SImode, 3); + else /* if (crtl->args.pretend_args_size == 0) */ + { + insn = plus_constant (hard_frame_pointer_rtx, 4); + insn = gen_frame_mem (SImode, insn); + } + emit_set_insn (ip_rtx, insn); + /* Add a USE to stop propagate_one_insn() from barfing. */ + emit_insn (gen_prologue_use (ip_rtx)); + } + } + else + { + insn = GEN_INT (saved_regs - 4); + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, insn)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (flag_stack_usage) + current_function_static_stack_size + = offsets->outgoing_args - offsets->saved_args; + + if (offsets->outgoing_args != offsets->saved_args + saved_regs) + { + /* This add can produce multiple insns for a large constant, so we + need to get tricky. */ + rtx last = get_last_insn (); + + amount = GEN_INT (offsets->saved_args + saved_regs + - offsets->outgoing_args); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + amount)); + do + { + last = last ? NEXT_INSN (last) : get_insns (); + RTX_FRAME_RELATED_P (last) = 1; + } + while (last != insn); + + /* If the frame pointer is needed, emit a special barrier that + will prevent the scheduler from moving stores to the frame + before the stack adjustment. */ + if (frame_pointer_needed) + insn = emit_insn (gen_stack_tie (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + + + if (frame_pointer_needed && TARGET_THUMB2) + thumb_set_frame_pointer (offsets); + + if (flag_pic && arm_pic_register != INVALID_REGNUM) + { + unsigned long mask; + + mask = live_regs_mask; + mask &= THUMB2_WORK_REGS; + if (!IS_NESTED (func_type)) + mask |= (1 << IP_REGNUM); + arm_load_pic_register (mask); + } + + /* If we are profiling, make sure no instructions are scheduled before + the call to mcount. Similarly if the user has requested no + scheduling in the prolog. Similarly if we want non-call exceptions + using the EABI unwinder, to prevent faulting instructions from being + swapped with a stack adjustment. */ + if (crtl->profile || !TARGET_SCHED_PROLOG + || (arm_except_unwind_info (&global_options) == UI_TARGET + && cfun->can_throw_non_call_exceptions)) + emit_insn (gen_blockage ()); + + /* If the link register is being kept alive, with the return address in it, + then make sure that it does not get reused by the ce2 pass. */ + if ((live_regs_mask & (1 << LR_REGNUM)) == 0) + cfun->machine->lr_save_eliminated = 1; +} + +/* Print condition code to STREAM. Helper function for arm_print_operand. */ +static void +arm_print_condition (FILE *stream) +{ + if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4) + { + /* Branch conversion is not implemented for Thumb-2. */ + if (TARGET_THUMB) + { + output_operand_lossage ("predicated Thumb instruction"); + return; + } + if (current_insn_predicate != NULL) + { + output_operand_lossage + ("predicated instruction in conditional sequence"); + return; + } + + fputs (arm_condition_codes[arm_current_cc], stream); + } + else if (current_insn_predicate) + { + enum arm_cond_code code; + + if (TARGET_THUMB1) + { + output_operand_lossage ("predicated Thumb instruction"); + return; + } + + code = get_arm_condition_code (current_insn_predicate); + fputs (arm_condition_codes[code], stream); + } +} + + +/* If CODE is 'd', then the X is a condition operand and the instruction + should only be executed if the condition is true. 
+ if CODE is 'D', then the X is a condition operand and the instruction + should only be executed if the condition is false: however, if the mode + of the comparison is CCFPEmode, then always execute the instruction -- we + do this because in these circumstances !GE does not necessarily imply LT; + in these cases the instruction pattern will take care to make sure that + an instruction containing %d will follow, thereby undoing the effects of + doing this instruction unconditionally. + If CODE is 'N' then X is a floating point operand that must be negated + before output. + If CODE is 'B' then output a bitwise inverted value of X (a const int). + If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */ +static void +arm_print_operand (FILE *stream, rtx x, int code) +{ + switch (code) + { + case '@': + fputs (ASM_COMMENT_START, stream); + return; + + case '_': + fputs (user_label_prefix, stream); + return; + + case '|': + fputs (REGISTER_PREFIX, stream); + return; + + case '?': + arm_print_condition (stream); + return; + + case '(': + /* Nothing in unified syntax, otherwise the current condition code. */ + if (!TARGET_UNIFIED_ASM) + arm_print_condition (stream); + break; + + case ')': + /* The current condition code in unified syntax, otherwise nothing. */ + if (TARGET_UNIFIED_ASM) + arm_print_condition (stream); + break; + + case '.': + /* The current condition code for a condition code setting instruction. + Preceded by 's' in unified syntax, otherwise followed by 's'. */ + if (TARGET_UNIFIED_ASM) + { + fputc('s', stream); + arm_print_condition (stream); + } + else + { + arm_print_condition (stream); + fputc('s', stream); + } + return; + + case '!': + /* If the instruction is conditionally executed then print + the current condition code, otherwise print 's'. */ + gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM); + if (current_insn_predicate) + arm_print_condition (stream); + else + fputc('s', stream); + break; + + /* %# is a "break" sequence. It doesn't output anything, but is used to + separate e.g. operand numbers from following text, if that text consists + of further digits which we don't want to be part of the operand + number. */ + case '#': + return; + + case 'N': + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + r = real_value_negate (&r); + fprintf (stream, "%s", fp_const_from_val (&r)); + } + return; + + /* An integer or symbol address without a preceding # sign. */ + case 'c': + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case SYMBOL_REF: + output_addr_const (stream, x); + break; + + default: + gcc_unreachable (); + } + return; + + case 'B': + if (GET_CODE (x) == CONST_INT) + { + HOST_WIDE_INT val; + val = ARM_SIGN_EXTEND (~INTVAL (x)); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val); + } + else + { + putc ('~', stream); + output_addr_const (stream, x); + } + return; + + case 'L': + /* The low 16 bits of an immediate constant. */ + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); + return; + + case 'i': + fprintf (stream, "%s", arithmetic_instr (x, 1)); + return; + + /* Truncate Cirrus shift counts. 
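Several of the cases below print constants rather than registers; 'B', for example, prints the bitwise inverse of an integer operand, sign-extended so that the assembler sees the expected negative value. A sketch of just that transform, with a 32-bit cast standing in for ARM_SIGN_EXTEND.

    #include <stdint.h>
    #include <stdio.h>

    /* What the 'B' operand code prints for an integer constant VAL:
       the one's complement of its low 32 bits, as a signed decimal.  */
    static void
    print_B (long long val)
    {
      int32_t inverted = (int32_t) ~(uint32_t) val;

      printf ("%d", (int) inverted);
    }

    /* Example: print_B (5) prints -6 and print_B (0) prints -1.  */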
*/ + case 's': + if (GET_CODE (x) == CONST_INT) + { + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f); + return; + } + arm_print_operand (stream, x, 0); + return; + + case 'I': + fprintf (stream, "%s", arithmetic_instr (x, 0)); + return; + + case 'S': + { + HOST_WIDE_INT val; + const char *shift; + + if (!shift_operator (x, SImode)) + { + output_operand_lossage ("invalid shift operand"); + break; + } + + shift = shift_op (x, &val); + + if (shift) + { + fprintf (stream, ", %s ", shift); + if (val == -1) + arm_print_operand (stream, XEXP (x, 1), 0); + else + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); + } + } + return; + + /* An explanation of the 'Q', 'R' and 'H' register operands: + + In a pair of registers containing a DI or DF value the 'Q' + operand returns the register number of the register containing + the least significant part of the value. The 'R' operand returns + the register number of the register containing the most + significant part of the value. + + The 'H' operand returns the higher of the two register numbers. + On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the + same as the 'Q' operand, since the most significant part of the + value is held in the lower number register. The reverse is true + on systems where WORDS_BIG_ENDIAN is false. + + The purpose of these operands is to distinguish between cases + where the endian-ness of the values is important (for example + when they are added together), and cases where the endian-ness + is irrelevant, but the order of register operations is important. + For example when loading a value from memory into a register + pair, the endian-ness does not matter. Provided that the value + from the lower memory address is put into the lower numbered + register, and the value from the higher address is put into the + higher numbered register, the load will work regardless of whether + the value being loaded is big-wordian or little-wordian. The + order of the two register loads can matter however, if the address + of the memory location is actually held in one of the registers + being overwritten by the load. + + The 'Q' and 'R' constraints are also available for 64-bit + constants. */ + case 'Q': + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) + { + rtx part = gen_lowpart (SImode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)); + return; + + case 'R': + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) + { + enum machine_mode mode = GET_MODE (x); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + part = gen_highpart_mode (SImode, mode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 
0 : 1)); + return; + + case 'H': + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + 1); + return; + + case 'J': + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2)); + return; + + case 'K': + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3)); + return; + + case 'm': + asm_fprintf (stream, "%r", + GET_CODE (XEXP (x, 0)) == REG + ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0))); + return; + + case 'M': + asm_fprintf (stream, "{%r-%r}", + REGNO (x), + REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1); + return; + + /* Like 'M', but writing doubleword vector registers, for use by Neon + insns. */ + case 'h': + { + int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2; + int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2; + if (numregs == 1) + asm_fprintf (stream, "{d%d}", regno); + else + asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1); + } + return; + + case 'd': + /* CONST_TRUE_RTX means always -- that's the default. */ + if (x == const_true_rtx) + return; + + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fputs (arm_condition_codes[get_arm_condition_code (x)], + stream); + return; + + case 'D': + /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever + want to do that. */ + if (x == const_true_rtx) + { + output_operand_lossage ("instruction never executed"); + return; + } + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE + (get_arm_condition_code (x))], + stream); + return; + + /* Cirrus registers can be accessed in a variety of ways: + single floating point (f) + double floating point (d) + 32bit integer (fx) + 64bit integer (dx). */ + case 'W': /* Cirrus register in F mode. */ + case 'X': /* Cirrus register in D mode. */ + case 'Y': /* Cirrus register in FX mode. */ + case 'Z': /* Cirrus register in DX mode. */ + gcc_assert (GET_CODE (x) == REG + && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS); + + fprintf (stream, "mv%s%s", + code == 'W' ? "f" + : code == 'X' ? "d" + : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2); + + return; + + /* Print cirrus register in the mode specified by the register's mode. */ + case 'V': + { + int mode = GET_MODE (x); + + if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "mv%s%s", + mode == DFmode ? "d" + : mode == SImode ? "fx" + : mode == DImode ? "dx" + : "f", reg_names[REGNO (x)] + 2); + + return; + } + + case 'U': + if (GET_CODE (x) != REG + || REGNO (x) < FIRST_IWMMXT_GR_REGNUM + || REGNO (x) > LAST_IWMMXT_GR_REGNUM) + /* Bad value for wCG register number. */ + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + else + fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM); + return; + + /* Print an iWMMXt control register name. 
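The 'Q', 'R' and 'H' cases above select one register out of the pair holding a 64-bit value: 'Q' the word holding the least significant half, 'R' the most significant, and 'H' simply the higher register number, with 'Q' and 'R' swapping roles under WORDS_BIG_ENDIAN. A sketch of that selection; the macro stand-in and function names are illustrative.

    /* Stand-in for the target macro; 0 on little-endian ARM.  */
    #ifndef WORDS_BIG_ENDIAN
    #define WORDS_BIG_ENDIAN 0
    #endif

    /* Register printed for a 64-bit value living in {regno, regno + 1}.  */
    static int
    pair_low_word (int regno)    /* operand code 'Q' */
    {
      return regno + (WORDS_BIG_ENDIAN ? 1 : 0);
    }

    static int
    pair_high_word (int regno)   /* operand code 'R' */
    {
      return regno + (WORDS_BIG_ENDIAN ? 0 : 1);
    }

    static int
    pair_high_regno (int regno)  /* operand code 'H', endian-agnostic */
    {
      return regno + 1;
    }

    /* Example: a DImode value in {r2, r3} on a little-word-endian
       target prints r2 for %Q, r3 for %R and r3 for %H.  */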
*/ + case 'w': + if (GET_CODE (x) != CONST_INT + || INTVAL (x) < 0 + || INTVAL (x) >= 16) + /* Bad value for wC register number. */ + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + else + { + static const char * wc_reg_names [16] = + { + "wCID", "wCon", "wCSSF", "wCASF", + "wC4", "wC5", "wC6", "wC7", + "wCGR0", "wCGR1", "wCGR2", "wCGR3", + "wC12", "wC13", "wC14", "wC15" + }; + + fprintf (stream, wc_reg_names [INTVAL (x)]); + } + return; + + /* Print the high single-precision register of a VFP double-precision + register. */ + case 'p': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_DOUBLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1); + } + return; + + /* Print a VFP/Neon double precision or quad precision register name. */ + case 'P': + case 'q': + { + int mode = GET_MODE (x); + int is_quad = (code == 'q'); + int regno; + + if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + if (GET_CODE (x) != REG + || !IS_VFP_REGNUM (REGNO (x))) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno)) + || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno))) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "%c%d", is_quad ? 'q' : 'd', + (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1)); + } + return; + + /* These two codes print the low/high doubleword register of a Neon quad + register, respectively. For pair-structure types, can also print + low/high quadword registers. */ + case 'e': + case 'f': + { + int mode = GET_MODE (x); + int regno; + + if ((GET_MODE_SIZE (mode) != 16 + && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!NEON_REGNO_OK_FOR_QUAD (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + if (GET_MODE_SIZE (mode) == 16) + fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1) + + (code == 'f' ? 1 : 0)); + else + fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2) + + (code == 'f' ? 1 : 0)); + } + return; + + /* Print a VFPv3 floating-point constant, represented as an integer + index. */ + case 'G': + { + int index = vfp3_const_double_index (x); + gcc_assert (index != -1); + fprintf (stream, "%d", index); + } + return; + + /* Print bits representing opcode features for Neon. + + Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed + and polynomials as unsigned. + + Bit 1 is 1 for floats and polynomials, 0 for ordinary integers. + + Bit 2 is 1 for rounding functions, 0 otherwise. */ + + /* Identify the type as 's', 'u', 'p' or 'f'. */ + case 'T': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("uspf"[bits & 3], stream); + } + return; + + /* Likewise, but signed and unsigned integers are both 'i'. */ + case 'F': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("iipf"[bits & 3], stream); + } + return; + + /* As for 'T', but emit 'u' instead of 'p'. 
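The 'p', 'P'/'q' and 'e'/'f' cases above all map a hard register number in the VFP bank onto the S, D and Q names the assembler expects, where each D register overlays two S registers and each Q register overlays two D registers. A sketch of the arithmetic, with an illustrative FIRST_VFP_REGNUM; the 'y' lane form handled further below follows the same rule.

    #include <stdio.h>

    #define FIRST_VFP_REGNUM 63   /* illustrative value, not arm.c's */

    /* S, D and Q spellings for the hard register REGNO, mirroring the
       arithmetic in the operand-printing cases above.  */
    static void
    print_vfp_names (int regno)
    {
      int s = regno - FIRST_VFP_REGNUM;

      printf ("s%d", s);                  /* single-precision name      */
      printf (" d%d", s / 2);             /* D register that contains it */
      printf (" d%d[%d]", s / 2, s % 2);  /* 'y': D register plus lane   */
      printf (" q%d\n", s / 4);           /* Q register that contains it */
    }

    /* Example: FIRST_VFP_REGNUM + 5 is s5, i.e. lane 1 of d2, which is
       in turn the low half of q1.  */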
*/ + case 't': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("usuf"[bits & 3], stream); + } + return; + + /* Bit 2: rounding (vs none). */ + case 'O': + { + HOST_WIDE_INT bits = INTVAL (x); + fputs ((bits & 4) != 0 ? "r" : "", stream); + } + return; + + /* Memory operand for vld1/vst1 instruction. */ + case 'A': + { + rtx addr; + bool postinc = FALSE; + unsigned align, modesize, align_bits; + + gcc_assert (GET_CODE (x) == MEM); + addr = XEXP (x, 0); + if (GET_CODE (addr) == POST_INC) + { + postinc = 1; + addr = XEXP (addr, 0); + } + asm_fprintf (stream, "[%r", REGNO (addr)); + + /* We know the alignment of this access, so we can emit a hint in the + instruction (for some alignments) as an aid to the memory subsystem + of the target. */ + align = MEM_ALIGN (x) >> 3; + modesize = GET_MODE_SIZE (GET_MODE (x)); + + /* Only certain alignment specifiers are supported by the hardware. */ + if (modesize == 16 && (align % 32) == 0) + align_bits = 256; + else if ((modesize == 8 || modesize == 16) && (align % 16) == 0) + align_bits = 128; + else if ((align % 8) == 0) + align_bits = 64; + else + align_bits = 0; + + if (align_bits != 0) + asm_fprintf (stream, ":%d", align_bits); + + asm_fprintf (stream, "]"); + + if (postinc) + fputs("!", stream); + } + return; + + case 'C': + { + rtx addr; + + gcc_assert (GET_CODE (x) == MEM); + addr = XEXP (x, 0); + gcc_assert (GET_CODE (addr) == REG); + asm_fprintf (stream, "[%r]", REGNO (addr)); + } + return; + + /* Translate an S register number into a D register number and element index. */ + case 'y': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno / 2, regno % 2); + } + return; + + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); + } + return; + + default: + if (x == 0) + { + output_operand_lossage ("missing operand"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + asm_fprintf (stream, "%r", REGNO (x)); + break; + + case MEM: + output_memory_reference_mode = GET_MODE (x); + output_address (XEXP (x, 0)); + break; + + case CONST_DOUBLE: + if (TARGET_NEON) + { + char fpstr[20]; + real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), + sizeof (fpstr), 0, 1); + fprintf (stream, "#%s", fpstr); + } + else + fprintf (stream, "#%s", fp_immediate_constant (x)); + break; + + default: + gcc_assert (GET_CODE (x) != NEG); + fputc ('#', stream); + if (GET_CODE (x) == HIGH) + { + fputs (":lower16:", stream); + x = XEXP (x, 0); + } + + output_addr_const (stream, x); + break; + } + } +} + +/* Target hook for printing a memory address. 
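
The 'A' code above appends an alignment hint to a vld1/vst1 address when the known alignment permits one. A standalone restatement of that decision (illustrative only; sizes and alignments in bytes, the returned hint in bits):

#include <stdio.h>

/* Return the :64/:128/:256 hint for an access of MODESIZE bytes whose
   address is known to be ALIGN-byte aligned, or 0 for no hint.  */
static unsigned align_hint_bits (unsigned modesize, unsigned align)
{
  if (modesize == 16 && (align % 32) == 0)
    return 256;
  if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
    return 128;
  if ((align % 8) == 0)
    return 64;
  return 0;
}

int main (void)
{
  /* A 16-byte access from a 16-byte aligned pointer gets ":128",
     as in "vld1.64 {d0-d1}, [r0:128]".  */
  printf (":%u\n", align_hint_bits (16, 16));
  return 0;
}
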
*/ +static void +arm_print_operand_address (FILE *stream, rtx x) +{ + if (TARGET_32BIT) + { + int is_minus = GET_CODE (x) == MINUS; + + if (GET_CODE (x) == REG) + asm_fprintf (stream, "[%r, #0]", REGNO (x)); + else if (GET_CODE (x) == PLUS || is_minus) + { + rtx base = XEXP (x, 0); + rtx index = XEXP (x, 1); + HOST_WIDE_INT offset = 0; + if (GET_CODE (base) != REG + || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM)) + { + /* Ensure that BASE is a register. */ + /* (one of them must be). */ + /* Also ensure the SP is not used as in index register. */ + rtx temp = base; + base = index; + index = temp; + } + switch (GET_CODE (index)) + { + case CONST_INT: + offset = INTVAL (index); + if (is_minus) + offset = -offset; + asm_fprintf (stream, "[%r, #%wd]", + REGNO (base), offset); + break; + + case REG: + asm_fprintf (stream, "[%r, %s%r]", + REGNO (base), is_minus ? "-" : "", + REGNO (index)); + break; + + case MULT: + case ASHIFTRT: + case LSHIFTRT: + case ASHIFT: + case ROTATERT: + { + asm_fprintf (stream, "[%r, %s%r", + REGNO (base), is_minus ? "-" : "", + REGNO (XEXP (index, 0))); + arm_print_operand (stream, index, 'S'); + fputs ("]", stream); + break; + } + + default: + gcc_unreachable (); + } + } + else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC + || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC) + { + extern enum machine_mode output_memory_reference_mode; + + gcc_assert (GET_CODE (XEXP (x, 0)) == REG); + + if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC) + asm_fprintf (stream, "[%r, #%s%d]!", + REGNO (XEXP (x, 0)), + GET_CODE (x) == PRE_DEC ? "-" : "", + GET_MODE_SIZE (output_memory_reference_mode)); + else + asm_fprintf (stream, "[%r], #%s%d", + REGNO (XEXP (x, 0)), + GET_CODE (x) == POST_DEC ? "-" : "", + GET_MODE_SIZE (output_memory_reference_mode)); + } + else if (GET_CODE (x) == PRE_MODIFY) + { + asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0))); + if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT) + asm_fprintf (stream, "#%wd]!", + INTVAL (XEXP (XEXP (x, 1), 1))); + else + asm_fprintf (stream, "%r]!", + REGNO (XEXP (XEXP (x, 1), 1))); + } + else if (GET_CODE (x) == POST_MODIFY) + { + asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0))); + if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT) + asm_fprintf (stream, "#%wd", + INTVAL (XEXP (XEXP (x, 1), 1))); + else + asm_fprintf (stream, "%r", + REGNO (XEXP (XEXP (x, 1), 1))); + } + else output_addr_const (stream, x); + } + else + { + if (GET_CODE (x) == REG) + asm_fprintf (stream, "[%r]", REGNO (x)); + else if (GET_CODE (x) == POST_INC) + asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0))); + else if (GET_CODE (x) == PLUS) + { + gcc_assert (GET_CODE (XEXP (x, 0)) == REG); + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + asm_fprintf (stream, "[%r, #%wd]", + REGNO (XEXP (x, 0)), + INTVAL (XEXP (x, 1))); + else + asm_fprintf (stream, "[%r, %r]", + REGNO (XEXP (x, 0)), + REGNO (XEXP (x, 1))); + } + else + output_addr_const (stream, x); + } +} + +/* Target hook for indicating whether a punctuation character for + TARGET_PRINT_OPERAND is valid. */ +static bool +arm_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '@' || code == '|' || code == '.' + || code == '(' || code == ')' || code == '#' + || (TARGET_32BIT && (code == '?')) + || (TARGET_THUMB2 && (code == '!')) + || (TARGET_THUMB && (code == '_'))); +} + +/* Target hook for assembling integer objects. The ARM version needs to + handle word-sized values specially. 
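
arm_print_operand_address above chooses between the plain, offset, register-indexed and writeback address forms. A small standalone sketch of just the two writeback spellings it prints, with made-up register numbers and offsets:

#include <stdio.h>

/* Pre-indexed: the offset is applied before the access and written back
   ("[rN, #imm]!").  Post-indexed: the base is used as-is and the offset is
   added afterwards ("[rN], #imm").  */
static void print_writeback_address (int regno, int offset, int pre_indexed)
{
  if (pre_indexed)
    printf ("[r%d, #%d]!\n", regno, offset);
  else
    printf ("[r%d], #%d\n", regno, offset);
}

int main (void)
{
  print_writeback_address (3, 8, 1);   /* -> [r3, #8]! */
  print_writeback_address (3, -4, 0);  /* -> [r3], #-4 */
  return 0;
}
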
*/ +static bool +arm_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + enum machine_mode mode; + + if (size == UNITS_PER_WORD && aligned_p) + { + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + + /* Mark symbols as position independent. We only do this in the + .text segment, not in the .data segment. */ + if (NEED_GOT_RELOC && flag_pic && making_const_table && + (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)) + { + /* See legitimize_pic_address for an explanation of the + TARGET_VXWORKS_RTP check. */ + if (TARGET_VXWORKS_RTP + || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x))) + fputs ("(GOT)", asm_out_file); + else + fputs ("(GOTOFF)", asm_out_file); + } + fputc ('\n', asm_out_file); + return true; + } + + mode = GET_MODE (x); + + if (arm_vector_mode_supported_p (mode)) + { + int i, units; + + gcc_assert (GET_CODE (x) == CONST_VECTOR); + + units = CONST_VECTOR_NUNITS (x); + size = GET_MODE_SIZE (GET_MODE_INNER (mode)); + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i); + assemble_integer + (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1); + } + else + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i); + REAL_VALUE_TYPE rval; + + REAL_VALUE_FROM_CONST_DOUBLE (rval, elt); + + assemble_real + (rval, GET_MODE_INNER (mode), + i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT); + } + + return true; + } + + return default_assemble_integer (x, size, aligned_p); +} + +static void +arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor) +{ + section *s; + + if (!TARGET_AAPCS_BASED) + { + (is_ctor ? + default_named_section_asm_out_constructor + : default_named_section_asm_out_destructor) (symbol, priority); + return; + } + + /* Put these in the .init_array section, using a special relocation. */ + if (priority != DEFAULT_INIT_PRIORITY) + { + char buf[18]; + sprintf (buf, "%s.%.5u", + is_ctor ? ".init_array" : ".fini_array", + priority); + s = get_section (buf, SECTION_WRITE, NULL_TREE); + } + else if (is_ctor) + s = ctors_section; + else + s = dtors_section; + + switch_to_section (s); + assemble_align (POINTER_SIZE); + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, symbol); + fputs ("(target1)\n", asm_out_file); +} + +/* Add a function to the list of static constructors. */ + +static void +arm_elf_asm_constructor (rtx symbol, int priority) +{ + arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true); +} + +/* Add a function to the list of static destructors. */ + +static void +arm_elf_asm_destructor (rtx symbol, int priority) +{ + arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false); +} + +/* A finite state machine takes care of noticing whether or not instructions + can be conditionally executed, and thus decrease execution time and code + size by deleting branch instructions. The fsm is controlled by + final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. 
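
arm_elf_asm_cdtor above places prioritised constructors and destructors in numbered .init_array/.fini_array sections on AAPCS targets. A standalone sketch of that naming; the default-priority value of 65535 is an assumption about DEFAULT_INIT_PRIORITY rather than something defined in this file:

#include <stdio.h>

/* Assumed default priority; GCC's DEFAULT_INIT_PRIORITY is not defined in
   this file.  */
#define ASSUMED_DEFAULT_INIT_PRIORITY 65535u

static void print_cdtor_section (unsigned priority, int is_ctor)
{
  const char *base = is_ctor ? ".init_array" : ".fini_array";
  if (priority == ASSUMED_DEFAULT_INIT_PRIORITY)
    puts (base);
  else
    printf ("%s.%.5u\n", base, priority);  /* same "%s.%.5u" format as above */
}

int main (void)
{
  print_cdtor_section (101, 1);                            /* -> .init_array.00101 */
  print_cdtor_section (ASSUMED_DEFAULT_INIT_PRIORITY, 0);  /* -> .fini_array       */
  return 0;
}
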
*/ + +/* The state of the fsm controlling condition codes are: + 0: normal, do nothing special + 1: make ASM_OUTPUT_OPCODE not output this instruction + 2: make ASM_OUTPUT_OPCODE not output this instruction + 3: make instructions conditional + 4: make instructions conditional + + State transitions (state->state by whom under condition): + 0 -> 1 final_prescan_insn if the `target' is a label + 0 -> 2 final_prescan_insn if the `target' is an unconditional branch + 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch + 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch + 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached + (the target label has CODE_LABEL_NUMBER equal to arm_target_label). + 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached + (the target insn is arm_target_insn). + + If the jump clobbers the conditions then we use states 2 and 4. + + A similar thing can be done with conditional return insns. + + XXX In case the `target' is an unconditional branch, this conditionalising + of the instructions always reduces code size, but not always execution + time. But then, I want to reduce the code size to somewhere near what + /bin/cc produces. */ + +/* In addition to this, state is maintained for Thumb-2 COND_EXEC + instructions. When a COND_EXEC instruction is seen the subsequent + instructions are scanned so that multiple conditional instructions can be + combined into a single IT block. arm_condexec_count and arm_condexec_mask + specify the length and true/false mask for the IT block. These will be + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + +/* Returns the index of the ARM condition code string in + `arm_condition_codes'. COMPARISON should be an rtx like + `(eq (...) (...))'. */ +static enum arm_cond_code +get_arm_condition_code (rtx comparison) +{ + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; + enum rtx_code comp_code = GET_CODE (comparison); + + if (GET_MODE_CLASS (mode) != MODE_CC) + mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0), + XEXP (comparison, 1)); + + switch (mode) + { + case CC_DNEmode: code = ARM_NE; goto dominance; + case CC_DEQmode: code = ARM_EQ; goto dominance; + case CC_DGEmode: code = ARM_GE; goto dominance; + case CC_DGTmode: code = ARM_GT; goto dominance; + case CC_DLEmode: code = ARM_LE; goto dominance; + case CC_DLTmode: code = ARM_LT; goto dominance; + case CC_DGEUmode: code = ARM_CS; goto dominance; + case CC_DGTUmode: code = ARM_HI; goto dominance; + case CC_DLEUmode: code = ARM_LS; goto dominance; + case CC_DLTUmode: code = ARM_CC; + + dominance: + gcc_assert (comp_code == EQ || comp_code == NE); + + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); + return code; + + case CC_NOOVmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; + default: gcc_unreachable (); + } + + case CC_Zmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + default: gcc_unreachable (); + } + + case CC_Nmode: + switch (comp_code) + { + case NE: return ARM_MI; + case EQ: return ARM_PL; + default: gcc_unreachable (); + } + + case CCFPEmode: + case CCFPmode: + /* These encodings assume that AC=1 in the FPA system control + byte. This allows us to handle all cases except UNEQ and + LTGT. 
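
The five ccfsm states and the transition list given a little earlier can be summarised as a small transition function. This is only a schematic of that description, not how the port actually stores or drives the state (it uses arm_ccfsm_state together with final_prescan_insn and ASM_OUTPUT_OPCODE):

/* Schematic of the ccfsm transitions described above (illustrative only).  */
enum ccfsm_state
{
  CCFSM_NORMAL = 0,          /* do nothing special                        */
  CCFSM_SKIP_TO_LABEL = 1,   /* suppress the branch to a label            */
  CCFSM_SKIP_TO_BRANCH = 2,  /* suppress the branch over a branch         */
  CCFSM_COND_TO_LABEL = 3,   /* conditionalise insns up to the label      */
  CCFSM_COND_TO_BRANCH = 4   /* conditionalise insns up to the branch     */
};

enum ccfsm_event
{
  SAW_BRANCH_TO_LABEL,       /* conditional branch whose target is a label   */
  SAW_BRANCH_OVER_BRANCH,    /* conditional branch over an unconditional one */
  BRANCH_SUPPRESSED,         /* ASM_OUTPUT_OPCODE swallowed the branch       */
  TARGET_REACHED             /* the label / unconditional branch was reached */
};

static enum ccfsm_state
ccfsm_next (enum ccfsm_state s, enum ccfsm_event e)
{
  switch (s)
    {
    case CCFSM_NORMAL:
      if (e == SAW_BRANCH_TO_LABEL)    return CCFSM_SKIP_TO_LABEL;
      if (e == SAW_BRANCH_OVER_BRANCH) return CCFSM_SKIP_TO_BRANCH;
      return s;
    case CCFSM_SKIP_TO_LABEL:
      return e == BRANCH_SUPPRESSED ? CCFSM_COND_TO_LABEL : s;
    case CCFSM_SKIP_TO_BRANCH:
      return e == BRANCH_SUPPRESSED ? CCFSM_COND_TO_BRANCH : s;
    case CCFSM_COND_TO_LABEL:
    case CCFSM_COND_TO_BRANCH:
      return e == TARGET_REACHED ? CCFSM_NORMAL : s;
    }
  return s;
}
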
*/ + switch (comp_code) + { + case GE: return ARM_GE; + case GT: return ARM_GT; + case LE: return ARM_LS; + case LT: return ARM_MI; + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case ORDERED: return ARM_VC; + case UNORDERED: return ARM_VS; + case UNLT: return ARM_LT; + case UNLE: return ARM_LE; + case UNGT: return ARM_HI; + case UNGE: return ARM_PL; + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. */ + default: gcc_unreachable (); + } + + case CC_SWPmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_LE; + case GT: return ARM_LT; + case LE: return ARM_GE; + case LT: return ARM_GT; + case GEU: return ARM_LS; + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; + default: gcc_unreachable (); + } + + case CC_Cmode: + switch (comp_code) + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; + default: gcc_unreachable (); + } + + case CC_CZmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GEU: return ARM_CS; + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; + default: gcc_unreachable (); + } + + case CC_NCVmode: + switch (comp_code) + { + case GE: return ARM_GE; + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; + default: gcc_unreachable (); + } + + case CCmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_GE; + case GT: return ARM_GT; + case LE: return ARM_LE; + case LT: return ARM_LT; + case GEU: return ARM_CS; + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; + default: gcc_unreachable (); + } + + default: gcc_unreachable (); + } +} + +/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. */ +void +thumb2_final_prescan_insn (rtx insn) +{ + rtx first_insn = insn; + rtx body = PATTERN (insn); + rtx predicate; + enum arm_cond_code code; + int n; + int mask; + + /* Remove the previous insn from the count of insns to be output. */ + if (arm_condexec_count) + arm_condexec_count--; + + /* Nothing to do if we are already inside a conditional block. */ + if (arm_condexec_count) + return; + + if (GET_CODE (body) != COND_EXEC) + return; + + /* Conditional jumps are implemented directly. */ + if (GET_CODE (insn) == JUMP_INSN) + return; + + predicate = COND_EXEC_TEST (body); + arm_current_cc = get_arm_condition_code (predicate); + + n = get_attr_ce_count (insn); + arm_condexec_count = 1; + arm_condexec_mask = (1 << n) - 1; + arm_condexec_masklen = n; + /* See if subsequent instructions can be combined into the same block. */ + for (;;) + { + insn = next_nonnote_insn (insn); + + /* Jumping into the middle of an IT block is illegal, so a label or + barrier terminates the block. */ + if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN) + break; + + body = PATTERN (insn); + /* USE and CLOBBER aren't really insns, so just skip them. */ + if (GET_CODE (body) == USE + || GET_CODE (body) == CLOBBER) + continue; + + /* ??? Recognize conditional jumps, and combine them with IT blocks. */ + if (GET_CODE (body) != COND_EXEC) + break; + /* Allow up to 4 conditionally executed instructions in a block. 
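
In get_arm_condition_code above, the CC_SWPmode case mirrors every ordering because the comparison operands were swapped when the condition register was set. A standalone sketch of that mirroring, using condition mnemonics rather than the port's enum values:

#include <stdio.h>
#include <string.h>

/* Map a condition mnemonic to the one needed when the two comparison
   operands have been swapped; eq and ne are unaffected.  */
static const char *swapped_condition (const char *cond)
{
  static const char *const pairs[][2] =
  {
    { "ge", "le" }, { "gt", "lt" }, { "le", "ge" }, { "lt", "gt" },
    { "cs", "ls" }, { "hi", "cc" }, { "ls", "cs" }, { "cc", "hi" },
  };
  for (unsigned i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
    if (strcmp (cond, pairs[i][0]) == 0)
      return pairs[i][1];
  return cond;                  /* eq, ne, ... unchanged */
}

int main (void)
{
  /* "hi" (unsigned >) becomes "cc" (unsigned <) once the operands swap.  */
  printf ("%s\n", swapped_condition ("hi"));
  return 0;
}
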
*/ + n = get_attr_ce_count (insn); + if (arm_condexec_masklen + n > 4) + break; + + predicate = COND_EXEC_TEST (body); + code = get_arm_condition_code (predicate); + mask = (1 << n) - 1; + if (arm_current_cc == code) + arm_condexec_mask |= (mask << arm_condexec_masklen); + else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code)) + break; + + arm_condexec_count++; + arm_condexec_masklen += n; + + /* A jump must be the last instruction in a conditional block. */ + if (GET_CODE(insn) == JUMP_INSN) + break; + } + /* Restore recog_data (getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call). */ + extract_constrain_insn_cached (first_insn); +} + +void +arm_final_prescan_insn (rtx insn) +{ + /* BODY will hold the body of INSN. */ + rtx body = PATTERN (insn); + + /* This will be 1 if trying to repeat the trick, and things need to be + reversed if it appears to fail. */ + int reverse = 0; + + /* If we start with a return insn, we only succeed if we find another one. */ + int seeking_return = 0; + + /* START_INSN will hold the insn from where we start looking. This is the + first insn after the following code_label if REVERSE is true. */ + rtx start_insn = insn; + + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (arm_ccfsm_state == 4) + { + if (insn == arm_target_insn) + { + arm_target_insn = NULL; + arm_ccfsm_state = 0; + } + return; + } + + /* If in state 3, it is possible to repeat the trick, if this insn is an + unconditional branch to a label, and immediately following this branch + is the previous target label which is only used once, and the label this + branch jumps to is not too far off. */ + if (arm_ccfsm_state == 3) + { + if (simplejump_p (insn)) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + { + /* XXX Isn't this always a barrier? */ + start_insn = next_nonnote_insn (start_insn); + } + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == arm_target_label + && LABEL_NUSES (start_insn) == 1) + reverse = TRUE; + else + return; + } + else if (GET_CODE (body) == RETURN) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == arm_target_label + && LABEL_NUSES (start_insn) == 1) + { + reverse = TRUE; + seeking_return = 1; + } + else + return; + } + else + return; + } + + gcc_assert (!arm_ccfsm_state || reverse); + if (GET_CODE (insn) != JUMP_INSN) + return; + + /* This jump might be paralleled with a clobber of the condition codes + the jump should always come first */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (reverse + || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) + { + int insns_skipped; + int fail = FALSE, succeed = FALSE; + /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ + int then_not_else = TRUE; + rtx this_insn = start_insn, label = 0; + + /* Register the insn jumped to. 
*/ + if (reverse) + { + if (!seeking_return) + label = XEXP (SET_SRC (body), 0); + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) + label = XEXP (XEXP (SET_SRC (body), 1), 0); + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) + { + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN) + seeking_return = 1; + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN) + { + seeking_return = 1; + then_not_else = FALSE; + } + else + gcc_unreachable (); + + /* See how many insns this branch skips, and what kind of insns. If all + insns are okay, and the label or unconditional branch to the same + label is not too far away, succeed. */ + for (insns_skipped = 0; + !fail && !succeed && insns_skipped++ < max_insns_skipped;) + { + rtx scanbody; + + this_insn = next_nonnote_insn (this_insn); + if (!this_insn) + break; + + switch (GET_CODE (this_insn)) + { + case CODE_LABEL: + /* Succeed if it is the target label, otherwise fail since + control falls in from somewhere else. */ + if (this_insn == label) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case BARRIER: + /* Succeed if the following insn is the target label. + Otherwise fail. + If return insns are used then the last insn in a function + will be a barrier. */ + this_insn = next_nonnote_insn (this_insn); + if (this_insn && this_insn == label) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case CALL_INSN: + /* The AAPCS says that conditional calls should not be + used since they make interworking inefficient (the + linker can't transform BL into BLX). That's + only a problem if the machine has BLX. */ + if (arm_arch5) + { + fail = TRUE; + break; + } + + /* Succeed if the following insn is the target label, or + if the following two insns are a barrier and the + target label. */ + this_insn = next_nonnote_insn (this_insn); + if (this_insn && GET_CODE (this_insn) == BARRIER) + this_insn = next_nonnote_insn (this_insn); + + if (this_insn && this_insn == label + && insns_skipped < max_insns_skipped) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case JUMP_INSN: + /* If this is an unconditional branch to the same label, succeed. + If it is to another label, do nothing. If it is conditional, + fail. */ + /* XXX Probably, the tests for SET and the PC are + unnecessary. */ + + scanbody = PATTERN (this_insn); + if (GET_CODE (scanbody) == SET + && GET_CODE (SET_DEST (scanbody)) == PC) + { + if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF + && XEXP (SET_SRC (scanbody), 0) == label && !reverse) + { + arm_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) + fail = TRUE; + } + /* Fail if a conditional return is undesirable (e.g. on a + StrongARM), but still allow this if optimizing for size. */ + else if (GET_CODE (scanbody) == RETURN + && !use_return_insn (TRUE, NULL) + && !optimize_size) + fail = TRUE; + else if (GET_CODE (scanbody) == RETURN + && seeking_return) + { + arm_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (scanbody) == PARALLEL) + { + switch (get_attr_conds (this_insn)) + { + case CONDS_NOCOND: + break; + default: + fail = TRUE; + break; + } + } + else + fail = TRUE; /* Unrecognized jump (e.g. epilogue). */ + + break; + + case INSN: + /* Instructions using or affecting the condition codes make it + fail. 
*/ + scanbody = PATTERN (this_insn); + if (!(GET_CODE (scanbody) == SET + || GET_CODE (scanbody) == PARALLEL) + || get_attr_conds (this_insn) != CONDS_NOCOND) + fail = TRUE; + + /* A conditional cirrus instruction must be followed by + a non Cirrus instruction. However, since we + conditionalize instructions in this function and by + the time we get here we can't add instructions + (nops), because shorten_branches() has already been + called, we will disable conditionalizing Cirrus + instructions to be safe. */ + if (GET_CODE (scanbody) != USE + && GET_CODE (scanbody) != CLOBBER + && get_attr_cirrus (this_insn) != CIRRUS_NOT) + fail = TRUE; + break; + + default: + break; + } + } + if (succeed) + { + if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse)) + arm_target_label = CODE_LABEL_NUMBER (label); + else + { + gcc_assert (seeking_return || arm_ccfsm_state == 2); + + while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) + { + this_insn = next_nonnote_insn (this_insn); + gcc_assert (!this_insn + || (GET_CODE (this_insn) != BARRIER + && GET_CODE (this_insn) != CODE_LABEL)); + } + if (!this_insn) + { + /* Oh, dear! we ran off the end.. give up. */ + extract_constrain_insn_cached (insn); + arm_ccfsm_state = 0; + arm_target_insn = NULL; + return; + } + arm_target_insn = this_insn; + } + + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from + what it was. */ + if (!reverse) + arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0)); + + if (reverse || then_not_else) + arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc); + } + + /* Restore recog_data (getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call. */ + extract_constrain_insn_cached (insn); + } +} + +/* Output IT instructions. */ +void +thumb2_asm_output_opcode (FILE * stream) +{ + char buff[5]; + int n; + + if (arm_condexec_mask) + { + for (n = 0; n < arm_condexec_masklen; n++) + buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e'; + buff[n] = 0; + asm_fprintf(stream, "i%s\t%s\n\t", buff, + arm_condition_codes[arm_current_cc]); + arm_condexec_mask = 0; + } +} + +/* Returns true if REGNO is a valid register + for holding a quantity of type MODE. */ +int +arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return (regno == CC_REGNUM + || (TARGET_HARD_FLOAT && TARGET_VFP + && regno == VFPCC_REGNUM)); + + if (TARGET_THUMB1) + /* For the Thumb we only allow values bigger than SImode in + registers 0 - 6, so that there is always a second low + register available to hold the upper part of the value. + We probably we ought to ensure that the register is the + start of an even numbered register pair. */ + return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM); + + if (TARGET_HARD_FLOAT && TARGET_MAVERICK + && IS_CIRRUS_REGNUM (regno)) + /* We have outlawed SI values in Cirrus registers because they + reside in the lower 32 bits, but SF values reside in the + upper 32 bits. This causes gcc all sorts of grief. We can't + even split the registers into pairs because Cirrus SI values + get sign extended to 64bits-- aldyh. 
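
thumb2_asm_output_opcode above rebuilds the IT instruction from arm_condexec_mask: each bit selects a 't' (block condition) or 'e' (inverse condition) slot, and bit 0, belonging to the first insn, is always set. A standalone re-statement of that formatting with made-up inputs:

#include <stdio.h>

/* MASK bit N is 1 when insn N+1 of the block executes under the block
   condition ('t') and 0 when it executes under the inverse ('e');
   MASKLEN is the number of insns in the block (at most 4).  */
static void print_it (unsigned mask, int masklen, const char *cond)
{
  char buff[5];
  int n;

  for (n = 0; n < masklen; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = 0;
  printf ("i%s\t%s\n", buff, cond);
}

int main (void)
{
  /* Three insns: eq, ne, eq -> "itet eq".  */
  print_it (0x5, 3, "eq");
  return 0;
}
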
*/ + return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode); + + if (TARGET_HARD_FLOAT && TARGET_VFP + && IS_VFP_REGNUM (regno)) + { + if (mode == SFmode || mode == SImode) + return VFP_REGNO_OK_FOR_SINGLE (regno); + + if (mode == DFmode) + return VFP_REGNO_OK_FOR_DOUBLE (regno); + + /* VFP registers can hold HFmode values, but there is no point in + putting them there unless we have hardware conversion insns. */ + if (mode == HFmode) + return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); + + if (TARGET_NEON) + return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) + || (VALID_NEON_QREG_MODE (mode) + && NEON_REGNO_OK_FOR_QUAD (regno)) + || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2)) + || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3)) + || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4)) + || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6)) + || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)); + + return FALSE; + } + + if (TARGET_REALLY_IWMMXT) + { + if (IS_IWMMXT_GR_REGNUM (regno)) + return mode == SImode; + + if (IS_IWMMXT_REGNUM (regno)) + return VALID_IWMMXT_REG_MODE (mode); + } + + /* We allow almost any value to be stored in the general registers. + Restrict doubleword quantities to even register pairs so that we can + use ldrd. Do not allow very large Neon structure opaque modes in + general registers; they would use too many. */ + if (regno <= LAST_ARM_REGNUM) + return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) + && ARM_NUM_REGS (mode) <= 4; + + if (regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + /* We only allow integers in the fake hard registers. */ + return GET_MODE_CLASS (mode) == MODE_INT; + + /* The only registers left are the FPA registers + which we only allow to hold FP values. */ + return (TARGET_HARD_FLOAT && TARGET_FPA + && GET_MODE_CLASS (mode) == MODE_FLOAT + && regno >= FIRST_FPA_REGNUM + && regno <= LAST_FPA_REGNUM); +} + +/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are + not used in arm mode. */ + +enum reg_class +arm_regno_class (int regno) +{ + if (TARGET_THUMB1) + { + if (regno == STACK_POINTER_REGNUM) + return STACK_REG; + if (regno == CC_REGNUM) + return CC_REG; + if (regno < 8) + return LO_REGS; + return HI_REGS; + } + + if (TARGET_THUMB2 && regno < 8) + return LO_REGS; + + if ( regno <= LAST_ARM_REGNUM + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS; + + if (regno == CC_REGNUM || regno == VFPCC_REGNUM) + return TARGET_THUMB2 ? CC_REG : NO_REGS; + + if (IS_CIRRUS_REGNUM (regno)) + return CIRRUS_REGS; + + if (IS_VFP_REGNUM (regno)) + { + if (regno <= D7_VFP_REGNUM) + return VFP_D0_D7_REGS; + else if (regno <= LAST_LO_VFP_REGNUM) + return VFP_LO_REGS; + else + return VFP_HI_REGS; + } + + if (IS_IWMMXT_REGNUM (regno)) + return IWMMXT_REGS; + + if (IS_IWMMXT_GR_REGNUM (regno)) + return IWMMXT_GR_REGS; + + return FPA_REGS; +} + +/* Handle a special case when computing the offset + of an argument from the frame pointer. */ +int +arm_debugger_arg_offset (int value, rtx addr) +{ + rtx insn; + + /* We are only interested if dbxout_parms() failed to compute the offset. */ + if (value != 0) + return 0; + + /* We can only cope with the case where the address is held in a register. */ + if (GET_CODE (addr) != REG) + return 0; + + /* If we are using the frame pointer to point at the argument, then + an offset of 0 is correct. 
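
The VFP tests in arm_hard_regno_mode_ok and arm_regno_class above depend on how S, D and Q registers overlay: a D register is a pair of consecutive S registers and a Q register a pair of D registers, so a D-sized value must start on an even S index and a Q-sized value on a multiple of four. A standalone sketch of that numbering, assuming only the overlay (it ignores the separate check that the register actually exists on a D16 versus D32 VFP):

#include <stdio.h>

/* OFFSET is the S-register index counted from the first VFP register.  */
static int ok_for_double (int offset) { return (offset & 1) == 0; }
static int ok_for_quad   (int offset) { return (offset & 3) == 0; }

int main (void)
{
  int offset = 6;                        /* s6 */
  printf ("s%d -> d%d%s, q%d%s\n",
          offset,
          offset / 2, ok_for_double (offset) ? "" : " (not allowed)",
          offset / 4, ok_for_quad (offset) ? "" : " (not allowed)");
  /* -> s6 -> d3, q1 (not allowed) */
  return 0;
}
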
*/ + if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM) + return 0; + + /* If we are using the stack pointer to point at the + argument, then an offset of 0 is correct. */ + /* ??? Check this is consistent with thumb2 frame layout. */ + if ((TARGET_THUMB || !frame_pointer_needed) + && REGNO (addr) == SP_REGNUM) + return 0; + + /* Oh dear. The argument is pointed to by a register rather + than being held in a register, or being stored at a known + offset from the frame pointer. Since GDB only understands + those two kinds of argument we must translate the address + held in the register into an offset from the frame pointer. + We do this by searching through the insns for the function + looking to see where this register gets its value. If the + register is initialized from the frame pointer plus an offset + then we are in luck and we can continue, otherwise we give up. + + This code is exercised by producing debugging information + for a function with arguments like this: + + double func (double a, double b, int c, double d) {return d;} + + Without this code the stab for parameter 'd' will be set to + an offset of 0 from the frame pointer, rather than 8. */ + + /* The if() statement says: + + If the insn is a normal instruction + and if the insn is setting the value in a register + and if the register being set is the register holding the address of the argument + and if the address is computing by an addition + that involves adding to a register + which is the frame pointer + a constant integer + + then... */ + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if ( GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr) + && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS + && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG + && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM + && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT + ) + { + value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1)); + + break; + } + } + + if (value == 0) + { + debug_rtx (addr); + warning (0, "unable to compute real location of stacked parameter"); + value = 8; /* XXX magic hack */ + } + + return value; +} + +#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ + do \ + { \ + if ((MASK) & insn_flags) \ + add_builtin_function ((NAME), (TYPE), (CODE), \ + BUILT_IN_MD, NULL, NULL_TREE); \ + } \ + while (0) + +struct builtin_description +{ + const unsigned int mask; + const enum insn_code icode; + const char * const name; + const enum arm_builtins code; + const enum rtx_code comparison; + const unsigned int flag; +}; + +static const struct builtin_description bdesc_2arg[] = +{ +#define IWMMXT_BUILTIN(code, string, builtin) \ + { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ + ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + + IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) + IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) + IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) + IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) + IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) + IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) + IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) + IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) + IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) + IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) + IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) + IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) + IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) + IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) + IWMMXT_BUILTIN 
(usaddv2si3, "waddwus", WADDUSW) + IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) + IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) + IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) + IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) + IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) + IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) + IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) + IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) + IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) + IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) + IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) + IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) + IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) + IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) + IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) + IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) + IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) + IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) + IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) + IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) + IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) + IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) + IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) + IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) + IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) + IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) + IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) + IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) + IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) + IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) + IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) + IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) + IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) + IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) + IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) + IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) + IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) + IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) + IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) + IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) + IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) + IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS) + IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU) + +#define IWMMXT_BUILTIN2(code, builtin) \ + { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + + IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) + IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) + IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI) + IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) + IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI) + IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) + IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) + IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) + IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI) + IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) + IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI) + IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) + IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) + IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) + IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI) + IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) + IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI) + IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) + IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) + IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) + IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) + IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) + IWMMXT_BUILTIN2 (rorv2si3, WRORWI) + IWMMXT_BUILTIN2 (rordi3_di, 
WRORD) + IWMMXT_BUILTIN2 (rordi3, WRORDI) + IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) + IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) +}; + +static const struct builtin_description bdesc_1arg[] = +{ + IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) + IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) + IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) + IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) + IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) + IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) + IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) + IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) + IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) + IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) + IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) + IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) + IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) + IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH) + IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) + IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) + IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) + IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) +}; + +/* Set up all the iWMMXt builtins. This is + not called if TARGET_IWMMXT is zero. */ + +static void +arm_init_iwmmxt_builtins (void) +{ + const struct builtin_description * d; + size_t i; + tree endlink = void_list_node; + + tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); + tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); + + tree int_ftype_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink)); + tree v8qi_ftype_v8qi_v8qi_int + = build_function_type (V8QI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + tree v4hi_ftype_v4hi_int + = build_function_type (V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree v2si_ftype_v2si_int + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree v2si_ftype_di_di + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, long_long_integer_type_node, + tree_cons (NULL_TREE, long_long_integer_type_node, + endlink))); + tree di_ftype_di_int + = build_function_type (long_long_integer_type_node, + tree_cons (NULL_TREE, long_long_integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree di_ftype_di_int_int + = build_function_type (long_long_integer_type_node, + tree_cons (NULL_TREE, long_long_integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + tree int_ftype_v8qi + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + endlink)); + tree int_ftype_v4hi + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + endlink)); + tree int_ftype_v2si + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + endlink)); + tree int_ftype_v8qi_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree int_ftype_v4hi_int 
+ = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree int_ftype_v2si_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree v8qi_ftype_v8qi_int_int + = build_function_type (V8QI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + tree v4hi_ftype_v4hi_int_int + = build_function_type (V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + tree v2si_ftype_v2si_int_int + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + /* Miscellaneous. */ + tree v8qi_ftype_v4hi_v4hi + = build_function_type (V8QI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + endlink))); + tree v4hi_ftype_v2si_v2si + = build_function_type (V4HI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + endlink))); + tree v2si_ftype_v4hi_v4hi + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + endlink))); + tree v2si_ftype_v8qi_v8qi + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + endlink))); + tree v4hi_ftype_v4hi_di + = build_function_type (V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, + long_long_integer_type_node, + endlink))); + tree v2si_ftype_v2si_di + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + tree_cons (NULL_TREE, + long_long_integer_type_node, + endlink))); + tree void_ftype_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + tree di_ftype_void + = build_function_type (long_long_unsigned_type_node, endlink); + tree di_ftype_v8qi + = build_function_type (long_long_integer_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + endlink)); + tree di_ftype_v4hi + = build_function_type (long_long_integer_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + endlink)); + tree di_ftype_v2si + = build_function_type (long_long_integer_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + endlink)); + tree v2si_ftype_v4hi + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + endlink)); + tree v4hi_ftype_v8qi + = build_function_type (V4HI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + endlink)); + + tree di_ftype_di_v4hi_v4hi + = build_function_type (long_long_unsigned_type_node, + tree_cons (NULL_TREE, + long_long_unsigned_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, + V4HI_type_node, + endlink)))); + + tree di_ftype_v4hi_v4hi + = build_function_type (long_long_unsigned_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + endlink))); + + /* Normal vector binops. 
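
The tree_cons chains above give each iWMMXt builtin its C-level signature; v8qi_ftype_v8qi_v8qi, for instance, describes a builtin taking and returning an 8-byte vector of byte elements. A hedged usage sketch, assuming a __v8qi vector typedef of the kind mmintrin.h provides and a GCC configured for an iWMMXt-capable CPU (neither is defined in this file):

/* Compile-time sketch only: needs a GCC targeting an iWMMXt-capable CPU;
   the typedef is an assumption modelled on the one in mmintrin.h.  */
typedef char __v8qi __attribute__ ((vector_size (8)));

__v8qi add_bytes (__v8qi a, __v8qi b)
{
  /* Registered above via IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB),
     so the builtin is named __builtin_arm_waddb.  */
  return __builtin_arm_waddb (a, b);
}
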
*/ + tree v8qi_ftype_v8qi_v8qi + = build_function_type (V8QI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + tree_cons (NULL_TREE, V8QI_type_node, + endlink))); + tree v4hi_ftype_v4hi_v4hi + = build_function_type (V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + tree_cons (NULL_TREE, V4HI_type_node, + endlink))); + tree v2si_ftype_v2si_v2si + = build_function_type (V2SI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + tree_cons (NULL_TREE, V2SI_type_node, + endlink))); + tree di_ftype_di_di + = build_function_type (long_long_unsigned_type_node, + tree_cons (NULL_TREE, long_long_unsigned_type_node, + tree_cons (NULL_TREE, + long_long_unsigned_type_node, + endlink))); + + /* Add all builtins that are more or less simple operations on two + operands. */ + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + { + /* Use one of the operands; the target can have a different mode for + mask-generating compares. */ + enum machine_mode mode; + tree type; + + if (d->name == 0) + continue; + + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V8QImode: + type = v8qi_ftype_v8qi_v8qi; + break; + case V4HImode: + type = v4hi_ftype_v4hi_v4hi; + break; + case V2SImode: + type = v2si_ftype_v2si_v2si; + break; + case DImode: + type = di_ftype_di_di; + break; + + default: + gcc_unreachable (); + } + + def_mbuiltin (d->mask, d->name, type, d->code); + } + + /* Add the remaining MMX insns with somewhat more complicated types. */ + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, 
ARM_BUILTIN_WRORW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB); + def_mbuiltin (FL_IWMMXT, 
"__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ); + + def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB); + def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT); +} + +static void +arm_init_tls_builtins (void) +{ + tree ftype, decl; + + ftype = build_function_type (ptr_type_node, void_list_node); + decl = add_builtin_function ("__builtin_thread_pointer", ftype, + ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD, + NULL, NULL_TREE); + TREE_NOTHROW (decl) = 1; + TREE_READONLY (decl) = 1; +} + +enum neon_builtin_type_bits { + T_V8QI = 0x0001, + T_V4HI = 0x0002, + T_V2SI = 0x0004, + T_V2SF = 0x0008, + T_DI = 0x0010, + T_V16QI = 0x0020, + T_V8HI = 0x0040, + T_V4SI = 0x0080, + T_V4SF = 0x0100, + T_V2DI = 0x0200, + T_TI = 0x0400, + T_EI = 0x0800, + T_OI = 0x1000 +}; + +#define v8qi_UP T_V8QI +#define v4hi_UP T_V4HI +#define v2si_UP T_V2SI +#define v2sf_UP T_V2SF +#define di_UP T_DI +#define v16qi_UP T_V16QI +#define v8hi_UP T_V8HI +#define v4si_UP T_V4SI +#define v4sf_UP T_V4SF +#define v2di_UP T_V2DI +#define ti_UP T_TI +#define ei_UP T_EI +#define oi_UP T_OI + +#define UP(X) X##_UP + +#define T_MAX 13 + +typedef enum { + NEON_BINOP, + NEON_TERNOP, + NEON_UNOP, + NEON_GETLANE, + NEON_SETLANE, + NEON_CREATE, + NEON_DUP, + NEON_DUPLANE, + NEON_COMBINE, + NEON_SPLIT, + NEON_LANEMUL, + NEON_LANEMULL, + NEON_LANEMULH, + NEON_LANEMAC, + NEON_SCALARMUL, + NEON_SCALARMULL, + NEON_SCALARMULH, + NEON_SCALARMAC, + NEON_CONVERT, + NEON_FIXCONV, + NEON_SELECT, + NEON_RESULTPAIR, + NEON_REINTERP, + NEON_VTBL, + NEON_VTBX, + NEON_LOAD1, + NEON_LOAD1LANE, + NEON_STORE1, + NEON_STORE1LANE, + NEON_LOADSTRUCT, + NEON_LOADSTRUCTLANE, + NEON_STORESTRUCT, + NEON_STORESTRUCTLANE, + NEON_LOGICBINOP, + NEON_SHIFTINSERT, + NEON_SHIFTIMM, + NEON_SHIFTACC +} neon_itype; + +typedef struct { + const char *name; + const neon_itype itype; + const int bits; + const enum insn_code codes[T_MAX]; + const unsigned int num_vars; + unsigned int base_fcode; +} neon_builtin_datum; + +#define CF(N,X) CODE_FOR_neon_##N##X + +#define VAR1(T, N, A) \ + #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0 +#define VAR2(T, N, A, B) \ + #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0 +#define VAR3(T, N, A, B, C) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C), 
\ + { CF (N, A), CF (N, B), CF (N, C) }, 3, 0 +#define VAR4(T, N, A, B, C, D) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \ + { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0 +#define VAR5(T, N, A, B, C, D, E) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \ + { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0 +#define VAR6(T, N, A, B, C, D, E, F) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \ + { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0 +#define VAR7(T, N, A, B, C, D, E, F, G) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \ + { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ + CF (N, G) }, 7, 0 +#define VAR8(T, N, A, B, C, D, E, F, G, H) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ + | UP (H), \ + { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ + CF (N, G), CF (N, H) }, 8, 0 +#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ + | UP (H) | UP (I), \ + { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ + CF (N, G), CF (N, H), CF (N, I) }, 9, 0 +#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ + #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \ + | UP (H) | UP (I) | UP (J), \ + { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \ + CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0 + +/* The mode entries in the following table correspond to the "key" type of the + instruction variant, i.e. equivalent to that which would be specified after + the assembler mnemonic, which usually refers to the last vector operand. + (Signed/unsigned/polynomial types are not differentiated between though, and + are all mapped onto the same mode for a given element size.) The modes + listed per instruction should be the same as those defined for that + instruction's pattern in neon.md. + WARNING: Variants should be listed in the same increasing order as + neon_builtin_type_bits. 
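
Each VARn entry therefore packs a base name, an itype, a bitmask of the type variants that exist and one insn code per set bit, and the initialisation code walks the mask to create one builtin per variant. A self-contained miniature of that table-driven scheme; the bit values, suffixes and printed names below are invented for illustration and are not the real insn codes or builtin names:

#include <stdio.h>

/* Invented stand-ins for neon_builtin_type_bits and neon_builtin_datum.  */
enum { T_V8QI = 0x1, T_V4HI = 0x2, T_V2SI = 0x4, T_V2SF = 0x8 };

struct datum
{
  const char *name;   /* base name, e.g. "vadd"    */
  unsigned bits;      /* which type variants exist */
};

static const char *const suffix[] = { "v8qi", "v4hi", "v2si", "v2sf" };

static void register_variants (const struct datum *d)
{
  /* One "builtin" per set bit, in the same increasing order as the bits.  */
  for (unsigned i = 0; i < 4; i++)
    if (d->bits & (1u << i))
      printf ("__builtin_neon_%s%s\n", d->name, suffix[i]);
}

int main (void)
{
  struct datum vadd = { "vadd", T_V8QI | T_V4HI | T_V2SI | T_V2SF };
  register_variants (&vadd);
  return 0;
}
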
*/ + +static neon_builtin_datum neon_builtin_data[] = +{ + { VAR10 (BINOP, vadd, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }, + { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) }, + { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) }, + { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) }, + { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) }, + { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) }, + { VAR2 (TERNOP, vqdmlal, v4hi, v2si) }, + { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) }, + { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) }, + { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) }, + { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) }, + { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) }, + { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) }, + { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) }, + { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) }, + { VAR2 (BINOP, vqdmull, v4hi, v2si) }, + { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) }, + { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) }, + { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) }, + { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) }, + { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR10 (BINOP, vsub, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) }, + { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) }, + { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) }, + { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR2 (BINOP, vcage, v2sf, v4sf) }, + { VAR2 (BINOP, vcagt, v2sf, v4sf) }, + { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) }, + { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) }, + { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) }, + { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) }, + { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) }, + { VAR2 (BINOP, vrecps, v2sf, v4sf) }, + { VAR2 (BINOP, vrsqrts, v2sf, v4sf) }, + { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, 
v4si, v2di) }, + { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) }, + { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + { VAR2 (UNOP, vcnt, v8qi, v16qi) }, + { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) }, + { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) }, + { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) }, + /* FIXME: vget_lane supports more variants than this! */ + { VAR10 (GETLANE, vget_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (SETLANE, vset_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) }, + { VAR10 (DUP, vdup_n, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (DUPLANE, vdup_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) }, + { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) }, + { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) }, + { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) }, + { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) }, + { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) }, + { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) }, + { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) }, + { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) }, + { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) }, + { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) }, + { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) }, + { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) }, + { VAR10 (BINOP, vext, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) }, + { VAR2 (UNOP, vrev16, v8qi, v16qi) }, + { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) }, + { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) }, + { VAR10 (SELECT, vbsl, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR1 (VTBL, vtbl1, v8qi) }, + { VAR1 (VTBL, vtbl2, v8qi) }, + { VAR1 (VTBL, vtbl3, v8qi) }, + { VAR1 (VTBL, vtbl4, v8qi) }, + { VAR1 (VTBX, vtbx1, v8qi) }, + { VAR1 (VTBX, vtbx2, v8qi) }, + { VAR1 (VTBX, vtbx3, v8qi) }, + { VAR1 (VTBX, vtbx4, v8qi) }, + { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) }, + { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) }, + { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) }, + { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) }, + { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) }, 
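+  /* For reference: following the VARn pattern defined above, an entry such
+     as
+       { VAR2 (BINOP, vqdmull, v4hi, v2si) }
+     should expand to roughly
+       { "vqdmull", NEON_BINOP, UP (v4hi) | UP (v2si),
+         { CF (vqdmull, v4hi), CF (vqdmull, v2si) }, 2, 0 },
+     i.e. a builtin name, its neon_itype, a bitmask of type variants, the
+     per-variant insn codes, the variant count, and a base_fcode that is
+     filled in later by arm_init_neon_builtins.  */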
+ { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) }, + { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (LOAD1, vld1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (LOAD1LANE, vld1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (LOAD1, vld1_dup, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (STORE1, vst1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (STORE1LANE, vst1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR9 (LOADSTRUCT, + vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, + { VAR7 (LOADSTRUCTLANE, vld2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) }, + { VAR9 (STORESTRUCT, vst2, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, + { VAR7 (STORESTRUCTLANE, vst2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR9 (LOADSTRUCT, + vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, + { VAR7 (LOADSTRUCTLANE, vld3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) }, + { VAR9 (STORESTRUCT, vst3, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, + { VAR7 (STORESTRUCTLANE, vst3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR9 (LOADSTRUCT, vld4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, + { VAR7 (LOADSTRUCTLANE, vld4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) }, + { VAR9 (STORESTRUCT, vst4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) }, + { VAR7 (STORESTRUCTLANE, vst4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) }, + { VAR10 (LOGICBINOP, vand, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (LOGICBINOP, vorr, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (BINOP, veor, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (LOGICBINOP, vbic, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }, + { VAR10 (LOGICBINOP, vorn, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) } +}; + +#undef CF +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 + +static void +arm_init_neon_builtins (void) +{ + unsigned int i, fcode = ARM_BUILTIN_NEON_BASE; + + tree neon_intQI_type_node; + tree neon_intHI_type_node; + tree neon_polyQI_type_node; + tree neon_polyHI_type_node; + tree neon_intSI_type_node; + tree neon_intDI_type_node; + tree neon_float_type_node; + + tree intQI_pointer_node; + tree intHI_pointer_node; + tree intSI_pointer_node; + tree intDI_pointer_node; + tree float_pointer_node; + + tree const_intQI_node; + tree const_intHI_node; + tree const_intSI_node; + tree const_intDI_node; + tree const_float_node; + + tree const_intQI_pointer_node; + tree const_intHI_pointer_node; + tree const_intSI_pointer_node; + tree const_intDI_pointer_node; + tree const_float_pointer_node; + + tree V8QI_type_node; + tree V4HI_type_node; + tree V2SI_type_node; + tree V2SF_type_node; + tree 
V16QI_type_node; + tree V8HI_type_node; + tree V4SI_type_node; + tree V4SF_type_node; + tree V2DI_type_node; + + tree intUQI_type_node; + tree intUHI_type_node; + tree intUSI_type_node; + tree intUDI_type_node; + + tree intEI_type_node; + tree intOI_type_node; + tree intCI_type_node; + tree intXI_type_node; + + tree V8QI_pointer_node; + tree V4HI_pointer_node; + tree V2SI_pointer_node; + tree V2SF_pointer_node; + tree V16QI_pointer_node; + tree V8HI_pointer_node; + tree V4SI_pointer_node; + tree V4SF_pointer_node; + tree V2DI_pointer_node; + + tree void_ftype_pv8qi_v8qi_v8qi; + tree void_ftype_pv4hi_v4hi_v4hi; + tree void_ftype_pv2si_v2si_v2si; + tree void_ftype_pv2sf_v2sf_v2sf; + tree void_ftype_pdi_di_di; + tree void_ftype_pv16qi_v16qi_v16qi; + tree void_ftype_pv8hi_v8hi_v8hi; + tree void_ftype_pv4si_v4si_v4si; + tree void_ftype_pv4sf_v4sf_v4sf; + tree void_ftype_pv2di_v2di_v2di; + + tree reinterp_ftype_dreg[5][5]; + tree reinterp_ftype_qreg[5][5]; + tree dreg_types[5], qreg_types[5]; + + /* Create distinguished type nodes for NEON vector element types, + and pointers to values of such types, so we can detect them later. */ + neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); + neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); + neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); + neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); + neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode)); + neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode)); + neon_float_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; + layout_type (neon_float_type_node); + + /* Define typedefs which exactly correspond to the modes we are basing vector + types on. If you change these names you'll need to change + the table used by arm_mangle_type too. */ + (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, + "__builtin_neon_qi"); + (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, + "__builtin_neon_hi"); + (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, + "__builtin_neon_si"); + (*lang_hooks.types.register_builtin_type) (neon_float_type_node, + "__builtin_neon_sf"); + (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, + "__builtin_neon_di"); + (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, + "__builtin_neon_poly8"); + (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, + "__builtin_neon_poly16"); + + intQI_pointer_node = build_pointer_type (neon_intQI_type_node); + intHI_pointer_node = build_pointer_type (neon_intHI_type_node); + intSI_pointer_node = build_pointer_type (neon_intSI_type_node); + intDI_pointer_node = build_pointer_type (neon_intDI_type_node); + float_pointer_node = build_pointer_type (neon_float_type_node); + + /* Next create constant-qualified versions of the above types. 
*/ + const_intQI_node = build_qualified_type (neon_intQI_type_node, + TYPE_QUAL_CONST); + const_intHI_node = build_qualified_type (neon_intHI_type_node, + TYPE_QUAL_CONST); + const_intSI_node = build_qualified_type (neon_intSI_type_node, + TYPE_QUAL_CONST); + const_intDI_node = build_qualified_type (neon_intDI_type_node, + TYPE_QUAL_CONST); + const_float_node = build_qualified_type (neon_float_type_node, + TYPE_QUAL_CONST); + + const_intQI_pointer_node = build_pointer_type (const_intQI_node); + const_intHI_pointer_node = build_pointer_type (const_intHI_node); + const_intSI_pointer_node = build_pointer_type (const_intSI_node); + const_intDI_pointer_node = build_pointer_type (const_intDI_node); + const_float_pointer_node = build_pointer_type (const_float_node); + + /* Now create vector types based on our NEON element types. */ + /* 64-bit vectors. */ + V8QI_type_node = + build_vector_type_for_mode (neon_intQI_type_node, V8QImode); + V4HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V4HImode); + V2SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V2SImode); + V2SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V2SFmode); + /* 128-bit vectors. */ + V16QI_type_node = + build_vector_type_for_mode (neon_intQI_type_node, V16QImode); + V8HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V8HImode); + V4SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V4SImode); + V4SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V4SFmode); + V2DI_type_node = + build_vector_type_for_mode (neon_intDI_type_node, V2DImode); + + /* Unsigned integer types for various mode sizes. */ + intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); + intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); + intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); + intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); + + (*lang_hooks.types.register_builtin_type) (intUQI_type_node, + "__builtin_neon_uqi"); + (*lang_hooks.types.register_builtin_type) (intUHI_type_node, + "__builtin_neon_uhi"); + (*lang_hooks.types.register_builtin_type) (intUSI_type_node, + "__builtin_neon_usi"); + (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + "__builtin_neon_udi"); + + /* Opaque integer types for structures of vectors. */ + intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode)); + intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode)); + intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode)); + intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode)); + + (*lang_hooks.types.register_builtin_type) (intTI_type_node, + "__builtin_neon_ti"); + (*lang_hooks.types.register_builtin_type) (intEI_type_node, + "__builtin_neon_ei"); + (*lang_hooks.types.register_builtin_type) (intOI_type_node, + "__builtin_neon_oi"); + (*lang_hooks.types.register_builtin_type) (intCI_type_node, + "__builtin_neon_ci"); + (*lang_hooks.types.register_builtin_type) (intXI_type_node, + "__builtin_neon_xi"); + + /* Pointers to vector types. 
*/ + V8QI_pointer_node = build_pointer_type (V8QI_type_node); + V4HI_pointer_node = build_pointer_type (V4HI_type_node); + V2SI_pointer_node = build_pointer_type (V2SI_type_node); + V2SF_pointer_node = build_pointer_type (V2SF_type_node); + V16QI_pointer_node = build_pointer_type (V16QI_type_node); + V8HI_pointer_node = build_pointer_type (V8HI_type_node); + V4SI_pointer_node = build_pointer_type (V4SI_type_node); + V4SF_pointer_node = build_pointer_type (V4SF_type_node); + V2DI_pointer_node = build_pointer_type (V2DI_type_node); + + /* Operations which return results as pairs. */ + void_ftype_pv8qi_v8qi_v8qi = + build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, + V8QI_type_node, NULL); + void_ftype_pv4hi_v4hi_v4hi = + build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, + V4HI_type_node, NULL); + void_ftype_pv2si_v2si_v2si = + build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, + V2SI_type_node, NULL); + void_ftype_pv2sf_v2sf_v2sf = + build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, + V2SF_type_node, NULL); + void_ftype_pdi_di_di = + build_function_type_list (void_type_node, intDI_pointer_node, + neon_intDI_type_node, neon_intDI_type_node, NULL); + void_ftype_pv16qi_v16qi_v16qi = + build_function_type_list (void_type_node, V16QI_pointer_node, + V16QI_type_node, V16QI_type_node, NULL); + void_ftype_pv8hi_v8hi_v8hi = + build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, + V8HI_type_node, NULL); + void_ftype_pv4si_v4si_v4si = + build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, + V4SI_type_node, NULL); + void_ftype_pv4sf_v4sf_v4sf = + build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, + V4SF_type_node, NULL); + void_ftype_pv2di_v2di_v2di = + build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, + V2DI_type_node, NULL); + + dreg_types[0] = V8QI_type_node; + dreg_types[1] = V4HI_type_node; + dreg_types[2] = V2SI_type_node; + dreg_types[3] = V2SF_type_node; + dreg_types[4] = neon_intDI_type_node; + + qreg_types[0] = V16QI_type_node; + qreg_types[1] = V8HI_type_node; + qreg_types[2] = V4SI_type_node; + qreg_types[3] = V4SF_type_node; + qreg_types[4] = V2DI_type_node; + + for (i = 0; i < 5; i++) + { + int j; + for (j = 0; j < 5; j++) + { + reinterp_ftype_dreg[i][j] + = build_function_type_list (dreg_types[i], dreg_types[j], NULL); + reinterp_ftype_qreg[i][j] + = build_function_type_list (qreg_types[i], qreg_types[j], NULL); + } + } + + for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++) + { + neon_builtin_datum *d = &neon_builtin_data[i]; + unsigned int j, codeidx = 0; + + d->base_fcode = fcode; + + for (j = 0; j < T_MAX; j++) + { + const char* const modenames[] = { + "v8qi", "v4hi", "v2si", "v2sf", "di", + "v16qi", "v8hi", "v4si", "v4sf", "v2di" + }; + char namebuf[60]; + tree ftype = NULL; + enum insn_code icode; + int is_load = 0, is_store = 0; + + if ((d->bits & (1 << j)) == 0) + continue; + + icode = d->codes[codeidx++]; + + switch (d->itype) + { + case NEON_LOAD1: + case NEON_LOAD1LANE: + case NEON_LOADSTRUCT: + case NEON_LOADSTRUCTLANE: + is_load = 1; + /* Fall through. */ + case NEON_STORE1: + case NEON_STORE1LANE: + case NEON_STORESTRUCT: + case NEON_STORESTRUCTLANE: + if (!is_load) + is_store = 1; + /* Fall through. 
*/ + case NEON_UNOP: + case NEON_BINOP: + case NEON_LOGICBINOP: + case NEON_SHIFTINSERT: + case NEON_TERNOP: + case NEON_GETLANE: + case NEON_SETLANE: + case NEON_CREATE: + case NEON_DUP: + case NEON_DUPLANE: + case NEON_SHIFTIMM: + case NEON_SHIFTACC: + case NEON_COMBINE: + case NEON_SPLIT: + case NEON_CONVERT: + case NEON_FIXCONV: + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: + case NEON_LANEMAC: + case NEON_SCALARMUL: + case NEON_SCALARMULL: + case NEON_SCALARMULH: + case NEON_SCALARMAC: + case NEON_SELECT: + case NEON_VTBL: + case NEON_VTBX: + { + int k; + tree return_type = void_type_node, args = void_list_node; + + /* Build a function type directly from the insn_data for this + builtin. The build_function_type() function takes care of + removing duplicates for us. */ + for (k = insn_data[icode].n_operands - 1; k >= 0; k--) + { + tree eltype; + + if (is_load && k == 1) + { + /* Neon load patterns always have the memory operand + (a SImode pointer) in the operand 1 position. We + want a const pointer to the element type in that + position. */ + gcc_assert (insn_data[icode].operand[k].mode == SImode); + + switch (1 << j) + { + case T_V8QI: + case T_V16QI: + eltype = const_intQI_pointer_node; + break; + + case T_V4HI: + case T_V8HI: + eltype = const_intHI_pointer_node; + break; + + case T_V2SI: + case T_V4SI: + eltype = const_intSI_pointer_node; + break; + + case T_V2SF: + case T_V4SF: + eltype = const_float_pointer_node; + break; + + case T_DI: + case T_V2DI: + eltype = const_intDI_pointer_node; + break; + + default: gcc_unreachable (); + } + } + else if (is_store && k == 0) + { + /* Similarly, Neon store patterns use operand 0 as + the memory location to store to (a SImode pointer). + Use a pointer to the element type of the store in + that position. */ + gcc_assert (insn_data[icode].operand[k].mode == SImode); + + switch (1 << j) + { + case T_V8QI: + case T_V16QI: + eltype = intQI_pointer_node; + break; + + case T_V4HI: + case T_V8HI: + eltype = intHI_pointer_node; + break; + + case T_V2SI: + case T_V4SI: + eltype = intSI_pointer_node; + break; + + case T_V2SF: + case T_V4SF: + eltype = float_pointer_node; + break; + + case T_DI: + case T_V2DI: + eltype = intDI_pointer_node; + break; + + default: gcc_unreachable (); + } + } + else + { + switch (insn_data[icode].operand[k].mode) + { + case VOIDmode: eltype = void_type_node; break; + /* Scalars. */ + case QImode: eltype = neon_intQI_type_node; break; + case HImode: eltype = neon_intHI_type_node; break; + case SImode: eltype = neon_intSI_type_node; break; + case SFmode: eltype = neon_float_type_node; break; + case DImode: eltype = neon_intDI_type_node; break; + case TImode: eltype = intTI_type_node; break; + case EImode: eltype = intEI_type_node; break; + case OImode: eltype = intOI_type_node; break; + case CImode: eltype = intCI_type_node; break; + case XImode: eltype = intXI_type_node; break; + /* 64-bit vectors. */ + case V8QImode: eltype = V8QI_type_node; break; + case V4HImode: eltype = V4HI_type_node; break; + case V2SImode: eltype = V2SI_type_node; break; + case V2SFmode: eltype = V2SF_type_node; break; + /* 128-bit vectors. 
*/ + case V16QImode: eltype = V16QI_type_node; break; + case V8HImode: eltype = V8HI_type_node; break; + case V4SImode: eltype = V4SI_type_node; break; + case V4SFmode: eltype = V4SF_type_node; break; + case V2DImode: eltype = V2DI_type_node; break; + default: gcc_unreachable (); + } + } + + if (k == 0 && !is_store) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } + + ftype = build_function_type (return_type, args); + } + break; + + case NEON_RESULTPAIR: + { + switch (insn_data[icode].operand[1].mode) + { + case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; + case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; + case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; + case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; + case DImode: ftype = void_ftype_pdi_di_di; break; + case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; + case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; + case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; + case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; + case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; + default: gcc_unreachable (); + } + } + break; + + case NEON_REINTERP: + { + /* We iterate over 5 doubleword types, then 5 quadword + types. */ + int rhs = j % 5; + switch (insn_data[icode].operand[0].mode) + { + case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; + case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; + case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; + case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; + case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; + case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; + case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; + case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; + case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; + case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; + default: gcc_unreachable (); + } + } + break; + + default: + gcc_unreachable (); + } + + gcc_assert (ftype != NULL); + + sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]); + + add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL, + NULL_TREE); + } + } +} + +static void +arm_init_fp16_builtins (void) +{ + tree fp16_type = make_node (REAL_TYPE); + TYPE_PRECISION (fp16_type) = 16; + layout_type (fp16_type); + (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); +} + +static void +arm_init_builtins (void) +{ + arm_init_tls_builtins (); + + if (TARGET_REALLY_IWMMXT) + arm_init_iwmmxt_builtins (); + + if (TARGET_NEON) + arm_init_neon_builtins (); + + if (arm_fp16_format) + arm_init_fp16_builtins (); +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_parameter_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("function parameters cannot have __fp16 type"); + return NULL; +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_return_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("functions cannot return __fp16 type"); + return NULL; +} + +/* Implement TARGET_PROMOTED_TYPE. */ + +static tree +arm_promoted_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return float_type_node; + return NULL_TREE; +} + +/* Implement TARGET_CONVERT_TO_TYPE. 
+ Specifically, this hook implements the peculiarity of the ARM + half-precision floating-point C semantics that requires conversions between + __fp16 to or from double to do an intermediate conversion to float. */ + +static tree +arm_convert_to_type (tree type, tree expr) +{ + tree fromtype = TREE_TYPE (expr); + if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) + return NULL_TREE; + if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) + || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) + return convert (type, convert (float_type_node, expr)); + return NULL_TREE; +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. + This simply adds HFmode as a supported mode; even though we don't + implement arithmetic on this type directly, it's supported by + optabs conversions, much the way the double-word arithmetic is + special-cased in the default hook. */ + +static bool +arm_scalar_mode_supported_p (enum machine_mode mode) +{ + if (mode == HFmode) + return (arm_fp16_format != ARM_FP16_FORMAT_NONE); + else + return default_scalar_mode_supported_p (mode); +} + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ + +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (mode); + + emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x + : gen_rtx_SUBREG (DImode, x, 0))); + return x; +} + +/* Subroutine of arm_expand_builtin to take care of binop insns. */ + +static rtx +arm_expand_binop_builtin (enum insn_code icode, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of arm_expand_builtin to take care of unop insns. */ + +static rtx +arm_expand_unop_builtin (enum insn_code icode, + tree exp, rtx target, int do_load) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + if (do_load) + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + else + { + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + } + + pat = GEN_FCN (icode) (target, op0); + if (! 
pat) + return 0; + emit_insn (pat); + return target; +} + +static int +neon_builtin_compare (const void *a, const void *b) +{ + const neon_builtin_datum *const key = (const neon_builtin_datum *) a; + const neon_builtin_datum *const memb = (const neon_builtin_datum *) b; + unsigned int soughtcode = key->base_fcode; + + if (soughtcode >= memb->base_fcode + && soughtcode < memb->base_fcode + memb->num_vars) + return 0; + else if (soughtcode < memb->base_fcode) + return -1; + else + return 1; +} + +static enum insn_code +locate_neon_builtin_icode (int fcode, neon_itype *itype) +{ + neon_builtin_datum key + = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 }; + neon_builtin_datum *found; + int idx; + + key.base_fcode = fcode; + found = (neon_builtin_datum *) + bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data), + sizeof (neon_builtin_data[0]), neon_builtin_compare); + gcc_assert (found); + idx = fcode - (int) found->base_fcode; + gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars); + + if (itype) + *itype = found->itype; + + return found->codes[idx]; +} + +typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, + NEON_ARG_STOP +} builtin_arg; + +#define NEON_MAX_BUILTIN_ARGS 5 + +/* Expand a Neon builtin. */ +static rtx +arm_expand_neon_args (rtx target, int icode, int have_retval, + tree exp, ...) +{ + va_list ap; + rtx pat; + tree arg[NEON_MAX_BUILTIN_ARGS]; + rtx op[NEON_MAX_BUILTIN_ARGS]; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; + int argc = 0; + + if (have_retval + && (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode))) + target = gen_reg_rtx (tmode); + + va_start (ap, exp); + + for (;;) + { + builtin_arg thisarg = (builtin_arg) va_arg (ap, int); + + if (thisarg == NEON_ARG_STOP) + break; + else + { + arg[argc] = CALL_EXPR_ARG (exp, argc); + op[argc] = expand_normal (arg[argc]); + mode[argc] = insn_data[icode].operand[argc + have_retval].mode; + + switch (thisarg) + { + case NEON_ARG_COPY_TO_REG: + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case NEON_ARG_CONSTANT: + /* FIXME: This error message is somewhat unhelpful. 
*/ + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + error ("argument must be a constant"); + break; + + case NEON_ARG_STOP: + gcc_unreachable (); + } + + argc++; + } + } + + va_end (ap); + + if (have_retval) + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (target, op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + else + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + emit_insn (pat); + + return target; +} + +/* Expand a Neon builtin. These are "special" because they don't have symbolic + constants defined per-instruction or per instruction-variant. Instead, the + required info is looked up in the table neon_builtin_data. */ +static rtx +arm_expand_neon_builtin (int fcode, tree exp, rtx target) +{ + neon_itype itype; + enum insn_code icode = locate_neon_builtin_icode (fcode, &itype); + + switch (itype) + { + case NEON_UNOP: + case NEON_CONVERT: + case NEON_DUPLANE: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_BINOP: + case NEON_SETLANE: + case NEON_SCALARMUL: + case NEON_SCALARMULL: + case NEON_SCALARMULH: + case NEON_SHIFTINSERT: + case NEON_LOGICBINOP: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_TERNOP: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_GETLANE: + case NEON_FIXCONV: + case NEON_SHIFTIMM: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_CREATE: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_DUP: + case NEON_SPLIT: + case NEON_REINTERP: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_COMBINE: + case NEON_VTBL: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_RESULTPAIR: + return arm_expand_neon_args (target, icode, 0, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_LANEMAC: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SHIFTACC: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, 
NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SCALARMAC: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SELECT: + case NEON_VTBX: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LOAD1: + case NEON_LOADSTRUCT: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_LOAD1LANE: + case NEON_LOADSTRUCTLANE: + return arm_expand_neon_args (target, icode, 1, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_STORE1: + case NEON_STORESTRUCT: + return arm_expand_neon_args (target, icode, 0, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_STORE1LANE: + case NEON_STORESTRUCTLANE: + return arm_expand_neon_args (target, icode, 0, exp, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + } + + gcc_unreachable (); +} + +/* Emit code to reinterpret one Neon type as another, without altering bits. */ +void +neon_reinterpret (rtx dest, rtx src) +{ + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); +} + +/* Emit code to place a Neon pair result in memory locations (with equal + registers). */ +void +neon_emit_pair_result_insn (enum machine_mode mode, + rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, + rtx op1, rtx op2) +{ + rtx mem = gen_rtx_MEM (mode, destaddr); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + + emit_insn (intfn (tmp1, op1, op2, tmp2)); + + emit_move_insn (mem, tmp1); + mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); + emit_move_insn (mem, tmp2); +} + +/* Set up OPERANDS for a register copy from SRC to DEST, taking care + not to early-clobber SRC registers in the process. + + We assume that the operands described by SRC and DEST represent a + decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the + number of components into which the copy has been decomposed. */ +void +neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count) +{ + unsigned int i; + + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) < REGNO (operands[1])) + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[i]; + operands[2 * i + 1] = src[i]; + } + } + else + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[count - i - 1]; + operands[2 * i + 1] = src[count - i - 1]; + } + } +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
*/ + +static rtx +arm_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + const struct builtin_description * d; + enum insn_code icode; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + tree arg2; + rtx op0; + rtx op1; + rtx op2; + rtx pat; + int fcode = DECL_FUNCTION_CODE (fndecl); + size_t i; + enum machine_mode tmode; + enum machine_mode mode0; + enum machine_mode mode1; + enum machine_mode mode2; + + if (fcode >= ARM_BUILTIN_NEON_BASE) + return arm_expand_neon_builtin (fcode, exp, target); + + switch (fcode) + { + case ARM_BUILTIN_TEXTRMSB: + case ARM_BUILTIN_TEXTRMUB: + case ARM_BUILTIN_TEXTRMSH: + case ARM_BUILTIN_TEXTRMUH: + case ARM_BUILTIN_TEXTRMSW: + case ARM_BUILTIN_TEXTRMUW: + icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb + : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub + : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh + : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh + : CODE_FOR_iwmmxt_textrmw); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + { + /* @@@ better error message */ + error ("selector must be an immediate"); + return gen_reg_rtx (tmode); + } + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_TINSRB: + case ARM_BUILTIN_TINSRH: + case ARM_BUILTIN_TINSRW: + icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb + : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh + : CODE_FOR_iwmmxt_tinsrw); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + { + /* @@@ better error message */ + error ("selector must be an immediate"); + return const0_rtx; + } + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_SETWCX: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = force_reg (SImode, expand_normal (arg0)); + op1 = expand_normal (arg1); + emit_insn (gen_iwmmxt_tmcr (op1, op0)); + return 0; + + case ARM_BUILTIN_GETWCX: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + target = gen_reg_rtx (SImode); + emit_insn (gen_iwmmxt_tmrc (target, op0)); + return target; + + case ARM_BUILTIN_WSHUFH: + icode = CODE_FOR_iwmmxt_wshufh; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + /* @@@ better error message */ + error ("mask must be an immediate"); + return const0_rtx; + } + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WSADB: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target); + case ARM_BUILTIN_WSADH: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target); + case ARM_BUILTIN_WSADBZ: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target); + case ARM_BUILTIN_WSADHZ: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target); + + /* Several three-argument builtins. */ + case ARM_BUILTIN_WMACS: + case ARM_BUILTIN_WMACU: + case ARM_BUILTIN_WALIGN: + case ARM_BUILTIN_TMIA: + case ARM_BUILTIN_TMIAPH: + case ARM_BUILTIN_TMIATT: + case ARM_BUILTIN_TMIATB: + case ARM_BUILTIN_TMIABT: + case ARM_BUILTIN_TMIABB: + icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs + : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu + : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia + : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph + : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb + : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt + : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb + : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt + : CODE_FOR_iwmmxt_walign); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WZERO: + target = gen_reg_rtx (DImode); + emit_insn (gen_iwmmxt_clrdi (target)); + return target; + + case ARM_BUILTIN_THREAD_POINTER: + return arm_load_tp (target); + + default: + break; + } + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_binop_builtin (d->icode, exp, target); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_unop_builtin (d->icode, exp, target, 0); + + /* @@@ Should really do something sensible here. */ + return NULL_RTX; +} + +/* Return the number (counting from 0) of + the least significant set bit in MASK. */ + +inline static int +number_of_first_bit_set (unsigned mask) +{ + int bit; + + for (bit = 0; + (mask & (1 << bit)) == 0; + ++bit) + continue; + + return bit; +} + +/* Emit code to push or pop registers to or from the stack. F is the + assembly file. MASK is the registers to push or pop. PUSH is + nonzero if we should push, and zero if we should pop. For debugging + output, if pushing, adjust CFA_OFFSET by the amount of space added + to the stack. REAL_REGS should have the same number of bits set as + MASK, and will be used instead (in the same order) to describe which + registers were saved - this is used to mark the save slots when we + push high registers after moving them to low registers. */ +static void +thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset, + unsigned long real_regs) +{ + int regno; + int lo_mask = mask & 0xFF; + int pushed_words = 0; + + gcc_assert (mask); + + if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM))) + { + /* Special case. Do not generate a POP PC statement here, do it in + thumb_exit() */ + thumb_exit (f, -1); + return; + } + + if (push && arm_except_unwind_info (&global_options) == UI_TARGET) + { + fprintf (f, "\t.save\t{"); + for (regno = 0; regno < 15; regno++) + { + if (real_regs & (1 << regno)) + { + if (real_regs & ((1 << regno) -1)) + fprintf (f, ", "); + asm_fprintf (f, "%r", regno); + } + } + fprintf (f, "}\n"); + } + + fprintf (f, "\t%s\t{", push ? "push" : "pop"); + + /* Look at the low registers first. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1) + { + if (lo_mask & 1) + { + asm_fprintf (f, "%r", regno); + + if ((lo_mask & ~1) != 0) + fprintf (f, ", "); + + pushed_words++; + } + } + + if (push && (mask & (1 << LR_REGNUM))) + { + /* Catch pushing the LR. */ + if (mask & 0xFF) + fprintf (f, ", "); + + asm_fprintf (f, "%r", LR_REGNUM); + + pushed_words++; + } + else if (!push && (mask & (1 << PC_REGNUM))) + { + /* Catch popping the PC. */ + if (TARGET_INTERWORK || TARGET_BACKTRACE + || crtl->calls_eh_return) + { + /* The PC is never poped directly, instead + it is popped into r3 and then BX is used. 
*/ + fprintf (f, "}\n"); + + thumb_exit (f, -1); + + return; + } + else + { + if (mask & 0xFF) + fprintf (f, ", "); + + asm_fprintf (f, "%r", PC_REGNUM); + } + } + + fprintf (f, "}\n"); + + if (push && pushed_words && dwarf2out_do_frame ()) + { + char *l = dwarf2out_cfi_label (false); + int pushed_mask = real_regs; + + *cfa_offset += pushed_words * 4; + dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset); + + pushed_words = 0; + pushed_mask = real_regs; + for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1) + { + if (pushed_mask & 1) + dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset); + } + } +} + +/* Generate code to return from a thumb function. + If 'reg_containing_return_addr' is -1, then the return address is + actually on the stack, at the stack pointer. */ +static void +thumb_exit (FILE *f, int reg_containing_return_addr) +{ + unsigned regs_available_for_popping; + unsigned regs_to_pop; + int pops_needed; + unsigned available; + unsigned required; + int mode; + int size; + int restore_a4 = FALSE; + + /* Compute the registers we need to pop. */ + regs_to_pop = 0; + pops_needed = 0; + + if (reg_containing_return_addr == -1) + { + regs_to_pop |= 1 << LR_REGNUM; + ++pops_needed; + } + + if (TARGET_BACKTRACE) + { + /* Restore the (ARM) frame pointer and stack pointer. */ + regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM); + pops_needed += 2; + } + + /* If there is nothing to pop then just emit the BX instruction and + return. */ + if (pops_needed == 0) + { + if (crtl->calls_eh_return) + asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); + + asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); + return; + } + /* Otherwise if we are not supporting interworking and we have not created + a backtrace structure and the function was not entered in ARM mode then + just pop the return address straight into the PC. */ + else if (!TARGET_INTERWORK + && !TARGET_BACKTRACE + && !is_called_in_ARM_mode (current_function_decl) + && !crtl->calls_eh_return) + { + asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM); + return; + } + + /* Find out how many of the (return) argument registers we can corrupt. */ + regs_available_for_popping = 0; + + /* If returning via __builtin_eh_return, the bottom three registers + all contain information needed for the return. */ + if (crtl->calls_eh_return) + size = 12; + else + { + /* If we can deduce the registers used from the function's + return value. This is more reliable that examining + df_regs_ever_live_p () because that will be set if the register is + ever used in the function, not just if the register is used + to hold a return value. */ + + if (crtl->return_rtx != 0) + mode = GET_MODE (crtl->return_rtx); + else + mode = DECL_MODE (DECL_RESULT (current_function_decl)); + + size = GET_MODE_SIZE (mode); + + if (size == 0) + { + /* In a void function we can use any argument register. + In a function that returns a structure on the stack + we can use the second and third argument registers. */ + if (mode == VOIDmode) + regs_available_for_popping = + (1 << ARG_REGISTER (1)) + | (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + else + regs_available_for_popping = + (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + } + else if (size <= 4) + regs_available_for_popping = + (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + else if (size <= 8) + regs_available_for_popping = + (1 << ARG_REGISTER (3)); + } + + /* Match registers to be popped with registers into which we pop them. 
*/ + for (available = regs_available_for_popping, + required = regs_to_pop; + required != 0 && available != 0; + available &= ~(available & - available), + required &= ~(required & - required)) + -- pops_needed; + + /* If we have any popping registers left over, remove them. */ + if (available > 0) + regs_available_for_popping &= ~available; + + /* Otherwise if we need another popping register we can use + the fourth argument register. */ + else if (pops_needed) + { + /* If we have not found any free argument registers and + reg a4 contains the return address, we must move it. */ + if (regs_available_for_popping == 0 + && reg_containing_return_addr == LAST_ARG_REGNUM) + { + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM); + reg_containing_return_addr = LR_REGNUM; + } + else if (size > 12) + { + /* Register a4 is being used to hold part of the return value, + but we have dire need of a free, low register. */ + restore_a4 = TRUE; + + asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM); + } + + if (reg_containing_return_addr != LAST_ARG_REGNUM) + { + /* The fourth argument register is available. */ + regs_available_for_popping |= 1 << LAST_ARG_REGNUM; + + --pops_needed; + } + } + + /* Pop as many registers as we can. */ + thumb_pushpop (f, regs_available_for_popping, FALSE, NULL, + regs_available_for_popping); + + /* Process the registers we popped. */ + if (reg_containing_return_addr == -1) + { + /* The return address was popped into the lowest numbered register. */ + regs_to_pop &= ~(1 << LR_REGNUM); + + reg_containing_return_addr = + number_of_first_bit_set (regs_available_for_popping); + + /* Remove this register for the mask of available registers, so that + the return address will not be corrupted by further pops. */ + regs_available_for_popping &= ~(1 << reg_containing_return_addr); + } + + /* If we popped other registers then handle them here. */ + if (regs_available_for_popping) + { + int frame_pointer; + + /* Work out which register currently contains the frame pointer. */ + frame_pointer = number_of_first_bit_set (regs_available_for_popping); + + /* Move it into the correct place. */ + asm_fprintf (f, "\tmov\t%r, %r\n", + ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer); + + /* (Temporarily) remove it from the mask of popped registers. */ + regs_available_for_popping &= ~(1 << frame_pointer); + regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM); + + if (regs_available_for_popping) + { + int stack_pointer; + + /* We popped the stack pointer as well, + find the register that contains it. */ + stack_pointer = number_of_first_bit_set (regs_available_for_popping); + + /* Move it into the stack register. */ + asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer); + + /* At this point we have popped all necessary registers, so + do not worry about restoring regs_available_for_popping + to its correct value: + + assert (pops_needed == 0) + assert (regs_available_for_popping == (1 << frame_pointer)) + assert (regs_to_pop == (1 << STACK_POINTER)) */ + } + else + { + /* Since we have just move the popped value into the frame + pointer, the popping register is available for reuse, and + we know that we still have the stack pointer left to pop. */ + regs_available_for_popping |= (1 << frame_pointer); + } + } + + /* If we still have registers left on the stack, but we no longer have + any registers into which we can pop them, then we must move the return + address into the link register and make available the register that + contained it. 
*/ + if (regs_available_for_popping == 0 && pops_needed > 0) + { + regs_available_for_popping |= 1 << reg_containing_return_addr; + + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, + reg_containing_return_addr); + + reg_containing_return_addr = LR_REGNUM; + } + + /* If we have registers left on the stack then pop some more. + We know that at most we will want to pop FP and SP. */ + if (pops_needed > 0) + { + int popped_into; + int move_to; + + thumb_pushpop (f, regs_available_for_popping, FALSE, NULL, + regs_available_for_popping); + + /* We have popped either FP or SP. + Move whichever one it is into the correct register. */ + popped_into = number_of_first_bit_set (regs_available_for_popping); + move_to = number_of_first_bit_set (regs_to_pop); + + asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into); + + regs_to_pop &= ~(1 << move_to); + + --pops_needed; + } + + /* If we still have not popped everything then we must have only + had one register available to us and we are now popping the SP. */ + if (pops_needed > 0) + { + int popped_into; + + thumb_pushpop (f, regs_available_for_popping, FALSE, NULL, + regs_available_for_popping); + + popped_into = number_of_first_bit_set (regs_available_for_popping); + + asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into); + /* + assert (regs_to_pop == (1 << STACK_POINTER)) + assert (pops_needed == 1) + */ + } + + /* If necessary restore the a4 register. */ + if (restore_a4) + { + if (reg_containing_return_addr != LR_REGNUM) + { + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM); + reg_containing_return_addr = LR_REGNUM; + } + + asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM); + } + + if (crtl->calls_eh_return) + asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); + + /* Return to caller. */ + asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); +} + +/* Scan INSN just before assembler is output for it. + For Thumb-1, we track the status of the condition codes; this + information is used in the cbranchsi4_insn pattern. */ +void +thumb1_final_prescan_insn (rtx insn) +{ + if (flag_print_asm_name) + asm_fprintf (asm_out_file, "%@ 0x%04x\n", + INSN_ADDRESSES (INSN_UID (insn))); + /* Don't overwrite the previous setter when we get to a cbranch. */ + if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + { + enum attr_conds conds; + + if (cfun->machine->thumb1_cc_insn) + { + if (modified_in_p (cfun->machine->thumb1_cc_op0, insn) + || modified_in_p (cfun->machine->thumb1_cc_op1, insn)) + CC_STATUS_INIT; + } + conds = get_attr_conds (insn); + if (conds == CONDS_SET) + { + rtx set = single_set (insn); + cfun->machine->thumb1_cc_insn = insn; + cfun->machine->thumb1_cc_op0 = SET_DEST (set); + cfun->machine->thumb1_cc_op1 = const0_rtx; + cfun->machine->thumb1_cc_mode = CC_NOOVmode; + if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn) + { + rtx src1 = XEXP (SET_SRC (set), 1); + if (src1 == const0_rtx) + cfun->machine->thumb1_cc_mode = CCmode; + } + } + else if (conds != CONDS_NOCOND) + cfun->machine->thumb1_cc_insn = NULL_RTX; + } +} + +int +thumb_shiftable_const (unsigned HOST_WIDE_INT val) +{ + unsigned HOST_WIDE_INT mask = 0xff; + int i; + + val = val & (unsigned HOST_WIDE_INT)0xffffffffu; + if (val == 0) /* XXX */ + return 0; + + for (i = 0; i < 25; i++) + if ((val & (mask << i)) == val) + return 1; + + return 0; +} + +/* Returns nonzero if the current function contains, + or might contain a far jump. 
*/ +static int +thumb_far_jump_used_p (void) +{ + rtx insn; + + /* This test is only important for leaf functions. */ + /* assert (!leaf_function_p ()); */ + + /* If we have already decided that far jumps may be used, + do not bother checking again, and always return true even if + it turns out that they are not being used. Once we have made + the decision that far jumps are present (and that hence the link + register will be pushed onto the stack) we cannot go back on it. */ + if (cfun->machine->far_jump_used) + return 1; + + /* If this function is not being called from the prologue/epilogue + generation code then it must be being called from the + INITIAL_ELIMINATION_OFFSET macro. */ + if (!(ARM_DOUBLEWORD_ALIGN || reload_completed)) + { + /* In this case we know that we are being asked about the elimination + of the arg pointer register. If that register is not being used, + then there are no arguments on the stack, and we do not have to + worry that a far jump might force the prologue to push the link + register, changing the stack offsets. In this case we can just + return false, since the presence of far jumps in the function will + not affect stack offsets. + + If the arg pointer is live (or if it was live, but has now been + eliminated and so set to dead) then we do have to test to see if + the function might contain a far jump. This test can lead to some + false negatives, since before reload is completed, then length of + branch instructions is not known, so gcc defaults to returning their + longest length, which in turn sets the far jump attribute to true. + + A false negative will not result in bad code being generated, but it + will result in a needless push and pop of the link register. We + hope that this does not occur too often. + + If we need doubleword stack alignment this could affect the other + elimination offsets so we can't risk getting it wrong. */ + if (df_regs_ever_live_p (ARG_POINTER_REGNUM)) + cfun->machine->arg_pointer_live = 1; + else if (!cfun->machine->arg_pointer_live) + return 0; + } + + /* Check to see if the function contains a branch + insn with the far jump attribute set. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == JUMP_INSN + /* Ignore tablejump patterns. */ + && GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC + && get_attr_far_jump (insn) == FAR_JUMP_YES + ) + { + /* Record the fact that we have decided that + the function does use far jumps. */ + cfun->machine->far_jump_used = 1; + return 1; + } + } + + return 0; +} + +/* Return nonzero if FUNC must be entered in ARM mode. */ +int +is_called_in_ARM_mode (tree func) +{ + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + /* Ignore the problem about functions whose address is taken. */ + if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func)) + return TRUE; + +#ifdef ARM_PE + return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE; +#else + return FALSE; +#endif +} + +/* Given the stack offsets and register mask in OFFSETS, decide how + many additional registers to push instead of subtracting a constant + from SP. For epilogues the principle is the same except we use pop. + FOR_PROLOGUE indicates which we're generating. */ +static int +thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue) +{ + HOST_WIDE_INT amount; + unsigned long live_regs_mask = offsets->saved_regs_mask; + /* Extract a mask of the ones we can give to the Thumb's push/pop + instruction. 
*/ + unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff); + /* Then count how many other high registers will need to be pushed. */ + unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + int n_free, reg_base, size; + + if (!for_prologue && frame_pointer_needed) + amount = offsets->locals_base - offsets->saved_regs; + else + amount = offsets->outgoing_args - offsets->saved_regs; + + /* If the stack frame size is 512 exactly, we can save one load + instruction, which should make this a win even when optimizing + for speed. */ + if (!optimize_size && amount != 512) + return 0; + + /* Can't do this if there are high registers to push. */ + if (high_regs_pushed != 0) + return 0; + + /* Shouldn't do it in the prologue if no registers would normally + be pushed at all. In the epilogue, also allow it if we'll have + a pop insn for the PC. */ + if (l_mask == 0 + && (for_prologue + || TARGET_BACKTRACE + || (live_regs_mask & 1 << LR_REGNUM) == 0 + || TARGET_INTERWORK + || crtl->args.pretend_args_size != 0)) + return 0; + + /* Don't do this if thumb_expand_prologue wants to emit instructions + between the push and the stack frame allocation. */ + if (for_prologue + && ((flag_pic && arm_pic_register != INVALID_REGNUM) + || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))) + return 0; + + reg_base = 0; + n_free = 0; + if (!for_prologue) + { + size = arm_size_return_regs (); + reg_base = ARM_NUM_INTS (size); + live_regs_mask >>= reg_base; + } + + while (reg_base + n_free < 8 && !(live_regs_mask & 1) + && (for_prologue || call_used_regs[reg_base + n_free])) + { + live_regs_mask >>= 1; + n_free++; + } + + if (n_free == 0) + return 0; + gcc_assert (amount / 4 * 4 == amount); + + if (amount >= 512 && (amount - n_free * 4) < 512) + return (amount - 508) / 4; + if (amount <= n_free * 4) + return amount / 4; + return 0; +} + +/* The bits which aren't usefully expanded as rtl. */ +const char * +thumb_unexpanded_epilogue (void) +{ + arm_stack_offsets *offsets; + int regno; + unsigned long live_regs_mask = 0; + int high_regs_pushed = 0; + int extra_pop; + int had_to_push_lr; + int size; + + if (cfun->machine->return_used_this_function != 0) + return ""; + + if (IS_NAKED (arm_current_func_type ())) + return ""; + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + + /* If we can deduce the registers used from the function's return value. + This is more reliable that examining df_regs_ever_live_p () because that + will be set if the register is ever used in the function, not just if + the register is used to hold a return value. */ + size = arm_size_return_regs (); + + extra_pop = thumb1_extra_regs_pushed (offsets, false); + if (extra_pop > 0) + { + unsigned long extra_mask = (1 << extra_pop) - 1; + live_regs_mask |= extra_mask << ARM_NUM_INTS (size); + } + + /* The prolog may have pushed some high registers to use as + work registers. e.g. the testsuite file: + gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c + compiles to produce: + push {r4, r5, r6, r7, lr} + mov r7, r9 + mov r6, r8 + push {r6, r7} + as part of the prolog. We have to undo that pushing here. */ + + if (high_regs_pushed) + { + unsigned long mask = live_regs_mask & 0xff; + int next_hi_reg; + + /* The available low registers depend on the size of the value we are + returning. */ + if (size <= 12) + mask |= 1 << 3; + if (size <= 8) + mask |= 1 << 2; + + if (mask == 0) + /* Oh dear! 
We have no low registers into which we can pop + high registers! */ + internal_error + ("no low registers available for popping high registers"); + + for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++) + if (live_regs_mask & (1 << next_hi_reg)) + break; + + while (high_regs_pushed) + { + /* Find lo register(s) into which the high register(s) can + be popped. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + { + if (mask & (1 << regno)) + high_regs_pushed--; + if (high_regs_pushed == 0) + break; + } + + mask &= (2 << regno) - 1; /* A noop if regno == 8 */ + + /* Pop the values into the low register(s). */ + thumb_pushpop (asm_out_file, mask, 0, NULL, mask); + + /* Move the value(s) into the high registers. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + { + if (mask & (1 << regno)) + { + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg, + regno); + + for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++) + if (live_regs_mask & (1 << next_hi_reg)) + break; + } + } + } + live_regs_mask &= ~0x0f00; + } + + had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0; + live_regs_mask &= 0xff; + + if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE) + { + /* Pop the return address into the PC. */ + if (had_to_push_lr) + live_regs_mask |= 1 << PC_REGNUM; + + /* Either no argument registers were pushed or a backtrace + structure was created which includes an adjusted stack + pointer, so just pop everything. */ + if (live_regs_mask) + thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL, + live_regs_mask); + + /* We have either just popped the return address into the + PC or it is was kept in LR for the entire function. + Note that thumb_pushpop has already called thumb_exit if the + PC was in the list. */ + if (!had_to_push_lr) + thumb_exit (asm_out_file, LR_REGNUM); + } + else + { + /* Pop everything but the return address. */ + if (live_regs_mask) + thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL, + live_regs_mask); + + if (had_to_push_lr) + { + if (size > 12) + { + /* We have no free low regs, so save one. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM, + LAST_ARG_REGNUM); + } + + /* Get the return address into a temporary register. */ + thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL, + 1 << LAST_ARG_REGNUM); + + if (size > 12) + { + /* Move the return address to lr. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM, + LAST_ARG_REGNUM); + /* Restore the low register. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, + IP_REGNUM); + regno = LR_REGNUM; + } + else + regno = LAST_ARG_REGNUM; + } + else + regno = LR_REGNUM; + + /* Remove the argument registers that were pushed onto the stack. */ + asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n", + SP_REGNUM, SP_REGNUM, + crtl->args.pretend_args_size); + + thumb_exit (asm_out_file, regno); + } + + return ""; +} + +/* Functions to save and restore machine-specific function data. */ +static struct machine_function * +arm_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + +#if ARM_FT_UNKNOWN != 0 + machine->func_type = ARM_FT_UNKNOWN; +#endif + return machine; +} + +/* Return an RTX indicating where the return address to the + calling function can be found. */ +rtx +arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return NULL_RTX; + + return get_hard_reg_initial_val (Pmode, LR_REGNUM); +} + +/* Do anything needed before RTL is emitted for each function. 
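/* Editorial sketch, not part of the upstream patch: the shape of the sequence
   the epilogue code above emits when high registers were saved.  Thumb-1 pop
   cannot target r8-r12, so the saved values are popped into free low
   registers and then moved up.  The register choices below are only an
   example.  */
#include <stdio.h>

int
main (void)
{
  /* Say r8 and r9 were saved and r2, r3 are free.  */
  printf ("\tpop\t{r2, r3}\n");
  printf ("\tmov\tr8, r2\n");
  printf ("\tmov\tr9, r3\n");
  return 0;
}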
*/ +void +arm_init_expanders (void) +{ + /* Arrange to initialize and mark the machine per-function status. */ + init_machine_status = arm_init_machine_status; + + /* This is to stop the combine pass optimizing away the alignment + adjustment of va_arg. */ + /* ??? It is claimed that this should not be necessary. */ + if (cfun) + mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY); +} + + +/* Like arm_compute_initial_elimination offset. Simpler because there + isn't an ABI specified frame pointer for Thumb. Instead, we set it + to point at the base of the local variables after static stack + space for a function has been allocated. */ + +HOST_WIDE_INT +thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + + switch (from) + { + case ARG_POINTER_REGNUM: + switch (to) + { + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->saved_args; + + case FRAME_POINTER_REGNUM: + return offsets->soft_frame - offsets->saved_args; + + case ARM_HARD_FRAME_POINTER_REGNUM: + return offsets->saved_regs - offsets->saved_args; + + case THUMB_HARD_FRAME_POINTER_REGNUM: + return offsets->locals_base - offsets->saved_args; + + default: + gcc_unreachable (); + } + break; + + case FRAME_POINTER_REGNUM: + switch (to) + { + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->soft_frame; + + case ARM_HARD_FRAME_POINTER_REGNUM: + return offsets->saved_regs - offsets->soft_frame; + + case THUMB_HARD_FRAME_POINTER_REGNUM: + return offsets->locals_base - offsets->soft_frame; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } +} + +/* Generate the rest of a function's prologue. */ +void +thumb1_expand_prologue (void) +{ + rtx insn, dwarf; + + HOST_WIDE_INT amount; + arm_stack_offsets *offsets; + unsigned long func_type; + int regno; + unsigned long live_regs_mask; + + func_type = arm_current_func_type (); + + /* Naked functions don't have prologues. */ + if (IS_NAKED (func_type)) + return; + + if (IS_INTERRUPT (func_type)) + { + error ("interrupt Service Routines cannot be coded in Thumb mode"); + return; + } + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + /* Load the pic register before setting the frame pointer, + so we can use r7 as a temporary work register. */ + if (flag_pic && arm_pic_register != INVALID_REGNUM) + arm_load_pic_register (live_regs_mask); + + if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) + emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), + stack_pointer_rtx); + + if (flag_stack_usage) + current_function_static_stack_size + = offsets->outgoing_args - offsets->saved_args; + + amount = offsets->outgoing_args - offsets->saved_regs; + amount -= 4 * thumb1_extra_regs_pushed (offsets, true); + if (amount) + { + if (amount < 512) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (- amount))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + rtx reg; + + /* The stack decrement is too big for an immediate value in a single + insn. In theory we could issue multiple subtracts, but after + three of them it becomes more space efficient to place the full + value in the constant pool and load into a register. (Also the + ARM debugger really likes to see only one stack decrement per + function). So instead we look for a scratch register into which + we can load the decrement, and then we subtract this from the + stack pointer. 
Unfortunately on the thumb the only available + scratch registers are the argument registers, and we cannot use + these as they may hold arguments to the function. Instead we + attempt to locate a call preserved register which is used by this + function. If we can find one, then we know that it will have + been pushed at the start of the prologue and so we can corrupt + it now. */ + for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++) + if (live_regs_mask & (1 << regno)) + break; + + gcc_assert(regno <= LAST_LO_REGNUM); + + reg = gen_rtx_REG (SImode, regno); + + emit_insn (gen_movsi (reg, GEN_INT (- amount))); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, reg)); + RTX_FRAME_RELATED_P (insn) = 1; + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -amount)); + RTX_FRAME_RELATED_P (dwarf) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + } + + if (frame_pointer_needed) + thumb_set_frame_pointer (offsets); + + /* If we are profiling, make sure no instructions are scheduled before + the call to mcount. Similarly if the user has requested no + scheduling in the prolog. Similarly if we want non-call exceptions + using the EABI unwinder, to prevent faulting instructions from being + swapped with a stack adjustment. */ + if (crtl->profile || !TARGET_SCHED_PROLOG + || (arm_except_unwind_info (&global_options) == UI_TARGET + && cfun->can_throw_non_call_exceptions)) + emit_insn (gen_blockage ()); + + cfun->machine->lr_save_eliminated = !thumb_force_lr_save (); + if (live_regs_mask & 0xff) + cfun->machine->lr_save_eliminated = 0; +} + + +void +thumb1_expand_epilogue (void) +{ + HOST_WIDE_INT amount; + arm_stack_offsets *offsets; + int regno; + + /* Naked functions don't have prologues. */ + if (IS_NAKED (arm_current_func_type ())) + return; + + offsets = arm_get_frame_offsets (); + amount = offsets->outgoing_args - offsets->saved_regs; + + if (frame_pointer_needed) + { + emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); + amount = offsets->locals_base - offsets->saved_regs; + } + amount -= 4 * thumb1_extra_regs_pushed (offsets, false); + + gcc_assert (amount >= 0); + if (amount) + { + emit_insn (gen_blockage ()); + + if (amount < 512) + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (amount))); + else + { + /* r3 is always free in the epilogue. */ + rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); + + emit_insn (gen_movsi (reg, GEN_INT (amount))); + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg)); + } + } + + /* Emit a USE (stack_pointer_rtx), so that + the stack adjustment will not be deleted. */ + emit_insn (gen_prologue_use (stack_pointer_rtx)); + + if (crtl->profile || !TARGET_SCHED_PROLOG) + emit_insn (gen_blockage ()); + + /* Emit a clobber for each insn that will be restored in the epilogue, + so that flow2 will get register lifetimes correct. */ + for (regno = 0; regno < 13; regno++) + if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) + emit_clobber (gen_rtx_REG (SImode, regno)); + + if (! 
df_regs_ever_live_p (LR_REGNUM)) + emit_use (gen_rtx_REG (SImode, LR_REGNUM)); +} + +static void +thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + arm_stack_offsets *offsets; + unsigned long live_regs_mask = 0; + unsigned long l_mask; + unsigned high_regs_pushed = 0; + int cfa_offset = 0; + int regno; + + if (IS_NAKED (arm_current_func_type ())) + return; + + if (is_called_in_ARM_mode (current_function_decl)) + { + const char * name; + + gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM); + gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0)) + == SYMBOL_REF); + name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + + /* Generate code sequence to switch us into Thumb mode. */ + /* The .code 32 directive has already been emitted by + ASM_DECLARE_FUNCTION_NAME. */ + asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM); + + /* Generate a label, so that the debugger will notice the + change in instruction sets. This label is also used by + the assembler to bypass the ARM code when this function + is called from a Thumb encoded function elsewhere in the + same file. Hence the definition of STUB_NAME here must + agree with the definition in gas/config/tc-arm.c. */ + +#define STUB_NAME ".real_start_of" + + fprintf (f, "\t.code\t16\n"); +#ifdef ARM_PE + if (arm_dllexport_name_p (name)) + name = arm_strip_name_encoding (name); +#endif + asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name); + fprintf (f, "\t.thumb_func\n"); + asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name); + } + + if (crtl->args.pretend_args_size) + { + /* Output unwind directive for the stack adjustment. */ + if (arm_except_unwind_info (&global_options) == UI_TARGET) + fprintf (f, "\t.pad #%d\n", + crtl->args.pretend_args_size); + + if (cfun->machine->uses_anonymous_args) + { + int num_pushes; + + fprintf (f, "\tpush\t{"); + + num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size); + + for (regno = LAST_ARG_REGNUM + 1 - num_pushes; + regno <= LAST_ARG_REGNUM; + regno++) + asm_fprintf (f, "%r%s", regno, + regno == LAST_ARG_REGNUM ? "" : ", "); + + fprintf (f, "}\n"); + } + else + asm_fprintf (f, "\tsub\t%r, %r, #%d\n", + SP_REGNUM, SP_REGNUM, + crtl->args.pretend_args_size); + + /* We don't need to record the stores for unwinding (would it + help the debugger any if we did?), but record the change in + the stack pointer. */ + if (dwarf2out_do_frame ()) + { + char *l = dwarf2out_cfi_label (false); + + cfa_offset = cfa_offset + crtl->args.pretend_args_size; + dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); + } + } + + /* Get the registers we are going to push. */ + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + /* Extract a mask of the ones we can give to the Thumb's push instruction. */ + l_mask = live_regs_mask & 0x40ff; + /* Then count how many other high registers will need to be pushed. */ + high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + + if (TARGET_BACKTRACE) + { + unsigned offset; + unsigned work_register; + + /* We have been asked to create a stack backtrace structure. + The code looks like this: + + 0 .align 2 + 0 func: + 0 sub SP, #16 Reserve space for 4 registers. + 2 push {R7} Push low registers. + 4 add R7, SP, #20 Get the stack pointer before the push. + 6 str R7, [SP, #8] Store the stack pointer (before reserving the space). + 8 mov R7, PC Get hold of the start of this code plus 12. + 10 str R7, [SP, #16] Store it. 
+ 12 mov R7, FP Get hold of the current frame pointer. + 14 str R7, [SP, #4] Store it. + 16 mov R7, LR Get hold of the current return address. + 18 str R7, [SP, #12] Store it. + 20 add R7, SP, #16 Point at the start of the backtrace structure. + 22 mov FP, R7 Put this value into the frame pointer. */ + + work_register = thumb_find_work_register (live_regs_mask); + + if (arm_except_unwind_info (&global_options) == UI_TARGET) + asm_fprintf (f, "\t.pad #16\n"); + + asm_fprintf + (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n", + SP_REGNUM, SP_REGNUM); + + if (dwarf2out_do_frame ()) + { + char *l = dwarf2out_cfi_label (false); + + cfa_offset = cfa_offset + 16; + dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); + } + + if (l_mask) + { + thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask); + offset = bit_count (l_mask) * UNITS_PER_WORD; + } + else + offset = 0; + + asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM, + offset + 16 + crtl->args.pretend_args_size); + + asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, + offset + 4); + + /* Make sure that the instruction fetching the PC is in the right place + to calculate "start of backtrace creation code + 12". */ + if (l_mask) + { + asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM); + asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, + offset + 12); + asm_fprintf (f, "\tmov\t%r, %r\n", work_register, + ARM_HARD_FRAME_POINTER_REGNUM); + asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, + offset); + } + else + { + asm_fprintf (f, "\tmov\t%r, %r\n", work_register, + ARM_HARD_FRAME_POINTER_REGNUM); + asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, + offset); + asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM); + asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, + offset + 12); + } + + asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM); + asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, + offset + 8); + asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM, + offset + 12); + asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n", + ARM_HARD_FRAME_POINTER_REGNUM, work_register); + } + /* Optimization: If we are not pushing any low registers but we are going + to push some high registers then delay our first push. This will just + be a push of LR and we can combine it with the push of the first high + register. 
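/* Editorial sketch, not part of the upstream patch: the 16-byte block the
   TARGET_BACKTRACE code above assembles, written out as a struct.  As we read
   the sequence, the new frame pointer is left pointing at the last field, so
   a backtrace can walk frames by following saved_fp.  The field names are
   ours, not GCC's or the APCS document's.  */
struct thumb_backtrace_block
{
  unsigned long saved_fp;   /* caller's frame pointer,  fp - 12 */
  unsigned long saved_sp;   /* sp before the prologue,  fp - 8  */
  unsigned long saved_lr;   /* return address,          fp - 4  */
  unsigned long save_code;  /* pc within this prologue, fp + 0  */
};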
*/ + else if ((l_mask & 0xff) != 0 + || (high_regs_pushed == 0 && l_mask)) + { + unsigned long mask = l_mask; + mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1; + thumb_pushpop (f, mask, 1, &cfa_offset, mask); + } + + if (high_regs_pushed) + { + unsigned pushable_regs; + unsigned next_hi_reg; + + for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--) + if (live_regs_mask & (1 << next_hi_reg)) + break; + + pushable_regs = l_mask & 0xff; + + if (pushable_regs == 0) + pushable_regs = 1 << thumb_find_work_register (live_regs_mask); + + while (high_regs_pushed > 0) + { + unsigned long real_regs_mask = 0; + + for (regno = LAST_LO_REGNUM; regno >= 0; regno --) + { + if (pushable_regs & (1 << regno)) + { + asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg); + + high_regs_pushed --; + real_regs_mask |= (1 << next_hi_reg); + + if (high_regs_pushed) + { + for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM; + next_hi_reg --) + if (live_regs_mask & (1 << next_hi_reg)) + break; + } + else + { + pushable_regs &= ~((1 << regno) - 1); + break; + } + } + } + + /* If we had to find a work register and we have not yet + saved the LR then add it to the list of regs to push. */ + if (l_mask == (1 << LR_REGNUM)) + { + thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM), + 1, &cfa_offset, + real_regs_mask | (1 << LR_REGNUM)); + l_mask = 0; + } + else + thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask); + } + } +} + +/* Handle the case of a double word load into a low register from + a computed memory address. The computed address may involve a + register which is overwritten by the load. */ +const char * +thumb_load_double_from_address (rtx *operands) +{ + rtx addr; + rtx base; + rtx offset; + rtx arg1; + rtx arg2; + + gcc_assert (GET_CODE (operands[0]) == REG); + gcc_assert (GET_CODE (operands[1]) == MEM); + + /* Get the memory address. */ + addr = XEXP (operands[1], 0); + + /* Work out how the memory address is computed. */ + switch (GET_CODE (addr)) + { + case REG: + operands[2] = adjust_address (operands[1], SImode, 4); + + if (REGNO (operands[0]) == REGNO (addr)) + { + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + } + else + { + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + } + break; + + case CONST: + /* Compute
<address> + 4 for the high order load. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + break; + + case PLUS: + arg1 = XEXP (addr, 0); + arg2 = XEXP (addr, 1); + + if (CONSTANT_P (arg1)) + base = arg2, offset = arg1; + else + base = arg1, offset = arg2; + + gcc_assert (GET_CODE (base) == REG); + + /* Catch the case of
<address> = <reg> + <reg> */ + if (GET_CODE (offset) == REG) + { + int reg_offset = REGNO (offset); + int reg_base = REGNO (base); + int reg_dest = REGNO (operands[0]); + + /* Add the base and offset registers together into the + higher destination register. */ + asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r", + reg_dest + 1, reg_base, reg_offset); + + /* Load the lower destination register from the address in + the higher destination register. */ + asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]", + reg_dest, reg_dest + 1); + + /* Load the higher destination register from its own address + plus 4. */ + asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]", + reg_dest + 1, reg_dest + 1); + } + else + { + /* Compute <address>
+ 4 for the high order load. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + /* If the computed address is held in the low order register + then load the high order register first, otherwise always + load the low order register first. */ + if (REGNO (operands[0]) == REGNO (base)) + { + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + } + else + { + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + } + } + break; + + case LABEL_REF: + /* With no registers to worry about we can just load the value + directly. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + break; + + default: + gcc_unreachable (); + } + + return ""; +} + +const char * +thumb_output_move_mem_multiple (int n, rtx *operands) +{ + rtx tmp; + + switch (n) + { + case 2: + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + output_asm_insn ("ldmia\t%1!, {%4, %5}", operands); + output_asm_insn ("stmia\t%0!, {%4, %5}", operands); + break; + + case 3: + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + if (REGNO (operands[5]) > REGNO (operands[6])) + { + tmp = operands[5]; + operands[5] = operands[6]; + operands[6] = tmp; + } + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + + output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands); + output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands); + break; + + default: + gcc_unreachable (); + } + + return ""; +} + +/* Output a call-via instruction for thumb state. */ +const char * +thumb_call_via_reg (rtx reg) +{ + int regno = REGNO (reg); + rtx *labelp; + + gcc_assert (regno < LR_REGNUM); + + /* If we are in the normal text section we can use a single instance + per compilation unit. If we are doing function sections, then we need + an entry per section, since we can't rely on reachability. */ + if (in_section == text_section) + { + thumb_call_reg_needed = 1; + + if (thumb_call_via_label[regno] == NULL) + thumb_call_via_label[regno] = gen_label_rtx (); + labelp = thumb_call_via_label + regno; + } + else + { + if (cfun->machine->call_via[regno] == NULL) + cfun->machine->call_via[regno] = gen_label_rtx (); + labelp = cfun->machine->call_via + regno; + } + + output_asm_insn ("bl\t%a0", labelp); + return ""; +} + +/* Routines for generating rtl. 
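/* Editorial sketch, not part of the upstream patch: the chunking strategy
   thumb_expand_movmemqi below implements for fixed-size block copies --
   12-byte and 8-byte ldmia/stmia groups first, then a word, a halfword and a
   byte for the tail.  memcpy stands in for the emitted insns; this is plain
   C, not GCC code.  */
#include <stdio.h>
#include <string.h>

static void
copy_like_movmemqi (unsigned char *out, const unsigned char *in, long len)
{
  long offset = 0;

  while (len >= 12)
    {
      memcpy (out + offset, in + offset, 12);
      offset += 12;
      len -= 12;
    }
  if (len >= 8)
    {
      memcpy (out + offset, in + offset, 8);
      offset += 8;
      len -= 8;
    }
  if (len >= 4)
    {
      memcpy (out + offset, in + offset, 4);
      offset += 4;
      len -= 4;
    }
  if (len >= 2)
    {
      memcpy (out + offset, in + offset, 2);
      offset += 2;
      len -= 2;
    }
  if (len)
    memcpy (out + offset, in + offset, 1);
}

int
main (void)
{
  unsigned char src[27], dst[27];
  int i;

  for (i = 0; i < 27; i++)
    src[i] = (unsigned char) i;
  copy_like_movmemqi (dst, src, 27);
  printf ("%s\n", memcmp (src, dst, sizeof src) == 0 ? "copied" : "mismatch");
  return 0;
}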
*/ +void +thumb_expand_movmemqi (rtx *operands) +{ + rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0)); + rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + HOST_WIDE_INT len = INTVAL (operands[2]); + HOST_WIDE_INT offset = 0; + + while (len >= 12) + { + emit_insn (gen_movmem12b (out, in, out, in)); + len -= 12; + } + + if (len >= 8) + { + emit_insn (gen_movmem8b (out, in, out, in)); + len -= 8; + } + + if (len >= 4) + { + rtx reg = gen_reg_rtx (SImode); + emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in))); + emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg)); + len -= 4; + offset += 4; + } + + if (len >= 2) + { + rtx reg = gen_reg_rtx (HImode); + emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode, + plus_constant (in, offset)))); + emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)), + reg)); + len -= 2; + offset += 2; + } + + if (len) + { + rtx reg = gen_reg_rtx (QImode); + emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode, + plus_constant (in, offset)))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)), + reg)); + } +} + +void +thumb_reload_out_hi (rtx *operands) +{ + emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2])); +} + +/* Handle reading a half-word from memory during reload. */ +void +thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED) +{ + gcc_unreachable (); +} + +/* Return the length of a function name prefix + that starts with the character 'c'. */ +static int +arm_get_strip_length (int c) +{ + switch (c) + { + ARM_NAME_ENCODING_LENGTHS + default: return 0; + } +} + +/* Return a pointer to a function's name with any + and all prefix encodings stripped from it. */ +const char * +arm_strip_name_encoding (const char *name) +{ + int skip; + + while ((skip = arm_get_strip_length (* name))) + name += skip; + + return name; +} + +/* If there is a '*' anywhere in the name's prefix, then + emit the stripped name verbatim, otherwise prepend an + underscore if leading underscores are being used. */ +void +arm_asm_output_labelref (FILE *stream, const char *name) +{ + int skip; + int verbatim = 0; + + while ((skip = arm_get_strip_length (* name))) + { + verbatim |= (*name == '*'); + name += skip; + } + + if (verbatim) + fputs (name, stream); + else + asm_fprintf (stream, "%U%s", name); +} + +static void +arm_file_start (void) +{ + int val; + + if (TARGET_UNIFIED_ASM) + asm_fprintf (asm_out_file, "\t.syntax unified\n"); + + if (TARGET_BPABI) + { + const char *fpu_name; + if (arm_selected_arch) + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); + else + asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name); + + if (TARGET_SOFT_FLOAT) + { + if (TARGET_VFP) + fpu_name = "softvfp"; + else + fpu_name = "softfpa"; + } + else + { + fpu_name = arm_fpu_desc->name; + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) + { + if (TARGET_HARD_FLOAT) + asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); + if (TARGET_HARD_FLOAT_ABI) + asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n"); + } + } + asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name); + + /* Some of these attributes only apply when the corresponding features + are used. However we don't have any easy way of figuring this out. + Conservatively record the setting that would have been used. */ + + /* Tag_ABI_FP_rounding. */ + if (flag_rounding_math) + asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n"); + if (!flag_unsafe_math_optimizations) + { + /* Tag_ABI_FP_denomal. 
*/ + asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n"); + /* Tag_ABI_FP_exceptions. */ + asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n"); + } + /* Tag_ABI_FP_user_exceptions. */ + if (flag_signaling_nans) + asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n"); + /* Tag_ABI_FP_number_model. */ + asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n", + flag_finite_math_only ? 1 : 3); + + /* Tag_ABI_align8_needed. */ + asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n"); + /* Tag_ABI_align8_preserved. */ + asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n"); + /* Tag_ABI_enum_size. */ + asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n", + flag_short_enums ? 1 : 2); + + /* Tag_ABI_optimization_goals. */ + if (optimize_size) + val = 4; + else if (optimize >= 2) + val = 2; + else if (optimize) + val = 1; + else + val = 6; + asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + + /* Tag_ABI_FP_16bit_format. */ + if (arm_fp16_format) + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", + (int)arm_fp16_format); + + if (arm_lang_output_object_attributes_hook) + arm_lang_output_object_attributes_hook(); + } + default_file_start(); +} + +static void +arm_file_end (void) +{ + int regno; + + if (NEED_INDICATE_EXEC_STACK) + /* Add .note.GNU-stack. */ + file_end_indicate_exec_stack (); + + if (! thumb_call_reg_needed) + return; + + switch_to_section (text_section); + asm_fprintf (asm_out_file, "\t.code 16\n"); + ASM_OUTPUT_ALIGN (asm_out_file, 1); + + for (regno = 0; regno < LR_REGNUM; regno++) + { + rtx label = thumb_call_via_label[regno]; + + if (label != 0) + { + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (label)); + asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); + } + } +} + +#ifndef ARM_PE +/* Symbols in the text segment can be accessed without indirecting via the + constant pool; it may take an extra binary operation, but this is still + faster than indirecting via memory. Don't do this when not optimizing, + since we won't be calculating al of the offsets necessary to do this + simplification. */ + +static void +arm_encode_section_info (tree decl, rtx rtl, int first) +{ + if (optimize > 0 && TREE_CONSTANT (decl)) + SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; + + default_encode_section_info (decl, rtl, first); +} +#endif /* !ARM_PE */ + +static void +arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno) +{ + if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno + && !strcmp (prefix, "L")) + { + arm_ccfsm_state = 0; + arm_target_insn = NULL; + } + default_internal_label (stream, prefix, labelno); +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ +static void +arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree function) +{ + static int thunk_label = 0; + char label[256]; + char labelpc[256]; + int mi_delta = delta; + const char *const mi_op = mi_delta < 0 ? "sub" : "add"; + int shift = 0; + int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) + ? 1 : 0); + if (mi_delta < 0) + mi_delta = - mi_delta; + + if (TARGET_THUMB1) + { + int labelno = thunk_label++; + ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno); + /* Thunks are entered in arm mode when avaiable. */ + if (TARGET_THUMB1_ONLY) + { + /* push r3 so we can use it as a temporary. */ + /* TODO: Omit this save if r3 is not used. 
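/* Editorial sketch, not part of the upstream patch: the value arm_file_start
   above picks for Tag_ABI_optimization_goals (EABI attribute 30).  The
   meanings given in the comments are our reading of the EABI attribute list,
   not text from the patch.  */
#include <stdio.h>

static int
optimization_goals_tag (int optimize, int optimize_size)
{
  if (optimize_size)
    return 4;               /* aggressively optimized for size (-Os) */
  else if (optimize >= 2)
    return 2;               /* aggressively optimized for speed (-O2/-O3) */
  else if (optimize)
    return 1;               /* optimized for speed (-O1) */
  else
    return 6;               /* best debugging experience (-O0) */
}

int
main (void)
{
  printf ("\t.eabi_attribute 30, %d\n", optimization_goals_tag (2, 0));
  return 0;
}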
*/ + fputs ("\tpush {r3}\n", file); + fputs ("\tldr\tr3, ", file); + } + else + { + fputs ("\tldr\tr12, ", file); + } + assemble_name (file, label); + fputc ('\n', file); + if (flag_pic) + { + /* If we are generating PIC, the ldr instruction below loads + "(target - 7) - .LTHUNKPCn" into r12. The pc reads as + the address of the add + 8, so we have: + + r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) + = target + 1. + + Note that we have "+ 1" because some versions of GNU ld + don't set the low bit of the result for R_ARM_REL32 + relocations against thumb function symbols. + On ARMv6M this is +4, not +8. */ + ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno); + assemble_name (file, labelpc); + fputs (":\n", file); + if (TARGET_THUMB1_ONLY) + { + /* This is 2 insns after the start of the thunk, so we know it + is 4-byte aligned. */ + fputs ("\tadd\tr3, pc, r3\n", file); + fputs ("\tmov r12, r3\n", file); + } + else + fputs ("\tadd\tr12, pc, r12\n", file); + } + else if (TARGET_THUMB1_ONLY) + fputs ("\tmov r12, r3\n", file); + } + if (TARGET_THUMB1_ONLY) + { + if (mi_delta > 255) + { + fputs ("\tldr\tr3, ", file); + assemble_name (file, label); + fputs ("+4\n", file); + asm_fprintf (file, "\t%s\t%r, %r, r3\n", + mi_op, this_regno, this_regno); + } + else if (mi_delta != 0) + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta); + } + } + else + { + /* TODO: Use movw/movt for large constants when available. */ + while (mi_delta != 0) + { + if ((mi_delta & (3 << shift)) == 0) + shift += 2; + else + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta & (0xff << shift)); + mi_delta &= ~(0xff << shift); + shift += 8; + } + } + } + if (TARGET_THUMB1) + { + if (TARGET_THUMB1_ONLY) + fputs ("\tpop\t{r3}\n", file); + + fprintf (file, "\tbx\tr12\n"); + ASM_OUTPUT_ALIGN (file, 2); + assemble_name (file, label); + fputs (":\n", file); + if (flag_pic) + { + /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ + rtx tem = XEXP (DECL_RTL (function), 0); + tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7)); + tem = gen_rtx_MINUS (GET_MODE (tem), + tem, + gen_rtx_SYMBOL_REF (Pmode, + ggc_strdup (labelpc))); + assemble_integer (tem, 4, BITS_PER_WORD, 1); + } + else + /* Output ".word .LTHUNKn". */ + assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1); + + if (TARGET_THUMB1_ONLY && mi_delta > 255) + assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1); + } + else + { + fputs ("\tb\t", file); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + if (NEED_PLT_RELOC) + fputs ("(PLT)", file); + fputc ('\n', file); + } +} + +int +arm_emit_vector_const (FILE *file, rtx x) +{ + int i; + const char * pattern; + + gcc_assert (GET_CODE (x) == CONST_VECTOR); + + switch (GET_MODE (x)) + { + case V2SImode: pattern = "%08x"; break; + case V4HImode: pattern = "%04x"; break; + case V8QImode: pattern = "%02x"; break; + default: gcc_unreachable (); + } + + fprintf (file, "0x"); + for (i = CONST_VECTOR_NUNITS (x); i--;) + { + rtx element; + + element = CONST_VECTOR_ELT (x, i); + fprintf (file, pattern, INTVAL (element)); + } + + return 1; +} + +/* Emit a fp16 constant appropriately padded to occupy a 4-byte word. + HFmode constant pool entries are actually loaded with ldr. 
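/* Editorial sketch, not part of the upstream patch: the immediate-splitting
   loop the non-Thumb-1 thunk path above uses.  ARM data-processing immediates
   are an 8-bit value rotated by an even amount, so the delta is peeled off in
   8-bit fields that start on even bit positions.  The add below stands in for
   the add/sub chosen from the sign of the delta.  */
#include <stdio.h>

int
main (void)
{
  unsigned int delta = 0x12345;        /* arbitrary example value */
  int shift = 0;

  while (delta != 0)
    {
      if ((delta & (3u << shift)) == 0)
        shift += 2;
      else
        {
          printf ("\tadd\tr0, r0, #%u\n", delta & (0xffu << shift));
          delta &= ~(0xffu << shift);
          shift += 8;
        }
    }
  return 0;
}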
*/ +void +arm_emit_fp16_const (rtx c) +{ + REAL_VALUE_TYPE r; + long bits; + + REAL_VALUE_FROM_CONST_DOUBLE (r, c); + bits = real_to_target (NULL, &r, HFmode); + if (WORDS_BIG_ENDIAN) + assemble_zeros (2); + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); + if (!WORDS_BIG_ENDIAN) + assemble_zeros (2); +} + +const char * +arm_output_load_gr (rtx *operands) +{ + rtx reg; + rtx offset; + rtx wcgr; + rtx sum; + + if (GET_CODE (operands [1]) != MEM + || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS + || GET_CODE (reg = XEXP (sum, 0)) != REG + || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT + || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024))) + return "wldrw%?\t%0, %1"; + + /* Fix up an out-of-range load of a GR register. */ + output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg); + wcgr = operands[0]; + operands[0] = reg; + output_asm_insn ("ldr%?\t%0, %1", operands); + + operands[0] = wcgr; + operands[1] = reg; + output_asm_insn ("tmcr%?\t%0, %1", operands); + output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg); + + return ""; +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. + + On the ARM, PRETEND_SIZE is set in order to have the prologue push the last + named arg and all anonymous args onto the stack. + XXX I know the prologue shouldn't be pushing registers, but it is faster + that way. */ + +static void +arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, + enum machine_mode mode, + tree type, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) +{ + int nregs; + + cfun->machine->uses_anonymous_args = 1; + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + nregs = pcum->aapcs_ncrn; + if ((nregs & 1) && arm_needs_doubleword_align (mode, type)) + nregs++; + } + else + nregs = pcum->nregs; + + if (nregs < NUM_ARG_REGS) + *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; +} + +/* Return nonzero if the CONSUMER instruction (a store) does not need + PRODUCER's value to calculate the address. */ + +int +arm_no_early_store_addr_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx addr = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (addr) == COND_EXEC) + addr = COND_EXEC_CODE (addr); + if (GET_CODE (addr) == PARALLEL) + addr = XVECEXP (addr, 0, 0); + addr = XEXP (addr, 0); + + return !reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (a store) does need + PRODUCER's value to calculate the address. */ + +int +arm_early_store_addr_dep (rtx producer, rtx consumer) +{ + return !arm_no_early_store_addr_dep (producer, consumer); +} + +/* Return nonzero if the CONSUMER instruction (a load) does need + PRODUCER's value to calculate the address. 
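/* Editorial sketch, not part of the upstream patch: how arm_emit_fp16_const
   above pads a 16-bit HFmode constant out to the 4-byte word the ldr that
   loads it expects -- the zero halfword goes after the value on little-endian
   targets and before it on big-endian ones.  The directives are printed
   rather than assembled.  */
#include <stdio.h>

static void
emit_fp16_word (unsigned short bits, int big_endian)
{
  if (big_endian)
    printf ("\t.short\t0\n");
  printf ("\t.short\t%#x\n", (unsigned int) bits);
  if (!big_endian)
    printf ("\t.short\t0\n");
}

int
main (void)
{
  emit_fp16_word (0x3c00, 0);   /* 1.0 in IEEE half precision, little-endian */
  return 0;
}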
*/ + +int +arm_early_load_addr_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx addr = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (addr) == COND_EXEC) + addr = COND_EXEC_CODE (addr); + if (GET_CODE (addr) == PARALLEL) + addr = XVECEXP (addr, 0, 0); + addr = XEXP (addr, 1); + + return reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value or amount dependency on the + result of PRODUCER. */ + +int +arm_no_early_alu_shift_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + rtx early_op; + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + early_op = XEXP (op, 0); + /* This is either an actual independent shift, or a shift applied to + the first operand of another operation. We want the whole shift + operation. */ + if (GET_CODE (early_op) == REG) + early_op = op; + + return !reg_overlap_mentioned_p (value, early_op); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value dependency on the result of + PRODUCER. */ + +int +arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + rtx early_op; + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + early_op = XEXP (op, 0); + + /* This is either an actual independent shift, or a shift applied to + the first operand of another operation. We want the value being + shifted, in either case. */ + if (GET_CODE (early_op) != REG) + early_op = XEXP (early_op, 0); + + return !reg_overlap_mentioned_p (value, early_op); +} + +/* Return nonzero if the CONSUMER (a mul or mac op) does not + have an early register mult dependency on the result of + PRODUCER. */ + +int +arm_no_early_mul_dep (rtx producer, rtx consumer) +{ + rtx value = PATTERN (producer); + rtx op = PATTERN (consumer); + + if (GET_CODE (value) == COND_EXEC) + value = COND_EXEC_CODE (value); + if (GET_CODE (value) == PARALLEL) + value = XVECEXP (value, 0, 0); + value = XEXP (value, 0); + if (GET_CODE (op) == COND_EXEC) + op = COND_EXEC_CODE (op); + if (GET_CODE (op) == PARALLEL) + op = XVECEXP (op, 0, 0); + op = XEXP (op, 1); + + if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) + { + if (GET_CODE (XEXP (op, 0)) == MULT) + return !reg_overlap_mentioned_p (value, XEXP (op, 0)); + else + return !reg_overlap_mentioned_p (value, XEXP (op, 1)); + } + + return 0; +} + +/* We can't rely on the caller doing the proper promotion when + using APCS or ATPCS. 
*/ + +static bool +arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED) +{ + return !TARGET_AAPCS_BASED; +} + +static enum machine_mode +arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) +{ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + return SImode; + + return mode; +} + +/* AAPCS based ABIs use short enums by default. */ + +static bool +arm_default_short_enums (void) +{ + return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX; +} + + +/* AAPCS requires that anonymous bitfields affect structure alignment. */ + +static bool +arm_align_anon_bitfield (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */ + +static tree +arm_cxx_guard_type (void) +{ + return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; +} + +/* Return non-zero if the consumer (a multiply-accumulate instruction) + has an accumulator dependency on the result of the producer (a + multiplication instruction) and no other dependency on that result. */ +int +arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) +{ + rtx mul = PATTERN (producer); + rtx mac = PATTERN (consumer); + rtx mul_result; + rtx mac_op0, mac_op1, mac_acc; + + if (GET_CODE (mul) == COND_EXEC) + mul = COND_EXEC_CODE (mul); + if (GET_CODE (mac) == COND_EXEC) + mac = COND_EXEC_CODE (mac); + + /* Check that mul is of the form (set (...) (mult ...)) + and mla is of the form (set (...) (plus (mult ...) (...))). */ + if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) + || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS + || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) + return 0; + + mul_result = XEXP (mul, 0); + mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); + mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); + mac_acc = XEXP (XEXP (mac, 1), 1); + + return (reg_overlap_mentioned_p (mul_result, mac_acc) + && !reg_overlap_mentioned_p (mul_result, mac_op0) + && !reg_overlap_mentioned_p (mul_result, mac_op1)); +} + + +/* The EABI says test the least significant bit of a guard variable. */ + +static bool +arm_cxx_guard_mask_bit (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The EABI specifies that all array cookies are 8 bytes long. */ + +static tree +arm_get_cookie_size (tree type) +{ + tree size; + + if (!TARGET_AAPCS_BASED) + return default_cxx_get_cookie_size (type); + + size = build_int_cst (sizetype, 8); + return size; +} + + +/* The EABI says that array cookies should also contain the element size. */ + +static bool +arm_cookie_has_size (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The EABI says constructors and destructors should return a pointer to + the object constructed/destroyed. */ + +static bool +arm_cxx_cdtor_returns_this (void) +{ + return TARGET_AAPCS_BASED; +} + +/* The EABI says that an inline function may never be the key + method. */ + +static bool +arm_cxx_key_method_may_be_inline (void) +{ + return !TARGET_AAPCS_BASED; +} + +static void +arm_cxx_determine_class_data_visibility (tree decl) +{ + if (!TARGET_AAPCS_BASED + || !TARGET_DLLIMPORT_DECL_ATTRIBUTES) + return; + + /* In general, \S 3.2.5.5 of the ARM EABI requires that class data + is exported. However, on systems without dynamic vague linkage, + \S 3.2.5.6 says that COMDAT class data has hidden linkage. 
*/ + if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl)) + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + else + DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT; + DECL_VISIBILITY_SPECIFIED (decl) = 1; +} + +static bool +arm_cxx_class_data_always_comdat (void) +{ + /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have + vague linkage if the class has no key function. */ + return !TARGET_AAPCS_BASED; +} + + +/* The EABI says __aeabi_atexit should be used to register static + destructors. */ + +static bool +arm_cxx_use_aeabi_atexit (void) +{ + return TARGET_AAPCS_BASED; +} + + +void +arm_set_return_address (rtx source, rtx scratch) +{ + arm_stack_offsets *offsets; + HOST_WIDE_INT delta; + rtx addr; + unsigned long saved_regs; + + offsets = arm_get_frame_offsets (); + saved_regs = offsets->saved_regs_mask; + + if ((saved_regs & (1 << LR_REGNUM)) == 0) + emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); + else + { + if (frame_pointer_needed) + addr = plus_constant(hard_frame_pointer_rtx, -4); + else + { + /* LR will be the first saved register. */ + delta = offsets->outgoing_args - (offsets->frame + 4); + + + if (delta >= 4096) + { + emit_insn (gen_addsi3 (scratch, stack_pointer_rtx, + GEN_INT (delta & ~4095))); + addr = scratch; + delta &= 4095; + } + else + addr = stack_pointer_rtx; + + addr = plus_constant (addr, delta); + } + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } +} + + +void +thumb_set_return_address (rtx source, rtx scratch) +{ + arm_stack_offsets *offsets; + HOST_WIDE_INT delta; + HOST_WIDE_INT limit; + int reg; + rtx addr; + unsigned long mask; + + emit_use (source); + + offsets = arm_get_frame_offsets (); + mask = offsets->saved_regs_mask; + if (mask & (1 << LR_REGNUM)) + { + limit = 1024; + /* Find the saved regs. */ + if (frame_pointer_needed) + { + delta = offsets->soft_frame - offsets->saved_args; + reg = THUMB_HARD_FRAME_POINTER_REGNUM; + if (TARGET_THUMB1) + limit = 128; + } + else + { + delta = offsets->outgoing_args - offsets->saved_args; + reg = SP_REGNUM; + } + /* Allow for the stack frame. */ + if (TARGET_THUMB1 && TARGET_BACKTRACE) + delta -= 16; + /* The link register is always the first saved register. */ + delta -= 4; + + /* Construct the address. */ + addr = gen_rtx_REG (SImode, reg); + if (delta > limit) + { + emit_insn (gen_movsi (scratch, GEN_INT (delta))); + emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx)); + addr = scratch; + } + else + addr = plus_constant (addr, delta); + + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } + else + emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); +} + +/* Implements target hook vector_mode_supported_p. */ +bool +arm_vector_mode_supported_p (enum machine_mode mode) +{ + /* Neon also supports V2SImode, etc. listed in the clause below. */ + if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode + || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) + return true; + + if ((TARGET_NEON || TARGET_IWMMXT) + && ((mode == V2SImode) + || (mode == V4HImode) + || (mode == V8QImode))) + return true; + + return false; +} + +/* Use the option -mvectorize-with-neon-quad to override the use of doubleword + registers when autovectorizing for Neon, at least until multiple vector + widths are supported properly by the middle-end. */ + +static enum machine_mode +arm_preferred_simd_mode (enum machine_mode mode) +{ + if (TARGET_NEON) + switch (mode) + { + case SFmode: + return TARGET_NEON_VECTORIZE_QUAD ? 
V4SFmode : V2SFmode; + case SImode: + return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode; + case HImode: + return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode; + case QImode: + return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode; + case DImode: + if (TARGET_NEON_VECTORIZE_QUAD) + return V2DImode; + break; + + default:; + } + + if (TARGET_REALLY_IWMMXT) + switch (mode) + { + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + + default:; + } + + return word_mode; +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. + + We need to define this for LO_REGS on thumb. Otherwise we can end up + using r0-r4 for function arguments, r7 for the stack frame and don't + have enough left over to do doubleword arithmetic. */ + +static bool +arm_class_likely_spilled_p (reg_class_t rclass) +{ + if ((TARGET_THUMB && rclass == LO_REGS) + || rclass == CC_REG) + return true; + + return false; +} + +/* Implements target hook small_register_classes_for_mode_p. */ +bool +arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return TARGET_THUMB1; +} + +/* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal + ARM insns and therefore guarantee that the shift count is modulo 256. + DImode shifts (those implemented by lib1funcs.asm or by optabs.c) + guarantee no particular behavior for out-of-range counts. */ + +static unsigned HOST_WIDE_INT +arm_shift_truncation_mask (enum machine_mode mode) +{ + return mode == SImode ? 255 : 0; +} + + +/* Map internal gcc register numbers to DWARF2 register numbers. */ + +unsigned int +arm_dbx_register_number (unsigned int regno) +{ + if (regno < 16) + return regno; + + /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards + compatibility. The EABI defines them as registers 96-103. */ + if (IS_FPA_REGNUM (regno)) + return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM; + + if (IS_VFP_REGNUM (regno)) + { + /* See comment in arm_dwarf_register_span. */ + if (VFP_REGNO_OK_FOR_SINGLE (regno)) + return 64 + regno - FIRST_VFP_REGNUM; + else + return 256 + (regno - FIRST_VFP_REGNUM) / 2; + } + + if (IS_IWMMXT_GR_REGNUM (regno)) + return 104 + regno - FIRST_IWMMXT_GR_REGNUM; + + if (IS_IWMMXT_REGNUM (regno)) + return 112 + regno - FIRST_IWMMXT_REGNUM; + + gcc_unreachable (); +} + +/* Dwarf models VFPv3 registers as 32 64-bit registers. + GCC models tham as 64 32-bit registers, so we need to describe this to + the DWARF generation code. Other registers can use the default. */ +static rtx +arm_dwarf_register_span (rtx rtl) +{ + unsigned regno; + int nregs; + int i; + rtx p; + + regno = REGNO (rtl); + if (!IS_VFP_REGNUM (regno)) + return NULL_RTX; + + /* XXX FIXME: The EABI defines two VFP register ranges: + 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent) + 256-287: D0-D31 + The recommended encoding for S0-S31 is a DW_OP_bit_piece of the + corresponding D register. Until GDB supports this, we shall use the + legacy encodings. We also use these encodings for D0-D15 for + compatibility with older debuggers. */ + if (VFP_REGNO_OK_FOR_SINGLE (regno)) + return NULL_RTX; + + nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; + p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs)); + regno = (regno - FIRST_VFP_REGNUM) / 2; + for (i = 0; i < nregs; i++) + XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); + + return p; +} + +#if ARM_UNWIND_INFO +/* Emit unwind directives for a store-multiple instruction or stack pointer + push during alignment. 
+ These should only ever be generated by the function prologue code, so + expect them to have a particular form. */ + +static void +arm_unwind_emit_sequence (FILE * asm_out_file, rtx p) +{ + int i; + HOST_WIDE_INT offset; + HOST_WIDE_INT nregs; + int reg_size; + unsigned reg; + unsigned lastreg; + rtx e; + + e = XVECEXP (p, 0, 0); + if (GET_CODE (e) != SET) + abort (); + + /* First insn will adjust the stack pointer. */ + if (GET_CODE (e) != SET + || GET_CODE (XEXP (e, 0)) != REG + || REGNO (XEXP (e, 0)) != SP_REGNUM + || GET_CODE (XEXP (e, 1)) != PLUS) + abort (); + + offset = -INTVAL (XEXP (XEXP (e, 1), 1)); + nregs = XVECLEN (p, 0) - 1; + + reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1)); + if (reg < 16) + { + /* The function prologue may also push pc, but not annotate it as it is + never restored. We turn this into a stack pointer adjustment. */ + if (nregs * 4 == offset - 4) + { + fprintf (asm_out_file, "\t.pad #4\n"); + offset -= 4; + } + reg_size = 4; + fprintf (asm_out_file, "\t.save {"); + } + else if (IS_VFP_REGNUM (reg)) + { + reg_size = 8; + fprintf (asm_out_file, "\t.vsave {"); + } + else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM) + { + /* FPA registers are done differently. */ + asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs); + return; + } + else + /* Unknown register type. */ + abort (); + + /* If the stack increment doesn't match the size of the saved registers, + something has gone horribly wrong. */ + if (offset != nregs * reg_size) + abort (); + + offset = 0; + lastreg = 0; + /* The remaining insns will describe the stores. */ + for (i = 1; i <= nregs; i++) + { + /* Expect (set (mem ) (reg)). + Where is (reg:SP) or (plus (reg:SP) (const_int)). */ + e = XVECEXP (p, 0, i); + if (GET_CODE (e) != SET + || GET_CODE (XEXP (e, 0)) != MEM + || GET_CODE (XEXP (e, 1)) != REG) + abort (); + + reg = REGNO (XEXP (e, 1)); + if (reg < lastreg) + abort (); + + if (i != 1) + fprintf (asm_out_file, ", "); + /* We can't use %r for vfp because we need to use the + double precision register names. */ + if (IS_VFP_REGNUM (reg)) + asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2); + else + asm_fprintf (asm_out_file, "%r", reg); + +#ifdef ENABLE_CHECKING + /* Check that the addresses are consecutive. */ + e = XEXP (XEXP (e, 0), 0); + if (GET_CODE (e) == PLUS) + { + offset += reg_size; + if (GET_CODE (XEXP (e, 0)) != REG + || REGNO (XEXP (e, 0)) != SP_REGNUM + || GET_CODE (XEXP (e, 1)) != CONST_INT + || offset != INTVAL (XEXP (e, 1))) + abort (); + } + else if (i != 1 + || GET_CODE (e) != REG + || REGNO (e) != SP_REGNUM) + abort (); +#endif + } + fprintf (asm_out_file, "}\n"); +} + +/* Emit unwind directives for a SET. */ + +static void +arm_unwind_emit_set (FILE * asm_out_file, rtx p) +{ + rtx e0; + rtx e1; + unsigned reg; + + e0 = XEXP (p, 0); + e1 = XEXP (p, 1); + switch (GET_CODE (e0)) + { + case MEM: + /* Pushing a single register. */ + if (GET_CODE (XEXP (e0, 0)) != PRE_DEC + || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG + || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM) + abort (); + + asm_fprintf (asm_out_file, "\t.save "); + if (IS_VFP_REGNUM (REGNO (e1))) + asm_fprintf(asm_out_file, "{d%d}\n", + (REGNO (e1) - FIRST_VFP_REGNUM) / 2); + else + asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1)); + break; + + case REG: + if (REGNO (e0) == SP_REGNUM) + { + /* A stack increment. 
*/ + if (GET_CODE (e1) != PLUS + || GET_CODE (XEXP (e1, 0)) != REG + || REGNO (XEXP (e1, 0)) != SP_REGNUM + || GET_CODE (XEXP (e1, 1)) != CONST_INT) + abort (); + + asm_fprintf (asm_out_file, "\t.pad #%wd\n", + -INTVAL (XEXP (e1, 1))); + } + else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT offset; + + if (GET_CODE (e1) == PLUS) + { + if (GET_CODE (XEXP (e1, 0)) != REG + || GET_CODE (XEXP (e1, 1)) != CONST_INT) + abort (); + reg = REGNO (XEXP (e1, 0)); + offset = INTVAL (XEXP (e1, 1)); + asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n", + HARD_FRAME_POINTER_REGNUM, reg, + offset); + } + else if (GET_CODE (e1) == REG) + { + reg = REGNO (e1); + asm_fprintf (asm_out_file, "\t.setfp %r, %r\n", + HARD_FRAME_POINTER_REGNUM, reg); + } + else + abort (); + } + else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM) + { + /* Move from sp to reg. */ + asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0)); + } + else if (GET_CODE (e1) == PLUS + && GET_CODE (XEXP (e1, 0)) == REG + && REGNO (XEXP (e1, 0)) == SP_REGNUM + && GET_CODE (XEXP (e1, 1)) == CONST_INT) + { + /* Set reg to offset from sp. */ + asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n", + REGNO (e0), (int)INTVAL(XEXP (e1, 1))); + } + else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN) + { + /* Stack pointer save before alignment. */ + reg = REGNO (e0); + asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n", + reg + 0x90, reg); + } + else + abort (); + break; + + default: + abort (); + } +} + + +/* Emit unwind directives for the given insn. */ + +static void +arm_unwind_emit (FILE * asm_out_file, rtx insn) +{ + rtx pat; + + if (arm_except_unwind_info (&global_options) != UI_TARGET) + return; + + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + return; + + if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn)) + return; + + pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX); + if (pat) + pat = XEXP (pat, 0); + else + pat = PATTERN (insn); + + switch (GET_CODE (pat)) + { + case SET: + arm_unwind_emit_set (asm_out_file, pat); + break; + + case SEQUENCE: + /* Store multiple. */ + arm_unwind_emit_sequence (asm_out_file, pat); + break; + + default: + abort(); + } +} + + +/* Output a reference from a function exception table to the type_info + object X. The EABI specifies that the symbol should be relocated by + an R_ARM_TARGET2 relocation. */ + +static bool +arm_output_ttype (rtx x) +{ + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + /* Use special relocations for symbol references. */ + if (GET_CODE (x) != CONST_INT) + fputs ("(TARGET2)", asm_out_file); + fputc ('\n', asm_out_file); + + return TRUE; +} + +/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ + +static void +arm_asm_emit_except_personality (rtx personality) +{ + fputs ("\t.personality\t", asm_out_file); + output_addr_const (asm_out_file, personality); + fputc ('\n', asm_out_file); +} + +/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ + +static void +arm_asm_init_sections (void) +{ + exception_section = get_unnamed_section (0, output_section_asm_op, + "\t.handlerdata"); +} +#endif /* ARM_UNWIND_INFO */ + +/* Implement TARGET_EXCEPT_UNWIND_INFO. */ + +static enum unwind_info_type +arm_except_unwind_info (struct gcc_options *opts) +{ + /* Honor the --enable-sjlj-exceptions configure switch. 
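/* Editorial sketch, not part of the upstream patch: the kind of EHABI
   annotation the unwind-emission code in this region produces.  For a
   prologue along the lines of "push {r4, r5, lr}; sub sp, sp, #16" the
   directives come out roughly as printed below; the exact set depends on the
   frame layout.  */
#include <stdio.h>

int
main (void)
{
  printf ("\t.fnstart\n");              /* emitted at the start of the function */
  printf ("\t.save {r4, r5, lr}\n");    /* register store annotation */
  printf ("\t.pad #16\n");              /* stack pointer adjustment */
  printf ("\t.fnend\n");                /* emitted at the end of the function */
  return 0;
}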
*/ +#ifdef CONFIG_SJLJ_EXCEPTIONS + if (CONFIG_SJLJ_EXCEPTIONS) + return UI_SJLJ; +#endif + + /* If not using ARM EABI unwind tables... */ + if (ARM_UNWIND_INFO) + { + /* For simplicity elsewhere in this file, indicate that all unwind + info is disabled if we're not emitting unwind tables. */ + if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables) + return UI_NONE; + else + return UI_TARGET; + } + + /* ... we use sjlj exceptions for backwards compatibility. */ + return UI_SJLJ; +} + + +/* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic + stack alignment. */ + +static void +arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) +{ + rtx unspec = SET_SRC (pattern); + gcc_assert (GET_CODE (unspec) == UNSPEC); + + switch (index) + { + case UNSPEC_STACK_ALIGN: + /* ??? We should set the CFA = (SP & ~7). At this point we haven't + put anything on the stack, so hopefully it won't matter. + CFA = SP will be correct after alignment. */ + dwarf2out_reg_save_reg (label, stack_pointer_rtx, + SET_DEST (pattern)); + break; + default: + gcc_unreachable (); + } +} + + +/* Output unwind directives for the start/end of a function. */ + +void +arm_output_fn_unwind (FILE * f, bool prologue) +{ + if (arm_except_unwind_info (&global_options) != UI_TARGET) + return; + + if (prologue) + fputs ("\t.fnstart\n", f); + else + { + /* If this function will never be unwound, then mark it as such. + The came condition is used in arm_unwind_emit to suppress + the frame annotations. */ + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + fputs("\t.cantunwind\n", f); + + fputs ("\t.fnend\n", f); + } +} + +static bool +arm_emit_tls_decoration (FILE *fp, rtx x) +{ + enum tls_reloc reloc; + rtx val; + + val = XVECEXP (x, 0, 0); + reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1)); + + output_addr_const (fp, val); + + switch (reloc) + { + case TLS_GD32: + fputs ("(tlsgd)", fp); + break; + case TLS_LDM32: + fputs ("(tlsldm)", fp); + break; + case TLS_LDO32: + fputs ("(tlsldo)", fp); + break; + case TLS_IE32: + fputs ("(gottpoff)", fp); + break; + case TLS_LE32: + fputs ("(tpoff)", fp); + break; + default: + gcc_unreachable (); + } + + switch (reloc) + { + case TLS_GD32: + case TLS_LDM32: + case TLS_IE32: + fputs (" + (. - ", fp); + output_addr_const (fp, XVECEXP (x, 0, 2)); + fputs (" - ", fp); + output_addr_const (fp, XVECEXP (x, 0, 3)); + fputc (')', fp); + break; + default: + break; + } + + return TRUE; +} + +/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */ + +static void +arm_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + gcc_assert (size == 4); + fputs ("\t.word\t", file); + output_addr_const (file, x); + fputs ("(tlsldo)", file); +} + +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
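+   As a rough illustration (label1/label2 stand in for the actual UNSPEC
+   operands), a TLS_GD32 reference to a symbol "x", handled here via
+   arm_emit_tls_decoration above, is printed as something like
+       x(tlsgd) + (. - label1 - label2)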
*/ + +static bool +arm_output_addr_const_extra (FILE *fp, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return arm_emit_tls_decoration (fp, x); + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL) + { + char label[256]; + int labelno = INTVAL (XVECEXP (x, 0, 0)); + + ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno); + assemble_name_raw (fp, label); + + return TRUE; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF) + { + assemble_name (fp, "_GLOBAL_OFFSET_TABLE_"); + if (GOT_PCREL) + fputs ("+.", fp); + fputs ("-(", fp); + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputc (')', fp); + return TRUE; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET) + { + output_addr_const (fp, XVECEXP (x, 0, 0)); + if (GOT_PCREL) + fputs ("+.", fp); + fputs ("-(", fp); + output_addr_const (fp, XVECEXP (x, 0, 1)); + fputc (')', fp); + return TRUE; + } + else if (GET_CODE (x) == CONST_VECTOR) + return arm_emit_vector_const (fp, x); + + return FALSE; +} + +/* Output assembly for a shift instruction. + SET_FLAGS determines how the instruction modifies the condition codes. + 0 - Do not set condition codes. + 1 - Set condition codes. + 2 - Use smallest instruction. */ +const char * +arm_output_shift(rtx * operands, int set_flags) +{ + char pattern[100]; + static const char flag_chars[3] = {'?', '.', '!'}; + const char *shift; + HOST_WIDE_INT val; + char c; + + c = flag_chars[set_flags]; + if (TARGET_UNIFIED_ASM) + { + shift = shift_op(operands[3], &val); + if (shift) + { + if (val != -1) + operands[2] = GEN_INT(val); + sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c); + } + else + sprintf (pattern, "mov%%%c\t%%0, %%1", c); + } + else + sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c); + output_asm_insn (pattern, operands); + return ""; +} + +/* Output a Thumb-1 casesi dispatch sequence. */ +const char * +thumb1_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (next_real_insn (operands[0])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE(diff_vec)) + { + case QImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi"); + case HImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi"); + case SImode: + return "bl\t%___gnu_thumb1_case_si"; + default: + gcc_unreachable (); + } +} + +/* Output a Thumb-2 casesi instruction. */ +const char * +thumb2_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (next_real_insn (operands[2])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + output_asm_insn ("cmp\t%0, %1", operands); + output_asm_insn ("bhi\t%l3", operands); + switch (GET_MODE(diff_vec)) + { + case QImode: + return "tbb\t[%|pc, %0]"; + case HImode: + return "tbh\t[%|pc, %0, lsl #1]"; + case SImode: + if (flag_pic) + { + output_asm_insn ("adr\t%4, %l2", operands); + output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands); + output_asm_insn ("add\t%4, %4, %5", operands); + return "bx\t%4"; + } + else + { + output_asm_insn ("adr\t%4, %l2", operands); + return "ldr\t%|pc, [%4, %0, lsl #2]"; + } + default: + gcc_unreachable (); + } +} + +/* Most ARM cores are single issue, but some newer ones can dual issue. + The scheduler descriptions rely on this being correct. 
*/
+static int
+arm_issue_rate (void)
+{
+  switch (arm_tune)
+    {
+    case cortexr4:
+    case cortexr4f:
+    case cortexa5:
+    case cortexa8:
+    case cortexa9:
+    case fa726te:
+      return 2;
+
+    default:
+      return 1;
+    }
+}
+
+/* A table and a function to perform ARM-specific name mangling for
+   NEON vector types in order to conform to the AAPCS (see "Procedure
+   Call Standard for the ARM Architecture", Appendix A).  To qualify
+   for emission with the mangled names defined in that document, a
+   vector type must not only be of the correct mode but also be
+   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
+typedef struct
+{
+  enum machine_mode mode;
+  const char *element_type_name;
+  const char *aapcs_name;
+} arm_mangle_map_entry;
+
+static arm_mangle_map_entry arm_mangle_map[] = {
+  /* 64-bit containerized types.  */
+  { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
+  { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
+  { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
+  { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
+  { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
+  { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
+  { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
+  { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
+  { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
+  /* 128-bit containerized types.  */
+  { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
+  { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
+  { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
+  { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
+  { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
+  { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
+  { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
+  { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
+  { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
+  { VOIDmode, NULL, NULL }
+};
+
+const char *
+arm_mangle_type (const_tree type)
+{
+  arm_mangle_map_entry *pos = arm_mangle_map;
+
+  /* The ARM ABI documents (10th October 2008) say that "__va_list"
+     has to be mangled as if it is in the "std" namespace.  */
+  if (TARGET_AAPCS_BASED
+      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
+    {
+      static bool warned;
+      if (!warned && warn_psabi && !in_system_header)
+        {
+          warned = true;
+          inform (input_location,
+                  "the mangling of %<va_list%> has changed in GCC 4.4");
+        }
+      return "St9__va_list";
+    }
+
+  /* Half-precision float.  */
+  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+    return "Dh";
+
+  if (TREE_CODE (type) != VECTOR_TYPE)
+    return NULL;
+
+  /* Check the mode of the vector type, and the name of the vector
+     element type, against the table.  */
+  while (pos->mode != VOIDmode)
+    {
+      tree elt_type = TREE_TYPE (type);
+
+      if (pos->mode == TYPE_MODE (type)
+          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
+          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
+                      pos->element_type_name))
+        return pos->aapcs_name;
+
+      pos++;
+    }
+
+  /* Use the default mangling for unrecognized (possibly user-defined)
+     vector types.  */
+  return NULL;
+}
+
+/* Order of allocation of core registers for Thumb: this allocation is
+   written over the corresponding initial entries of the array
+   initialized with REG_ALLOC_ORDER.  We allocate all low registers
+   first.  Saving and restoring a low register is usually cheaper than
+   using a call-clobbered high register.
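+   (For example, a low register can be saved and restored with the
+   16-bit "push {r4}" / "pop {r4}" encodings, while a high register
+   such as r8 cannot appear in a Thumb-1 PUSH/POP register list and
+   needs an extra move through a low register first.)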
*/ + +static const int thumb_core_reg_alloc_order[] = +{ + 3, 2, 1, 0, 4, 5, 6, 7, + 14, 12, 8, 9, 10, 11, 13, 15 +}; + +/* Adjust register allocation order when compiling for Thumb. */ + +void +arm_order_regs_for_local_alloc (void) +{ + const int arm_reg_alloc_order[] = REG_ALLOC_ORDER; + memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order)); + if (TARGET_THUMB) + memcpy (reg_alloc_order, thumb_core_reg_alloc_order, + sizeof (thumb_core_reg_alloc_order)); +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. */ + +bool +arm_frame_pointer_required (void) +{ + return (cfun->has_nonlocal_label + || SUBTARGET_FRAME_POINTER_REQUIRED + || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ())); +} + +/* Only thumb1 can't support conditional execution, so return true if + the target is not thumb1. */ +static bool +arm_have_conditional_execution (void) +{ + return !TARGET_THUMB1; +} + +/* Legitimize a memory reference for sync primitive implemented using + ldrex / strex. We currently force the form of the reference to be + indirect without offset. We do not yet support the indirect offset + addressing supported by some ARM targets for these + instructions. */ +static rtx +arm_legitimize_sync_memory (rtx memory) +{ + rtx addr = force_reg (Pmode, XEXP (memory, 0)); + rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr); + + set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER); + MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory); + return legitimate_memory; +} + +/* An instruction emitter. */ +typedef void (* emit_f) (int label, const char *, rtx *); + +/* An instruction emitter that emits via the conventional + output_asm_insn. */ +static void +arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands) +{ + output_asm_insn (pattern, operands); +} + +/* Count the number of emitted synchronization instructions. */ +static unsigned arm_insn_count; + +/* An emitter that counts emitted instructions but does not actually + emit instruction into the the instruction stream. */ +static void +arm_count (int label, + const char *pattern ATTRIBUTE_UNUSED, + rtx *operands ATTRIBUTE_UNUSED) +{ + if (! label) + ++ arm_insn_count; +} + +/* Construct a pattern using conventional output formatting and feed + it to output_asm_insn. Provides a mechanism to construct the + output pattern on the fly. Note the hard limit on the pattern + buffer size. */ +static void ATTRIBUTE_PRINTF_4 +arm_output_asm_insn (emit_f emit, int label, rtx *operands, + const char *pattern, ...) +{ + va_list ap; + char buffer[256]; + + va_start (ap, pattern); + vsprintf (buffer, pattern, ap); + va_end (ap); + emit (label, buffer, operands); +} + +/* Emit the memory barrier instruction, if any, provided by this + target to a specified emitter. */ +static void +arm_process_output_memory_barrier (emit_f emit, rtx *operands) +{ + if (TARGET_HAVE_DMB) + { + /* Note we issue a system level barrier. We should consider + issuing a inner shareabilty zone barrier here instead, ie. + "DMB ISH". */ + emit (0, "dmb\tsy", operands); + return; + } + + if (TARGET_HAVE_DMB_MCR) + { + emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands); + return; + } + + gcc_unreachable (); +} + +/* Emit the memory barrier instruction, if any, provided by this + target. 
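+   On a target with the DMB instruction this is "dmb sy"; otherwise,
+   when only the CP15 barrier is available, it is the equivalent
+   "mcr p15, 0, r0, c7, c10, 5" form, as emitted by
+   arm_process_output_memory_barrier above.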
*/ +const char * +arm_output_memory_barrier (rtx *operands) +{ + arm_process_output_memory_barrier (arm_emit, operands); + return ""; +} + +/* Helper to figure out the instruction suffix required on ldrex/strex + for operations on an object of the specified mode. */ +static const char * +arm_ldrex_suffix (enum machine_mode mode) +{ + switch (mode) + { + case QImode: return "b"; + case HImode: return "h"; + case SImode: return ""; + case DImode: return "d"; + default: + gcc_unreachable (); + } + return ""; +} + +/* Emit an ldrex{b,h,d, } instruction appropriate for the specified + mode. */ +static void +arm_output_ldrex (emit_f emit, + enum machine_mode mode, + rtx target, + rtx memory) +{ + const char *suffix = arm_ldrex_suffix (mode); + rtx operands[2]; + + operands[0] = target; + operands[1] = memory; + arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); +} + +/* Emit a strex{b,h,d, } instruction appropriate for the specified + mode. */ +static void +arm_output_strex (emit_f emit, + enum machine_mode mode, + const char *cc, + rtx result, + rtx value, + rtx memory) +{ + const char *suffix = arm_ldrex_suffix (mode); + rtx operands[3]; + + operands[0] = result; + operands[1] = value; + operands[2] = memory; + arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix, + cc); +} + +/* Helper to emit a two operand instruction. */ +static void +arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s) +{ + rtx operands[2]; + + operands[0] = d; + operands[1] = s; + arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic); +} + +/* Helper to emit a three operand instruction. */ +static void +arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b) +{ + rtx operands[3]; + + operands[0] = d; + operands[1] = a; + operands[2] = b; + arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic); +} + +/* Emit a load store exclusive synchronization loop. + + do + old_value = [mem] + if old_value != required_value + break; + t1 = sync_op (old_value, new_value) + [mem] = t1, t2 = [0|1] + while ! t2 + + Note: + t1 == t2 is not permitted + t1 == old_value is permitted + + required_value: + + RTX register or const_int representing the required old_value for + the modify to continue, if NULL no comparsion is performed. 
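+
+   As a rough illustration only (register choices are arbitrary and the
+   local labels are simplified), the sequence emitted for a SImode add
+   with no required_value, on a target with the DMB instruction, is
+   approximately:
+
+	dmb	sy
+   1:	ldrex	r0, [r1]
+	add	r2, r0, r3
+	strex	r4, r2, [r1]
+	teq	r4, #0
+	bne	1b
+	dmb	sy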
*/ +static void +arm_output_sync_loop (emit_f emit, + enum machine_mode mode, + rtx old_value, + rtx memory, + rtx required_value, + rtx new_value, + rtx t1, + rtx t2, + enum attr_sync_op sync_op, + int early_barrier_required) +{ + rtx operands[1]; + + gcc_assert (t1 != t2); + + if (early_barrier_required) + arm_process_output_memory_barrier (emit, NULL); + + arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX); + + arm_output_ldrex (emit, mode, old_value, memory); + + if (required_value) + { + rtx operands[2]; + + operands[0] = old_value; + operands[1] = required_value; + arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX); + } + + switch (sync_op) + { + case SYNC_OP_ADD: + arm_output_op3 (emit, "add", t1, old_value, new_value); + break; + + case SYNC_OP_SUB: + arm_output_op3 (emit, "sub", t1, old_value, new_value); + break; + + case SYNC_OP_IOR: + arm_output_op3 (emit, "orr", t1, old_value, new_value); + break; + + case SYNC_OP_XOR: + arm_output_op3 (emit, "eor", t1, old_value, new_value); + break; + + case SYNC_OP_AND: + arm_output_op3 (emit,"and", t1, old_value, new_value); + break; + + case SYNC_OP_NAND: + arm_output_op3 (emit, "and", t1, old_value, new_value); + arm_output_op2 (emit, "mvn", t1, t1); + break; + + case SYNC_OP_NONE: + t1 = new_value; + break; + } + + if (t2) + { + arm_output_strex (emit, mode, "", t2, t1, memory); + operands[0] = t2; + arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", + LOCAL_LABEL_PREFIX); + } + else + { + /* Use old_value for the return value because for some operations + the old_value can easily be restored. This saves one register. */ + arm_output_strex (emit, mode, "", old_value, t1, memory); + operands[0] = old_value; + arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); + arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", + LOCAL_LABEL_PREFIX); + + switch (sync_op) + { + case SYNC_OP_ADD: + arm_output_op3 (emit, "sub", old_value, t1, new_value); + break; + + case SYNC_OP_SUB: + arm_output_op3 (emit, "add", old_value, t1, new_value); + break; + + case SYNC_OP_XOR: + arm_output_op3 (emit, "eor", old_value, t1, new_value); + break; + + case SYNC_OP_NONE: + arm_output_op2 (emit, "mov", old_value, required_value); + break; + + default: + gcc_unreachable (); + } + } + + /* Note: label is before barrier so that in cmp failure case we still get + a barrier to stop subsequent loads floating upwards past the ldrex + PR target/48126. */ + arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); + arm_process_output_memory_barrier (emit, NULL); +} + +static rtx +arm_get_sync_operand (rtx *operands, int index, rtx default_value) +{ + if (index > 0) + default_value = operands[index - 1]; + + return default_value; +} + +#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \ + arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT); + +/* Extract the operands for a synchroniztion instruction from the + instructions attributes and emit the instruction. 
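+   The sync_* attribute values are 1-based operand numbers: for example,
+   a sync_t2 attribute value of 5 selects operands[4], while the default
+   value 0 leaves the corresponding operand NULL (see
+   arm_get_sync_operand above).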
*/ +static void +arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands) +{ + rtx result, memory, required_value, new_value, t1, t2; + int early_barrier; + enum machine_mode mode; + enum attr_sync_op sync_op; + + result = FETCH_SYNC_OPERAND(result, 0); + memory = FETCH_SYNC_OPERAND(memory, 0); + required_value = FETCH_SYNC_OPERAND(required_value, 0); + new_value = FETCH_SYNC_OPERAND(new_value, 0); + t1 = FETCH_SYNC_OPERAND(t1, 0); + t2 = FETCH_SYNC_OPERAND(t2, 0); + early_barrier = + get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES; + sync_op = get_attr_sync_op (insn); + mode = GET_MODE (memory); + + arm_output_sync_loop (emit, mode, result, memory, required_value, + new_value, t1, t2, sync_op, early_barrier); +} + +/* Emit a synchronization instruction loop. */ +const char * +arm_output_sync_insn (rtx insn, rtx *operands) +{ + arm_process_output_sync_insn (arm_emit, insn, operands); + return ""; +} + +/* Count the number of machine instruction that will be emitted for a + synchronization instruction. Note that the emitter used does not + emit instructions, it just counts instructions being carefull not + to count labels. */ +unsigned int +arm_sync_loop_insns (rtx insn, rtx *operands) +{ + arm_insn_count = 0; + arm_process_output_sync_insn (arm_count, insn, operands); + return arm_insn_count; +} + +/* Helper to call a target sync instruction generator, dealing with + the variation in operands required by the different generators. */ +static rtx +arm_call_generator (struct arm_sync_generator *generator, rtx old_value, + rtx memory, rtx required_value, rtx new_value) +{ + switch (generator->op) + { + case arm_sync_generator_omn: + gcc_assert (! required_value); + return generator->u.omn (old_value, memory, new_value); + + case arm_sync_generator_omrn: + gcc_assert (required_value); + return generator->u.omrn (old_value, memory, required_value, new_value); + } + + return NULL; +} + +/* Expand a synchronization loop. The synchronization loop is expanded + as an opaque block of instructions in order to ensure that we do + not subsequently get extraneous memory accesses inserted within the + critical region. The exclusive access property of ldrex/strex is + only guaranteed in there are no intervening memory accesses. */ +void +arm_expand_sync (enum machine_mode mode, + struct arm_sync_generator *generator, + rtx target, rtx memory, rtx required_value, rtx new_value) +{ + if (target == NULL) + target = gen_reg_rtx (mode); + + memory = arm_legitimize_sync_memory (memory); + if (mode != SImode) + { + rtx load_temp = gen_reg_rtx (SImode); + + if (required_value) + required_value = convert_modes (SImode, mode, required_value, true); + + new_value = convert_modes (SImode, mode, new_value, true); + emit_insn (arm_call_generator (generator, load_temp, memory, + required_value, new_value)); + emit_move_insn (target, gen_lowpart (mode, load_temp)); + } + else + { + emit_insn (arm_call_generator (generator, target, memory, required_value, + new_value)); + } +} + +static bool +arm_vector_alignment_reachable (const_tree type, bool is_packed) +{ + /* Vectors which aren't in packed structures will not be less aligned than + the natural alignment of their element type, so this is safe. 
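+     For instance, an unpacked V4SI vector is still at least 4-byte
+     (element) aligned, which is presumably enough for the NEON element
+     load/store instructions used for misaligned accesses.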
*/ + if (TARGET_NEON && !BYTES_BIG_ENDIAN) + return !is_packed; + + return default_builtin_vector_alignment_reachable (type, is_packed); +} + +static bool +arm_builtin_support_vector_misalignment (enum machine_mode mode, + const_tree type, int misalignment, + bool is_packed) +{ + if (TARGET_NEON && !BYTES_BIG_ENDIAN) + { + HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type); + + if (is_packed) + return align == 1; + + /* If the misalignment is unknown, we should be able to handle the access + so long as it is not to a member of a packed data structure. */ + if (misalignment == -1) + return true; + + /* Return true if the misalignment is a multiple of the natural alignment + of the vector's element type. This is probably always going to be + true in practice, since we've already established that this isn't a + packed access. */ + return ((misalignment % align) == 0); + } + + return default_builtin_support_vector_misalignment (mode, type, misalignment, + is_packed); +} + +static void +arm_conditional_register_usage (void) +{ + int regno; + + if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA) + { + for (regno = FIRST_FPA_REGNUM; + regno <= LAST_FPA_REGNUM; ++regno) + fixed_regs[regno] = call_used_regs[regno] = 1; + } + + if (TARGET_THUMB1 && optimize_size) + { + /* When optimizing for size on Thumb-1, it's better not + to use the HI regs, because of the overhead of + stacking them. */ + for (regno = FIRST_HI_REGNUM; + regno <= LAST_HI_REGNUM; ++regno) + fixed_regs[regno] = call_used_regs[regno] = 1; + } + + /* The link register can be clobbered by any branch insn, + but we have no way to track that at present, so mark + it as unavailable. */ + if (TARGET_THUMB1) + fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1; + + if (TARGET_32BIT && TARGET_HARD_FLOAT) + { + if (TARGET_MAVERICK) + { + for (regno = FIRST_FPA_REGNUM; + regno <= LAST_FPA_REGNUM; ++ regno) + fixed_regs[regno] = call_used_regs[regno] = 1; + for (regno = FIRST_CIRRUS_FP_REGNUM; + regno <= LAST_CIRRUS_FP_REGNUM; ++ regno) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4; + } + } + if (TARGET_VFP) + { + /* VFPv3 registers are disabled when earlier VFP + versions are selected due to the definition of + LAST_VFP_REGNUM. */ + for (regno = FIRST_VFP_REGNUM; + regno <= LAST_VFP_REGNUM; ++ regno) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16 + || regno >= FIRST_VFP_REGNUM + 32; + } + } + } + + if (TARGET_REALLY_IWMMXT) + { + regno = FIRST_IWMMXT_GR_REGNUM; + /* The 2002/10/09 revision of the XScale ABI has wCG0 + and wCG1 as call-preserved registers. The 2002/11/21 + revision changed this so that all wCG registers are + scratch registers. */ + for (regno = FIRST_IWMMXT_GR_REGNUM; + regno <= LAST_IWMMXT_GR_REGNUM; ++ regno) + fixed_regs[regno] = 0; + /* The XScale ABI has wR0 - wR9 as scratch registers, + the rest as call-preserved registers. */ + for (regno = FIRST_IWMMXT_REGNUM; + regno <= LAST_IWMMXT_REGNUM; ++ regno) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10; + } + } + + if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } + else if (TARGET_APCS_STACK) + { + fixed_regs[10] = 1; + call_used_regs[10] = 1; + } + /* -mcaller-super-interworking reserves r11 for calls to + _interwork_r11_call_via_rN(). Making the register global + is an easy way of ensuring that it remains valid for all + calls. 
*/ + if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING + || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) + { + fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + if (TARGET_CALLER_INTERWORKING) + global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + } + SUBTARGET_CONDITIONAL_REGISTER_USAGE +} + +static reg_class_t +arm_preferred_rename_class (reg_class_t rclass) +{ + /* Thumb-2 instructions using LO_REGS may be smaller than instructions + using GENERIC_REGS. During register rename pass, we prefer LO_REGS, + and code size can be reduced. */ + if (TARGET_THUMB2 && rclass == GENERAL_REGS) + return LO_REGS; + else + return NO_REGS; +} + +#include "gt-arm.h" diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h new file mode 100644 index 000000000..292b48f96 --- /dev/null +++ b/gcc/config/arm/arm.h @@ -0,0 +1,2464 @@ +/* Definitions of target machine for GNU compiler, for ARM. + Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_ARM_H +#define GCC_ARM_H + +/* We can't use enum machine_mode inside a generator file because it + hasn't been created yet; we shouldn't be using any code that + needs the real definition though, so this ought to be safe. */ +#ifdef GENERATOR_FILE +#define MACHMODE int +#else +#include "insn-modes.h" +#define MACHMODE enum machine_mode +#endif + +#include "config/vxworks-dummy.h" + +/* The architecture define. */ +extern char arm_arch_name[]; + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + if (TARGET_DSP_MULTIPLY) \ + builtin_define ("__ARM_FEATURE_DSP"); \ + /* Define __arm__ even when in thumb mode, for \ + consistency with armcc. */ \ + builtin_define ("__arm__"); \ + builtin_define ("__APCS_32__"); \ + if (TARGET_THUMB) \ + builtin_define ("__thumb__"); \ + if (TARGET_THUMB2) \ + builtin_define ("__thumb2__"); \ + \ + if (TARGET_BIG_END) \ + { \ + builtin_define ("__ARMEB__"); \ + if (TARGET_THUMB) \ + builtin_define ("__THUMBEB__"); \ + if (TARGET_LITTLE_WORDS) \ + builtin_define ("__ARMWEL__"); \ + } \ + else \ + { \ + builtin_define ("__ARMEL__"); \ + if (TARGET_THUMB) \ + builtin_define ("__THUMBEL__"); \ + } \ + \ + if (TARGET_SOFT_FLOAT) \ + builtin_define ("__SOFTFP__"); \ + \ + if (TARGET_VFP) \ + builtin_define ("__VFP_FP__"); \ + \ + if (TARGET_NEON) \ + builtin_define ("__ARM_NEON__"); \ + \ + /* Add a define for interworking. \ + Needed when building libgcc.a. 
*/ \ + if (arm_cpp_interwork) \ + builtin_define ("__THUMB_INTERWORK__"); \ + \ + builtin_assert ("cpu=arm"); \ + builtin_assert ("machine=arm"); \ + \ + builtin_define (arm_arch_name); \ + if (arm_arch_cirrus) \ + builtin_define ("__MAVERICK__"); \ + if (arm_arch_xscale) \ + builtin_define ("__XSCALE__"); \ + if (arm_arch_iwmmxt) \ + builtin_define ("__IWMMXT__"); \ + if (TARGET_AAPCS_BASED) \ + { \ + if (arm_pcs_default == ARM_PCS_AAPCS_VFP) \ + builtin_define ("__ARM_PCS_VFP"); \ + else if (arm_pcs_default == ARM_PCS_AAPCS) \ + builtin_define ("__ARM_PCS"); \ + builtin_define ("__ARM_EABI__"); \ + } \ + } while (0) + +/* The various ARM cores. */ +enum processor_type +{ +#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ + IDENT, +#include "arm-cores.def" +#undef ARM_CORE + /* Used to indicate that no processor has been specified. */ + arm_none +}; + +enum target_cpus +{ +#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ + TARGET_CPU_##IDENT, +#include "arm-cores.def" +#undef ARM_CORE + TARGET_CPU_generic +}; + +/* The processor for which instructions should be scheduled. */ +extern enum processor_type arm_tune; + +enum arm_sync_generator_tag + { + arm_sync_generator_omn, + arm_sync_generator_omrn + }; + +/* Wrapper to pass around a polymorphic pointer to a sync instruction + generator and. */ +struct arm_sync_generator +{ + enum arm_sync_generator_tag op; + union + { + rtx (* omn) (rtx, rtx, rtx); + rtx (* omrn) (rtx, rtx, rtx, rtx); + } u; +}; + +typedef enum arm_cond_code +{ + ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC, + ARM_HI, ARM_LS, ARM_GE, ARM_LT, ARM_GT, ARM_LE, ARM_AL, ARM_NV +} +arm_cc; + +extern arm_cc arm_current_cc; + +#define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1)) + +extern int arm_target_label; +extern int arm_ccfsm_state; +extern GTY(()) rtx arm_target_insn; +/* The label of the current constant pool. */ +extern rtx pool_vector_label; +/* Set to 1 when a return insn is output, this means that the epilogue + is not needed. */ +extern int return_used_this_function; +/* Callback to output language specific object attributes. */ +extern void (*arm_lang_output_object_attributes_hook)(void); + +/* Just in case configure has failed to define anything. */ +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT TARGET_CPU_generic +#endif + + +#undef CPP_SPEC +#define CPP_SPEC "%(subtarget_cpp_spec) \ +%{msoft-float:%{mhard-float: \ + %e-msoft-float and -mhard_float may not be used together}} \ +%{mbig-endian:%{mlittle-endian: \ + %e-mbig-endian and -mlittle-endian may not be used together}}" + +#ifndef CC1_SPEC +#define CC1_SPEC "" +#endif + +/* This macro defines names of additional specifications to put in the specs + that can be used in various specifications like CC1_SPEC. Its definition + is an initializer with a subgrouping for each command option. + + Each subgrouping contains a string constant, that defines the + specification name, and a string constant that used by the GCC driver + program. + + Do not define this macro if it does not need to do anything. */ +#define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +/* Run-time Target Specification. 
*/ +#ifndef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/generic)", stderr); +#endif + +#define TARGET_SOFT_FLOAT (arm_float_abi == ARM_FLOAT_ABI_SOFT) +/* Use hardware floating point instructions. */ +#define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT) +/* Use hardware floating point calling convention. */ +#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD) +#define TARGET_FPA (arm_fpu_desc->model == ARM_FP_MODEL_FPA) +#define TARGET_MAVERICK (arm_fpu_desc->model == ARM_FP_MODEL_MAVERICK) +#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP) +#define TARGET_IWMMXT (arm_arch_iwmmxt) +#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT) +#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT) +#define TARGET_ARM (! TARGET_THUMB) +#define TARGET_EITHER 1 /* (TARGET_ARM | TARGET_THUMB) */ +#define TARGET_BACKTRACE (leaf_function_p () \ + ? TARGET_TPCS_LEAF_FRAME \ + : TARGET_TPCS_FRAME) +#define TARGET_LDRD (arm_arch5e && ARM_DOUBLEWORD_ALIGN) +#define TARGET_AAPCS_BASED \ + (arm_abi != ARM_ABI_APCS && arm_abi != ARM_ABI_ATPCS) + +#define TARGET_HARD_TP (target_thread_pointer == TP_CP15) +#define TARGET_SOFT_TP (target_thread_pointer == TP_SOFT) + +/* Only 16-bit thumb code. */ +#define TARGET_THUMB1 (TARGET_THUMB && !arm_arch_thumb2) +/* Arm or Thumb-2 32-bit code. */ +#define TARGET_32BIT (TARGET_ARM || arm_arch_thumb2) +/* 32-bit Thumb-2 code. */ +#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) +/* Thumb-1 only. */ +#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) +/* FPA emulator without LFM. */ +#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2) + +/* The following two macros concern the ability to execute coprocessor + instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently + only ever tested when we know we are generating for VFP hardware; we need + to be more careful with TARGET_NEON as noted below. */ + +/* FPU is has the full VFPv3/NEON register file of 32 D registers. */ +#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32) + +/* FPU supports VFPv3 instructions. */ +#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) + +/* FPU only supports VFP single-precision instructions. */ +#define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE) + +/* FPU supports VFP double-precision instructions. */ +#define TARGET_VFP_DOUBLE (TARGET_VFP && arm_fpu_desc->regs != VFP_REG_SINGLE) + +/* FPU supports half-precision floating-point with NEON element load/store. */ +#define TARGET_NEON_FP16 \ + (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16) + +/* FPU supports VFP half-precision floating-point. */ +#define TARGET_FP16 (TARGET_VFP && arm_fpu_desc->fp16) + +/* FPU supports Neon instructions. The setting of this macro gets + revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT + and TARGET_HARD_FLOAT to ensure that NEON instructions are + available. */ +#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ + && TARGET_VFP && arm_fpu_desc->neon) + +/* "DSP" multiply instructions, eg. SMULxy. */ +#define TARGET_DSP_MULTIPLY \ + (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7em)) +/* Integer SIMD instructions, and extend-accumulate instructions. */ +#define TARGET_INT_SIMD \ + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) + +/* Should MOVW/MOVT be used in preference to a constant pool. 
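+   For example (symbol name illustrative), on an ARMv7 target that is
+   not optimizing for size an address constant can be materialized as
+       movw	r0, #:lower16:some_symbol
+       movt	r0, #:upper16:some_symbol
+   instead of an "ldr r0, =some_symbol" load from a literal pool.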
*/ +#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) + +/* We could use unified syntax for arm mode, but for now we just use it + for Thumb-2. */ +#define TARGET_UNIFIED_ASM TARGET_THUMB2 + +/* Nonzero if this chip provides the DMB instruction. */ +#define TARGET_HAVE_DMB (arm_arch7) + +/* Nonzero if this chip implements a memory barrier via CP15. */ +#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \ + && ! TARGET_THUMB1) + +/* Nonzero if this chip implements a memory barrier instruction. */ +#define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) + +/* Nonzero if this chip supports ldrex and strex */ +#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) + +/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */ +#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7) + +/* True iff the full BPABI is being used. If TARGET_BPABI is true, + then TARGET_AAPCS_BASED must be true -- but the converse does not + hold. TARGET_BPABI implies the use of the BPABI runtime library, + etc., in addition to just the AAPCS calling conventions. */ +#ifndef TARGET_BPABI +#define TARGET_BPABI false +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-arch is ignored if -march or -mcpu are specified. + --with-cpu is ignored if -march or -mcpu are specified, and is overridden + by --with-arch. + --with-tune is ignored if -mtune or -mcpu are specified (but not affected + by -march). + --with-float is ignored if -mhard-float, -msoft-float or -mfloat-abi are + specified. + --with-fpu is ignored if -mfpu is specified. + --with-abi is ignored is -mabi is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \ + {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mcpu=*:%{!mtune=*:-mtune=%(VALUE)}}" }, \ + {"float", \ + "%{!msoft-float:%{!mhard-float:%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}}}" }, \ + {"fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \ + {"abi", "%{!mabi=*:-mabi=%(VALUE)}"}, \ + {"mode", "%{!marm:%{!mthumb:-m%(VALUE)}}"}, + +/* Which floating point model to use. */ +enum arm_fp_model +{ + ARM_FP_MODEL_UNKNOWN, + /* FPA model (Hardware or software). */ + ARM_FP_MODEL_FPA, + /* Cirrus Maverick floating point model. */ + ARM_FP_MODEL_MAVERICK, + /* VFP floating point model. */ + ARM_FP_MODEL_VFP +}; + +enum vfp_reg_type +{ + VFP_NONE = 0, + VFP_REG_D16, + VFP_REG_D32, + VFP_REG_SINGLE +}; + +extern const struct arm_fpu_desc +{ + const char *name; + enum arm_fp_model model; + int rev; + enum vfp_reg_type regs; + int neon; + int fp16; +} *arm_fpu_desc; + +/* Which floating point hardware to schedule for. */ +extern int arm_fpu_attr; + +enum float_abi_type +{ + ARM_FLOAT_ABI_SOFT, + ARM_FLOAT_ABI_SOFTFP, + ARM_FLOAT_ABI_HARD +}; + +extern enum float_abi_type arm_float_abi; + +#ifndef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT +#endif + +/* Which __fp16 format to use. + The enumeration values correspond to the numbering for the + Tag_ABI_FP_16bit_format attribute. + */ +enum arm_fp16_format_type +{ + ARM_FP16_FORMAT_NONE = 0, + ARM_FP16_FORMAT_IEEE = 1, + ARM_FP16_FORMAT_ALTERNATIVE = 2 +}; + +extern enum arm_fp16_format_type arm_fp16_format; +#define LARGEST_EXPONENT_IS_NORMAL(bits) \ + ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + +/* Which ABI to use. 
*/ +enum arm_abi_type +{ + ARM_ABI_APCS, + ARM_ABI_ATPCS, + ARM_ABI_AAPCS, + ARM_ABI_IWMMXT, + ARM_ABI_AAPCS_LINUX +}; + +extern enum arm_abi_type arm_abi; + +#ifndef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_APCS +#endif + +/* Which thread pointer access sequence to use. */ +enum arm_tp_type { + TP_AUTO, + TP_SOFT, + TP_CP15 +}; + +extern enum arm_tp_type target_thread_pointer; + +/* Nonzero if this chip supports the ARM Architecture 3M extensions. */ +extern int arm_arch3m; + +/* Nonzero if this chip supports the ARM Architecture 4 extensions. */ +extern int arm_arch4; + +/* Nonzero if this chip supports the ARM Architecture 4T extensions. */ +extern int arm_arch4t; + +/* Nonzero if this chip supports the ARM Architecture 5 extensions. */ +extern int arm_arch5; + +/* Nonzero if this chip supports the ARM Architecture 5E extensions. */ +extern int arm_arch5e; + +/* Nonzero if this chip supports the ARM Architecture 6 extensions. */ +extern int arm_arch6; + +/* Nonzero if this chip supports the ARM Architecture 6k extensions. */ +extern int arm_arch6k; + +/* Nonzero if this chip supports the ARM Architecture 7 extensions. */ +extern int arm_arch7; + +/* Nonzero if instructions not present in the 'M' profile can be used. */ +extern int arm_arch_notm; + +/* Nonzero if instructions present in ARMv7E-M can be used. */ +extern int arm_arch7em; + +/* Nonzero if this chip can benefit from load scheduling. */ +extern int arm_ld_sched; + +/* Nonzero if generating Thumb code, either Thumb-1 or Thumb-2. */ +extern int thumb_code; + +/* Nonzero if generating Thumb-1 code. */ +extern int thumb1_code; + +/* Nonzero if this chip is a StrongARM. */ +extern int arm_tune_strongarm; + +/* Nonzero if this chip is a Cirrus variant. */ +extern int arm_arch_cirrus; + +/* Nonzero if this chip supports Intel XScale with Wireless MMX technology. */ +extern int arm_arch_iwmmxt; + +/* Nonzero if this chip is an XScale. */ +extern int arm_arch_xscale; + +/* Nonzero if tuning for XScale. */ +extern int arm_tune_xscale; + +/* Nonzero if tuning for stores via the write buffer. */ +extern int arm_tune_wbuf; + +/* Nonzero if tuning for Cortex-A9. */ +extern int arm_tune_cortex_a9; + +/* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around + problems in GLD which doesn't understand that armv5t code is + interworking clean. */ +extern int arm_cpp_interwork; + +/* Nonzero if chip supports Thumb 2. */ +extern int arm_arch_thumb2; + +/* Nonzero if chip supports integer division instruction. */ +extern int arm_arch_hwdiv; + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +/* Nonzero if PIC code requires explicit qualifiers to generate + PLT and GOT relocs rather than the assembler doing so implicitly. + Subtargets can override these if required. */ +#ifndef NEED_GOT_RELOC +#define NEED_GOT_RELOC 0 +#endif +#ifndef NEED_PLT_RELOC +#define NEED_PLT_RELOC 0 +#endif + +/* Nonzero if we need to refer to the GOT with a PC-relative + offset. In other words, generate + + .word _GLOBAL_OFFSET_TABLE_ - [. - (.Lxx + 8)] + + rather than + + .word _GLOBAL_OFFSET_TABLE_ - (.Lxx + 8) + + The default is true, which matches NetBSD. Subtargets can + override this if required. */ +#ifndef GOT_PCREL +#define GOT_PCREL 1 +#endif + +/* Target machine storage Layout. */ + + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. 
In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +/* It is far faster to zero extend chars than to sign extend them */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 1; \ + (MODE) = SImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + Most ARM processors are run in little endian mode, so that is the default. + If you want to have it run-time selectable, change the definition in a + cover file to be TARGET_BIG_ENDIAN. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_END != 0) + +/* Define this if most significant word of a multiword number is the lowest + numbered. + This is always false, even when in big-endian mode. */ +#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN && ! TARGET_LITTLE_WORDS) + +/* Define this if most significant word of doubles is the lowest numbered. + The rules are different based on whether or not we use FPA-format, + VFP-format or some other floating point co-processor's format doubles. */ +#define FLOAT_WORDS_BIG_ENDIAN (arm_float_words_big_endian ()) + +#define UNITS_PER_WORD 4 + +/* True if natural alignment is used for doubleword types. */ +#define ARM_DOUBLEWORD_ALIGN TARGET_AAPCS_BASED + +#define DOUBLEWORD_ALIGNMENT 64 + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32) + +#define PREFERRED_STACK_BOUNDARY \ + (arm_abi == ARM_ABI_ATPCS ? 64 : STACK_BOUNDARY) + +#define FUNCTION_BOUNDARY ((TARGET_THUMB && optimize_size) ? 16 : 32) + +/* The lowest bit is used to indicate Thumb-mode functions, so the + vbit must go into the delta field of pointers to member + functions. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +#define EMPTY_FIELD_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32) + +/* XXX Blah -- this macro is used directly by libobjc. Since it + supports no vector modes, cut out the complexity and fall back + on BIGGEST_FIELD_ALIGNMENT. */ +#ifdef IN_TARGET_LIBS +#define BIGGEST_FIELD_ALIGNMENT 64 +#endif + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT_FACTOR (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2) + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && !optimize_size \ + && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR) \ + ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN)) + +/* Align definitions of arrays, unions and structures so that + initializations and copies can be made more efficient. This is not + ABI-changing, so it only affects places where we can see the + definition. Increasing the alignment tends to introduce padding, + so don't do this when optimizing for size/conserving stack space. */ +#define ARM_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ + (((COND) && ((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE \ + || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) + +/* Align global data. 
*/ +#define DATA_ALIGNMENT(EXP, ALIGN) \ + ARM_EXPAND_ALIGNMENT(!optimize_size, EXP, ALIGN) + +/* Similarly, make sure that objects on the stack are sensibly aligned. */ +#define LOCAL_ALIGNMENT(EXP, ALIGN) \ + ARM_EXPAND_ALIGNMENT(!flag_conserve_stack, EXP, ALIGN) + +/* Setting STRUCTURE_SIZE_BOUNDARY to 32 produces more efficient code, but the + value set in previous versions of this toolchain was 8, which produces more + compact structures. The command line option -mstructure_size_boundary= + can be used to change this value. For compatibility with the ARM SDK + however the value should be left at 32. ARM SDT Reference Manual (ARM DUI + 0020D) page 2-20 says "Structures are aligned on word boundaries". + The AAPCS specifies a value of 8. */ +#define STRUCTURE_SIZE_BOUNDARY arm_structure_size_boundary +extern int arm_structure_size_boundary; + +/* This is the value used to initialize arm_structure_size_boundary. If a + particular arm target wants to change the default value it should change + the definition of this macro, not STRUCTURE_SIZE_BOUNDARY. See netbsd.h + for an example of this. */ +#ifndef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 32 +#endif + +/* Nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* wchar_t is unsigned under the AAPCS. */ +#ifndef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "int") + +#define WCHAR_TYPE_SIZE BITS_PER_WORD +#endif + +#ifndef SIZE_TYPE +#define SIZE_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long unsigned int") +#endif + +#ifndef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int") +#endif + +/* AAPCS requires that structure alignment is affected by bitfields. */ +#ifndef PCC_BITFIELD_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS TARGET_AAPCS_BASED +#endif + + +/* Standard register usage. */ + +/* Register allocation in ARM Procedure Call Standard (as used on RISCiX): + (S - saved over call). + + r0 * argument word/integer result + r1-r3 argument word + + r4-r8 S register variable + r9 S (rfp) register variable (real frame pointer) + + r10 F S (sl) stack limit (used by -mapcs-stack-check) + r11 F S (fp) argument pointer + r12 (ip) temp workspace + r13 F S (sp) lower end of current stack frame + r14 (lr) link address/workspace + r15 F (pc) program counter + + f0 floating point result + f1-f3 floating point scratch + + f4-f7 S floating point variable + + cc This is NOT a real register, but is used internally + to represent things that use or set the condition + codes. + sfp This isn't either. It is used during rtl generation + since the offset between the frame pointer and the + auto's isn't known until after register allocation. + afp Nor this, we only need this because of non-local + goto. Without it fp appears to be used and the + elimination code won't get rid of sfp. It tracks + fp exactly at all times. + + *: See TARGET_CONDITIONAL_REGISTER_USAGE */ + +/* + mvf0 Cirrus floating point result + mvf1-mvf3 Cirrus floating point scratch + mvf4-mvf15 S Cirrus floating point variable. */ + +/* s0-s15 VFP scratch (aka d0-d7). + s16-s31 S VFP variable (aka d8-d15). + vfpcc Not a real register. Represents the VFP condition + code flags. 
*/ + +/* The stack backtrace structure is as follows: + fp points to here: | save code pointer | [fp] + | return link value | [fp, #-4] + | return sp value | [fp, #-8] + | return fp value | [fp, #-12] + [| saved r10 value |] + [| saved r9 value |] + [| saved r8 value |] + [| saved r7 value |] + [| saved r6 value |] + [| saved r5 value |] + [| saved r4 value |] + [| saved r3 value |] + [| saved r2 value |] + [| saved r1 value |] + [| saved r0 value |] + [| saved f7 value |] three words + [| saved f6 value |] three words + [| saved f5 value |] three words + [| saved f4 value |] three words + r0-r3 are not normally saved in a C function. */ + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ +#define FIXED_REGISTERS \ +{ \ + 0,0,0,0,0,0,0,0, \ + 0,0,0,0,0,1,0,1, \ + 0,0,0,0,0,0,0,0, \ + 1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1 \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. + The CC is not preserved over function calls on the ARM 6, so it is + easier to assume this for all. SFP is preserved, since FP is. */ +#define CALL_USED_REGISTERS \ +{ \ + 1,1,1,1,0,0,0,0, \ + 0,0,0,0,1,1,1,1, \ + 1,1,1,1,0,0,0,0, \ + 1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1 \ +} + +#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE +#define SUBTARGET_CONDITIONAL_REGISTER_USAGE +#endif + +/* These are a couple of extensions to the formats accepted + by asm_fprintf: + %@ prints out ASM_COMMENT_START + %r prints out REGISTER_PREFIX reg_names[arg] */ +#define ASM_FPRINTF_EXTENSIONS(FILE, ARGS, P) \ + case '@': \ + fputs (ASM_COMMENT_START, FILE); \ + break; \ + \ + case 'r': \ + fputs (REGISTER_PREFIX, FILE); \ + fputs (reg_names [va_arg (ARGS, int)], FILE); \ + break; + +/* Round X up to the nearest word. */ +#define ROUND_UP_WORD(X) (((X) + 3) & ~3) + +/* Convert fron bytes to ints. */ +#define ARM_NUM_INTS(X) (((X) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* The number of (integer) registers required to hold a quantity of type MODE. + Also used for VFP registers. */ +#define ARM_NUM_REGS(MODE) \ + ARM_NUM_INTS (GET_MODE_SIZE (MODE)) + +/* The number of (integer) registers required to hold a quantity of TYPE MODE. */ +#define ARM_NUM_REGS2(MODE, TYPE) \ + ARM_NUM_INTS ((MODE) == BLKmode ? \ + int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) + +/* The number of (integer) argument register available. */ +#define NUM_ARG_REGS 4 + +/* And similarly for the VFP. */ +#define NUM_VFP_ARG_REGS 16 + +/* Return the register number of the N'th (integer) argument. */ +#define ARG_REGISTER(N) (N - 1) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* The number of the last argument register. 
*/ +#define LAST_ARG_REGNUM ARG_REGISTER (NUM_ARG_REGS) + +/* The numbers of the Thumb register ranges. */ +#define FIRST_LO_REGNUM 0 +#define LAST_LO_REGNUM 7 +#define FIRST_HI_REGNUM 8 +#define LAST_HI_REGNUM 11 + +/* Overridden by config/arm/bpabi.h. */ +#ifndef ARM_UNWIND_INFO +#define ARM_UNWIND_INFO 0 +#endif + +/* Use r0 and r1 to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? N : INVALID_REGNUM) + +/* The register that holds the return address in exception handlers. */ +#define ARM_EH_STACKADJ_REGNUM 2 +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM) + +/* The native (Norcroft) Pascal compiler for the ARM passes the static chain + as an invisible last argument (possible since varargs don't exist in + Pascal), so the following is not true. */ +#define STATIC_CHAIN_REGNUM 12 + +/* Define this to be where the real frame pointer is if it is not possible to + work out the offset between the frame pointer and the automatic variables + until after register allocation has taken place. FRAME_POINTER_REGNUM + should point to a special register that we will make sure is eliminated. + + For the Thumb we have another problem. The TPCS defines the frame pointer + as r11, and GCC believes that it is always possible to use the frame pointer + as base register for addressing purposes. (See comments in + find_reloads_address()). But - the Thumb does not allow high registers, + including r11, to be used as base address registers. Hence our problem. + + The solution used here, and in the old thumb port is to use r7 instead of + r11 as the hard frame pointer and to have special code to generate + backtrace structures on the stack (if required to do so via a command line + option) using r11. This is the only 'user visible' use of r11 as a frame + pointer. */ +#define ARM_HARD_FRAME_POINTER_REGNUM 11 +#define THUMB_HARD_FRAME_POINTER_REGNUM 7 + +#define HARD_FRAME_POINTER_REGNUM \ + (TARGET_ARM \ + ? ARM_HARD_FRAME_POINTER_REGNUM \ + : THUMB_HARD_FRAME_POINTER_REGNUM) + +#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 +#define HARD_FRAME_POINTER_IS_ARG_POINTER 0 + +#define FP_REGNUM HARD_FRAME_POINTER_REGNUM + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM SP_REGNUM + +/* ARM floating pointer registers. */ +#define FIRST_FPA_REGNUM 16 +#define LAST_FPA_REGNUM 23 +#define IS_FPA_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_FPA_REGNUM) && ((REGNUM) <= LAST_FPA_REGNUM)) + +#define FIRST_IWMMXT_GR_REGNUM 43 +#define LAST_IWMMXT_GR_REGNUM 46 +#define FIRST_IWMMXT_REGNUM 47 +#define LAST_IWMMXT_REGNUM 62 +#define IS_IWMMXT_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_IWMMXT_REGNUM) && ((REGNUM) <= LAST_IWMMXT_REGNUM)) +#define IS_IWMMXT_GR_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_IWMMXT_GR_REGNUM) && ((REGNUM) <= LAST_IWMMXT_GR_REGNUM)) + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 25 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 26 + +#define FIRST_CIRRUS_FP_REGNUM 27 +#define LAST_CIRRUS_FP_REGNUM 42 +#define IS_CIRRUS_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_CIRRUS_FP_REGNUM) && ((REGNUM) <= LAST_CIRRUS_FP_REGNUM)) + +#define FIRST_VFP_REGNUM 63 +#define D7_VFP_REGNUM 78 /* Registers 77 and 78 == VFP reg D7. */ +#define LAST_VFP_REGNUM \ + (TARGET_VFPD32 ? 
LAST_HI_VFP_REGNUM : LAST_LO_VFP_REGNUM) + +#define IS_VFP_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_VFP_REGNUM) && ((REGNUM) <= LAST_VFP_REGNUM)) + +/* VFP registers are split into two types: those defined by VFP versions < 3 + have D registers overlaid on consecutive pairs of S registers. VFP version 3 + defines 16 new D registers (d16-d31) which, for simplicity and correctness + in various parts of the backend, we implement as "fake" single-precision + registers (which would be S32-S63, but cannot be used in that way). The + following macros define these ranges of registers. */ +#define LAST_LO_VFP_REGNUM 94 +#define FIRST_HI_VFP_REGNUM 95 +#define LAST_HI_VFP_REGNUM 126 + +#define VFP_REGNO_OK_FOR_SINGLE(REGNUM) \ + ((REGNUM) <= LAST_LO_VFP_REGNUM) + +/* DFmode values are only valid in even register pairs. */ +#define VFP_REGNO_OK_FOR_DOUBLE(REGNUM) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 1) == 0) + +/* Neon Quad values must start at a multiple of four registers. */ +#define NEON_REGNO_OK_FOR_QUAD(REGNUM) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0) + +/* Neon structures of vectors must be in even register pairs and there + must be enough registers available. Because of various patterns + requiring quad registers, we require them to start at a multiple of + four. */ +#define NEON_REGNO_OK_FOR_NREGS(REGNUM, N) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0 \ + && (LAST_VFP_REGNUM - (REGNUM) >= 2 * (N) - 1)) + +/* The number of hard registers is 16 ARM + 8 FPA + 1 CC + 1 SFP + 1 AFP. */ +/* + 16 Cirrus registers take us up to 43. */ +/* Intel Wireless MMX Technology registers add 16 + 4 more. */ +/* VFP (VFP3) adds 32 (64) + 1 more. */ +#define FIRST_PSEUDO_REGISTER 128 + +#define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO) + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may be accessed + via the stack pointer) in functions that seem suitable. + If we have to have a frame pointer we might as well make use of it. + APCS says that the frame pointer does not need to be pushed in leaf + functions, or simple tail call functions. */ + +#ifndef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 +#endif + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On the ARM regs are UNITS_PER_WORD bits wide; FPA regs can hold any FP + mode. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((TARGET_32BIT \ + && REGNO >= FIRST_FPA_REGNUM \ + && REGNO != FRAME_POINTER_REGNUM \ + && REGNO != ARG_POINTER_REGNUM) \ + && !IS_VFP_REGNUM (REGNO) \ + ? 1 : ARM_NUM_REGS (MODE)) + +/* Return true if REGNO is suitable for holding a quantity of type MODE. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + arm_hard_regno_mode_ok ((REGNO), (MODE)) + +/* Value is 1 if it is a good idea to tie two pseudo registers + when one has mode MODE1 and one has mode MODE2. + If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2, + for any hard reg, then this must be 0 for correct output. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + +#define VALID_IWMMXT_REG_MODE(MODE) \ + (arm_vector_mode_supported_p (MODE) || (MODE) == DImode) + +/* Modes valid for Neon D registers. 
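+   These are the 8-byte vector (and DI) modes, e.g. V8QImode or
+   V2SFmode, each of which fits in a single D register.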
*/ +#define VALID_NEON_DREG_MODE(MODE) \ + ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ + || (MODE) == V2SFmode || (MODE) == DImode) + +/* Modes valid for Neon Q registers. */ +#define VALID_NEON_QREG_MODE(MODE) \ + ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ + || (MODE) == V4SFmode || (MODE) == V2DImode) + +/* Structure modes valid for Neon registers. */ +#define VALID_NEON_STRUCT_MODE(MODE) \ + ((MODE) == TImode || (MODE) == EImode || (MODE) == OImode \ + || (MODE) == CImode || (MODE) == XImode) + +/* The register numbers in sequence, for passing to arm_gen_load_multiple. */ +extern int arm_regs_in_sequence[]; + +/* The order in which register should be allocated. It is good to use ip + since no saving is required (though calls clobber it) and it never contains + function parameters. It is quite good to use lr since other calls may + clobber it anyway. Allocate r0 through r3 in reverse order since r3 is + least likely to contain a function parameter; in addition results are + returned in r0. + For VFP/VFPv3, allocate D16-D31 first, then caller-saved registers (D0-D7), + then D8-D15. The reason for doing this is to attempt to reduce register + pressure when both single- and double-precision registers are used in a + function. */ + +#define REG_ALLOC_ORDER \ +{ \ + 3, 2, 1, 0, 12, 14, 4, 5, \ + 6, 7, 8, 10, 9, 11, 13, 15, \ + 16, 17, 18, 19, 20, 21, 22, 23, \ + 27, 28, 29, 30, 31, 32, 33, 34, \ + 35, 36, 37, 38, 39, 40, 41, 42, \ + 43, 44, 45, 46, 47, 48, 49, 50, \ + 51, 52, 53, 54, 55, 56, 57, 58, \ + 59, 60, 61, 62, \ + 24, 25, 26, \ + 95, 96, 97, 98, 99, 100, 101, 102, \ + 103, 104, 105, 106, 107, 108, 109, 110, \ + 111, 112, 113, 114, 115, 116, 117, 118, \ + 119, 120, 121, 122, 123, 124, 125, 126, \ + 78, 77, 76, 75, 74, 73, 72, 71, \ + 70, 69, 68, 67, 66, 65, 64, 63, \ + 79, 80, 81, 82, 83, 84, 85, 86, \ + 87, 88, 89, 90, 91, 92, 93, 94, \ + 127 \ +} + +/* Use different register alloc ordering for Thumb. */ +#define ADJUST_REG_ALLOC_ORDER arm_order_regs_for_local_alloc () + +/* Tell IRA to use the order we define rather than messing it up with its + own cost calculations. */ +#define HONOR_REG_ALLOC_ORDER + +/* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. */ +#define HARD_REGNO_RENAME_OK(SRC, DST) \ + (! IS_INTERRUPT (cfun->machine->func_type) || \ + df_regs_ever_live_p (DST)) + +/* Register and constant classes. */ + +/* Register classes: used to be simple, just all ARM regs or all FPA regs + Now that the Thumb is involved it has become more complicated. */ +enum reg_class +{ + NO_REGS, + FPA_REGS, + CIRRUS_REGS, + VFP_D0_D7_REGS, + VFP_LO_REGS, + VFP_HI_REGS, + VFP_REGS, + IWMMXT_GR_REGS, + IWMMXT_REGS, + LO_REGS, + STACK_REG, + BASE_REGS, + HI_REGS, + CC_REG, + VFPCC_REG, + GENERAL_REGS, + CORE_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "FPA_REGS", \ + "CIRRUS_REGS", \ + "VFP_D0_D7_REGS", \ + "VFP_LO_REGS", \ + "VFP_HI_REGS", \ + "VFP_REGS", \ + "IWMMXT_GR_REGS", \ + "IWMMXT_REGS", \ + "LO_REGS", \ + "STACK_REG", \ + "BASE_REGS", \ + "HI_REGS", \ + "CC_REG", \ + "VFPCC_REG", \ + "GENERAL_REGS", \ + "CORE_REGS", \ + "ALL_REGS", \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. 
*/ +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x00FF0000, 0x00000000, 0x00000000, 0x00000000 }, /* FPA_REGS */ \ + { 0xF8000000, 0x000007FF, 0x00000000, 0x00000000 }, /* CIRRUS_REGS */ \ + { 0x00000000, 0x80000000, 0x00007FFF, 0x00000000 }, /* VFP_D0_D7_REGS */ \ + { 0x00000000, 0x80000000, 0x7FFFFFFF, 0x00000000 }, /* VFP_LO_REGS */ \ + { 0x00000000, 0x00000000, 0x80000000, 0x7FFFFFFF }, /* VFP_HI_REGS */ \ + { 0x00000000, 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF }, /* VFP_REGS */ \ + { 0x00000000, 0x00007800, 0x00000000, 0x00000000 }, /* IWMMXT_GR_REGS */ \ + { 0x00000000, 0x7FFF8000, 0x00000000, 0x00000000 }, /* IWMMXT_REGS */ \ + { 0x000000FF, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \ + { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ + { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \ + { 0x0000DF00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \ + { 0x01000000, 0x00000000, 0x00000000, 0x00000000 }, /* CC_REG */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x80000000 }, /* VFPCC_REG */ \ + { 0x0000DFFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \ + { 0x0000FFFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \ + { 0xFAFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF } /* ALL_REGS */ \ +} + +/* Any of the VFP register classes. */ +#define IS_VFP_CLASS(X) \ + ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \ + || (X) == VFP_HI_REGS || (X) == VFP_REGS) + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ +#define REGNO_REG_CLASS(REGNO) arm_regno_class (REGNO) + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, FPA_REGS, CIRRUS_REGS, VFP_REGS, IWMMXT_GR_REGS, IWMMXT_REGS,\ + LIM_REG_CLASSES \ +} + +/* FPA registers can't do subreg as all values are reformatted to internal + precision. VFP registers may only be accessed in the mode they + were set. */ +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? reg_classes_intersect_p (FPA_REGS, (CLASS)) \ + || reg_classes_intersect_p (VFP_REGS, (CLASS)) \ + : 0) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS (TARGET_THUMB1 ? LO_REGS : GENERAL_REGS) +#define BASE_REG_CLASS (TARGET_THUMB1 ? LO_REGS : CORE_REGS) + +/* For the Thumb the high registers cannot be used as base registers + when addressing quantities in QI or HI mode; if we don't know the + mode, then we must be conservative. */ +#define MODE_BASE_REG_CLASS(MODE) \ + (TARGET_32BIT ? CORE_REGS : \ + (((MODE) == SImode) ? BASE_REGS : LO_REGS)) + +/* For Thumb we can not support SP+reg addressing, so we return LO_REGS + instead of BASE_REGS. */ +#define MODE_BASE_REG_REG_CLASS(MODE) BASE_REG_CLASS + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. 
*/ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ + arm_small_register_classes_for_mode_p + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS, but for the Thumb core registers and + immediate constants we prefer a LO_REGS class or a subset. */ +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + (TARGET_32BIT ? (CLASS) : \ + ((CLASS) == GENERAL_REGS || (CLASS) == HI_REGS \ + || (CLASS) == NO_REGS || (CLASS) == STACK_REG \ + ? LO_REGS : (CLASS))) + +/* Must leave BASE_REGS reloads alone */ +#define THUMB_SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ + ((CLASS) != LO_REGS && (CLASS) != BASE_REGS \ + ? ((true_regnum (X) == -1 ? LO_REGS \ + : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \ + : NO_REGS)) \ + : NO_REGS) + +#define THUMB_SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ + ((CLASS) != LO_REGS && (CLASS) != BASE_REGS \ + ? ((true_regnum (X) == -1 ? LO_REGS \ + : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \ + : NO_REGS)) \ + : NO_REGS) + +/* Return the register class of a scratch register needed to copy IN into + or out of a register in CLASS in MODE. If it can be done directly, + NO_REGS is returned. */ +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ + /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ + ((TARGET_VFP && TARGET_HARD_FLOAT \ + && IS_VFP_CLASS (CLASS)) \ + ? coproc_secondary_reload_class (MODE, X, FALSE) \ + : (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) \ + ? coproc_secondary_reload_class (MODE, X, TRUE) \ + : TARGET_32BIT \ + ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \ + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X)) + +/* If we need to load shorts byte-at-a-time, then we need a scratch. */ +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ + /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ + ((TARGET_VFP && TARGET_HARD_FLOAT \ + && IS_VFP_CLASS (CLASS)) \ + ? coproc_secondary_reload_class (MODE, X, FALSE) : \ + (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) ? \ + coproc_secondary_reload_class (MODE, X, TRUE) : \ + /* Cannot load constants into Cirrus registers. */ \ + (TARGET_MAVERICK && TARGET_HARD_FLOAT \ + && (CLASS) == CIRRUS_REGS \ + && (CONSTANT_P (X) || GET_CODE (X) == SYMBOL_REF)) \ + ? GENERAL_REGS : \ + (TARGET_32BIT ? \ + (((CLASS) == IWMMXT_REGS || (CLASS) == IWMMXT_GR_REGS) \ + && CONSTANT_P (X)) \ + ? GENERAL_REGS : \ + (((MODE) == HImode && ! arm_arch4 \ + && (GET_CODE (X) == MEM \ + || ((GET_CODE (X) == REG || GET_CODE (X) == SUBREG) \ + && true_regnum (X) == -1))) \ + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X))) + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. + + For the ARM, we wish to handle large displacements off a base + register by splitting the addend across a MOV and the mem insn. + This can cut the number of reloads needed. */ +#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \ + do \ + { \ + if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \ + goto WIN; \ + } \ + while (0) + +/* XXX If an HImode FP+large_offset address is converted to an HImode + SP+large_offset address, then reload won't know how to fix it. 
It sees + only that SP isn't valid for HImode, and so reloads the SP into an index + register, but the resulting address is still invalid because the offset + is too big. We fix it here instead by reloading the entire address. */ +/* We could probably achieve better results by defining PROMOTE_MODE to help + cope with the variances between the Thumb's signed and unsigned byte and + halfword load instructions. */ +/* ??? This should be safe for thumb2, but we may be able to do better. */ +#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ +do { \ + rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ + if (TARGET_ARM) \ + ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \ + else \ + THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + ARM regs are UNITS_PER_WORD bits while FPA regs can hold any FP mode */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (((CLASS) == FPA_REGS || (CLASS) == CIRRUS_REGS) ? 1 : ARM_NUM_REGS (MODE)) + +/* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the mode of the object illegally. */ + +/* Moves between FPA_REGS and GENERAL_REGS are two memory insns. + Moves between VFP_REGS and GENERAL_REGS are a single insn, but + it is typically more expensive than a single memory access. We set + the cost to less than two memory accesses so that floating + point to integer conversion does not go through memory. */ +#define REGISTER_MOVE_COST(MODE, FROM, TO) \ + (TARGET_32BIT ? \ + ((FROM) == FPA_REGS && (TO) != FPA_REGS ? 20 : \ + (FROM) != FPA_REGS && (TO) == FPA_REGS ? 20 : \ + IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 15 : \ + !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 15 : \ + (FROM) == IWMMXT_REGS && (TO) != IWMMXT_REGS ? 4 : \ + (FROM) != IWMMXT_REGS && (TO) == IWMMXT_REGS ? 4 : \ + (FROM) == IWMMXT_GR_REGS || (TO) == IWMMXT_GR_REGS ? 20 : \ + (FROM) == CIRRUS_REGS && (TO) != CIRRUS_REGS ? 20 : \ + (FROM) != CIRRUS_REGS && (TO) == CIRRUS_REGS ? 20 : \ + 2) \ + : \ + ((FROM) == HI_REGS || (TO) == HI_REGS) ? 4 : 2) + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* The amount of scratch space needed by _interwork_{r7,r11}_call_via_rN(). + When present, it is one word in size, and sits at the top of the frame, + between the soft frame pointer and either r7 or r11. + + We only need _interwork_rM_call_via_rN() for -mcaller-super-interworking, + and only then if some outgoing arguments are passed on the stack. It would + be tempting to also check whether the stack arguments are passed by indirect + calls, but there seems to be no reason in principle why a post-reload pass + couldn't convert a direct call into an indirect one. */ +#define CALLER_INTERWORKING_SLOT_SIZE \ + (TARGET_CALLER_INTERWORKING \ + && crtl->outgoing_args_size != 0 \ + ? 
UNITS_PER_WORD : 0) + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. */ +/* The push insns do not do this rounding implicitly. + So don't define this. */ +/* #define PUSH_ROUNDING(NPUSHED) ROUND_UP_WORD (NPUSHED) */ + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) (TARGET_ARM ? 4 : 0) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +#define LIBCALL_VALUE(MODE) \ + (TARGET_AAPCS_BASED ? aapcs_libcall_value (MODE) \ + : (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \ + && GET_MODE_CLASS (MODE) == MODE_FLOAT) \ + ? gen_rtx_REG (MODE, FIRST_FPA_REGNUM) \ + : TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK \ + && GET_MODE_CLASS (MODE) == MODE_FLOAT \ + ? gen_rtx_REG (MODE, FIRST_CIRRUS_FP_REGNUM) \ + : TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (MODE) \ + ? gen_rtx_REG (MODE, FIRST_IWMMXT_REGNUM) \ + : gen_rtx_REG (MODE, ARG_REGISTER (1))) + +/* 1 if REGNO is a possible register number for a function value. */ +#define FUNCTION_VALUE_REGNO_P(REGNO) \ + ((REGNO) == ARG_REGISTER (1) \ + || (TARGET_AAPCS_BASED && TARGET_32BIT \ + && TARGET_VFP && TARGET_HARD_FLOAT \ + && (REGNO) == FIRST_VFP_REGNUM) \ + || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \ + && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \ + || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \ + || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \ + && TARGET_HARD_FLOAT_ABI && TARGET_FPA)) + +/* Amount of memory needed for an untyped call to save all possible return + registers. */ +#define APPLY_RESULT_SIZE arm_apply_result_size() + +/* Define DEFAULT_PCC_STRUCT_RETURN to 1 if all structure and union return + values must be in memory. On the ARM, they need only do so if larger + than a word, or if they contain elements offset from zero in the struct. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* These bits describe the different types of function supported + by the ARM backend. They are exclusive. i.e. a function cannot be both a + normal function and an interworked function, for example. Knowing the + type of a function is important for determining its prologue and + epilogue sequences. + Note value 7 is currently unassigned. Also note that the interrupt + function types all have bit 2 set, so that they can be tested for easily. + Note that 0 is deliberately chosen for ARM_FT_UNKNOWN so that when the + machine_function structure is initialized (to zero) func_type will + default to unknown. This will force the first use of arm_current_func_type + to call arm_compute_func_type. */ +#define ARM_FT_UNKNOWN 0 /* Type has not yet been determined. */ +#define ARM_FT_NORMAL 1 /* Your normal, straightforward function. */ +#define ARM_FT_INTERWORKED 2 /* A function that supports interworking. */ +#define ARM_FT_ISR 4 /* An interrupt service routine. */ +#define ARM_FT_FIQ 5 /* A fast interrupt service routine. 
*/ +#define ARM_FT_EXCEPTION 6 /* An ARM exception handler (subcase of ISR). */ + +#define ARM_FT_TYPE_MASK ((1 << 3) - 1) + +/* In addition functions can have several type modifiers, + outlined by these bit masks: */ +#define ARM_FT_INTERRUPT (1 << 2) /* Note overlap with FT_ISR and above. */ +#define ARM_FT_NAKED (1 << 3) /* No prologue or epilogue. */ +#define ARM_FT_VOLATILE (1 << 4) /* Does not return. */ +#define ARM_FT_NESTED (1 << 5) /* Embedded inside another func. */ +#define ARM_FT_STACKALIGN (1 << 6) /* Called with misaligned stack. */ + +/* Some macros to test these flags. */ +#define ARM_FUNC_TYPE(t) (t & ARM_FT_TYPE_MASK) +#define IS_INTERRUPT(t) (t & ARM_FT_INTERRUPT) +#define IS_VOLATILE(t) (t & ARM_FT_VOLATILE) +#define IS_NAKED(t) (t & ARM_FT_NAKED) +#define IS_NESTED(t) (t & ARM_FT_NESTED) +#define IS_STACKALIGN(t) (t & ARM_FT_STACKALIGN) + + +/* Structure used to hold the function stack frame layout. Offsets are + relative to the stack pointer on function entry. Positive offsets are + in the direction of stack growth. + Only soft_frame is used in thumb mode. */ + +typedef struct GTY(()) arm_stack_offsets +{ + int saved_args; /* ARG_POINTER_REGNUM. */ + int frame; /* ARM_HARD_FRAME_POINTER_REGNUM. */ + int saved_regs; + int soft_frame; /* FRAME_POINTER_REGNUM. */ + int locals_base; /* THUMB_HARD_FRAME_POINTER_REGNUM. */ + int outgoing_args; /* STACK_POINTER_REGNUM. */ + unsigned int saved_regs_mask; +} +arm_stack_offsets; + +#ifndef GENERATOR_FILE +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +typedef struct GTY(()) machine_function +{ + /* Additional stack adjustment in __builtin_eh_throw. */ + rtx eh_epilogue_sp_ofs; + /* Records if LR has to be saved for far jumps. */ + int far_jump_used; + /* Records if ARG_POINTER was ever live. */ + int arg_pointer_live; + /* Records if the save of LR has been eliminated. */ + int lr_save_eliminated; + /* The size of the stack frame. Only valid after reload. */ + arm_stack_offsets stack_offsets; + /* Records the type of the current function. */ + unsigned long func_type; + /* Record if the function has a variable argument list. */ + int uses_anonymous_args; + /* Records if sibcalls are blocked because an argument + register is needed to preserve stack alignment. */ + int sibcall_blocked; + /* The PIC register for this function. This might be a pseudo. */ + rtx pic_reg; + /* Labels for per-function Thumb call-via stubs. One per potential calling + register. We can never call via LR or PC. We can call via SP if a + trampoline happens to be on the top of the stack. */ + rtx call_via[14]; + /* Set to 1 when a return insn is output, this means that the epilogue + is not needed. */ + int return_used_this_function; + /* When outputting Thumb-1 code, record the last insn that provides + information about condition codes, and the comparison operands. */ + rtx thumb1_cc_insn; + rtx thumb1_cc_op0; + rtx thumb1_cc_op1; + /* Also record the CC mode that is supported. */ + enum machine_mode thumb1_cc_mode; +} +machine_function; +#endif + +/* As in the machine_function, a global set of call-via labels, for code + that is in text_section. */ +extern GTY(()) rtx thumb_call_via_label[14]; + +/* The number of potential ways of assigning to a co-processor. */ +#define ARM_NUM_COPROC_SLOTS 1 + +/* Enumeration of procedure calling standard variants. We don't really + support all of these yet. */ +enum arm_pcs +{ + ARM_PCS_AAPCS, /* Base standard AAPCS. 
*/ + ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */ + ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */ + /* This must be the last AAPCS variant. */ + ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */ + ARM_PCS_ATPCS, /* ATPCS. */ + ARM_PCS_APCS, /* APCS (legacy Linux etc). */ + ARM_PCS_UNKNOWN +}; + +/* Default procedure calling standard of current compilation unit. */ +extern enum arm_pcs arm_pcs_default; + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. */ +typedef struct +{ + /* This is the number of registers of arguments scanned so far. */ + int nregs; + /* This is the number of iWMMXt register arguments scanned so far. */ + int iwmmxt_nregs; + int named_count; + int nargs; + /* Which procedure call variant to use for this call. */ + enum arm_pcs pcs_variant; + + /* AAPCS related state tracking. */ + int aapcs_arg_processed; /* No need to lay out this argument again. */ + int aapcs_cprc_slot; /* Index of co-processor rules to handle + this argument, or -1 if using core + registers. */ + int aapcs_ncrn; + int aapcs_next_ncrn; + rtx aapcs_reg; /* Register assigned to this argument. */ + int aapcs_partial; /* How many bytes are passed in regs (if + split between core regs and stack. + Zero otherwise. */ + int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS]; + int can_split; /* Argument can be split between core regs + and the stack. */ + /* Private data for tracking VFP register allocation */ + unsigned aapcs_vfp_regs_free; + unsigned aapcs_vfp_reg_alloc; + int aapcs_vfp_rcount; + MACHMODE aapcs_vfp_rmode; +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_PADDING(MODE, TYPE) \ + (arm_pad_arg_upward (MODE, TYPE) ? upward : downward) + +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (arm_pad_reg_upward (MODE, TYPE, FIRST) ? upward : downward) + +/* For AAPCS, padding should never be below the argument. For other ABIs, + * mimic the default. */ +#define PAD_VARARGS_DOWN \ + ((TARGET_AAPCS_BASED) ? 0 : BYTES_BIG_ENDIAN) + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + On the ARM, the offset starts at 0. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + arm_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL)) + +/* 1 if N is a possible register number for function argument passing. + On the ARM, r0-r3 are used to pass args. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + (IN_RANGE ((REGNO), 0, 3) \ + || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \ + && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \ + || (TARGET_IWMMXT_ABI \ + && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9))) + + +/* If your target environment doesn't prefix user functions with an + underscore, you may wish to re-define this to prevent any conflicts. */ +#ifndef ARM_MCOUNT_NAME +#define ARM_MCOUNT_NAME "*mcount" +#endif + +/* Call the function profiler with a given profile label. The Acorn + compiler puts this BEFORE the prolog but gcc puts it afterwards. + On the ARM the full profile code will look like: + .data + LP1 + .word 0 + .text + mov ip, lr + bl mcount + .word LP1 + + profile_function() in final.c outputs the .data section, FUNCTION_PROFILER + will output the .text section. + + The ``mov ip,lr'' seems like a good idea to stick with cc convention. + ``prof'' doesn't seem to mind about this! 
+ + Note - this version of the code is designed to work in both ARM and + Thumb modes. */ +#ifndef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + char temp[20]; \ + rtx sym; \ + \ + asm_fprintf (STREAM, "\tmov\t%r, %r\n\tbl\t", \ + IP_REGNUM, LR_REGNUM); \ + assemble_name (STREAM, ARM_MCOUNT_NAME); \ + fputc ('\n', STREAM); \ + ASM_GENERATE_INTERNAL_LABEL (temp, "LP", LABELNO); \ + sym = gen_rtx_SYMBOL_REF (Pmode, temp); \ + assemble_aligned_integer (UNITS_PER_WORD, sym); \ +} +#endif + +#ifdef THUMB_FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM, LABELNO) \ + if (TARGET_ARM) \ + ARM_FUNCTION_PROFILER (STREAM, LABELNO) \ + else \ + THUMB_FUNCTION_PROFILER (STREAM, LABELNO) +#else +#define FUNCTION_PROFILER(STREAM, LABELNO) \ + ARM_FUNCTION_PROFILER (STREAM, LABELNO) +#endif + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. + + On the ARM, the function epilogue recovers the stack pointer from the + frame. */ +#define EXIT_IGNORE_STACK 1 + +#define EPILOGUE_USES(REGNO) ((REGNO) == LR_REGNUM) + +/* Determine if the epilogue should be output as RTL. + You should override this if you define FUNCTION_EXTRA_EPILOGUE. */ +#define USE_RETURN_INSN(ISCOND) \ + (TARGET_32BIT ? use_return_insn (ISCOND, NULL) : 0) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + We have two registers that can be eliminated on the ARM. First, the + arg pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the pseudo frame pointer register can always + be eliminated; it is replaced with either the stack or the real frame + pointer. Note we have to use {ARM|THUMB}_HARD_FRAME_POINTER_REGNUM + because the definition of HARD_FRAME_POINTER_REGNUM is not a constant. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM }} + +/* Define the offset between two registers, one to be eliminated, and the + other its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + if (TARGET_ARM) \ + (OFFSET) = arm_compute_initial_elimination_offset (FROM, TO); \ + else \ + (OFFSET) = thumb_compute_initial_elimination_offset (FROM, TO) + +/* Special case handling of the location of arguments passed on the stack. */ +#define DEBUGGER_ARG_OFFSET(value, addr) value ? value : arm_debugger_arg_offset (value, addr) + +/* Initialize data used by insn expanders. This is called from insn_emit, + once for every function before code is generated. */ +#define INIT_EXPANDERS arm_init_expanders () + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE (TARGET_32BIT ? 16 : 20) + +/* Alignment required for a trampoline in bits. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/* Addressing modes, and classification of registers for them. 
*/ +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_INCREMENT TARGET_32BIT +#define HAVE_POST_DECREMENT TARGET_32BIT +#define HAVE_PRE_DECREMENT TARGET_32BIT +#define HAVE_PRE_MODIFY_DISP TARGET_32BIT +#define HAVE_POST_MODIFY_DISP TARGET_32BIT +#define HAVE_PRE_MODIFY_REG TARGET_32BIT +#define HAVE_POST_MODIFY_REG TARGET_32BIT + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ +#define TEST_REGNO(R, TEST, VALUE) \ + ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE)) + +/* Don't allow the pc to be used. */ +#define ARM_REGNO_OK_FOR_BASE_P(REGNO) \ + (TEST_REGNO (REGNO, <, PC_REGNUM) \ + || TEST_REGNO (REGNO, ==, FRAME_POINTER_REGNUM) \ + || TEST_REGNO (REGNO, ==, ARG_POINTER_REGNUM)) + +#define THUMB1_REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (TEST_REGNO (REGNO, <=, LAST_LO_REGNUM) \ + || (GET_MODE_SIZE (MODE) >= 4 \ + && TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM))) + +#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (TARGET_THUMB1 \ + ? THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO, MODE) \ + : ARM_REGNO_OK_FOR_BASE_P (REGNO)) + +/* Nonzero if X can be the base register in a reg+reg addressing mode. + For Thumb, we can not use SP + reg, so reject SP. */ +#define REGNO_MODE_OK_FOR_REG_BASE_P(X, MODE) \ + REGNO_MODE_OK_FOR_BASE_P (X, QImode) + +/* For ARM code, we don't care about the mode, but for Thumb, the index + must be suitable for use in a QImode load. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + (REGNO_MODE_OK_FOR_BASE_P (REGNO, QImode) \ + && !TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM)) + +/* Maximum number of registers that can appear in a valid memory address. + Shifts in addresses can't be by a register. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. */ +/* XXX We can address any constant, eventually... */ +/* ??? Should the TARGET_ARM here also apply to thumb2? */ +#define CONSTANT_ADDRESS_P(X) \ + (GET_CODE (X) == SYMBOL_REF \ + && (CONSTANT_POOL_ADDRESS_P (X) \ + || (TARGET_ARM && optimize > 0 && SYMBOL_REF_FLAG (X)))) + +/* True if SYMBOL + OFFSET constants must refer to something within + SYMBOL's section. */ +#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0 + +/* Nonzero if all target requires all absolute relocations be R_ARM_ABS32. */ +#ifndef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 0 +#endif + +/* Nonzero if the constant value X is a legitimate general operand. + It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. + + On the ARM, allow any integer (invalid ones are removed later by insn + patterns), nice doubles and symbol_refs which refer to the function's + constant pool XXX. + + When generating pic allow anything. */ +#define ARM_LEGITIMATE_CONSTANT_P(X) (flag_pic || ! label_mentioned_p (X)) + +#define THUMB_LEGITIMATE_CONSTANT_P(X) \ + ( GET_CODE (X) == CONST_INT \ + || GET_CODE (X) == CONST_DOUBLE \ + || CONSTANT_ADDRESS_P (X) \ + || flag_pic) + +#define LEGITIMATE_CONSTANT_P(X) \ + (!arm_cannot_force_const_mem (X) \ + && (TARGET_32BIT ? 
ARM_LEGITIMATE_CONSTANT_P (X) \ + : THUMB_LEGITIMATE_CONSTANT_P (X))) + +#ifndef SUBTARGET_NAME_ENCODING_LENGTHS +#define SUBTARGET_NAME_ENCODING_LENGTHS +#endif + +/* This is a C fragment for the inside of a switch statement. + Each case label should return the number of characters to + be stripped from the start of a function's name, if that + name starts with the indicated character. */ +#define ARM_NAME_ENCODING_LENGTHS \ + case '*': return 1; \ + SUBTARGET_NAME_ENCODING_LENGTHS + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. */ +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + arm_asm_output_labelref (FILE, NAME) + +/* Output IT instructions for conditionally executed Thumb-2 instructions. */ +#define ASM_OUTPUT_OPCODE(STREAM, PTR) \ + if (TARGET_THUMB2) \ + thumb2_asm_output_opcode (STREAM); + +/* The EABI specifies that constructors should go in .init_array. + Other targets use .ctors for compatibility. */ +#ifndef ARM_EABI_CTORS_SECTION_OP +#define ARM_EABI_CTORS_SECTION_OP \ + "\t.section\t.init_array,\"aw\",%init_array" +#endif +#ifndef ARM_EABI_DTORS_SECTION_OP +#define ARM_EABI_DTORS_SECTION_OP \ + "\t.section\t.fini_array,\"aw\",%fini_array" +#endif +#define ARM_CTORS_SECTION_OP \ + "\t.section\t.ctors,\"aw\",%progbits" +#define ARM_DTORS_SECTION_OP \ + "\t.section\t.dtors,\"aw\",%progbits" + +/* Define CTORS_SECTION_ASM_OP. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP +#ifndef IN_LIBGCC2 +# define CTORS_SECTION_ASM_OP \ + (TARGET_AAPCS_BASED ? ARM_EABI_CTORS_SECTION_OP : ARM_CTORS_SECTION_OP) +# define DTORS_SECTION_ASM_OP \ + (TARGET_AAPCS_BASED ? ARM_EABI_DTORS_SECTION_OP : ARM_DTORS_SECTION_OP) +#else /* !defined (IN_LIBGCC2) */ +/* In libgcc, CTORS_SECTION_ASM_OP must be a compile-time constant, + so we cannot use the definition above. */ +# ifdef __ARM_EABI__ +/* The .ctors section is not part of the EABI, so we do not define + CTORS_SECTION_ASM_OP when in libgcc; that prevents crtstuff + from trying to use it. We do define it when doing normal + compilation, as .init_array can be used instead of .ctors. */ +/* There is no need to emit begin or end markers when using + init_array; the dynamic linker will compute the size of the + array itself based on special symbols created by the static + linker. However, we do need to arrange to set up + exception-handling here. */ +# define CTOR_LIST_BEGIN asm (ARM_EABI_CTORS_SECTION_OP) +# define CTOR_LIST_END /* empty */ +# define DTOR_LIST_BEGIN asm (ARM_EABI_DTORS_SECTION_OP) +# define DTOR_LIST_END /* empty */ +# else /* !defined (__ARM_EABI__) */ +# define CTORS_SECTION_ASM_OP ARM_CTORS_SECTION_OP +# define DTORS_SECTION_ASM_OP ARM_DTORS_SECTION_OP +# endif /* !defined (__ARM_EABI__) */ +#endif /* !defined (IN_LIBCC2) */ + +/* True if the operating system can merge entities with vague linkage + (e.g., symbols in COMDAT group) during dynamic linking. */ +#ifndef TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P +#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P true +#endif + +#define ARM_OUTPUT_FN_UNWIND(F, PROLOGUE) arm_output_fn_unwind (F, PROLOGUE) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + Thumb-2 has the same restrictions as arm. 
*/ +#ifndef REG_OK_STRICT + +#define ARM_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) <= LAST_ARM_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || REGNO (X) == ARG_POINTER_REGNUM) + +#define ARM_REG_OK_FOR_INDEX_P(X) \ + ((REGNO (X) <= LAST_ARM_REGNUM \ + && REGNO (X) != STACK_POINTER_REGNUM) \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || REGNO (X) == ARG_POINTER_REGNUM) + +#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \ + (REGNO (X) <= LAST_LO_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || (GET_MODE_SIZE (MODE) >= 4 \ + && (REGNO (X) == STACK_POINTER_REGNUM \ + || (X) == hard_frame_pointer_rtx \ + || (X) == arg_pointer_rtx))) + +#define REG_STRICT_P 0 + +#else /* REG_OK_STRICT */ + +#define ARM_REG_OK_FOR_BASE_P(X) \ + ARM_REGNO_OK_FOR_BASE_P (REGNO (X)) + +#define ARM_REG_OK_FOR_INDEX_P(X) \ + ARM_REGNO_OK_FOR_INDEX_P (REGNO (X)) + +#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \ + THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO (X), MODE) + +#define REG_STRICT_P 1 + +#endif /* REG_OK_STRICT */ + +/* Now define some helpers in terms of the above. */ + +#define REG_MODE_OK_FOR_BASE_P(X, MODE) \ + (TARGET_THUMB1 \ + ? THUMB1_REG_MODE_OK_FOR_BASE_P (X, MODE) \ + : ARM_REG_OK_FOR_BASE_P (X)) + +/* For 16-bit Thumb, a valid index register is anything that can be used in + a byte load instruction. */ +#define THUMB1_REG_OK_FOR_INDEX_P(X) \ + THUMB1_REG_MODE_OK_FOR_BASE_P (X, QImode) + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. On the Thumb, the stack pointer + is not suitable. */ +#define REG_OK_FOR_INDEX_P(X) \ + (TARGET_THUMB1 \ + ? THUMB1_REG_OK_FOR_INDEX_P (X) \ + : ARM_REG_OK_FOR_INDEX_P (X)) + +/* Nonzero if X can be the base register in a reg+reg addressing mode. + For Thumb, we can not use SP + reg, so reject SP. */ +#define REG_MODE_OK_FOR_REG_BASE_P(X, MODE) \ + REG_OK_FOR_INDEX_P (X) + +#define ARM_BASE_REGISTER_RTX_P(X) \ + (GET_CODE (X) == REG && ARM_REG_OK_FOR_BASE_P (X)) + +#define ARM_INDEX_REGISTER_RTX_P(X) \ + (GET_CODE (X) == REG && ARM_REG_OK_FOR_INDEX_P (X)) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE Pmode + +#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \ + || (TARGET_THUMB1 \ + && (optimize_size || flag_pic))) + +#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ + (TARGET_THUMB1 \ + ? (min >= 0 && max < 512 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \ + : min >= -256 && max < 256 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, QImode) \ + : min >= 0 && max < 8192 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, HImode) \ + : min >= -4096 && max < 4096 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ + : SImode) \ + : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \ + : (max >= 0x200) ? HImode \ + : QImode)) + +/* signed 'char' is most compatible, but RISC OS wants it unsigned. + unsigned is probably best, but may break some code. */ +#ifndef DEFAULT_SIGNED_CHAR +#define DEFAULT_SIGNED_CHAR 0 +#endif + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +#undef MOVE_RATIO +#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. 
*/ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) \ + (TARGET_THUMB ? ZERO_EXTEND : \ + ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND \ + : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN))) + +/* Nonzero if access to memory by bytes is slow and undesirable. */ +#define SLOW_BYTE_ACCESS 0 + +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 + +/* Immediate shift counts are truncated by the output routines (or was it + the assembler?). Shift counts in a register are truncated by ARM. Note + that the native compiler puts too large (> 32) immediate shift counts + into a register and shifts by the register, letting the ARM decide what + to do instead of doing that itself. */ +/* This is all wrong. Defining SHIFT_COUNT_TRUNCATED tells combine that + code like (X << (Y % 32)) for register X, Y is equivalent to (X << Y). + On the arm, Y in a register is used modulo 256 for the shift. Only for + rotates is modulo 32 used. */ +/* #define SHIFT_COUNT_TRUNCATED 1 */ + +/* All integers have the same format so truncation is easy. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Calling from registers is a massive pain. */ +#define NO_FUNCTION_CSE 1 + +/* The machine modes of pointers and functions */ +#define Pmode SImode +#define FUNCTION_MODE Pmode + +#define ARM_FRAME_RTX(X) \ + ( (X) == frame_pointer_rtx || (X) == stack_pointer_rtx \ + || (X) == arg_pointer_rtx) + +/* Moves to and from memory are quite expensive */ +#define MEMORY_MOVE_COST(M, CLASS, IN) \ + (TARGET_32BIT ? 10 : \ + ((GET_MODE_SIZE (M) < 4 ? 8 : 2 * GET_MODE_SIZE (M)) \ + * (CLASS == LO_REGS ? 1 : 2))) + +/* Try to generate sequences that don't involve branches, we can then use + conditional instructions */ +#define BRANCH_COST(speed_p, predictable_p) \ + (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0)) + +/* Position Independent Code. */ +/* We decide which register to use based on the compilation options and + the assembler in use; this is more general than the APCS restriction of + using sb (r9) all the time. */ +extern unsigned arm_pic_register; + +/* The register number of the register used to address a table of static + data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM arm_pic_register + +/* We can't directly access anything that contains a symbol, + nor can we indirect via the constant pool. One exception is + UNSPEC_TLS, which is always PIC. */ +#define LEGITIMATE_PIC_OPERAND_P(X) \ + (!(symbol_mentioned_p (X) \ + || label_mentioned_p (X) \ + || (GET_CODE (X) == SYMBOL_REF \ + && CONSTANT_POOL_ADDRESS_P (X) \ + && (symbol_mentioned_p (get_pool_constant (X)) \ + || label_mentioned_p (get_pool_constant (X))))) \ + || tls_mentioned_p (X)) + +/* We need to know when we are making a constant pool; this determines + whether data needs to be in the GOT or can be referenced via a GOT + offset. */ +extern int making_const_table; + +/* Handle pragmas for compatibility with Intel's compilers. */ +/* Also abuse this to register additional C specific EABI attributes. 
*/ +#define REGISTER_TARGET_PRAGMAS() do { \ + c_register_pragma (0, "long_calls", arm_pr_long_calls); \ + c_register_pragma (0, "no_long_calls", arm_pr_no_long_calls); \ + c_register_pragma (0, "long_calls_off", arm_pr_long_calls_off); \ + arm_lang_object_attributes_init(); \ +} while (0) + +/* Condition code information. */ +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ + +#define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y) + +#define REVERSIBLE_CC_MODE(MODE) 1 + +#define REVERSE_CONDITION(CODE,MODE) \ + (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ + ? reverse_condition_maybe_unordered (code) \ + : reverse_condition (code)) + +#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \ + (CODE) = arm_canonicalize_comparison (CODE, &(OP0), &(OP1)) + +/* The arm5 clz instruction returns 32. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) + +#define CC_STATUS_INIT \ + do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0) + +#undef ASM_APP_OFF +#define ASM_APP_OFF (TARGET_THUMB1 ? "\t.code\t16\n" : \ + TARGET_THUMB2 ? "\t.thumb\n" : "") + +/* Output a push or a pop instruction (only used when profiling). + We can't push STATIC_CHAIN_REGNUM (r12) directly with Thumb-1. We know + that ASM_OUTPUT_REG_PUSH will be matched with ASM_OUTPUT_REG_POP, and + that r7 isn't used by the function profiler, so we can use it as a + scratch reg. WARNING: This isn't safe in the general case! It may be + sensitive to future changes in final.c:profile_function. */ +#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM,"\tstmfd\t%r!,{%r}\n", \ + STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + asm_fprintf (STREAM, "\tmov\tr7, %r\n", REGNO);\ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + } \ + else \ + asm_fprintf (STREAM, "\tpush {%r}\n", REGNO); \ + } while (0) + + +/* See comment for ASM_OUTPUT_REG_PUSH concerning Thumb-1 issue. */ +#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM, "\tldmfd\t%r!,{%r}\n", \ + STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + asm_fprintf (STREAM, "\tpop\t{r7}\n"); \ + asm_fprintf (STREAM, "\tmov\t%r, r7\n", REGNO);\ + asm_fprintf (STREAM, "\tpop\t{r7}\n"); \ + } \ + else \ + asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \ + } while (0) + +/* Jump table alignment is explicit in ASM_OUTPUT_CASE_LABEL. */ +#define ADDR_VEC_ALIGN(JUMPTABLE) 0 + +/* This is how to output a label which precedes a jumptable. Since + Thumb instructions are 2 bytes, we may need explicit alignment here. */ +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \ + do \ + { \ + if (TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) \ + ASM_OUTPUT_ALIGN (FILE, 2); \ + (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \ + } \ + while (0) + +/* Make sure subsequent insns are aligned after a TBB. 
*/ +#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \ + do \ + { \ + if (GET_MODE (PATTERN (JUMPTABLE)) == QImode) \ + ASM_OUTPUT_ALIGN (FILE, 1); \ + } \ + while (0) + +#define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ + do \ + { \ + if (TARGET_THUMB) \ + { \ + if (is_called_in_ARM_mode (DECL) \ + || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \ + && cfun->is_thunk)) \ + fprintf (STREAM, "\t.code 32\n") ; \ + else if (TARGET_THUMB1) \ + fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \ + else \ + fprintf (STREAM, "\t.thumb\n\t.thumb_func\n") ; \ + } \ + if (TARGET_POKE_FUNCTION_NAME) \ + arm_poke_function_name (STREAM, (const char *) NAME); \ + } \ + while (0) + +/* For aliases of functions we use .thumb_set instead. */ +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL1, DECL2) \ + do \ + { \ + const char *const LABEL1 = XSTR (XEXP (DECL_RTL (decl), 0), 0); \ + const char *const LABEL2 = IDENTIFIER_POINTER (DECL2); \ + \ + if (TARGET_THUMB && TREE_CODE (DECL1) == FUNCTION_DECL) \ + { \ + fprintf (FILE, "\t.thumb_set "); \ + assemble_name (FILE, LABEL1); \ + fprintf (FILE, ","); \ + assemble_name (FILE, LABEL2); \ + fprintf (FILE, "\n"); \ + } \ + else \ + ASM_OUTPUT_DEF (FILE, LABEL1, LABEL2); \ + } \ + while (0) + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* To support -falign-* switches we need to use .p2align so + that alignment directives in code sections will be padded + with no-op instructions, rather than zeroes. */ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (int) (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", \ + (int) (LOG), (int) (MAX_SKIP)); \ + } +#endif + +/* Add two bytes to the length of conditionally executed Thumb-2 + instructions for the IT instruction. */ +#define ADJUST_INSN_LENGTH(insn, length) \ + if (TARGET_THUMB2 && GET_CODE (PATTERN (insn)) == COND_EXEC) \ + length += 2; + +/* Only perform branch elimination (by making instructions conditional) if + we're optimizing. For Thumb-2 check if any IT instructions need + outputting. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + if (TARGET_ARM && optimize) \ + arm_final_prescan_insn (INSN); \ + else if (TARGET_THUMB2) \ + thumb2_final_prescan_insn (INSN); \ + else if (TARGET_THUMB1) \ + thumb1_final_prescan_insn (INSN) + +#define ARM_SIGN_EXTEND(x) ((HOST_WIDE_INT) \ + (HOST_BITS_PER_WIDE_INT <= 32 ? (unsigned HOST_WIDE_INT) (x) \ + : ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0xffffffff) |\ + ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0x80000000) \ + ? ((~ (unsigned HOST_WIDE_INT) 0) \ + & ~ (unsigned HOST_WIDE_INT) 0xffffffff) \ + : 0)))) + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + arm_return_addr (COUNT, FRAME) + +/* Mask of the bits in the PC that contain the real return address + when running in 26-bit mode. */ +#define RETURN_ADDR_MASK26 (0x03fffffc) + +/* Pick up the return address upon entry to a procedure. Used for + dwarf2 unwind information. This also enables the table driven + mechanism. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNUM) + +/* Used to mask out junk bits from the return address, such as + processor state, interrupt status, condition codes and the like. 
*/ +#define MASK_RETURN_ADDR \ + /* If we are generating code for an ARM2/ARM3 machine or for an ARM6 \ + in 26 bit mode, the condition codes must be masked out of the \ + return address. This does not apply to ARM6 and later processors \ + when running in 32 bit mode. */ \ + ((arm_arch4 || TARGET_THUMB) \ + ? (gen_int_mode ((unsigned long)0xffffffff, Pmode)) \ + : arm_gen_return_addr_mask ()) + + +/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have + symbolic names defined here (which would require too much duplication). + FIXME? */ +enum arm_builtins +{ + ARM_BUILTIN_GETWCX, + ARM_BUILTIN_SETWCX, + + ARM_BUILTIN_WZERO, + + ARM_BUILTIN_WAVG2BR, + ARM_BUILTIN_WAVG2HR, + ARM_BUILTIN_WAVG2B, + ARM_BUILTIN_WAVG2H, + + ARM_BUILTIN_WACCB, + ARM_BUILTIN_WACCH, + ARM_BUILTIN_WACCW, + + ARM_BUILTIN_WMACS, + ARM_BUILTIN_WMACSZ, + ARM_BUILTIN_WMACU, + ARM_BUILTIN_WMACUZ, + + ARM_BUILTIN_WSADB, + ARM_BUILTIN_WSADBZ, + ARM_BUILTIN_WSADH, + ARM_BUILTIN_WSADHZ, + + ARM_BUILTIN_WALIGN, + + ARM_BUILTIN_TMIA, + ARM_BUILTIN_TMIAPH, + ARM_BUILTIN_TMIABB, + ARM_BUILTIN_TMIABT, + ARM_BUILTIN_TMIATB, + ARM_BUILTIN_TMIATT, + + ARM_BUILTIN_TMOVMSKB, + ARM_BUILTIN_TMOVMSKH, + ARM_BUILTIN_TMOVMSKW, + + ARM_BUILTIN_TBCSTB, + ARM_BUILTIN_TBCSTH, + ARM_BUILTIN_TBCSTW, + + ARM_BUILTIN_WMADDS, + ARM_BUILTIN_WMADDU, + + ARM_BUILTIN_WPACKHSS, + ARM_BUILTIN_WPACKWSS, + ARM_BUILTIN_WPACKDSS, + ARM_BUILTIN_WPACKHUS, + ARM_BUILTIN_WPACKWUS, + ARM_BUILTIN_WPACKDUS, + + ARM_BUILTIN_WADDB, + ARM_BUILTIN_WADDH, + ARM_BUILTIN_WADDW, + ARM_BUILTIN_WADDSSB, + ARM_BUILTIN_WADDSSH, + ARM_BUILTIN_WADDSSW, + ARM_BUILTIN_WADDUSB, + ARM_BUILTIN_WADDUSH, + ARM_BUILTIN_WADDUSW, + ARM_BUILTIN_WSUBB, + ARM_BUILTIN_WSUBH, + ARM_BUILTIN_WSUBW, + ARM_BUILTIN_WSUBSSB, + ARM_BUILTIN_WSUBSSH, + ARM_BUILTIN_WSUBSSW, + ARM_BUILTIN_WSUBUSB, + ARM_BUILTIN_WSUBUSH, + ARM_BUILTIN_WSUBUSW, + + ARM_BUILTIN_WAND, + ARM_BUILTIN_WANDN, + ARM_BUILTIN_WOR, + ARM_BUILTIN_WXOR, + + ARM_BUILTIN_WCMPEQB, + ARM_BUILTIN_WCMPEQH, + ARM_BUILTIN_WCMPEQW, + ARM_BUILTIN_WCMPGTUB, + ARM_BUILTIN_WCMPGTUH, + ARM_BUILTIN_WCMPGTUW, + ARM_BUILTIN_WCMPGTSB, + ARM_BUILTIN_WCMPGTSH, + ARM_BUILTIN_WCMPGTSW, + + ARM_BUILTIN_TEXTRMSB, + ARM_BUILTIN_TEXTRMSH, + ARM_BUILTIN_TEXTRMSW, + ARM_BUILTIN_TEXTRMUB, + ARM_BUILTIN_TEXTRMUH, + ARM_BUILTIN_TEXTRMUW, + ARM_BUILTIN_TINSRB, + ARM_BUILTIN_TINSRH, + ARM_BUILTIN_TINSRW, + + ARM_BUILTIN_WMAXSW, + ARM_BUILTIN_WMAXSH, + ARM_BUILTIN_WMAXSB, + ARM_BUILTIN_WMAXUW, + ARM_BUILTIN_WMAXUH, + ARM_BUILTIN_WMAXUB, + ARM_BUILTIN_WMINSW, + ARM_BUILTIN_WMINSH, + ARM_BUILTIN_WMINSB, + ARM_BUILTIN_WMINUW, + ARM_BUILTIN_WMINUH, + ARM_BUILTIN_WMINUB, + + ARM_BUILTIN_WMULUM, + ARM_BUILTIN_WMULSM, + ARM_BUILTIN_WMULUL, + + ARM_BUILTIN_PSADBH, + ARM_BUILTIN_WSHUFH, + + ARM_BUILTIN_WSLLH, + ARM_BUILTIN_WSLLW, + ARM_BUILTIN_WSLLD, + ARM_BUILTIN_WSRAH, + ARM_BUILTIN_WSRAW, + ARM_BUILTIN_WSRAD, + ARM_BUILTIN_WSRLH, + ARM_BUILTIN_WSRLW, + ARM_BUILTIN_WSRLD, + ARM_BUILTIN_WRORH, + ARM_BUILTIN_WRORW, + ARM_BUILTIN_WRORD, + ARM_BUILTIN_WSLLHI, + ARM_BUILTIN_WSLLWI, + ARM_BUILTIN_WSLLDI, + ARM_BUILTIN_WSRAHI, + ARM_BUILTIN_WSRAWI, + ARM_BUILTIN_WSRADI, + ARM_BUILTIN_WSRLHI, + ARM_BUILTIN_WSRLWI, + ARM_BUILTIN_WSRLDI, + ARM_BUILTIN_WRORHI, + ARM_BUILTIN_WRORWI, + ARM_BUILTIN_WRORDI, + + ARM_BUILTIN_WUNPCKIHB, + ARM_BUILTIN_WUNPCKIHH, + ARM_BUILTIN_WUNPCKIHW, + ARM_BUILTIN_WUNPCKILB, + ARM_BUILTIN_WUNPCKILH, + ARM_BUILTIN_WUNPCKILW, + + ARM_BUILTIN_WUNPCKEHSB, + ARM_BUILTIN_WUNPCKEHSH, + ARM_BUILTIN_WUNPCKEHSW, + ARM_BUILTIN_WUNPCKEHUB, + 
  ARM_BUILTIN_WUNPCKEHUH,
+  ARM_BUILTIN_WUNPCKEHUW,
+  ARM_BUILTIN_WUNPCKELSB,
+  ARM_BUILTIN_WUNPCKELSH,
+  ARM_BUILTIN_WUNPCKELSW,
+  ARM_BUILTIN_WUNPCKELUB,
+  ARM_BUILTIN_WUNPCKELUH,
+  ARM_BUILTIN_WUNPCKELUW,
+
+  ARM_BUILTIN_THREAD_POINTER,
+
+  ARM_BUILTIN_NEON_BASE,
+
+  ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE /* FIXME: Wrong! */
+};
+
+/* Do not emit .note.GNU-stack by default. */
+#ifndef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 0
+#endif
+
+/* The maximum number of parallel loads or stores we support in an ldm/stm
+   instruction. */
+#define MAX_LDM_STM_OPS 4
+
+#endif /* ! GCC_ARM_H */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
new file mode 100644
index 000000000..130053b0b
--- /dev/null
+++ b/gcc/config/arm/arm.md
@@ -0,0 +1,10746 @@
+;;- Machine description for ARM for GNU compiler
+;;  Copyright 1991, 1993, 1994, 1995, 1996, 1996, 1997, 1998, 1999, 2000,
+;;  2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;;  Free Software Foundation, Inc.
+;;  Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
+;;  and Martin Simmons (@harleqn.co.uk).
+;;  More major hacks by Richard Earnshaw (rearnsha@arm.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+
+;;---------------------------------------------------------------------------
+;; Constants
+
+;; Register numbers
+(define_constants
+  [(R0_REGNUM 0)       ; First CORE register
+   (IP_REGNUM 12)      ; Scratch register
+   (SP_REGNUM 13)      ; Stack pointer
+   (LR_REGNUM 14)      ; Return address register
+   (PC_REGNUM 15)      ; Program counter
+   (CC_REGNUM 24)      ; Condition code pseudo register
+   (LAST_ARM_REGNUM 15) ;
+   (FPA_F0_REGNUM 16)  ; FIRST_FPA_REGNUM
+   (FPA_F7_REGNUM 23)  ; LAST_FPA_REGNUM
+  ]
+)
+;; 3rd operand to select_dominance_cc_mode
+(define_constants
+  [(DOM_CC_X_AND_Y 0)
+   (DOM_CC_NX_OR_Y 1)
+   (DOM_CC_X_OR_Y 2)
+  ]
+)
+
+;; UNSPEC Usage:
+;; Note: sin and cos are no longer used.
+;; Unspec constants for Neon are defined in neon.md.
+
+(define_constants
+  [(UNSPEC_SIN 0)       ; `sin' operation (MODE_FLOAT):
+                        ;   operand 0 is the result,
+                        ;   operand 1 the parameter.
+   (UNPSEC_COS 1)       ; `cos' operation (MODE_FLOAT):
+                        ;   operand 0 is the result,
+                        ;   operand 1 the parameter.
+   (UNSPEC_PUSH_MULT 2) ; `push multiple' operation:
+                        ;   operand 0 is the first register,
+                        ;   subsequent registers are in parallel (use ...)
+                        ;   expressions.
+   (UNSPEC_PIC_SYM 3)   ; A symbol that has been treated properly for pic
+                        ;   usage, that is, we will add the pic_register
+                        ;   value to it before trying to dereference it.
+   (UNSPEC_PIC_BASE 4)  ; Add PC and all but the last operand together,
+                        ;   The last operand is the number of a PIC_LABEL
+                        ;   that points at the containing instruction.
+   (UNSPEC_PRLG_STK 5)  ; A special barrier that prevents frame accesses
+                        ;   being scheduled before the stack adjustment insn.
+   (UNSPEC_PROLOGUE_USE 6) ; As USE insns are not meaningful after reload,
+                        ;   this unspec is used to prevent the deletion of
+                        ;   instructions setting registers for EH handling
+                        ;   and stack frame generation.  Operand 0 is the
+                        ;   register to "use".
+   (UNSPEC_CHECK_ARCH 7) ; Set CCs to indicate 26-bit or 32-bit mode.
+   (UNSPEC_WSHUFH 8)    ; Used by the intrinsic form of the iWMMXt WSHUFH instruction.
+   (UNSPEC_WACC 9)      ; Used by the intrinsic form of the iWMMXt WACC instruction.
+   (UNSPEC_TMOVMSK 10)  ; Used by the intrinsic form of the iWMMXt TMOVMSK instruction.
+   (UNSPEC_WSAD 11)     ; Used by the intrinsic form of the iWMMXt WSAD instruction.
+   (UNSPEC_WSADZ 12)    ; Used by the intrinsic form of the iWMMXt WSADZ instruction.
+   (UNSPEC_WMACS 13)    ; Used by the intrinsic form of the iWMMXt WMACS instruction.
+   (UNSPEC_WMACU 14)    ; Used by the intrinsic form of the iWMMXt WMACU instruction.
+   (UNSPEC_WMACSZ 15)   ; Used by the intrinsic form of the iWMMXt WMACSZ instruction.
+   (UNSPEC_WMACUZ 16)   ; Used by the intrinsic form of the iWMMXt WMACUZ instruction.
+   (UNSPEC_CLRDI 17)    ; Used by the intrinsic form of the iWMMXt CLRDI instruction.
+   (UNSPEC_WMADDS 18)   ; Used by the intrinsic form of the iWMMXt WMADDS instruction.
+   (UNSPEC_WMADDU 19)   ; Used by the intrinsic form of the iWMMXt WMADDU instruction.
+   (UNSPEC_TLS 20)      ; A symbol that has been treated properly for TLS usage.
+   (UNSPEC_PIC_LABEL 21) ; A label used for PIC access that does not appear in the
+                        ;   instruction stream.
+   (UNSPEC_STACK_ALIGN 22) ; Doubleword aligned stack pointer.  Used to
+                        ;   generate correct unwind information.
+   (UNSPEC_PIC_OFFSET 23) ; A symbolic 12-bit OFFSET that has been treated
+                        ;   correctly for PIC usage.
+   (UNSPEC_GOTSYM_OFF 24) ; The offset of the start of the GOT from
+                        ;   a given symbolic address.
+   (UNSPEC_THUMB1_CASESI 25) ; A Thumb1 compressed dispatch-table call.
+   (UNSPEC_RBIT 26)     ; rbit operation.
+   (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
+                        ;   another symbolic address.
+   (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
+   (UNSPEC_PIC_UNIFIED 29) ; Create a common pic addressing form.
+  ]
+)
+
+;; UNSPEC_VOLATILE Usage:
+
+(define_constants
+  [(VUNSPEC_BLOCKAGE 0) ; `blockage' insn to prevent scheduling across an
+                        ;   insn in the code.
+   (VUNSPEC_EPILOGUE 1) ; `epilogue' insn, used to represent any part of the
+                        ;   instruction epilogue sequence that isn't expanded
+                        ;   into normal RTL.  Used for both normal and sibcall
+                        ;   epilogues.
+   (VUNSPEC_ALIGN 2)    ; `align' insn.  Used at the head of a minipool table
+                        ;   for inlined constants.
+   (VUNSPEC_POOL_END 3) ; `end-of-table'.  Used to mark the end of a minipool
+                        ;   table.
+   (VUNSPEC_POOL_1 4)   ; `pool-entry(1)'.  An entry in the constant pool for
+                        ;   an 8-bit object.
+   (VUNSPEC_POOL_2 5)   ; `pool-entry(2)'.  An entry in the constant pool for
+                        ;   a 16-bit object.
+   (VUNSPEC_POOL_4 6)   ; `pool-entry(4)'.  An entry in the constant pool for
+                        ;   a 32-bit object.
+   (VUNSPEC_POOL_8 7)   ; `pool-entry(8)'.  An entry in the constant pool for
+                        ;   a 64-bit object.
+   (VUNSPEC_POOL_16 8)  ; `pool-entry(16)'.  An entry in the constant pool for
+                        ;   a 128-bit object.
+   (VUNSPEC_TMRC 9)     ; Used by the iWMMXt TMRC instruction.
+   (VUNSPEC_TMCR 10)    ; Used by the iWMMXt TMCR instruction.
+ (VUNSPEC_ALIGN8 11) ; 8-byte alignment version of VUNSPEC_ALIGN + (VUNSPEC_WCMP_EQ 12) ; Used by the iWMMXt WCMPEQ instructions + (VUNSPEC_WCMP_GTU 13) ; Used by the iWMMXt WCMPGTU instructions + (VUNSPEC_WCMP_GT 14) ; Used by the iWMMXt WCMPGT instructions + (VUNSPEC_EH_RETURN 20); Use to override the return address for exception + ; handling. + (VUNSPEC_SYNC_COMPARE_AND_SWAP 21) ; Represent an atomic compare swap. + (VUNSPEC_SYNC_LOCK 22) ; Represent a sync_lock_test_and_set. + (VUNSPEC_SYNC_OP 23) ; Represent a sync_ + (VUNSPEC_SYNC_NEW_OP 24) ; Represent a sync_new_ + (VUNSPEC_SYNC_OLD_OP 25) ; Represent a sync_old_ + ] +) + +;;--------------------------------------------------------------------------- +;; Attributes + +; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when +; generating ARM code. This is used to control the length of some insn +; patterns that share the same RTL in both ARM and Thumb code. +(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code"))) + +; IS_ARCH6 is set to 'yes' when we are generating code for ARMv6. +(define_attr "is_arch6" "no,yes" (const (symbol_ref "arm_arch6"))) + +; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. +(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) + +;; Operand number of an input operand that is shifted. Zero if the +;; given instruction does not shift one of its input operands. +(define_attr "shift" "" (const_int 0)) + +; Floating Point Unit. If we only have floating point emulation, then there +; is no point in scheduling the floating point insns. (Well, for best +; performance we should try and group them together). +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp" + (const (symbol_ref "arm_fpu_attr"))) + +(define_attr "sync_result" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_memory" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_required_value" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_new_value" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_t1" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_t2" "none,0,1,2,3,4,5" (const_string "none")) +(define_attr "sync_release_barrier" "yes,no" (const_string "yes")) +(define_attr "sync_op" "none,add,sub,ior,xor,and,nand" + (const_string "none")) + +; LENGTH of an instruction (in bytes) +(define_attr "length" "" + (cond [(not (eq_attr "sync_memory" "none")) + (symbol_ref "arm_sync_loop_insns (insn, operands) * 4") + ] (const_int 4))) + +; The architecture which supports the instruction (or alternative). +; This can be "a" for ARM, "t" for either of the Thumbs, "32" for +; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. "v6" +; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without +; arm_arch6. This attribute is used to compute attribute "enabled", +; use type "any" to enable an alternative in all cases.
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6" + (const_string "any")) + +(define_attr "arch_enabled" "no,yes" + (cond [(eq_attr "arch" "any") + (const_string "yes") + + (and (eq_attr "arch" "a") + (ne (symbol_ref "TARGET_ARM") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "t") + (ne (symbol_ref "TARGET_THUMB") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "t1") + (ne (symbol_ref "TARGET_THUMB1") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "t2") + (ne (symbol_ref "TARGET_THUMB2") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "32") + (ne (symbol_ref "TARGET_32BIT") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "v6") + (ne (symbol_ref "(TARGET_32BIT && arm_arch6)") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "nov6") + (ne (symbol_ref "(TARGET_32BIT && !arm_arch6)") (const_int 0))) + (const_string "yes")] + (const_string "no"))) + +; Allows an insn to disable certain alternatives for reasons other than +; arch support. +(define_attr "insn_enabled" "no,yes" + (const_string "yes")) + +; Enable all alternatives that are both arch_enabled and insn_enabled. + (define_attr "enabled" "no,yes" + (if_then_else (eq_attr "insn_enabled" "yes") + (if_then_else (eq_attr "arch_enabled" "yes") + (const_string "yes") + (const_string "no")) + (const_string "no"))) + +; POOL_RANGE is how far away from a constant pool entry that this insn +; can be placed. If the distance is zero, then this insn will never +; reference the pool. +; NEG_POOL_RANGE is nonzero for insns that can reference a constant pool entry +; before its address. +(define_attr "arm_pool_range" "" (const_int 0)) +(define_attr "thumb2_pool_range" "" (const_int 0)) +(define_attr "arm_neg_pool_range" "" (const_int 0)) +(define_attr "thumb2_neg_pool_range" "" (const_int 0)) + +(define_attr "pool_range" "" + (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_pool_range")] + (attr "arm_pool_range"))) +(define_attr "neg_pool_range" "" + (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_neg_pool_range")] + (attr "arm_neg_pool_range"))) + +; An assembler sequence may clobber the condition codes without us knowing. +; If such an insn references the pool, then we have no way of knowing how, +; so use the most conservative value for pool_range. +(define_asm_attributes + [(set_attr "conds" "clob") + (set_attr "length" "4") + (set_attr "pool_range" "250")]) + +;; The instruction used to implement a particular pattern. This +;; information is used by pipeline descriptions to provide accurate +;; scheduling information. + +(define_attr "insn" + "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other" + (const_string "other")) + +; TYPE attribute is used to detect floating point instructions which, if +; running on a co-processor can run in parallel with other, basic instructions +; If write-buffer scheduling is enabled then it can also be used in the +; scheduling of writes. + +; Classification of each insn +; Note: vfp.md has different meanings for some of these, and some further +; types as well. See that file for details. 
+; alu any alu instruction that doesn't hit memory or fp +; regs or have a shifted source operand +; alu_shift any data instruction that doesn't hit memory or fp +; regs, but has a source operand shifted by a constant +; alu_shift_reg any data instruction that doesn't hit memory or fp +; regs, but has a source operand shifted by a register value +; mult a multiply instruction +; block blockage insn, this blocks all functional units +; float a floating point arithmetic operation (subject to expansion) +; fdivd DFmode floating point division +; fdivs SFmode floating point division +; fmul Floating point multiply +; ffmul Fast floating point multiply +; farith Floating point arithmetic (4 cycle) +; ffarith Fast floating point arithmetic (2 cycle) +; float_em a floating point arithmetic operation that is normally emulated +; even on a machine with an fpa. +; f_fpa_load a floating point load from memory. Only for the FPA. +; f_fpa_store a floating point store to memory. Only for the FPA. +; f_load[sd] A single/double load from memory. Used for VFP unit. +; f_store[sd] A single/double store to memory. Used for VFP unit. +; f_flag a transfer of co-processor flags to the CPSR +; f_mem_r a transfer of a floating point register to a real reg via mem +; r_mem_f the reverse of f_mem_r +; f_2_r fast transfer float to arm (no memory needed) +; r_2_f fast transfer arm to float +; f_cvt convert floating<->integral +; branch a branch +; call a subroutine call +; load_byte load byte(s) from memory to arm registers +; load1 load 1 word from memory to arm registers +; load2 load 2 words from memory to arm registers +; load3 load 3 words from memory to arm registers +; load4 load 4 words from memory to arm registers +; store store 1 word to memory from arm registers +; store2 store 2 words +; store3 store 3 words +; store4 store 4 (or more) words +; Additions for Cirrus Maverick co-processor: +; mav_farith Floating point arithmetic (4 cycle) +; mav_dmult Double multiplies (7 cycle) +; + +(define_attr "type" + "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_fpa_load,f_fpa_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult,fconsts,fconstd,fadds,faddd,ffariths,ffarithd,fcmps,fcmpd,fcpys" + (if_then_else + (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") + (const_string "mult") + (const_string "alu"))) + +; Load scheduling, set from the arm_ld_sched variable +; initialized by arm_option_override() +(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) + +;; Classification of NEON instructions for scheduling purposes. +;; Do not set this attribute and the "type" attribute together in +;; any one instruction pattern. 
+(define_attr "neon_type" + "neon_int_1,\ + neon_int_2,\ + neon_int_3,\ + neon_int_4,\ + neon_int_5,\ + neon_vqneg_vqabs,\ + neon_vmov,\ + neon_vaba,\ + neon_vsma,\ + neon_vaba_qqq,\ + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mul_qqq_8_16_32_ddd_32,\ + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\ + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + neon_mla_qqq_8_16,\ + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\ + neon_mla_qqq_32_qqd_32_scalar,\ + neon_mul_ddd_16_scalar_32_16_long_scalar,\ + neon_mul_qqd_32_scalar,\ + neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\ + neon_shift_1,\ + neon_shift_2,\ + neon_shift_3,\ + neon_vshl_ddd,\ + neon_vqshl_vrshl_vqrshl_qqq,\ + neon_vsra_vrsra,\ + neon_fp_vadd_ddd_vabs_dd,\ + neon_fp_vadd_qqq_vabs_qq,\ + neon_fp_vsum,\ + neon_fp_vmul_ddd,\ + neon_fp_vmul_qqd,\ + neon_fp_vmla_ddd,\ + neon_fp_vmla_qqq,\ + neon_fp_vmla_ddd_scalar,\ + neon_fp_vmla_qqq_scalar,\ + neon_fp_vrecps_vrsqrts_ddd,\ + neon_fp_vrecps_vrsqrts_qqq,\ + neon_bp_simple,\ + neon_bp_2cycle,\ + neon_bp_3cycle,\ + neon_ldr,\ + neon_str,\ + neon_vld1_1_2_regs,\ + neon_vld1_3_4_regs,\ + neon_vld2_2_regs_vld1_vld2_all_lanes,\ + neon_vld2_4_regs,\ + neon_vld3_vld4,\ + neon_vst1_1_2_regs_vst2_2_regs,\ + neon_vst1_3_4_regs,\ + neon_vst2_4_regs_vst3_vst4,\ + neon_vst3_vst4,\ + neon_vld1_vld2_lane,\ + neon_vld3_vld4_lane,\ + neon_vst1_vst2_lane,\ + neon_vst3_vst4_lane,\ + neon_vld3_vld4_all_lanes,\ + neon_mcr,\ + neon_mcr_2_mcrr,\ + neon_mrc,\ + neon_mrrc,\ + neon_ldm_2,\ + neon_stm_2,\ + none" + (const_string "none")) + +; condition codes: this one is used by final_prescan_insn to speed up +; conditionalizing instructions. It saves having to scan the rtl to see if +; it uses or alters the condition codes. +; +; USE means that the condition codes are used by the insn in the process of +; outputting code, this means (at present) that we can't use the insn in +; inlined branches +; +; SET means that the purpose of the insn is to set the condition codes in a +; well defined manner. +; +; CLOB means that the condition codes are altered in an undefined manner, if +; they are altered at all +; +; UNCONDITIONAL means the instruction can not be conditionally executed and +; that the instruction does not use or alter the condition codes. +; +; NOCOND means that the instruction does not use or alter the condition +; codes but can be converted into a conditionally exectuted instruction. + +(define_attr "conds" "use,set,clob,unconditional,nocond" + (if_then_else + (ior (eq_attr "is_thumb1" "yes") + (eq_attr "type" "call")) + (const_string "clob") + (if_then_else (eq_attr "neon_type" "none") + (const_string "nocond") + (const_string "unconditional")))) + +; Predicable means that the insn can be conditionally executed based on +; an automatically added predicate (additional patterns are generated by +; gen...). We default to 'no' because no Thumb patterns match this rule +; and not all ARM patterns do. +(define_attr "predicable" "no,yes" (const_string "no")) + +; Only model the write buffer for ARM6 and ARM7. Earlier processors don't +; have one. Later ones, such as StrongARM, have write-back caches, so don't +; suffer blockages enough to warrant modelling this (and it can adversely +; affect the schedule). +(define_attr "model_wbuf" "no,yes" (const (symbol_ref "arm_tune_wbuf"))) + +; WRITE_CONFLICT implies that a read following an unrelated write is likely +; to stall the processor. Used with model_wbuf above. 
+(define_attr "write_conflict" "no,yes" + (if_then_else (eq_attr "type" + "block,float_em,f_fpa_load,f_fpa_store,f_mem_r,r_mem_f,call,load1") + (const_string "yes") + (const_string "no"))) + +; Classify the insns into those that take one cycle and those that take more +; than one on the main cpu execution unit. +(define_attr "core_cycles" "single,multi" + (if_then_else (eq_attr "type" + "alu,alu_shift,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith") + (const_string "single") + (const_string "multi"))) + +;; FAR_JUMP is "yes" if a BL instruction is used to generate a branch to a +;; distant label. Only applicable to Thumb code. +(define_attr "far_jump" "yes,no" (const_string "no")) + + +;; The number of machine instructions this pattern expands to. +;; Used for Thumb-2 conditional execution. +(define_attr "ce_count" "" (const_int 1)) + +;;--------------------------------------------------------------------------- +;; Mode iterators + +(include "iterators.md") + +;;--------------------------------------------------------------------------- +;; Predicates + +(include "predicates.md") +(include "constraints.md") + +;;--------------------------------------------------------------------------- +;; Pipeline descriptions + +;; Processor type. This is created automatically from arm-cores.def. +(include "arm-tune.md") + +(define_attr "tune_cortexr4" "yes,no" + (const (if_then_else + (eq_attr "tune" "cortexr4,cortexr4f") + (const_string "yes") + (const_string "no")))) + +;; True if the generic scheduling description should be used. + +(define_attr "generic_sched" "yes,no" + (const (if_then_else + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") + (eq_attr "tune_cortexr4" "yes")) + (const_string "no") + (const_string "yes")))) + +(define_attr "generic_vfp" "yes,no" + (const (if_then_else + (and (eq_attr "fpu" "vfp") + (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4") + (eq_attr "tune_cortexr4" "no")) + (const_string "yes") + (const_string "no")))) + +(include "arm-generic.md") +(include "arm926ejs.md") +(include "arm1020e.md") +(include "arm1026ejs.md") +(include "arm1136jfs.md") +(include "fa526.md") +(include "fa606te.md") +(include "fa626te.md") +(include "fmp626.md") +(include "fa726te.md") +(include "cortex-a5.md") +(include "cortex-a8.md") +(include "cortex-a9.md") +(include "cortex-r4.md") +(include "cortex-r4f.md") +(include "cortex-m4.md") +(include "cortex-m4-fpu.md") +(include "vfp11.md") + + +;;--------------------------------------------------------------------------- +;; Insn patterns +;; +;; Addition insns. + +;; Note: For DImode insns, there is normally no reason why operands should +;; not be in the same register, what we don't want is for something being +;; written to partially overlap something that is an input. +;; Cirrus 64bit additions should not be split because we have a native +;; 64bit addition instructions. 
+ +(define_expand "adddi3" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + " + if (TARGET_HARD_FLOAT && TARGET_MAVERICK) + { + if (!cirrus_fp_register (operands[0], DImode)) + operands[0] = force_reg (DImode, operands[0]); + if (!cirrus_fp_register (operands[1], DImode)) + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_cirrus_adddi3 (operands[0], operands[1], operands[2])); + DONE; + } + + if (TARGET_THUMB1) + { + if (GET_CODE (operands[1]) != REG) + operands[1] = force_reg (DImode, operands[1]); + if (GET_CODE (operands[2]) != REG) + operands[2] = force_reg (DImode, operands[2]); + } + " +) + +(define_insn "*thumb1_adddi3" + [(set (match_operand:DI 0 "register_operand" "=l") + (plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM)) + ] + "TARGET_THUMB1" + "add\\t%Q0, %Q0, %Q2\;adc\\t%R0, %R0, %R2" + [(set_attr "length" "4")] +) + +(define_insn_and_split "*arm_adddi3" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0") + (match_operand:DI 2 "s_register_operand" "r, 0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON" + "#" + "TARGET_32BIT && reload_completed + && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn_and_split "*adddi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (plus:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (ashiftrt:SI (match_dup 2) + (const_int 31)) + (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn_and_split "*adddi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (plus:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && 
TARGET_MAVERICK)" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (const_int 0)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (plus:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT && GET_CODE (operands[2]) == CONST_INT) + { + arm_split_constant (PLUS, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + " +) + +; If there is a scratch available, this will be faster than synthesizing the +; addition. +(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (plus:SI (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_32BIT && + !(const_ok_for_arm (INTVAL (operands[2])) + || const_ok_for_arm (-INTVAL (operands[2]))) + && const_ok_for_arm (~INTVAL (operands[2]))" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))] + "" +) + +;; The r/r/k alternative is required when reloading the address +;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will +;; put the duplicated register first, and not try the commutative version. 
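The addsi3 expander above hands any constant addend to arm_split_constant, which emits a single add or sub when the value (or its negation) fits the ARM immediate encoding and a short synthesized sequence otherwise. A rough, self-contained model of that encoding rule, an 8-bit value rotated right by an even amount (the function is illustrative only, not the port's const_ok_for_arm):

  #include <stdint.h>

  /* Nonzero if v can be an ARM data-processing immediate.  */
  static int arm_immediate_ok_sketch (uint32_t v)
  {
    if ((v & ~0xffu) == 0)
      return 1;
    for (int rot = 2; rot < 32; rot += 2)
      {
        /* Rotating left by rot undoes a rotate right by rot.  */
        uint32_t r = (v << rot) | (v >> (32 - rot));
        if ((r & ~0xffu) == 0)
          return 1;
      }
    return 0;
  }

By this rule 0xFF000000 is a single add, while 0x00010001 is not encodable (nor is its negation) and ends up split into two instructions.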
+(define_insn_and_split "*arm_addsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r") + (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk") + (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))] + "TARGET_32BIT" + "@ + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %2, %1 + sub%?\\t%0, %1, #%n2 + sub%?\\t%0, %1, #%n2 + #" + "TARGET_32BIT + && GET_CODE (operands[2]) == CONST_INT + && !(const_ok_for_arm (INTVAL (operands[2])) + || const_ok_for_arm (-INTVAL (operands[2]))) + && (reload_completed || !arm_eliminable_register (operands[1]))" + [(clobber (const_int 0))] + " + arm_split_constant (PLUS, SImode, curr_insn, + INTVAL (operands[2]), operands[0], + operands[1], 0); + DONE; + " + [(set_attr "length" "4,4,4,4,4,16") + (set_attr "predicable" "yes")] +) + +(define_insn_and_split "*thumb1_addsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,k,l,l,l") + (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,k,k,0,l,k") + (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,M,O,Pa,Pb,Pc")))] + "TARGET_THUMB1" + "* + static const char * const asms[] = + { + \"add\\t%0, %0, %2\", + \"sub\\t%0, %0, #%n2\", + \"add\\t%0, %1, %2\", + \"add\\t%0, %0, %2\", + \"add\\t%0, %0, %2\", + \"add\\t%0, %1, %2\", + \"add\\t%0, %1, %2\", + \"#\", + \"#\", + \"#\" + }; + if ((which_alternative == 2 || which_alternative == 6) + && GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + return \"sub\\t%0, %1, #%n2\"; + return asms[which_alternative]; + " + "&& reload_completed && CONST_INT_P (operands[2]) + && ((operands[1] != stack_pointer_rtx + && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255)) + || (operands[1] == stack_pointer_rtx + && INTVAL (operands[2]) > 1020))" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))] + { + HOST_WIDE_INT offset = INTVAL (operands[2]); + if (operands[1] == stack_pointer_rtx) + offset -= 1020; + else + { + if (offset > 255) + offset = 255; + else if (offset < -255) + offset = -255; + } + operands[3] = GEN_INT (offset); + operands[2] = GEN_INT (INTVAL (operands[2]) - offset); + } + [(set_attr "length" "2,2,2,2,2,2,2,4,4,4")] +) + +;; Reloading and elimination of the frame pointer can +;; sometimes cause this optimization to be missed. 
+(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (set (match_dup 0) + (plus:SI (match_dup 0) (reg:SI SP_REGNUM)))] + "TARGET_THUMB1 + && (unsigned HOST_WIDE_INT) (INTVAL (operands[1])) < 1024 + && (INTVAL (operands[1]) & 3) == 0" + [(set (match_dup 0) (plus:SI (reg:SI SP_REGNUM) (match_dup 1)))] + "" +) + +(define_insn "*addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "r, r") + (match_operand:SI 2 "arm_add_operand" "rI,L")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_ARM" + "@ + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2" + [(set_attr "conds" "set")] +) + +(define_insn "*addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 0 "s_register_operand" "r, r") + (match_operand:SI 1 "arm_add_operand" "rI,L")) + (const_int 0)))] + "TARGET_ARM" + "@ + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1" + [(set_attr "conds" "set")] +) + +(define_insn "*compare_negsi_si" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT" + "cmn%?\\t%1, %0" + [(set_attr "conds" "set")] +) + +;; This is the canonicalization of addsi3_compare0_for_combiner when the +;; addend is a constant. +(define_insn "*cmpsi2_addneg" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_addimm_operand" "L,I"))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_dup 1) + (match_operand:SI 3 "arm_addimm_operand" "I,L")))] + "TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])" + "@ + add%.\\t%0, %1, %3 + sub%.\\t%0, %1, #%n3" + [(set_attr "conds" "set")] +) + +;; Convert the sequence +;; sub rd, rn, #1 +;; cmn rd, #1 (equivalent to cmp rd, #-1) +;; bne dest +;; into +;; subs rd, rn, #1 +;; bcs dest ((unsigned)rn >= 1) +;; similarly for the beq variant using bcc. +;; This is a common looping idiom (while (n--)) +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (plus:SI (match_operand:SI 1 "arm_general_register_operand" "") + (const_int -1))) + (set (match_operand 2 "cc_register" "") + (compare (match_dup 0) (const_int -1))) + (set (pc) + (if_then_else (match_operator 3 "equality_operator" + [(match_dup 2) (const_int 0)]) + (match_operand 4 "" "") + (match_operand 5 "" "")))] + "TARGET_32BIT && peep2_reg_dead_p (3, operands[2])" + [(parallel[ + (set (match_dup 2) + (compare:CC + (match_dup 1) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (const_int -1)))]) + (set (pc) + (if_then_else (match_op_dup 3 [(match_dup 2) (const_int 0)]) + (match_dup 4) + (match_dup 5)))] + "operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); + operands[3] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE + ? GEU : LTU), + VOIDmode, + operands[2], const0_rtx);" +) + +;; The next four insns work because they compare the result with one of +;; the operands, and we know that the use of the condition code is +;; either GEU or LTU, so we can use the carry flag from the addition +;; instead of doing the compare a second time. 
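In C terms, the carry-flag trick described above is the familiar unsigned-overflow idiom: after s = a + b, the test s < a is exactly the carry produced by the addition, so no second compare is needed (a sketch, not code from the port):

  #include <stdint.h>

  static int add_carries_sketch (uint32_t a, uint32_t b, uint32_t *sum)
  {
    *sum = a + b;
    return *sum < a;   /* equals the carry out of the 32-bit addition */
  }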
+(define_insn "*addsi3_compare_op1" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_add_operand" "rI,L")) + (match_dup 1))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2" + [(set_attr "conds" "set")] +) + +(define_insn "*addsi3_compare_op2" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_add_operand" "rI,L")) + (match_dup 2))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2" + [(set_attr "conds" "set")] +) + +(define_insn "*compare_addsi2_op0" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "r,r") + (match_operand:SI 1 "arm_add_operand" "rI,L")) + (match_dup 0)))] + "TARGET_32BIT" + "@ + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1" + [(set_attr "conds" "set")] +) + +(define_insn "*compare_addsi2_op1" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "r,r") + (match_operand:SI 1 "arm_add_operand" "rI,L")) + (match_dup 1)))] + "TARGET_32BIT" + "@ + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1" + [(set_attr "conds" "set")] +) + +(define_insn "*addsi3_carryin_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r") + (match_operand:SI 2 "arm_rhs_operand" "rI")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "adc%?\\t%0, %1, %2" + [(set_attr "conds" "use")] +) + +(define_insn "*addsi3_carryin_alt2_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "s_register_operand" "%r")) + (match_operand:SI 2 "arm_rhs_operand" "rI")))] + "TARGET_32BIT" + "adc%?\\t%0, %1, %2" + [(set_attr "conds" "use")] +) + +(define_insn "*addsi3_carryin_shift_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (plus:SI + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")]) + (match_operand:SI 1 "s_register_operand" "r")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "adc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +(define_expand "incscc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand:CC 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "" +) + +(define_insn "*arm_incscc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand:CC 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "s_register_operand" "0,?r")))] + "TARGET_ARM" + "@ + add%d2\\t%0, %1, #1 + mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1" + [(set_attr "conds" "use") + (set_attr "length" "4,8")] +) + +; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant. 
+(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (plus:SI (ashift:SI (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "s_register_operand" "")) + (const_int -1))) + (clobber (match_operand:SI 3 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (not:SI (ashift:SI (match_dup 3) (match_dup 2))))] + " + operands[1] = GEN_INT (~(INTVAL (operands[1]) - 1)); +") + +(define_expand "addsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (plus:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_add_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + if (TARGET_MAVERICK + && !cirrus_fp_register (operands[2], SFmode)) + operands[2] = force_reg (SFmode, operands[2]); +") + +(define_expand "adddf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (plus:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_add_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK + && !cirrus_fp_register (operands[2], DFmode)) + operands[2] = force_reg (DFmode, operands[2]); +") + +(define_expand "subdi3" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (minus:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + " + if (TARGET_HARD_FLOAT && TARGET_MAVERICK + && TARGET_32BIT + && cirrus_fp_register (operands[0], DImode) + && cirrus_fp_register (operands[1], DImode)) + { + emit_insn (gen_cirrus_subdi3 (operands[0], operands[1], operands[2])); + DONE; + } + + if (TARGET_THUMB1) + { + if (GET_CODE (operands[1]) != REG) + operands[1] = force_reg (DImode, operands[1]); + if (GET_CODE (operands[2]) != REG) + operands[2] = force_reg (DImode, operands[2]); + } + " +) + +(define_insn "*arm_subdi3" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0") + (match_operand:DI 2 "s_register_operand" "r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !TARGET_NEON" + "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*thumb_subdi3" + [(set (match_operand:DI 0 "register_operand" "=l") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB1" + "sub\\t%Q0, %Q0, %Q2\;sbc\\t%R0, %R0, %R2" + [(set_attr "length" "4")] +) + +(define_insn "*subdi_di_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") + (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*subdi_di_sesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") + (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*subdi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 
"s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*subdi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*subdi_zesidi_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (minus:DI (zero_extend:DI + (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (minus:SI (match_operand:SI 1 "reg_or_int_operand" "") + (match_operand:SI 2 "s_register_operand" "")))] + "TARGET_EITHER" + " + if (GET_CODE (operands[1]) == CONST_INT) + { + if (TARGET_32BIT) + { + arm_split_constant (MINUS, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], + operands[2], optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + operands[1] = force_reg (SImode, operands[1]); + } + " +) + +(define_insn "thumb1_subsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (minus:SI (match_operand:SI 1 "register_operand" "l") + (match_operand:SI 2 "reg_or_int_operand" "lPd")))] + "TARGET_THUMB1" + "sub\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +; ??? Check Thumb-2 split length +(define_insn_and_split "*arm_subsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r") + (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n") + (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))] + "TARGET_32BIT" + "@ + rsb%?\\t%0, %2, %1 + sub%?\\t%0, %1, %2 + sub%?\\t%0, %1, %2 + #" + "&& (GET_CODE (operands[1]) == CONST_INT + && !const_ok_for_arm (INTVAL (operands[1])))" + [(clobber (const_int 0))] + " + arm_split_constant (MINUS, SImode, curr_insn, + INTVAL (operands[1]), operands[0], operands[2], 0); + DONE; + " + [(set_attr "length" "4,4,4,16") + (set_attr "predicable" "yes")] +) + +(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (minus:SI (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "arm_general_register_operand" "")))] + "TARGET_32BIT + && !const_ok_for_arm (INTVAL (operands[1])) + && const_ok_for_arm (~INTVAL (operands[1]))" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (minus:SI (match_dup 3) (match_dup 2)))] + "" +) + +(define_insn "*subsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,I") + (match_operand:SI 2 "arm_rhs_operand" "rI,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + sub%.\\t%0, %1, %2 + rsb%.\\t%0, %2, %1" + [(set_attr "conds" "set")] +) + +(define_insn "*subsi3_compare" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,I") + (match_operand:SI 2 "arm_rhs_operand" "rI,r"))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + 
"TARGET_32BIT" + "@ + sub%.\\t%0, %1, %2 + rsb%.\\t%0, %2, %1" + [(set_attr "conds" "set")] +) + +(define_expand "decscc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + (match_operator:SI 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)])))] + "TARGET_32BIT" + "" +) + +(define_insn "*arm_decscc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + (match_operator:SI 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)])))] + "TARGET_ARM" + "@ + sub%d2\\t%0, %1, #1 + mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1" + [(set_attr "conds" "use") + (set_attr "length" "*,8")] +) + +(define_expand "subsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (minus:SF (match_operand:SF 1 "arm_float_rhs_operand" "") + (match_operand:SF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + if (TARGET_MAVERICK) + { + if (!cirrus_fp_register (operands[1], SFmode)) + operands[1] = force_reg (SFmode, operands[1]); + if (!cirrus_fp_register (operands[2], SFmode)) + operands[2] = force_reg (SFmode, operands[2]); + } +") + +(define_expand "subdf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "") + (match_operand:DF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK) + { + if (!cirrus_fp_register (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); + if (!cirrus_fp_register (operands[2], DFmode)) + operands[2] = force_reg (DFmode, operands[2]); + } +") + + +;; Multiplication insns + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (mult:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +;; Use `&' and then `0' to prevent the operands 0 and 1 being the same +(define_insn "*arm_mulsi3" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")))] + "TARGET_32BIT && !arm_arch6" + "mul%?\\t%0, %2, %1" + [(set_attr "insn" "mul") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_mulsi3_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch6" + "mul%?\\t%0, %1, %2" + [(set_attr "insn" "mul") + (set_attr "predicable" "yes")] +) + +; Unfortunately with the Thumb the '&'/'0' trick can fails when operands +; 1 and 2; are the same, because reload will make operand 0 match +; operand 1 without realizing that this conflicts with operand 2. We fix +; this by adding another alternative to match this case, and then `reload' +; it ourselves. This alternative must come first. 
+(define_insn "*thumb_mulsi3" + [(set (match_operand:SI 0 "register_operand" "=&l,&l,&l") + (mult:SI (match_operand:SI 1 "register_operand" "%l,*h,0") + (match_operand:SI 2 "register_operand" "l,l,l")))] + "TARGET_THUMB1 && !arm_arch6" + "* + if (which_alternative < 2) + return \"mov\\t%0, %1\;mul\\t%0, %2\"; + else + return \"mul\\t%0, %2\"; + " + [(set_attr "length" "4,4,2") + (set_attr "insn" "mul")] +) + +(define_insn "*thumb_mulsi3_v6" + [(set (match_operand:SI 0 "register_operand" "=l,l,l") + (mult:SI (match_operand:SI 1 "register_operand" "0,l,0") + (match_operand:SI 2 "register_operand" "l,0,0")))] + "TARGET_THUMB1 && arm_arch6" + "@ + mul\\t%0, %2 + mul\\t%0, %1 + mul\\t%0, %1" + [(set_attr "length" "2") + (set_attr "insn" "mul")] +) + +(define_insn "*mulsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (mult:SI (match_dup 2) (match_dup 1)))] + "TARGET_ARM && !arm_arch6" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "insn" "muls")] +) + +(define_insn "*mulsi3_compare0_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (match_dup 2) (match_dup 1)))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "insn" "muls")] +) + +(define_insn "*mulsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=&r,&r"))] + "TARGET_ARM && !arm_arch6" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "insn" "muls")] +) + +(define_insn "*mulsi_compare0_scratch_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "insn" "muls")] +) + +;; Unnamed templates to match MLA instruction. 
+ +(define_insn "*mulsi3addsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") + (plus:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "r,r,0,0")))] + "TARGET_32BIT && !arm_arch6" + "mla%?\\t%0, %2, %1, %3" + [(set_attr "insn" "mla") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsi3addsi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch6" + "mla%?\\t%0, %2, %1, %3" + [(set_attr "insn" "mla") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsi3addsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "r,r,0,0")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") + (plus:SI (mult:SI (match_dup 2) (match_dup 1)) + (match_dup 3)))] + "TARGET_ARM && arm_arch6" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "insn" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (match_dup 2) (match_dup 1)) + (match_dup 3)))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "insn" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "?r,r,0,0")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))] + "TARGET_ARM && !arm_arch6" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "insn" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_scratch_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "insn" "mlas")] +) + +(define_insn "*mulsi3subsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI + (match_operand:SI 3 "s_register_operand" "r") + (mult:SI (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch_thumb2" + "mls%?\\t%0, %2, %1, %3" + [(set_attr "insn" "mla") + (set_attr "predicable" "yes")] +) + +(define_expand "maddsidi4" + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (match_operand:DI 3 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" + "") + +(define_insn "*mulsidi3adddi" + [(set (match_operand:DI 0 
"s_register_operand" "=&r") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "%r")) + (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "insn" "smlal") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsidi3adddi_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch6" + "smlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "insn" "smlal") + (set_attr "predicable" "yes")] +) + +;; 32x32->64 widening multiply. +;; As with mulsi3, the only difference between the v3-5 and v6+ +;; versions of these patterns is the requirement that the output not +;; overlap the inputs, but that still means we have to have a named +;; expander and two different starred insns. + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*mulsidi3_nov6" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smull%?\\t%Q0, %R0, %1, %2" + [(set_attr "insn" "smull") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsidi3_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch6" + "smull%?\\t%Q0, %R0, %1, %2" + [(set_attr "insn" "smull") + (set_attr "predicable" "yes")] +) + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*umulsidi3_nov6" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umull%?\\t%Q0, %R0, %1, %2" + [(set_attr "insn" "umull") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsidi3_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch6" + "umull%?\\t%Q0, %R0, %1, %2" + [(set_attr "insn" "umull") + (set_attr "predicable" "yes")] +) + +(define_expand "umaddsidi4" + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (match_operand:DI 3 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" + "") + +(define_insn "*umulsidi3adddi" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 2 
"s_register_operand" "%r")) + (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "insn" "umlal") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsidi3adddi_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch6" + "umlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "insn" "umlal") + (set_attr "predicable" "yes")] +) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 ""))])] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*smulsi3_highpart_nov6" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=&r,&r"))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smull%?\\t%3, %0, %2, %1" + [(set_attr "insn" "smull") + (set_attr "predicable" "yes")] +) + +(define_insn "*smulsi3_highpart_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_32BIT && arm_arch6" + "smull%?\\t%3, %0, %2, %1" + [(set_attr "insn" "smull") + (set_attr "predicable" "yes")] +) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 ""))])] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*umulsi3_highpart_nov6" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=&r,&r"))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umull%?\\t%3, %0, %2, %1" + [(set_attr "insn" "umull") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsi3_highpart_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_32BIT && arm_arch6" + "umull%?\\t%3, %0, %2, %1" + [(set_attr "insn" "umull") + (set_attr "predicable" "yes")] +) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))))] + 
"TARGET_DSP_MULTIPLY" + "smulbb%?\\t%0, %1, %2" + [(set_attr "insn" "smulxy") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulhisi3tb" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))))] + "TARGET_DSP_MULTIPLY" + "smultb%?\\t%0, %1, %2" + [(set_attr "insn" "smulxy") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulhisi3bt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "r")) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))))] + "TARGET_DSP_MULTIPLY" + "smulbt%?\\t%0, %1, %2" + [(set_attr "insn" "smulxy") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulhisi3tt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))))] + "TARGET_DSP_MULTIPLY" + "smultt%?\\t%0, %1, %2" + [(set_attr "insn" "smulxy") + (set_attr "predicable" "yes")] +) + +(define_insn "maddhisi4" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "r")) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_DSP_MULTIPLY" + "smlabb%?\\t%0, %1, %2, %3" + [(set_attr "insn" "smlaxy") + (set_attr "predicable" "yes")] +) + +(define_insn "*maddhidi4" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + (match_operand:HI 1 "s_register_operand" "r")) + (sign_extend:DI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:DI 3 "s_register_operand" "0")))] + "TARGET_DSP_MULTIPLY" + "smlalbb%?\\t%Q0, %R0, %1, %2" + [(set_attr "insn" "smlalxy") + (set_attr "predicable" "yes")]) + +(define_expand "mulsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (mult:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + if (TARGET_MAVERICK + && !cirrus_fp_register (operands[2], SFmode)) + operands[2] = force_reg (SFmode, operands[2]); +") + +(define_expand "muldf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (mult:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK + && !cirrus_fp_register (operands[2], DFmode)) + operands[2] = force_reg (DFmode, operands[2]); +") + +;; Division insns + +(define_expand "divsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (div:SF (match_operand:SF 1 "arm_float_rhs_operand" "") + (match_operand:SF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "") + +(define_expand "divdf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "") + (match_operand:DF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + "") + +;; Modulo insns + +(define_expand "modsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (mod:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && 
TARGET_HARD_FLOAT && TARGET_FPA" + "") + +(define_expand "moddf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (mod:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_rhs_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "") + +;; Boolean and,ior,xor insns + +;; Split up double word logical operations + +;; Split up simple DImode logical operations. Simply perform the logical +;; operation on the upper and lower halves of the registers. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (match_operator:DI 6 "logical_binary_operator" + [(match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" "")]))] + "TARGET_32BIT && reload_completed + && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0]))) + && ! IS_IWMMXT_REGNUM (REGNO (operands[0]))" + [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)])) + (set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" +) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (match_operator:DI 6 "logical_binary_operator" + [(sign_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")]))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)])) + (set (match_dup 3) (match_op_dup:SI 6 + [(ashiftrt:SI (match_dup 2) (const_int 31)) + (match_dup 4)]))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" +) + +;; The zero extend of operand 2 means we can just copy the high part of +;; operand1 into operand0. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (ior:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")))] + "TARGET_32BIT && operands[0] != operands[1] && reload_completed" + [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" +) + +;; The zero extend of operand 2 means we can just copy the high part of +;; operand1 into operand0. 
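A short C sketch of why the high word can simply be copied, for the IOR split above and equally for the XOR split that follows: the zero-extended operand contributes nothing to the upper 32 bits (names are illustrative, not from the port):

  #include <stdint.h>

  static uint64_t ior_zext_sketch (uint64_t a, uint32_t b)
  {
    uint32_t lo = (uint32_t) a | b;        /* one 32-bit orr            */
    uint32_t hi = (uint32_t) (a >> 32);    /* high half passes through  */
    return ((uint64_t) hi << 32) | lo;
  }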
+(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (xor:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")))] + "TARGET_32BIT && operands[0] != operands[1] && reload_completed" + [(set (match_dup 0) (xor:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" +) + +(define_expand "anddi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (and:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "neon_inv_logic_op2" "")))] + "TARGET_32BIT" + "" +) + +(define_insn "*anddi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (match_operand:DI 1 "s_register_operand" "%0,r") + (match_operand:DI 2 "s_register_operand" "r,r")))] + "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" + "#" + [(set_attr "length" "8")] +) + +(define_insn_and_split "*anddi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + ; The zero extend of operand 2 clears the high word of the output + ; operand. + [(set (match_dup 0) (and:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (const_int 0))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8")] +) + +(define_insn "*anddi_sesdi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8")] +) + +(define_expand "andsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (and:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[2]) == CONST_INT) + { + if (INTVAL (operands[2]) == 255 && arm_arch6) + { + operands[1] = convert_to_mode (QImode, operands[1], 1); + emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0], + operands[1])); + } + else + arm_split_constant (AND, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], + operands[1], + optimize && can_create_pseudo_p ()); + + DONE; + } + } + else /* TARGET_THUMB1 */ + { + if (GET_CODE (operands[2]) != CONST_INT) + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + else + { + int i; + + if (((unsigned HOST_WIDE_INT) ~INTVAL (operands[2])) < 256) + { + operands[2] = force_reg (SImode, + GEN_INT (~INTVAL (operands[2]))); + + emit_insn (gen_thumb1_bicsi3 (operands[0], operands[2], operands[1])); + + DONE; + } + + for (i = 9; i <= 31; i++) + { + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (operands[2])) + { + emit_insn (gen_extzv (operands[0], operands[1], GEN_INT (i), + const0_rtx)); + DONE; + } + else if ((((HOST_WIDE_INT) 1) << i) - 1 + == ~INTVAL (operands[2])) + { + rtx shift = GEN_INT (i); + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_lshrsi3 (reg, operands[1], shift)); + emit_insn 
(gen_ashlsi3 (operands[0], reg, shift)); + + DONE; + } + } + + operands[2] = force_reg (SImode, operands[2]); + } + } + " +) + +; ??? Check split length for Thumb-2 +(define_insn_and_split "*arm_andsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))] + "TARGET_32BIT" + "@ + and%?\\t%0, %1, %2 + bic%?\\t%0, %1, #%B2 + #" + "TARGET_32BIT + && GET_CODE (operands[2]) == CONST_INT + && !(const_ok_for_arm (INTVAL (operands[2])) + || const_ok_for_arm (~INTVAL (operands[2])))" + [(clobber (const_int 0))] + " + arm_split_constant (AND, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; + " + [(set_attr "length" "4,4,16") + (set_attr "predicable" "yes")] +) + +(define_insn "*thumb1_andsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (and:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "and\\t%0, %2" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +(define_insn "*andsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_not_operand" "rI,K")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (and:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + and%.\\t%0, %1, %2 + bic%.\\t%0, %1, #%B2" + [(set_attr "conds" "set")] +) + +(define_insn "*andsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (match_operand:SI 0 "s_register_operand" "r,r") + (match_operand:SI 1 "arm_not_operand" "rI,K")) + (const_int 0))) + (clobber (match_scratch:SI 2 "=X,r"))] + "TARGET_32BIT" + "@ + tst%?\\t%0, %1 + bic%.\\t%2, %0, #%B1" + [(set_attr "conds" "set")] +) + +(define_insn "*zeroextractsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 0 "s_register_operand" "r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (const_int 0)))] + "TARGET_32BIT + && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32 + && INTVAL (operands[1]) > 0 + && INTVAL (operands[1]) + (INTVAL (operands[2]) & 1) <= 8 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32)" + "* + operands[1] = GEN_INT (((1 << INTVAL (operands[1])) - 1) + << INTVAL (operands[2])); + output_asm_insn (\"tst%?\\t%0, %1\", operands); + return \"\"; + " + [(set_attr "conds" "set")] +) + +(define_insn_and_split "*ne_zeroextractsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ne:SI (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")) + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)" + "#" + "TARGET_32BIT + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq 
(reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (const_int 1)))] + " + operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1) + << INTVAL (operands[3])); + " + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 12) + (const_int 8)))] +) + +(define_insn_and_split "*ne_zeroextractsi_shifted" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ne:SI (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (const_int 0)) + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "TARGET_ARM" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (const_int 1)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn_and_split "*ite_ne_zeroextractsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (ne (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")) + (const_int 0)) + (match_operand:SI 4 "arm_not_operand" "rIK") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32) + && !reg_overlap_mentioned_p (operands[0], operands[4])" + "#" + "TARGET_ARM + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32) + && !reg_overlap_mentioned_p (operands[0], operands[4])" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (match_dup 4)))] + " + operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1) + << INTVAL (operands[3])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn_and_split "*ite_ne_zeroextractsi_shifted" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (ne (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (const_int 0)) + (const_int 0)) + (match_operand:SI 3 "arm_not_operand" "rIK") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])" + "#" + "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (match_dup 3)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extract:SI (match_operand:SI 1 "s_register_operand" "") + 
(match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" ""))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_THUMB1" + [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (lshiftrt:SI (match_dup 4) (match_dup 3)))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[2]); + + operands[2] = GEN_INT (32 - temp - INTVAL (operands[3])); + operands[3] = GEN_INT (32 - temp); + }" +) + +;; ??? Use Thumb-2 has bitfield insert/extract instructions. +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operand:SI 5 "s_register_operand" "")])) + (clobber (match_operand:SI 6 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI (match_dup 6) (match_dup 4)) + (match_dup 5)]))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[3]); + + operands[3] = GEN_INT (32 - temp - INTVAL (operands[4])); + operands[4] = GEN_INT (32 - temp); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_THUMB1" + [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 3)))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[2]); + + operands[2] = GEN_INT (32 - temp - INTVAL (operands[3])); + operands[3] = GEN_INT (32 - temp); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operand:SI 5 "s_register_operand" "")])) + (clobber (match_operand:SI 6 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 6) (match_dup 4)) + (match_dup 5)]))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[3]); + + operands[3] = GEN_INT (32 - temp - INTVAL (operands[4])); + operands[4] = GEN_INT (32 - temp); + }" +) + +;;; ??? This pattern is bogus. If operand3 has bits outside the range +;;; represented by the bitfield, then this will produce incorrect results. +;;; Somewhere, the value needs to be truncated. On targets like the m68k, +;;; which have a real bit-field insert instruction, the truncation happens +;;; in the bit-field insert instruction itself. Since arm does not have a +;;; bit-field insert instruction, we would have to emit code here to truncate +;;; the value before we insert. This loses some of the advantage of having +;;; this insv pattern, so this pattern needs to be reevalutated. 
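The insv expander that follows implements bit-field insertion; on Thumb-2 it can use bfc/bfi directly, while otherwise it falls back to mask-and-merge sequences. Roughly, in C, the general idea is the sketch below; it is illustrative only and not GCC source, insert_bits and its parameters are invented names, and it includes the explicit truncation of the value that the ??? comment above says the RTL pattern itself does not guarantee.

#include <stdint.h>

/* Insert WIDTH bits of VAL into WORD starting at START_BIT:
   clear the field (bfc-style), then OR in the truncated, shifted value.
   Requires 0 < width <= 32 and start_bit + width <= 32.  */
static uint32_t insert_bits (uint32_t word, uint32_t val,
                             unsigned start_bit, unsigned width)
{
  uint32_t mask = (width == 32) ? 0xffffffffu : ((1u << width) - 1u);

  val &= mask;                     /* truncate the value to the field width */
  word &= ~(mask << start_bit);    /* clear the destination field           */
  word |= val << start_bit;        /* merge in the new field contents       */
  return word;
}

The expander below refines this for special cases (a constant value, a field at bit 0, a field reaching bit 31) to reduce the number of instructions.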
+ +(define_expand "insv" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")) + (match_operand:SI 3 "reg_or_int_operand" ""))] + "TARGET_ARM || arm_arch_thumb2" + " + { + int start_bit = INTVAL (operands[2]); + int width = INTVAL (operands[1]); + HOST_WIDE_INT mask = (((HOST_WIDE_INT)1) << width) - 1; + rtx target, subtarget; + + if (arm_arch_thumb2) + { + bool use_bfi = TRUE; + + if (GET_CODE (operands[3]) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (operands[3]) & mask; + + if (val == 0) + { + emit_insn (gen_insv_zero (operands[0], operands[1], + operands[2])); + DONE; + } + + /* See if the set can be done with a single orr instruction. */ + if (val == mask && const_ok_for_arm (val << start_bit)) + use_bfi = FALSE; + } + + if (use_bfi) + { + if (GET_CODE (operands[3]) != REG) + operands[3] = force_reg (SImode, operands[3]); + + emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + } + + target = copy_rtx (operands[0]); + /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical + subreg as the final target. */ + if (GET_CODE (target) == SUBREG) + { + subtarget = gen_reg_rtx (SImode); + if (GET_MODE_SIZE (GET_MODE (SUBREG_REG (target))) + < GET_MODE_SIZE (SImode)) + target = SUBREG_REG (target); + } + else + subtarget = target; + + if (GET_CODE (operands[3]) == CONST_INT) + { + /* Since we are inserting a known constant, we may be able to + reduce the number of bits that we have to clear so that + the mask becomes simple. */ + /* ??? This code does not check to see if the new mask is actually + simpler. It may not be. */ + rtx op1 = gen_reg_rtx (SImode); + /* ??? Truncate operand3 to fit in the bitfield. See comment before + start of this pattern. */ + HOST_WIDE_INT op3_value = mask & INTVAL (operands[3]); + HOST_WIDE_INT mask2 = ((mask & ~op3_value) << start_bit); + + emit_insn (gen_andsi3 (op1, operands[0], + gen_int_mode (~mask2, SImode))); + emit_insn (gen_iorsi3 (subtarget, op1, + gen_int_mode (op3_value << start_bit, SImode))); + } + else if (start_bit == 0 + && !(const_ok_for_arm (mask) + || const_ok_for_arm (~mask))) + { + /* A Trick, since we are setting the bottom bits in the word, + we can shift operand[3] up, operand[0] down, OR them together + and rotate the result back again. This takes 3 insns, and + the third might be mergeable into another op. */ + /* The shift up copes with the possibility that operand[3] is + wider than the bitfield. */ + rtx op0 = gen_reg_rtx (SImode); + rtx op1 = gen_reg_rtx (SImode); + + emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width))); + emit_insn (gen_lshrsi3 (op1, operands[0], operands[1])); + emit_insn (gen_iorsi3 (op1, op1, op0)); + emit_insn (gen_rotlsi3 (subtarget, op1, operands[1])); + } + else if ((width + start_bit == 32) + && !(const_ok_for_arm (mask) + || const_ok_for_arm (~mask))) + { + /* Similar trick, but slightly less efficient. 
*/ + + rtx op0 = gen_reg_rtx (SImode); + rtx op1 = gen_reg_rtx (SImode); + + emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width))); + emit_insn (gen_ashlsi3 (op1, operands[0], operands[1])); + emit_insn (gen_lshrsi3 (op1, op1, operands[1])); + emit_insn (gen_iorsi3 (subtarget, op1, op0)); + } + else + { + rtx op0 = gen_int_mode (mask, SImode); + rtx op1 = gen_reg_rtx (SImode); + rtx op2 = gen_reg_rtx (SImode); + + if (!(const_ok_for_arm (mask) || const_ok_for_arm (~mask))) + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp, op0)); + op0 = tmp; + } + + /* Mask out any bits in operand[3] that are not needed. */ + emit_insn (gen_andsi3 (op1, operands[3], op0)); + + if (GET_CODE (op0) == CONST_INT + && (const_ok_for_arm (mask << start_bit) + || const_ok_for_arm (~(mask << start_bit)))) + { + op0 = gen_int_mode (~(mask << start_bit), SImode); + emit_insn (gen_andsi3 (op2, operands[0], op0)); + } + else + { + if (GET_CODE (op0) == CONST_INT) + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp, op0)); + op0 = tmp; + } + + if (start_bit != 0) + emit_insn (gen_ashlsi3 (op0, op0, operands[2])); + + emit_insn (gen_andsi_notsi_si (op2, operands[0], op0)); + } + + if (start_bit != 0) + emit_insn (gen_ashlsi3 (op1, op1, operands[2])); + + emit_insn (gen_iorsi3 (subtarget, op1, op2)); + } + + if (subtarget != target) + { + /* If TARGET is still a SUBREG, then it must be wider than a word, + so we must be careful only to set the subword we were asked to. */ + if (GET_CODE (target) == SUBREG) + emit_move_insn (target, subtarget); + else + emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); + } + + DONE; + }" +) + +(define_insn "insv_zero" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (match_operand:SI 1 "const_int_operand" "M") + (match_operand:SI 2 "const_int_operand" "M")) + (const_int 0))] + "arm_arch_thumb2" + "bfc%?\t%0, %2, %1" + [(set_attr "length" "4") + (set_attr "predicable" "yes")] +) + +(define_insn "insv_t2" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (match_operand:SI 1 "const_int_operand" "M") + (match_operand:SI 2 "const_int_operand" "M")) + (match_operand:SI 3 "s_register_operand" "r"))] + "arm_arch_thumb2" + "bfi%?\t%0, %3, %2, %1" + [(set_attr "length" "4") + (set_attr "predicable" "yes")] +) + +; constants for op 2 will never be given to these patterns. +(define_insn_and_split "*anddi_notdi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) + (match_operand:DI 2 "s_register_operand" "r,0")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed + && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0]))) + && ! 
IS_IWMMXT_REGNUM (REGNO (operands[0]))" + [(set (match_dup 0) (and:SI (not:SI (match_dup 1)) (match_dup 2))) + (set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_insn_and_split "*anddi_notzesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + bic%?\\t%Q0, %Q1, %2 + #" + ; (not (zero_extend ...)) allows us to just copy the high word from + ; operand1 to operand0. + "TARGET_32BIT + && reload_completed + && operands[0] != operands[1]" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes")] +) + +(define_insn_and_split "*anddi_notsesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (and:SI (not:SI + (ashiftrt:SI (match_dup 2) (const_int 31))) + (match_dup 4)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_insn "andsi_notsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT" + "bic%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")] +) + +(define_insn "thumb1_bicsi3" + [(set (match_operand:SI 0 "register_operand" "=l") + (and:SI (not:SI (match_operand:SI 1 "register_operand" "l")) + (match_operand:SI 2 "register_operand" "0")))] + "TARGET_THUMB1" + "bic\\t%0, %1" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +(define_insn "andsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rM")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_ARM" + "bic%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "shift" "2") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +(define_insn "*andsi_notsi_si_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 
"s_register_operand" "=r") + (and:SI (not:SI (match_dup 2)) (match_dup 1)))] + "TARGET_32BIT" + "bic%.\\t%0, %1, %2" + [(set_attr "conds" "set")] +) + +(define_insn "*andsi_notsi_si_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "bic%.\\t%0, %1, %2" + [(set_attr "conds" "set")] +) + +(define_expand "iordi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ior:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "neon_logic_op2" "")))] + "TARGET_32BIT" + "" +) + +(define_insn "*iordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r") + (match_operand:DI 2 "s_register_operand" "r,r")))] + "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_insn "*iordi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + orr%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes")] +) + +(define_insn "*iordi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (GET_CODE (operands[2]) == CONST_INT) + { + if (TARGET_32BIT) + { + arm_split_constant (IOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + } + " +) + +(define_insn_and_split "*iorsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))] + "TARGET_32BIT" + "@ + orr%?\\t%0, %1, %2 + orn%?\\t%0, %1, #%B2 + #" + "TARGET_32BIT + && GET_CODE (operands[2]) == CONST_INT + && !(const_ok_for_arm (INTVAL (operands[2])) + || (TARGET_THUMB2 && const_ok_for_arm (~INTVAL (operands[2]))))" + [(clobber (const_int 0))] +{ + arm_split_constant (IOR, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; +} + [(set_attr "length" "4,4,16") + (set_attr "arch" "32,t2,32") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb1_iorsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (ior:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "orr\\t%0, %2" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (ior:SI (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_ARM + && 
!const_ok_for_arm (INTVAL (operands[2])) + && const_ok_for_arm (~INTVAL (operands[2]))" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (ior:SI (match_dup 1) (match_dup 3)))] + "" +) + +(define_insn "*iorsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r") + (match_operand:SI 2 "arm_rhs_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" + [(set_attr "conds" "set")] +) + +(define_insn "*iorsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r") + (match_operand:SI 2 "arm_rhs_operand" "rI")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" + [(set_attr "conds" "set")] +) + +(define_expand "xordi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (xor:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" "")))] + "TARGET_32BIT" + "" +) + +(define_insn "*xordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (xor:DI (match_operand:DI 1 "s_register_operand" "%0,r") + (match_operand:DI 2 "s_register_operand" "r,r")))] + "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_insn "*xordi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (xor:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + eor%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes")] +) + +(define_insn "*xordi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (xor:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (xor:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + "if (GET_CODE (operands[2]) == CONST_INT) + { + if (TARGET_32BIT) + { + arm_split_constant (XOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + }" +) + +(define_insn "*arm_xorsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (xor:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")))] + "TARGET_32BIT" + "eor%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")] +) + +(define_insn "*thumb1_xorsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (xor:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "eor\\t%0, %2" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +(define_insn "*xorsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (xor:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (xor:SI (match_dup 1) 
(match_dup 2)))] + "TARGET_32BIT" + "eor%.\\t%0, %1, %2" + [(set_attr "conds" "set")] +) + +(define_insn "*xorsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (xor:SI (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (const_int 0)))] + "TARGET_32BIT" + "teq%?\\t%0, %1" + [(set_attr "conds" "set")] +) + +; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C), +; (NOT D) we can sometimes merge the final NOT into one of the following +; insns. + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (and:SI (not:SI (match_operand:SI 1 "s_register_operand" "")) + (not:SI (match_operand:SI 2 "arm_rhs_operand" ""))) + (match_operand:SI 3 "arm_rhs_operand" ""))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 4) (and:SI (ior:SI (match_dup 1) (match_dup 2)) + (not:SI (match_dup 3)))) + (set (match_dup 0) (not:SI (match_dup 4)))] + "" +) + +(define_insn "*andsi_iorsi3_notsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") + (and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")) + (not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))] + "TARGET_32BIT" + "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3" + [(set_attr "length" "8") + (set_attr "ce_count" "2") + (set_attr "predicable" "yes")] +) + +; ??? Are these four splitters still beneficial when the Thumb-2 bitfield +; insns are available? +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operator:SI 9 "logical_binary_operator" + [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")])])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(match_operator:SI 9 "logical_binary_operator" + [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")]) + (zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" ""))])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 
"logical_binary_operator" + [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operator:SI 9 "logical_binary_operator" + [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")])])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(match_operator:SI 9 "logical_binary_operator" + [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")]) + (sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" ""))])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + + +;; Minimum and maximum insns + +(define_expand "smaxsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (smax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + " + if (operands[2] == const0_rtx || operands[2] == constm1_rtx) + { + /* No need for a clobber of the condition code register here. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_SMAX (SImode, operands[1], + operands[2]))); + DONE; + } +") + +(define_insn "*smax_0" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smax:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "bic%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes")] +) + +(define_insn "*smax_m1" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smax:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int -1)))] + "TARGET_32BIT" + "orr%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes")] +) + +(define_insn "*arm_smax_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "@ + cmp\\t%1, %2\;movlt\\t%0, %2 + cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_expand "sminsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (smin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + " + if (operands[2] == const0_rtx) + { + /* No need for a clobber of the condition code register here. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_SMIN (SImode, operands[1], + operands[2]))); + DONE; + } +") + +(define_insn "*smin_0" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smin:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "and%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes")] +) + +(define_insn "*arm_smin_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "@ + cmp\\t%1, %2\;movge\\t%0, %2 + cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_expand "umaxsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (umax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + "" +) + +(define_insn "*arm_umaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "@ + cmp\\t%1, %2\;movcc\\t%0, %2 + cmp\\t%1, %2\;movcs\\t%0, %1 + cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12")] +) + +(define_expand "uminsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (umin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + "" +) + +(define_insn "*arm_uminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "@ + cmp\\t%1, %2\;movcs\\t%0, %2 + cmp\\t%1, %2\;movcc\\t%0, %1 + cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12")] +) + +(define_insn "*store_minmaxsi" + [(set (match_operand:SI 
0 "memory_operand" "=m") + (match_operator:SI 3 "minmax_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "* + operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, + operands[1], operands[2]); + output_asm_insn (\"cmp\\t%1, %2\", operands); + if (TARGET_THUMB2) + output_asm_insn (\"ite\t%d3\", operands); + output_asm_insn (\"str%d3\\t%1, %0\", operands); + output_asm_insn (\"str%D3\\t%2, %0\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12))) + (set_attr "type" "store1")] +) + +; Reject the frame pointer in operand[1], since reloading this after +; it has been eliminated can cause carnage. +(define_insn "*minmax_arithsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 4 "shiftable_operator" + [(match_operator:SI 5 "minmax_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !arm_eliminable_register (operands[1])" + "* + { + enum rtx_code code = GET_CODE (operands[4]); + bool need_else; + + if (which_alternative != 0 || operands[3] != const0_rtx + || (code != PLUS && code != IOR && code != XOR)) + need_else = true; + else + need_else = false; + + operands[5] = gen_rtx_fmt_ee (minmax_code (operands[5]), SImode, + operands[2], operands[3]); + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (TARGET_THUMB2) + { + if (need_else) + output_asm_insn (\"ite\\t%d5\", operands); + else + output_asm_insn (\"it\\t%d5\", operands); + } + output_asm_insn (\"%i4%d5\\t%0, %1, %2\", operands); + if (need_else) + output_asm_insn (\"%i4%D5\\t%0, %1, %3\", operands); + return \"\"; + }" + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12)))] +) + + +;; Shift and rotation insns + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ashift:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (GET_CODE (operands[2]) == CONST_INT) + { + if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); + DONE; + } + /* Ideally we shouldn't fail here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. 
*/ + FAIL; + } + else if (!TARGET_REALLY_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)) + FAIL; + " +) + +(define_insn "arm_ashldi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (ashift:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (GET_CODE (operands[2]) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } + " +) + +(define_insn "*thumb1_ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashift:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "lsl\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (GET_CODE (operands[2]) == CONST_INT) + { + if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1])); + DONE; + } + /* Ideally we shouldn't fail here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. */ + FAIL; + } + else if (!TARGET_REALLY_IWMMXT) + FAIL; + " +) + +(define_insn "arm_ashrdi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") + (set_attr "insn" "mov") + (set_attr "length" "8")] +) + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ashiftrt:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (GET_CODE (operands[2]) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + operands[2] = GEN_INT (31); + " +) + +(define_insn "*thumb1_ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "asr\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (GET_CODE (operands[2]) == CONST_INT) + { + if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1])); + DONE; + } + /* Ideally we shouldn't fail here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. 
*/ + FAIL; + } + else if (!TARGET_REALLY_IWMMXT) + FAIL; + " +) + +(define_insn "arm_lshrdi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") + (set_attr "insn" "mov") + (set_attr "length" "8")] +) + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (lshiftrt:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (GET_CODE (operands[2]) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } + " +) + +(define_insn "*thumb1_lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "lsr\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "conds" "set")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (GET_CODE (operands[2]) == CONST_INT) + operands[2] = GEN_INT ((32 - INTVAL (operands[2])) % 32); + else + { + rtx reg = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (reg, GEN_INT (32), operands[2])); + operands[2] = reg; + } + " +) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[2]) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + operands[2] = GEN_INT (INTVAL (operands[2]) % 32); + } + else /* TARGET_THUMB1 */ + { + if (GET_CODE (operands [2]) == CONST_INT) + operands [2] = force_reg (SImode, operands[2]); + } + " +) + +(define_insn "*thumb1_rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=l") + (rotatert:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "ror\\t%0, %0, %2" + [(set_attr "length" "2")] +) + +(define_insn "*arm_shiftsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "reg_or_int_operand" "rM")]))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 0);" + [(set_attr "predicable" "yes") + (set_attr "shift" "1") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +(define_insn "*shiftsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (match_op_dup 3 [(match_dup 1) (match_dup 2)]))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 1);" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +(define_insn "*shiftsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV 
(match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 1);" + [(set_attr "conds" "set") + (set_attr "shift" "1")] +) + +(define_insn "*not_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])))] + "TARGET_32BIT" + "mvn%?\\t%0, %1%S3" + [(set_attr "predicable" "yes") + (set_attr "shift" "1") + (set_attr "insn" "mvn") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn "*not_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))] + "TARGET_32BIT" + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "insn" "mvn") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn "*not_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "insn" "mvn") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +;; We don't really have extzv, but defining this using shifts helps +;; to reduce register pressure later on. 
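The extzv expander that follows synthesises an unsigned bit-field extract from two shifts when ubfx is not available: shift the field up to the top of the word, then shift it back down with a zero-filling shift; the sign_extract splits earlier use an arithmetic right shift instead. A C sketch of both is below; it is illustrative only and not GCC source, the helper names are invented, and the signed variant assumes GCC's modulo conversion to int32_t and arithmetic right shift of signed values.

#include <stdint.h>

/* Unsigned extract of WIDTH bits at bit POS, as two shifts.
   Requires 0 < width and pos + width <= 32.  */
static uint32_t extract_unsigned (uint32_t word, unsigned pos, unsigned width)
{
  uint32_t tmp = word << (32 - pos - width);   /* lshift = 32 - width - pos */
  return tmp >> (32 - width);                  /* logical shift zero-fills  */
}

/* Signed analogue: the final shift is arithmetic, so the sign bit of
   the field is propagated.  */
static int32_t extract_signed (uint32_t word, unsigned pos, unsigned width)
{
  int32_t tmp = (int32_t) (word << (32 - pos - width));
  return tmp >> (32 - width);
}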
+ +(define_expand "extzv" + [(set (match_dup 4) + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_dup 4) + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_THUMB1 || arm_arch_thumb2" + " + { + HOST_WIDE_INT lshift = 32 - INTVAL (operands[2]) - INTVAL (operands[3]); + HOST_WIDE_INT rshift = 32 - INTVAL (operands[2]); + + if (arm_arch_thumb2) + { + emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + + operands[3] = GEN_INT (rshift); + + if (lshift == 0) + { + emit_insn (gen_lshrsi3 (operands[0], operands[1], operands[3])); + DONE; + } + + operands[2] = GEN_INT (lshift); + operands[4] = gen_reg_rtx (SImode); + }" +) + +(define_insn "extv" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") + (match_operand:SI 3 "const_int_operand" "M")))] + "arm_arch_thumb2" + "sbfx%?\t%0, %1, %3, %2" + [(set_attr "length" "4") + (set_attr "predicable" "yes")] +) + +(define_insn "extzv_t2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") + (match_operand:SI 3 "const_int_operand" "M")))] + "arm_arch_thumb2" + "ubfx%?\t%0, %1, %3, %2" + [(set_attr "length" "4") + (set_attr "predicable" "yes")] +) + + +;; Unary arithmetic insns + +(define_expand "negdi2" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (neg:DI (match_operand:DI 1 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + "" +) + +;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1). +;; The first alternative allows the common case of a *full* overlap. 
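The *arm_negdi2 pattern that follows negates a 64-bit value with an rsbs/rsc pair: subtract the low word from zero, then subtract the high word from zero with the borrow propagated through the carry flag. The word-level computation is sketched in C below; this is illustrative only and not GCC source, and di_pair and neg_di are invented names.

#include <stdint.h>

typedef struct { uint32_t lo; uint32_t hi; } di_pair;   /* a DImode value as two SImode words */

/* 64-bit negation on 32-bit halves: negate the low word, then negate
   the high word and subtract the borrow out of the low subtraction.  */
static di_pair neg_di (di_pair a)
{
  di_pair r;
  uint32_t borrow = (a.lo != 0);   /* borrow produced by 0 - a.lo */
  r.lo = 0u - a.lo;                /* rsbs %Q0, %Q1, #0           */
  r.hi = 0u - a.hi - borrow;       /* rsc  %R0, %R1, #0           */
  return r;
}

The Thumb-1 variant below needs three instructions (mov/neg/sbc) because Thumb-1 has no rsc.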
+(define_insn "*arm_negdi2" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (neg:DI (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*thumb1_negdi2" + [(set (match_operand:DI 0 "register_operand" "=&l") + (neg:DI (match_operand:DI 1 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB1" + "mov\\t%R0, #0\;neg\\t%Q0, %Q1\;sbc\\t%R0, %R1" + [(set_attr "length" "6")] +) + +(define_expand "negsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (neg:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_negsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT" + "rsb%?\\t%0, %1, #0" + [(set_attr "predicable" "yes")] +) + +(define_insn "*thumb1_negsi2" + [(set (match_operand:SI 0 "register_operand" "=l") + (neg:SI (match_operand:SI 1 "register_operand" "l")))] + "TARGET_THUMB1" + "neg\\t%0, %1" + [(set_attr "length" "2")] +) + +(define_expand "negsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (neg:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "" +) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (neg:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + "") + +;; abssi2 doesn't really clobber the condition codes if a different register +;; is being set. To keep things simple, assume during rtl manipulations that +;; it does, but tell the final scan operator the truth. 
Similarly for +;; (neg (abs...)) + +(define_expand "abssi2" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (abs:SI (match_operand:SI 1 "s_register_operand" ""))) + (clobber (match_dup 2))])] + "TARGET_EITHER" + " + if (TARGET_THUMB1) + operands[2] = gen_rtx_SCRATCH (SImode); + else + operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); +") + +(define_insn "*arm_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "@ + cmp\\t%0, #0\;rsblt\\t%0, %0, #0 + eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + ;; predicable can't be set based on the variant, so left as no + (set_attr "length" "8")] +) + +(define_insn_and_split "*thumb1_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (abs:SI (match_operand:SI 1 "s_register_operand" "l"))) + (clobber (match_scratch:SI 2 "=&l"))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] + "" + [(set_attr "length" "6")] +) + +(define_insn "*arm_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "@ + cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 + eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + ;; predicable can't be set based on the variant, so left as no + (set_attr "length" "8")] +) + +(define_insn_and_split "*thumb1_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "l")))) + (clobber (match_scratch:SI 2 "=&l"))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1))) + (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] + "" + [(set_attr "length" "6")] +) + +(define_expand "abssf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (abs:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "") + +(define_expand "absdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (abs:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "") + +(define_expand "sqrtsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (sqrt:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)" + "") + +(define_expand "sqrtdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + "") + +(define_insn_and_split "one_cmpldi2" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (not:DI (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (not:SI (match_dup 1))) + (set (match_dup 2) (not:SI (match_dup 3)))] + " + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = 
gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes")] +) + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (not:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_one_cmplsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT" + "mvn%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "insn" "mvn")] +) + +(define_insn "*thumb1_one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=l") + (not:SI (match_operand:SI 1 "register_operand" "l")))] + "TARGET_THUMB1" + "mvn\\t%0, %1" + [(set_attr "length" "2") + (set_attr "insn" "mvn")] +) + +(define_insn "*notsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_dup 1)))] + "TARGET_32BIT" + "mvn%.\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "insn" "mvn")] +) + +(define_insn "*notsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "mvn%.\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "insn" "mvn")] +) + +;; Fixed <--> Floating conversion insns + +(define_expand "floatsihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:SI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatdihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:DI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatsisf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (float:SF (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + if (TARGET_MAVERICK) + { + emit_insn (gen_cirrus_floatsisf2 (operands[0], operands[1])); + DONE; + } +") + +(define_expand "floatsidf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float:DF (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK) + { + emit_insn (gen_cirrus_floatsidf2 (operands[0], operands[1])); + DONE; + } +") + +(define_expand "fix_trunchfsi2" + [(set (match_operand:SI 0 "general_operand" "") + (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_trunchfdi2" + [(set (match_operand:DI 0 "general_operand" "") + (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + if (TARGET_MAVERICK) + { + if (!cirrus_fp_register (operands[0], SImode)) + operands[0] 
= force_reg (SImode, operands[0]); + if (!cirrus_fp_register (operands[1], SFmode)) + operands[1] = force_reg (SFmode, operands[0]); + emit_insn (gen_cirrus_truncsfsi2 (operands[0], operands[1])); + DONE; + } +") + +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" ""))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " + if (TARGET_MAVERICK) + { + if (!cirrus_fp_register (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[0]); + emit_insn (gen_cirrus_truncdfsi2 (operands[0], operands[1])); + DONE; + } +") + +;; Truncation insns + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" +) + +/* DFmode -> HFmode conversions have to go through SFmode. */ +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "general_operand" "") + (float_truncate:HF + (match_operand:DF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +;; Zero and sign extension instructions. + +(define_insn "zero_extenddi2" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (zero_extend:DI (match_operand:QHSI 1 "" + "")))] + "TARGET_32BIT " + "#" + [(set_attr "length" "8") + (set_attr "ce_count" "2") + (set_attr "predicable" "yes")] +) + +(define_insn "extenddi2" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (sign_extend:DI (match_operand:QHSI 1 "" + "")))] + "TARGET_32BIT " + "#" + [(set_attr "length" "8") + (set_attr "ce_count" "2") + (set_attr "shift" "1") + (set_attr "predicable" "yes")] +) + +;; Splits for all extensions to DImode +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))] + "TARGET_32BIT" + [(set (match_dup 0) (match_dup 1))] +{ + rtx lo_part = gen_lowpart (SImode, operands[0]); + enum machine_mode src_mode = GET_MODE (operands[1]); + + if (REG_P (operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1])) + emit_clobber (operands[0]); + if (!REG_P (lo_part) || src_mode != SImode + || !rtx_equal_p (lo_part, operands[1])) + { + if (src_mode == SImode) + emit_move_insn (lo_part, operands[1]); + else + emit_insn (gen_rtx_SET (VOIDmode, lo_part, + gen_rtx_ZERO_EXTEND (SImode, operands[1]))); + operands[1] = lo_part; + } + operands[0] = gen_highpart (SImode, operands[0]); + operands[1] = const0_rtx; +}) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))] + "TARGET_32BIT" + [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))] +{ + rtx lo_part = gen_lowpart (SImode, operands[0]); + enum machine_mode src_mode = GET_MODE (operands[1]); + + if (REG_P (operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1])) + emit_clobber (operands[0]); + + if (!REG_P (lo_part) || src_mode != SImode + || !rtx_equal_p (lo_part, operands[1])) + { + if (src_mode == SImode) + emit_move_insn (lo_part, operands[1]); + else + emit_insn (gen_rtx_SET (VOIDmode, lo_part, + gen_rtx_SIGN_EXTEND (SImode, operands[1]))); + operands[1] = lo_part; + } + operands[0] = gen_highpart (SImode, operands[0]); +}) + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "") 
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_EITHER" +{ + if (TARGET_ARM && !arm_arch4 && MEM_P (operands[1])) + { + emit_insn (gen_movhi_bytes (operands[0], operands[1])); + DONE; + } + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16))); + emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (16))); + DONE; + } +}) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (match_operand:HI 1 "s_register_operand" "")))] + "!TARGET_THUMB2 && !arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))] +{ + operands[2] = gen_lowpart (SImode, operands[1]); +}) + +(define_insn "*thumb1_zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1" +{ + rtx mem; + + if (which_alternative == 0 && arm_arch6) + return "uxth\t%0, %1"; + if (which_alternative == 0) + return "#"; + + mem = XEXP (operands[1], 0); + + if (GET_CODE (mem) == CONST) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == PLUS) + { + rtx a = XEXP (mem, 0); + + /* This can happen due to bugs in reload. */ + if (GET_CODE (a) == REG && REGNO (a) == SP_REGNUM) + { + rtx ops[2]; + ops[0] = operands[0]; + ops[1] = a; + + output_asm_insn ("mov\t%0, %1", ops); + + XEXP (mem, 0) = operands[0]; + } + } + + return "ldrh\t%0, %1"; +} + [(set_attr_alternative "length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 4)]) + (set_attr "type" "alu_shift,load_byte")] +) + +(define_insn "*arm_zero_extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(h%)\\t%0, %1" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch6" + "@ + uxth%?\\t%0, %1 + ldr%(h%)\\t%0, %1" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendhisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uxtah%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift") + (set_attr "predicable" "yes")] +) + +(define_expand "zero_extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))] + "TARGET_EITHER" +{ + if (TARGET_ARM && !arm_arch6 && GET_CODE (operands[1]) != MEM) + { + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, operands[1]), + GEN_INT (255))); + DONE; + } + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24))); + emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (24))); + DONE; + } +}) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (match_operand:QI 1 "s_register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 
0) (lshiftrt:SI (match_dup 0) (const_int 24)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0); + if (TARGET_ARM) + { + emit_insn (gen_andsi3 (operands[0], operands[2], GEN_INT (255))); + DONE; + } +}) + +(define_insn "*thumb1_zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1 && !arm_arch6" + "@ + # + ldrb\\t%0, %1" + [(set_attr "length" "4,2") + (set_attr "type" "alu_shift,load_byte") + (set_attr "pool_range" "*,32")] +) + +(define_insn "*thumb1_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1 && arm_arch6" + "@ + uxtb\\t%0, %1 + ldrb\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "alu_shift,load_byte")] +) + +(define_insn "*arm_zero_extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && !arm_arch6" + "@ + # + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "length" "8,4") + (set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch6" + "@ + uxtb%(%)\\t%0, %1 + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendqisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:QI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uxtab%?\\t%0, %2, %1" + [(set_attr "predicable" "yes") + (set_attr "insn" "xtab") + (set_attr "type" "alu_shift")] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 0))) + (clobber (match_operand:SI 2 "s_register_operand" ""))] + "TARGET_32BIT && (GET_CODE (operands[1]) != MEM) && ! 
BYTES_BIG_ENDIAN" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))] + "" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 3))) + (clobber (match_operand:SI 2 "s_register_operand" ""))] + "TARGET_32BIT && (GET_CODE (operands[1]) != MEM) && BYTES_BIG_ENDIAN" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))] + "" +) + + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ior_xor:SI (and:SI (ashift:SI + (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")) + (zero_extend:SI + (match_operator 5 "subreg_lowpart_operator" + [(match_operand:SI 4 "s_register_operand" "")]))))] + "TARGET_32BIT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) + == (GET_MODE_MASK (GET_MODE (operands[5])) + & (GET_MODE_MASK (GET_MODE (operands[5])) + << (INTVAL (operands[2])))))" + [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (zero_extend:SI (match_dup 5)))] + "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" +) + +(define_insn "*compareqi_eq0" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:QI 0 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "tst\\t%0, #255" + [(set_attr "conds" "set")] +) + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_EITHER" +{ + if (TARGET_THUMB1) + { + emit_insn (gen_thumb1_extendhisi2 (operands[0], operands[1])); + DONE; + } + if (MEM_P (operands[1]) && TARGET_ARM && !arm_arch4) + { + emit_insn (gen_extendhisi2_mem (operands[0], operands[1])); + DONE; + } + + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16))); + emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (16))); + DONE; + } +}) + +(define_split + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" ""))) + (clobber (match_scratch:SI 2 ""))])] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0); +}) + +;; We used to have an early-clobber on the scratch register here. +;; However, there's a bug somewhere in reload which means that this +;; can be partially ignored during spill allocation if the memory +;; address also needs reloading; this causes us to die later on when +;; we try to verify the operands. Fortunately, we don't really need +;; the early-clobber: we can always use operand 0 if operand 2 +;; overlaps the address. +(define_insn "thumb1_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m"))) + (clobber (match_scratch:SI 2 "=X,l"))] + "TARGET_THUMB1" + "* + { + rtx ops[4]; + rtx mem; + + if (which_alternative == 0 && !arm_arch6) + return \"#\"; + if (which_alternative == 0) + return \"sxth\\t%0, %1\"; + + mem = XEXP (operands[1], 0); + + /* This code used to try to use 'V', and fix the address only if it was + offsettable, but this fails for e.g. 
REG+48 because 48 is outside the + range of QImode offsets, and offsettable_address_p does a QImode + address check. */ + + if (GET_CODE (mem) == CONST) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == LABEL_REF) + return \"ldr\\t%0, %1\"; + + if (GET_CODE (mem) == PLUS) + { + rtx a = XEXP (mem, 0); + rtx b = XEXP (mem, 1); + + if (GET_CODE (a) == LABEL_REF + && GET_CODE (b) == CONST_INT) + return \"ldr\\t%0, %1\"; + + if (GET_CODE (b) == REG) + return \"ldrsh\\t%0, %1\"; + + ops[1] = a; + ops[2] = b; + } + else + { + ops[1] = mem; + ops[2] = const0_rtx; + } + + gcc_assert (GET_CODE (ops[1]) == REG); + + ops[0] = operands[0]; + if (reg_mentioned_p (operands[2], ops[1])) + ops[3] = ops[0]; + else + ops[3] = operands[2]; + output_asm_insn (\"mov\\t%3, %2\;ldrsh\\t%0, [%1, %3]\", ops); + return \"\"; + }" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 4)]) + (set_attr "type" "alu_shift,load_byte") + (set_attr "pool_range" "*,1020")] +) + +;; This pattern will only be used when ldsh is not available +(define_expand "extendhisi2_mem" + [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" ""))) + (set (match_dup 3) + (zero_extend:SI (match_dup 7))) + (set (match_dup 6) (ashift:SI (match_dup 4) (const_int 24))) + (set (match_operand:SI 0 "" "") + (ior:SI (ashiftrt:SI (match_dup 6) (const_int 16)) (match_dup 5)))] + "TARGET_ARM" + " + { + rtx mem1, mem2; + rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + + mem1 = change_address (operands[1], QImode, addr); + mem2 = change_address (operands[1], QImode, plus_constant (addr, 1)); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = mem1; + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = mem2; + + if (BYTES_BIG_ENDIAN) + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } + else + { + operands[4] = operands[3]; + operands[5] = operands[2]; + } + }" +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0); +}) + +(define_insn "*arm_extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(sh%)\\t%0, %1" + [(set_attr "length" "8,4") + (set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +;; ??? 
Check Thumb-2 pool range +(define_insn "*arm_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_32BIT && arm_arch6" + "@ + sxth%?\\t%0, %1 + ldr%(sh%)\\t%0, %1" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendhisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (sign_extend:SI (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sxtah%?\\t%0, %2, %1" +) + +(define_expand "extendqihi2" + [(set (match_dup 2) + (ashift:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "") + (const_int 24))) + (set (match_operand:HI 0 "s_register_operand" "") + (ashiftrt:SI (match_dup 2) + (const_int 24)))] + "TARGET_ARM" + " + { + if (arm_arch4 && GET_CODE (operands[1]) == MEM) + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_SIGN_EXTEND (HImode, operands[1]))); + DONE; + } + if (!s_register_operand (operands[1], QImode)) + operands[1] = copy_to_mode_reg (QImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_reg_rtx (SImode); + }" +) + +(define_insn "*arm_extendqihi_insn" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "arm_extendqisi_mem_op" "Uq")))] + "TARGET_ARM && arm_arch4" + "ldr%(sb%)\\t%0, %1" + [(set_attr "type" "load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "256") + (set_attr "neg_pool_range" "244")] +) + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "")))] + "TARGET_EITHER" +{ + if (!arm_arch4 && MEM_P (operands[1])) + operands[1] = copy_to_mode_reg (QImode, operands[1]); + + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24))); + emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (24))); + DONE; + } +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0); +}) + +(define_insn "*arm_extendqisi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(sb%)\\t%0, %1" + [(set_attr "length" "8,4") + (set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendqisi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI + (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))] + "TARGET_ARM && arm_arch6" + "@ + sxtb%?\\t%0, %1 + ldr%(sb%)\\t%0, %1" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendqisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (sign_extend:SI 
(match_operand:QI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sxtab%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift") + (set_attr "insn" "xtab") + (set_attr "predicable" "yes")] +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))] + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (sign_extend:SI (match_dup 3)))] +{ + rtx addr = XEXP (operands[1], 0); + + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1))) + /* No split necessary. */ + FAIL; + + if (GET_CODE (addr) == PLUS + && !REG_P (XEXP (addr, 0)) && !REG_P (XEXP (addr, 1))) + FAIL; + + if (reg_overlap_mentioned_p (operands[0], addr)) + { + rtx t = gen_lowpart (QImode, operands[0]); + emit_move_insn (t, operands[1]); + emit_insn (gen_thumb1_extendqisi2 (operands[0], t)); + DONE; + } + + if (REG_P (addr)) + { + addr = gen_rtx_PLUS (Pmode, addr, operands[0]); + operands[2] = const0_rtx; + } + else if (GET_CODE (addr) != PLUS) + FAIL; + else if (REG_P (XEXP (addr, 0))) + { + operands[2] = XEXP (addr, 1); + addr = gen_rtx_PLUS (Pmode, XEXP (addr, 0), operands[0]); + } + else + { + operands[2] = XEXP (addr, 0); + addr = gen_rtx_PLUS (Pmode, XEXP (addr, 1), operands[0]); + } + + operands[3] = change_address (operands[1], QImode, addr); +}) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_dup 0) (match_operand 1 "const_int_operand"))) + (set (match_operand:SI 2 "register_operand" "") (const_int 0)) + (set (match_operand:SI 3 "register_operand" "") + (sign_extend:SI (match_operand:QI 4 "memory_operand" "")))] + "TARGET_THUMB1 + && GET_CODE (XEXP (operands[4], 0)) == PLUS + && rtx_equal_p (operands[0], XEXP (XEXP (operands[4], 0), 0)) + && rtx_equal_p (operands[2], XEXP (XEXP (operands[4], 0), 1)) + && (peep2_reg_dead_p (3, operands[0]) + || rtx_equal_p (operands[0], operands[3])) + && (peep2_reg_dead_p (3, operands[2]) + || rtx_equal_p (operands[2], operands[3]))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (sign_extend:SI (match_dup 4)))] +{ + rtx addr = gen_rtx_PLUS (Pmode, operands[0], operands[2]); + operands[4] = change_address (operands[4], QImode, addr); +}) + +(define_insn "thumb1_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l,l") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,V,m")))] + "TARGET_THUMB1" +{ + rtx addr; + + if (which_alternative == 0 && arm_arch6) + return "sxtb\\t%0, %1"; + if (which_alternative == 0) + return "#"; + + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1))) + return "ldrsb\\t%0, %1"; + + return "#"; +} + [(set_attr_alternative "length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 2) + (if_then_else (eq_attr "is_arch6" "yes") + (const_int 4) (const_int 6))]) + (set_attr "type" "alu_shift,load_byte,load_byte")] +) + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" +) + +/* HFmode -> DFmode conversions have to go through SFmode. 
*/ +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "general_operand" "") + (float_extend:DF (match_operand:HF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (DFmode, op1, 0); + emit_insn (gen_movdf (operands[0], op1)); + DONE; + }" +) + +;; Move insns (including loads and stores) + +;; XXX Just some ideas about movti. +;; I don't think these are a good idea on the arm, there just aren't enough +;; registers +;;(define_expand "loadti" +;; [(set (match_operand:TI 0 "s_register_operand" "") +;; (mem:TI (match_operand:SI 1 "address_operand" "")))] +;; "" "") + +;;(define_expand "storeti" +;; [(set (mem:TI (match_operand:TI 0 "address_operand" "")) +;; (match_operand:TI 1 "s_register_operand" ""))] +;; "" "") + +;;(define_expand "movti" +;; [(set (match_operand:TI 0 "general_operand" "") +;; (match_operand:TI 1 "general_operand" ""))] +;; "" +;; " +;;{ +;; rtx insn; +;; +;; if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) +;; operands[1] = copy_to_reg (operands[1]); +;; if (GET_CODE (operands[0]) == MEM) +;; insn = gen_storeti (XEXP (operands[0], 0), operands[1]); +;; else if (GET_CODE (operands[1]) == MEM) +;; insn = gen_loadti (operands[0], XEXP (operands[1], 0)); +;; else +;; FAIL; +;; +;; emit_insn (insn); +;; DONE; +;;}") + +;; Recognize garbage generated above. + +;;(define_insn "" +;; [(set (match_operand:TI 0 "general_operand" "=r,r,r,<,>,m") +;; (match_operand:TI 1 "general_operand" "<,>,m,r,r,r"))] +;; "" +;; "* +;; { +;; register mem = (which_alternative < 3); +;; register const char *template; +;; +;; operands[mem] = XEXP (operands[mem], 0); +;; switch (which_alternative) +;; { +;; case 0: template = \"ldmdb\\t%1!, %M0\"; break; +;; case 1: template = \"ldmia\\t%1!, %M0\"; break; +;; case 2: template = \"ldmia\\t%1, %M0\"; break; +;; case 3: template = \"stmdb\\t%0!, %M1\"; break; +;; case 4: template = \"stmia\\t%0!, %M1\"; break; +;; case 5: template = \"stmia\\t%0, %M1\"; break; +;; } +;; output_asm_insn (template, operands); +;; return \"\"; +;; }") + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (DImode, operands[1]); + } + " +) + +(define_insn "*arm_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))] + "TARGET_32BIT + && !(TARGET_HARD_FLOAT && (TARGET_MAVERICK || TARGET_VFP)) + && !TARGET_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + default: + return output_move_double (operands); + } + " + [(set_attr "length" "8,12,16,8,8") + (set_attr "type" "*,*,*,load2,store2") + (set_attr "arm_pool_range" "*,*,*,1020,*") + (set_attr "arm_neg_pool_range" "*,*,*,1008,*") + (set_attr "thumb2_pool_range" "*,*,*,4096,*") + (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")] +) + +(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "const_double_operand" ""))] + "TARGET_32BIT + && reload_completed + && (arm_const_double_inline_cost (operands[1]) + <= ((optimize_size || arm_ld_sched) ? 
3 : 4))" + [(const_int 0)] + " + arm_split_constant (SET, SImode, curr_insn, + INTVAL (gen_lowpart (SImode, operands[1])), + gen_lowpart (SImode, operands[0]), NULL_RTX, 0); + arm_split_constant (SET, SImode, curr_insn, + INTVAL (gen_highpart_mode (SImode, + GET_MODE (operands[0]), + operands[1])), + gen_highpart (SImode, operands[0]), NULL_RTX, 0); + DONE; + " +) + +; If optimizing for size, or if we have load delay slots, then +; we want to split the constant into two separate operations. +; In both cases this may split a trivial part into a single data op +; leaving a single complex constant to load. We can also get longer +; offsets in a LDR which means we get better chances of sharing the pool +; entries. Finally, we can normally do a better job of scheduling +; LDR instructions than we can with LDM. +; This pattern will only match if the one above did not. +(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "const_double_operand" ""))] + "TARGET_ARM && reload_completed + && arm_const_double_by_parts (operands[1])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart_mode (SImode, GET_MODE (operands[0]), + operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + " +) + +(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "arm_general_register_operand" ""))] + "TARGET_EITHER && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* Handle a partial overlap. */ + if (rtx_equal_p (operands[0], operands[3])) + { + rtx tmp0 = operands[0]; + rtx tmp1 = operands[1]; + + operands[0] = operands[2]; + operands[1] = operands[3]; + operands[2] = tmp0; + operands[3] = tmp1; + } + " +) + +;; We can't actually do base+index doubleword loads if the index and +;; destination overlap. Split here so that we at least have chance to +;; schedule. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (mem:DI (plus:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_LDRD + && reg_overlap_mentioned_p (operands[0], operands[1]) + && reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 4) + (plus:SI (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (mem:DI (match_dup 4)))] + " + operands[4] = gen_rtx_REG (SImode, REGNO(operands[0])); + " +) + +;;; ??? This should have alternatives for constants. +;;; ??? This was originally identical to the movdf_insn pattern. +;;; ??? The 'i' constraint looks funny, but it should always be replaced by +;;; thumb_reorg with a memory reference. 
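+;; Editor's note (illustrative, not part of the upstream machine description):
+;; the register-to-register ANY64 split further up orders its two SImode
+;; moves so that an overlapping word is read before it is overwritten.  As a
+;; hypothetical example on a little-endian configuration, copying a DImode
+;; value held in {r0,r1} into {r1,r2} must emit "mov r2, r1" before
+;; "mov r1, r0"; moving the low word first would clobber r1 while it still
+;; holds the source's high word.  The register names are placeholders only.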
+(define_insn "*thumb1_movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_operand" "=l,l,l,l,>,l, m,*r") + (match_operand:DI 1 "general_operand" "l, I,J,>,l,mi,l,*r"))] + "TARGET_THUMB1 + && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + { + switch (which_alternative) + { + default: + case 0: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\"; + return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\"; + case 1: + return \"mov\\t%Q0, %1\;mov\\t%R0, #0\"; + case 2: + operands[1] = GEN_INT (- INTVAL (operands[1])); + return \"mov\\t%Q0, %1\;neg\\t%Q0, %Q0\;asr\\t%R0, %Q0, #31\"; + case 3: + return \"ldmia\\t%1, {%0, %H0}\"; + case 4: + return \"stmia\\t%0, {%1, %H1}\"; + case 5: + return thumb_load_double_from_address (operands); + case 6: + operands[2] = gen_rtx_MEM (SImode, + plus_constant (XEXP (operands[0], 0), 4)); + output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands); + return \"\"; + case 7: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"mov\\t%0, %1\;mov\\t%H0, %H1\"; + return \"mov\\t%H0, %H1\;mov\\t%0, %1\"; + } + }" + [(set_attr "length" "4,4,6,2,2,6,4,4") + (set_attr "type" "*,*,*,load2,store2,load2,store2,*") + (set_attr "insn" "*,mov,*,*,*,*,*,mov") + (set_attr "pool_range" "*,*,*,*,*,1020,*,*")] +) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "TARGET_EITHER" + " + { + rtx base, offset, tmp; + + if (TARGET_32BIT) + { + /* Everything except mem = const or mem = mem can be done easily. */ + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (SImode, operands[1]); + if (arm_general_register_operand (operands[0], SImode) + && GET_CODE (operands[1]) == CONST_INT + && !(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1])))) + { + arm_split_constant (SET, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], NULL_RTX, + optimize && can_create_pseudo_p ()); + DONE; + } + + if (TARGET_USE_MOVT && !target_word_relocations + && GET_CODE (operands[1]) == SYMBOL_REF + && !flag_pic && !arm_tls_referenced_p (operands[1])) + { + arm_emit_movpair (operands[0], operands[1]); + DONE; + } + } + else /* TARGET_THUMB1... */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (SImode, operands[1]); + } + } + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + { + split_const (operands[1], &base, &offset); + if (GET_CODE (base) == SYMBOL_REF + && !offset_within_block_p (base, INTVAL (offset))) + { + tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + emit_move_insn (tmp, base); + emit_insn (gen_addsi3 (operands[0], tmp, offset)); + DONE; + } + } + + /* Recognize the case where operand[1] is a reference to thread-local + data and load its address to a register. */ + if (arm_tls_referenced_p (operands[1])) + { + rtx tmp = operands[1]; + rtx addend = NULL; + + if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) + { + addend = XEXP (XEXP (tmp, 0), 1); + tmp = XEXP (XEXP (tmp, 0), 0); + } + + gcc_assert (GET_CODE (tmp) == SYMBOL_REF); + gcc_assert (SYMBOL_REF_TLS_MODEL (tmp) != 0); + + tmp = legitimize_tls_address (tmp, + !can_create_pseudo_p () ? 
operands[0] : 0); + if (addend) + { + tmp = gen_rtx_PLUS (SImode, tmp, addend); + tmp = force_operand (tmp, operands[0]); + } + operands[1] = tmp; + } + else if (flag_pic + && (CONSTANT_P (operands[1]) + || symbol_mentioned_p (operands[1]) + || label_mentioned_p (operands[1]))) + operands[1] = legitimize_pic_address (operands[1], SImode, + (!can_create_pseudo_p () + ? operands[0] + : 0)); + } + " +) + +;; The ARM LO_SUM and HIGH are backwards - HIGH sets the low bits, and +;; LO_SUM adds in the high bits. Fortunately these are opaque operations +;; so this does not matter. +(define_insn "*arm_movt" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "i")))] + "arm_arch_thumb2" + "movt%?\t%0, #:upper16:%c2" + [(set_attr "predicable" "yes") + (set_attr "length" "4")] +) + +(define_insn "*arm_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))] + "TARGET_ARM && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + movw%?\\t%0, %1 + ldr%?\\t%0, %1 + str%?\\t%1, %0" + [(set_attr "type" "*,*,*,*,load1,store1") + (set_attr "insn" "mov,mov,mvn,mov,*,*") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,*,4096,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*")] +) + +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_32BIT + && (!(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1]))))" + [(clobber (const_int 0))] + " + arm_split_constant (SET, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], NULL_RTX, 0); + DONE; + " +) + +(define_insn "*thumb1_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*l*h*k") + (match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*l*h*k"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov %0, %1 + mov %0, %1 + # + # + ldmia\\t%1, {%0} + stmia\\t%0, {%1} + ldr\\t%0, %1 + str\\t%1, %0 + mov\\t%0, %1" + [(set_attr "length" "2,2,4,4,2,2,2,2,2") + (set_attr "type" "*,*,*,*,load1,store1,load1,store1,*") + (set_attr "pool_range" "*,*,*,*,*,*,1020,*,*") + (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_J (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (neg:SI (match_dup 2)))] + " + { + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + }" +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] + " + { + unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; + unsigned HOST_WIDE_INT mask = 0xff; + int i; + + for (i = 0; i < 25; i++) + if ((val & (mask << i)) == val) + break; + + /* Don't split if the shift is zero. 
*/ + if (i == 0) + FAIL; + + operands[1] = GEN_INT (val >> i); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + operands[3] = GEN_INT (i); + }" +) + +;; When generating pic, we need to load the symbol offset into a register. +;; So that the optimizer does not confuse this with a normal symbol load +;; we use an unspec. The offset will be loaded from a constant pool entry, +;; since that is the only type of relocation we can use. + +;; Wrap calculation of the whole PIC address in a single pattern for the +;; benefit of optimizers, particularly, PRE and HOIST. Calculation of +;; a PIC address involves two loads from memory, so we want to CSE it +;; as often as possible. +;; This pattern will be split into one of the pic_load_addr_* patterns +;; and a move after GCSE optimizations. +;; +;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. +(define_expand "calculate_pic_address" + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" +) + +;; Split calculate_pic_address into pic_load_addr_* and a move. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" + [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) + (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] + "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" +) + +;; operand1 is the memory address to go into +;; pic_load_addr_32bit. +;; operand2 is the PIC label to be emitted +;; from pic_add_dot_plus_eight. +;; We do this to allow hoisting of the entire insn. +(define_insn_and_split "pic_load_addr_unified" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,l") + (unspec:SI [(match_operand:SI 1 "" "mX,mX,mX") + (match_operand:SI 2 "" "")] + UNSPEC_PIC_UNIFIED))] + "flag_pic" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_PIC_SYM)) + (set (match_dup 0) (unspec:SI [(match_dup 0) (match_dup 3) + (match_dup 2)] UNSPEC_PIC_BASE))] + "operands[3] = TARGET_THUMB ? GEN_INT (4) : GEN_INT (8);" + [(set_attr "type" "load1,load1,load1") + (set_attr "pool_range" "4096,4096,1024") + (set_attr "neg_pool_range" "4084,0,0") + (set_attr "arch" "a,t2,t1") + (set_attr "length" "8,6,4")] +) + +;; The rather odd constraints on the following are to force reload to leave +;; the insn alone, and to force the minipool generation pass to then move +;; the GOT symbol to memory. 
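+;; Editor's note (illustrative sketch, not part of the upstream sources):
+;; after the calculate_pic_address split above, loading the address of a
+;; global symbol under -fPIC typically ends up as two loads, roughly
+;;
+;;         ldr     r3, .LCn          @ pic_load_addr_*: pool word holding the
+;;                                   @ symbol's GOT offset
+;;         ldr     r0, [rPIC, r3]    @ fetch the address from the GOT slot
+;;
+;; where r0, r3, .LCn and rPIC (the PIC base register) are placeholders.
+;; In pic_load_addr_unified, the 8 (ARM) or 4 (Thumb) passed to
+;; UNSPEC_PIC_BASE reflects how far the pc reads ahead of the add emitted
+;; by the pic_add_dot_plus_* patterns below.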
+ +(define_insn "pic_load_addr_32bit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] + "TARGET_32BIT && flag_pic" + "ldr%?\\t%0, %1" + [(set_attr "type" "load1") + (set_attr "pool_range" "4096") + (set (attr "neg_pool_range") + (if_then_else (eq_attr "is_thumb" "no") + (const_int 4084) + (const_int 0)))] +) + +(define_insn "pic_load_addr_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] + "TARGET_THUMB1 && flag_pic" + "ldr\\t%0, %1" + [(set_attr "type" "load1") + (set (attr "pool_range") (const_int 1024))] +) + +(define_insn "pic_add_dot_plus_four" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0") + (const_int 4) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE))] + "TARGET_THUMB" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"add\\t%0, %|pc\"; + " + [(set_attr "length" "2")] +) + +(define_insn "pic_add_dot_plus_eight" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (const_int 8) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE))] + "TARGET_ARM" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"add%?\\t%0, %|pc, %1\"; + " + [(set_attr "predicable" "yes")] +) + +(define_insn "tls_load_dot_plus_eight" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (const_int 8) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE)))] + "TARGET_ARM" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"ldr%?\\t%0, [%|pc, %1]\t\t@ tls_load_dot_plus_eight\"; + " + [(set_attr "predicable" "yes")] +) + +;; PIC references to local variables can generate pic_add_dot_plus_eight +;; followed by a load. These sequences can be crunched down to +;; tls_load_dot_plus_eight by a peephole. + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 3 "register_operand" "") + (const_int 8) + (match_operand 1 "" "")] + UNSPEC_PIC_BASE)) + (set (match_operand:SI 2 "arm_general_register_operand" "") + (mem:SI (match_dup 0)))] + "TARGET_ARM && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (mem:SI (unspec:SI [(match_dup 3) + (const_int 8) + (match_dup 1)] + UNSPEC_PIC_BASE)))] + "" +) + +(define_insn "pic_offset_arm" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "" "X")] + UNSPEC_PIC_OFFSET))))] + "TARGET_VXWORKS_RTP && TARGET_ARM && flag_pic" + "ldr%?\\t%0, [%1,%2]" + [(set_attr "type" "load1")] +) + +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "flag_pic" + " +{ + /* r3 is clobbered by set/longjmp, so we can use it as a scratch + register. */ + if (arm_pic_register != INVALID_REGNUM) + arm_load_pic_register (1UL << 3); + DONE; +}") + +;; If copying one reg to another we can set the condition codes according to +;; its value. Such a move is common after a return from subroutine and the +;; result is being tested against zero. 
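+;; Editor's note (illustrative, not part of the upstream sources): the
+;; pattern below lets such a "mov rD, rS" / "cmp rD, #0" pair be folded
+;; into a single flag-setting copy, e.g.
+;;
+;;         mov r0, r4
+;;         cmp r0, #0        ==>     subs r0, r4, #0
+;;
+;; (or just "cmp r0, #0" when source and destination are the same
+;; register).  r0 and r4 are placeholder registers.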
+ +(define_insn "*movsi_compare0" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "s_register_operand" "0,r") + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_dup 1))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, #0 + sub%.\\t%0, %1, #0" + [(set_attr "conds" "set")] +) + +;; Subroutine to store a half word from a register into memory. +;; Operand 0 is the source register (HImode) +;; Operand 1 is the destination address in a register (SImode) + +;; In both this routine and the next, we must be careful not to spill +;; a memory address of reg+large_const into a separate PLUS insn, since this +;; can generate unrecognizable rtl. + +(define_expand "storehi" + [;; store the low byte + (set (match_operand 1 "" "") (match_dup 3)) + ;; extract the high byte + (set (match_dup 2) + (ashiftrt:SI (match_operand 0 "" "") (const_int 8))) + ;; store the high byte + (set (match_dup 4) (match_dup 5))] + "TARGET_ARM" + " + { + rtx op1 = operands[1]; + rtx addr = XEXP (op1, 0); + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && GET_CODE (XEXP (addr, 1)) != CONST_INT) + || code == MINUS) + op1 = replace_equiv_address (operands[1], force_reg (SImode, addr)); + + operands[4] = adjust_address (op1, QImode, 1); + operands[1] = adjust_address (operands[1], QImode, 0); + operands[3] = gen_lowpart (QImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_reg_rtx (SImode); + operands[5] = gen_lowpart (QImode, operands[2]); + }" +) + +(define_expand "storehi_bigend" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 2) + (ashiftrt:SI (match_operand 0 "" "") (const_int 8))) + (set (match_operand 1 "" "") (match_dup 5))] + "TARGET_ARM" + " + { + rtx op1 = operands[1]; + rtx addr = XEXP (op1, 0); + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && GET_CODE (XEXP (addr, 1)) != CONST_INT) + || code == MINUS) + op1 = replace_equiv_address (op1, force_reg (SImode, addr)); + + operands[4] = adjust_address (op1, QImode, 1); + operands[1] = adjust_address (operands[1], QImode, 0); + operands[3] = gen_lowpart (QImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_reg_rtx (SImode); + operands[5] = gen_lowpart (QImode, operands[2]); + }" +) + +;; Subroutine to store a half word integer constant into memory. 
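+;; Editor's note (illustrative, not part of the upstream sources): the
+;; storehi/storehi_bigend expanders above and storeinthi below exist
+;; because halfword stores (strh) only arrived with ARMv4, so on older
+;; cores a HImode store is synthesized from two QImode stores.  Roughly,
+;; for a little-endian target:
+;;
+;;         *(addr)     = value & 0xff;        /* low byte          */
+;;         tmp         = value >> 8;          /* arithmetic shift  */
+;;         *(addr + 1) = tmp & 0xff;          /* high byte         */
+;;
+;; storehi_bigend swaps the two byte addresses; storeinthi does the same
+;; job when the value is a compile-time constant, reusing one register
+;; when both byte values happen to be equal.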
+(define_expand "storeinthi" + [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (set (match_dup 3) (match_dup 2))] + "TARGET_ARM" + " + { + HOST_WIDE_INT value = INTVAL (operands[1]); + rtx addr = XEXP (operands[0], 0); + rtx op0 = operands[0]; + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && GET_CODE (XEXP (addr, 1)) != CONST_INT) + || code == MINUS) + op0 = replace_equiv_address (op0, force_reg (SImode, addr)); + + operands[1] = gen_reg_rtx (SImode); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_movsi (operands[1], GEN_INT ((value >> 8) & 255))); + if ((value & 255) == ((value >> 8) & 255)) + operands[2] = operands[1]; + else + { + operands[2] = gen_reg_rtx (SImode); + emit_insn (gen_movsi (operands[2], GEN_INT (value & 255))); + } + } + else + { + emit_insn (gen_movsi (operands[1], GEN_INT (value & 255))); + if ((value & 255) == ((value >> 8) & 255)) + operands[2] = operands[1]; + else + { + operands[2] = gen_reg_rtx (SImode); + emit_insn (gen_movsi (operands[2], GEN_INT ((value >> 8) & 255))); + } + } + + operands[3] = adjust_address (op0, QImode, 1); + operands[0] = adjust_address (operands[0], QImode, 0); + operands[2] = gen_lowpart (QImode, operands[2]); + operands[1] = gen_lowpart (QImode, operands[1]); + }" +) + +(define_expand "storehi_single_op" + [(set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "TARGET_32BIT && arm_arch4" + " + if (!s_register_operand (operands[1], HImode)) + operands[1] = copy_to_mode_reg (HImode, operands[1]); + " +) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_ARM) + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) == MEM) + { + if (arm_arch4) + { + emit_insn (gen_storehi_single_op (operands[0], operands[1])); + DONE; + } + if (GET_CODE (operands[1]) == CONST_INT) + emit_insn (gen_storeinthi (operands[0], operands[1])); + else + { + if (GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (HImode, operands[1]); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_storehi_bigend (operands[1], operands[0])); + else + emit_insn (gen_storehi (operands[1], operands[0])); + } + DONE; + } + /* Sign extend a constant, and keep it in an SImode reg. */ + else if (GET_CODE (operands[1]) == CONST_INT) + { + rtx reg = gen_reg_rtx (SImode); + HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff; + + /* If the constant is already valid, leave it alone. */ + if (!const_ok_for_arm (val)) + { + /* If setting all the top bits will make the constant + loadable in a single instruction, then set them. + Otherwise, sign extend the number. 
*/ + + if (const_ok_for_arm (~(val | ~0xffff))) + val |= ~0xffff; + else if (val & 0x8000) + val |= ~0xffff; + } + + emit_insn (gen_movsi (reg, GEN_INT (val))); + operands[1] = gen_lowpart (HImode, reg); + } + else if (arm_arch4 && optimize && can_create_pseudo_p () + && GET_CODE (operands[1]) == MEM) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + else if (!arm_arch4) + { + if (GET_CODE (operands[1]) == MEM) + { + rtx base; + rtx offset = const0_rtx; + rtx reg = gen_reg_rtx (SImode); + + if ((GET_CODE (base = XEXP (operands[1], 0)) == REG + || (GET_CODE (base) == PLUS + && (GET_CODE (offset = XEXP (base, 1)) + == CONST_INT) + && ((INTVAL(offset) & 1) != 1) + && GET_CODE (base = XEXP (base, 0)) == REG)) + && REGNO_POINTER_ALIGN (REGNO (base)) >= 32) + { + rtx new_rtx; + + new_rtx = widen_memory_access (operands[1], SImode, + ((INTVAL (offset) & ~3) + - INTVAL (offset))); + emit_insn (gen_movsi (reg, new_rtx)); + if (((INTVAL (offset) & 2) != 0) + ^ (BYTES_BIG_ENDIAN ? 1 : 0)) + { + rtx reg2 = gen_reg_rtx (SImode); + + emit_insn (gen_lshrsi3 (reg2, reg, GEN_INT (16))); + reg = reg2; + } + } + else + emit_insn (gen_movhi_bytes (reg, operands[1])); + + operands[1] = gen_lowpart (HImode, reg); + } + } + } + /* Handle loading a large integer during reload. */ + else if (GET_CODE (operands[1]) == CONST_INT + && !const_ok_for_arm (INTVAL (operands[1])) + && !const_ok_for_arm (~INTVAL (operands[1]))) + { + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. */ + gcc_assert (GET_CODE (operands[0]) == REG); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + } + else if (TARGET_THUMB2) + { + /* Thumb-2 can do everything except mem=mem and mem=const easily. */ + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (HImode, operands[1]); + /* Zero extend a constant, and keep it in an SImode reg. */ + else if (GET_CODE (operands[1]) == CONST_INT) + { + rtx reg = gen_reg_rtx (SImode); + HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff; + + emit_insn (gen_movsi (reg, GEN_INT (val))); + operands[1] = gen_lowpart (HImode, reg); + } + } + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[1]) == CONST_INT) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + + /* ??? We shouldn't really get invalid addresses here, but this can + happen if we are passed a SP (never OK for HImode/QImode) or + virtual register (also rejected as illegitimate for HImode/QImode) + relative address. */ + /* ??? This should perhaps be fixed elsewhere, for instance, in + fixup_stack_1, by checking for other kinds of invalid addresses, + e.g. a bare reference to a virtual register. This may confuse the + alpha though, which must handle this case differently. 
*/ + if (GET_CODE (operands[0]) == MEM + && !memory_address_p (GET_MODE (operands[0]), + XEXP (operands[0], 0))) + operands[0] + = replace_equiv_address (operands[0], + copy_to_reg (XEXP (operands[0], 0))); + + if (GET_CODE (operands[1]) == MEM + && !memory_address_p (GET_MODE (operands[1]), + XEXP (operands[1], 0))) + operands[1] + = replace_equiv_address (operands[1], + copy_to_reg (XEXP (operands[1], 0))); + + if (GET_CODE (operands[1]) == MEM && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (HImode, operands[1]); + } + else if (GET_CODE (operands[1]) == CONST_INT + && !satisfies_constraint_I (operands[1])) + { + /* Handle loading a large integer during reload. */ + + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. */ + gcc_assert (GET_CODE (operands[0]) == REG); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + } + " +) + +(define_insn "*thumb1_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l") + (match_operand:HI 1 "general_operand" "l,m,l,*h,*r,I"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "* + switch (which_alternative) + { + case 0: return \"add %0, %1, #0\"; + case 2: return \"strh %1, %0\"; + case 3: return \"mov %0, %1\"; + case 4: return \"mov %0, %1\"; + case 5: return \"mov %0, %1\"; + default: gcc_unreachable (); + case 1: + /* The stack pointer can end up being taken as an index register. + Catch this case here and deal with it. */ + if (GET_CODE (XEXP (operands[1], 0)) == PLUS + && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG + && REGNO (XEXP (XEXP (operands[1], 0), 0)) == SP_REGNUM) + { + rtx ops[2]; + ops[0] = operands[0]; + ops[1] = XEXP (XEXP (operands[1], 0), 0); + + output_asm_insn (\"mov %0, %1\", ops); + + XEXP (XEXP (operands[1], 0), 0) = operands[0]; + + } + return \"ldrh %0, %1\"; + }" + [(set_attr "length" "2,4,2,2,2,2") + (set_attr "type" "*,load1,store1,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) + + +(define_expand "movhi_bytes" + [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" ""))) + (set (match_dup 3) + (zero_extend:SI (match_dup 6))) + (set (match_operand:SI 0 "" "") + (ior:SI (ashift:SI (match_dup 4) (const_int 8)) (match_dup 5)))] + "TARGET_ARM" + " + { + rtx mem1, mem2; + rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + + mem1 = change_address (operands[1], QImode, addr); + mem2 = change_address (operands[1], QImode, plus_constant (addr, 1)); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = mem1; + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[6] = mem2; + + if (BYTES_BIG_ENDIAN) + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } + else + { + operands[4] = operands[3]; + operands[5] = operands[2]; + } + }" +) + +(define_expand "movhi_bigend" + [(set (match_dup 2) + (rotate:SI (subreg:SI (match_operand:HI 1 "memory_operand" "") 0) + (const_int 16))) + (set (match_dup 3) + (ashiftrt:SI (match_dup 2) (const_int 16))) + (set (match_operand:HI 0 "s_register_operand" "") + (match_dup 4))] + "TARGET_ARM" + " + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[4] = gen_lowpart (HImode, operands[3]); + " 
+) + +;; Pattern to recognize insn generated default case above +(define_insn "*movhi_insn_arch4" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,K,r,mi"))] + "TARGET_ARM + && arm_arch4 + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "@ + mov%?\\t%0, %1\\t%@ movhi + mvn%?\\t%0, #%B1\\t%@ movhi + str%(h%)\\t%1, %0\\t%@ movhi + ldr%(h%)\\t%0, %1\\t%@ movhi" + [(set_attr "type" "*,*,store1,load1") + (set_attr "predicable" "yes") + (set_attr "insn" "mov,mvn,*,*") + (set_attr "pool_range" "*,*,*,256") + (set_attr "neg_pool_range" "*,*,*,244")] +) + +(define_insn "*movhi_bytes" + [(set (match_operand:HI 0 "s_register_operand" "=r,r") + (match_operand:HI 1 "arm_rhs_operand" "rI,K"))] + "TARGET_ARM" + "@ + mov%?\\t%0, %1\\t%@ movhi + mvn%?\\t%0, #%B1\\t%@ movhi" + [(set_attr "predicable" "yes") + (set_attr "insn" "mov,mvn")] +) + +(define_expand "thumb_movhi_clobber" + [(set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "TARGET_THUMB1" + " + if (strict_memory_address_p (HImode, XEXP (operands[0], 0)) + && REGNO (operands[1]) <= LAST_LO_REGNUM) + { + emit_insn (gen_movhi (operands[0], operands[1])); + DONE; + } + /* XXX Fixme, need to handle other cases here as well. */ + gcc_unreachable (); + " +) + +;; We use a DImode scratch because we may occasionally need an additional +;; temporary if the address isn't offsettable -- push_reload doesn't seem +;; to take any notice of the "o" constraints on reload_memory_operand operand. +(define_expand "reload_outhi" + [(parallel [(match_operand:HI 0 "arm_reload_memory_operand" "=o") + (match_operand:HI 1 "s_register_operand" "r") + (match_operand:DI 2 "s_register_operand" "=&l")])] + "TARGET_EITHER" + "if (TARGET_ARM) + arm_reload_out_hi (operands); + else + thumb_reload_out_hi (operands); + DONE; + " +) + +(define_expand "reload_inhi" + [(parallel [(match_operand:HI 0 "s_register_operand" "=r") + (match_operand:HI 1 "arm_reload_memory_operand" "o") + (match_operand:DI 2 "s_register_operand" "=&r")])] + "TARGET_EITHER" + " + if (TARGET_ARM) + arm_reload_in_hi (operands); + else + thumb_reload_out_hi (operands); + DONE; +") + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "TARGET_EITHER" + " + /* Everything except mem = const or mem = mem can be done easily */ + + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[1]) == CONST_INT) + { + rtx reg = gen_reg_rtx (SImode); + + /* For thumb we want an unsigned immediate, then we are more likely + to be able to use a movs insn. */ + if (TARGET_THUMB) + operands[1] = GEN_INT (INTVAL (operands[1]) & 255); + + emit_insn (gen_movsi (reg, operands[1])); + operands[1] = gen_lowpart (QImode, reg); + } + + if (TARGET_THUMB) + { + /* ??? We shouldn't really get invalid addresses here, but this can + happen if we are passed a SP (never OK for HImode/QImode) or + virtual register (also rejected as illegitimate for HImode/QImode) + relative address. */ + /* ??? This should perhaps be fixed elsewhere, for instance, in + fixup_stack_1, by checking for other kinds of invalid addresses, + e.g. a bare reference to a virtual register. This may confuse the + alpha though, which must handle this case differently. 
*/ + if (GET_CODE (operands[0]) == MEM + && !memory_address_p (GET_MODE (operands[0]), + XEXP (operands[0], 0))) + operands[0] + = replace_equiv_address (operands[0], + copy_to_reg (XEXP (operands[0], 0))); + if (GET_CODE (operands[1]) == MEM + && !memory_address_p (GET_MODE (operands[1]), + XEXP (operands[1], 0))) + operands[1] + = replace_equiv_address (operands[1], + copy_to_reg (XEXP (operands[1], 0))); + } + + if (GET_CODE (operands[1]) == MEM && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (reg, operands[1])); + operands[1] = gen_lowpart (QImode, reg); + } + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (QImode, operands[1]); + } + else if (TARGET_THUMB + && GET_CODE (operands[1]) == CONST_INT + && !satisfies_constraint_I (operands[1])) + { + /* Handle loading a large integer during reload. */ + + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. */ + gcc_assert (GET_CODE (operands[0]) == REG); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + " +) + + +(define_insn "*arm_movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:QI 1 "general_operand" "rI,K,m,r"))] + "TARGET_32BIT + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0" + [(set_attr "type" "*,*,load1,store1") + (set_attr "insn" "mov,mvn,*,*") + (set_attr "predicable" "yes")] +) + +(define_insn "*thumb1_movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l") + (match_operand:QI 1 "general_operand" "l, m,l,*h,*r,I"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + add\\t%0, %1, #0 + ldrb\\t%0, %1 + strb\\t%1, %0 + mov\\t%0, %1 + mov\\t%0, %1 + mov\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "*,load1,store1,*,*,*") + (set_attr "insn" "*,*,*,mov,mov,mov") + (set_attr "pool_range" "*,32,*,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) + +;; HFmode moves +(define_expand "movhf" + [(set (match_operand:HF 0 "general_operand" "") + (match_operand:HF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (HFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (HFmode, operands[1]); + } + } + " +) + +(define_insn "*arm32_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") + (match_operand:HF 1 "general_operand" " m,r,r,F"))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"str%(h%)\\t%1, %0\\t%@ __fp16\"; + case 2: /* ARM register from ARM register */ + return \"mov%?\\t%0, %1\\t%@ __fp16\"; + case 3: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if 
(arm_arch_thumb2) + output_asm_insn (\"movw%?\\t%0, %1\", ops); + else + output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,*,*") + (set_attr "insn" "*,*,mov,mov") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes")] +) + +(define_insn "*thumb1_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") + (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 1: + { + rtx addr; + gcc_assert (GET_CODE(operands[1]) == MEM); + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == LABEL_REF + || (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)) + { + /* Constant pool entry. */ + return \"ldr\\t%0, %1\"; + } + return \"ldrh\\t%0, %1\"; + } + case 2: return \"strh\\t%1, %0\"; + default: return \"mov\\t%0, %1\"; + } + " + [(set_attr "length" "2") + (set_attr "type" "*,load1,store1,*,*") + (set_attr "insn" "mov,*,*,mov,mov") + (set_attr "pool_range" "*,1020,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond")]) + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (SFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (SFmode, operands[1]); + } + } + " +) + +;; Transform a floating-point move of a constant into a core register into +;; an SImode operation. +(define_split + [(set (match_operand:SF 0 "arm_general_register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "TARGET_EITHER + && reload_completed + && GET_CODE (operands[1]) == CONST_DOUBLE" + [(set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_lowpart (SImode, operands[1]); + if (operands[2] == 0 || operands[3] == 0) + FAIL; + " +) + +(define_insn "*arm_movsf_soft_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:SF 1 "general_operand" "r,mE,r"))] + "TARGET_32BIT + && TARGET_SOFT_FLOAT + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], SFmode))" + "@ + mov%?\\t%0, %1 + ldr%?\\t%0, %1\\t%@ float + str%?\\t%1, %0\\t%@ float" + [(set_attr "predicable" "yes") + (set_attr "type" "*,load1,store1") + (set_attr "insn" "mov,*,*") + (set_attr "pool_range" "*,4096,*") + (set_attr "arm_neg_pool_range" "*,4084,*") + (set_attr "thumb2_neg_pool_range" "*,0,*")] +) + +;;; ??? This should have alternatives for constants. 
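
For reference, a small standalone C sketch (not part of GCC; the hf_const table and the names used are illustrative) of what the constant alternative of *arm32_movhf above does with a __fp16 constant: it materialises the 16-bit IEEE binary16 encoding either with a single movw on Thumb-2, or with mov of the high byte (bits & 0xff00) followed by orr of the low byte (bits & 0x00ff).

/* Illustrative only: binary16 encodings are precomputed, matching what
   real_to_target would produce for these values.  */
#include <stdio.h>

struct hf_const { const char *value; unsigned bits; };

int
main (void)
{
  /* sign | 5-bit exponent | 10-bit mantissa */
  struct hf_const consts[] = {
    { "1.0",  0x3C00 },
    { "0.5",  0x3800 },
    { "-2.0", 0xC000 },
  };

  for (unsigned i = 0; i < 3; i++)
    {
      unsigned bits = consts[i].bits;
      printf ("%-5s -> movw r0, #0x%04X            (Thumb-2)\n",
              consts[i].value, bits);
      printf ("%-5s -> mov r0, #0x%04X ; orr r0, r0, #0x%02X\n",
              consts[i].value, bits & 0xff00, bits & 0x00ff);
    }
  return 0;
}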
+(define_insn "*thumb1_movsf_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h") + (match_operand:SF 1 "general_operand" "l, >,l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode))" + "@ + add\\t%0, %1, #0 + ldmia\\t%1, {%0} + stmia\\t%0, {%1} + ldr\\t%0, %1 + str\\t%1, %0 + mov\\t%0, %1 + mov\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "*,load1,store1,load1,store1,*,*") + (set_attr "pool_range" "*,*,*,1020,*,*,*") + (set_attr "insn" "*,*,*,*,*,mov,mov") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")] +) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (DFmode, operands[1]); + } + else /* TARGET_THUMB */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (DFmode, operands[1]); + } + } + " +) + +;; Reloading a df mode value stored in integer regs to memory can require a +;; scratch reg. +(define_expand "reload_outdf" + [(match_operand:DF 0 "arm_reload_memory_operand" "=o") + (match_operand:DF 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "=&r")] + "TARGET_32BIT" + " + { + enum rtx_code code = GET_CODE (XEXP (operands[0], 0)); + + if (code == REG) + operands[2] = XEXP (operands[0], 0); + else if (code == POST_INC || code == PRE_DEC) + { + operands[0] = gen_rtx_SUBREG (DImode, operands[0], 0); + operands[1] = gen_rtx_SUBREG (DImode, operands[1], 0); + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (code == PRE_INC) + { + rtx reg = XEXP (XEXP (operands[0], 0), 0); + + emit_insn (gen_addsi3 (reg, reg, GEN_INT (8))); + operands[2] = reg; + } + else if (code == POST_DEC) + operands[2] = XEXP (XEXP (operands[0], 0), 0); + else + emit_insn (gen_addsi3 (operands[2], XEXP (XEXP (operands[0], 0), 0), + XEXP (XEXP (operands[0], 0), 1))); + + emit_insn (gen_rtx_SET (VOIDmode, + replace_equiv_address (operands[0], operands[2]), + operands[1])); + + if (code == POST_DEC) + emit_insn (gen_addsi3 (operands[2], operands[2], GEN_INT (-8))); + + DONE; + }" +) + +(define_insn "*movdf_soft_insn" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m") + (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))] + "TARGET_32BIT && TARGET_SOFT_FLOAT + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + default: + return output_move_double (operands); + } + " + [(set_attr "length" "8,12,16,8,8") + (set_attr "type" "*,*,*,load2,store2") + (set_attr "pool_range" "*,*,*,1020,*") + (set_attr "arm_neg_pool_range" "*,*,*,1008,*") + (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")] +) + +;;; ??? This should have alternatives for constants. +;;; ??? This was originally identical to the movdi_insn pattern. +;;; ??? The 'F' constraint looks funny, but it should always be replaced by +;;; thumb_reorg with a memory reference. 
+(define_insn "*thumb_movdf_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=l,l,>,l, m,*r") + (match_operand:DF 1 "general_operand" "l, >,l,mF,l,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + switch (which_alternative) + { + default: + case 0: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\"; + return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\"; + case 1: + return \"ldmia\\t%1, {%0, %H0}\"; + case 2: + return \"stmia\\t%0, {%1, %H1}\"; + case 3: + return thumb_load_double_from_address (operands); + case 4: + operands[2] = gen_rtx_MEM (SImode, + plus_constant (XEXP (operands[0], 0), 4)); + output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands); + return \"\"; + case 5: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"mov\\t%0, %1\;mov\\t%H0, %H1\"; + return \"mov\\t%H0, %H1\;mov\\t%0, %1\"; + } + " + [(set_attr "length" "4,2,2,6,4,4") + (set_attr "type" "*,load2,store2,load2,store2,*") + (set_attr "insn" "*,*,*,*,*,mov") + (set_attr "pool_range" "*,*,*,1020,*,*")] +) + +(define_expand "movxf" + [(set (match_operand:XF 0 "general_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + " + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (XFmode, operands[1]); + " +) + + + +;; load- and store-multiple insns +;; The arm can load/store any set of registers, provided that they are in +;; ascending order, but these expanders assume a contiguous set. + +(define_expand "load_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "TARGET_32BIT" +{ + HOST_WIDE_INT offset = 0; + + /* Support only fixed point registers. */ + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) > 14 + || INTVAL (operands[2]) < 2 + || GET_CODE (operands[1]) != MEM + || GET_CODE (operands[0]) != REG + || REGNO (operands[0]) > (LAST_ARM_REGNUM - 1) + || REGNO (operands[0]) + INTVAL (operands[2]) > LAST_ARM_REGNUM) + FAIL; + + operands[3] + = arm_gen_load_multiple (arm_regs_in_sequence + REGNO (operands[0]), + INTVAL (operands[2]), + force_reg (SImode, XEXP (operands[1], 0)), + FALSE, operands[1], &offset); +}) + +(define_expand "store_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "TARGET_32BIT" +{ + HOST_WIDE_INT offset = 0; + + /* Support only fixed point registers. */ + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) > 14 + || INTVAL (operands[2]) < 2 + || GET_CODE (operands[1]) != REG + || GET_CODE (operands[0]) != MEM + || REGNO (operands[1]) > (LAST_ARM_REGNUM - 1) + || REGNO (operands[1]) + INTVAL (operands[2]) > LAST_ARM_REGNUM) + FAIL; + + operands[3] + = arm_gen_store_multiple (arm_regs_in_sequence + REGNO (operands[1]), + INTVAL (operands[2]), + force_reg (SImode, XEXP (operands[0], 0)), + FALSE, operands[0], &offset); +}) + + +;; Move a block of memory if it is word aligned and MORE than 2 words long. +;; We could let this apply for blocks of less than this, but it clobbers so +;; many registers that there is then probably a better way. 
+ +(define_expand "movmemqi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (arm_gen_movmemqi (operands)) + DONE; + FAIL; + } + else /* TARGET_THUMB1 */ + { + if ( INTVAL (operands[3]) != 4 + || INTVAL (operands[2]) > 48) + FAIL; + + thumb_expand_movmemqi (operands); + DONE; + } + " +) + +;; Thumb block-move insns + +(define_insn "movmem12b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (mem:SI (plus:SI (match_dup 3) (const_int 4)))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (mem:SI (plus:SI (match_dup 3) (const_int 8)))) + (set (match_operand:SI 0 "register_operand" "=l") + (plus:SI (match_dup 2) (const_int 12))) + (set (match_operand:SI 1 "register_operand" "=l") + (plus:SI (match_dup 3) (const_int 12))) + (clobber (match_scratch:SI 4 "=&l")) + (clobber (match_scratch:SI 5 "=&l")) + (clobber (match_scratch:SI 6 "=&l"))] + "TARGET_THUMB1" + "* return thumb_output_move_mem_multiple (3, operands);" + [(set_attr "length" "4") + ; This isn't entirely accurate... It loads as well, but in terms of + ; scheduling the following insn it is better to consider it as a store + (set_attr "type" "store3")] +) + +(define_insn "movmem8b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (mem:SI (plus:SI (match_dup 3) (const_int 4)))) + (set (match_operand:SI 0 "register_operand" "=l") + (plus:SI (match_dup 2) (const_int 8))) + (set (match_operand:SI 1 "register_operand" "=l") + (plus:SI (match_dup 3) (const_int 8))) + (clobber (match_scratch:SI 4 "=&l")) + (clobber (match_scratch:SI 5 "=&l"))] + "TARGET_THUMB1" + "* return thumb_output_move_mem_multiple (2, operands);" + [(set_attr "length" "4") + ; This isn't entirely accurate... It loads as well, but in terms of + ; scheduling the following insn it is better to consider it as a store + (set_attr "type" "store2")] +) + + + +;; Compare & branch insns +;; The range calculations are based as follows: +;; For forward branches, the address calculation returns the address of +;; the next instruction. This is 2 beyond the branch instruction. +;; For backward branches, the address calculation returns the address of +;; the first instruction in this pattern (cmp). This is 2 before the branch +;; instruction for the shortest sequence, and 4 before the branch instruction +;; if we have to jump around an unconditional branch. +;; To the basic branch range the PC offset must be added (this is +4). +;; So for forward branches we have +;; (pos_range - pos_base_offs + pc_offs) = (pos_range - 2 + 4). +;; And for backward branches we have +;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4). +;; +;; For a 'b' pos_range = 2046, neg_range = -2048 giving (-2040->2048). +;; For a 'b' pos_range = 254, neg_range = -256 giving (-250 ->256). 
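
The ranges worked out above reappear in the "length" attributes of the Thumb-1 compare-and-branch patterns that follow. A standalone C sketch of that selection (thumb1_cbranch_length is an illustrative name, not a GCC function; the constants mirror the attribute expressions below):

#include <stdio.h>

/* OFFSET is the byte distance from the pattern to the label, as in
   (minus (match_dup 3) (pc)).  A short conditional branch reaches
   -250..256, a conditional branch around an unconditional one reaches
   -2040..2048, and anything further needs the far_jump (bl) form.  */
static int
thumb1_cbranch_length (long offset, int *far_jump)
{
  *far_jump = 0;
  if (offset >= -250 && offset <= 256)
    return 4;                   /* cmp ; b<cond> label           */
  if (offset >= -2040 && offset <= 2048)
    return 6;                   /* cmp ; b<inv> .LCB ; b label   */
  *far_jump = 1;
  return 8;                     /* cmp ; b<inv> .LCB ; bl label  */
}

int
main (void)
{
  long tests[] = { 100, -1000, 70000 };
  for (int i = 0; i < 3; i++)
    {
      int fj, len = thumb1_cbranch_length (tests[i], &fj);
      printf ("offset %ld -> length %d, far_jump=%d\n", tests[i], len, fj);
    }
  return 0;
}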
+ +(define_expand "cbranchsi4" + [(set (pc) (if_then_else + (match_operator 0 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1 || TARGET_32BIT" + " + if (!TARGET_THUMB1) + { + if (!arm_add_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + if (thumb1_cmpneg_operand (operands[2], SImode)) + { + emit_jump_insn (gen_cbranchsi4_scratch (NULL, operands[1], operands[2], + operands[3], operands[0])); + DONE; + } + if (!thumb1_cmp_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + ") + +;; A pattern to recognize a special situation and optimize for it. +;; On the thumb, zero-extension from memory is preferrable to sign-extension +;; due to the available addressing modes. Hence, convert a signed comparison +;; with zero into an unsigned comparison with 127 if possible. +(define_expand "cbranchqi4" + [(set (pc) (if_then_else + (match_operator 0 "lt_ge_comparison_operator" + [(match_operand:QI 1 "memory_operand" "") + (match_operand:QI 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" +{ + rtx xops[4]; + xops[1] = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendqisi2 (xops[1], operands[1])); + xops[2] = GEN_INT (127); + xops[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]) == GE ? LEU : GTU, + VOIDmode, xops[1], xops[2]); + xops[3] = operands[3]; + emit_insn (gen_cbranchsi4 (xops[0], xops[1], xops[2], xops[3])); + DONE; +}) + +(define_expand "cbranchsf4" + [(set (pc) (if_then_else + (match_operator 0 "arm_comparison_operator" + [(match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); DONE;" +) + +(define_expand "cbranchdf4" + [(set (pc) (if_then_else + (match_operator 0 "arm_comparison_operator" + [(match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); DONE;" +) + +(define_expand "cbranchdi4" + [(set (pc) (if_then_else + (match_operator 0 "arm_comparison_operator" + [(match_operand:DI 1 "cmpdi_operand" "") + (match_operand:DI 2 "cmpdi_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT" + "{ + rtx swap = NULL_RTX; + enum rtx_code code = GET_CODE (operands[0]); + + /* We should not have two constants. */ + gcc_assert (GET_MODE (operands[1]) == DImode + || GET_MODE (operands[2]) == DImode); + + /* Flip unimplemented DImode comparisons to a form that + arm_gen_compare_reg can handle. 
*/ + switch (code) + { + case GT: + swap = gen_rtx_LT (VOIDmode, operands[2], operands[1]); break; + case LE: + swap = gen_rtx_GE (VOIDmode, operands[2], operands[1]); break; + case GTU: + swap = gen_rtx_LTU (VOIDmode, operands[2], operands[1]); break; + case LEU: + swap = gen_rtx_GEU (VOIDmode, operands[2], operands[1]); break; + default: + break; + } + if (swap) + emit_jump_insn (gen_cbranch_cc (swap, operands[2], operands[1], + operands[3])); + else + emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + }" +) + +(define_insn "cbranchsi4_insn" + [(set (pc) (if_then_else + (match_operator 0 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "l,l*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" +{ + rtx t = cfun->machine->thumb1_cc_insn; + if (t != NULL_RTX) + { + if (!rtx_equal_p (cfun->machine->thumb1_cc_op0, operands[1]) + || !rtx_equal_p (cfun->machine->thumb1_cc_op1, operands[2])) + t = NULL_RTX; + if (cfun->machine->thumb1_cc_mode == CC_NOOVmode) + { + if (!noov_comparison_operator (operands[0], VOIDmode)) + t = NULL_RTX; + } + else if (cfun->machine->thumb1_cc_mode != CCmode) + t = NULL_RTX; + } + if (t == NULL_RTX) + { + output_asm_insn ("cmp\t%1, %2", operands); + cfun->machine->thumb1_cc_insn = insn; + cfun->machine->thumb1_cc_op0 = operands[1]; + cfun->machine->thumb1_cc_op1 = operands[2]; + cfun->machine->thumb1_cc_mode = CCmode; + } + else + /* Ensure we emit the right type of condition code on the jump. */ + XEXP (operands[0], 0) = gen_rtx_REG (cfun->machine->thumb1_cc_mode, + CC_REGNUM); + + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } +} + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))))] +) + +(define_insn "cbranchsi4_scratch" + [(set (pc) (if_then_else + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "l,0") + (match_operand:SI 2 "thumb1_cmpneg_operand" "L,J")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 0 "=l,l"))] + "TARGET_THUMB1" + "* + output_asm_insn (\"add\\t%0, %1, #%n2\", operands); + + switch (get_attr_length (insn)) + { + case 4: return \"b%d4\\t%l3\"; + case 6: return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))))] +) + +;; Two peepholes to generate subtract of 0 instead of a move if the +;; condition codes will be useful. 
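
These peepholes are valid because a flag-setting subtract of zero both copies the operand and leaves the condition codes exactly as a compare against zero would, since both forms compute x - 0. A minimal standalone C sketch of that equivalence, modelling just the N and Z flags (illustrative only, not GCC code; C and V also match because the same subtraction is performed):

#include <stdio.h>
#include <stdint.h>

struct flags { int n, z; };

static struct flags
cmp_zero (int32_t x)                     /* cmp  Rn, #0      */
{
  return (struct flags) { x < 0, x == 0 };
}

static struct flags
subs_zero (int32_t x, int32_t *dst)      /* subs Rd, Rn, #0  */
{
  *dst = x - 0;                          /* copy and set flags */
  return (struct flags) { *dst < 0, *dst == 0 };
}

int
main (void)
{
  int32_t samples[] = { -5, 0, 7 };
  for (int i = 0; i < 3; i++)
    {
      int32_t copy;
      struct flags a = cmp_zero (samples[i]);
      struct flags b = subs_zero (samples[i], &copy);
      printf ("%ld: mov+cmp N=%d Z=%d  subs N=%d Z=%d copy=%ld\n",
              (long) samples[i], a.n, a.z, b.n, b.z, (long) copy);
    }
  return 0;
}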
+(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 1) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(set (match_dup 0) (minus:SI (match_dup 1) (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 2 [(match_dup 0) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] + "") + +;; Sigh! This variant shouldn't be needed, but combine often fails to +;; merge cases like this because the op1 is a hard register in +;; arm_class_likely_spilled_p. +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operand:SI 1 "low_register_operand" "")) + (set (pc) + (if_then_else (match_operator 2 "arm_comparison_operator" + [(match_dup 0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + [(set (match_dup 0) (minus:SI (match_dup 1) (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 2 [(match_dup 0) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] + "") + +(define_insn "*negated_cbranchsi4" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(match_operand:SI 1 "s_register_operand" "l") + (neg:SI (match_operand:SI 2 "s_register_operand" "l"))]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + "* + output_asm_insn (\"cmn\\t%1, %2\", operands); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))))] +) + +(define_insn "*tbit_cbranch" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "i")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=l"))] + "TARGET_THUMB1" + "* + { + rtx op[3]; + op[0] = operands[4]; + op[1] = operands[1]; + op[2] = GEN_INT (32 - 1 - INTVAL (operands[2])); + + output_asm_insn (\"lsl\\t%0, %1, %2\", op); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))))] +) + +(define_insn "*tlobits_cbranch" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l") + (match_operand:SI 2 "const_int_operand" "i") + (const_int 0)) + (const_int 0)]) + 
(label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=l"))] + "TARGET_THUMB1" + "* + { + rtx op[3]; + op[0] = operands[4]; + op[1] = operands[1]; + op[2] = GEN_INT (32 - INTVAL (operands[2])); + + output_asm_insn (\"lsl\\t%0, %1, %2\", op); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))))] +) + +(define_insn "*tstsi3_cbranch" + [(set (pc) + (if_then_else + (match_operator 3 "equality_operator" + [(and:SI (match_operand:SI 0 "s_register_operand" "%l") + (match_operand:SI 1 "s_register_operand" "l")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_THUMB1" + "* + { + output_asm_insn (\"tst\\t%0, %1\", operands); + switch (get_attr_length (insn)) + { + case 4: return \"b%d3\\t%l2\"; + case 6: return \"b%D3\\t.LCB%=\;b\\t%l2\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D3\\t.LCB%=\;bl\\t%l2\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 2) (pc)) (const_int -250)) + (le (minus (match_dup 2) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 2) (pc)) (const_int -2040)) + (le (minus (match_dup 2) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))))] +) + +(define_insn "*cbranchne_decr1" + [(set (pc) + (if_then_else (match_operator 3 "equality_operator" + [(match_operand:SI 2 "s_register_operand" "l,l,1,l") + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,*?h,*?m,*?m") + (plus:SI (match_dup 2) (const_int -1))) + (clobber (match_scratch:SI 1 "=X,l,&l,&l"))] + "TARGET_THUMB1" + "* + { + rtx cond[2]; + cond[0] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE + ? GEU : LTU), + VOIDmode, operands[2], const1_rtx); + cond[1] = operands[4]; + + if (which_alternative == 0) + output_asm_insn (\"sub\\t%0, %2, #1\", operands); + else if (which_alternative == 1) + { + /* We must provide an alternative for a hi reg because reload + cannot handle output reloads on a jump instruction, but we + can't subtract into that. Fortunately a mov from lo to hi + does not clobber the condition codes. */ + output_asm_insn (\"sub\\t%1, %2, #1\", operands); + output_asm_insn (\"mov\\t%0, %1\", operands); + } + else + { + /* Similarly, but the target is memory. */ + output_asm_insn (\"sub\\t%1, %2, #1\", operands); + output_asm_insn (\"str\\t%1, %0\", operands); + } + + switch (get_attr_length (insn) - (which_alternative ? 
2 : 0)) + { + case 4: + output_asm_insn (\"b%d0\\t%l1\", cond); + return \"\"; + case 6: + output_asm_insn (\"b%D0\\t.LCB%=\", cond); + return \"b\\t%l4\\t%@long jump\\n.LCB%=:\"; + default: + output_asm_insn (\"b%D0\\t.LCB%=\", cond); + return \"bl\\t%l4\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (ior (and (eq (symbol_ref ("which_alternative")) + (const_int 0)) + (eq_attr "length" "8")) + (eq_attr "length" "10")) + (const_string "yes") + (const_string "no"))) + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -250)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))) + ;; Alternative 1 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))) + ;; Alternative 2 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))) + ;; Alternative 3 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10)))])] +) + +(define_insn "*addsi3_cbranch" + [(set (pc) + (if_then_else + (match_operator 4 "arm_comparison_operator" + [(plus:SI + (match_operand:SI 2 "s_register_operand" "%0,l,*l,1,1,1") + (match_operand:SI 3 "reg_or_int_operand" "IJ,lL,*l,lIJ,lIJ,lIJ")) + (const_int 0)]) + (label_ref (match_operand 5 "" "")) + (pc))) + (set + (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,l,*!h,*?h,*?m,*?m") + (plus:SI (match_dup 2) (match_dup 3))) + (clobber (match_scratch:SI 1 "=X,X,l,l,&l,&l"))] + "TARGET_THUMB1 + && (GET_CODE (operands[4]) == EQ + || GET_CODE (operands[4]) == NE + || GET_CODE (operands[4]) == GE + || GET_CODE (operands[4]) == LT)" + "* + { + rtx cond[3]; + + cond[0] = (which_alternative < 2) ? operands[0] : operands[1]; + cond[1] = operands[2]; + cond[2] = operands[3]; + + if (GET_CODE (cond[2]) == CONST_INT && INTVAL (cond[2]) < 0) + output_asm_insn (\"sub\\t%0, %1, #%n2\", cond); + else + output_asm_insn (\"add\\t%0, %1, %2\", cond); + + if (which_alternative >= 2 + && which_alternative < 4) + output_asm_insn (\"mov\\t%0, %1\", operands); + else if (which_alternative >= 4) + output_asm_insn (\"str\\t%1, %0\", operands); + + switch (get_attr_length (insn) - ((which_alternative >= 2) ? 
2 : 0)) + { + case 4: + return \"b%d4\\t%l5\"; + case 6: + return \"b%D4\\t.LCB%=\;b\\t%l5\\t%@long jump\\n.LCB%=:\"; + default: + return \"b%D4\\t.LCB%=\;bl\\t%l5\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (ior (and (lt (symbol_ref ("which_alternative")) + (const_int 2)) + (eq_attr "length" "8")) + (eq_attr "length" "10")) + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (lt (symbol_ref ("which_alternative")) + (const_int 2)) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -250)) + (le (minus (match_dup 5) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -2040)) + (le (minus (match_dup 5) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -248)) + (le (minus (match_dup 5) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -2038)) + (le (minus (match_dup 5) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10)))))] +) + +(define_insn "*addsi3_cbranch_scratch" + [(set (pc) + (if_then_else + (match_operator 3 "arm_comparison_operator" + [(plus:SI + (match_operand:SI 1 "s_register_operand" "%l,l,l,0") + (match_operand:SI 2 "reg_or_int_operand" "J,l,L,IJ")) + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (match_scratch:SI 0 "=X,X,l,l"))] + "TARGET_THUMB1 + && (GET_CODE (operands[3]) == EQ + || GET_CODE (operands[3]) == NE + || GET_CODE (operands[3]) == GE + || GET_CODE (operands[3]) == LT)" + "* + { + switch (which_alternative) + { + case 0: + output_asm_insn (\"cmp\t%1, #%n2\", operands); + break; + case 1: + output_asm_insn (\"cmn\t%1, %2\", operands); + break; + case 2: + if (INTVAL (operands[2]) < 0) + output_asm_insn (\"sub\t%0, %1, %2\", operands); + else + output_asm_insn (\"add\t%0, %1, %2\", operands); + break; + case 3: + if (INTVAL (operands[2]) < 0) + output_asm_insn (\"sub\t%0, %0, %2\", operands); + else + output_asm_insn (\"add\t%0, %0, %2\", operands); + break; + } + + switch (get_attr_length (insn)) + { + case 4: + return \"b%d3\\t%l4\"; + case 6: + return \"b%D3\\t.LCB%=\;b\\t%l4\\t%@long jump\\n.LCB%=:\"; + default: + return \"b%D3\\t.LCB%=\;bl\\t%l4\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -250)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))))] +) + + +;; Comparison and test insns + +(define_insn "*arm_cmpsi_insn" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "r,r") + (match_operand:SI 1 "arm_add_operand" "rI,L")))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, %1 + cmn%?\\t%0, #%n1" + [(set_attr "conds" "set")] +) + +(define_insn "*cmpsi_shiftsi" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "r,r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])))] + "TARGET_32BIT" + "cmp%?\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn 
"*cmpsi_shiftsi_swp" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")]) + (match_operand:SI 0 "s_register_operand" "r,r")))] + "TARGET_32BIT" + "cmp%?\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn "*arm_cmpsi_negshiftsi_si" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:SI (match_operator:SI 1 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "reg_or_int_operand" "rM")])) + (match_operand:SI 0 "s_register_operand" "r")))] + "TARGET_ARM" + "cmn%?\\t%0, %2%S1" + [(set_attr "conds" "set") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +;; DImode comparisons. The generic code generates branches that +;; if-conversion can not reduce to a conditional compare, so we do +;; that directly. + +(define_insn "*arm_cmpdi_insn" + [(set (reg:CC_NCV CC_REGNUM) + (compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r") + (match_operand:DI 1 "arm_di_operand" "rDi"))) + (clobber (match_scratch:SI 2 "=r"))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)" + "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1" + [(set_attr "conds" "set") + (set_attr "length" "8")] +) + +(define_insn "*arm_cmpdi_unsigned" + [(set (reg:CC_CZ CC_REGNUM) + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r") + (match_operand:DI 1 "arm_di_operand" "rDi")))] + "TARGET_ARM" + "cmp%?\\t%R0, %R1\;cmpeq\\t%Q0, %Q1" + [(set_attr "conds" "set") + (set_attr "length" "8")] +) + +(define_insn "*arm_cmpdi_zero" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:DI 0 "s_register_operand" "r") + (const_int 0))) + (clobber (match_scratch:SI 1 "=r"))] + "TARGET_32BIT" + "orr%.\\t%1, %Q0, %R0" + [(set_attr "conds" "set")] +) + +(define_insn "*thumb_cmpdi_zero" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:DI 0 "s_register_operand" "l") + (const_int 0))) + (clobber (match_scratch:SI 1 "=l"))] + "TARGET_THUMB1" + "orr\\t%1, %Q0, %R0" + [(set_attr "conds" "set") + (set_attr "length" "2")] +) + +;; Cirrus SF compare instruction +(define_insn "*cirrus_cmpsf" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:SF 0 "cirrus_fp_register" "v") + (match_operand:SF 1 "cirrus_fp_register" "v")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfcmps%?\\tr15, %V0, %V1" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "compare")] +) + +;; Cirrus DF compare instruction +(define_insn "*cirrus_cmpdf" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:DF 0 "cirrus_fp_register" "v") + (match_operand:DF 1 "cirrus_fp_register" "v")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfcmpd%?\\tr15, %V0, %V1" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "compare")] +) + +(define_insn "*cirrus_cmpdi" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:DI 0 "cirrus_fp_register" "v") + (match_operand:DI 1 "cirrus_fp_register" "v")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfcmp64%?\\tr15, %V0, %V1" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "compare")] +) + +; This insn allows redundant compares to be removed by cse, nothing should +; ever appear in the output file since (set (reg x) (reg x)) is a no-op that +; is deleted later on. 
The match_dup will match the mode here, so that +; mode changes of the condition codes aren't lost by this even though we don't +; specify what they are. + +(define_insn "*deleted_compare" + [(set (match_operand 0 "cc_register" "") (match_dup 0))] + "TARGET_32BIT" + "\\t%@ deleted compare" + [(set_attr "conds" "set") + (set_attr "length" "0")] +) + + +;; Conditional branch insns + +(define_expand "cbranch_cc" + [(set (pc) + (if_then_else (match_operator 0 "" [(match_operand 1 "" "") + (match_operand 2 "" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT" + "operands[1] = arm_gen_compare_reg (GET_CODE (operands[0]), + operands[1], operands[2]); + operands[2] = const0_rtx;" +) + +;; +;; Patterns to match conditional branch insns. +;; + +(define_insn "*arm_cond_branch" + [(set (pc) + (if_then_else (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_32BIT" + "* + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%d1\\t%l0\"; + " + [(set_attr "conds" "use") + (set_attr "type" "branch")] +) + +(define_insn "*arm_cond_branch_reversed" + [(set (pc) + (if_then_else (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "TARGET_32BIT" + "* + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%D1\\t%l0\"; + " + [(set_attr "conds" "use") + (set_attr "type" "branch")] +) + + + +; scc insns + +(define_expand "cstore_cc" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "" [(match_operand 2 "" "") + (match_operand 3 "" "")]))] + "TARGET_32BIT" + "operands[2] = arm_gen_compare_reg (GET_CODE (operands[1]), + operands[2], operands[3]); + operands[3] = const0_rtx;" +) + +(define_insn "*mov_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "TARGET_ARM" + "mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + [(set_attr "conds" "use") + (set_attr "insn" "mov") + (set_attr "length" "8")] +) + +(define_insn "*mov_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_ARM" + "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + [(set_attr "conds" "use") + (set_attr "insn" "mov") + (set_attr "length" "8")] +) + +(define_insn "*mov_notscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_ARM" + "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + [(set_attr "conds" "use") + (set_attr "insn" "mov") + (set_attr "length" "8")] +) + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "reg_or_int_operand" "")]))] + "TARGET_32BIT || TARGET_THUMB1" + "{ + rtx op3, scratch, scratch2; + + if (!TARGET_THUMB1) + { + if (!arm_add_operand (operands[3], SImode)) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (operands[3] == const0_rtx) + { + switch (GET_CODE (operands[1])) + { + case EQ: + emit_insn 
(gen_cstoresi_eq0_thumb1 (operands[0], operands[2])); + break; + + case NE: + emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], operands[2])); + break; + + case LE: + scratch = expand_binop (SImode, add_optab, operands[2], constm1_rtx, + NULL_RTX, 0, OPTAB_WIDEN); + scratch = expand_binop (SImode, ior_optab, operands[2], scratch, + NULL_RTX, 0, OPTAB_WIDEN); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), + operands[0], 1, OPTAB_WIDEN); + break; + + case GE: + scratch = expand_unop (SImode, one_cmpl_optab, operands[2], + NULL_RTX, 1); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + break; + + case GT: + scratch = expand_binop (SImode, ashr_optab, operands[2], + GEN_INT (31), NULL_RTX, 0, OPTAB_WIDEN); + scratch = expand_binop (SImode, sub_optab, scratch, operands[2], + NULL_RTX, 0, OPTAB_WIDEN); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), operands[0], + 0, OPTAB_WIDEN); + break; + + /* LT is handled by generic code. No need for unsigned with 0. */ + default: + FAIL; + } + DONE; + } + + switch (GET_CODE (operands[1])) + { + case EQ: + scratch = expand_binop (SImode, sub_optab, operands[2], operands[3], + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_cstoresi_eq0_thumb1 (operands[0], scratch)); + break; + + case NE: + scratch = expand_binop (SImode, sub_optab, operands[2], operands[3], + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], scratch)); + break; + + case LE: + op3 = force_reg (SImode, operands[3]); + + scratch = expand_binop (SImode, lshr_optab, operands[2], GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + scratch2 = expand_binop (SImode, ashr_optab, op3, GEN_INT (31), + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2, + op3, operands[2])); + break; + + case GE: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = expand_binop (SImode, ashr_optab, operands[2], GEN_INT (31), + NULL_RTX, 0, OPTAB_WIDEN); + scratch2 = expand_binop (SImode, lshr_optab, op3, GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2, + operands[2], op3)); + break; + + case LEU: + op3 = force_reg (SImode, operands[3]); + scratch = force_reg (SImode, const0_rtx); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch, + op3, operands[2])); + break; + + case GEU: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = force_reg (SImode, const0_rtx); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch, + operands[2], op3)); + break; + + case LTU: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = gen_reg_rtx (SImode); + emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], operands[2], op3)); + break; + + case GTU: + op3 = force_reg (SImode, operands[3]); + scratch = gen_reg_rtx (SImode); + emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], op3, operands[2])); + break; + + /* No good sequences for GT, LT. 
*/ + default: + FAIL; + } + DONE; +}") + +(define_expand "cstoresf4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); DONE;" +) + +(define_expand "cstoredf4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); DONE;" +) + +(define_expand "cstoredi4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:DI 2 "cmpdi_operand" "") + (match_operand:DI 3 "cmpdi_operand" "")]))] + "TARGET_32BIT" + "{ + rtx swap = NULL_RTX; + enum rtx_code code = GET_CODE (operands[1]); + + /* We should not have two constants. */ + gcc_assert (GET_MODE (operands[2]) == DImode + || GET_MODE (operands[3]) == DImode); + + /* Flip unimplemented DImode comparisons to a form that + arm_gen_compare_reg can handle. */ + switch (code) + { + case GT: + swap = gen_rtx_LT (VOIDmode, operands[3], operands[2]); break; + case LE: + swap = gen_rtx_GE (VOIDmode, operands[3], operands[2]); break; + case GTU: + swap = gen_rtx_LTU (VOIDmode, operands[3], operands[2]); break; + case LEU: + swap = gen_rtx_GEU (VOIDmode, operands[3], operands[2]); break; + default: + break; + } + if (swap) + emit_insn (gen_cstore_cc (operands[0], swap, operands[3], + operands[2])); + else + emit_insn (gen_cstore_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + }" +) + +(define_expand "cstoresi_eq0_thumb1" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (match_dup:SI 2))])] + "TARGET_THUMB1" + "operands[2] = gen_reg_rtx (SImode);" +) + +(define_expand "cstoresi_ne0_thumb1" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (match_dup:SI 2))])] + "TARGET_THUMB1" + "operands[2] = gen_reg_rtx (SImode);" +) + +(define_insn "*cstoresi_eq0_thumb1_insn" + [(set (match_operand:SI 0 "s_register_operand" "=&l,l") + (eq:SI (match_operand:SI 1 "s_register_operand" "l,0") + (const_int 0))) + (clobber (match_operand:SI 2 "s_register_operand" "=X,l"))] + "TARGET_THUMB1" + "@ + neg\\t%0, %1\;adc\\t%0, %0, %1 + neg\\t%2, %1\;adc\\t%0, %1, %2" + [(set_attr "length" "4")] +) + +(define_insn "*cstoresi_ne0_thumb1_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (ne:SI (match_operand:SI 1 "s_register_operand" "0") + (const_int 0))) + (clobber (match_operand:SI 2 "s_register_operand" "=l"))] + "TARGET_THUMB1" + "sub\\t%2, %1, #1\;sbc\\t%0, %1, %2" + [(set_attr "length" "4")] +) + +;; Used as part of the expansion of thumb ltu and gtu sequences +(define_insn "cstoresi_nltu_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") + (neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r"))))] + "TARGET_THUMB1" + "cmp\\t%1, %2\;sbc\\t%0, %0, %0" + [(set_attr "length" "4")] +) + +(define_insn_and_split "cstoresi_ltu_thumb1" + [(set 
(match_operand:SI 0 "s_register_operand" "=l,l") + (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1" + [(set (match_dup 3) + (neg:SI (ltu:SI (match_dup 1) (match_dup 2)))) + (set (match_dup 0) (neg:SI (match_dup 3)))] + "operands[3] = gen_reg_rtx (SImode);" + [(set_attr "length" "4")] +) + +;; Used as part of the expansion of thumb les sequence. +(define_insn "thumb1_addsi3_addgeu" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%0") + (match_operand:SI 2 "s_register_operand" "l")) + (geu:SI (match_operand:SI 3 "s_register_operand" "l") + (match_operand:SI 4 "thumb1_cmp_operand" "lI"))))] + "TARGET_THUMB1" + "cmp\\t%3, %4\;adc\\t%0, %1, %2" + [(set_attr "length" "4")] +) + + +;; Conditional move insns + +(define_expand "movsicc" + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operand 1 "arm_comparison_operator" "") + (match_operand:SI 2 "arm_not_operand" "") + (match_operand:SI 3 "arm_not_operand" "")))] + "TARGET_32BIT" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (code == UNEQ || code == LTGT) + FAIL; + + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "s_register_operand" "") + (if_then_else:SF (match_operand 1 "arm_comparison_operator" "") + (match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "nonmemory_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (code == UNEQ || code == LTGT) + FAIL; + + /* When compiling for SOFT_FLOAT, ensure both arms are in registers. 
+ Otherwise, ensure it is a valid FP add operand */ + if ((!(TARGET_HARD_FLOAT && TARGET_FPA)) + || (!arm_float_add_operand (operands[3], SFmode))) + operands[3] = force_reg (SFmode, operands[3]); + + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_expand "movdfcc" + [(set (match_operand:DF 0 "s_register_operand" "") + (if_then_else:DF (match_operand 1 "arm_comparison_operator" "") + (match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_add_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (code == UNEQ || code == LTGT) + FAIL; + + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_insn "*movsicc_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K") + (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))] + "TARGET_ARM" + "@ + mov%D3\\t%0, %2 + mvn%D3\\t%0, #%B2 + mov%d3\\t%0, %1 + mvn%d3\\t%0, #%B1 + mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + [(set_attr "length" "4,4,4,4,8,8,8,8") + (set_attr "conds" "use") + (set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")] +) + +(define_insn "*movsfcc_soft_insn" + [(set (match_operand:SF 0 "s_register_operand" "=r,r") + (if_then_else:SF (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,r") + (match_operand:SF 2 "s_register_operand" "r,0")))] + "TARGET_ARM && TARGET_SOFT_FLOAT" + "@ + mov%D3\\t%0, %2 + mov%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "insn" "mov")] +) + + +;; Jump and linkage insns + +(define_expand "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_32BIT" + "* + { + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%?\\t%l0\"; + } + " + [(set_attr "predicable" "yes")] +) + +(define_insn "*thumb_jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_THUMB1" + "* + if (get_attr_length (insn) == 2) + return \"b\\t%l0\"; + return \"bl\\t%l0\\t%@ far jump\"; + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "4") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) + (le (minus (match_dup 0) (pc)) (const_int 2048))) + (const_int 2) + (const_int 4)))] +) + +(define_expand "call" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_EITHER" + " + { + rtx callee, pat; + + /* In an untyped call, we can get NULL for operand 2. */ + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 32-bit address of the callee into a register before performing the + branch and link. 
*/ + callee = XEXP (operands[0], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? arm_is_long_call_p (SYMBOL_REF_DECL (callee)) + : !REG_P (callee)) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + + pat = gen_call_internal (operands[0], operands[1], operands[2]); + arm_emit_call_insn (pat, XEXP (operands[0], 0)); + DONE; + }" +) + +(define_expand "call_internal" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))])]) + +(define_insn "*call_reg_armv5" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && arm_arch5" + "blx%?\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_reg_arm" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5" + "* + return output_call (operands); + " + ;; length is worst case, normally it is only two + [(set_attr "length" "12") + (set_attr "type" "call")] +) + + +;; Note: not used for armv5+ because the sequence used (ldr pc, ...) is not +;; considered a function call by the branch predictor of some cores (PR40887). +;; Falls back to blx rN (*call_reg_armv5). + +(define_insn "*call_mem" + [(call (mem:SI (match_operand:SI 0 "call_memory_operand" "m")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5" + "* + return output_call_mem (operands); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +(define_insn "*call_reg_thumb1_v5" + [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && arm_arch5" + "blx\\t%0" + [(set_attr "length" "2") + (set_attr "type" "call")] +) + +(define_insn "*call_reg_thumb1" + [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && !arm_arch5" + "* + { + if (!TARGET_CALLER_INTERWORKING) + return thumb_call_via_reg (operands[0]); + else if (operands[1] == const0_rtx) + return \"bl\\t%__interwork_call_via_%0\"; + else if (frame_pointer_needed) + return \"bl\\t%__interwork_r7_call_via_%0\"; + else + return \"bl\\t%__interwork_r11_call_via_%0\"; + }" + [(set_attr "type" "call")] +) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_EITHER" + " + { + rtx pat, callee; + + /* In an untyped call, we can get NULL for operand 2. */ + if (operands[3] == 0) + operands[3] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 32-bit address of the callee into a register before performing the + branch and link. */ + callee = XEXP (operands[1], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? 
arm_is_long_call_p (SYMBOL_REF_DECL (callee)) + : !REG_P (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + + pat = gen_call_value_internal (operands[0], operands[1], + operands[2], operands[3]); + arm_emit_call_insn (pat, XEXP (operands[1], 0)); + DONE; + }" +) + +(define_expand "call_value_internal" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])]) + +(define_insn "*call_value_reg_armv5" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && arm_arch5" + "blx%?\\t%1" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_reg_arm" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5" + "* + return output_call (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +;; Note: see *call_mem + +(define_insn "*call_value_mem" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "call_memory_operand" "m")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))" + "* + return output_call_mem (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb1_v5" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && arm_arch5" + "blx\\t%1" + [(set_attr "length" "2") + (set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb1" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && !arm_arch5" + "* + { + if (!TARGET_CALLER_INTERWORKING) + return thumb_call_via_reg (operands[1]); + else if (operands[2] == const0_rtx) + return \"bl\\t%__interwork_call_via_%1\"; + else if (frame_pointer_needed) + return \"bl\\t%__interwork_r7_call_via_%1\"; + else + return \"bl\\t%__interwork_r11_call_via_%1\"; + }" + [(set_attr "type" "call")] +) + +;; Allow calls to SYMBOL_REFs specially as they are not valid general addresses +;; The 'a' causes the operand to be treated as an address, i.e. no '#' output. + +(define_insn "*call_symbol" + [(call (mem:SI (match_operand:SI 0 "" "")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_32BIT + && (GET_CODE (operands[0]) == SYMBOL_REF) + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "* + { + return NEED_PLT_RELOC ? \"bl%?\\t%a0(PLT)\" : \"bl%?\\t%a0\"; + }" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "" "")) + (match_operand:SI 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_32BIT + && (GET_CODE (operands[1]) == SYMBOL_REF) + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "* + { + return NEED_PLT_RELOC ? 
\"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\"; + }" + [(set_attr "type" "call")] +) + +(define_insn "*call_insn" + [(call (mem:SI (match_operand:SI 0 "" "")) + (match_operand:SI 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 + && GET_CODE (operands[0]) == SYMBOL_REF + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "bl\\t%a0" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +(define_insn "*call_value_insn" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 + && GET_CODE (operands[1]) == SYMBOL_REF + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "bl\\t%a1" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +;; We may also be able to do sibcalls for Thumb, but it's much harder... +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (return) + (use (match_operand 2 "" ""))])] + "TARGET_32BIT" + " + { + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + }" +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (return) + (use (match_operand 3 "" ""))])] + "TARGET_32BIT" + " + { + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + }" +) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "" "X")) + (match_operand 1 "" "")) + (return) + (use (match_operand 2 "" ""))] + "TARGET_32BIT && GET_CODE (operands[0]) == SYMBOL_REF" + "* + return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\"; + " + [(set_attr "type" "call")] +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "" "X")) + (match_operand 2 "" ""))) + (return) + (use (match_operand 3 "" ""))] + "TARGET_32BIT && GET_CODE (operands[1]) == SYMBOL_REF" + "* + return NEED_PLT_RELOC ? 
\"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\"; + " + [(set_attr "type" "call")] +) + +(define_expand "return" + [(return)] + "TARGET_32BIT && USE_RETURN_INSN (FALSE)" + "") + +;; Often the return insn will be the same as loading from memory, so set attr +(define_insn "*arm_return" + [(return)] + "TARGET_ARM && USE_RETURN_INSN (FALSE)" + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (const_true_rtx, TRUE, FALSE); + }" + [(set_attr "type" "load1") + (set_attr "length" "12") + (set_attr "predicable" "yes")] +) + +(define_insn "*cond_return" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (return) + (pc)))] + "TARGET_ARM && USE_RETURN_INSN (TRUE)" + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (operands[0], TRUE, FALSE); + }" + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] +) + +(define_insn "*cond_return_inverted" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (pc) + (return)))] + "TARGET_ARM && USE_RETURN_INSN (TRUE)" + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (operands[0], TRUE, TRUE); + }" + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] +) + +;; Generate a sequence of instructions to determine if the processor is +;; in 26-bit or 32-bit mode, and return the appropriate return address +;; mask. + +(define_expand "return_addr_mask" + [(set (match_dup 1) + (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (eq (match_dup 1) (const_int 0)) + (const_int -1) + (const_int 67108860)))] ; 0x03fffffc + "TARGET_ARM" + " + operands[1] = gen_rtx_REG (CC_NOOVmode, CC_REGNUM); + ") + +(define_insn "*check_arch2" + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH) + (const_int 0)))] + "TARGET_ARM" + "teq\\t%|r0, %|r0\;teq\\t%|pc, %|pc" + [(set_attr "length" "8") + (set_attr "conds" "set")] +) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "TARGET_EITHER" + " + { + int i; + rtx par = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (XVECLEN (operands[2], 0))); + rtx addr = gen_reg_rtx (Pmode); + rtx mem; + int size = 0; + + emit_move_insn (addr, XEXP (operands[1], 0)); + mem = change_address (operands[1], BLKmode, addr); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx src = SET_SRC (XVECEXP (operands[2], 0, i)); + + /* Default code only uses r0 as a return value, but we could + be using anything up to 4 registers. 
*/ + if (REGNO (src) == R0_REGNUM) + src = gen_rtx_REG (TImode, R0_REGNUM); + + XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, src, + GEN_INT (size)); + size += GET_MODE_SIZE (GET_MODE (src)); + } + + emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL, + const0_rtx)); + + size = 0; + + for (i = 0; i < XVECLEN (par, 0); i++) + { + HOST_WIDE_INT offset = 0; + rtx reg = XEXP (XVECEXP (par, 0, i), 0); + + if (size != 0) + emit_move_insn (addr, plus_constant (addr, size)); + + mem = change_address (mem, GET_MODE (reg), NULL); + if (REGNO (reg) == R0_REGNUM) + { + /* On thumb we have to use a write-back instruction. */ + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, addr, + TARGET_THUMB ? TRUE : FALSE, mem, &offset)); + size = TARGET_ARM ? 16 : 0; + } + else + { + emit_move_insn (mem, reg); + size = GET_MODE_SIZE (GET_MODE (reg)); + } + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; + }" +) + +(define_expand "untyped_return" + [(match_operand:BLK 0 "memory_operand" "") + (match_operand 1 "" "")] + "TARGET_EITHER" + " + { + int i; + rtx addr = gen_reg_rtx (Pmode); + rtx mem; + int size = 0; + + emit_move_insn (addr, XEXP (operands[0], 0)); + mem = change_address (operands[0], BLKmode, addr); + + for (i = 0; i < XVECLEN (operands[1], 0); i++) + { + HOST_WIDE_INT offset = 0; + rtx reg = SET_DEST (XVECEXP (operands[1], 0, i)); + + if (size != 0) + emit_move_insn (addr, plus_constant (addr, size)); + + mem = change_address (mem, GET_MODE (reg), NULL); + if (REGNO (reg) == R0_REGNUM) + { + /* On thumb we have to use a write-back instruction. */ + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, addr, + TARGET_THUMB ? TRUE : FALSE, mem, &offset)); + size = TARGET_ARM ? 16 : 0; + } + else + { + emit_move_insn (reg, mem); + size = GET_MODE_SIZE (GET_MODE (reg)); + } + } + + /* Emit USE insns before the return. */ + for (i = 0; i < XVECLEN (operands[1], 0); i++) + emit_use (SET_DEST (XVECEXP (operands[1], 0, i))); + + /* Construct the return. */ + expand_naked_return (); + + DONE; + }" +) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. 
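The untyped_call and untyped_return expanders above are the named patterns GCC routes __builtin_apply_args, __builtin_apply and __builtin_return through. A minimal sketch of the kind of source that exercises them follows; the builtins are the documented GCC ones, while `target', `forward' and the 64-byte argument-copy size are invented purely for illustration.

  /* Forward every incoming argument to `target' and hand back whatever it
     returned, without knowing the return layout statically.  Only the
     builtins are real; the names and the 64-byte bound are illustrative.  */
  double target (int a, double b) { return a + b; }

  double forward (int a, double b)
  {
    void *args = __builtin_apply_args ();   /* capture this call's argument block */
    void *ret  = __builtin_apply ((void (*) ()) target, args, 64);
    __builtin_return (ret);                 /* expands through untyped_return above */
  }

The blockage insn defined next is what the untyped_call expander above emits (via gen_blockage) so that, as the preceding comment notes, later passes cannot move instructions across the call sequence.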
+ +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] VUNSPEC_BLOCKAGE)] + "TARGET_EITHER" + "" + [(set_attr "length" "0") + (set_attr "type" "block")] +) + +(define_expand "casesi" + [(match_operand:SI 0 "s_register_operand" "") ; index to jump on + (match_operand:SI 1 "const_int_operand" "") ; lower bound + (match_operand:SI 2 "const_int_operand" "") ; total range + (match_operand:SI 3 "" "") ; table label + (match_operand:SI 4 "" "")] ; Out of range label + "TARGET_32BIT || optimize_size || flag_pic" + " + { + enum insn_code code; + if (operands[1] != const0_rtx) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_addsi3 (reg, operands[0], + GEN_INT (-INTVAL (operands[1])))); + operands[0] = reg; + } + + if (TARGET_ARM) + code = CODE_FOR_arm_casesi_internal; + else if (TARGET_THUMB1) + code = CODE_FOR_thumb1_casesi_internal_pic; + else if (flag_pic) + code = CODE_FOR_thumb2_casesi_internal_pic; + else + code = CODE_FOR_thumb2_casesi_internal; + + if (!insn_data[(int) code].operand[1].predicate(operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + + emit_jump_insn (GEN_FCN ((int) code) (operands[0], operands[2], + operands[3], operands[4])); + DONE; + }" +) + +;; The USE in this pattern is needed to tell flow analysis that this is +;; a CASESI insn. It has no other purpose. +(define_insn "arm_casesi_internal" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (use (label_ref (match_dup 2)))])] + "TARGET_ARM" + "* + if (flag_pic) + return \"cmp\\t%0, %1\;addls\\t%|pc, %|pc, %0, asl #2\;b\\t%l3\"; + return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + +(define_expand "thumb1_casesi_internal_pic" + [(match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 1 "thumb1_cmp_operand" "") + (match_operand 2 "" "") + (match_operand 3 "" "")] + "TARGET_THUMB1" + { + rtx reg0; + rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[1]); + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[1], + operands[3])); + reg0 = gen_rtx_REG (SImode, 0); + emit_move_insn (reg0, operands[0]); + emit_jump_insn (gen_thumb1_casesi_dispatch (operands[2]/*, operands[3]*/)); + DONE; + } +) + +(define_insn "thumb1_casesi_dispatch" + [(parallel [(set (pc) (unspec [(reg:SI 0) + (label_ref (match_operand 0 "" "")) +;; (label_ref (match_operand 1 "" "")) +] + UNSPEC_THUMB1_CASESI)) + (clobber (reg:SI IP_REGNUM)) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_THUMB1" + "* return thumb1_output_casesi(operands);" + [(set_attr "length" "4")] +) + +(define_expand "indirect_jump" + [(set (pc) + (match_operand:SI 0 "s_register_operand" ""))] + "TARGET_EITHER" + " + /* Thumb-2 doesn't have mov pc, reg. Explicitly set the low bit of the + address and use bx. */ + if (TARGET_THUMB2) + { + rtx tmp; + tmp = gen_reg_rtx (SImode); + emit_insn (gen_iorsi3 (tmp, operands[0], GEN_INT(1))); + operands[0] = tmp; + } + " +) + +;; NB Never uses BX. 
+(define_insn "*arm_indirect_jump" + [(set (pc) + (match_operand:SI 0 "s_register_operand" "r"))] + "TARGET_ARM" + "mov%?\\t%|pc, %0\\t%@ indirect register jump" + [(set_attr "predicable" "yes")] +) + +(define_insn "*load_indirect_jump" + [(set (pc) + (match_operand:SI 0 "memory_operand" "m"))] + "TARGET_ARM" + "ldr%?\\t%|pc, %0\\t%@ indirect memory jump" + [(set_attr "type" "load1") + (set_attr "pool_range" "4096") + (set_attr "neg_pool_range" "4084") + (set_attr "predicable" "yes")] +) + +;; NB Never uses BX. +(define_insn "*thumb1_indirect_jump" + [(set (pc) + (match_operand:SI 0 "register_operand" "l*r"))] + "TARGET_THUMB1" + "mov\\tpc, %0" + [(set_attr "conds" "clob") + (set_attr "length" "2")] +) + + +;; Misc insns + +(define_insn "nop" + [(const_int 0)] + "TARGET_EITHER" + "* + if (TARGET_UNIFIED_ASM) + return \"nop\"; + if (TARGET_ARM) + return \"mov%?\\t%|r0, %|r0\\t%@ nop\"; + return \"mov\\tr8, r8\"; + " + [(set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 2) + (const_int 4)))] +) + + +;; Patterns to allow combination of arithmetic, cond code and shifts + +(define_insn "*arith_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r,r,r") + (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) + (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] + "TARGET_32BIT" + "%i1%?\\t%0, %2, %4%S3" + [(set_attr "predicable" "yes") + (set_attr "shift" "4") + (set_attr "arch" "a,t2,t2,a") + ;; Thumb2 doesn't allow the stack pointer to be used for + ;; operand1 for all operations other than add and sub. In this case + ;; the minus operation is a candidate for an rsub and hence needs + ;; to be disabled. + ;; We have to make sure to disable the fourth alternative if + ;; the shift_operator is MULT, since otherwise the insn will + ;; also match a multiply_accumulate pattern and validate_change + ;; will allow a replacement of the constant with a register + ;; despite the checks done in shift_operator. 
+ (set_attr_alternative "insn_enabled" + [(const_string "yes") + (if_then_else + (match_operand:SI 1 "add_operator" "") + (const_string "yes") (const_string "no")) + (const_string "yes") + (if_then_else + (match_operand:SI 3 "mult_operator" "") + (const_string "no") (const_string "yes"))]) + (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 2 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "") + (match_operand:SI 5 "reg_or_int_operand" "")]) + (match_operand:SI 6 "s_register_operand" "")]) + (match_operand:SI 7 "arm_rhs_operand" "")])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 8) + (match_op_dup 2 [(match_op_dup 3 [(match_dup 4) (match_dup 5)]) + (match_dup 6)])) + (set (match_dup 0) + (match_op_dup 1 [(match_dup 8) (match_dup 7)]))] + "") + +(define_insn "*arith_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "shift_amount_operand" "M,r")]) + (match_operand:SI 2 "s_register_operand" "r,r")]) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_op_dup 1 [(match_op_dup 3 [(match_dup 4) (match_dup 5)]) + (match_dup 2)]))] + "TARGET_32BIT" + "%i1%.\\t%0, %2, %4%S3" + [(set_attr "conds" "set") + (set_attr "shift" "4") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn "*arith_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "shift_amount_operand" "M,r")]) + (match_operand:SI 2 "s_register_operand" "r,r")]) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "%i1%.\\t%0, %2, %4%S3" + [(set_attr "conds" "set") + (set_attr "shift" "4") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn "*sub_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r")])))] + "TARGET_32BIT" + "sub%?\\t%0, %1, %3%S2" + [(set_attr "predicable" "yes") + (set_attr "shift" "3") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn "*sub_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_dup 1) + (match_op_dup 2 [(match_dup 3) (match_dup 4)])))] + "TARGET_32BIT" + "sub%.\\t%0, %1, %3%S2" + [(set_attr "conds" "set") + (set_attr "shift" "3") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + +(define_insn "*sub_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operator:SI 2 "shift_operator" + 
[(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "sub%.\\t%0, %1, %3%S2" + [(set_attr "conds" "set") + (set_attr "shift" "3") + (set_attr "arch" "32,a") + (set_attr "type" "alu_shift,alu_shift_reg")]) + + +(define_insn "*and_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_ARM" + "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1" + [(set_attr "conds" "use") + (set_attr "insn" "mov") + (set_attr "length" "8")] +) + +(define_insn "*ior_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "s_register_operand" "0,?r")))] + "TARGET_ARM" + "@ + orr%d2\\t%0, %1, #1 + mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1" + [(set_attr "conds" "use") + (set_attr "length" "4,8")] +) + +; A series of splitters for the compare_scc pattern below. Note that +; order is important. +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (lt:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ge:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (not:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 31)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 1) (match_dup 1))) + (set (match_dup 0) + (minus:SI (const_int 1) (match_dup 1)))]) + (cond_exec (ltu:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 0)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))]) + (cond_exec (ne:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] +{ + operands[3] = GEN_INT (-INTVAL (operands[2])); +}) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_add_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (minus:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (cond_exec (ne:CC_NOOV (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))]) + +(define_insn_and_split "*compare_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 
"arm_add_operand" "rI,L")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0))) + (cond_exec (match_dup 5) (set (match_dup 0) (const_int 1)))] +{ + rtx tmp1; + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + tmp1 = gen_rtx_REG (mode, CC_REGNUM); + + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); +}) + +;; Attempt to improve the sequence generated by the compare_scc splitters +;; not to use conditional execution. +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 3 "r")] + "TARGET_32BIT" + [(set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2))) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 3))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 0) (match_dup 3)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))]) + +(define_insn "*cond_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI (match_operator 3 "equality_operator" + [(match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))] + "TARGET_ARM" + "* + if (GET_CODE (operands[3]) == NE) + { + if (which_alternative != 1) + output_asm_insn (\"mov%D4\\t%0, %2\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%d4\\t%0, %1\", operands); + return \"\"; + } + if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%d4\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "use") + (set_attr "insn" "mov") + (set_attr "length" "4,4,8")] +) + +(define_insn "*cond_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 5 "shiftable_operator" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + return \"%i5\\t%0, %1, %2, lsr #31\"; + + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (GET_CODE (operands[5]) == AND) + output_asm_insn (\"mov%D4\\t%0, #0\", operands); + else if (GET_CODE (operands[5]) == MINUS) + output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands); + else if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + return \"%i5%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + +(define_insn "*cond_sub" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + 
(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + return \"sub%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +;; ??? Is it worth using these conditional patterns in Thumb-2 mode? +(define_insn "*cmp_ite0" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]) + (const_int 0)) + (const_int 0)))] + "TARGET_ARM" + "* + { + static const char * const opcodes[4][2] = + { + {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", + \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, + {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", + \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, + {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", + \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, + {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", + \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} + }; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + return opcodes[which_alternative][swap]; + }" + [(set_attr "conds" "set") + (set_attr "length" "8")] +) + +(define_insn "*cmp_ite1" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]) + (const_int 1)) + (const_int 0)))] + "TARGET_ARM" + "* + { + static const char * const opcodes[4][2] = + { + {\"cmp\\t%0, %1\;cmp%d4\\t%2, %3\", + \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"}, + {\"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\", + \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"}, + {\"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\", + \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"}, + {\"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\", + \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"} + }; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), + reverse_condition (GET_CODE (operands[4]))); + + return opcodes[which_alternative][swap]; + }" + [(set_attr "conds" "set") + (set_attr "length" "8")] +) + +(define_insn "*cmp_and" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (and:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])) + (const_int 0)))] + "TARGET_ARM" + "* + { + static const char *const opcodes[4][2] = + { + {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\", + \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"}, + {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\", + \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"}, + {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\", + \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"}, + {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\", + \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"} + }; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + 
return opcodes[which_alternative][swap]; + }" + [(set_attr "conds" "set") + (set_attr "predicable" "no") + (set_attr "length" "8")] +) + +(define_insn "*cmp_ior" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (ior:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])) + (const_int 0)))] + "TARGET_ARM" + "* +{ + static const char *const opcodes[4][2] = + { + {\"cmp\\t%0, %1\;cmp%D4\\t%2, %3\", + \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"}, + {\"cmn\\t%0, #%n1\;cmp%D4\\t%2, %3\", + \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"}, + {\"cmp\\t%0, %1\;cmn%D4\\t%2, #%n3\", + \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"}, + {\"cmn\\t%0, #%n1\;cmn%D4\\t%2, #%n3\", + \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"} + }; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + return opcodes[which_alternative][swap]; +} +" + [(set_attr "conds" "set") + (set_attr "length" "8")] +) + +(define_insn_and_split "*ior_scc_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y) + != CCmode)" + "#" + "TARGET_ARM && reload_completed" + [(set (match_dup 7) + (compare + (ior:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))] + "operands[7] + = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6], + DOM_CC_X_OR_Y), + CC_REGNUM);" + [(set_attr "conds" "clob") + (set_attr "length" "16")]) + +; If the above pattern is followed by a CMP insn, then the compare is +; redundant, since we can rework the conditional instruction that follows. 
+(define_insn_and_split "*ior_scc_scc_cmp" + [(set (match_operand 0 "dominant_cc_register" "") + (compare (ior:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")])) + (const_int 0))) + (set (match_operand:SI 7 "s_register_operand" "=r") + (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] + "TARGET_ARM" + "#" + "TARGET_ARM && reload_completed" + [(set (match_dup 0) + (compare + (ior:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] + "" + [(set_attr "conds" "set") + (set_attr "length" "16")]) + +(define_insn_and_split "*and_scc_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + "#" + "TARGET_ARM && reload_completed + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + [(set (match_dup 7) + (compare + (and:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))] + "operands[7] + = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6], + DOM_CC_X_AND_Y), + CC_REGNUM);" + [(set_attr "conds" "clob") + (set_attr "length" "16")]) + +; If the above pattern is followed by a CMP insn, then the compare is +; redundant, since we can rework the conditional instruction that follows. +(define_insn_and_split "*and_scc_scc_cmp" + [(set (match_operand 0 "dominant_cc_register" "") + (compare (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")])) + (const_int 0))) + (set (match_operand:SI 7 "s_register_operand" "=r") + (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] + "TARGET_ARM" + "#" + "TARGET_ARM && reload_completed" + [(set (match_dup 0) + (compare + (and:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] + "" + [(set_attr "conds" "set") + (set_attr "length" "16")]) + +;; If there is no dominance in the comparison, then we can still save an +;; instruction in the AND case, since we can know that the second compare +;; need only zero the value if false (if true, then the value is already +;; correct). 
+(define_insn_and_split "*and_scc_scc_nodom" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") + (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r,r,0") + (match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + == CCmode)" + "#" + "TARGET_ARM && reload_completed" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC CC_REGNUM))]) + (set (match_dup 7) (match_op_dup 8 [(match_dup 4) (match_dup 5)])) + (set (match_dup 0) + (if_then_else:SI (match_op_dup 6 [(match_dup 7) (const_int 0)]) + (match_dup 0) + (const_int 0)))] + "operands[7] = gen_rtx_REG (SELECT_CC_MODE (GET_CODE (operands[6]), + operands[4], operands[5]), + CC_REGNUM); + operands[8] = gen_rtx_COMPARE (GET_MODE (operands[7]), operands[4], + operands[5]);" + [(set_attr "conds" "clob") + (set_attr "length" "20")]) + +(define_split + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI + (and:SI (match_operand:SI 0 "s_register_operand" "") + (const_int 1)) + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")])) + (const_int 0))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 4) + (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)]) + (match_dup 0))) + (set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1)) + (const_int 0)))] + "") + +(define_split + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")]) + (and:SI (match_operand:SI 0 "s_register_operand" "") + (const_int 1))) + (const_int 0))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 4) + (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)]) + (match_dup 0))) + (set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1)) + (const_int 0)))] + "") +;; ??? 
The conditional patterns above need checking for Thumb-2 usefulness + +(define_insn "*negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + return \"mov\\t%0, %1, asr #31\"; + + if (GET_CODE (operands[3]) == NE) + return \"subs\\t%0, %1, %2\;mvnne\\t%0, #0\"; + + output_asm_insn (\"cmp\\t%1, %2\", operands); + output_asm_insn (\"mov%D3\\t%0, #0\", operands); + return \"mvn%d3\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + +(define_insn "movcond" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + if (GET_CODE (operands[5]) == LT + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && GET_CODE (operands[1]) == REG) + { + if (operands[2] == const0_rtx) + return \"and\\t%0, %1, %3, asr #31\"; + return \"ands\\t%0, %1, %3, asr #32\;movcc\\t%0, %2\"; + } + else if (which_alternative != 0 && GET_CODE (operands[2]) == REG) + { + if (operands[1] == const0_rtx) + return \"bic\\t%0, %2, %3, asr #31\"; + return \"bics\\t%0, %2, %3, asr #32\;movcs\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + + if (GET_CODE (operands[5]) == GE + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && GET_CODE (operands[1]) == REG) + { + if (operands[2] == const0_rtx) + return \"bic\\t%0, %1, %3, asr #31\"; + return \"bics\\t%0, %1, %3, asr #32\;movcs\\t%0, %2\"; + } + else if (which_alternative != 0 && GET_CODE (operands[2]) == REG) + { + if (operands[1] == const0_rtx) + return \"and\\t%0, %2, %3, asr #31\"; + return \"ands\\t%0, %2, %3, asr #32\;movcc\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + if (GET_CODE (operands[4]) == CONST_INT + && !const_ok_for_arm (INTVAL (operands[4]))) + output_asm_insn (\"cmn\\t%3, #%n4\", operands); + else + output_asm_insn (\"cmp\\t%3, %4\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%d5\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%D5\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12")] +) + +;; ??? The patterns below need checking for Thumb-2 usefulness. 
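The *ifcompare_*/*if_* patterns that follow are targets for if-conversion: a compare followed by conditionally executed arithmetic and moves in place of a branch. As an illustration only (the names are invented, and the exact code depends on optimization level), a conditional expression like the one below typically maps onto the add%d4/mov%D4 style alternatives of *if_plus_move:

  /* With optimization, this is normally if-converted on ARM into
     roughly `cmp ; addgt ; movle' rather than a branch.  */
  int select_sum (int c, int a, int b, int fallback)
  {
    return c > 0 ? a + b : fallback;
  }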
+ +(define_insn "*ifcompare_plus_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_plus_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L")) + (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI")))] + "TARGET_ARM" + "@ + add%d4\\t%0, %2, %3 + sub%d4\\t%0, %2, #%n3 + add%d4\\t%0, %2, %3\;mov%D4\\t%0, %1 + sub%d4\\t%0, %2, #%n3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") + (set_attr "type" "*,*,*,*")] +) + +(define_insn "*ifcompare_move_plus" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_move_plus" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI") + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L"))))] + "TARGET_ARM" + "@ + add%D4\\t%0, %2, %3 + sub%D4\\t%0, %2, #%n3 + add%D4\\t%0, %2, %3\;mov%d4\\t%0, %1 + sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") + (set_attr "type" "*,*,*,*")] +) + +(define_insn "*ifcompare_arith_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (match_operator 9 "arm_comparison_operator" + [(match_operand:SI 5 "s_register_operand" "r") + (match_operand:SI 6 "arm_add_operand" "rIL")]) + (match_operator:SI 8 "shiftable_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + +(define_insn "*if_arith_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (match_operator 5 "arm_comparison_operator" + [(match_operand 8 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rI")])))] + "TARGET_ARM" + "%I6%d5\\t%0, %1, %2\;%I7%D5\\t%0, %3, %4" + 
[(set_attr "conds" "use") + (set_attr "length" "8")] +) + +(define_insn "*ifcompare_arith_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + /* If we have an operation where (op x 0) is the identity operation and + the conditional operator is LT or GE and we are comparing against zero and + everything is in registers then we can do this in two instructions. */ + if (operands[3] == const0_rtx + && GET_CODE (operands[7]) != AND + && GET_CODE (operands[5]) == REG + && GET_CODE (operands[1]) == REG + && REGNO (operands[1]) == REGNO (operands[4]) + && REGNO (operands[4]) != REGNO (operands[0])) + { + if (GET_CODE (operands[6]) == LT) + return \"and\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\"; + else if (GET_CODE (operands[6]) == GE) + return \"bic\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\"; + } + if (GET_CODE (operands[3]) == CONST_INT + && !const_ok_for_arm (INTVAL (operands[3]))) + output_asm_insn (\"cmn\\t%2, #%n3\", operands); + else + output_asm_insn (\"cmp\\t%2, %3\", operands); + output_asm_insn (\"%I7%d6\\t%0, %4, %5\", operands); + if (which_alternative != 0) + return \"mov%D6\\t%0, %1\"; + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_arith_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 4 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operator:SI 5 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI")))] + "TARGET_ARM" + "@ + %I5%d4\\t%0, %2, %3 + %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" "*,*")] +) + +(define_insn "*ifcompare_move_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + /* If we have an operation where (op x 0) is the identity operation and + the conditional operator is LT or GE and we are comparing against zero and + everything is in registers then we can do this in two instructions */ + if (operands[5] == const0_rtx + && GET_CODE (operands[7]) != AND + && GET_CODE (operands[3]) == REG + && GET_CODE (operands[1]) == REG + && REGNO (operands[1]) == REGNO (operands[2]) + && REGNO (operands[2]) != REGNO (operands[0])) + { + if (GET_CODE (operands[6]) == GE) + return \"and\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\"; + else if (GET_CODE (operands[6]) == LT) + return \"bic\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\"; + } + + if (GET_CODE (operands[5]) == CONST_INT + && !const_ok_for_arm (INTVAL (operands[5]))) + output_asm_insn (\"cmn\\t%4, #%n5\", operands); + else + output_asm_insn (\"cmp\\t%4, %5\", 
operands); + + if (which_alternative != 0) + output_asm_insn (\"mov%d6\\t%0, %1\", operands); + return \"%I7%D6\\t%0, %2, %3\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_move_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (match_operator:SI 5 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))] + "TARGET_ARM" + "@ + %I5%D4\\t%0, %2, %3 + %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" "*,*")] +) + +(define_insn "*ifcompare_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))] + "TARGET_ARM" + "@ + mvn%D4\\t%0, %2 + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2 + mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "insn" "mvn") + (set_attr "length" "4,8,8")] +) + +(define_insn "*ifcompare_not_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_not_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + mvn%d4\\t%0, %2 + mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 + mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "insn" "mvn") + (set_attr "length" "4,8,8")] +) + +(define_insn "*ifcompare_shift_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_shift_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 6 
"cc_register" "") (const_int 0)]) + (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + mov%d5\\t%0, %2%S4 + mov%D5\\t%0, %1\;mov%d5\\t%0, %2%S4 + mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") + (set_attr "insn" "mov") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +(define_insn "*ifcompare_move_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_move_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")])))] + "TARGET_ARM" + "@ + mov%D5\\t%0, %2%S4 + mov%d5\\t%0, %1\;mov%D5\\t%0, %2%S4 + mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") + (set_attr "insn" "mov") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +(define_insn "*ifcompare_shift_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 7 "arm_comparison_operator" + [(match_operand:SI 5 "s_register_operand" "r") + (match_operand:SI 6 "arm_add_operand" "rIL")]) + (match_operator:SI 8 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (match_operator:SI 9 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rM")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + +(define_insn "*if_shift_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 8 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rM")])))] + "TARGET_ARM" + "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7" + [(set_attr "conds" "use") + (set_attr "shift" "1") + (set_attr "length" "8") + (set_attr "insn" "mov") + (set (attr "type") (if_then_else + (and (match_operand 2 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +(define_insn "*ifcompare_not_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + 
(if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + +(define_insn "*if_not_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")])))] + "TARGET_ARM" + "mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3" + [(set_attr "conds" "use") + (set_attr "insn" "mvn") + (set_attr "length" "8")] +) + +(define_insn "*ifcompare_arith_not" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + +(define_insn "*if_arith_not" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_ARM" + "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" + [(set_attr "conds" "use") + (set_attr "insn" "mvn") + (set_attr "length" "8")] +) + +(define_insn "*ifcompare_neg_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_neg_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + rsb%d4\\t%0, %2, #0 + mov%D4\\t%0, %1\;rsb%d4\\t%0, %2, #0 + mvn%D4\\t%0, #%B1\;rsb%d4\\t%0, %2, #0" + [(set_attr "conds" "use") + (set_attr "length" "4,8,8")] +) + +(define_insn "*ifcompare_move_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + 
"TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12")] +) + +(define_insn "*if_move_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))] + "TARGET_ARM" + "@ + rsb%D4\\t%0, %2, #0 + mov%d4\\t%0, %1\;rsb%D4\\t%0, %2, #0 + mvn%d4\\t%0, #%B1\;rsb%D4\\t%0, %2, #0" + [(set_attr "conds" "use") + (set_attr "length" "4,8,8")] +) + +(define_insn "*arith_adjacentmem" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 1 "shiftable_operator" + [(match_operand:SI 2 "memory_operand" "m") + (match_operand:SI 3 "memory_operand" "m")])) + (clobber (match_scratch:SI 4 "=r"))] + "TARGET_ARM && adjacent_mem_locations (operands[2], operands[3])" + "* + { + rtx ldm[3]; + rtx arith[4]; + rtx base_reg; + HOST_WIDE_INT val1 = 0, val2 = 0; + + if (REGNO (operands[0]) > REGNO (operands[4])) + { + ldm[1] = operands[4]; + ldm[2] = operands[0]; + } + else + { + ldm[1] = operands[0]; + ldm[2] = operands[4]; + } + + base_reg = XEXP (operands[2], 0); + + if (!REG_P (base_reg)) + { + val1 = INTVAL (XEXP (base_reg, 1)); + base_reg = XEXP (base_reg, 0); + } + + if (!REG_P (XEXP (operands[3], 0))) + val2 = INTVAL (XEXP (XEXP (operands[3], 0), 1)); + + arith[0] = operands[0]; + arith[3] = operands[1]; + + if (val1 < val2) + { + arith[1] = ldm[1]; + arith[2] = ldm[2]; + } + else + { + arith[1] = ldm[2]; + arith[2] = ldm[1]; + } + + ldm[0] = base_reg; + if (val1 !=0 && val2 != 0) + { + rtx ops[3]; + + if (val1 == 4 || val2 == 4) + /* Other val must be 8, since we know they are adjacent and neither + is zero. */ + output_asm_insn (\"ldm%(ib%)\\t%0, {%1, %2}\", ldm); + else if (const_ok_for_arm (val1) || const_ok_for_arm (-val1)) + { + ldm[0] = ops[0] = operands[4]; + ops[1] = base_reg; + ops[2] = GEN_INT (val1); + output_add_immediate (ops); + if (val1 < val2) + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + } + else + { + /* Offset is out of range for a single add, so use two ldr. 
*/ + ops[0] = ldm[1]; + ops[1] = base_reg; + ops[2] = GEN_INT (val1); + output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops); + ops[0] = ldm[2]; + ops[2] = GEN_INT (val2); + output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops); + } + } + else if (val1 != 0) + { + if (val1 < val2) + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + } + else + { + if (val1 < val2) + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + } + output_asm_insn (\"%I3%?\\t%0, %1, %2\", arith); + return \"\"; + }" + [(set_attr "length" "12") + (set_attr "predicable" "yes") + (set_attr "type" "load1")] +) + +; This pattern is never tried by combine, so do it as a peephole + +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "arm_general_register_operand" "")) + (set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (const_int 0)))] + "TARGET_ARM" + [(parallel [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0))) + (set (match_dup 0) (match_dup 1))])] + "" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (and:SI (ge:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0)) + (neg:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 4 "arm_rhs_operand" "")])))) + (clobber (match_operand:SI 5 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 5) (not:SI (ashiftrt:SI (match_dup 1) (const_int 31)))) + (set (match_dup 0) (and:SI (match_op_dup 2 [(match_dup 3) (match_dup 4)]) + (match_dup 5)))] + "" +) + +;; This split can be used because CC_Z mode implies that the following +;; branch will be an equality, or an unsigned inequality, so the sign +;; extension is not needed. + +(define_split + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (ashift:SI (subreg:SI (match_operand:QI 0 "memory_operand" "") 0) + (const_int 24)) + (match_operand 1 "const_int_operand" ""))) + (clobber (match_scratch:SI 2 ""))] + "TARGET_ARM + && (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) + == (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) >> 24) << 24)" + [(set (match_dup 2) (zero_extend:SI (match_dup 0))) + (set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 1)))] + " + operands[1] = GEN_INT (((unsigned long) INTVAL (operands[1])) >> 24); + " +) +;; ??? Check the patterns above for Thumb-2 usefulness + +(define_expand "prologue" + [(clobber (const_int 0))] + "TARGET_EITHER" + "if (TARGET_32BIT) + arm_expand_prologue (); + else + thumb1_expand_prologue (); + DONE; + " +) + +(define_expand "epilogue" + [(clobber (const_int 0))] + "TARGET_EITHER" + " + if (crtl->calls_eh_return) + emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2))); + if (TARGET_THUMB1) + thumb1_expand_epilogue (); + else if (USE_RETURN_INSN (FALSE)) + { + emit_jump_insn (gen_return ()); + DONE; + } + emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, + gen_rtvec (1, + gen_rtx_RETURN (VOIDmode)), + VUNSPEC_EPILOGUE)); + DONE; + " +) + +;; Note - although unspec_volatile's USE all hard registers, +;; USEs are ignored after relaod has completed. Thus we need +;; to add an unspec of the link register to ensure that flow +;; does not think that it is unused by the sibcall branch that +;; will replace the standard function epilogue. 
+(define_insn "sibcall_epilogue" + [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE) + (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])] + "TARGET_32BIT" + "* + if (use_return_insn (FALSE, next_nonnote_insn (insn))) + return output_return_instruction (const_true_rtx, FALSE, FALSE); + return arm_output_epilogue (next_nonnote_insn (insn)); + " +;; Length is absolute worst case + [(set_attr "length" "44") + (set_attr "type" "block") + ;; We don't clobber the conditions, but the potential length of this + ;; operation is sufficient to make conditionalizing the sequence + ;; unlikely to be profitable. + (set_attr "conds" "clob")] +) + +(define_insn "*epilogue_insns" + [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)] + "TARGET_EITHER" + "* + if (TARGET_32BIT) + return arm_output_epilogue (NULL); + else /* TARGET_THUMB1 */ + return thumb_unexpanded_epilogue (); + " + ; Length is absolute worst case + [(set_attr "length" "44") + (set_attr "type" "block") + ;; We don't clobber the conditions, but the potential length of this + ;; operation is sufficient to make conditionalizing the sequence + ;; unlikely to be profitable. + (set_attr "conds" "clob")] +) + +(define_expand "eh_epilogue" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:SI 2 "register_operand" ""))] + "TARGET_EITHER" + " + { + cfun->machine->eh_epilogue_sp_ofs = operands[1]; + if (GET_CODE (operands[2]) != REG || REGNO (operands[2]) != 2) + { + rtx ra = gen_rtx_REG (Pmode, 2); + + emit_move_insn (ra, operands[2]); + operands[2] = ra; + } + /* This is a hack -- we may have crystalized the function type too + early. */ + cfun->machine->func_type = 0; + }" +) + +;; This split is only used during output to reduce the number of patterns +;; that need assembler instructions adding to them. We allowed the setting +;; of the conditions to be implicit during rtl generation so that +;; the conditional compare patterns would work. However this conflicts to +;; some extent with the conditional data operations, so we have to split them +;; up again here. + +;; ??? Need to audit these splitters for Thumb-2. Why isn't normal +;; conditional execution sufficient? 
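+;; Each split below replaces an if_then_else whose condition still refers
+;; to the values being compared (rather than to the condition-code
+;; register) with an explicit compare followed by cond_exec assignments,
+;; using the reversed comparison for the assignment that implements the
+;; "else" arm.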
+ +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_dup 0) + (match_operand 4 "" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_dup 7) + (set (match_dup 0) (match_dup 4)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[5] = gen_rtx_REG (mode, CC_REGNUM); + operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[7] = gen_rtx_fmt_ee (rc, VOIDmode, operands[5], const0_rtx); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_operand 4 "" "") + (match_dup 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)]) + (set (match_dup 0) (match_dup 4)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + + operands[5] = gen_rtx_REG (mode, CC_REGNUM); + operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) + (cond_exec (match_dup 8) + (set (match_dup 0) (match_dup 5)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[8] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")]) + (match_operand:SI 4 "arm_rhs_operand" "") + (not:SI + (match_operand:SI 5 "s_register_operand" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) + (cond_exec (match_dup 8) + (set (match_dup 0) (not:SI (match_dup 5))))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[8] = gen_rtx_fmt_ee (rc, 
VOIDmode, operands[6], const0_rtx); + }" +) + +(define_insn "*cond_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r"))))] + "TARGET_ARM" + "@ + mvn%D4\\t%0, %2 + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "insn" "mvn") + (set_attr "length" "4,8")] +) + +;; The next two patterns occur when an AND operation is followed by a +;; scc insn sequence + +(define_insn "*sign_extract_onebit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + output_asm_insn (\"ands\\t%0, %1, %2\", operands); + return \"mvnne\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*not_signextract_onebit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "n")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + output_asm_insn (\"tst\\t%1, %2\", operands); + output_asm_insn (\"mvneq\\t%0, #0\", operands); + return \"movne\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) +;; ??? The above patterns need auditing for Thumb-2 + +;; Push multiple registers to the stack. Registers are in parallel (use ...) +;; expressions. For simplicity, the first register is also in the unspec +;; part. +(define_insn "*push_multi" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") + (unspec:BLK [(match_operand:SI 1 "s_register_operand" "")] + UNSPEC_PUSH_MULT))])] + "TARGET_32BIT" + "* + { + int num_saves = XVECLEN (operands[2], 0); + + /* For the StrongARM at least it is faster to + use STR to store only a single register. + In Thumb mode always use push, and the assembler will pick + something appropriate. 
*/ + if (num_saves == 1 && TARGET_ARM) + output_asm_insn (\"str\\t%1, [%m0, #-4]!\", operands); + else + { + int i; + char pattern[100]; + + if (TARGET_ARM) + strcpy (pattern, \"stmfd\\t%m0!, {%1\"); + else + strcpy (pattern, \"push\\t{%1\"); + + for (i = 1; i < num_saves; i++) + { + strcat (pattern, \", %|\"); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[2], 0, i), 0))]); + } + + strcat (pattern, \"}\"); + output_asm_insn (pattern, operands); + } + + return \"\"; + }" + [(set_attr "type" "store4")] +) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:SI 0 "s_register_operand" "rk") + (match_operand:SI 1 "s_register_operand" "rk")] + UNSPEC_PRLG_STK))] + "" + "" + [(set_attr "length" "0")] +) + +;; Similarly for the floating point registers +(define_insn "*push_fp_multi" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") + (unspec:BLK [(match_operand:XF 1 "f_register_operand" "")] + UNSPEC_PUSH_MULT))])] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "* + { + char pattern[100]; + + sprintf (pattern, \"sfmfd\\t%%1, %d, [%%m0]!\", XVECLEN (operands[2], 0)); + output_asm_insn (pattern, operands); + return \"\"; + }" + [(set_attr "type" "f_fpa_store")] +) + +;; Special patterns for dealing with the constant pool + +(define_insn "align_4" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN)] + "TARGET_EITHER" + "* + assemble_align (32); + return \"\"; + " +) + +(define_insn "align_8" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN8)] + "TARGET_EITHER" + "* + assemble_align (64); + return \"\"; + " +) + +(define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)] + "TARGET_EITHER" + "* + making_const_table = FALSE; + return \"\"; + " +) + +(define_insn "consttable_1" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_1)] + "TARGET_THUMB1" + "* + making_const_table = TRUE; + assemble_integer (operands[0], 1, BITS_PER_WORD, 1); + assemble_zeros (3); + return \"\"; + " + [(set_attr "length" "4")] +) + +(define_insn "consttable_2" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_2)] + "TARGET_THUMB1" + "* + making_const_table = TRUE; + gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT); + assemble_integer (operands[0], 2, BITS_PER_WORD, 1); + assemble_zeros (2); + return \"\"; + " + [(set_attr "length" "4")] +) + +(define_insn "consttable_4" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_4)] + "TARGET_EITHER" + "* + { + rtx x = operands[0]; + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (x))) + { + case MODE_FLOAT: + if (GET_MODE (x) == HFmode) + arm_emit_fp16_const (x); + else + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + assemble_real (r, GET_MODE (x), BITS_PER_WORD); + } + break; + default: + /* XXX: Sometimes gcc does something really dumb and ends up with + a HIGH in a constant pool entry, usually because it's trying to + load into a VFP register. We know this will always be used in + combination with a LO_SUM which ignores the high bits, so just + strip off the HIGH. 
*/ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + assemble_integer (x, 4, BITS_PER_WORD, 1); + mark_symbol_refs_as_used (x); + break; + } + return \"\"; + }" + [(set_attr "length" "4")] +) + +(define_insn "consttable_8" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_8)] + "TARGET_EITHER" + "* + { + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + { + case MODE_FLOAT: + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); + assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); + break; + } + default: + assemble_integer (operands[0], 8, BITS_PER_WORD, 1); + break; + } + return \"\"; + }" + [(set_attr "length" "8")] +) + +(define_insn "consttable_16" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_16)] + "TARGET_EITHER" + "* + { + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + { + case MODE_FLOAT: + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); + assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); + break; + } + default: + assemble_integer (operands[0], 16, BITS_PER_WORD, 1); + break; + } + return \"\"; + }" + [(set_attr "length" "16")] +) + +;; Miscellaneous Thumb patterns + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "TARGET_THUMB1" + " + if (flag_pic) + { + /* Hopefully, CSE will eliminate this copy. */ + rtx reg1 = copy_addr_to_reg (gen_rtx_LABEL_REF (Pmode, operands[1])); + rtx reg2 = gen_reg_rtx (SImode); + + emit_insn (gen_addsi3 (reg2, operands[0], reg1)); + operands[0] = reg2; + } + " +) + +;; NB never uses BX. +(define_insn "*thumb1_tablejump" + [(set (pc) (match_operand:SI 0 "register_operand" "l*r")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_THUMB1" + "mov\\t%|pc, %0" + [(set_attr "length" "2")] +) + +;; V5 Instructions, + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (clz:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch5" + "clz%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "insn" "clz")]) + +(define_insn "rbitsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))] + "TARGET_32BIT && arm_arch_thumb2" + "rbit%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "insn" "clz")]) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (ctz:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch_thumb2" + " + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_rbitsi2 (tmp, operands[1])); + emit_insn (gen_clzsi2 (operands[0], tmp)); + } + DONE; + " +) + +;; V5E instructions. 
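+;; The "prefetch" pattern below is the standard named pattern behind
+;; __builtin_prefetch; on ARMv5E and later it emits a PLD data-preload
+;; hint.  As a rough illustration (the function name is arbitrary),
+;;
+;;   void warm_up (const char *p) { __builtin_prefetch (p); }
+;;
+;; would typically compile to a single "pld [r0]".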
+ +(define_insn "prefetch" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "" "") + (match_operand:SI 2 "" ""))] + "TARGET_32BIT && arm_arch5e" + "pld\\t%a0") + +;; General predication pattern + +(define_cond_exec + [(match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") + (const_int 0)])] + "TARGET_32BIT" + "" +) + +(define_insn "prologue_use" + [(unspec:SI [(match_operand:SI 0 "register_operand" "")] UNSPEC_PROLOGUE_USE)] + "" + "%@ %0 needed for prologue" + [(set_attr "length" "0")] +) + + +;; Patterns for exception handling + +(define_expand "eh_return" + [(use (match_operand 0 "general_operand" ""))] + "TARGET_EITHER" + " + { + if (TARGET_32BIT) + emit_insn (gen_arm_eh_return (operands[0])); + else + emit_insn (gen_thumb_eh_return (operands[0])); + DONE; + }" +) + +;; We can't expand this before we know where the link register is stored. +(define_insn_and_split "arm_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + arm_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +(define_insn_and_split "thumb_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "l")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&l"))] + "TARGET_THUMB1" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + thumb_set_return_address (operands[0], operands[1]); + DONE; + }" +) + + +;; TLS support + +(define_insn "load_tp_hard" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TLS))] + "TARGET_HARD_TP" + "mrc%?\\tp15, 0, %0, c13, c0, 3\\t@ load_tp_hard" + [(set_attr "predicable" "yes")] +) + +;; Doesn't clobber R1-R3. Must use r0 for the first operand. +(define_insn "load_tp_soft" + [(set (reg:SI 0) (unspec:SI [(const_int 0)] UNSPEC_TLS)) + (clobber (reg:SI LR_REGNUM)) + (clobber (reg:SI IP_REGNUM)) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SOFT_TP" + "bl\\t__aeabi_read_tp\\t@ load_tp_soft" + [(set_attr "conds" "clob")] +) + +;; We only care about the lower 16 bits of the constant +;; being inserted into the upper 16 bits of the register. 
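+;; This corresponds to the ARMv6T2/Thumb-2 MOVT instruction, which writes
+;; the top halfword of a register while leaving the bottom halfword
+;; untouched.  As a rough illustration (the helper name is arbitrary),
+;;
+;;   unsigned int set_top (unsigned int x)
+;;   {
+;;     return (x & 0xffffu) | 0x12340000u;
+;;   }
+;;
+;; can be reduced by combine to a single MOVT of 0x1234 into the top half
+;; of the register, matched by the pattern below.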
+(define_insn "*arm_movtas_ze" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "const_int_operand" ""))] + "arm_arch_thumb2" + "movt%?\t%0, %L1" + [(set_attr "predicable" "yes") + (set_attr "length" "4")] +) + +(define_insn "*arm_rev" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch6" + "rev%?\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "4")] +) + +(define_insn "*thumb1_rev" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (bswap:SI (match_operand:SI 1 "s_register_operand" "l")))] + "TARGET_THUMB1 && arm_arch6" + "rev\t%0, %1" + [(set_attr "length" "2")] +) + +(define_expand "arm_legacy_rev" + [(set (match_operand:SI 2 "s_register_operand" "") + (xor:SI (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 16)) + (match_dup 1))) + (set (match_dup 2) + (lshiftrt:SI (match_dup 2) + (const_int 8))) + (set (match_operand:SI 3 "s_register_operand" "") + (rotatert:SI (match_dup 1) + (const_int 8))) + (set (match_dup 2) + (and:SI (match_dup 2) + (const_int -65281))) + (set (match_operand:SI 0 "s_register_operand" "") + (xor:SI (match_dup 3) + (match_dup 2)))] + "TARGET_32BIT" + "" +) + +;; Reuse temporaries to keep register pressure down. +(define_expand "thumb_legacy_rev" + [(set (match_operand:SI 2 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 24))) + (set (match_operand:SI 3 "s_register_operand" "") + (lshiftrt:SI (match_dup 1) + (const_int 24))) + (set (match_dup 3) + (ior:SI (match_dup 3) + (match_dup 2))) + (set (match_operand:SI 4 "s_register_operand" "") + (const_int 16)) + (set (match_operand:SI 5 "s_register_operand" "") + (rotatert:SI (match_dup 1) + (match_dup 4))) + (set (match_dup 2) + (ashift:SI (match_dup 5) + (const_int 24))) + (set (match_dup 5) + (lshiftrt:SI (match_dup 5) + (const_int 24))) + (set (match_dup 5) + (ior:SI (match_dup 5) + (match_dup 2))) + (set (match_dup 5) + (rotatert:SI (match_dup 5) + (match_dup 4))) + (set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (match_dup 5) + (match_dup 3)))] + "TARGET_THUMB" + "" +) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))] +"TARGET_EITHER && (arm_arch6 || !optimize_size)" +" + if (!arm_arch6) + { + rtx op2 = gen_reg_rtx (SImode); + rtx op3 = gen_reg_rtx (SImode); + + if (TARGET_THUMB) + { + rtx op4 = gen_reg_rtx (SImode); + rtx op5 = gen_reg_rtx (SImode); + + emit_insn (gen_thumb_legacy_rev (operands[0], operands[1], + op2, op3, op4, op5)); + } + else + { + emit_insn (gen_arm_legacy_rev (operands[0], operands[1], + op2, op3)); + } + + DONE; + } + " +) + +;; Load the load/store multiple patterns +(include "ldmstm.md") +;; Load the FPA co-processor patterns +(include "fpa.md") +;; Load the Maverick co-processor patterns +(include "cirrus.md") +;; Vector bits common to IWMMXT and Neon +(include "vec-common.md") +;; Load the Intel Wireless Multimedia Extension patterns +(include "iwmmxt.md") +;; Load the VFP co-processor patterns +(include "vfp.md") +;; Thumb-2 patterns +(include "thumb2.md") +;; Neon patterns +(include "neon.md") +;; Synchronization Primitives +(include "sync.md") diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt new file mode 100644 index 000000000..a39bb3a8d --- /dev/null +++ b/gcc/config/arm/arm.opt @@ -0,0 +1,171 @@ +; Options 
for the ARM port of the compiler. + +; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mabi= +Target RejectNegative Joined Var(target_abi_name) +Specify an ABI + +mabort-on-noreturn +Target Report Mask(ABORT_NORETURN) +Generate a call to abort if a noreturn function returns + +mapcs +Target RejectNegative Mask(APCS_FRAME) MaskExists Undocumented + +mapcs-float +Target Report Mask(APCS_FLOAT) +Pass FP arguments in FP registers + +mapcs-frame +Target Report Mask(APCS_FRAME) +Generate APCS conformant stack frames + +mapcs-reentrant +Target Report Mask(APCS_REENT) +Generate re-entrant, PIC code + +mapcs-stack-check +Target Report Mask(APCS_STACK) Undocumented + +march= +Target RejectNegative Joined +Specify the name of the target architecture + +marm +Target RejectNegative InverseMask(THUMB) Undocumented + +mbig-endian +Target Report RejectNegative Mask(BIG_END) +Assume target CPU is configured as big endian + +mcallee-super-interworking +Target Report Mask(CALLEE_INTERWORKING) +Thumb: Assume non-static functions may be called from ARM code + +mcaller-super-interworking +Target Report Mask(CALLER_INTERWORKING) +Thumb: Assume function pointers may go to non-Thumb aware code + +mcirrus-fix-invalid-insns +Target Report Mask(CIRRUS_FIX_INVALID_INSNS) +Cirrus: Place NOPs to avoid invalid instruction combinations + +mcpu= +Target RejectNegative Joined +Specify the name of the target CPU + +mfloat-abi= +Target RejectNegative Joined Var(target_float_abi_name) +Specify if floating point hardware should be used + +mfp= +Target RejectNegative Joined Undocumented Var(target_fpe_name) + +mfp16-format= +Target RejectNegative Joined Var(target_fp16_format_name) +Specify the __fp16 floating-point format + +;; Now ignored. 
+mfpe +Target RejectNegative Mask(FPE) Undocumented + +mfpe= +Target RejectNegative Joined Undocumented Var(target_fpe_name) + +mfpu= +Target RejectNegative Joined Var(target_fpu_name) +Specify the name of the target floating point hardware/format + +mhard-float +Target RejectNegative +Alias for -mfloat-abi=hard + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_END) +Assume target CPU is configured as little endian + +mlong-calls +Target Report Mask(LONG_CALLS) +Generate call insns as indirect calls, if necessary + +mpic-register= +Target RejectNegative Joined Var(arm_pic_register_string) +Specify the register to be used for PIC addressing + +mpoke-function-name +Target Report Mask(POKE_FUNCTION_NAME) +Store function names in object code + +msched-prolog +Target Report Mask(SCHED_PROLOG) +Permit scheduling of a function's prologue sequence + +msingle-pic-base +Target Report Mask(SINGLE_PIC_BASE) +Do not load the PIC register in function prologues + +msoft-float +Target RejectNegative +Alias for -mfloat-abi=soft + +mstructure-size-boundary= +Target RejectNegative Joined Var(structure_size_string) +Specify the minimum bit alignment of structures + +mthumb +Target Report Mask(THUMB) +Compile for the Thumb not the ARM + +mthumb-interwork +Target Report Mask(INTERWORK) +Support calls between Thumb and ARM instruction sets + +mtp= +Target RejectNegative Joined Var(target_thread_switch) +Specify how to access the thread pointer + +mtpcs-frame +Target Report Mask(TPCS_FRAME) +Thumb: Generate (non-leaf) stack frames even if not needed + +mtpcs-leaf-frame +Target Report Mask(TPCS_LEAF_FRAME) +Thumb: Generate (leaf) stack frames even if not needed + +mtune= +Target RejectNegative Joined +Tune code for the given processor + +mwords-little-endian +Target Report RejectNegative Mask(LITTLE_WORDS) +Assume big endian bytes, little endian words + +mvectorize-with-neon-quad +Target Report Mask(NEON_VECTORIZE_QUAD) +Use Neon quad-word (rather than double-word) registers for vectorization + +mword-relocations +Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) +Only generate absolute relocations on word sized values. + +mfix-cortex-m3-ldrd +Target Report Var(fix_cm3_ldrd) Init(2) +Avoid overlapping destination and address registers on LDRD instructions +that may trigger Cortex-M3 errata. diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md new file mode 100644 index 000000000..280af12f9 --- /dev/null +++ b/gcc/config/arm/arm1020e.md @@ -0,0 +1,375 @@ +;; ARM 1020E & ARM 1022E Pipeline Description +;; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc. +;; Contributed by Richard Earnshaw (richard.earnshaw@arm.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1020E Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. 
+;; + +;; This automaton provides a pipeline description for the ARM +;; 1020E core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1020e") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two pipelines: +;; +;; - An Arithmetic Logic Unit (ALU) pipeline. +;; +;; The ALU pipeline has fetch, issue, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. +;; +;; - A Load-Store Unit (LSU) pipeline. +;; +;; The LSU pipeline has decode, execute, memory, and write stages. +;; We only model the execute, memory and write stages. + +(define_cpu_unit "1020a_e,1020a_m,1020a_w" "arm1020e") +(define_cpu_unit "1020l_e,1020l_m,1020l_w" "arm1020e") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "1020alu_op" 1 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu")) + "1020a_e,1020a_m,1020a_w") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "1020alu_shift_op" 1 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_shift")) + "1020a_e,1020a_m,1020a_w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. +(define_insn_reservation "1020alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_shift_reg")) + "1020a_e*2,1020a_m,1020a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. + +;; The result of the "smul" and "smulw" instructions is not available +;; until after the memory stage. +(define_insn_reservation "1020mult1" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "insn" "smulxy,smulwy")) + "1020a_e,1020a_m,1020a_w") + +;; The "smlaxy" and "smlawx" instructions require two iterations through +;; the execute stage; the result is available immediately following +;; the execute stage. +(define_insn_reservation "1020mult2" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + "1020a_e*2,1020a_m,1020a_w") + +;; The "smlalxy", "mul", and "mla" instructions require two iterations +;; through the execute stage; the result is not available until after +;; the memory stage. 
+(define_insn_reservation "1020mult3" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "insn" "smlalxy,mul,mla")) + "1020a_e*2,1020a_m,1020a_w") + +;; The "muls" and "mlas" instructions loop in the execute stage for +;; four iterations in order to set the flags. The value result is +;; available after three iterations. +(define_insn_reservation "1020mult4" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "insn" "muls,mlas")) + "1020a_e*4,1020a_m,1020a_w") + +;; Long multiply instructions that produce two registers of +;; output (such as umull) make their results available in two cycles; +;; the least significant word is available before the most significant +;; word. That fact is not modeled; instead, the instructions are +;; described.as if the entire result was available at the end of the +;; cycle in which both words are available. + +;; The "umull", "umlal", "smull", and "smlal" instructions all take +;; three iterations through the execute cycle, and make their results +;; available after the memory cycle. +(define_insn_reservation "1020mult5" 4 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "insn" "umull,umlal,smull,smlal")) + "1020a_e*3,1020a_m,1020a_w") + +;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in +;; the execute stage for five iterations in order to set the flags. +;; The value result is available after four iterations. +(define_insn_reservation "1020mult6" 4 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "insn" "umulls,umlals,smulls,smlals")) + "1020a_e*5,1020a_m,1020a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; LSU instructions require six cycles to execute. They use the ALU +;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles +;; three through six. +;; Loads and stores which use a scaled register offset or scaled +;; register pre-indexed addressing mode take three cycles EXCEPT for +;; those that are base + offset with LSL of 0 or 2, or base - offset +;; with LSL of zero. The remainder take 1 cycle to execute. +;; For 4byte loads there is a bypass from the load stage + +(define_insn_reservation "1020load1_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load_byte,load1")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020store1_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store1")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +;; A load's result can be stored by an immediately following store +(define_bypass 1 "1020load1_op" "1020store1_op" "arm_no_early_store_addr_dep") + +;; On a LDM/STM operation, the LSU pipeline iterates until all of the +;; registers have been processed. +;; +;; The time it takes to load the data depends on whether or not the +;; base address is 64-bit aligned; if it is not, an additional cycle +;; is required. This model assumes that the address is always 64-bit +;; aligned. Because the processor can load two registers per cycle, +;; that assumption means that we use the same instruction reservations +;; for loading 2k and 2k - 1 registers. 
+;; +;; The ALU pipeline is decoupled after the first cycle unless there is +;; a register dependency; the dependency is cleared as soon as the LDM/STM +;; has dealt with the corresponding register. So for example, +;; stmia sp, {r0-r3} +;; add r0, r0, #4 +;; will have one fewer stalls than +;; stmia sp, {r0-r3} +;; add r3, r3, #4 +;; +;; As with ALU operations, if one of the destination registers is the +;; PC, there are additional stalls; that is not modeled. + +(define_insn_reservation "1020load2_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load2")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020store2_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store2")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020load34_op" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load3,load4")) + "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") + +(define_insn_reservation "1020store34_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store3,store4")) + "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "1020branch_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "branch")) + "1020a_e") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "1020call_op" 32 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "call")) + "1020a_e*32") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_cpu_unit "v10_fmac" "arm1020e") + +(define_cpu_unit "v10_ds" "arm1020e") + +(define_cpu_unit "v10_fmstat" "arm1020e") + +(define_cpu_unit "v10_ls1,v10_ls2,v10_ls3" "arm1020e") + +;; fmstat is a serializing instruction. It will stall the core until +;; the mac and ds units have completed. +(exclusion_set "v10_fmac,v10_ds" "v10_fmstat") + +(define_attr "vfp10" "yes,no" + (const (if_then_else (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "fpu" "vfp")) + (const_string "yes") (const_string "no")))) + +;; Note, no instruction can issue to the VFP if the core is stalled in the +;; first execute state. We model this by using 1020a_e in the first cycle. 
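+;; In the reservation strings used below, "+" reserves units in the same
+;; cycle, "," advances to the next cycle, and "*n" repeats a reservation
+;; for n consecutive cycles: "1020a_e+v10_fmac" ties up the core's first
+;; execute stage and the FMAC pipe together, while "v10_ds*14" keeps the
+;; DS unit busy for fourteen cycles.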
+(define_insn_reservation "v10_ffarith" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_farith" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "faddd,fadds")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_cvt" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_cvt")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_fmul" 6 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fmuls,fmacs,fmuld,fmacd")) + "1020a_e+v10_fmac*2") + +(define_insn_reservation "v10_fdivs" 18 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fdivs")) + "1020a_e+v10_ds*14") + +(define_insn_reservation "v10_fdivd" 32 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fdivd")) + "1020a_e+v10_fmac+v10_ds*28") + +(define_insn_reservation "v10_floads" 4 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_loads")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +;; We model a load of a double as needing all the vfp ls* stage in cycle 1. +;; This gives the correct mix between single-and double loads where a flds +;; followed by and fldd will stall for one cycle, but two back-to-back fldd +;; insns stall for two cycles. +(define_insn_reservation "v10_floadd" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_loadd")) + "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3") + +;; Moves to/from arm regs also use the load/store pipeline. + +(define_insn_reservation "v10_c2v" 4 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "r_2_f")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +(define_insn_reservation "v10_fstores" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_stores")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +(define_insn_reservation "v10_fstored" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_stored")) + "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3") + +(define_insn_reservation "v10_v2c" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_2_r")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "v10_to_cpsr" 2 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_flag")) + "1020a_e+v10_fmstat,1020a_e+1020l_e,1020l_m,1020l_w") + +;; VFP bypasses + +;; There are bypasses for most operations other than store + +(define_bypass 3 + "v10_c2v,v10_floads" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd,v10_cvt") + +(define_bypass 4 + "v10_floadd" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +;; Arithmetic to other arithmetic saves a cycle due to forwarding +(define_bypass 4 + "v10_ffarith,v10_farith" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 5 + "v10_fmul" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 17 + "v10_fdivs" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 31 + "v10_fdivd" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +;; VFP anti-dependencies. + +;; There is one anti-dependence in the following case (not yet modelled): +;; - After a store: one extra cycle for both fsts and fstd +;; Note, back-to-back fstd instructions will overload the load/store datapath +;; causing a two-cycle stall. diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md new file mode 100644 index 000000000..e62213638 --- /dev/null +++ b/gcc/config/arm/arm1026ejs.md @@ -0,0 +1,240 @@ +;; ARM 1026EJ-S Pipeline Description +;; Copyright (C) 2003, 2007 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1026EJ-S Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1026EJ-S core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1026ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two pipelines: +;; +;; - An Arithmetic Logic Unit (ALU) pipeline. +;; +;; The ALU pipeline has fetch, issue, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. +;; +;; - A Load-Store Unit (LSU) pipeline. +;; +;; The LSU pipeline has decode, execute, memory, and write stages. +;; We only model the execute, memory and write stages. + +(define_cpu_unit "a_e,a_m,a_w" "arm1026ejs") +(define_cpu_unit "l_e,l_m,l_w" "arm1026ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "alu_op" 1 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu")) + "a_e,a_m,a_w") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "alu_shift_op" 1 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_shift")) + "a_e,a_m,a_w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. +(define_insn_reservation "alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_shift_reg")) + "a_e*2,a_m,a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. + +;; The result of the "smul" and "smulw" instructions is not available +;; until after the memory stage. 
+(define_insn_reservation "mult1" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "insn" "smulxy,smulwy")) + "a_e,a_m,a_w") + +;; The "smlaxy" and "smlawx" instructions require two iterations through +;; the execute stage; the result is available immediately following +;; the execute stage. +(define_insn_reservation "mult2" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + "a_e*2,a_m,a_w") + +;; The "smlalxy", "mul", and "mla" instructions require two iterations +;; through the execute stage; the result is not available until after +;; the memory stage. +(define_insn_reservation "mult3" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "insn" "smlalxy,mul,mla")) + "a_e*2,a_m,a_w") + +;; The "muls" and "mlas" instructions loop in the execute stage for +;; four iterations in order to set the flags. The value result is +;; available after three iterations. +(define_insn_reservation "mult4" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "insn" "muls,mlas")) + "a_e*4,a_m,a_w") + +;; Long multiply instructions that produce two registers of +;; output (such as umull) make their results available in two cycles; +;; the least significant word is available before the most significant +;; word. That fact is not modeled; instead, the instructions are +;; described as if the entire result was available at the end of the +;; cycle in which both words are available. + +;; The "umull", "umlal", "smull", and "smlal" instructions all take +;; three iterations through the execute cycle, and make their results +;; available after the memory cycle. +(define_insn_reservation "mult5" 4 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "insn" "umull,umlal,smull,smlal")) + "a_e*3,a_m,a_w") + +;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in +;; the execute stage for five iterations in order to set the flags. +;; The value result is available after four iterations. +(define_insn_reservation "mult6" 4 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "insn" "umulls,umlals,smulls,smlals")) + "a_e*5,a_m,a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; LSU instructions require six cycles to execute. They use the ALU +;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles +;; three through six. +;; Loads and stores which use a scaled register offset or scaled +;; register pre-indexed addressing mode take three cycles EXCEPT for +;; those that are base + offset with LSL of 0 or 2, or base - offset +;; with LSL of zero. The remainder take 1 cycle to execute. +;; For 4byte loads there is a bypass from the load stage + +(define_insn_reservation "load1_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load_byte,load1")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "store1_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store1")) + "a_e+l_e,l_m,a_w+l_w") + +;; A load's result can be stored by an immediately following store +(define_bypass 1 "load1_op" "store1_op" "arm_no_early_store_addr_dep") + +;; On a LDM/STM operation, the LSU pipeline iterates until all of the +;; registers have been processed. 
+;; +;; The time it takes to load the data depends on whether or not the +;; base address is 64-bit aligned; if it is not, an additional cycle +;; is required. This model assumes that the address is always 64-bit +;; aligned. Because the processor can load two registers per cycle, +;; that assumption means that we use the same instruction reservations +;; for loading 2k and 2k - 1 registers. +;; +;; The ALU pipeline is stalled until the completion of the last memory +;; stage in the LSU pipeline. That is modeled by keeping the ALU +;; execute stage busy until that point. +;; +;; As with ALU operations, if one of the destination registers is the +;; PC, there are additional stalls; that is not modeled. + +(define_insn_reservation "load2_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load2")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "store2_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store2")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "load34_op" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load3,load4")) + "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w") + +(define_insn_reservation "store34_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store3,store4")) + "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "branch_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "branch")) + "nothing") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "call_op" 32 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "call")) + "nothing") diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md new file mode 100644 index 000000000..8fc30e976 --- /dev/null +++ b/gcc/config/arm/arm1136jfs.md @@ -0,0 +1,376 @@ +;; ARM 1136J[F]-S Pipeline Description +;; Copyright (C) 2003, 2007 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1136JF-S Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1136J-S and 1136JF-S cores. 
+;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1136jfs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are three distinct pipelines (page 1-26 and following): +;; +;; - A 4-stage decode pipeline, shared by all three. It has fetch (1), +;; fetch (2), decode, and issue stages. Since this is always involved, +;; we do not model it in the scheduler. +;; +;; - A 4-stage ALU pipeline. It has shifter, ALU (main integer operations), +;; and saturation stages. The fourth stage is writeback; see below. +;; +;; - A 4-stage multiply-accumulate pipeline. It has three stages, called +;; MAC1 through MAC3, and a fourth writeback stage. +;; +;; The 4th-stage writeback is shared between the ALU and MAC pipelines, +;; which operate in lockstep. Results from either pipeline will be +;; moved into the writeback stage. Because the two pipelines operate +;; in lockstep, we schedule them as a single "execute" pipeline. +;; +;; - A 4-stage LSU pipeline. It has address generation, data cache (1), +;; data cache (2), and writeback stages. (Note that this pipeline, +;; including the writeback stage, is independent from the ALU & LSU pipes.) + +(define_cpu_unit "e_1,e_2,e_3,e_wb" "arm1136jfs") ; ALU and MAC +; e_1 = Sh/Mac1, e_2 = ALU/Mac2, e_3 = SAT/Mac3 +(define_cpu_unit "l_a,l_dc1,l_dc2,l_wb" "arm1136jfs") ; Load/Store + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require eight cycles to execute, and use the ALU +;; pipeline in each of the eight stages. The results are available +;; after the alu stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modelled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "11_alu_op" 2 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu")) + "e_1,e_2,e_3,e_wb") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "11_alu_shift_op" 2 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_shift")) + "e_1,e_2,e_3,e_wb") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the shift stage. 
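As an illustration of the shift-by-register penalty just described (a sketch, not part of the upstream file): the two additions below differ only in whether the shift amount is a constant or a register, which is what separates the alu_shift and alu_shift_reg types used by the reservation that follows. Actual instruction selection depends on the optimization level.

/* Illustrative sketch, assuming 0 <= c < 32.  The first add can use a
   shift-by-constant operand (type alu_shift); the second needs a
   shift-by-register operand (type alu_shift_reg), modelled below with an
   extra cycle in the shift stage.  */
int
shift_operand_example (int a, int b, int c)
{
  int x = a + (b << 2);   /* e.g. add rX, rA, rB, lsl #2 */
  int y = a + (b << c);   /* e.g. add rY, rA, rB, lsl rC */
  return x ^ y;
}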
+(define_insn_reservation "11_alu_shift_reg_op" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_shift_reg")) + "e_1*2,e_2,e_3,e_wb") + +;; alu_ops can start sooner, if there is no shifter dependency +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_op") +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_op") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the first two execute stages until +;; the instruction has been passed through the multiplier array enough +;; times. + +;; Multiply and multiply-accumulate results are available after four stages. +(define_insn_reservation "11_mult1" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "insn" "mul,mla")) + "e_1*2,e_2,e_3,e_wb") + +;; The *S variants set the condition flags, which requires three more cycles. +(define_insn_reservation "11_mult2" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "insn" "muls,mlas")) + "e_1*2,e_2,e_3,e_wb") + +(define_bypass 3 "11_mult1,11_mult2" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_op") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_store1" + "arm_no_early_store_addr_dep") + +;; Signed and unsigned multiply long results are available across two cycles; +;; the less significant word is available one cycle before the more significant +;; word. Here we conservatively wait until both are available, which is +;; after three iterations and the memory cycle. The same is also true of +;; the two multiply-accumulate instructions. +(define_insn_reservation "11_mult3" 5 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "insn" "smull,umull,smlal,umlal")) + "e_1*3,e_2,e_3,e_wb*2") + +;; The *S variants set the condition flags, which requires three more cycles. 
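A small C sketch of the long-multiply case covered by 11_mult3 above and by the flag-setting 11_mult4 variant that follows (illustrative only; the exact instruction chosen depends on options and context): a 32x32->64 widening product is normally a single smull.

/* Illustrative sketch: a widening 32x32->64 multiply, typically emitted
   as one smull instruction whose two result words are modelled as
   becoming available together.  */
long long
widening_mul (int a, int b)
{
  return (long long) a * b;
}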
+(define_insn_reservation "11_mult4" 5 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "insn" "smulls,umulls,smlals,umlals")) + "e_1*3,e_2,e_3,e_wb*2") + +(define_bypass 4 "11_mult3,11_mult4" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_op") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_store1" + "arm_no_early_store_addr_dep") + +;; Various 16x16->32 multiplies and multiply-accumulates, using combinations +;; of high and low halves of the argument registers. They take a single +;; pass through the pipeline and make the result available after three +;; cycles. +(define_insn_reservation "11_mult5" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "insn" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx")) + "e_1,e_2,e_3,e_wb") + +(define_bypass 2 "11_mult5" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 2 "11_mult5" + "11_alu_op") +(define_bypass 2 "11_mult5" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_mult5" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "11_mult5" + "11_store1" + "arm_no_early_store_addr_dep") + +;; The same idea, then the 32-bit result is added to a 64-bit quantity. +(define_insn_reservation "11_mult6" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "insn" "smlalxy")) + "e_1*2,e_2,e_3,e_wb*2") + +;; Signed 32x32 multiply, then the most significant 32 bits are extracted +;; and are available after the memory stage. +(define_insn_reservation "11_mult7" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "insn" "smmul,smmulr")) + "e_1*2,e_2,e_3,e_wb") + +(define_bypass 3 "11_mult6,11_mult7" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_op") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_store1" + "arm_no_early_store_addr_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; These vary greatly depending on their arguments and the results of +;; stat prediction. Cycle count ranges from zero (unconditional branch, +;; folded dynamic prediction) to seven (incorrect predictions, etc). We +;; assume an optimal case for now, because the cost of a cache miss +;; overwhelms the cost of everything else anyhow. + +(define_insn_reservation "11_branches" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "branch")) + "nothing") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "11_call" 32 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "call")) + "nothing") + +;; Branches are predicted. A correctly predicted branch will be no +;; cost, but we're conservative here, and use the timings a +;; late-register would give us. 
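For the late-register branch timings mentioned above and the bypasses listed next, a typical producer/consumer pair looks like the loop below (a sketch, not from the upstream sources): the flag-setting comparison is the ALU op whose result the conditional branch consumes.

/* Illustrative sketch: the comparison feeds the conditional branch on
   every iteration, the dependence the ALU-to-branch bypasses below are
   written for.  */
int
count_negatives (const int *p, int n)
{
  int i, count = 0;
  for (i = 0; i < n; i++)
    if (p[i] < 0)       /* cmp ... followed by a conditional branch */
      count++;
  return count;
}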
+(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_branches") +(define_bypass 2 "11_alu_shift_reg_op" + "11_branches") +(define_bypass 2 "11_load1,11_load2" + "11_branches") +(define_bypass 3 "11_load34" + "11_branches") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback. +;; These models assume that all memory references hit in dcache. Also, +;; if the PC is one of the registers involved, there are additional stalls +;; not modelled here. Addressing modes are also not modelled. + +(define_insn_reservation "11_load1" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load1")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load byte results are not available until the writeback stage, where +;; the correct byte is extracted. + +(define_insn_reservation "11_loadb" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load_byte")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +(define_insn_reservation "11_store1" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store1")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load/store double words into adjacent registers. The timing and +;; latencies are different depending on whether the address is 64-bit +;; aligned. This model assumes that it is. +(define_insn_reservation "11_load2" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load2")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +(define_insn_reservation "11_store2" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store2")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load/store multiple registers. Two registers are stored per cycle. +;; Actual timing depends on how many registers are affected, so we +;; optimistically schedule a low latency. +(define_insn_reservation "11_load34" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load3,load4")) + "l_a+e_1,l_dc1*2,l_dc2,l_wb") + +(define_insn_reservation "11_store34" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store3,store4")) + "l_a+e_1,l_dc1*2,l_dc2,l_wb") + +;; A store can start immediately after an alu op, if that alu op does +;; not provide part of the address to access. 
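The address-versus-data distinction described above can be seen in a short C sketch (illustrative only): only the first store qualifies for the bypasses that follow, because its ALU result is store data rather than part of the address, which is the condition arm_no_early_store_addr_dep checks.

/* Illustrative sketch: the first store consumes the ALU result as data
   only (bypass applies); the second uses it to form the address, so the
   early-address dependence blocks the bypass.  */
void
store_dependences (int *p, int a, int b, int i)
{
  p[0] = a + b;       /* ALU result is the value being stored    */
  p[a + i] = b;       /* ALU result becomes part of the address  */
}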
+(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_store1" + "arm_no_early_store_addr_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_store1" + "arm_no_early_store_addr_dep") + +;; An alu op can start sooner after a load, if that alu op does not +;; have an early register dependency on the load +(define_bypass 2 "11_load1" + "11_alu_op") +(define_bypass 2 "11_load1" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_load1" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +(define_bypass 3 "11_loadb" + "11_alu_op") +(define_bypass 3 "11_loadb" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_loadb" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +;; A mul op can start sooner after a load, if that mul op does not +;; have an early multiply dependency +(define_bypass 2 "11_load1" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_load34" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_loadb" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") + +;; A store can start sooner after a load, if that load does not +;; produce part of the address to access +(define_bypass 2 "11_load1" + "11_store1" + "arm_no_early_store_addr_dep") +(define_bypass 3 "11_loadb" + "11_store1" + "arm_no_early_store_addr_dep") diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md new file mode 100644 index 000000000..d3908f9e3 --- /dev/null +++ b/gcc/config/arm/arm926ejs.md @@ -0,0 +1,187 @@ +;; ARM 926EJ-S Pipeline Description +;; Copyright (C) 2003, 2007 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 926EJ-S core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm926ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +(define_cpu_unit "e,m,w" "arm926ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. 
The results are available +;; after the execute stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "9_alu_op" 1 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "alu,alu_shift")) + "e,m,w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. +(define_insn_reservation "9_alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "alu_shift_reg")) + "e*2,m,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. Multiply operations occur in both the execute and memory +;; stages of the pipeline. + +(define_insn_reservation "9_mult1" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "insn" "smlalxy,mul,mla")) + "e*2,m,w") + +(define_insn_reservation "9_mult2" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "insn" "muls,mlas")) + "e*3,m,w") + +(define_insn_reservation "9_mult3" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "insn" "umull,umlal,smull,smlal")) + "e*3,m,w") + +(define_insn_reservation "9_mult4" 5 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "insn" "umulls,umlals,smulls,smlals")) + "e*4,m,w") + +(define_insn_reservation "9_mult5" 2 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "insn" "smulxy,smlaxy,smlawx")) + "e,m,w") + +(define_insn_reservation "9_mult6" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "insn" "smlalxy")) + "e*2,m,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; Loads with a shifted offset take 3 cycles, and are (a) probably the +;; most common and (b) the pessimistic assumption will lead to fewer stalls.
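A one-line C sketch of the shifted-offset load that the 9_load1_op reservation below is written for (illustrative only; code generation varies with options): indexing a word array normally becomes a load with a scaled register offset.

/* Illustrative sketch: typically an ldr with a scaled register offset,
   e.g. ldr r0, [r0, r1, lsl #2], the 3-cycle case assumed below.  */
int
load_indexed (const int *p, int i)
{
  return p[i];
}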
+(define_insn_reservation "9_load1_op" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load1,load_byte")) + "e*2,m,w") + +(define_insn_reservation "9_store1_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store1")) + "e,m,w") + +;; multiple word loads and stores +(define_insn_reservation "9_load2_op" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load2")) + "e,m*2,w") + +(define_insn_reservation "9_load3_op" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load3")) + "e,m*3,w") + +(define_insn_reservation "9_load4_op" 5 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load4")) + "e,m*4,w") + +(define_insn_reservation "9_store2_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store2")) + "e,m*2,w") + +(define_insn_reservation "9_store3_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store3")) + "e,m*3,w") + +(define_insn_reservation "9_store4_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store4")) + "e,m*4,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "9_branch_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "branch")) + "nothing") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "9_call_op" 32 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "call")) + "nothing") diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h new file mode 100644 index 000000000..9cba0a90a --- /dev/null +++ b/gcc/config/arm/arm_neon.h @@ -0,0 +1,12176 @@ +/* ARM NEON intrinsics include file. This file is generated automatically + using neon-gen.ml. Please do not edit manually. + + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_ARM_NEON_H +#define _GCC_ARM_NEON_H 1 + +#ifndef __ARM_NEON__ +#error You must enable NEON instructions (e.g. 
-mfloat-abi=softfp -mfpu=neon) to use arm_neon.h +#else + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_di int64x1_t; +typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_uhi uint16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_usi uint32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_udi uint64x1_t; +typedef __builtin_neon_qi int8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_hi int16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_si int32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_di int64x2_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_sf float32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly8 poly8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly16 poly16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_uqi uint8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16))); + +typedef float float32_t; +typedef __builtin_neon_poly8 poly8_t; +typedef __builtin_neon_poly16 poly16_t; + +typedef struct int8x8x2_t +{ + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t +{ + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t +{ + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t +{ + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t +{ + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t +{ + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t +{ + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t +{ + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t +{ + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t +{ + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t +{ + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t +{ + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t +{ + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct uint32x4x2_t +{ + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t +{ + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t +{ + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float32x2x2_t +{ + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t +{ + float32x4_t val[2]; +} float32x4x2_t; + +typedef struct poly8x8x2_t +{ + poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t +{ + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t +{ + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t +{ + poly16x8_t val[2]; +} poly16x8x2_t; + +typedef struct int8x8x3_t +{ + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t +{ + int8x16_t val[3]; +} 
int8x16x3_t; + +typedef struct int16x4x3_t +{ + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t +{ + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t +{ + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t +{ + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t +{ + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t +{ + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t +{ + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t +{ + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t +{ + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t +{ + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t +{ + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t +{ + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t +{ + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t +{ + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float32x2x3_t +{ + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t +{ + float32x4_t val[3]; +} float32x4x3_t; + +typedef struct poly8x8x3_t +{ + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t +{ + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t +{ + poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t +{ + poly16x8_t val[3]; +} poly16x8x3_t; + +typedef struct int8x8x4_t +{ + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t +{ + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t +{ + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t +{ + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t +{ + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t +{ + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t +{ + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t +{ + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t +{ + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t +{ + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t +{ + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t +{ + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t +{ + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t +{ + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t +{ + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef struct uint64x2x4_t +{ + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float32x2x4_t +{ + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t +{ + float32x4_t val[4]; +} float32x4x4_t; + +typedef struct poly8x8x4_t +{ + poly8x8_t val[4]; +} poly8x8x4_t; + +typedef struct poly8x16x4_t +{ + poly8x16_t val[4]; +} poly8x16x4_t; + +typedef struct poly16x4x4_t +{ + poly16x4_t val[4]; +} poly16x4x4_t; + +typedef struct poly16x8x4_t +{ + poly16x8_t val[4]; +} poly16x8x4_t; + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); +} + 
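A minimal usage sketch for the wrappers defined here (illustrative only, not part of the header): user code includes arm_neon.h and calls the intrinsics directly; each wrapper, such as vadd_s16 above, expands to the corresponding __builtin_neon_* call. This assumes NEON is enabled as the #error check above requires, for example with -mfpu=neon -mfloat-abi=softfp.

/* Illustrative usage sketch (separate user code, not part of arm_neon.h).  */
#include <arm_neon.h>

int16x4_t
add4_s16 (int16x4_t a, int16x4_t b)
{
  return vadd_s16 (a, b);   /* lane-wise 16-bit add on a 64-bit vector */
}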
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vaddq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddlv2si 
(__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vaddlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddlv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddwv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vaddwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddwv2si ((int64x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ 
+ return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) +vqadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqaddv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqadddi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqadddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqaddv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqaddv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 
0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vraddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vraddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vraddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmul_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmul_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmul_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmulq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_s16 (int16x8_t __a, int16x8_t __b) +{ + 
return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmulq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmulq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (poly8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmullv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmullv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vmullv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return 
(uint16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmullv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vmullv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqdmullv2si (__a, __b, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) +vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static 
__inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlslv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlslv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlslv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return 
(int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vsub_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsubq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return 
(uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsublv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsublv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsublv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsublv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsublv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsublv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsubwv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsubwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsubwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsubwv2si ((int64x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhsubv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhsub_u16 
(uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhsubv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqsubv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqsubdi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 1); +} + +__extension__ static __inline 
int32x4_t __attribute__ ((__always_inline__)) +vqsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqsubv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqsubv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsubhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsubhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsubhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsubhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsubhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsubhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsubhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsubhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsubhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + 
return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vceqv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vceqv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 
+vcge_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __b, (int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 
+vcle_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __b, (int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __b, (int32x2_t) __a, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __b, (int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __b, (int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __b, (int32x4_t) __a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t 
__attribute__ ((__always_inline__)) +vcgtq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __b, (int32x2_t) __a, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); +} + 
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __b, (int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcage_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagev2sf (__a, __b, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcageq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagev4sf (__a, __b, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcale_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagev2sf (__b, __a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagev4sf (__b, __a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcagt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcalt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 1); +} + +__extension__ static 
__inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vabdv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vabdv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vabdv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vabdv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vabd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vabdv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vabd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vabdv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vabd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vabdv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabdq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vabdv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vabdv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vabdv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabdq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vabdv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabdq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vabdv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vabdv8hi ((int16x8_t) __a, 
(int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vabdv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabdl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vabdlv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vabdlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vabdlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabdl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vabdlv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vabav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vabav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vabav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return 
(uint8x16_t)__builtin_neon_vabav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vabav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vabav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vabalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vabalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vabalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vmaxv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vmaxv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmaxq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t 
__attribute__ ((__always_inline__)) +vmaxq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmaxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmaxv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vmaxv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vmaxv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmaxv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmaxv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vminv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vminv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vminv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vminv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vminv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vminq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vminv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vminq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vminv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vminq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vminv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vminv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vminq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vminv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t 
__attribute__ ((__always_inline__)) +vminq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vminv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vminq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vminv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpaddv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpaddv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpaddl_s8 (int8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vpaddlv8qi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpaddl_s16 (int16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vpaddlv4hi (__a, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddl_s32 (int32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vpaddlv2si (__a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpaddl_u8 (uint8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vpaddlv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpaddl_u16 (uint16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vpaddlv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpaddl_u32 (uint32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vpaddlv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddlq_s8 (int8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vpaddlv16qi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpaddlq_s16 (int16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vpaddlv8hi (__a, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddlq_s32 (int32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vpaddlv4si (__a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddlq_u8 (uint8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vpaddlv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline 
uint32x4_t __attribute__ ((__always_inline__)) +vpaddlq_u16 (uint16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vpaddlv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddlq_u32 (uint32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vpaddlv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadal_s8 (int16x4_t __a, int8x8_t __b) +{ + return (int16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadal_s16 (int32x2_t __a, int16x4_t __b) +{ + return (int32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpadal_s32 (int64x1_t __a, int32x2_t __b) +{ + return (int64x1_t)__builtin_neon_vpadalv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadal_u8 (uint16x4_t __a, uint8x8_t __b) +{ + return (uint16x4_t)__builtin_neon_vpadalv8qi ((int16x4_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadal_u16 (uint32x2_t __a, uint16x4_t __b) +{ + return (uint32x2_t)__builtin_neon_vpadalv4hi ((int32x2_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpadal_u32 (uint64x1_t __a, uint32x2_t __b) +{ + return (uint64x1_t)__builtin_neon_vpadalv2si ((int64x1_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpadalq_s8 (int16x8_t __a, int8x16_t __b) +{ + return (int16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpadalq_s16 (int32x4_t __a, int16x8_t __b) +{ + return (int32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpadalq_s32 (int64x2_t __a, int32x4_t __b) +{ + return (int64x2_t)__builtin_neon_vpadalv4si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpadalq_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return (uint16x8_t)__builtin_neon_vpadalv16qi ((int16x8_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpadalq_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return (uint32x4_t)__builtin_neon_vpadalv8hi ((int32x4_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpadalq_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return (uint64x2_t)__builtin_neon_vpadalv4si ((int64x2_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpmaxv2sf (__a, __b, 3); +} + +__extension__ static 
__inline uint8x8_t __attribute__ ((__always_inline__)) +vpmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpminv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpminv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpminv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpminv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpminv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecps_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b, 3); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b, 3); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrts_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b, 3); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vshldi (__a, __b, 1); +} + +__extension__ static 
__inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vshldi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshl_u32 (uint32x2_t 
__a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return 
(int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 5); +} + 
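[Editorial aside, not part of the patch: the intrinsics above are the register-controlled shifts. vshl takes a per-lane signed count, where a negative count shifts right; the "r" forms round the discarded bits and the "q" forms saturate, all funneling into the same __builtin_neon_vshl/vqshl builtins and distinguished only by the trailing magic constant. A minimal sketch of the behaviour, assuming an ARM target built with NEON enabled (e.g. -mfpu=neon); vdup_n_s8 and vget_lane_s8 come from elsewhere in this same header.]

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  int8x8_t val   = vdup_n_s8 (101);   /* eight lanes of 101 */
  int8x8_t left  = vdup_n_s8 (1);     /* positive count: shift left */
  int8x8_t right = vdup_n_s8 (-1);    /* negative count: shift right */

  /* 101 << 1 = 202, which wraps to -54 in an int8 lane ...          */
  int8_t wrapped   = vget_lane_s8 (vshl_s8 (val, left), 0);
  /* ... while the saturating form clamps to 127 instead.            */
  int8_t clamped   = vget_lane_s8 (vqshl_s8 (val, left), 0);
  /* 101 >> 1 truncates to 50; the rounding form yields 51.          */
  int8_t truncated = vget_lane_s8 (vshl_s8 (val, right), 0);
  int8_t rounded   = vget_lane_s8 (vrshl_s8 (val, right), 0);

  printf ("%d %d %d %d\n", wrapped, clamped, truncated, rounded);
  return 0;
}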
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static 
__inline uint16x8_t __attribute__ ((__always_inline__)) +vshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 4); +} + 
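[Editorial aside, not part of the patch: vshr_n and vrshr_n are the immediate-count right shifts. The count is passed as "const int" and must be an integer constant expression in the range the instruction accepts (1 to the lane width); the rounding forms add the rounding constant before the low bits are discarded. A minimal sketch, with a helper name chosen only for illustration.]

#include <arm_neon.h>

/* Quarter 8-bit samples, rounding to nearest: for a lane holding 255,
   vshr_n_u8 (samples, 2) would truncate to 63, while the rounding form
   below yields 64.  The count 2 must be a compile-time constant.  */
static inline uint8x8_t quarter_rounded (uint8x8_t samples)
{
  return vrshr_n_u8 (samples, 2);
}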
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrn_nv4si 
((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 5); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 5); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 5); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshl_nv8qi 
((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vqshl_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshl_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshl_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_n_u32 (uint32x2_t __a, const int __b) +{ + return 
(uint32x2_t)__builtin_neon_vqshl_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vqshl_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vqshl_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vqshl_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vqshl_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vqshl_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshlu_n_s8 (int8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshlu_n_s16 (int16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshlu_n_s32 (int32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshlu_n_s64 (int64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vqshlu_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshluq_n_s8 (int8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshluq_n_s16 (int16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshluq_n_s32 (int32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b, 1); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshluq_n_s64 (int64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshll_n_s8 (int8x8_t __a, const int __b) +{ + 
return (int16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshll_n_s16 (int16x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshll_n_s32 (int32x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshll_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshll_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshll_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshll_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshll_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshll_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsraq_n_s64 
(int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 5); +} + +__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) +vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsri_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsri_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c); +} + +__extension__ static __inline 
int16x8_t __attribute__ ((__always_inline__)) +vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsri_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsli_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t 
__attribute__ ((__always_inline__)) +vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsli_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsli_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vabsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vabsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vabsv2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabs_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vabsv2sf (__a, 3); +} + 
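[Editorial aside, not part of the patch: vsli_n shifts its second operand left by the immediate and inserts it over the first operand, preserving the first operand's low bits; vsri_n is the right-shift counterpart that preserves the high bits. A minimal sketch under the assumption that both inputs hold 4-bit values; the helper name is illustrative only.]

#include <arm_neon.h>

/* Pack two vectors of nibbles (each lane assumed < 16) into one byte per
   lane: each result lane is (high << 4) | low, because vsli_n_u8 keeps the
   low 4 bits of its first operand and inserts the shifted second operand
   above them.  */
static inline uint8x8_t pack_nibbles (uint8x8_t low, uint8x8_t high)
{
  return vsli_n_u8 (low, high, 4);
}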
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vabsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vabsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vabsv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabsq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vabsv4sf (__a, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqabs_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqabsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqabs_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqabsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqabs_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqabsv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqabsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vqabsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqabsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vqabsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqabsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vqabsv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vneg_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vnegv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vneg_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vnegv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vneg_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vnegv2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vneg_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vnegv2sf (__a, 3); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vnegq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vnegv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vnegq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vnegv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vnegq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vnegv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vnegq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vnegv4sf (__a, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqneg_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqnegv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqneg_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqnegv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqneg_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqnegv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ 
((__always_inline__)) +vqnegq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vqnegv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqnegq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vqnegv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqnegq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vqnegv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmvn_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vmvnv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmvn_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vmvnv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmvn_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vmvnv2si (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmvn_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmvn_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vmvnv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmvn_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vmvnv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmvn_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmvnq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vmvnv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmvnq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vmvnv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmvnq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vmvnv4si (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmvnq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmvnq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vmvnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmvnq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vmvnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmvnq_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcls_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vclsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcls_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vclsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcls_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vclsv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vclsv16qi (__a, 1); +} + +__extension__ static 
__inline int16x8_t __attribute__ ((__always_inline__)) +vclsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vclsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vclsv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vclz_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vclzv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vclz_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vclzv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vclz_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vclzv2si (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclz_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vclzv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclz_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vclzv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclz_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vclzv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclzq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vclzv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclzq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vclzv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclzq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vclzv4si (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclzq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vclzv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclzq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vclzv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclzq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vclzv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcnt_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vcntv8qi (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcnt_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcnt_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcntq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vcntv16qi (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcntq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcntq_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecpe_f32 (float32x2_t __a) +{ + return 
(float32x2_t)__builtin_neon_vrecpev2sf (__a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrecpe_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vrecpev2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpeq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrecpev4sf (__a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrecpeq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vrecpev4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrte_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrsqrtev2sf (__a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsqrte_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vrsqrtev2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrsqrtev4sf (__a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vrsqrtev4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vget_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8_t)__builtin_neon_vget_lanev8qi (__a, __b, 1); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vget_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16_t)__builtin_neon_vget_lanev4hi (__a, __b, 1); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vget_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32_t)__builtin_neon_vget_lanev2si (__a, __b, 1); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int __b) +{ + return (float32_t)__builtin_neon_vget_lanev2sf (__a, __b, 3); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vget_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vget_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vget_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vget_lanev2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 2); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 2); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64_t)__builtin_neon_vget_lanedi (__a, __b, 1); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64_t)__builtin_neon_vget_lanedi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8_t 
__attribute__ ((__always_inline__)) +vgetq_lane_s8 (int8x16_t __a, const int __b) +{ + return (int8_t)__builtin_neon_vget_lanev16qi (__a, __b, 1); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vgetq_lane_s16 (int16x8_t __a, const int __b) +{ + return (int16_t)__builtin_neon_vget_lanev8hi (__a, __b, 1); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vgetq_lane_s32 (int32x4_t __a, const int __b) +{ + return (int32_t)__builtin_neon_vget_lanev4si (__a, __b, 1); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) +{ + return (float32_t)__builtin_neon_vget_lanev4sf (__a, __b, 3); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vgetq_lane_u8 (uint8x16_t __a, const int __b) +{ + return (uint8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vgetq_lane_u16 (uint16x8_t __a, const int __b) +{ + return (uint16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vgetq_lane_u32 (uint32x4_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vget_lanev4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) +{ + return (poly8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 2); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) +{ + return (poly16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 2); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) +{ + return (int64_t)__builtin_neon_vget_lanev2di (__a, __b, 1); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vgetq_lane_u64 (uint64x2_t __a, const int __b) +{ + return (uint64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, 
(int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c) +{ + return 
(int64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcreate_s8 (uint64_t __a) +{ + return (int8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcreate_s16 (uint64_t __a) +{ + return (int16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcreate_s32 (uint64_t __a) +{ + return (int32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vcreate_s64 (uint64_t __a) +{ + return (int64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcreate_f32 (uint64_t __a) +{ + return (float32x2_t)__builtin_neon_vcreatev2sf ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcreate_u8 (uint64_t __a) +{ + return (uint8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcreate_u16 (uint64_t __a) +{ + return (uint16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcreate_u32 (uint64_t __a) +{ + return (uint32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcreate_u64 (uint64_t __a) +{ + return (uint64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcreate_p8 (uint64_t __a) +{ + return (poly8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vcreate_p16 (uint64_t __a) +{ + return (poly16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_n_s8 (int8_t __a) +{ + return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_n_s16 (int16_t __a) +{ + return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_n_s32 (int32_t __a) +{ + return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_n_f32 (float32_t __a) +{ + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_n_u8 (uint8_t __a) +{ + return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_n_u16 (uint16_t __a) +{ + return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ 
((__always_inline__)) +vdup_n_u32 (uint32_t __a) +{ + return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_n_p8 (poly8_t __a) +{ + return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_n_p16 (poly16_t __a) +{ + return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_n_s64 (int64_t __a) +{ + return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_n_u64 (uint64_t __a) +{ + return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_n_s8 (int8_t __a) +{ + return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_n_s16 (int16_t __a) +{ + return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_n_s32 (int32_t __a) +{ + return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_n_f32 (float32_t __a) +{ + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_n_u8 (uint8_t __a) +{ + return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_n_u16 (uint16_t __a) +{ + return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_n_u32 (uint32_t __a) +{ + return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_n_p8 (poly8_t __a) +{ + return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_n_p16 (poly16_t __a) +{ + return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_n_s64 (int64_t __a) +{ + return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_n_u64 (uint64_t __a) +{ + return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmov_n_s8 (int8_t __a) +{ + return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmov_n_s16 (int16_t __a) +{ + return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmov_n_s32 (int32_t __a) +{ + return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 
+vmov_n_f32 (float32_t __a) +{ + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmov_n_u8 (uint8_t __a) +{ + return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmov_n_u16 (uint16_t __a) +{ + return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmov_n_u32 (uint32_t __a) +{ + return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmov_n_p8 (poly8_t __a) +{ + return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vmov_n_p16 (poly16_t __a) +{ + return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vmov_n_s64 (int64_t __a) +{ + return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vmov_n_u64 (uint64_t __a) +{ + return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovq_n_s8 (int8_t __a) +{ + return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovq_n_s16 (int16_t __a) +{ + return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovq_n_s32 (int32_t __a) +{ + return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmovq_n_f32 (float32_t __a) +{ + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovq_n_u8 (uint8_t __a) +{ + return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovq_n_u16 (uint16_t __a) +{ + return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovq_n_u32 (uint32_t __a) +{ + return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmovq_n_p8 (poly8_t __a) +{ + return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmovq_n_p16 (poly16_t __a) +{ + return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovq_n_s64 (int64_t __a) +{ + return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovq_n_u64 (uint64_t __a) +{ + return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_lane_s8 
(int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_lane_f32 (float32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vdup_lanev2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vdup_lanedi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vdup_lanedi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_f32 (float32x2_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vdup_lanev4si ((int32x2_t) __a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ 
((__always_inline__)) +vdupq_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcombine_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcombine_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcombine_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x4_t)__builtin_neon_vcombinev2si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcombine_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x2_t)__builtin_neon_vcombinedi (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcombine_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcombine_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcombine_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcombine_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x4_t)__builtin_neon_vcombinev2si ((int32x2_t) __a, (int32x2_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcombine_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x2_t)__builtin_neon_vcombinedi ((int64x1_t) __a, (int64x1_t) __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcombine_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vcombine_p16 (poly16x4_t __a, poly16x4_t __b) +{ + return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t __a) +{ + return (int8x8_t)__builtin_neon_vget_highv16qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t __a) +{ + return (int16x4_t)__builtin_neon_vget_highv8hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t __a) +{ + return (int32x2_t)__builtin_neon_vget_highv4si (__a); +} + +__extension__ static __inline 
int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t __a) +{ + return (int64x1_t)__builtin_neon_vget_highv2di (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t __a) +{ + return (float32x2_t)__builtin_neon_vget_highv4sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 (uint8x16_t __a) +{ + return (uint8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vget_highv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t __a) +{ + return (poly8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + return (int8x8_t)__builtin_neon_vget_lowv16qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + return (int16x4_t)__builtin_neon_vget_lowv8hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + return (int32x2_t)__builtin_neon_vget_lowv4si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + return (float32x2_t)__builtin_neon_vget_lowv4sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + return (uint8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vget_lowv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + return (poly8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_low_s64 (int64x2_t __a) +{ + return (int64x1_t)__builtin_neon_vget_lowv2di (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_s32_f32 (float32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vcvtv2sf 
(__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_s32 (int32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vcvtv2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_u32 (uint32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vcvtv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vcvtv2sf (__a, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vcvtv4sf (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_n_s32_f32 (float32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_n_f32_s32 (int32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_n_f32_u32 (uint32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vcvt_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_n_u32_f32 (float32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_s32_f32 (float32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_f32_s32 (int32x4_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_f32_u32 (uint32x4_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vcvt_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_u32_f32 (float32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmovn_s16 (int16x8_t __a) +{ + return (int8x8_t)__builtin_neon_vmovnv8hi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmovn_s32 (int32x4_t __a) +{ + return (int16x4_t)__builtin_neon_vmovnv4si (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmovn_s64 (int64x2_t __a) +{ + return (int32x2_t)__builtin_neon_vmovnv2di (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vmovnv8hi ((int16x8_t) __a, 0); +} + +__extension__ 
static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vmovnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vmovnv2di ((int64x2_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqmovn_s16 (int16x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqmovnv8hi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqmovn_s32 (int32x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqmovnv4si (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqmovn_s64 (int64x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqmovnv2di (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vqmovnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vqmovnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vqmovnv2di ((int64x2_t) __a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovun_s16 (int16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vqmovunv8hi (__a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovun_s32 (int32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vqmovunv4si (__a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovun_s64 (int64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vqmovunv2di (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_s8 (int8x8_t __a) +{ + return (int16x8_t)__builtin_neon_vmovlv8qi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovl_s16 (int16x4_t __a) +{ + return (int32x4_t)__builtin_neon_vmovlv4hi (__a, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovl_s32 (int32x2_t __a) +{ + return (int64x2_t)__builtin_neon_vmovlv2si (__a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_u8 (uint8x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vmovlv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovl_u16 (uint16x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vmovlv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovl_u32 (uint32x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vmovlv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl1_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl1_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl1_p8 (poly8x8_t __a, uint8x8_t __b) +{ + return (poly8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b); +} + 
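A minimal usage sketch for the single-register table-lookup intrinsics defined just above, assuming a NEON-enabled ARM target with this header included; the helper name reverse_lanes_u8 and its index table are illustrative only and are not part of the upstream header:

/* Reverse the eight lanes of a uint8x8_t with vtbl1_u8: each byte of the
   index vector selects the source lane it names, so indices 7..0 return
   the lanes in reverse order (out-of-range indices would yield zero).  */
static inline uint8x8_t
reverse_lanes_u8 (uint8x8_t __v)
{
  static const uint8_t __idx[8] = { 7, 6, 5, 4, 3, 2, 1, 0 };
  return vtbl1_u8 (__v, vld1_u8 (__idx));
}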
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl2_s8 (int8x8x2_t __a, int8x8_t __b) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl3_s8 (int8x8x3_t __a, int8x8_t __b) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl4_s8 (int8x8x4_t __a, int8x8_t __b) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) +{ + return (poly8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return 
(uint8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vmul_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vmul_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vmul_lanev8hi ((int16x8_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vmul_lanev4si ((int32x4_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d) +{ + return (float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x4_t)__builtin_neon_vmla_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x2_t)__builtin_neon_vmla_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d) +{ + return (float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x8_t)__builtin_neon_vmla_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmla_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, 
const int __d) +{ + return (int32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmlal_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint64x2_t)__builtin_neon_vmlal_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d) +{ + return (float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x4_t)__builtin_neon_vmls_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x2_t)__builtin_neon_vmls_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d) +{ + return (float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d) +{ + return 
(uint16x8_t)__builtin_neon_vmls_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmls_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmlsl_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint64x2_t)__builtin_neon_vmlsl_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vmull_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vmull_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_n_f32 (float32x2_t __a, float32_t __b) +{ + return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_n_f32 (float32x4_t __a, float32_t __b) +{ + return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_n_u16 (uint16x8_t __a, uint16_t __b) +{ + return (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_n_u32 (uint32x4_t __a, uint32_t __b) +{ 
+ return (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int32x4_t)__builtin_neon_vmull_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int64x2_t)__builtin_neon_vmull_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return (uint32x4_t)__builtin_neon_vmull_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return (uint64x2_t)__builtin_neon_vmull_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int64x2_t)__builtin_neon_vqdmull_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, (__builtin_neon_si) __c, 1); 
+} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint16x4_t)__builtin_neon_vmla_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint32x2_t)__builtin_neon_vmla_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return (int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return (int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) +{ + return (uint16x8_t)__builtin_neon_vmla_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) +{ + return (uint32x4_t)__builtin_neon_vmla_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlal_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlal_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) +{ + return 
(int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint16x4_t)__builtin_neon_vmls_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint32x2_t)__builtin_neon_vmls_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return (int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return (int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) +{ + return (uint16x8_t)__builtin_neon_vmls_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) +{ + return (uint32x4_t)__builtin_neon_vmls_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlsl_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlsl_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 
+vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vext_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vext_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vext_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vextv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vext_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vextdi (__a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vext_f32 (float32x2_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vextv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vextdi ((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vextv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vextv2di (__a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c); +} + 
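The wrappers above provide the scalar-broadcast multiply/accumulate forms (vmul_n, vmla_n, vmls_n and their widening and saturating-doubling variants) and the vext lane-extraction forms. A minimal caller-side sketch of how two of them compose is shown below; it is illustrative only, not part of the imported header, and the helper name scale_and_shift is hypothetical.

/* Illustrative sketch, not upstream code: multiply-accumulate a vector
   by a broadcast scalar, then rotate the lanes left by one position.
   The helper name is hypothetical; only vmlaq_n_f32 and vextq_f32 from
   the header above are used.  */
#include <arm_neon.h>

static inline float32x4_t
scale_and_shift (float32x4_t __acc, float32x4_t __x, float32_t __k)
{
  float32x4_t __t = vmlaq_n_f32 (__acc, __x, __k);  /* acc + x * k, per lane */
  return vextq_f32 (__t, __t, 1);                   /* lanes 1,2,3,0 */
}

Passing the same vector as both vext operands, as here, yields a lane rotation; with distinct operands vext extracts a window spanning the concatenation of the two.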
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vextv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev64_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vrev64v8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev64_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vrev64v4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrev64_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vrev64v2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrev64_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrev64v2sf (__a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev64_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev64_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrev64_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vrev64v2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev64_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev64_p16 (poly16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev64q_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vrev64v16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev64q_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vrev64v8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrev64q_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vrev64v4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ 
((__always_inline__)) +vrev64q_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrev64v4sf (__a, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev64q_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev64q_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrev64q_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vrev64v4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev64q_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev64q_p16 (poly16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vrev32v8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vrev32v4hi (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vrev32v16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vrev32v8hi (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev32q_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev32q_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vrev16v8qi (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev16_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 0); +} + +__extension__ static 
__inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vrev16v16qi (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) +{ + return (int64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vbslv2sf ((int32x2_t) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) +{ + return (uint64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, (int64x1_t) __b, (int64x1_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) +{ + return (poly8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) +{ + return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t 
__c) +{ + return (int16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return (int64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vbslv4sf ((int32x4_t) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) +{ + return (poly8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) +{ + return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vtrn_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __builtin_neon_vtrnv8qi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vtrn_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __builtin_neon_vtrnv4hi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vtrn_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __builtin_neon_vtrnv2si (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vtrn_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __builtin_neon_vtrnv2sf (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vtrn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vtrn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b); 
+ return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vtrn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __builtin_neon_vtrnv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vtrn_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vtrn_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_s8 (int8x16_t __a, int8x16_t __b) +{ + int8x16x2_t __rv; + __builtin_neon_vtrnv16qi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __builtin_neon_vtrnv8hi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __builtin_neon_vtrnv4si (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __builtin_neon_vtrnv4sf (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __builtin_neon_vtrnv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b); + return __rv; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b); + return __rv; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vzip_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __builtin_neon_vzipv8qi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vzip_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __builtin_neon_vzipv4hi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vzip_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __builtin_neon_vzipv2si (&__rv.val[0], 
__a, __b); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vzip_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __builtin_neon_vzipv2sf (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vzip_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vzip_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vzip_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __builtin_neon_vzipv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vzip_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vzip_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vzipq_s8 (int8x16_t __a, int8x16_t __b) +{ + int8x16x2_t __rv; + __builtin_neon_vzipv16qi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vzipq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __builtin_neon_vzipv8hi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vzipq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __builtin_neon_vzipv4si (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vzipq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __builtin_neon_vzipv4sf (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vzipq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vzipq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vzipq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __builtin_neon_vzipv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vzipq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b); + return __rv; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vzipq_p16 (poly16x8_t __a, poly16x8_t __b) +{ 
+ poly16x8x2_t __rv; + __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b); + return __rv; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vuzp_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __builtin_neon_vuzpv8qi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vuzp_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __builtin_neon_vuzpv4hi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vuzp_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __builtin_neon_vuzpv2si (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vuzp_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __builtin_neon_vuzpv2sf (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vuzp_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vuzp_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vuzp_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __builtin_neon_vuzpv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vuzp_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vuzp_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_s8 (int8x16_t __a, int8x16_t __b) +{ + int8x16x2_t __rv; + __builtin_neon_vuzpv16qi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __builtin_neon_vuzpv8hi (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __builtin_neon_vuzpv4si (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __builtin_neon_vuzpv4sf (&__rv.val[0], __a, __b); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + 
__builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __builtin_neon_vuzpv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b); + return __rv; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b); + return __rv; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_s8 (const int8_t * __a) +{ + return (int8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_s16 (const int16_t * __a) +{ + return (int16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_s32 (const int32_t * __a) +{ + return (int32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_s64 (const int64_t * __a) +{ + return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_f32 (const float32_t * __a) +{ + return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_u8 (const uint8_t * __a) +{ + return (uint8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_u16 (const uint16_t * __a) +{ + return (uint16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_u32 (const uint32_t * __a) +{ + return (uint32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_u64 (const uint64_t * __a) +{ + return (uint64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_p8 (const poly8_t * __a) +{ + return (poly8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_p16 (const poly16_t * __a) +{ + return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_s8 (const int8_t * __a) +{ + return (int8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_s16 (const int16_t * __a) +{ + return (int16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_s32 (const int32_t * __a) +{ + 
return (int32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_s64 (const int64_t * __a) +{ + return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_f32 (const float32_t * __a) +{ + return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_u8 (const uint8_t * __a) +{ + return (uint8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_u16 (const uint16_t * __a) +{ + return (uint16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_u32 (const uint32_t * __a) +{ + return (uint32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_u64 (const uint64_t * __a) +{ + return (uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_p8 (const poly8_t * __a) +{ + return (poly8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_p16 (const poly16_t * __a) +{ + return (poly16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_lane_s16 (const int16_t * __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_lane_p8 (const poly8_t * 
__a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 
+vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_dup_s8 (const int8_t * __a) +{ + return (int8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_dup_s16 (const int16_t * __a) +{ + return (int16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_dup_s32 (const int32_t * __a) +{ + return (int32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_dup_f32 (const float32_t * __a) +{ + return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_dup_u8 (const uint8_t * __a) +{ + return (uint8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_dup_u16 (const uint16_t * __a) +{ + return (uint16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_dup_u32 (const uint32_t * __a) +{ + return (uint32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_dup_p8 (const poly8_t * __a) +{ + return (poly8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_dup_p16 (const poly16_t * __a) +{ + return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_dup_s64 (const int64_t * __a) +{ + return (int64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_dup_u64 (const uint64_t * __a) +{ + return (uint64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_s8 (const int8_t * __a) +{ + return (int8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_s16 (const int16_t * __a) +{ + return (int16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_s32 (const int32_t * __a) +{ + return (int32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_f32 (const float32_t * __a) +{ + return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_u8 (const uint8_t * __a) +{ + return (uint8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 
+vld1q_dup_u16 (const uint16_t * __a) +{ + return (uint16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_u32 (const uint32_t * __a) +{ + return (uint32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_p8 (const poly8_t * __a) +{ + return (poly8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_p16 (const poly16_t * __a) +{ + return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_s64 (const int64_t * __a) +{ + return (int64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_u64 (const uint64_t * __a) +{ + return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s8 (int8_t * __a, int8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s16 (int16_t * __a, int16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s32 (int32_t * __a, int32x2_t __b) +{ + __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s64 (int64_t * __a, int64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f32 (float32_t * __a, float32x2_t __b) +{ + __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u8 (uint8_t * __a, uint8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u16 (uint16_t * __a, uint16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u32 (uint32_t * __a, uint32x2_t __b) +{ + __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, (int32x2_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u64 (uint64_t * __a, uint64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p8 (poly8_t * __a, poly8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p16 (poly16_t * __a, poly16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s8 (int8_t * __a, int8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s16 (int16_t * __a, int16x8_t __b) +{ + __builtin_neon_vst1v8hi 
((__builtin_neon_hi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s32 (int32_t * __a, int32x4_t __b) +{ + __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s64 (int64_t * __a, int64x2_t __b) +{ + __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f32 (float32_t * __a, float32x4_t __b) +{ + __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u8 (uint8_t * __a, uint8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u16 (uint16_t * __a, uint16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u32 (uint32_t * __a, uint32x4_t __b) +{ + __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, (int32x4_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u64 (uint64_t * __a, uint64x2_t __b) +{ + __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p8 (poly8_t * __a, poly8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p16 (poly16_t * __a, poly16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ 
((__always_inline__)) +vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_s8 (const int8_t * __a) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_s16 (const int16_t * __a) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} 
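The wrappers above cover the contiguous load/store family (vld1*, vst1*, including the _lane and _dup forms) and begin the structure loads, where vld2_s16 de-interleaves pairs of int16 elements into the two halves of an int16x4x2_t. A minimal usage sketch follows; it is illustrative only, not part of the imported header, and the helper name copy_left_channel is hypothetical.

/* Illustrative sketch, not upstream code: de-interleave four stereo
   int16 sample pairs with vld2_s16 and store the left channel back
   contiguously with vst1_s16.  The helper name is hypothetical.  */
#include <arm_neon.h>

static inline void
copy_left_channel (int16_t * __dst, const int16_t * __interleaved)
{
  int16x4x2_t __lr = vld2_s16 (__interleaved);  /* val[0] = L, val[1] = R */
  vst1_s16 (__dst, __lr.val[0]);                /* write 4 left samples */
}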
+ +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_s32 (const int32_t * __a) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_f32 (const float32_t * __a) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_u8 (const uint8_t * __a) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_u16 (const uint16_t * __a) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_u32 (const uint32_t * __a) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_p8 (const poly8_t * __a) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_p16 (const poly16_t * __a) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_s64 (const int64_t * __a) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_u64 (const uint64_t * __a) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vld2q_s8 (const int8_t * __a) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_s16 (const int16_t * __a) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_s32 (const int32_t * __a) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_f32 (const float32_t * __a) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x2_t 
__attribute__ ((__always_inline__)) +vld2q_u8 (const uint8_t * __a) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_u16 (const uint16_t * __a) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_u32 (const uint32_t * __a) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vld2q_p8 (const poly8_t * __a) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_p16 (const poly16_t * __a) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint16x4x2_t __i; __builtin_neon_ti 
__o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly16x8x2_t 
__i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_s8 (const int8_t * __a) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_s16 (const int16_t * __a) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_s32 (const int32_t * __a) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_f32 (const float32_t * __a) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_s64 (const int64_t * __a) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s8 (int8_t * __a, int8x8x2_t __b) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s16 (int16_t * __a, 
int16x4x2_t __b) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s32 (int32_t * __a, int32x2x2_t __b) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_f32 (float32_t * __a, float32x2x2_t __b) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u8 (uint8_t * __a, uint8x8x2_t __b) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u16 (uint16_t * __a, uint16x4x2_t __b) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u32 (uint32_t * __a, uint32x2x2_t __b) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p8 (poly8_t * __a, poly8x8x2_t __b) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p16 (poly16_t * __a, poly16x4x2_t __b) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s64 (int64_t * __a, int64x1x2_t __b) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u64 (uint64_t * __a, uint64x1x2_t __b) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s8 (int8_t * __a, int8x16x2_t __b) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s16 (int16_t * __a, int16x8x2_t __b) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s32 (int32_t * __a, int32x4x2_t __b) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f32 (float32_t * __a, float32x4x2_t __b) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void 
__attribute__ ((__always_inline__)) +vst2q_u8 (uint8_t * __a, uint8x16x2_t __b) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u16 (uint16_t * __a, uint16x8x2_t __b) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u32 (uint32_t * __a, uint32x4x2_t __b) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p8 (poly8_t * __a, poly8x16x2_t __b) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p16 (poly16_t * __a, poly16x8x2_t __b) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} 
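
The vld2_*/vst2_* pairs wrap the NEON structure load/store instructions: vld2 de-interleaves memory laid out as {a0, b0, a1, b1, ...} into the .val[0] and .val[1] members of an *x2_t struct, and vst2 re-interleaves on the way back. A short usage sketch, assuming interleaved stereo int16_t samples (hypothetical function and buffer names, not part of the patch):

/* Hypothetical example: swap the two interleaved channels of four
   left/right int16_t sample pairs with the intrinsics defined above.  */
#include <arm_neon.h>

void
swap_stereo_pairs (int16_t *samples)    /* 8 values: L0 R0 L1 R1 ...  */
{
  int16x4x2_t lr = vld2_s16 (samples);  /* val[0] = L plane, val[1] = R plane  */
  int16x4_t tmp = lr.val[0];
  lr.val[0] = lr.val[1];                /* exchange the two planes  */
  lr.val[1] = tmp;
  vst2_s16 (samples, lr);               /* re-interleave: R0 L0 R1 L1 ...  */
}
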
+ +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_s8 (const int8_t * __a) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_s16 (const int16_t * __a) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_s32 (const int32_t * __a) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_f32 (const float32_t * __a) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_u8 (const uint8_t * __a) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_u16 (const uint16_t * __a) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o 
= __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_u32 (const uint32_t * __a) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_p8 (const poly8_t * __a) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_p16 (const poly16_t * __a) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_s64 (const int64_t * __a) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_u64 (const uint64_t * __a) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) +vld3q_s8 (const int8_t * __a) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_s16 (const int16_t * __a) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_s32 (const int32_t * __a) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_f32 (const float32_t * __a) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) +vld3q_u8 (const uint8_t * __a) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_u16 (const uint16_t * __a) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_u32 (const uint32_t * __a) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) +vld3q_p8 (const poly8_t * __a) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const 
__builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_p16 (const poly16_t * __a) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ 
((__always_inline__)) +vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_s8 (const int8_t * __a) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_s16 (const int16_t * __a) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_s32 (const int32_t * __a) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + 
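
The vld3_dup_* variants load a single three-element structure from memory and replicate it into every lane of the three result vectors, which pairs naturally with the plain vld3_*/vst3_* de-interleaving loads and stores in this block. A small sketch, assuming 8-bit RGB data (hypothetical names, not part of the patch; vrhadd_u8 is the rounding-halving-add intrinsic defined earlier in this header):

/* Hypothetical example: broadcast one packed RGB triple across all
   lanes, then blend it 50/50 into eight interleaved RGB pixels.  */
#include <arm_neon.h>

void
tint_pixels (uint8_t *pixels /* 24 bytes: R G B x 8 */, const uint8_t rgb[3])
{
  uint8x8x3_t tint = vld3_dup_u8 (rgb);   /* val[c] = rgb[c] in every lane  */
  uint8x8x3_t px = vld3_u8 (pixels);      /* de-interleave into R/G/B planes  */
  int c;
  for (c = 0; c < 3; c++)
    px.val[c] = vrhadd_u8 (px.val[c], tint.val[c]);  /* rounded average  */
  vst3_u8 (pixels, px);                   /* re-interleave and store  */
}
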
+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_f32 (const float32_t * __a) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_s64 (const int64_t * __a) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s8 (int8_t * __a, int8x8x3_t __b) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s16 (int16_t * __a, int16x4x3_t __b) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s32 (int32_t * __a, int32x2x3_t __b) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_f32 (float32_t * __a, float32x2x3_t __b) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u8 (uint8_t * __a, uint8x8x3_t __b) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ 
((__always_inline__)) +vst3_u16 (uint16_t * __a, uint16x4x3_t __b) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u32 (uint32_t * __a, uint32x2x3_t __b) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p8 (poly8_t * __a, poly8x8x3_t __b) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p16 (poly16_t * __a, poly16x4x3_t __b) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s64 (int64_t * __a, int64x1x3_t __b) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u64 (uint64_t * __a, uint64x1x3_t __b) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s8 (int8_t * __a, int8x16x3_t __b) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s16 (int16_t * __a, int16x8x3_t __b) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s32 (int32_t * __a, int32x4x3_t __b) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f32 (float32_t * __a, float32x4x3_t __b) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u8 (uint8_t * __a, uint8x16x3_t __b) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u16 (uint16_t * __a, uint16x8x3_t __b) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u32 (uint32_t * __a, uint32x4x3_t __b) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p8 (poly8_t * __a, poly8x16x3_t __b) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, 
__bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p16 (poly16_t * __a, poly16x8x3_t __b) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_f32 (float32_t * __a, float32x4x3_t 
__b, const int __c) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_s8 (const int8_t * __a) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_s16 (const int16_t * __a) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_s32 (const int32_t * __a) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_f32 (const float32_t * __a) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_u8 (const uint8_t * __a) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_u16 (const uint16_t * __a) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_u32 (const uint32_t * __a) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_p8 (const poly8_t * __a) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_p16 (const poly16_t * __a) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_s64 
(const int64_t * __a) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_u64 (const uint64_t * __a) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) +vld4q_s8 (const int8_t * __a) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_s16 (const int16_t * __a) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_s32 (const int32_t * __a) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_f32 (const float32_t * __a) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) +vld4q_u8 (const uint8_t * __a) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_u16 (const uint16_t * __a) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_u32 (const uint32_t * __a) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) +vld4q_p8 (const poly8_t * __a) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_p16 (const poly16_t * __a) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + 
__rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { int32x4x4_t __i; __builtin_neon_xi __o; } 
__rv; + __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_s8 (const int8_t * __a) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_s16 (const int16_t * __a) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_s32 (const int32_t * __a) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_f32 (const float32_t * __a) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; 
} __rv; + __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_s64 (const int64_t * __a) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s8 (int8_t * __a, int8x8x4_t __b) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s16 (int16_t * __a, int16x4x4_t __b) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s32 (int32_t * __a, int32x2x4_t __b) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_f32 (float32_t * __a, float32x2x4_t __b) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u8 (uint8_t * __a, uint8x8x4_t __b) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u16 (uint16_t * __a, uint16x4x4_t __b) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u32 (uint32_t * __a, uint32x2x4_t __b) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p8 (poly8_t * __a, poly8x8x4_t __b) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p16 (poly16_t * __a, poly16x4x4_t __b) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static 
__inline void __attribute__ ((__always_inline__)) +vst4_s64 (int64_t * __a, int64x1x4_t __b) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u64 (uint64_t * __a, uint64x1x4_t __b) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s8 (int8_t * __a, int8x16x4_t __b) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s16 (int16_t * __a, int16x8x4_t __b) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s32 (int32_t * __a, int32x4x4_t __b) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f32 (float32_t * __a, float32x4x4_t __b) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u8 (uint8_t * __a, uint8x16x4_t __b) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u16 (uint16_t * __a, uint16x8x4_t __b) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u32 (uint32_t * __a, uint32x4x4_t __b) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p8 (poly8_t * __a, poly8x16x4_t __b) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p16 (poly16_t * __a, poly16x8x4_t __b) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c) +{ + union { int32x2x4_t __i; 
__builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi 
((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vand_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vand_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vand_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vand_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vand_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vand_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vand_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vand_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vandq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vandq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vandq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vandq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vandq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vandq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vandq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vandq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorr_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorr_s16 (int16x4_t __a, int16x4_t __b) +{ + return 
(int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorr_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorr_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorr_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorr_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorr_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorr_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vorrq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vorrq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vorrq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vorrq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vorrq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vorrq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vorrq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vorrq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +veor_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +veor_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +veor_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +veor_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return 
(uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +veor_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +veor_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +veor_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_veordi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +veor_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +veorq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +veorq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +veorq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +veorq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +veorq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +veorq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +veorq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +veorq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbic_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbic_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbic_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbic_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbic_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbic_u32 
(uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbic_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbic_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbicq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbicq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbicq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbicq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbicq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbicq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbicq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbicq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorn_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorn_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorn_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorn_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ 
((__always_inline__)) +vorn_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vornq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vornq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vornq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vornq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vornq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vornq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vornq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vornq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s8 (int8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s16 (int16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s32 (int32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s64 (int64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_f32 (float32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u8 (uint8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u16 (uint16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u32 (uint32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u64 (uint64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 
+vreinterpret_p8_p16 (poly16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s8 (int8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s16 (int16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s32 (int32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s64 (int64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f32 (float32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u8 (uint8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u16 (uint16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u32 (uint32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u64 (uint64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p16 (poly16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s8 (int8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s16 (int16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s32 (int32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s64 (int64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_f32 (float32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u8 (uint8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u16 (uint16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u32 (uint32x2_t __a) +{ + return 
(poly16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u64 (uint64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p8 (poly8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s8 (int8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s16 (int16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s32 (int32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s64 (int64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f32 (float32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u8 (uint8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u16 (uint16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u32 (uint32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u64 (uint64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p8 (poly8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s8 (int8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s16 (int16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s32 (int32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s64 (int64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u8 (uint8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u16 (uint16x4_t __a) +{ + return 
(float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u32 (uint32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u64 (uint64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi ((int64x1_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p8 (poly8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p16 (poly16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s8 (int8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s16 (int16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s64 (int64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u8 (uint8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u16 (uint16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u64 (uint64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p8 (poly8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p16 (poly16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s8 (int8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s16 (int16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s32 (int32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_f32 (float32x2_t __a) +{ + return 
(int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u8 (uint8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u16 (uint16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u32 (uint32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u64 (uint64x1_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p8 (poly8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p16 (poly16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s8 (int8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s16 (int16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s32 (int32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f32 (float32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u8 (uint8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u16 (uint16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u32 (uint32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u64 (uint64x2_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p8 (poly8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p16 (poly16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s8 (int8x8_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s16 (int16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + 
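/* Editor's aside (not part of the upstream patch): a minimal usage sketch of
   the vreinterpret_u64_* family defined above.  It assumes the standard
   vld1_u8 and vst1_u64 intrinsics declared elsewhere in arm_neon.h; the
   helper name copy_8_bytes is hypothetical.  */
static inline void
copy_8_bytes (uint64_t *dst, const uint8_t *src)
{
  uint8x8_t bytes = vld1_u8 (src);               /* load eight bytes into one D register */
  vst1_u64 (dst, vreinterpret_u64_u8 (bytes));   /* view them as a single 64-bit lane and store */
}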
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s32 (int32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s64 (int64x1_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_f32 (float32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u8 (uint8x8_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u16 (uint16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u32 (uint32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p8 (poly8x8_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p16 (poly16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s8 (int8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s16 (int16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s32 (int32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s64 (int64x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f32 (float32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u8 (uint8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u16 (uint16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u32 (uint32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p8 (poly8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p16 (poly16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x8_t 
__attribute__ ((__always_inline__)) +vreinterpret_s8_s16 (int16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s32 (int32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s64 (int64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_f32 (float32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u8 (uint8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u16 (uint16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u32 (uint32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u64 (uint64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p8 (poly8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p16 (poly16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s16 (int16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s32 (int32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s64 (int64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f32 (float32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u8 (uint8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u16 (uint16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u32 (uint32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u64 (uint64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p8 (poly8x16_t __a) +{ + return 
(int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p16 (poly16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s8 (int8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s32 (int32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s64 (int64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_f32 (float32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u8 (uint8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u16 (uint16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u32 (uint32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u64 (uint64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p8 (poly8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p16 (poly16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s8 (int8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s32 (int32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s64 (int64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f32 (float32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u8 (uint8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u16 (uint16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u32 (uint32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + 
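/* Editor's aside (not part of the upstream patch): a minimal sketch of how the
   q-register vreinterpretq and vandq intrinsics above combine; it clears the
   sign bit of every float lane.  Assumes vdupq_n_u32, vreinterpretq_u32_f32
   and vreinterpretq_f32_u32 from elsewhere in arm_neon.h; the helper name
   vabsq_f32_bitwise is hypothetical.  */
static inline float32x4_t
vabsq_f32_bitwise (float32x4_t __x)
{
  uint32x4_t __mask = vdupq_n_u32 (0x7fffffffu);                        /* every bit except the sign bit */
  uint32x4_t __bits = vandq_u32 (vreinterpretq_u32_f32 (__x), __mask);  /* mask in the integer view */
  return vreinterpretq_f32_u32 (__bits);                                /* reinterpret back to float lanes */
}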
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u64 (uint64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p8 (poly8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p16 (poly16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s8 (int8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s16 (int16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s64 (int64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_f32 (float32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u8 (uint8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u16 (uint16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u32 (uint32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv2si ((int32x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u64 (uint64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p8 (poly8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p16 (poly16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s8 (int8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s16 (int16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s64 (int64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u8 (uint8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s32_u16 (uint16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u32 (uint32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv4si ((int32x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u64 (uint64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p8 (poly8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p16 (poly16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s8 (int8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s16 (int16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s32 (int32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s64 (int64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_f32 (float32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u16 (uint16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u32 (uint32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u64 (uint64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p8 (poly8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p16 (poly16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s8 (int8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s16 (int16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s32 (int32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s64 (int64x2_t __a) +{ + return 
(uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_f32 (float32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u16 (uint16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u32 (uint32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u64 (uint64x2_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p8 (poly8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p16 (poly16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s8 (int8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s16 (int16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s32 (int32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s64 (int64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_f32 (float32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u8 (uint8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u32 (uint32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u64 (uint64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p8 (poly8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p16 (poly16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s8 (int8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s16 (int16x8_t __a) +{ + return 
(uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s32 (int32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s64 (int64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f32 (float32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u8 (uint8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u32 (uint32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u64 (uint64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p8 (poly8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p16 (poly16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s8 (int8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s16 (int16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s32 (int32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s64 (int64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u8 (uint8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u16 (uint16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u64 (uint64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p8 (poly8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p16 (poly16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi 
((int16x4_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s8 (int8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s16 (int16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s32 (int32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s64 (int64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u8 (uint8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u16 (uint16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u64 (uint64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p8 (poly8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p16 (poly16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +#ifdef __cplusplus +} +#endif +#endif +#endif diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S new file mode 100644 index 000000000..4ecea6da5 --- /dev/null +++ b/gcc/config/arm/bpabi-v6m.S @@ -0,0 +1,318 @@ +/* Miscellaneous BPABI functions. ARMv6M implementation + + Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. 
*/ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +FUNC_START aeabi_lcmp + cmp xxh, yyh + beq 1f + bgt 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 + RET +1: + sub r0, xxl, yyl + beq 1f + bhi 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 +1: + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +FUNC_START aeabi_ulcmp + cmp xxh, yyh + bne 1f + sub r0, xxl, yyl + beq 2f +1: + bcs 1f + mov r0, #1 + neg r0, r0 + RET +1: + mov r0, #1 +2: + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed + cmp yyh, #0 + bne 7f + cmp yyl, #0 + bne 7f + cmp xxh, #0 + bne 2f + cmp xxl, #0 +2: + .ifc \signed, unsigned + beq 3f + mov xxh, #0 + mvn xxh, xxh @ 0xffffffff + mov xxl, xxh +3: + .else + beq 5f + blt 6f + mov xxl, #0 + mvn xxl, xxl @ 0xffffffff + lsr xxh, xxl, #1 @ 0x7fffffff + b 5f +6: mov xxh, #0x80 + lsl xxh, xxh, #24 @ 0x80000000 + mov xxl, #0 +5: + .endif + @ tailcalls are tricky on v6-m. + push {r0, r1, r2} + ldr r0, 1f + adr r1, 1f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +1: + .word __aeabi_ldiv0 - 1b +7: +.endm + +#ifdef L_aeabi_ldivmod + +FUNC_START aeabi_ldivmod + test_div_by_zero signed + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_ldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_ldivmod + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_uldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_uldivmod + +#endif /* L_aeabi_uldivmod */ + +#ifdef L_arm_addsubsf3 + +FUNC_START aeabi_frsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor r0, r0, r4 + bl __aeabi_fadd + pop {r4, pc} + + FUNC_END aeabi_frsub + +#endif /* L_arm_addsubsf3 */ + +#ifdef L_arm_cmpsf2 + +FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +FUNC_START aeabi_cfcmpeq +FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __lesf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. 
+ bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +FUNC_START aeabi_fcmpeq + + push {r4, lr} + bl __eqsf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmpeq + +.macro COMPARISON cond, helper, mode=sf2 +FUNC_START aeabi_fcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpsf2 */ + +#ifdef L_arm_addsubdf3 + +FUNC_START aeabi_drsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor xxh, xxh, r4 + bl __aeabi_dadd + pop {r4, pc} + + FUNC_END aeabi_drsub + +#endif /* L_arm_addsubdf3 */ + +#ifdef L_arm_cmpdf2 + +FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +FUNC_START aeabi_cdcmpeq +FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __ledf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +FUNC_START aeabi_dcmpeq + + push {r4, lr} + bl __eqdf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmpeq + +.macro COMPARISON cond, helper, mode=df2 +FUNC_START aeabi_dcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpdf2 */ diff --git a/gcc/config/arm/bpabi.S b/gcc/config/arm/bpabi.S new file mode 100644 index 000000000..2ff338927 --- /dev/null +++ b/gcc/config/arm/bpabi.S @@ -0,0 +1,163 @@ +/* Miscellaneous BPABI functions. + + Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. 
*/ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +ARM_FUNC_START aeabi_lcmp + cmp xxh, yyh + do_it lt + movlt r0, #-1 + do_it gt + movgt r0, #1 + do_it ne + RETc(ne) + subs r0, xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +ARM_FUNC_START aeabi_ulcmp + cmp xxh, yyh + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it ne + RETc(ne) + cmp xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it eq + moveq r0, #0 + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed +/* Tail-call to divide-by-zero handlers which may be overridden by the user, + so unwinding works properly. */ +#if defined(__thumb2__) + cbnz yyh, 1f + cbnz yyl, 1f + cmp xxh, #0 + do_it eq + cmpeq xxl, #0 + .ifc \signed, unsigned + beq 2f + mov xxh, #0xffffffff + mov xxl, xxh +2: + .else + do_it lt, t + movlt xxl, #0 + movlt xxh, #0x80000000 + do_it gt, t + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +1: +#else + /* Note: Thumb-1 code calls via an ARM shim on processors which + support ARM mode. */ + cmp yyh, #0 + cmpeq yyl, #0 + bne 2f + cmp xxh, #0 + cmpeq xxl, #0 + .ifc \signed, unsigned + movne xxh, #0xffffffff + movne xxl, #0xffffffff + .else + movlt xxh, #0x80000000 + movlt xxl, #0 + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +2: +#endif +.endm + +#ifdef L_aeabi_ldivmod + +ARM_FUNC_START aeabi_ldivmod + test_div_by_zero signed + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_ldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +ARM_FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_uldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_divmod */ + diff --git a/gcc/config/arm/bpabi.c b/gcc/config/arm/bpabi.c new file mode 100644 index 000000000..283bdc0ac --- /dev/null +++ b/gcc/config/arm/bpabi.c @@ -0,0 +1,56 @@ +/* Miscellaneous BPABI functions. + + Copyright (C) 2003, 2004, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +extern long long __divdi3 (long long, long long); +extern unsigned long long __udivdi3 (unsigned long long, + unsigned long long); +extern long long __gnu_ldivmod_helper (long long, long long, long long *); +extern unsigned long long __gnu_uldivmod_helper (unsigned long long, + unsigned long long, + unsigned long long *); + + +long long +__gnu_ldivmod_helper (long long a, + long long b, + long long *remainder) +{ + long long quotient; + + quotient = __divdi3 (a, b); + *remainder = a - b * quotient; + return quotient; +} + +unsigned long long +__gnu_uldivmod_helper (unsigned long long a, + unsigned long long b, + unsigned long long *remainder) +{ + unsigned long long quotient; + + quotient = __udivdi3 (a, b); + *remainder = a - b * quotient; + return quotient; +} diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h new file mode 100644 index 000000000..7b5ee6231 --- /dev/null +++ b/gcc/config/arm/bpabi.h @@ -0,0 +1,125 @@ +/* Configuration file for ARM BPABI targets. + Copyright (C) 2004, 2005, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Use the AAPCS ABI by default. */ +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS + +/* Assume that AAPCS ABIs should adhere to the full BPABI. */ +#define TARGET_BPABI (TARGET_AAPCS_BASED) + +/* BPABI targets use EABI frame unwinding tables. */ +#undef ARM_UNWIND_INFO +#define ARM_UNWIND_INFO 1 + +/* Section 4.1 of the AAPCS requires the use of VFP format. */ +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT MASK_BIG_END +#else +#define TARGET_ENDIAN_DEFAULT 0 +#endif + +/* EABI targets should enable interworking by default. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_INTERWORK | TARGET_ENDIAN_DEFAULT) + +/* The ARM BPABI functions return a boolean; they use no special + calling convention. */ +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) TARGET_BPABI + +/* The BPABI integer comparison routines return { -1, 0, 1 }. */ +#define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI + +#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + +#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\ + "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" + +/* Tell the assembler to build BPABI binaries. */ +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC \ + "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC + +#ifndef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC "" +#endif + +/* The generic link spec in elf.h does not support shared libraries. 
*/ +#define BPABI_LINK_SPEC \ + "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \ + "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \ + "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC + +#undef LINK_SPEC +#define LINK_SPEC BPABI_LINK_SPEC + +/* The BPABI requires that we always use an out-of-line implementation + of RTTI comparison, even if the target supports weak symbols, + because the same object file might be used on a target that does + not support merging symbols across DLL boundaries. This macro is + broken out separately so that it can be used within + TARGET_OS_CPP_BUILTINS in configuration files for systems based on + the BPABI. */ +#define TARGET_BPABI_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__GXX_TYPEINFO_EQUALITY_INLINE=0"); \ + } \ + while (false) + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + TARGET_BPABI_CPP_BUILTINS() + +/* The BPABI specifies the use of .{init,fini}_array. Therefore, we + do not want GCC to put anything into the .{init,fini} sections. */ +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#define INIT_ARRAY_SECTION_ASM_OP ARM_EABI_CTORS_SECTION_OP +#define FINI_ARRAY_SECTION_ASM_OP ARM_EABI_DTORS_SECTION_OP + +/* The legacy _mcount implementation assumes r11 points to a + 4-word APCS frame. This is generally not true for EABI targets, + particularly not in Thumb mode. We assume the mcount + implementation does not require a counter variable (No Counter). + Note that __gnu_mcount_nc will be entered with a misaligned stack. + This is OK because it uses a special calling convention anyway. */ + +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + fprintf (STREAM, "\tpush\t{lr}\n"); \ + fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n"); \ +} + +#undef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 + +/* __gnu_mcount_nc restores the original LR value before returning. Ensure + that there is no unnecessary hook set up. */ +#undef PROFILE_HOOK diff --git a/gcc/config/arm/cirrus.md b/gcc/config/arm/cirrus.md new file mode 100644 index 000000000..f08da0bdc --- /dev/null +++ b/gcc/config/arm/cirrus.md @@ -0,0 +1,540 @@ +;; Cirrus EP9312 "Maverick" ARM floating point co-processor description. +;; Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. +;; Contributed by Red Hat. +;; Written by Aldy Hernandez (aldyh@redhat.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
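As a rough illustration of what the Maverick patterns below match (the
function name and the exact compiler options here are assumptions for the
sake of the example, not taken from this file): when the target is
configured so that TARGET_HARD_FLOAT and TARGET_MAVERICK hold (roughly, an
EP9312 configuration such as -mcpu=ep9312 with the Maverick FPU selected),
a 64-bit integer addition is a candidate for the "cirrus_adddi3" pattern
defined below and can be emitted as a single cfadd64 instruction.

    /* Sketch only: a DImode add that cirrus_adddi3 below can match.  */
    long long
    add_di (long long a, long long b)
    {
      return a + b;
    }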
+ + +; Cirrus types for invalid insn combinations +; not Not a cirrus insn +; normal Any Cirrus insn not covered by the special cases below +; double cfldrd, cfldr64, cfstrd, cfstr64 +; compare cfcmps, cfcmpd, cfcmp32, cfcmp64 +; move cfmvdlr, cfmvdhr, cfmvsr, cfmv64lr, cfmv64hr +(define_attr "cirrus" "not,normal,double,compare,move" (const_string "not")) + + +(define_insn "cirrus_adddi3" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (plus:DI (match_operand:DI 1 "cirrus_fp_register" "v") + (match_operand:DI 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfadd64%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_addsi3" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (plus:SI (match_operand:SI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfadd32%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_addsf3" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (plus:SF (match_operand:SF 1 "cirrus_fp_register" "v") + (match_operand:SF 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfadds%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_adddf3" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (plus:DF (match_operand:DF 1 "cirrus_fp_register" "v") + (match_operand:DF 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfaddd%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "cirrus_subdi3" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (minus:DI (match_operand:DI 1 "cirrus_fp_register" "v") + (match_operand:DI 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfsub64%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_subsi3_insn" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (minus:SI (match_operand:SI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfsub32%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_subsf3" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (minus:SF (match_operand:SF 1 "cirrus_fp_register" "v") + (match_operand:SF 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfsubs%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_subdf3" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (minus:DF (match_operand:DF 1 "cirrus_fp_register" "v") + (match_operand:DF 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfsubd%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_mulsi3" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (mult:SI (match_operand:SI 2 "cirrus_fp_register" "v") + (match_operand:SI 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfmul32%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + 
+(define_insn "muldi3" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (mult:DI (match_operand:DI 2 "cirrus_fp_register" "v") + (match_operand:DI 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfmul64%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_dmult") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_mulsi3addsi" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (plus:SI + (mult:SI (match_operand:SI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "cirrus_fp_register" "v")) + (match_operand:SI 3 "cirrus_fp_register" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfmac32%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +;; Cirrus SI multiply-subtract +(define_insn "*cirrus_mulsi3subsi" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (minus:SI + (match_operand:SI 1 "cirrus_fp_register" "0") + (mult:SI (match_operand:SI 2 "cirrus_fp_register" "v") + (match_operand:SI 3 "cirrus_fp_register" "v"))))] + "0 && TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfmsc32%?\\t%V0, %V2, %V3" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_mulsf3" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (mult:SF (match_operand:SF 1 "cirrus_fp_register" "v") + (match_operand:SF 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfmuls%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_farith") + (set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_muldf3" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (mult:DF (match_operand:DF 1 "cirrus_fp_register" "v") + (match_operand:DF 2 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfmuld%?\\t%V0, %V1, %V2" + [(set_attr "type" "mav_dmult") + (set_attr "cirrus" "normal")] +) + +(define_insn "cirrus_ashl_const" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (ashift:SI (match_operand:SI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "cirrus_shift_const" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfsh32%?\\t%V0, %V1, #%s2" + [(set_attr "cirrus" "normal")] +) + +(define_insn "cirrus_ashiftrt_const" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (ashiftrt:SI (match_operand:SI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "cirrus_shift_const" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfsh32%?\\t%V0, %V1, #-%s2" + [(set_attr "cirrus" "normal")] +) + +(define_insn "cirrus_ashlsi3" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (ashift:SI (match_operand:SI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfrshl32%?\\t%V1, %V0, %s2" + [(set_attr "cirrus" "normal")] +) + +(define_insn "ashldi3_cirrus" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (ashift:DI (match_operand:DI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "register_operand" "r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfrshl64%?\\t%V1, %V0, %s2" + [(set_attr "cirrus" "normal")] +) + +(define_insn "cirrus_ashldi_const" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (ashift:DI (match_operand:DI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "cirrus_shift_const" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfsh64%?\\t%V0, %V1, #%s2" + [(set_attr "cirrus" "normal")] +) + 
+(define_insn "cirrus_ashiftrtdi_const" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (ashiftrt:DI (match_operand:DI 1 "cirrus_fp_register" "v") + (match_operand:SI 2 "cirrus_shift_const" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfsh64%?\\t%V0, %V1, #-%s2" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_absdi2" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (abs:DI (match_operand:DI 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfabs64%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +;; This doesn't really clobber ``cc''. Fixme: aldyh. +(define_insn "*cirrus_negdi2" + [(set (match_operand:DI 0 "cirrus_fp_register" "=v") + (neg:DI (match_operand:DI 1 "cirrus_fp_register" "v"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfneg64%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_negsi2" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (neg:SI (match_operand:SI 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfneg32%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_negsf2" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (neg:SF (match_operand:SF 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfnegs%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_negdf2" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (neg:DF (match_operand:DF 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfnegd%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +;; This doesn't really clobber the condition codes either. 
+(define_insn "*cirrus_abssi2" + [(set (match_operand:SI 0 "cirrus_fp_register" "=v") + (abs:SI (match_operand:SI 1 "cirrus_fp_register" "v"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0" + "cfabs32%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_abssf2" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (abs:SF (match_operand:SF 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfabss%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_absdf2" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (abs:DF (match_operand:DF 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfabsd%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +;; Convert Cirrus-SI to Cirrus-SF +(define_insn "cirrus_floatsisf2" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (float:SF (match_operand:SI 1 "s_register_operand" "r"))) + (clobber (match_scratch:DF 2 "=v"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfmv64lr%?\\t%Z2, %1\;cfcvt32s%?\\t%V0, %Y2" + [(set_attr "length" "8") + (set_attr "cirrus" "move")] +) + +(define_insn "cirrus_floatsidf2" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (float:DF (match_operand:SI 1 "s_register_operand" "r"))) + (clobber (match_scratch:DF 2 "=v"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfmv64lr%?\\t%Z2, %1\;cfcvt32d%?\\t%V0, %Y2" + [(set_attr "length" "8") + (set_attr "cirrus" "move")] +) + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (float:SF (match_operand:DI 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfcvt64s%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")]) + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (float:DF (match_operand:DI 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfcvt64d%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")]) + +(define_insn "cirrus_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (fix:SI (fix:SF (match_operand:SF 1 "cirrus_fp_register" "v")))) + (clobber (match_scratch:DF 2 "=v"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cftruncs32%?\\t%Y2, %V1\;cfmvr64l%?\\t%0, %Z2" + [(set_attr "length" "8") + (set_attr "cirrus" "normal")] +) + +(define_insn "cirrus_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (fix:SI (fix:DF (match_operand:DF 1 "cirrus_fp_register" "v")))) + (clobber (match_scratch:DF 2 "=v"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cftruncd32%?\\t%Y2, %V1\;cfmvr64l%?\\t%0, %Z2" + [(set_attr "length" "8")] +) + +(define_insn "*cirrus_truncdfsf2" + [(set (match_operand:SF 0 "cirrus_fp_register" "=v") + (float_truncate:SF + (match_operand:DF 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfcvtds%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_extendsfdf2" + [(set (match_operand:DF 0 "cirrus_fp_register" "=v") + (float_extend:DF (match_operand:SF 1 "cirrus_fp_register" "v")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "cfcvtsd%?\\t%V0, %V1" + [(set_attr "cirrus" "normal")] +) + +(define_insn "*cirrus_arm_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,o<>,v,r,v,m,v") + (match_operand:DI 1 "di_operand" "rIK,mi,r,r,v,mi,v,v"))] + "TARGET_ARM && 
TARGET_HARD_FLOAT && TARGET_MAVERICK" + "* + { + switch (which_alternative) + { + case 0: + return \"#\"; + case 1: + case 2: + return output_move_double (operands); + + case 3: return \"cfmv64lr%?\\t%V0, %Q1\;cfmv64hr%?\\t%V0, %R1\"; + case 4: return \"cfmvr64l%?\\t%Q0, %V1\;cfmvr64h%?\\t%R0, %V1\"; + + case 5: return \"cfldr64%?\\t%V0, %1\"; + case 6: return \"cfstr64%?\\t%V1, %0\"; + + /* Shifting by 0 will just copy %1 into %0. */ + case 7: return \"cfsh64%?\\t%V0, %V1, #0\"; + + default: gcc_unreachable (); + } + }" + [(set_attr "length" " 8, 8, 8, 8, 8, 4, 4, 4") + (set_attr "type" " *,load2,store2, *, *, load2,store2, *") + (set_attr "pool_range" " *,1020, *, *, *, 1020, *, *") + (set_attr "neg_pool_range" " *,1012, *, *, *, 1008, *, *") + (set_attr "cirrus" "not, not, not,move,normal,double,double,normal")] +) + +;; Cirrus SI values have been outlawed. Look in arm.h for the comment +;; on HARD_REGNO_MODE_OK. + +(define_insn "*cirrus_movsf_hard_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=v,v,v,r,m,r,r,m") + (match_operand:SF 1 "general_operand" "v,mE,r,v,v,r,mE,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], SFmode))" + "@ + cfcpys%?\\t%V0, %V1 + cfldrs%?\\t%V0, %1 + cfmvsr%?\\t%V0, %1 + cfmvrs%?\\t%0, %V1 + cfstrs%?\\t%V1, %0 + mov%?\\t%0, %1 + ldr%?\\t%0, %1\\t%@ float + str%?\\t%1, %0\\t%@ float" + [(set_attr "length" " *, *, *, *, *, 4, 4, 4") + (set_attr "type" " *, load1, *, *,store1, *,load1,store1") + (set_attr "pool_range" " *, 1020, *, *, *, *,4096, *") + (set_attr "neg_pool_range" " *, 1008, *, *, *, *,4084, *") + (set_attr "cirrus" "normal,normal,move,normal,normal,not, not, not")] +) + +(define_insn "*cirrus_movdf_hard_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Q,r,m,r,v,v,v,r,m") + (match_operand:DF 1 "general_operand" "Q,r,r,r,mF,v,mF,r,v,v"))] + "TARGET_ARM + && TARGET_HARD_FLOAT && TARGET_MAVERICK + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: return \"ldm%?ia\\t%m1, %M0\\t%@ double\"; + case 1: return \"stm%?ia\\t%m0, %M1\\t%@ double\"; + case 2: return \"#\"; + case 3: case 4: return output_move_double (operands); + case 5: return \"cfcpyd%?\\t%V0, %V1\"; + case 6: return \"cfldrd%?\\t%V0, %1\"; + case 7: return \"cfmvdlr\\t%V0, %Q1\;cfmvdhr%?\\t%V0, %R1\"; + case 8: return \"cfmvrdl%?\\t%Q0, %V1\;cfmvrdh%?\\t%R0, %V1\"; + case 9: return \"cfstrd%?\\t%V1, %0\"; + default: gcc_unreachable (); + } + }" + [(set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2") + (set_attr "length" " 4, 4, 8, 8, 8, 4, 4, 8, 8, 4") + (set_attr "pool_range" " *, *, *, *, 252, *, 1020, *, *, *") + (set_attr "neg_pool_range" " *, *, *, *, 244, *, 1008, *, *, *") + (set_attr "cirrus" " not, not,not, not, not,normal,double,move,normal,double")] +) + +(define_insn "*cirrus_thumb2_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,o<>,v,r,v,m,v") + (match_operand:DI 1 "di_operand" "rIK,mi,r,r,v,mi,v,v"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_MAVERICK" + "* + { + switch (which_alternative) + { + case 0: + case 1: + case 2: + return (output_move_double (operands)); + + case 3: return \"cfmv64lr%?\\t%V0, %Q1\;cfmv64hr%?\\t%V0, %R1\"; + case 4: return \"cfmvr64l%?\\t%Q0, %V1\;cfmvr64h%?\\t%R0, %V1\"; + + case 5: return \"cfldr64%?\\t%V0, %1\"; + case 6: return \"cfstr64%?\\t%V1, %0\"; + + /* Shifting by 0 will just copy %1 into %0. 
*/ + case 7: return \"cfsh64%?\\t%V0, %V1, #0\"; + + default: abort (); + } + }" + [(set_attr "length" " 8, 8, 8, 8, 8, 4, 4, 4") + (set_attr "type" " *,load2,store2, *, *, load2,store2, *") + (set_attr "pool_range" " *,4096, *, *, *, 1020, *, *") + (set_attr "neg_pool_range" " *, 0, *, *, *, 1008, *, *") + (set_attr "cirrus" "not, not, not,move,normal,double,double,normal")] +) + +(define_insn "*thumb2_cirrus_movsf_hard_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=v,v,v,r,m,r,r,m") + (match_operand:SF 1 "general_operand" "v,mE,r,v,v,r,mE,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_MAVERICK + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], SFmode))" + "@ + cfcpys%?\\t%V0, %V1 + cfldrs%?\\t%V0, %1 + cfmvsr%?\\t%V0, %1 + cfmvrs%?\\t%0, %V1 + cfstrs%?\\t%V1, %0 + mov%?\\t%0, %1 + ldr%?\\t%0, %1\\t%@ float + str%?\\t%1, %0\\t%@ float" + [(set_attr "length" " *, *, *, *, *, 4, 4, 4") + (set_attr "type" " *, load1, *, *,store1, *,load1,store1") + (set_attr "pool_range" " *, 1020, *, *, *, *,4096, *") + (set_attr "neg_pool_range" " *, 1008, *, *, *, *, 0, *") + (set_attr "cirrus" "normal,normal,move,normal,normal,not, not, not")] +) + +(define_insn "*thumb2_cirrus_movdf_hard_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Q,r,m,r,v,v,v,r,m") + (match_operand:DF 1 "general_operand" "Q,r,r,r,mF,v,mF,r,v,v"))] + "TARGET_THUMB2 + && TARGET_HARD_FLOAT && TARGET_MAVERICK + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: return \"ldm%?ia\\t%m1, %M0\\t%@ double\"; + case 1: return \"stm%?ia\\t%m0, %M1\\t%@ double\"; + case 2: case 3: case 4: return output_move_double (operands); + case 5: return \"cfcpyd%?\\t%V0, %V1\"; + case 6: return \"cfldrd%?\\t%V0, %1\"; + case 7: return \"cfmvdlr\\t%V0, %Q1\;cfmvdhr%?\\t%V0, %R1\"; + case 8: return \"cfmvrdl%?\\t%Q0, %V1\;cfmvrdh%?\\t%R0, %V1\"; + case 9: return \"cfstrd%?\\t%V1, %0\"; + default: abort (); + } + }" + [(set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2") + (set_attr "length" " 4, 4, 8, 8, 8, 4, 4, 8, 8, 4") + (set_attr "pool_range" " *, *, *, *,4092, *, 1020, *, *, *") + (set_attr "neg_pool_range" " *, *, *, *, 0, *, 1008, *, *, *") + (set_attr "cirrus" " not, not,not, not, not,normal,double,move,normal,double")] +) + diff --git a/gcc/config/arm/coff.h b/gcc/config/arm/coff.h new file mode 100644 index 000000000..bd3e6f85d --- /dev/null +++ b/gcc/config/arm/coff.h @@ -0,0 +1,86 @@ +/* Definitions of target machine for GNU compiler. + For ARM with COFF object format. + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2005, + 2007 Free Software Foundation, Inc. + Contributed by Doug Evans (devans@cygnus.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Note - it is important that this definition matches the one in tcoff.h. 
*/ +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + + +/* Run-time Target Specification. */ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/coff)", stderr) + +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) + +#ifndef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork" } +#endif + +/* This is COFF, but prefer stabs. */ +#define SDB_DEBUGGING_INFO 1 + +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + + +#define TARGET_ASM_FILE_START_APP_OFF true + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section + +/* Support the ctors/dtors and other sections. */ + +#undef INIT_SECTION_ASM_OP + +/* Define this macro if jump tables (for `tablejump' insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. */ +/* We put ARM and Thumb-2 jump tables in the text section, because it makes + the code more efficient, but for Thumb-1 it's better to put them out of + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata" +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"x\"" +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"x\"" + +/* Support the ctors/dtors sections for g++. */ + +/* __CTOR_LIST__ and __DTOR_LIST__ must be defined by the linker script. */ +#define CTOR_LISTS_DEFINED_EXTERNALLY + +#undef DO_GLOBAL_CTORS_BODY +#undef DO_GLOBAL_DTORS_BODY + +/* The ARM development system defines __main. */ +#define NAME__MAIN "__gccmain" +#define SYMBOL__MAIN __gccmain + +#define SUPPORTS_INIT_PRIORITY 0 diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md new file mode 100644 index 000000000..4e220e530 --- /dev/null +++ b/gcc/config/arm/constraints.md @@ -0,0 +1,335 @@ +;; Constraint definitions for ARM and Thumb +;; Copyright (C) 2006, 2007, 2008, 2010 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
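The constraint letters defined in this file are the ones that appear in the
operand constraints of the ARM machine-description patterns (for instance
the "v" and "r" letters used by the Maverick patterns earlier in this
patch), and machine constraints can likewise be written in extended inline
asm operand constraints.  A minimal sketch, assuming ARM or Thumb-2 state
(the function is hypothetical and not part of this file); "I" is defined
below as a constant usable as an immediate in a data-processing
instruction:

    /* Sketch only: the "I" machine constraint in extended inline asm.  */
    int
    add_imm (int x)
    {
      int r;
      __asm__ ("add %0, %1, %2" : "=r" (r) : "r" (x), "I" (4));
      return r;
    }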
+ +;; The following register constraints have been used: +;; - in ARM/Thumb-2 state: f, t, v, w, x, y, z +;; - in Thumb state: h, b +;; - in both states: l, c, k +;; In ARM state, 'l' is an alias for 'r' + +;; The following normal constraints have been used: +;; in ARM/Thumb-2 state: G, H, I, j, J, K, L, M +;; in Thumb-1 state: I, J, K, L, M, N, O + +;; The following multi-letter normal constraints have been used: +;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz +;; in Thumb-1 state: Pa, Pb, Pc, Pd +;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px + +;; The following memory constraints have been used: +;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +;; in ARM state: Uq + + +(define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS" + "Legacy FPA registers @code{f0}-@code{f7}.") + +(define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS" + "The VFP registers @code{s0}-@code{s31}.") + +(define_register_constraint "v" "TARGET_ARM ? CIRRUS_REGS : NO_REGS" + "The Cirrus Maverick co-processor registers.") + +(define_register_constraint "w" + "TARGET_32BIT ? (TARGET_VFPD32 ? VFP_REGS : VFP_LO_REGS) : NO_REGS" + "The VFP registers @code{d0}-@code{d15}, or @code{d0}-@code{d31} for VFPv3.") + +(define_register_constraint "x" "TARGET_32BIT ? VFP_D0_D7_REGS : NO_REGS" + "The VFP registers @code{d0}-@code{d7}.") + +(define_register_constraint "y" "TARGET_REALLY_IWMMXT ? IWMMXT_REGS : NO_REGS" + "The Intel iWMMX co-processor registers.") + +(define_register_constraint "z" + "TARGET_REALLY_IWMMXT ? IWMMXT_GR_REGS : NO_REGS" + "The Intel iWMMX GR registers.") + +(define_register_constraint "l" "TARGET_THUMB ? LO_REGS : GENERAL_REGS" + "In Thumb state the core registers @code{r0}-@code{r7}.") + +(define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS" + "In Thumb state the core registers @code{r8}-@code{r15}.") + +(define_constraint "j" + "A constant suitable for a MOVW instruction. (ARM/Thumb-2)" + (and (match_test "TARGET_32BIT && arm_arch_thumb2") + (ior (match_code "high") + (and (match_code "const_int") + (match_test "(ival & 0xffff0000) == 0"))))) + +(define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + +(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS" + "@internal + Thumb only. The union of the low registers and the stack register.") + +(define_register_constraint "c" "CC_REG" + "@internal The condition code register.") + +(define_constraint "I" + "In ARM/Thumb-2 state a constant that can be used as an immediate value in a + Data Processing instruction. In Thumb-1 state a constant in the range + 0-255." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (ival) + : ival >= 0 && ival <= 255"))) + +(define_constraint "J" + "In ARM/Thumb-2 state a constant in the range @minus{}4095-4095. In Thumb-1 + state a constant in the range @minus{}255-@minus{}1." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? (ival >= -4095 && ival <= 4095) + : (ival >= -255 && ival <= -1)"))) + +(define_constraint "K" + "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if + inverted. In Thumb-1 state a constant that satisfies the @code{I} + constraint multiplied by any power of 2." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (~ival) + : thumb_shiftable_const (ival)"))) + +(define_constraint "L" + "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if + negated. In Thumb-1 state a constant in the range @minus{}7-7." 
+ (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (-ival) + : (ival >= -7 && ival <= 7)"))) + +;; The ARM state version is internal... +;; @internal In ARM/Thumb-2 state a constant in the range 0-32 or any +;; power of 2. +(define_constraint "M" + "In Thumb-1 state a constant that is a multiple of 4 in the range 0-1020." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? ((ival >= 0 && ival <= 32) + || (((ival & (ival - 1)) & 0xFFFFFFFF) == 0)) + : ival >= 0 && ival <= 1020 && (ival & 3) == 0"))) + +(define_constraint "N" + "Thumb-1 state a constant in the range 0-31." + (and (match_code "const_int") + (match_test "!TARGET_32BIT && (ival >= 0 && ival <= 31)"))) + +(define_constraint "O" + "In Thumb-1 state a constant that is a multiple of 4 in the range + @minus{}508-508." + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -508 && ival <= 508 + && ((ival & 3) == 0)"))) + +(define_constraint "Pa" + "@internal In Thumb-1 state a constant in the range -510 to +510" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -510 && ival <= 510 + && (ival > 255 || ival < -255)"))) + +(define_constraint "Pb" + "@internal In Thumb-1 state a constant in the range -262 to +262" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262 + && (ival > 255 || ival < -255)"))) + +(define_constraint "Pc" + "@internal In Thumb-1 state a constant that is in the range 1021 to 1275" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 + && ival > 1020 && ival <= 1275"))) + +(define_constraint "Pd" + "@internal In Thumb-1 state a constant in the range 0 to 7" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= 0 && ival <= 7"))) + +(define_constraint "Ps" + "@internal In Thumb-2 state a constant in the range -255 to +255" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 255"))) + +(define_constraint "Pt" + "@internal In Thumb-2 state a constant in the range -7 to +7" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= 7"))) + +(define_constraint "Pu" + "@internal In Thumb-2 state a constant in the range +1 to +8" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= 1 && ival <= 8"))) + +(define_constraint "Pv" + "@internal In Thumb-2 state a constant in the range -255 to 0" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 0"))) + +(define_constraint "Pw" + "@internal In Thumb-2 state a constant in the range -255 to -1" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= -1"))) + +(define_constraint "Px" + "@internal In Thumb-2 state a constant in the range -7 to -1" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) + +(define_constraint "G" + "In ARM/Thumb-2 state a valid FPA immediate constant." + (and (match_code "const_double") + (match_test "TARGET_32BIT && arm_const_double_rtx (op)"))) + +(define_constraint "H" + "In ARM/Thumb-2 state a valid FPA immediate constant when negated." + (and (match_code "const_double") + (match_test "TARGET_32BIT && neg_const_double_rtx_ok_for_fpa (op)"))) + +(define_constraint "Dz" + "@internal + In ARM/Thumb-2 state a vector of constant zeros." 
+ (and (match_code "const_vector") + (match_test "TARGET_NEON && op == CONST0_RTX (mode)"))) + +(define_constraint "Da" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with two Data Processing insns." + (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 2"))) + +(define_constraint "Db" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with three Data Processing insns." + (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 3"))) + +(define_constraint "Dc" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with four Data Processing insns. This pattern is disabled + if optimizing for space or when we have load-delay slots to fill." + (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4 + && !(optimize_size || arm_ld_sched)"))) + +(define_constraint "Di" + "@internal + In ARM/Thumb-2 state a const_int or const_double where both the high + and low SImode words can be generated as immediates in 32-bit instructions." + (and (match_code "const_double,const_int") + (match_test "TARGET_32BIT && arm_const_double_by_immediates (op)"))) + +(define_constraint "Dn" + "@internal + In ARM/Thumb-2 state a const_vector which can be loaded with a Neon vmov + immediate instruction." + (and (match_code "const_vector") + (match_test "TARGET_32BIT + && imm_for_neon_mov_operand (op, GET_MODE (op))"))) + +(define_constraint "Dl" + "@internal + In ARM/Thumb-2 state a const_vector which can be used with a Neon vorr or + vbic instruction." + (and (match_code "const_vector") + (match_test "TARGET_32BIT + && imm_for_neon_logic_operand (op, GET_MODE (op))"))) + +(define_constraint "DL" + "@internal + In ARM/Thumb-2 state a const_vector which can be used with a Neon vorn or + vand instruction." + (and (match_code "const_vector") + (match_test "TARGET_32BIT + && imm_for_neon_inv_logic_operand (op, GET_MODE (op))"))) + +(define_constraint "Dv" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconsts + instruction." + (and (match_code "const_double") + (match_test "TARGET_32BIT && vfp3_const_double_rtx (op)"))) + +(define_constraint "Dy" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconstd + instruction." + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) + +(define_memory_constraint "Ut" + "@internal + In ARM/Thumb-2 state an address valid for loading/storing opaque structure + types wider than TImode." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_struct_mem_operand (op)"))) + +(define_memory_constraint "Uv" + "@internal + In ARM/Thumb-2 state a valid VFP load/store address." + (and (match_code "mem") + (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, FALSE)"))) + +(define_memory_constraint "Uy" + "@internal + In ARM/Thumb-2 state a valid iWMMX load/store address." + (and (match_code "mem") + (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, TRUE)"))) + +(define_memory_constraint "Un" + "@internal + In ARM/Thumb-2 state a valid address for Neon doubleword vector + load/store instructions." 
+ (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)"))) + +(define_memory_constraint "Um" + "@internal + In ARM/Thumb-2 state a valid address for Neon element and structure + load/store instructions." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)"))) + +(define_memory_constraint "Us" + "@internal + In ARM/Thumb-2 state a valid address for non-offset loads/stores of + quad-word values in four ARM registers." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)"))) + +(define_memory_constraint "Uq" + "@internal + In ARM state an address valid in ldrsb instructions." + (and (match_code "mem") + (match_test "TARGET_ARM + && arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0), + SIGN_EXTEND, 0)"))) + +(define_memory_constraint "Q" + "@internal + In ARM/Thumb-2 state an address that is a single base register." + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + +;; We used to have constraint letters for S and R in ARM state, but +;; all uses of these now appear to have been removed. + +;; Additionally, we used to have a Q constraint in Thumb state, but +;; this wasn't really a valid memory constraint. Again, all uses of +;; this now seem to have been removed. diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md new file mode 100644 index 000000000..eb154e298 --- /dev/null +++ b/gcc/config/arm/cortex-a5.md @@ -0,0 +1,297 @@ +;; ARM Cortex-A5 pipeline description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The integer (ALU) pipeline. There are five DPU pipeline +;; stages. However the decode/issue stages operate the same for all +;; instructions, so do not model them. We only need to model the +;; first execute stage because instructions always advance one stage +;; per cycle in order. Only branch instructions may dual-issue, so a +;; single unit covers all of the LS, ALU, MAC and FPU pipelines. + +(define_cpu_unit "cortex_a5_ex1" "cortex_a5") + +;; The branch pipeline. Branches can dual-issue with other instructions +;; (except when those instructions take multiple cycles to issue). + +(define_cpu_unit "cortex_a5_branch" "cortex_a5") + +;; Pseudo-unit for blocking the multiply pipeline when a double-precision +;; multiply is in progress. + +(define_cpu_unit "cortex_a5_fpmul_pipe" "cortex_a5") + +;; The floating-point add pipeline (ex1/f1 stage), used to model the usage +;; of the add pipeline by fmac instructions, etc. + +(define_cpu_unit "cortex_a5_fpadd_pipe" "cortex_a5") + +;; Floating-point div/sqrt (long latency, out-of-order completion). 
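+;; Out-of-order completion appears to be modelled by reserving this
+;; dedicated unit for the remaining cycles of the operation while ex1 is
+;; released after the issue cycle, so independent instructions can keep
+;; issuing and only another divide/square-root is held up.  As a rough
+;; guide to the DFA reservation syntax used below: "a+b" keeps units a
+;; and b busy on the same cycle, "a, b" uses a on one cycle and b on the
+;; next, and "unit*13" repeats a reservation for thirteen consecutive
+;; cycles, as in the cortex_a5_fdivs reservation
+;;   "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13".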
+ +(define_cpu_unit "cortex_a5_fp_div_sqrt" "cortex_a5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_alu" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "alu")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_alu_shift" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "alu_shift,alu_shift_reg")) + "cortex_a5_ex1") + +;; Forwarding path for unshifted operands. + +(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift" + "cortex_a5_alu") + +(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift" + "cortex_a5_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results from wr stage only so +;; there's no need to specify bypasses). + +(define_insn_reservation "cortex_a5_mul" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "mult")) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage, which is one stage behind +;; the ex1 stage (the first stage we care about for scheduling purposes). The +;; dc1 stage is parallel with ex1, dc2 with ex2 and rot with wr. + +(define_insn_reservation "cortex_a5_load1" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load_byte,load1")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store1" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store1")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load2" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load2")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store2" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store2")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load3" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store3" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load4" 5 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store4" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Direct branches are the only instructions we can dual-issue (also IT and +;; nop, but those aren't very interesting for scheduling). (The latency here +;; is meant to represent when the branch actually takes place, but may not be +;; entirely correct.) + +(define_insn_reservation "cortex_a5_branch" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "branch,call")) + "cortex_a5_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_fpalu" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\ + fcmps, fcmpd")) + "cortex_a5_ex1+cortex_a5_fpadd_pipe") + +;; For fconsts and fconstd, 8-bit immediate data is passed directly from +;; f1 to f3 (which I think reduces the latency by one cycle). + +(define_insn_reservation "cortex_a5_fconst" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a5_ex1+cortex_a5_fpadd_pipe") + +;; We should try not to attempt to issue a single-precision multiplication in +;; the middle of a double-precision multiplication operation (the usage of +;; cortex_a5_fpmul_pipe). + +(define_insn_reservation "cortex_a5_fpmuls" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmuls")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued +;; whilst the multiply is in F4. The multiply result can then be forwarded +;; from F5 to F1. The issue unit is only used once (when we first start +;; processing the instruction), but the usage of the FP add pipeline could +;; block other instructions attempting to use it simultaneously. We try to +;; avoid that using cortex_a5_fpadd_pipe. + +(define_insn_reservation "cortex_a5_fpmacs" 8 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmacs")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe") + +;; Non-multiply instructions can issue in the middle two instructions of a +;; double-precision multiply. Note that it isn't entirely clear when a branch +;; can dual-issue when a multi-cycle multiplication is in progress; we ignore +;; that for now though. + +(define_insn_reservation "cortex_a5_fpmuld" 7 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmuld")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\ + cortex_a5_ex1+cortex_a5_fpmul_pipe") + +(define_insn_reservation "cortex_a5_fpmacd" 11 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmacd")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\ + cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ??? Not sure if the 14 cycles taken for single-precision divide to complete +;; includes the time taken for the special instruction used to collect the +;; result to travel down the multiply pipeline, or not. Assuming so. (If +;; that's wrong, the latency should be increased by a few cycles.) + +;; fsqrt takes one cycle less, but that is not modelled, nor is the use of the +;; multiply pipeline to collect the divide/square-root result. + +(define_insn_reservation "cortex_a5_fdivs" 14 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fdivs")) + "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13") + +;; ??? Similarly for fdivd. + +(define_insn_reservation "cortex_a5_fdivd" 29 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fdivd")) + "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; FP loads take data from wr/rot/f3. + +;; Core-to-VFP transfers use the multiply pipeline. 
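+;; In the reservations that follow this is reflected only in the
+;; latencies: both cortex_a5_r2f and cortex_a5_f2r reserve just the ex1
+;; issue slot, and the time spent in transit shows up as their result
+;; latencies of 4 and 2 cycles respectively rather than as an explicit
+;; reservation of cortex_a5_fpmul_pipe.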
+ +(define_insn_reservation "cortex_a5_r2f" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "r_2_f")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f2r" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_2_r")) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ??? The flag forwarding from fmstat to the ex2 stage of the second +;; instruction is not modeled at present. + +(define_insn_reservation "cortex_a5_f_flags" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_flag")) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_f_loads" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_loads")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_loadd" 5 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_loadd")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_stores" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_stores")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_stored" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_stored")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a5_f_loads" + "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\ + cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\ + cortex_a5_f2r") + +(define_bypass 3 "cortex_a5_f_loadd" + "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\ + cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\ + cortex_a5_f2r") diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md new file mode 100644 index 000000000..03f52b2df --- /dev/null +++ b/gcc/config/arm/cortex-a8-neon.md @@ -0,0 +1,1312 @@ +;; ARM Cortex-A8 NEON scheduling description. +;; Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +(define_automaton "cortex_a8_neon") + +;; Only one load, store, permute, MCR or MRC instruction can be issued +;; per cycle. +(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon") + +;; Only one data-processing instruction can be issued per cycle. +(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon") + +;; The VFPLite unit (non-pipelined). +(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon") + +;; We need a special mutual exclusion (to be used in addition to +;; cortex_a8_neon_issue_dp) for the case when an instruction such as +;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to +;; E2 of the floating-point add pipeline. 
On the cycle previous to that +;; forward we must prevent issue of any instruction to the floating-point +;; add pipeline, but still allow issue of a data-processing instruction +;; to any of the other pipelines. +(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon") + +;; Patterns of reservation. +;; We model the NEON issue units as running in parallel with the core ones. +;; We assume that multi-cycle NEON instructions get decomposed into +;; micro-ops as they are issued into the NEON pipeline, and not as they +;; are issued into the ARM pipeline. Dual issue may not occur except +;; upon the first and last cycles of a multi-cycle instruction, but it +;; is unclear whether two multi-cycle instructions can issue together (in +;; this model they cannot). It is also unclear whether a pair of +;; a multi-cycle and single-cycle instructions, that could potentially +;; issue together, only do so if (say) the single-cycle one precedes +;; the other. + +(define_reservation "cortex_a8_neon_dp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_4" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp") + +(define_reservation "cortex_a8_neon_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd") + +(define_reservation "cortex_a8_neon_perm" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_2" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_3" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_ls" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_2" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_3" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_4" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_5" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_fmul_then_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + nothing*3,\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fmul_then_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp,\ + nothing*2,\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_fadd") + +;; VFP instructions can only be single-issued 
into the NEON pipeline. +(define_reservation "cortex_a8_vfp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_perm+cortex_a8_vfplite") + +;; VFP instructions. +;; The VFPLite unit that executes these isn't pipelined; we give the +;; worst-case latencies (and choose the double-precision ones where we +;; do not distinguish on precision). We assume RunFast mode is not +;; enabled and therefore do not model the possible VFP instruction +;; execution in the NEON floating point pipelines, nor additional +;; latencies for the processing of subnormals. +;; +;; TODO: RunFast mode could potentially be enabled when -ffast-math +;; is specified. + +(define_insn_reservation "cortex_a8_vfp_add_sub" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fconsts,fconstd,fadds,faddd")) + "cortex_a8_vfp,cortex_a8_vfplite*9") + +(define_insn_reservation "cortex_a8_vfp_muls" 12 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuls")) + "cortex_a8_vfp,cortex_a8_vfplite*11") + +(define_insn_reservation "cortex_a8_vfp_muld" 17 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuld")) + "cortex_a8_vfp,cortex_a8_vfplite*16") + +(define_insn_reservation "cortex_a8_vfp_macs" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacs")) + "cortex_a8_vfp,cortex_a8_vfplite*20") + +(define_insn_reservation "cortex_a8_vfp_macd" 26 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacd")) + "cortex_a8_vfp,cortex_a8_vfplite*25") + +(define_insn_reservation "cortex_a8_vfp_divs" 37 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivs")) + "cortex_a8_vfp,cortex_a8_vfplite*36") + +(define_insn_reservation "cortex_a8_vfp_divd" 65 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivd")) + "cortex_a8_vfp,cortex_a8_vfplite*64") + +;; Comparisons can actually take 7 cycles sometimes instead of four, +;; but given all the other instructions lumped into type=ffarith that +;; take four cycles, we pick that latency. +(define_insn_reservation "cortex_a8_vfp_farith" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) + "cortex_a8_vfp,cortex_a8_vfplite*3") + +(define_insn_reservation "cortex_a8_vfp_cvt" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "f_cvt")) + "cortex_a8_vfp,cortex_a8_vfplite*6") + +;; NEON -> core transfers. + +(define_insn_reservation "cortex_a8_neon_mrc" 20 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mrc")) + "cortex_a8_neon_ls") + +(define_insn_reservation "cortex_a8_neon_mrrc" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mrrc")) + "cortex_a8_neon_ls_2") + +;; The remainder of this file is auto-generated by neon-schedgen. + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. +(define_insn_reservation "cortex_a8_neon_int_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "cortex_a8_neon_int_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a8_neon_int_3" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_3")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_int_4" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_4")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "cortex_a8_neon_int_5" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_int_5")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vqneg_vqabs")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "cortex_a8_neon_vmov" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vmov")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_vaba" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vaba")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vaba_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a8_neon_vsma" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vsma")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. 
+(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_qqq_8_16")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a8_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a8_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_shift_3")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N1. 
+(define_insn_reservation "cortex_a8_neon_vshl_ddd" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vshl_ddd")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vsra_vrsra")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a8_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vsum")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmul_ddd")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmul_qqd")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. 
+(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. +(define_insn_reservation "cortex_a8_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_simple")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_2cycle")) + "cortex_a8_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_bp_3cycle")) + "cortex_a8_neon_perm_3") + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "cortex_a8_neon_ldr" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_ldr")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_str" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_str")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_1_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld2_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. +(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. 
+(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst3_vst4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld1_vld2_lane")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4_lane")) + "cortex_a8_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst1_vst2_lane")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vst3_vst4_lane")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vld3_vld4_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a8_neon_mcr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mcr")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "neon_type" "neon_mcr_2_mcrr")) + "cortex_a8_neon_perm_2") + +;; Exceptions to the default latencies. 
+ +(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_mcr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld3_vld4_all_lanes" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vld3_vld4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld2_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + 
cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a8_neon_ldr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_bp_3cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_bp_2cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_bp_simple" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + 
cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + 
cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vsum" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vsra_vrsra" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a8_neon_vshl_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_3" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_shift_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mul_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 
5 "cortex_a8_neon_vsma" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_vaba_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vaba" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vmov" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vqneg_vqabs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_5" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 
"cortex_a8_neon_int_3" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md new file mode 100644 index 000000000..1922e5cf4 --- /dev/null +++ b/gcc/config/arm/cortex-a8.md @@ -0,0 +1,275 @@ +;; ARM Cortex-A8 scheduling description. +;; Copyright (C) 2007, 2010 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a8") + +;; Only one load/store instruction can be issued per cycle +;; (although reservation of this unit is only required for single +;; loads and stores -- see below). +(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8") + +;; Only one branch instruction can be issued per cycle. +(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8") + +;; The two ALU pipelines. +(define_cpu_unit "cortex_a8_alu0" "cortex_a8") +(define_cpu_unit "cortex_a8_alu1" "cortex_a8") + +;; The usual flow of an instruction through the pipelines. +(define_reservation "cortex_a8_default" + "cortex_a8_alu0|cortex_a8_alu1") + +;; The flow of a branch instruction through the pipelines. +(define_reservation "cortex_a8_branch" + "(cortex_a8_alu0+cortex_a8_issue_branch)|\ + (cortex_a8_alu1+cortex_a8_issue_branch)") + +;; The flow of a load or store instruction through the pipeline in +;; the case where that instruction consists of only one micro-op... 
+(define_reservation "cortex_a8_load_store_1" + "(cortex_a8_alu0+cortex_a8_issue_ls)|\ + (cortex_a8_alu1+cortex_a8_issue_ls)") + +;; ...and in the case of two micro-ops. Dual issue is altogether forbidden +;; during the issue cycle of the first micro-op. (Instead of modelling +;; a separate issue unit, we instead reserve alu0 and alu1 to +;; prevent any other instructions from being issued upon that first cycle.) +;; Even though the load/store pipeline is usually available in either +;; ALU pipe, multi-cycle instructions always issue in pipeline 0. +(define_reservation "cortex_a8_load_store_2" + "cortex_a8_alu0+cortex_a8_alu1+cortex_a8_issue_ls,\ + cortex_a8_alu0+cortex_a8_issue_ls") + +;; The flow of a single-cycle multiplication. +(define_reservation "cortex_a8_multiply" + "cortex_a8_alu0") + +;; The flow of a multiplication instruction that gets decomposed into +;; two micro-ops. The two micro-ops will be issued to pipeline 0 on +;; successive cycles. Dual issue cannot happen at the same time as the +;; first of the micro-ops. +(define_reservation "cortex_a8_multiply_2" + "cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0") + +;; Similarly, the flow of a multiplication instruction that gets +;; decomposed into three micro-ops. Dual issue cannot occur except on +;; the cycle upon which the third micro-op is issued. +(define_reservation "cortex_a8_multiply_3" + "cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0") + +;; The model given here assumes that all instructions are unconditional. + +;; Data processing instructions, but not move instructions. + +;; We include CLZ with these since it has the same execution pattern +;; (source read in E2 and destination available at the end of that cycle). +(define_insn_reservation "cortex_a8_alu" 2 + (and (eq_attr "tune" "cortexa8") + (ior (and (and (eq_attr "type" "alu") + (eq_attr "neon_type" "none")) + (not (eq_attr "insn" "mov,mvn"))) + (eq_attr "insn" "clz"))) + "cortex_a8_default") + +(define_insn_reservation "cortex_a8_alu_shift" 2 + (and (eq_attr "tune" "cortexa8") + (and (eq_attr "type" "alu_shift") + (not (eq_attr "insn" "mov,mvn")))) + "cortex_a8_default") + +(define_insn_reservation "cortex_a8_alu_shift_reg" 2 + (and (eq_attr "tune" "cortexa8") + (and (eq_attr "type" "alu_shift_reg") + (not (eq_attr "insn" "mov,mvn")))) + "cortex_a8_default") + +;; Move instructions. + +(define_insn_reservation "cortex_a8_mov" 1 + (and (eq_attr "tune" "cortexa8") + (and (eq_attr "type" "alu,alu_shift,alu_shift_reg") + (eq_attr "insn" "mov,mvn"))) + "cortex_a8_default") + +;; Exceptions to the default latencies for data processing instructions. + +;; A move followed by an ALU instruction with no early dep. +;; (Such a pair can be issued in parallel, hence latency zero.) +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu") +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; An ALU instruction followed by an ALU instruction with no early dep. +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu") +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Multiplication instructions. 
These are categorized according to their +;; reservation behavior and the need below to distinguish certain +;; varieties for bypasses. Results are available at the E5 stage +;; (but some of these are multi-cycle instructions, which explains the +;; latencies below). + +(define_insn_reservation "cortex_a8_mul" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "insn" "mul,smulxy,smmul")) + "cortex_a8_multiply_2") + +(define_insn_reservation "cortex_a8_mla" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) + "cortex_a8_multiply_2") + +(define_insn_reservation "cortex_a8_mull" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy")) + "cortex_a8_multiply_3") + +(define_insn_reservation "cortex_a8_smulwy" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "insn" "smulwy,smuad,smusd")) + "cortex_a8_multiply") + +;; smlald and smlsld are multiply-accumulate instructions but do not +;; receive bypassed data from other multiplication results; thus, they +;; cannot go in cortex_a8_mla above. (See below for bypass details.) +(define_insn_reservation "cortex_a8_smlald" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "insn" "smlald,smlsld")) + "cortex_a8_multiply_2") + +;; A multiply with a single-register result or an MLA, followed by an +;; MLA with an accumulator dependency, has its result forwarded so two +;; such instructions can issue back-to-back. +(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy" + "cortex_a8_mla" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at E2 has lower latency than one needing it at E1. +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Load instructions. +;; The presence of any register writeback is ignored here. + +;; A load result has latency 3 unless the dependent instruction has +;; no early dep, in which case it is only latency two. +;; We assume 64-bit alignment for doubleword loads. +(define_insn_reservation "cortex_a8_load1_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load1,load2,load_byte")) + "cortex_a8_load_store_1") + +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; We do not currently model the fact that loads with scaled register +;; offsets that are not LSL #2 have an extra cycle latency (they issue +;; as two micro-ops). + +;; A load multiple of three registers is usually issued as two micro-ops. +;; The first register will be available at E3 of the first iteration, +;; the second at E3 of the second iteration, and the third at E4 of +;; the second iteration. A load multiple of four registers is usually +;; issued as two micro-ops.
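As an aside on how the constructs above fit together: a define_insn_reservation supplies a default latency for a producer, and a define_bypass (optionally gated by a guard such as arm_mac_accumulator_is_mul_result or arm_no_early_alu_shift_dep) overrides that latency for a particular producer/consumer pair. The Python sketch below is purely illustrative and only mirrors a handful of the cortex_a8 entries above; the table names and the effective_latency helper are invented for this example and are not part of GCC's real DFA-based scheduler.

    # Illustrative only: how a bypass entry overrides a reservation's default
    # latency for a specific producer/consumer pair (mirrors entries above).
    DEFAULT_LATENCY = {"cortex_a8_mul": 6, "cortex_a8_mla": 6,
                       "cortex_a8_load1_2": 3}

    # (producers, consumer, guard or None, latency) -- copied from the
    # bypasses defined above.
    BYPASSES = [
        ({"cortex_a8_mul", "cortex_a8_mla", "cortex_a8_smulwy"},
         "cortex_a8_mla", "arm_mac_accumulator_is_mul_result", 1),
        ({"cortex_a8_mul", "cortex_a8_mla", "cortex_a8_mull",
          "cortex_a8_smulwy", "cortex_a8_smlald"}, "cortex_a8_alu", None, 4),
        ({"cortex_a8_load1_2"}, "cortex_a8_alu", None, 2),
    ]

    def effective_latency(producer, consumer, guards_true=()):
        # In this sketch the first matching bypass wins; otherwise fall back
        # to the producer's default latency from its define_insn_reservation.
        for producers, target, guard, latency in BYPASSES:
            if producer in producers and consumer == target \
               and (guard is None or guard in guards_true):
                return latency
        return DEFAULT_LATENCY[producer]

    assert effective_latency("cortex_a8_load1_2", "cortex_a8_alu") == 2
    assert effective_latency("cortex_a8_mul", "cortex_a8_mla",
                             ("arm_mac_accumulator_is_mul_result",)) == 1
    assert effective_latency("cortex_a8_mul", "cortex_a8_mla") == 6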
+(define_insn_reservation "cortex_a8_load3_4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load3,load4")) + "cortex_a8_load_store_2") + +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Store instructions. +;; Writeback is again ignored. + +(define_insn_reservation "cortex_a8_store1_2" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store1,store2")) + "cortex_a8_load_store_1") + +(define_insn_reservation "cortex_a8_store3_4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store3,store4")) + "cortex_a8_load_store_2") + +;; An ALU instruction acting as a producer for a store instruction +;; that only uses the result as the value to be stored (as opposed to +;; using it to calculate the address) has latency zero; the store +;; reads the value to be stored at the start of E3 and the ALU insn +;; writes it at the end of E2. Move instructions actually produce the +;; result at the end of E1, but since we don't have delay slots, the +;; scheduling behavior will be the same. +(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\ + cortex_a8_alu_shift_reg,cortex_a8_mov" + "cortex_a8_store1_2,cortex_a8_store3_4" + "arm_no_early_store_addr_dep") + +;; Branch instructions + +(define_insn_reservation "cortex_a8_branch" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "branch")) + "cortex_a8_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_a8_call" 32 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "call")) + "cortex_a8_issue_branch") + +;; NEON (including VFP) instructions. + +(include "cortex-a8-neon.md") + diff --git a/gcc/config/arm/cortex-a9-neon.md b/gcc/config/arm/cortex-a9-neon.md new file mode 100644 index 000000000..2e8ec9b14 --- /dev/null +++ b/gcc/config/arm/cortex-a9-neon.md @@ -0,0 +1,1237 @@ +;; ARM Cortex-A9 pipeline description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; +;; Neon pipeline description contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +(define_automaton "cortex_a9_neon") + +;; Only one instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_perm" "cortex_a9_neon") + +;; Only one data-processing instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_dp" "cortex_a9_neon") + +;; We need a special mutual exclusion (to be used in addition to +;; cortex_a9_neon_issue_dp) for the case when an instruction such as +;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to +;; E2 of the floating-point add pipeline. 
On the cycle previous to that +;; forward we must prevent issue of any instruction to the floating-point +;; add pipeline, but still allow issue of a data-processing instruction +;; to any of the other pipelines. +(define_cpu_unit "cortex_a9_neon_issue_fadd" "cortex_a9_neon") +(define_cpu_unit "cortex_a9_neon_mcr" "cortex_a9_neon") + + +;; Patterns of reservation. +;; We model the NEON issue units as running in parallel with the core ones. +;; We assume that multi-cycle NEON instructions get decomposed into +;; micro-ops as they are issued into the NEON pipeline. + +(define_reservation "cortex_a9_neon_dp" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_4" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp + \ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_perm" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm, \ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_ls" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm+cortex_a9_ls") +(define_reservation "cortex_a9_neon_ls_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_4" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_5" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_fmul_then_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + nothing*3,\ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fmul_then_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp,\ + nothing*2,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_fadd") + + +;; NEON -> core transfers. +(define_insn_reservation "ca9_neon_mrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +(define_insn_reservation "ca9_neon_mrrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mrrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +;; The remainder of this file is auto-generated by neon-schedgen. + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a9_neon_int_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_2" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_3" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_3")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_4" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_4")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_5" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_int_5")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vqneg_vqabs")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "cortex_a9_neon_vmov" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vmov")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vaba" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vaba")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vaba_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vsma" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vsma")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. 
+(define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_qqq_8_16")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_shift_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_shift_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. 
+(define_insn_reservation "cortex_a9_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_shift_3")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N1. +(define_insn_reservation "cortex_a9_neon_vshl_ddd" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vshl_ddd")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vsra_vrsra")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a9_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vsum")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmul_ddd")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmul_qqd")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. 
+(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. +(define_insn_reservation "cortex_a9_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_bp_simple")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_bp_2cycle")) + "cortex_a9_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_bp_3cycle")) + "cortex_a9_neon_perm_3") + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "cortex_a9_neon_ldr" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_ldr")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_str" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_str")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld1_1_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld2_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. 
+(define_insn_reservation "cortex_a9_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld3_vld4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst3_vst4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld1_vld2_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld3_vld4_lane")) + "cortex_a9_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst1_vst2_lane")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vst3_vst4_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_vld3_vld4_all_lanes")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mcr")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "neon_type" "neon_mcr_2_mcrr")) + "cortex_a9_neon_perm_2") + +;; Exceptions to the default latencies. 
+ +(define_bypass 1 "cortex_a9_neon_mcr_2_mcrr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_mcr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld3_vld4_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vld3_vld4_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld1_vld2_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vld3_vld4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld2_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + 
cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld1_3_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_vld1_1_2_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a9_neon_ldr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_bp_3cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_bp_2cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_bp_simple" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + 
cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vmul_qqd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + 
cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vmul_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vsum" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vadd_qqq_vabs_qq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vadd_ddd_vabs_dd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vsra_vrsra" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a9_neon_vshl_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_shift_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mul_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_qqq_8_16" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 
5 "cortex_a9_neon_vsma" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_vaba_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vaba" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vmov" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vqneg_vqabs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_5" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 
"cortex_a9_neon_int_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md new file mode 100644 index 000000000..b74ace833 --- /dev/null +++ b/gcc/config/arm/cortex-a9.md @@ -0,0 +1,269 @@ +;; ARM Cortex-A9 pipeline description +;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. +;; Originally written by CodeSourcery for VFP. +;; +;; Rewritten by Ramana Radhakrishnan +;; Integer Pipeline description contributed by ARM Ltd. +;; VFP Pipeline description rewritten and contributed by ARM Ltd. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a9") + +;; The Cortex-A9 core is modelled as a dual issue pipeline that has +;; the following components. +;; 1. 1 Load Store Pipeline. +;; 2. P0 / main pipeline for data processing instructions. +;; 3. P1 / Dual pipeline for Data processing instructions. +;; 4. MAC pipeline for multiply as well as multiply +;; and accumulate instructions. +;; 5. 1 VFP and an optional Neon unit. +;; The Load/Store, VFP and Neon issue pipeline are multiplexed. +;; The P0 / main pipeline and M1 stage of the MAC pipeline are +;; multiplexed. +;; The P1 / dual pipeline and M2 stage of the MAC pipeline are +;; multiplexed. +;; There are only 4 integer register read ports and hence at any point of +;; time we can't have issue down the E1 and the E2 ports unless +;; of course there are bypass paths that get exercised. +;; Both P0 and P1 have 2 stages E1 and E2. 
+;; Data processing instructions issue to E1 or E2 depending on +;; whether they have an early shift or not. + +(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9") +(define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9") +(define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9") +(define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9") +(define_cpu_unit "cortex_a9_mac_m1, cortex_a9_mac_m2" "cortex_a9") +(define_cpu_unit "cortex_a9_branch, cortex_a9_issue_branch" "cortex_a9") + +(define_reservation "cortex_a9_p0_default" "cortex_a9_p0_e2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_p1_default" "cortex_a9_p1_e2, cortex_a9_p1_wb") +(define_reservation "cortex_a9_p0_shift" "cortex_a9_p0_e1, cortex_a9_p0_default") +(define_reservation "cortex_a9_p1_shift" "cortex_a9_p1_e1, cortex_a9_p1_default") + +(define_reservation "cortex_a9_multcycle1" + "cortex_a9_p0_e2 + cortex_a9_mac_m1 + cortex_a9_mac_m2 + \ +cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") + +(define_reservation "cortex_a9_mult16" + "cortex_a9_mac_m1, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mac16" + "cortex_a9_multcycle1, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mult" + "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mac" + "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb") + + +;; Issue at the same time along the load store pipeline and +;; the VFP / Neon pipeline is not possible. +(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon") + +;; Default data processing instruction without any shift +;; The only exception to this is the mov instruction +;; which can go down E2 without any problem. +(define_insn_reservation "cortex_a9_dp" 2 + (and (eq_attr "tune" "cortexa9") + (ior (and (eq_attr "type" "alu") + (eq_attr "neon_type" "none")) + (and (and (eq_attr "type" "alu_shift_reg, alu_shift") + (eq_attr "insn" "mov")) + (eq_attr "neon_type" "none")))) + "cortex_a9_p0_default|cortex_a9_p1_default") + +;; An instruction using the shifter will go down E1. +(define_insn_reservation "cortex_a9_dp_shift" 3 + (and (eq_attr "tune" "cortexa9") + (and (eq_attr "type" "alu_shift_reg, alu_shift") + (not (eq_attr "insn" "mov")))) + "cortex_a9_p0_shift | cortex_a9_p1_shift") + +;; Loads have a latency of 4 cycles. +;; We don't model autoincrement instructions. These +;; instructions use the load store pipeline and 1 of +;; the E2 units to write back the result of the increment. + +(define_insn_reservation "cortex_a9_load1_2" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd")) + "cortex_a9_ls") + +;; Loads multiples and store multiples can't be issued for 2 cycles in a +;; row. The description below assumes that addresses are 64 bit aligned. +;; If not, there is an extra cycle latency which is not modelled. + +(define_insn_reservation "cortex_a9_load3_4" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "load3, load4")) + "cortex_a9_ls, cortex_a9_ls") + +(define_insn_reservation "cortex_a9_store1_2" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "store1, store2, f_stores, f_stored")) + "cortex_a9_ls") + +;; Almost all our store multiples use an auto-increment +;; form. Don't issue back to back load and store multiples +;; because the load store unit will stall. 
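+;; Both cortex_a9_load3_4 above and cortex_a9_store3_4 below claim
+;; cortex_a9_ls on two consecutive cycles, which is what keeps a second
+;; load or store multiple from issuing in the very next cycle.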
+ +(define_insn_reservation "cortex_a9_store3_4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "store3, store4")) + "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls") + +;; We get 16*16 multiply / mac results in 3 cycles. +(define_insn_reservation "cortex_a9_mult16" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "insn" "smulxy")) + "cortex_a9_mult16") + +;; The 16*16 mac is slightly different that it +;; reserves M1 and M2 in the same cycle. +(define_insn_reservation "cortex_a9_mac16" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "insn" "smlaxy")) + "cortex_a9_mac16") + + +(define_insn_reservation "cortex_a9_multiply" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "insn" "mul")) + "cortex_a9_mult") + +(define_insn_reservation "cortex_a9_mac" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "insn" "mla")) + "cortex_a9_mac") + +;; An instruction with a result in E2 can be forwarded +;; to E2 or E1 or M1 or the load store unit in the next cycle. + +(define_bypass 1 "cortex_a9_dp" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") + +(define_bypass 2 "cortex_a9_dp_shift" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") + +;; An instruction in the load store pipeline can provide +;; read access to a DP instruction in the P0 default pipeline +;; before the writeback stage. + +(define_bypass 3 "cortex_a9_load1_2" "cortex_a9_dp, cortex_a9_load1_2, +cortex_a9_store3_4, cortex_a9_store1_2") + +(define_bypass 4 "cortex_a9_load3_4" "cortex_a9_dp, cortex_a9_load1_2, +cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") + +;; Calls and branches. + +;; Branch instructions + +(define_insn_reservation "cortex_a9_branch" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "branch")) + "cortex_a9_branch") + +;; Call latencies are essentially 0 but make sure +;; dual issue doesn't happen i.e the next instruction +;; starts at the next cycle. +(define_insn_reservation "cortex_a9_call" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "call")) + "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon") + + +;; Pipelining for VFP instructions. +;; Issue happens either along load store unit or the VFP / Neon unit. +;; Pipeline Instruction Classification. +;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r +;; FP_ADD - fadds, faddd, fcmps (1) +;; FPMUL - fmul{s,d}, fmac{s,d} +;; FPDIV - fdiv{s,d} +(define_cpu_unit "ca9fps" "cortex_a9") +(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9") +(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9") +(define_cpu_unit "ca9fp_ds1" "cortex_a9") + + +;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. +(define_insn_reservation "cortex_a9_fps" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag")) + "ca9_issue_vfp_neon + ca9fps") + +(define_bypass 1 + "cortex_a9_fps" + "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply") + +;; Scheduling on the FP_ADD pipeline. 
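+;; The four ca9fp_add* units stand for the four stages of the VFP add
+;; pipeline.  Each stage is held for a single cycle, so independent FP
+;; additions can start on consecutive cycles.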
+(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") + +(define_insn_reservation "cortex_a9_fadd" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fadds, faddd, f_cvt")) + "ca9fp_add") + +(define_insn_reservation "cortex_a9_fcmp" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fcmps, fcmpd")) + "ca9_issue_vfp_neon + ca9fp_add1") + +;; Scheduling for the Multiply and MAC instructions. +(define_reservation "ca9fmuls" + "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") + +(define_reservation "ca9fmuld" + "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") + +(define_insn_reservation "cortex_a9_fmuls" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuls")) + "ca9fmuls") + +(define_insn_reservation "cortex_a9_fmuld" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuld")) + "ca9fmuld") + +(define_insn_reservation "cortex_a9_fmacs" 8 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacs")) + "ca9fmuls, ca9fp_add") + +(define_insn_reservation "cortex_a9_fmacd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacd")) + "ca9fmuld, ca9fp_add") + +;; Division pipeline description. +(define_insn_reservation "cortex_a9_fdivs" 15 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivs")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") + +(define_insn_reservation "cortex_a9_fdivd" 25 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivd")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") + +;; Include Neon pipeline description +(include "cortex-a9-neon.md") diff --git a/gcc/config/arm/cortex-m4-fpu.md b/gcc/config/arm/cortex-m4-fpu.md new file mode 100644 index 000000000..6fd5faf74 --- /dev/null +++ b/gcc/config/arm/cortex-m4-fpu.md @@ -0,0 +1,111 @@ +;; ARM Cortex-M4 FPU pipeline description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Use an artifial unit to model FPU. +(define_cpu_unit "cortex_m4_v" "cortex_m4") + +(define_reservation "cortex_m4_ex_v" "cortex_m4_ex+cortex_m4_v") + +;; Integer instructions following VDIV or VSQRT complete out-of-order. 
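+;; The reservation below only ties up the FPU unit (cortex_m4_v) after
+;; its first cycle, so instructions that need just the integer issue
+;; units can still be scheduled while the divide completes.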
+(define_insn_reservation "cortex_m4_fdivs" 15 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fdivs")) + "cortex_m4_ex_v,cortex_m4_v*13") + +(define_insn_reservation "cortex_m4_vmov_1" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fcpys,fconsts")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_vmov_2" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_2_r,r_2_f")) + "cortex_m4_ex_v*2") + +(define_insn_reservation "cortex_m4_fmuls" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmuls")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_fmacs" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmacs")) + "cortex_m4_ex_v*3") + +(define_insn_reservation "cortex_m4_ffariths" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "ffariths")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_fadds" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fadds")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_fcmps" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fcmps")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_flag" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_flag")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_cvt" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_cvt")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_load" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_loads")) + "cortex_m4_ex_v*2") + +(define_insn_reservation "cortex_m4_f_store" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_stores")) + "cortex_m4_ex_v*2") + +(define_insn_reservation "cortex_m4_f_loadd" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_loadd")) + "cortex_m4_ex_v*3") + +(define_insn_reservation "cortex_m4_f_stored" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_stored")) + "cortex_m4_ex_v*3") + +;; MAC instructions consume their addend one cycle later. If the result +;; of an arithmetic instruction is consumed as the addend of the following +;; MAC instruction, the latency can be decreased by one. + +(define_bypass 1 "cortex_m4_fadds,cortex_m4_fmuls,cortex_m4_f_cvt" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") + +(define_bypass 3 "cortex_m4_fmacs" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") + +(define_bypass 14 "cortex_m4_fdivs" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md new file mode 100644 index 000000000..b71037585 --- /dev/null +++ b/gcc/config/arm/cortex-m4.md @@ -0,0 +1,111 @@ +;; ARM Cortex-M4 pipeline description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_m4") + +;; We model the pipelining of LDR instructions by using two artificial units. 
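+;; A load claims cortex_m4_a and then cortex_m4_b on successive cycles,
+;; whereas an ALU instruction reserves both units at once (cortex_m4_ex),
+;; so two loads can overlap by one cycle:
+;;
+;;   cycle:    1            2            3
+;;   ldr #1:   cortex_m4_a  cortex_m4_b
+;;   ldr #2:                cortex_m4_a  cortex_m4_b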
+ +(define_cpu_unit "cortex_m4_a" "cortex_m4") + +(define_cpu_unit "cortex_m4_b" "cortex_m4") + +(define_reservation "cortex_m4_ex" "cortex_m4_a+cortex_m4_b") + +;; ALU and multiply is one cycle. +(define_insn_reservation "cortex_m4_alu" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "alu,alu_shift,alu_shift_reg,mult")) + "cortex_m4_ex") + +;; Byte, half-word and word load is two cycles. +(define_insn_reservation "cortex_m4_load1" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load_byte,load1")) + "cortex_m4_a, cortex_m4_b") + +;; str rx, [ry, #imm] is always one cycle. +(define_insn_reservation "cortex_m4_store1_1" 1 + (and (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store1")) + (ne (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0))) + "cortex_m4_a") + +;; Other byte, half-word and word load is two cycles. +(define_insn_reservation "cortex_m4_store1_2" 2 + (and (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store1")) + (eq (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0))) + "cortex_m4_a*2") + +(define_insn_reservation "cortex_m4_load2" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load2")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_store2" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store2")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_load3" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load3")) + "cortex_m4_ex*4") + +(define_insn_reservation "cortex_m4_store3" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store3")) + "cortex_m4_ex*4") + +(define_insn_reservation "cortex_m4_load4" 5 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load4")) + "cortex_m4_ex*5") + +(define_insn_reservation "cortex_m4_store4" 5 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store4")) + "cortex_m4_ex*5") + +;; If the address of load or store depends on the result of the preceding +;; instruction, the latency is increased by one. + +(define_bypass 2 "cortex_m4_alu" + "cortex_m4_load1" + "arm_early_load_addr_dep") + +(define_bypass 2 "cortex_m4_alu" + "cortex_m4_store1_1,cortex_m4_store1_2" + "arm_early_store_addr_dep") + +(define_insn_reservation "cortex_m4_branch" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "branch")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_call" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "call")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_block" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "block")) + "cortex_m4_ex") diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md new file mode 100644 index 000000000..e26c3d45d --- /dev/null +++ b/gcc/config/arm/cortex-r4.md @@ -0,0 +1,292 @@ +;; ARM Cortex-R4 scheduling description. +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_automaton "cortex_r4") + +;; We approximate the dual-issue constraints of this core using four +;; "issue units" and a reservation matrix as follows. The numbers indicate +;; the instruction groups' preferences in order. Multiple entries for +;; the same numbered preference indicate units that must be reserved +;; together. +;; +;; Issue unit: A B C ALU +;; +;; ALU w/o reg shift 1st 2nd 1st and 2nd +;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd +;; Moves 1st 2nd 2nd +;; Multiplication 1st 1st +;; Division 1st 1st +;; Load/store single 1st 1st +;; Other load/store 1st 1st +;; Branches 1st + +(define_cpu_unit "cortex_r4_issue_a" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_b" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_c" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4") + +(define_reservation "cortex_r4_alu" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_alu)") +(define_reservation "cortex_r4_alu_shift_reg" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_c+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mov" + "cortex_r4_issue_a|(cortex_r4_issue_b+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_mul_2" + "(cortex_r4_issue_a+cortex_r4_issue_alu)*2") +;; Division instructions execute out-of-order with respect to the +;; rest of the pipeline and only require reservations on their first and +;; final cycles. +(define_reservation "cortex_r4_div_9" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*7,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_div_10" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*8,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_load_store" + "cortex_r4_issue_a+cortex_r4_issue_c") +(define_reservation "cortex_r4_load_store_2" + "(cortex_r4_issue_a+cortex_r4_issue_b)*2") +(define_reservation "cortex_r4_branch" "cortex_r4_issue_b") + +;; We assume that all instructions are unconditional. + +;; Data processing instructions. Moves without shifts are kept separate +;; for the purposes of the dual-issue constraints above. +(define_insn_reservation "cortex_r4_alu" 2 + (and (eq_attr "tune_cortexr4" "yes") + (and (eq_attr "type" "alu") + (not (eq_attr "insn" "mov")))) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_mov" 2 + (and (eq_attr "tune_cortexr4" "yes") + (and (eq_attr "type" "alu") + (eq_attr "insn" "mov"))) + "cortex_r4_mov") + +(define_insn_reservation "cortex_r4_alu_shift" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_shift")) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_alu_shift_reg" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_shift_reg")) + "cortex_r4_alu_shift_reg") + +;; An ALU instruction followed by an ALU instruction with no early dep. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; In terms of availabilities, a consumer mov could theoretically be +;; issued together with a producer ALU instruction, without stalls. 
+;; In practice this cannot happen because mov;add (in that order) is not +;; eligible for dual issue and furthermore dual issue is not permitted +;; when a dependency is involved. We therefore note it as latency one. +;; A mov followed by another of the same is also latency one. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_mov") + +;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are +;; media data processing instructions nor sad instructions. + +;; Multiplication instructions. + +(define_insn_reservation "cortex_r4_mul_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "mul,smmul")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mul_3" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mla_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "mla,smmla")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mla_3" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_smlald" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "smlald,smlsld")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mull" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "smull,umull,umlal,umaal")) + "cortex_r4_mul_2") + +;; A multiply or an MLA with a single-register result, followed by an +;; MLA with an accumulator dependency, has its result forwarded. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at ALU has lower latency than one needing it at Shift. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; A multiply followed by a mov has one cycle lower latency again. +(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_mov") +(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_mov") + +;; We guess that division of A/B using sdiv or udiv, on average, +;; is performed with B having ten more leading zeros than A. +;; This gives a latency of nine for udiv and ten for sdiv. +(define_insn_reservation "cortex_r4_udiv" 9 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "udiv")) + "cortex_r4_div_9") + +(define_insn_reservation "cortex_r4_sdiv" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "insn" "sdiv")) + "cortex_r4_div_10") + +;; Branches. We assume correct prediction. 
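+;; Under that assumption a branch only occupies its issue slot
+;; (cortex_r4_issue_b) and contributes no result latency, hence the
+;; latency of zero below.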
+ +(define_insn_reservation "cortex_r4_branch" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "branch")) + "cortex_r4_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_r4_call" 32 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "call")) + "nothing") + +;; Status register access instructions are not currently emitted. + +;; Load instructions. +;; We do not model the "addr_md_3cycle" cases and assume that +;; accesses following are correctly aligned. + +(define_insn_reservation "cortex_r4_load_1_2" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "load1,load2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_load_3_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "load3,load4")) + "cortex_r4_load_store_2") + +;; If a producing load is followed by an instruction consuming only +;; as a Normal Reg, there is one fewer cycle of latency. + +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; If a producing load is followed by an instruction consuming only +;; as a Late Reg, there are two fewer cycles of latency. Such consumer +;; instructions are moves and stores. + +(define_bypass 1 "cortex_r4_load_1_2" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") +(define_bypass 2 "cortex_r4_load_3_4" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") + +;; If a producer's result is required as the base or offset of a load, +;; there is an extra cycle latency. + +(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\ + cortex_r4_alu_shift_reg" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +;; Store instructions. + +(define_insn_reservation "cortex_r4_store_1_2" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "store1,store2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_store_3_4" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "store3,store4")) + "cortex_r4_load_store_2") + diff --git a/gcc/config/arm/cortex-r4f.md b/gcc/config/arm/cortex-r4f.md new file mode 100644 index 000000000..8982bc068 --- /dev/null +++ b/gcc/config/arm/cortex-r4f.md @@ -0,0 +1,161 @@ +;; ARM Cortex-R4F VFP pipeline description +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; With the exception of simple VMOV , instructions and +;; the accululate operand of a multiply-accumulate instruction, all +;; registers are early registers. Thus base latencies are 1 more than +;; those listed in the TRM. + +;; We use the A, B abd C units from the integer core, plus two additional +;; units to enforce VFP dual issue constraints. + +;; A B C V1 VMLA +;; fcpy 1 2 +;; farith 1 2 1 +;; fmrc 1 2 +;; fconst 1 2 * * +;; ffarith 1 2 * * +;; fmac 1 2 1 2 +;; fdiv 1 2 * +;; f_loads * * * +;; f_stores * * * + +(define_cpu_unit "cortex_r4_v1" "cortex_r4") + +(define_cpu_unit "cortex_r4_vmla" "cortex_r4") + +(define_reservation "cortex_r4_issue_ab" + "(cortex_r4_issue_a|cortex_r4_issue_b)") +(define_reservation "cortex_r4_single_issue" + "cortex_r4_issue_a+cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fcpys" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcpys")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_ffariths" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffariths,fconsts,fcmps")) + "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fariths" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fadds,fmuls")) + "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fmacs" 6 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacs")) + "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)") + +(define_insn_reservation "cortex_r4_fdivs" 17 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivs")) + "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_floads" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loads")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fstores" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla") + +(define_insn_reservation "cortex_r4_mcr" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "r_2_f")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_mrc" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_2_r")) + "cortex_r4_issue_ab") + +;; Bypasses for normal (not early) regs. +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fcpys") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fcpys") +(define_bypass 5 "cortex_r4_fmacs" + "cortex_r4_fcpys") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fcpys") + +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +;; mac->mac has an extra forwarding path. +(define_bypass 3 "cortex_r4_fmacs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") + +;; Double precision operations. These can not dual issue. 
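+;; Dual issue is blocked by reserving cortex_r4_single_issue, which
+;; claims both the A and B issue units for every cycle of the
+;; reservation.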
+ +(define_insn_reservation "cortex_r4_fmacd" 20 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacd")) + "cortex_r4_single_issue*13") + +(define_insn_reservation "cortex_r4_farith" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "faddd,fmuld")) + "cortex_r4_single_issue*3") + +;; FIXME: The short cycle count suggests these instructions complete +;; out of order. Chances are this is not a pipelined operation. +(define_insn_reservation "cortex_r4_fdivd" 97 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivd")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_ffarithd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffarithd,fconstd")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_fcmpd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcmpd")) + "cortex_r4_single_issue*2") + +(define_insn_reservation "cortex_r4_f_cvt" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_cvt")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_f_memd" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loadd,f_stored")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_f_flag" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_single_issue") + diff --git a/gcc/config/arm/crti.asm b/gcc/config/arm/crti.asm new file mode 100644 index 000000000..9454273dd --- /dev/null +++ b/gcc/config/arm/crti.asm @@ -0,0 +1,86 @@ +# Copyright (C) 2001, 2008, 2009, 2010 Free Software Foundation, Inc. +# Written By Nick Clifton +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +# This file just make a stack frame for the contents of the .fini and +# .init sections. Users may put any desired instructions in those +# sections. + +#ifdef __ELF__ +#define TYPE(x) .type x,function +#else +#define TYPE(x) +#endif +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + + # Note - this macro is complemented by the FUNC_END macro + # in crtn.asm. If you change this macro you must also change + # that macro match. 
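+# On Thumb, FUNC_START pushes r3-r7 and lr; on ARM it builds a full APCS
+# frame so that FUNC_END in crtn.asm can recover sp from fp and return
+# through the saved lr.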
+.macro FUNC_START +#ifdef __thumb__ + .thumb + + push {r3, r4, r5, r6, r7, lr} +#else + .arm + # Create a stack frame and save any call-preserved registers + mov ip, sp + stmdb sp!, {r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr, pc} + sub fp, ip, #4 +#endif +.endm + + .section ".init" + .align 2 + .global _init +#ifdef __thumb__ + .thumb_func +#endif + TYPE(_init) +_init: + FUNC_START + + + .section ".fini" + .align 2 + .global _fini +#ifdef __thumb__ + .thumb_func +#endif + TYPE(_fini) +_fini: + FUNC_START + +# end of crti.asm diff --git a/gcc/config/arm/crtn.asm b/gcc/config/arm/crtn.asm new file mode 100644 index 000000000..c7f90814d --- /dev/null +++ b/gcc/config/arm/crtn.asm @@ -0,0 +1,82 @@ +# Copyright (C) 2001, 2004, 2008, 2009, 2010 Free Software Foundation, Inc. +# Written By Nick Clifton +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +# This file just makes sure that the .fini and .init sections do in +# fact return. Users may put any desired instructions in those sections. +# This file is the last thing linked into any executable. + + # Note - this macro is complemented by the FUNC_START macro + # in crti.asm. If you change this macro you must also change + # that macro match. + # + # Note - we do not try any fancy optimizations of the return + # sequences here, it is just not worth it. Instead keep things + # simple. Restore all the save resgisters, including the link + # register and then perform the correct function return instruction. + # We also save/restore r3 to ensure stack alignment. 
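+# In Thumb mode the saved lr value is popped into r3 and moved back into
+# lr so that the return below can use bx lr and interwork correctly.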
+.macro FUNC_END +#ifdef __thumb__ + .thumb + + pop {r3, r4, r5, r6, r7} + pop {r3} + mov lr, r3 +#else + .arm + + sub sp, fp, #40 + ldmfd sp, {r4, r5, r6, r7, r8, r9, sl, fp, sp, lr} +#endif + +#if defined __THUMB_INTERWORK__ || defined __thumb__ + bx lr +#else + mov pc, lr +#endif +.endm + + + .section ".init" + ;; + FUNC_END + + .section ".fini" + ;; + FUNC_END + +# end of crtn.asm diff --git a/gcc/config/arm/ecos-elf.h b/gcc/config/arm/ecos-elf.h new file mode 100644 index 000000000..9e9fa7046 --- /dev/null +++ b/gcc/config/arm/ecos-elf.h @@ -0,0 +1,27 @@ +/* Definitions for ecos based ARM systems using ELF + Copyright (C) 1998, 2001, 2007 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Run-time Target Specification. */ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/ELF Ecos)", stderr); + +#define HAS_INIT_SECTION + +#undef INVOKE_main + diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h new file mode 100644 index 000000000..88400884e --- /dev/null +++ b/gcc/config/arm/elf.h @@ -0,0 +1,166 @@ +/* Definitions of target machine for GNU compiler. + For ARM with ELF obj format. + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007, + 2008 Free Software Foundation, Inc. + Contributed by Philip Blundell and + Catherine Moore + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef OBJECT_FORMAT_ELF + #error elf.h included before elfos.h +#endif + +#ifndef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." 
+#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__ELF__" +#endif + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \ + { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \ + SUBSUBTARGET_EXTRA_SPECS +#endif + +#ifndef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "" +#endif + +#ifndef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC "\ +%{mapcs-float:-mfloat}" +#endif + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS + +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%{mcpu=*:-mcpu=%*} \ +%{march=*:-march=%*} \ +%{mapcs-*:-mapcs-%*} \ +%(subtarget_asm_float_spec) \ +%{mthumb-interwork:-mthumb-interwork} \ +%{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \ +%{mfloat-abi=*} %{mfpu=*} \ +%(subtarget_extra_asm_spec)" +#endif + +/* The ARM uses @ are a comment character so we need to redefine + TYPE_OPERAND_FMT. */ +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "%%%s" + +/* We might need a ARM specific header to function declarations. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ARM_DECLARE_FUNCTION_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ARM_OUTPUT_FN_UNWIND (FILE, TRUE); \ + } \ + while (0) + +/* We might need an ARM specific trailer for function declarations. */ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + ARM_OUTPUT_FN_UNWIND (FILE, FALSE); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ + } \ + while (0) + +/* Define this macro if jump tables (for `tablejump' insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. */ +/* We put ARM and Thumb-2 jump tables in the text section, because it makes + the code more efficient, but for Thumb-1 it's better to put them out of + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X" +#endif + +/* Run-time Target Specification. */ +#ifndef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/elf)", stderr) +#endif + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +#ifndef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork", "fno-leading-underscore" } +#endif + +#define TARGET_ASM_FILE_START_APP_OFF true +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + + +/* Output an element in the static constructor array. */ +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR arm_elf_asm_constructor + +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR arm_elf_asm_destructor + +/* For PIC code we need to explicitly specify (PLT) and (GOT) relocs. */ +#define NEED_PLT_RELOC flag_pic +#define NEED_GOT_RELOC flag_pic + +/* The ELF assembler handles GOT addressing differently to NetBSD. */ +#define GOT_PCREL 0 + +/* Align output to a power of two. Note ".align 0" is redundant, + and also GAS will treat it as ".align 2" which we do not want. 
*/ +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do \ + { \ + if ((POWER) > 0) \ + fprintf (STREAM, "\t.align\t%d\n", POWER); \ + } \ + while (0) + +/* Horrible hack: We want to prevent some libgcc routines being included + for some multilibs. */ +#ifndef __ARM_ARCH_6M__ +#undef L_fixdfsi +#undef L_fixunsdfsi +#undef L_truncdfsf2 +#undef L_fixsfsi +#undef L_fixunssfsi +#undef L_floatdidf +#undef L_floatdisf +#undef L_floatundidf +#undef L_floatundisf +#endif + diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md new file mode 100644 index 000000000..42eb9b272 --- /dev/null +++ b/gcc/config/arm/fa526.md @@ -0,0 +1,161 @@ +;; Faraday FA526 Pipeline Description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp. +;; +;; Modeled pipeline characteristics: +;; LD -> any use: latency = 3 (2 cycle penalty). +;; ALU -> any use: latency = 2 (1 cycle penalty). + +;; This automaton provides a pipeline description for the Faraday +;; FA526 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa526") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; S E M W + +(define_cpu_unit "fa526_core" "fa526") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. 
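+;; With a single fa526_core unit, the reservations below model little
+;; more than issue; result penalties such as the load-use penalty quoted
+;; above are expressed through the latency value of each reservation.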
+ +;; ALU operations +(define_insn_reservation "526_alu_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "alu")) + "fa526_core") + +(define_insn_reservation "526_alu_shift_op" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "alu_shift,alu_shift_reg")) + "fa526_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "526_mult1" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy")) + "fa526_core") + +(define_insn_reservation "526_mult2" 5 + (and (eq_attr "tune" "fa526") + (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ + umlals,smulls,smlals,smlawx")) + "fa526_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +(define_insn_reservation "526_load1_op" 3 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load1,load_byte")) + "fa526_core") + +(define_insn_reservation "526_load2_op" 4 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load2")) + "fa526_core*2") + +(define_insn_reservation "526_load3_op" 5 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load3")) + "fa526_core*3") + +(define_insn_reservation "526_load4_op" 6 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load4")) + "fa526_core*4") + +(define_insn_reservation "526_store1_op" 0 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store1")) + "fa526_core") + +(define_insn_reservation "526_store2_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store2")) + "fa526_core*2") + +(define_insn_reservation "526_store3_op" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store3")) + "fa526_core*3") + +(define_insn_reservation "526_store4_op" 3 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store4")) + "fa526_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA526 +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "526_branch_op" 0 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "branch")) + "fa526_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. For most cases, the return value is set +;; by a mov instruction, which has 1 cycle latency. +(define_insn_reservation "526_call_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "call")) + "fa526_core") + diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md new file mode 100644 index 000000000..06e63d696 --- /dev/null +++ b/gcc/config/arm/fa606te.md @@ -0,0 +1,171 @@ +;; Faraday FA606TE Pipeline Description +;; Copyright (C) 2010 Free Software Foundation, Inc. 
+;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA606TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; Modeled pipeline characteristics: +;; LD -> any use: latency = 2 (1 cycle penalty). +;; ALU -> any use: latency = 1 (0 cycle penalty). + +;; This automaton provides a pipeline description for the Faraday +;; FA606TE core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa606te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; E M W + +(define_cpu_unit "fa606te_core" "fa606te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "606te_alu_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "alu,alu_shift,alu_shift_reg")) + "fa606te_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "606te_mult1" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "insn" "smlalxy")) + "fa606te_core") + +(define_insn_reservation "606te_mult2" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy")) + "fa606te_core*2") + +(define_insn_reservation "606te_mult3" 4 + (and (eq_attr "tune" "fa606te") + (eq_attr "insn" "mul,mla,muls,mlas")) + "fa606te_core*3") + +(define_insn_reservation "606te_mult4" 5 + (and (eq_attr "tune" "fa606te") + (eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) + "fa606te_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
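+;; A dcache miss would add stall cycles that the latencies below do not
+;; attempt to represent.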
+ +(define_insn_reservation "606te_load1_op" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load1,load_byte")) + "fa606te_core") + +(define_insn_reservation "606te_load2_op" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load2")) + "fa606te_core*2") + +(define_insn_reservation "606te_load3_op" 4 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load3")) + "fa606te_core*3") + +(define_insn_reservation "606te_load4_op" 5 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load4")) + "fa606te_core*4") + +(define_insn_reservation "606te_store1_op" 0 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store1")) + "fa606te_core") + +(define_insn_reservation "606te_store2_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store2")) + "fa606te_core*2") + +(define_insn_reservation "606te_store3_op" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store3")) + "fa606te_core*3") + +(define_insn_reservation "606te_store4_op" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store4")) + "fa606te_core*4") + + +;;(define_insn_reservation "606te_ldm_op" 9 +;; (and (eq_attr "tune" "fa606te") +;; (eq_attr "type" "load2,load3,load4,store2,store3,store4")) +;; "fa606te_core*7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA606TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "606te_branch_op" 0 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "branch")) + "fa606te_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. For most cases, the return value is set +;; by a mov instruction, which has 1 cycle latency. +(define_insn_reservation "606te_call_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "call")) + "fa606te_core") + diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md new file mode 100644 index 000000000..7fe1c8724 --- /dev/null +++ b/gcc/config/arm/fa626te.md @@ -0,0 +1,165 @@ +;; Faraday FA626TE Pipeline Description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA626TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. 
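The "*N" suffixes in the unit strings used throughout these Faraday models (for example "fa606te_core*4" in the load/store reservations just above) describe occupancy rather than latency: the single core unit stays reserved for N consecutive cycles, so nothing can issue behind an LDM/STM until it frees up. A rough C sketch of that issue-blocking effect, with made-up names and none of GCC's real genautomata machinery, is:

/* Toy in-order issue model: each instruction holds the core for
   'busy' cycles before the next one may issue.  The occupancy values
   mirror the fa606te_core*N reservation strings; everything here is
   illustrative only.  */
#include <stdio.h>

struct insn { const char *name; int busy; };

int main (void)
{
  /* A 4-register load multiple followed by an unrelated add.  */
  const struct insn seq[] = {
    { "ldm r0, {r1-r4}", 4 },   /* 606te_load4_op: fa606te_core*4 */
    { "add r5, r6, r7",  1 },   /* 606te_alu_op                   */
  };
  int cycle = 0;

  for (unsigned i = 0; i < sizeof seq / sizeof seq[0]; i++)
    {
      printf ("cycle %d: issue %s\n", cycle, seq[i].name);
      cycle += seq[i].busy;     /* core is busy until this point */
    }
  /* The add cannot issue before cycle 4: the load multiple holds the
     single core unit for 4 cycles whether or not anything uses r1-r4. */
  return 0;
}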
+ +;; Modeled pipeline characteristics: +;; ALU -> simple address LDR/STR: latency = 2 (available after 2 cycles). +;; ALU -> shifted address LDR/STR: latency = 3. +;; ( extra 1 cycle unavoidable stall). +;; ALU -> other use: latency = 2 (available after 2 cycles). +;; LD -> simple address LDR/STR: latency = 3 (available after 3 cycles). +;; LD -> shifted address LDR/STR: latency = 4 +;; ( extra 1 cycle unavoidable stall). +;; LD -> any other use: latency = 3 (available after 3 cycles). + +;; This automaton provides a pipeline description for the Faraday +;; FA626TE core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa626te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; S E M W + +(define_cpu_unit "fa626te_core" "fa626te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "626te_alu_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "alu")) + "fa626te_core") + +(define_insn_reservation "626te_alu_shift_op" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "alu_shift,alu_shift_reg")) + "fa626te_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "626te_mult1" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + "fa626te_core") + +(define_insn_reservation "626te_mult2" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "insn" "mul,mla")) + "fa626te_core") + +(define_insn_reservation "626te_mult3" 3 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + "fa626te_core*2") + +(define_insn_reservation "626te_mult4" 4 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "insn" "smulls,smlals,umulls,umlals")) + "fa626te_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
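The FA626TE header above spells out its forwarding behaviour as a small producer/consumer table in which an operand that still has to pass through the shifter as part of an address costs one extra cycle. A hedged C transcription of that table, purely for illustration (the enum and function are not GCC code), makes the pattern easier to see:

/* Illustrative transcription of the FA626TE latency table above.
   The producer is an ALU op or a load; the consumer uses the value
   as a simple LDR/STR address, a shifted address, or an ordinary
   operand.  All names are hypothetical.  */
#include <stdio.h>

enum producer { PROD_ALU, PROD_LOAD };
enum consumer { USE_SIMPLE_ADDR, USE_SHIFTED_ADDR, USE_OTHER };

static int fa626te_latency (enum producer p, enum consumer c)
{
  int base = (p == PROD_LOAD) ? 3 : 2;     /* LD -> use: 3, ALU -> use: 2 */
  return base + (c == USE_SHIFTED_ADDR);   /* shifted address: +1 cycle   */
}

int main (void)
{
  printf ("ALU -> shifted-address LDR/STR: %d cycles\n",
          fa626te_latency (PROD_ALU, USE_SHIFTED_ADDR));    /* 3 */
  printf ("LD  -> simple-address  LDR/STR: %d cycles\n",
          fa626te_latency (PROD_LOAD, USE_SIMPLE_ADDR));    /* 3 */
  return 0;
}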
+ +(define_insn_reservation "626te_load1_op" 3 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load1,load_byte")) + "fa626te_core") + +(define_insn_reservation "626te_load2_op" 4 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load2,load3")) + "fa626te_core*2") + +(define_insn_reservation "626te_load3_op" 5 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load4")) + "fa626te_core*3") + +(define_insn_reservation "626te_store1_op" 0 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store1")) + "fa626te_core") + +(define_insn_reservation "626te_store2_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store2,store3")) + "fa626te_core*2") + +(define_insn_reservation "626te_store3_op" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store4")) + "fa626te_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA626TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "626te_branch_op" 0 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "branch")) + "fa626te_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. +(define_insn_reservation "626te_call_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "call")) + "fa626te_core") + diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md new file mode 100644 index 000000000..3c33d5971 --- /dev/null +++ b/gcc/config/arm/fa726te.md @@ -0,0 +1,218 @@ +;; Faraday FA726TE Pipeline Description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; This automaton provides a pipeline description for the Faraday +;; FA726TE core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa726te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. 
We only need to model the execute, memory and write +;; stages. + +;; E1 E2 E3 E4 E5 WB +;;______________________________________________________ +;; +;; <-------------- LD/ST -----------> +;; shifter + LU <-- AU --> +;; <-- AU --> shifter + LU CPSR (Pipe 0) +;;______________________________________________________ +;; +;; <---------- MUL ---------> +;; shifter + LU <-- AU --> +;; <-- AU --> shifter + LU CPSR (Pipe 1) + + +(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te") +(define_cpu_unit "fa726te_mac_pipe" "fa726te") +(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te") + +;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly +;; improve code quality. +(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te") +(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te") + +(define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)") +;; Reservation to restrict issue to 1. +(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; Move instructions. +(define_insn_reservation "726te_shift_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "insn" "mov,mvn")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +;; ALU operations with no shifted operand will finished in 1 cycle +;; Other ALU instructions 2 cycles. +(define_insn_reservation "726te_alu_op" 1 + (and (eq_attr "tune" "fa726te") + (and (eq_attr "type" "alu") + (not (eq_attr "insn" "mov,mvn")))) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +;; ALU operations with a shift-by-register operand. +;; These really stall in the decoder, in order to read the shift value +;; in the first cycle. If the instruction uses both shifter and AU, +;; it takes 3 cycles. +(define_insn_reservation "726te_alu_shift_op" 3 + (and (eq_attr "tune" "fa726te") + (and (eq_attr "type" "alu_shift") + (not (eq_attr "insn" "mov,mvn")))) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +(define_insn_reservation "726te_alu_shift_reg_op" 3 + (and (eq_attr "tune" "fa726te") + (and (eq_attr "type" "alu_shift_reg") + (not (eq_attr "insn" "mov,mvn")))) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. 
Multiply operations occur in both the execute and memory +;; stages of the pipeline + +(define_insn_reservation "726te_mult_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\ + umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy")) + "fa726te_issue+fa726te_mac_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; Loads with a shifted offset take 3 cycles, and are (a) probably the +;; most common and (b) the pessimistic assumption will lead to fewer stalls. + +;; Scalar loads are pipelined in FA726TE LSU pipe. +;; Here we model the resource conflict between Load@E3-stage & Store@W-stage. +;; The 2nd LSU (lsu1) is to model the fact that if 2 loads are scheduled in the +;; same "bundle", and the 2nd load will introudce another ISSUE stall but is +;; still ok to execute (and may be benefical sometimes). + +(define_insn_reservation "726te_load1_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "load1,load_byte")) + "(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\ + | (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)") + +(define_insn_reservation "726te_store1_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "store1")) + "fa726te_blockage*2") + +;; Load/Store Multiple blocks all pipelines in EX stages until WB. +;; No other instructions can be issued together. Since they essentially +;; prevent all scheduling opportunities, we model them together here. + +;; The LDM is breaking into multiple load instructions, later instruction in +;; the pipe 1 is stalled. 
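The FA726TE is the only dual-issue core in this group of descriptions: "fa726te_issue" claims either of the two issue slots, while "fa726te_blockage" claims both and so forces single issue for that cycle (branches, stores and the LDM/STM reservations below all use it). The toy C model that follows only illustrates that two-slot idea; none of the names correspond to the DFA that GCC actually generates.

/* Toy two-slot issue model for the FA726TE description above.
   An ordinary instruction claims one of the two issue slots in the
   current cycle; a "blockage" instruction claims both and therefore
   issues alone.  Everything here is invented for illustration.  */
#include <stdio.h>
#include <stdbool.h>

struct insn { const char *name; bool blockage; };

int main (void)
{
  const struct insn seq[] = {
    { "add r0, r1, r2", false },
    { "sub r3, r4, r5", false },   /* pairs with the add in one cycle */
    { "b   elsewhere",  true  },   /* needs both slots: issues alone  */
    { "add r7, r8, r9", false },
  };
  int cycle = 0, slots_left = 2;

  for (unsigned i = 0; i < sizeof seq / sizeof seq[0]; i++)
    {
      int needed = seq[i].blockage ? 2 : 1;
      if (needed > slots_left)     /* no slot left: start the next cycle */
        {
          cycle++;
          slots_left = 2;
        }
      printf ("cycle %d: %s\n", cycle, seq[i].name);
      slots_left -= needed;
    }
  return 0;   /* add+sub dual-issue in cycle 0, the branch alone in 1. */
}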
+(define_insn_reservation "726te_ldm2_op" 4 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "load2,load3")) + "fa726te_blockage*4") + +(define_insn_reservation "726te_ldm3_op" 5 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "load4")) + "fa726te_blockage*5") + +(define_insn_reservation "726te_stm2_op" 2 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "store2,store3")) + "fa726te_blockage*3") + +(define_insn_reservation "726te_stm3_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "store4")) + "fa726te_blockage*4") + +(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\ + 726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep") +(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\ + 726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op" + "arm_no_early_store_addr_dep") +(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op" + "726te_shift_op,726te_alu_op") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op" + "726te_alu_shift_op" "arm_no_early_alu_shift_dep") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op" + "726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep") +(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op") + +(define_bypass 4 "726te_load1_op" "726te_mult_op") +(define_bypass 5 "726te_ldm2_op" "726te_mult_op") +(define_bypass 6 "726te_ldm3_op" "726te_mult_op") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA726TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "726te_branch_op" 0 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "branch")) + "fa726te_blockage") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 is ready for int return value. +(define_insn_reservation "726te_call_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "call")) + "fa726te_blockage") + diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md new file mode 100644 index 000000000..9ba33ddec --- /dev/null +++ b/gcc/config/arm/fmp626.md @@ -0,0 +1,182 @@ +;; Faraday FA626TE Pipeline Description +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
*/ + +;; These descriptions are based on the information contained in the +;; FMP626 Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; Pipeline architecture +;; S E M W(Q1) Q2 +;; ___________________________________________ +;; shifter alu +;; mul1 mul2 mul3 +;; ld/st1 ld/st2 ld/st3 ld/st4 ld/st5 + +;; This automaton provides a pipeline description for the Faraday +;; FMP626 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fmp626") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +(define_cpu_unit "fmp626_core" "fmp626") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "mp626_alu_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "alu")) + "fmp626_core") + +(define_insn_reservation "mp626_alu_shift_op" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "alu_shift,alu_shift_reg")) + "fmp626_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "mp626_mult1" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + "fmp626_core") + +(define_insn_reservation "mp626_mult2" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "insn" "mul,mla")) + "fmp626_core") + +(define_insn_reservation "mp626_mult3" 3 + (and (eq_attr "tune" "fmp626") + (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + "fmp626_core*2") + +(define_insn_reservation "mp626_mult4" 4 + (and (eq_attr "tune" "fmp626") + (eq_attr "insn" "smulls,smlals,umulls,umlals")) + "fmp626_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
+ +(define_insn_reservation "mp626_load1_op" 5 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load1,load_byte")) + "fmp626_core") + +(define_insn_reservation "mp626_load2_op" 6 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load2,load3")) + "fmp626_core*2") + +(define_insn_reservation "mp626_load3_op" 7 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load4")) + "fmp626_core*3") + +(define_insn_reservation "mp626_store1_op" 0 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store1")) + "fmp626_core") + +(define_insn_reservation "mp626_store2_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store2,store3")) + "fmp626_core*2") + +(define_insn_reservation "mp626_store3_op" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store4")) + "fmp626_core*3") + +(define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op" + "mp626_store1_op,mp626_store2_op,mp626_store3_op" + "arm_no_early_store_addr_dep") +(define_bypass 1 "mp626_alu_op,mp626_alu_shift_op,mp626_mult1,mp626_mult2,\ + mp626_mult3,mp626_mult4" "mp626_store1_op" + "arm_no_early_store_addr_dep") +(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_op") +(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "mp626_mult3" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "mp626_mult4" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_op") +(define_bypass 2 "mp626_mult3" "mp626_alu_op") +(define_bypass 3 "mp626_mult4" "mp626_alu_op") +(define_bypass 4 "mp626_load1_op" "mp626_alu_op") +(define_bypass 5 "mp626_load2_op" "mp626_alu_op") +(define_bypass 6 "mp626_load3_op" "mp626_alu_op") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FMP626 +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "mp626_branch_op" 0 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "branch")) + "fmp626_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. +(define_insn_reservation "mp626_call_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "call")) + "fmp626_core") + diff --git a/gcc/config/arm/fp16.c b/gcc/config/arm/fp16.c new file mode 100644 index 000000000..936caeb78 --- /dev/null +++ b/gcc/config/arm/fp16.c @@ -0,0 +1,145 @@ +/* Half-float conversion routines. + + Copyright (C) 2008, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +static inline unsigned short +__gnu_f2h_internal(unsigned int a, int ieee) +{ + unsigned short sign = (a >> 16) & 0x8000; + int aexp = (a >> 23) & 0xff; + unsigned int mantissa = a & 0x007fffff; + unsigned int mask; + unsigned int increment; + + if (aexp == 0xff) + { + if (!ieee) + return sign; + return sign | 0x7e00 | (mantissa >> 13); + } + + if (aexp == 0 && mantissa == 0) + return sign; + + aexp -= 127; + + /* Decimal point between bits 22 and 23. */ + mantissa |= 0x00800000; + if (aexp < -14) + { + mask = 0x007fffff; + if (aexp < -25) + aexp = -26; + else if (aexp != -25) + mask >>= 24 + aexp; + } + else + mask = 0x00001fff; + + /* Round. */ + if (mantissa & mask) + { + increment = (mask + 1) >> 1; + if ((mantissa & mask) == increment) + increment = mantissa & (increment << 1); + mantissa += increment; + if (mantissa >= 0x01000000) + { + mantissa >>= 1; + aexp++; + } + } + + if (ieee) + { + if (aexp > 15) + return sign | 0x7c00; + } + else + { + if (aexp > 16) + return sign | 0x7fff; + } + + if (aexp < -24) + return sign; + + if (aexp < -14) + { + mantissa >>= -14 - aexp; + aexp = -14; + } + + /* We leave the leading 1 in the mantissa, and subtract one + from the exponent bias to compensate. */ + return sign | (((aexp + 14) << 10) + (mantissa >> 13)); +} + +unsigned int +__gnu_h2f_internal(unsigned short a, int ieee) +{ + unsigned int sign = (unsigned int)(a & 0x8000) << 16; + int aexp = (a >> 10) & 0x1f; + unsigned int mantissa = a & 0x3ff; + + if (aexp == 0x1f && ieee) + return sign | 0x7f800000 | (mantissa << 13); + + if (aexp == 0) + { + int shift; + + if (mantissa == 0) + return sign; + + shift = __builtin_clz(mantissa) - 21; + mantissa <<= shift; + aexp = -shift; + } + + return sign | (((aexp + 0x70) << 23) + (mantissa << 13)); +} + +unsigned short +__gnu_f2h_ieee(unsigned int a) +{ + return __gnu_f2h_internal(a, 1); +} + +unsigned int +__gnu_h2f_ieee(unsigned short a) +{ + return __gnu_h2f_internal(a, 1); +} + +unsigned short +__gnu_f2h_alternative(unsigned int x) +{ + return __gnu_f2h_internal(x, 0); +} + +unsigned int +__gnu_h2f_alternative(unsigned short a) +{ + return __gnu_h2f_internal(a, 0); +} diff --git a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md new file mode 100644 index 000000000..6e6dd8d43 --- /dev/null +++ b/gcc/config/arm/fpa.md @@ -0,0 +1,889 @@ +;;- Machine description for FPA co-processor for ARM cpus. +;; Copyright 1991, 1993, 1994, 1995, 1996, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc. +;; Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) +;; and Martin Simmons (@harleqn.co.uk). +;; More major hacks by Richard Earnshaw (rearnsha@arm.com). + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Some FPA mnemonics are ambiguous between conditional infixes and +;; conditional suffixes. All instructions use a conditional infix, +;; even in unified assembly mode. + +;; FPA automaton. +(define_automaton "armfp") + +;; Floating point unit (FPA) +(define_cpu_unit "fpa" "armfp") + +; The fpa10 doesn't really have a memory read unit, but it can start +; to speculatively execute the instruction in the pipeline, provided +; the data is already loaded, so pretend reads have a delay of 2 (and +; that the pipeline is infinite). +(define_cpu_unit "fpa_mem" "arm") + +(define_insn_reservation "fdivx" 71 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "fdivx")) + "core+fpa*69") + +(define_insn_reservation "fdivd" 59 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "fdivd")) + "core+fpa*57") + +(define_insn_reservation "fdivs" 31 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "fdivs")) + "core+fpa*29") + +(define_insn_reservation "fmul" 9 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "fmul")) + "core+fpa*7") + +(define_insn_reservation "ffmul" 6 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "ffmul")) + "core+fpa*4") + +(define_insn_reservation "farith" 4 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "farith")) + "core+fpa*2") + +(define_insn_reservation "ffarith" 2 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "ffarith")) + "core+fpa*2") + +(define_insn_reservation "r_2_f" 5 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "r_2_f")) + "core+fpa*3") + +(define_insn_reservation "f_2_r" 1 + (and (eq_attr "fpu" "fpa") + (eq_attr "type" "f_2_r")) + "core+fpa*2") + +(define_insn_reservation "f_load" 3 + (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_fpa_load")) + "fpa_mem+core*3") + +(define_insn_reservation "f_store" 4 + (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_fpa_store")) + "core*4") + +(define_insn_reservation "r_mem_f" 6 + (and (eq_attr "model_wbuf" "no") + (and (eq_attr "fpu" "fpa") (eq_attr "type" "r_mem_f"))) + "core*6") + +(define_insn_reservation "f_mem_r" 7 + (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_mem_r")) + "core*7") + + +(define_insn "*addsf3_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f,f") + (plus:SF (match_operand:SF 1 "s_register_operand" "%f,f") + (match_operand:SF 2 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + adf%?s\\t%0, %1, %2 + suf%?s\\t%0, %1, #%N2" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*adddf3_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f,f") + (plus:DF (match_operand:DF 1 "s_register_operand" "%f,f") + (match_operand:DF 2 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + adf%?d\\t%0, %1, %2 + suf%?d\\t%0, %1, #%N2" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*adddf_esfdf_df_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f,f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f,f")) + (match_operand:DF 2 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + adf%?d\\t%0, %1, %2 + suf%?d\\t%0, %1, #%N2" + [(set_attr "type" "farith") 
+ (set_attr "predicable" "yes")] +) + +(define_insn "*adddf_df_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (plus:DF (match_operand:DF 1 "s_register_operand" "f") + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "adf%?d\\t%0, %1, %2" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*adddf_esfdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "adf%?d\\t%0, %1, %2" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*subsf3_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f,f") + (minus:SF (match_operand:SF 1 "arm_float_rhs_operand" "f,G") + (match_operand:SF 2 "arm_float_rhs_operand" "fG,f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + suf%?s\\t%0, %1, %2 + rsf%?s\\t%0, %2, %1" + [(set_attr "type" "farith")] +) + +(define_insn "*subdf3_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f,f") + (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "f,G") + (match_operand:DF 2 "arm_float_rhs_operand" "fG,f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + suf%?d\\t%0, %1, %2 + rsf%?d\\t%0, %2, %1" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*subdf_esfdf_df_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (match_operand:DF 2 "arm_float_rhs_operand" "fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "suf%?d\\t%0, %1, %2" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*subdf_df_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f,f") + (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "f,G") + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f,f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + suf%?d\\t%0, %1, %2 + rsf%?d\\t%0, %2, %1" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*subdf_esfdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "suf%?d\\t%0, %1, %2" + [(set_attr "type" "farith") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsf3_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f") + (mult:SF (match_operand:SF 1 "s_register_operand" "f") + (match_operand:SF 2 "arm_float_rhs_operand" "fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "fml%?s\\t%0, %1, %2" + [(set_attr "type" "ffmul") + (set_attr "predicable" "yes")] +) + +(define_insn "*muldf3_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mult:DF (match_operand:DF 1 "s_register_operand" "f") + (match_operand:DF 2 "arm_float_rhs_operand" "fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "muf%?d\\t%0, %1, %2" + [(set_attr "type" "fmul") + (set_attr "predicable" "yes")] +) + +(define_insn "*muldf_esfdf_df_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (match_operand:DF 2 "arm_float_rhs_operand" 
"fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "muf%?d\\t%0, %1, %2" + [(set_attr "type" "fmul") + (set_attr "predicable" "yes")] +) + +(define_insn "*muldf_df_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mult:DF (match_operand:DF 1 "s_register_operand" "f") + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "muf%?d\\t%0, %1, %2" + [(set_attr "type" "fmul") + (set_attr "predicable" "yes")] +) + +(define_insn "*muldf_esfdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mult:DF + (float_extend:DF (match_operand:SF 1 "s_register_operand" "f")) + (float_extend:DF (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "muf%?d\\t%0, %1, %2" + [(set_attr "type" "fmul") + (set_attr "predicable" "yes")] +) + +;; Division insns + +(define_insn "*divsf3_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f,f") + (div:SF (match_operand:SF 1 "arm_float_rhs_operand" "f,G") + (match_operand:SF 2 "arm_float_rhs_operand" "fG,f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + fdv%?s\\t%0, %1, %2 + frd%?s\\t%0, %2, %1" + [(set_attr "type" "fdivs") + (set_attr "predicable" "yes")] +) + +(define_insn "*divdf3_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f,f") + (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "f,G") + (match_operand:DF 2 "arm_float_rhs_operand" "fG,f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + dvf%?d\\t%0, %1, %2 + rdf%?d\\t%0, %2, %1" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*divdf_esfdf_df_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (div:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (match_operand:DF 2 "arm_float_rhs_operand" "fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "dvf%?d\\t%0, %1, %2" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*divdf_df_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "rdf%?d\\t%0, %2, %1" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*divdf_esfdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (div:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "dvf%?d\\t%0, %1, %2" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*modsf3_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f") + (mod:SF (match_operand:SF 1 "s_register_operand" "f") + (match_operand:SF 2 "arm_float_rhs_operand" "fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "rmf%?s\\t%0, %1, %2" + [(set_attr "type" "fdivs") + (set_attr "predicable" "yes")] +) + +(define_insn "*moddf3_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mod:DF (match_operand:DF 1 "s_register_operand" "f") + (match_operand:DF 2 "arm_float_rhs_operand" "fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "rmf%?d\\t%0, %1, %2" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*moddf_esfdf_df_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mod:DF 
(float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (match_operand:DF 2 "arm_float_rhs_operand" "fG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "rmf%?d\\t%0, %1, %2" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*moddf_df_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mod:DF (match_operand:DF 1 "s_register_operand" "f") + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "rmf%?d\\t%0, %1, %2" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*moddf_esfdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (mod:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f")) + (float_extend:DF + (match_operand:SF 2 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "rmf%?d\\t%0, %1, %2" + [(set_attr "type" "fdivd") + (set_attr "predicable" "yes")] +) + +(define_insn "*negsf2_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f") + (neg:SF (match_operand:SF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "mnf%?s\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*negdf2_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (neg:DF (match_operand:DF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "mnf%?d\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*negdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (neg:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "mnf%?d\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*abssf2_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f") + (abs:SF (match_operand:SF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "abs%?s\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*absdf2_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (abs:DF (match_operand:DF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "abs%?d\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*absdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (abs:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "abs%?d\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*sqrtsf2_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "sqt%?s\\t%0, %1" + [(set_attr "type" "float_em") + (set_attr "predicable" "yes")] +) + +(define_insn "*sqrtdf2_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "sqt%?d\\t%0, %1" + [(set_attr "type" "float_em") + (set_attr "predicable" "yes")] +) + +(define_insn "*sqrtdf_esfdf_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (sqrt:DF (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + 
"sqt%?d\\t%0, %1" + [(set_attr "type" "float_em") + (set_attr "predicable" "yes")] +) + +(define_insn "*floatsisf2_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f") + (float:SF (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "flt%?s\\t%0, %1" + [(set_attr "type" "r_2_f") + (set_attr "predicable" "yes")] +) + +(define_insn "*floatsidf2_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (float:DF (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "flt%?d\\t%0, %1" + [(set_attr "type" "r_2_f") + (set_attr "predicable" "yes")] +) + +(define_insn "*fix_truncsfsi2_fpa" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "fix%?z\\t%0, %1" + [(set_attr "type" "f_2_r") + (set_attr "predicable" "yes")] +) + +(define_insn "*fix_truncdfsi2_fpa" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "fix%?z\\t%0, %1" + [(set_attr "type" "f_2_r") + (set_attr "predicable" "yes")] +) + +(define_insn "*truncdfsf2_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f") + (float_truncate:SF + (match_operand:DF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "mvf%?s\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*extendsfdf2_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "f")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "mvf%?d\\t%0, %1" + [(set_attr "type" "ffarith") + (set_attr "predicable" "yes")] +) + +(define_insn "*movsf_fpa" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f, m,f,r,r,r, m") + (match_operand:SF 1 "general_operand" "fG,H,mE,f,r,f,r,mE,r"))] + "TARGET_ARM + && TARGET_HARD_FLOAT && TARGET_FPA + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], SFmode))" + "@ + mvf%?s\\t%0, %1 + mnf%?s\\t%0, #%N1 + ldf%?s\\t%0, %1 + stf%?s\\t%1, %0 + str%?\\t%1, [%|sp, #-4]!\;ldf%?s\\t%0, [%|sp], #4 + stf%?s\\t%1, [%|sp, #-4]!\;ldr%?\\t%0, [%|sp], #4 + mov%?\\t%0, %1 + ldr%?\\t%0, %1\\t%@ float + str%?\\t%1, %0\\t%@ float" + [(set_attr "length" "4,4,4,4,8,8,4,4,4") + (set_attr "predicable" "yes") + (set_attr "type" + "ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r,*,load1,store1") + (set_attr "pool_range" "*,*,1024,*,*,*,*,4096,*") + (set_attr "neg_pool_range" "*,*,1012,*,*,*,*,4084,*")] +) + +(define_insn "*movdf_fpa" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=r,Q,r,m,r, f, f,f, m,!f,!r") + (match_operand:DF 1 "general_operand" + "Q, r,r,r,mF,fG,H,mF,f,r, f"))] + "TARGET_ARM + && TARGET_HARD_FLOAT && TARGET_FPA + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + default: + case 0: return \"ldm%(ia%)\\t%m1, %M0\\t%@ double\"; + case 1: return \"stm%(ia%)\\t%m0, %M1\\t%@ double\"; + case 2: return \"#\"; + case 3: case 4: return output_move_double (operands); + case 5: return \"mvf%?d\\t%0, %1\"; + case 6: return \"mnf%?d\\t%0, #%N1\"; + case 7: return \"ldf%?d\\t%0, %1\"; + case 8: return \"stf%?d\\t%1, %0\"; + case 9: return output_mov_double_fpa_from_arm (operands); + case 10: return output_mov_double_arm_from_fpa (operands); + } + } + " + 
[(set_attr "length" "4,4,8,8,8,4,4,4,4,8,8") + (set_attr "predicable" "yes") + (set_attr "type" + "load1,store2,*,store2,load1,ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r") + (set_attr "pool_range" "*,*,*,*,1020,*,*,1024,*,*,*") + (set_attr "neg_pool_range" "*,*,*,*,1008,*,*,1008,*,*,*")] +) + +;; We treat XFmode as meaning 'internal format'. It's the right size and we +;; don't use it for anything else. We only support moving between FPA +;; registers and moving an FPA register to/from memory. +(define_insn "*movxf_fpa" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,f,m") + (match_operand:XF 1 "general_operand" "f,m,f"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_FPA + && (register_operand (operands[0], XFmode) + || register_operand (operands[1], XFmode))" + "* + switch (which_alternative) + { + default: + case 0: return \"mvf%?e\\t%0, %1\"; + case 1: if (TARGET_FPA_EMU2) + return \"ldf%?e\\t%0, %1\"; + return \"lfm%?\\t%0, 1, %1\"; + case 2: if (TARGET_FPA_EMU2) + return \"stf%?e\\t%1, %0\"; + return \"sfm%?\\t%1, 1, %0\"; + } + " + [(set_attr "length" "4,4,4") + (set_attr "predicable" "yes") + (set_attr "type" "ffarith,f_fpa_load,f_fpa_store")] +) + +;; stfs/ldfs always use a conditional infix. This works around the +;; ambiguity between "stf pl s" and "sftp ls". +(define_insn "*thumb2_movsf_fpa" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f, m,f,r,r,r, m") + (match_operand:SF 1 "general_operand" "fG,H,mE,f,r,f,r,mE,r"))] + "TARGET_THUMB2 + && TARGET_HARD_FLOAT && TARGET_FPA + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], SFmode))" + "@ + mvf%?s\\t%0, %1 + mnf%?s\\t%0, #%N1 + ldf%?s\\t%0, %1 + stf%?s\\t%1, %0 + str%?\\t%1, [%|sp, #-4]!\;ldf%?s\\t%0, [%|sp], #4 + stf%?s\\t%1, [%|sp, #-4]!\;ldr%?\\t%0, [%|sp], #4 + mov%?\\t%0, %1 @bar + ldr%?\\t%0, %1\\t%@ float + str%?\\t%1, %0\\t%@ float" + [(set_attr "length" "4,4,4,4,8,8,4,4,4") + (set_attr "ce_count" "1,1,1,1,2,2,1,1,1") + (set_attr "predicable" "yes") + (set_attr "type" + "ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r,*,load1,store1") + (set_attr "pool_range" "*,*,1024,*,*,*,*,4096,*") + (set_attr "neg_pool_range" "*,*,1012,*,*,*,*,0,*")] +) + +;; Not predicable because we don't know the number of instructions. 
+(define_insn "*thumb2_movdf_fpa" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=r,Q,r,m,r, f, f,f, m,!f,!r") + (match_operand:DF 1 "general_operand" + "Q, r,r,r,mF,fG,H,mF,f,r, f"))] + "TARGET_THUMB2 + && TARGET_HARD_FLOAT && TARGET_FPA + && (GET_CODE (operands[0]) != MEM + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + default: + case 0: return \"ldm%(ia%)\\t%m1, %M0\\t%@ double\"; + case 1: return \"stm%(ia%)\\t%m0, %M1\\t%@ double\"; + case 2: case 3: case 4: return output_move_double (operands); + case 5: return \"mvf%?d\\t%0, %1\"; + case 6: return \"mnf%?d\\t%0, #%N1\"; + case 7: return \"ldf%?d\\t%0, %1\"; + case 8: return \"stf%?d\\t%1, %0\"; + case 9: return output_mov_double_fpa_from_arm (operands); + case 10: return output_mov_double_arm_from_fpa (operands); + } + } + " + [(set_attr "length" "4,4,8,8,8,4,4,4,4,8,8") + (set_attr "type" + "load1,store2,*,store2,load1,ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r") + (set_attr "pool_range" "*,*,*,*,4092,*,*,1024,*,*,*") + (set_attr "neg_pool_range" "*,*,*,*,0,*,*,1020,*,*,*")] +) + +;; Saving and restoring the floating point registers in the prologue should +;; be done in XFmode, even though we don't support that for anything else +;; (Well, strictly it's 'internal representation', but that's effectively +;; XFmode). +;; Not predicable because we don't know the number of instructions. + +(define_insn "*thumb2_movxf_fpa" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,f,f,m,f,r,r") + (match_operand:XF 1 "general_operand" "fG,H,m,f,r,f,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_FPA && reload_completed" + "* + switch (which_alternative) + { + default: + case 0: return \"mvf%?e\\t%0, %1\"; + case 1: return \"mnf%?e\\t%0, #%N1\"; + case 2: return \"ldf%?e\\t%0, %1\"; + case 3: return \"stf%?e\\t%1, %0\"; + case 4: return output_mov_long_double_fpa_from_arm (operands); + case 5: return output_mov_long_double_arm_from_fpa (operands); + case 6: return output_mov_long_double_arm_from_arm (operands); + } + " + [(set_attr "length" "4,4,4,4,8,8,12") + (set_attr "type" "ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r,*") + (set_attr "pool_range" "*,*,1024,*,*,*,*") + (set_attr "neg_pool_range" "*,*,1004,*,*,*,*")] +) + +(define_insn "*cmpsf_fpa" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:SF 0 "s_register_operand" "f,f") + (match_operand:SF 1 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + cmf%?\\t%0, %1 + cnf%?\\t%0, #%N1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*cmpdf_fpa" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "f,f") + (match_operand:DF 1 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + cmf%?\\t%0, %1 + cnf%?\\t%0, #%N1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*cmpesfdf_df_fpa" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (float_extend:DF + (match_operand:SF 0 "s_register_operand" "f,f")) + (match_operand:DF 1 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + cmf%?\\t%0, %1 + cnf%?\\t%0, #%N1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*cmpdf_esfdf_fpa" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "f") + (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && 
TARGET_FPA" + "cmf%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*cmpsf_trap_fpa" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:SF 0 "s_register_operand" "f,f") + (match_operand:SF 1 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + cmf%?e\\t%0, %1 + cnf%?e\\t%0, #%N1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*cmpdf_trap_fpa" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "f,f") + (match_operand:DF 1 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + cmf%?e\\t%0, %1 + cnf%?e\\t%0, #%N1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*cmp_esfdf_df_trap_fpa" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (float_extend:DF + (match_operand:SF 0 "s_register_operand" "f,f")) + (match_operand:DF 1 "arm_float_add_operand" "fG,H")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + cmf%?e\\t%0, %1 + cnf%?e\\t%0, #%N1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*cmp_df_esfdf_trap_fpa" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "f") + (float_extend:DF + (match_operand:SF 1 "s_register_operand" "f"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA" + "cmf%?e\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "f_2_r")] +) + +(define_insn "*movsfcc_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f,f,f,f,f,f,f,f") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H") + (match_operand:SF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + mvf%D3s\\t%0, %2 + mnf%D3s\\t%0, #%N2 + mvf%d3s\\t%0, %1 + mnf%d3s\\t%0, #%N1 + mvf%d3s\\t%0, %1\;mvf%D3s\\t%0, %2 + mvf%d3s\\t%0, %1\;mnf%D3s\\t%0, #%N2 + mnf%d3s\\t%0, #%N1\;mvf%D3s\\t%0, %2 + mnf%d3s\\t%0, #%N1\;mnf%D3s\\t%0, #%N2" + [(set_attr "length" "4,4,4,4,8,8,8,8") + (set_attr "type" "ffarith") + (set_attr "conds" "use")] +) + +(define_insn "*movdfcc_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f,f,f,f,f,f,f,f") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H") + (match_operand:DF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + mvf%D3d\\t%0, %2 + mnf%D3d\\t%0, #%N2 + mvf%d3d\\t%0, %1 + mnf%d3d\\t%0, #%N1 + mvf%d3d\\t%0, %1\;mvf%D3d\\t%0, %2 + mvf%d3d\\t%0, %1\;mnf%D3d\\t%0, #%N2 + mnf%d3d\\t%0, #%N1\;mvf%D3d\\t%0, %2 + mnf%d3d\\t%0, #%N1\;mnf%D3d\\t%0, #%N2" + [(set_attr "length" "4,4,4,4,8,8,8,8") + (set_attr "type" "ffarith") + (set_attr "conds" "use")] +) + +(define_insn "*thumb2_movsfcc_fpa" + [(set (match_operand:SF 0 "s_register_operand" "=f,f,f,f,f,f,f,f") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H") + (match_operand:SF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + it\\t%D3\;mvf%D3s\\t%0, %2 + it\\t%D3\;mnf%D3s\\t%0, #%N2 + it\\t%d3\;mvf%d3s\\t%0, %1 + it\\t%d3\;mnf%d3s\\t%0, #%N1 + ite\\t%d3\;mvf%d3s\\t%0, 
%1\;mvf%D3s\\t%0, %2 + ite\\t%d3\;mvf%d3s\\t%0, %1\;mnf%D3s\\t%0, #%N2 + ite\\t%d3\;mnf%d3s\\t%0, #%N1\;mvf%D3s\\t%0, %2 + ite\\t%d3\;mnf%d3s\\t%0, #%N1\;mnf%D3s\\t%0, #%N2" + [(set_attr "length" "6,6,6,6,10,10,10,10") + (set_attr "type" "ffarith") + (set_attr "conds" "use")] +) + +(define_insn "*thumb2_movdfcc_fpa" + [(set (match_operand:DF 0 "s_register_operand" "=f,f,f,f,f,f,f,f") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H") + (match_operand:DF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_FPA" + "@ + it\\t%D3\;mvf%D3d\\t%0, %2 + it\\t%D3\;mnf%D3d\\t%0, #%N2 + it\\t%d3\;mvf%d3d\\t%0, %1 + it\\t%d3\;mnf%d3d\\t%0, #%N1 + ite\\t%d3\;mvf%d3d\\t%0, %1\;mvf%D3d\\t%0, %2 + ite\\t%d3\;mvf%d3d\\t%0, %1\;mnf%D3d\\t%0, #%N2 + ite\\t%d3\;mnf%d3d\\t%0, #%N1\;mvf%D3d\\t%0, %2 + ite\\t%d3\;mnf%d3d\\t%0, #%N1\;mnf%D3d\\t%0, #%N2" + [(set_attr "length" "6,6,6,6,10,10,10,10") + (set_attr "type" "ffarith") + (set_attr "conds" "use")] +) + diff --git a/gcc/config/arm/freebsd.h b/gcc/config/arm/freebsd.h new file mode 100644 index 000000000..701bb1499 --- /dev/null +++ b/gcc/config/arm/freebsd.h @@ -0,0 +1,67 @@ +/* Definitions for StrongARM running FreeBSD using the ELF format + Copyright (C) 2001, 2004, 2007, 2010 Free Software Foundation, Inc. + Contributed by David E. O'Brien and BSDi. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER } + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC FBSD_CPP_SPEC + +#undef LINK_SPEC +#define LINK_SPEC " \ + %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \ + %{v:-V} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with . GCC defaults come from c-decl.c, + c-common.c, and config//.h. */ + +/* arm.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* We use the GCC defaults here. 
*/ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_strongarm + +#undef TARGET_VERSION +#define TARGET_VERSION fprintf (stderr, " (FreeBSD/StrongARM ELF)"); diff --git a/gcc/config/arm/gentune.sh b/gcc/config/arm/gentune.sh new file mode 100755 index 000000000..a873973e3 --- /dev/null +++ b/gcc/config/arm/gentune.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# Generate arm-tune.md, a file containing the tune attribute from the list of +# CPUs in arm-cores.def +# Copyright (C) 2004, 2009 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically by gentune.sh from arm-cores.def" + +allcores=`awk -F'[(, ]+' '/^ARM_CORE/ { cores = cores$3"," } END { print cores } ' $1` + +echo "(define_attr \"tune\"" +echo " \"$allcores\"" | sed -e 's/,"$/"/' +echo " (const (symbol_ref \"((enum attr_tune) arm_tune)\")))" diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S new file mode 100644 index 000000000..eb0c38632 --- /dev/null +++ b/gcc/config/arm/ieee754-df.S @@ -0,0 +1,1447 @@ +/* ieee754-df.S double-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * For slightly simpler code please see the single precision version + * of this file. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + + +@ For FPA, float words are always big-endian. +@ For VFP, floats words follow the memory system mode. 
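[Editor's aside -- illustrative addition, not part of the upstream patch.] The #if block below maps the high word (sign, exponent, top mantissa bits) and the low word of a binary64 value onto r0/r1 according to the FPA/VFP word-order rules just described. A minimal C sketch of the same split, with made-up helper names, assuming IEEE-754 binary64 and that doubles and integers share one byte order:

#include <stdint.h>
#include <string.h>

struct dwords { uint32_t xh, xl; };   /* xh: sign + exponent + mantissa top, xl: mantissa low */

static struct dwords
split_double (double d)
{
  uint64_t bits;
  struct dwords w;
  memcpy (&bits, &d, sizeof bits);    /* well-defined way to view the raw bits */
  w.xh = (uint32_t) (bits >> 32);
  w.xl = (uint32_t) bits;
  return w;
}

/* The 11-bit biased exponent field, as the assembly extracts with
   "lsr #20" (or "lsl #1; lsr #21" once the sign bit has been tossed).  */
static unsigned
biased_exponent (uint32_t xh)
{
  return (xh >> 20) & 0x7ff;
}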
+#if defined(__VFP_FP__) && !defined(__ARMEB__) +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#else +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#endif + + +#ifdef L_arm_negdf2 + +ARM_FUNC_START negdf2 +ARM_FUNC_ALIAS aeabi_dneg negdf2 + + @ flip sign bit + eor xh, xh, #0x80000000 + RET + + FUNC_END aeabi_dneg + FUNC_END negdf2 + +#endif + +#ifdef L_arm_addsubdf3 + +ARM_FUNC_START aeabi_drsub + + eor xh, xh, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subdf3 +ARM_FUNC_ALIAS aeabi_dsub subdf3 + + eor yh, yh, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START adddf3 +ARM_FUNC_ALIAS aeabi_dadd adddf3 + +1: do_push {r4, r5, lr} + + @ Look for zeroes, equal values, INF, or NAN. + shift1 lsl, r4, xh, #1 + shift1 lsl, r5, yh, #1 + teq r4, r5 + do_it eq + teqeq xl, yl + do_it ne, ttt + COND(orr,s,ne) ip, r4, xl + COND(orr,s,ne) ip, r5, yl + COND(mvn,s,ne) ip, r4, asr #21 + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r4, + @ corresponding arg in xh-xl, and positive exponent difference in r5. + shift1 lsr, r4, r4, #21 + rsbs r5, r4, r5, lsr #21 + do_it lt + rsblt r5, r5, #0 + ble 1f + add r4, r4, r5 + eor yl, xl, yl + eor yh, xh, yh + eor xl, yl, xl + eor xh, yh, xh + eor yl, xl, yl + eor yh, xh, yh +1: + @ If exponent difference is too large, return largest argument + @ already in xh-xl. We need up to 54 bit to handle proper rounding + @ of 0x1p54 - 1.1. + cmp r5, #54 + do_it hi + RETLDM "r4, r5" hi + + @ Convert mantissa to signed integer. + tst xh, #0x80000000 + mov xh, xh, lsl #12 + mov ip, #0x00100000 + orr xh, ip, xh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs xl, xl + sbc xh, xh, xh, lsl #1 +#else + rsbs xl, xl, #0 + rsc xh, xh, #0 +#endif +1: + tst yh, #0x80000000 + mov yh, yh, lsl #12 + orr yh, ip, yh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs yl, yl + sbc yh, yh, yh, lsl #1 +#else + rsbs yl, yl, #0 + rsc yh, yh, #0 +#endif +1: + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r4, r5 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r4, r4, #1 + + @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. + rsbs lr, r5, #32 + blt 1f + shift1 lsl, ip, yl, lr + shiftop adds xl xl yl lsr r5 yl + adc xh, xh, #0 + shiftop adds xl xl yh lsl lr yl + shiftop adcs xh xh yh asr r5 yh + b 2f +1: sub r5, r5, #32 + add lr, lr, #32 + cmp yl, #1 + shift1 lsl,ip, yh, lr + do_it cs + orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later + shiftop adds xl xl yh asr r5 yh + adcs xh, xh, yh, asr #31 +2: + @ We now have a result in xh-xl-ip. + @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) + and r5, xh, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + mov lr, #0 + negs ip, ip + sbcs xl, lr, xl + sbc xh, lr, xh +#else + rsbs ip, ip, #0 + rscs xl, xl, #0 + rsc xh, xh, #0 +#endif + + @ Determine how to normalize the result. +LSYM(Lad_p): + cmp xh, #0x00100000 + bcc LSYM(Lad_a) + cmp xh, #0x00200000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs xh, xh, lsr #1 + movs xl, xl, rrx + mov ip, ip, rrx + add r4, r4, #1 + + @ Make sure we did not bust our exponent. + mov r2, r4, lsl #21 + cmn r2, #(2 << 21) + bcs LSYM(Lad_o) + + @ Our result is now properly aligned into xh-xl, remaining bits in ip. 
+ @ Round with MSB of ip. If halfway between two numbers, round towards + @ LSB of xl = 0. + @ Pack final result together. +LSYM(Lad_e): + cmp ip, #0x80000000 + do_it eq + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + orr xh, xh, r5 + RETLDM "r4, r5" + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs ip, ip, lsl #1 + adcs xl, xl, xl + adc xh, xh, xh + tst xh, #0x00100000 + sub r4, r4, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since ip will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + teq xh, #0 + movne r3, #20 + moveq r3, #52 + moveq xh, xl + moveq xl, #0 + mov r2, xh + cmp r2, #(1 << 16) + movhs r2, r2, lsr #16 + subhs r3, r3, #16 + cmp r2, #(1 << 8) + movhs r2, r2, lsr #8 + subhs r3, r3, #8 + cmp r2, #(1 << 4) + movhs r2, r2, lsr #4 + subhs r3, r3, #4 + cmp r2, #(1 << 2) + subhs r3, r3, #2 + sublo r3, r3, r2, lsr #1 + sub r3, r3, r2, lsr #3 + +#else + + teq xh, #0 + do_it eq, t + moveq xh, xl + moveq xl, #0 + clz r3, xh + do_it eq + addeq r3, r3, #32 + sub r3, r3, #11 + +#endif + + @ determine how to shift the value. + subs r2, r3, #32 + bge 2f + adds r2, r2, #12 + ble 1f + + @ shift value left 21 to 31 bits, or actually right 11 to 1 bits + @ since a register switch happened above. + add ip, r2, #20 + rsb r2, r2, #12 + shift1 lsl, xl, xh, ip + shift1 lsr, xh, xh, r2 + b 3f + + @ actually shift value left 1 to 20 bits, which might also represent + @ 32 to 52 bits if counting the register switch that happened earlier. +1: add r2, r2, #20 +2: do_it le + rsble ip, r2, #32 + shift1 lsl, xh, xh, r2 +#if defined(__thumb2__) + lsr ip, xl, ip + itt le + orrle xh, xh, ip + lslle xl, xl, r2 +#else + orrle xh, xh, xl, lsr ip + movle xl, xl, lsl r2 +#endif + + @ adjust exponent accordingly. +3: subs r4, r4, r3 + do_it ge, tt + addge xh, xh, r4, lsl #20 + orrge xh, xh, r5 + RETLDM "r4, r5" ge + + @ Exponent too small, denormalize result. + @ Find out proper shift value. + mvn r4, r4 + subs r4, r4, #31 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, sign is in r5. + add r4, r4, #20 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r2 yh + shiftop orr xh r5 xh lsr r4 yh + RETLDM "r4, r5" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. +1: rsb r4, r4, #12 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r4 yh + mov xh, r5 + RETLDM "r4, r5" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. +2: shift1 lsr, xl, xh, r4 + mov xh, r5 + RETLDM "r4, r5" + + @ Adjust exponents for denormalized arguments. + @ Note that r4 must not remain equal to 0. +LSYM(Lad_d): + teq r4, #0 + eor yh, yh, #0x00100000 + do_it eq, te + eoreq xh, xh, #0x00100000 + addeq r4, r4, #1 + subne r5, r5, #1 + b LSYM(Lad_x) + + +LSYM(Lad_s): + mvns ip, r4, asr #21 + do_it ne + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_i) + + teq r4, r5 + do_it eq + teqeq xl, yl + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + orrs ip, r4, xl + do_it eq, t + moveq xh, yh + moveq xl, yl + RETLDM "r4, r5" + +1: teq xh, yh + + @ Result is x - x = 0. + do_it ne, tt + movne xh, #0 + movne xl, #0 + RETLDM "r4, r5" ne + + @ Result is x + x = 2x. 
+ movs ip, r4, lsr #21 + bne 2f + movs xl, xl, lsl #1 + adcs xh, xh, xh + do_it cs + orrcs xh, xh, #0x80000000 + RETLDM "r4, r5" +2: adds r4, r4, #(2 << 21) + do_it cc, t + addcc xh, xh, #(1 << 20) + RETLDM "r4, r5" cc + and r5, xh, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr xh, r5, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5" + + @ At least one of x or y is INF/NAN. + @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) + @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) + @ if either is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return xh-xl (which is INF or -INF) +LSYM(Lad_i): + mvns ip, r4, asr #21 + do_it ne, te + movne xh, yh + movne xl, yl + COND(mvn,s,eq) ip, r5, asr #21 + do_it ne, t + movne yh, xh + movne yl, xl + orrs r4, xl, xh, lsl #12 + do_it eq, te + COND(orr,s,eq) r5, yl, yh, lsl #12 + teqeq xh, yh + orrne xh, xh, #0x00080000 @ quiet NAN + RETLDM "r4, r5" + + FUNC_END aeabi_dsub + FUNC_END subdf3 + FUNC_END aeabi_dadd + FUNC_END adddf3 + +ARM_FUNC_START floatunsidf +ARM_FUNC_ALIAS aeabi_ui2d floatunsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + mov r5, #0 @ sign bit is 0 + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_ui2d + FUNC_END floatunsidf + +ARM_FUNC_START floatsidf +ARM_FUNC_ALIAS aeabi_i2d floatsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + ands r5, r0, #0x80000000 @ sign bit in r5 + do_it mi + rsbmi r0, r0, #0 @ absolute value + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_i2d + FUNC_END floatsidf + +ARM_FUNC_START extendsfdf2 +ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 + + movs r2, r0, lsl #1 @ toss sign bit + mov xh, r2, asr #3 @ stretch exponent + mov xh, xh, rrx @ retrieve sign bit + mov xl, r2, lsl #28 @ retrieve remaining bits + do_it ne, ttt + COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent + teqne r3, #0xff000000 @ if not 0, check if INF or NAN + eorne xh, xh, #0x38000000 @ fixup exponent otherwise. + RETc(ne) @ and return it. + + teq r2, #0 @ if actually 0 + do_it ne, e + teqne r3, #0xff000000 @ or INF or NAN + RETc(eq) @ we are done already. + + @ value was denormalized. We can normalize it now. + do_push {r4, r5, lr} + mov r4, #0x380 @ setup corresponding exponent + and r5, xh, #0x80000000 @ move sign bit in r5 + bic xh, xh, #0x80000000 + b LSYM(Lad_l) + + FUNC_END aeabi_f2d + FUNC_END extendsfdf2 + +ARM_FUNC_START floatundidf +ARM_FUNC_ALIAS aeabi_ul2d floatundidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. 
+ do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + mov r5, #0 + b 2f + +ARM_FUNC_START floatdidf +ARM_FUNC_ALIAS aeabi_l2d floatdidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. + do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + ands r5, ah, #0x80000000 @ sign bit in r5 + bpl 2f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +2: + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + + @ FPA little-endian: must swap the word order. + .ifnc xh, ah + mov ip, al + mov xh, ah + mov xl, ip + .endif + + movs ip, xh, lsr #22 + beq LSYM(Lad_p) + + @ The value is too big. Scale it down a bit... + mov r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + add r2, r2, ip, lsr #3 + + rsb r3, r2, #32 + shift1 lsl, ip, xl, r3 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r3 lr + shift1 lsr, xh, xh, r2 + add r4, r4, r2 + b LSYM(Lad_p) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + + @ Legacy code expects the result to be returned in f0. Copy it + @ there as well. +LSYM(f0_ret): + do_push {r0, r1} + ldfd f0, [sp], #8 + RETLDM + +#endif + + FUNC_END floatdidf + FUNC_END aeabi_l2d + FUNC_END floatundidf + FUNC_END aeabi_ul2d + +#endif /* L_addsubdf3 */ + +#ifdef L_arm_muldivdf3 + +ARM_FUNC_START muldf3 +ARM_FUNC_ALIAS aeabi_dmul muldf3 + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Lml_s) + + @ Add exponents together + add r4, r4, r5 + + @ Determine final sign. + eor r6, xh, yh + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + bic xh, xh, ip, lsl #21 + bic yh, yh, ip, lsl #21 + orrs r5, xl, xh, lsl #12 + do_it ne + COND(orr,s,ne) r5, yl, yh, lsl #12 + orr xh, xh, #0x00100000 + orr yh, yh, #0x00100000 + beq LSYM(Lml_1) + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r6, which will be restored in yl later. + and r6, r6, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. 
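[Editor's aside -- illustrative addition, not part of the upstream patch.] The pre-ARMv4 block below builds the wide mantissa product out of 16-bit partial products because umull is not available. A hedged C sketch of the same schoolbook decomposition, shown for a single 32x32->64 product (the helper name is made up):

#include <stdint.h>

static uint64_t
mul32x32 (uint32_t x, uint32_t y)
{
  uint32_t xlo = x & 0xffff, xhi = x >> 16;   /* 16-bit halves */
  uint32_t ylo = y & 0xffff, yhi = y >> 16;

  uint64_t lo  = (uint64_t) xlo * ylo;                         /* partial products */
  uint64_t mid = (uint64_t) xlo * yhi + (uint64_t) xhi * ylo;
  uint64_t hi  = (uint64_t) xhi * yhi;

  return lo + (mid << 16) + (hi << 32);       /* recombine; carries handled by the 64-bit adds */
}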
+ stmfd sp!, {r6, r7, r8, r9, sl, fp} + mov r7, xl, lsr #16 + mov r8, yl, lsr #16 + mov r9, xh, lsr #16 + mov sl, yh, lsr #16 + bic xl, xl, r7, lsl #16 + bic yl, yl, r8, lsl #16 + bic xh, xh, r9, lsl #16 + bic yh, yh, sl, lsl #16 + mul ip, xl, yl + mul fp, xl, r8 + mov lr, #0 + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, r7, yl + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, xl, sl + mov r5, #0 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r7, yh + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, r8 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r9, yl + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, sl + mul r6, r9, sl + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, r9, yh + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, xl, yh + adds lr, lr, fp + mul fp, r7, sl + adcs r5, r5, fp + mul fp, xh, yl + adc r6, r6, #0 + adds lr, lr, fp + mul fp, r9, r8 + adcs r5, r5, fp + mul fp, r7, r8 + adc r6, r6, #0 + adds lr, lr, fp + mul fp, xh, yh + adcs r5, r5, fp + adc r6, r6, #0 + ldmfd sp!, {yl, r7, r8, r9, sl, fp} + +#else + + @ Here is the actual multiplication. + umull ip, lr, xl, yl + mov r5, #0 + umlal lr, r5, xh, yl + and yl, r6, #0x80000000 + umlal lr, r5, xl, yh + mov r6, #0 + umlal r5, r6, xh, yh + +#endif + + @ The LSBs in ip are only significant for the final rounding. + @ Fold them into lr. + teq ip, #0 + do_it ne + orrne lr, lr, #1 + + @ Adjust result upon the MSB position. + sub r4, r4, #0xff + cmp r6, #(1 << (20-11)) + sbc r4, r4, #0x300 + bcs 1f + movs lr, lr, lsl #1 + adcs r5, r5, r5 + adc r6, r6, r6 +1: + @ Shift to final position, add sign to result. + orr xh, yl, r6, lsl #11 + orr xh, xh, r5, lsr #21 + mov xl, r5, lsl #11 + orr xl, xl, lr, lsr #21 + mov lr, lr, lsl #11 + + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp lr, #0x80000000 + do_it eq + COND(mov,s,eq) lr, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + and r6, r6, #0x80000000 + orr xh, r6, xh + orr xl, xl, yl + eor xh, xh, yh + subs r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + @ Under/overflow: fix things up for the code below. + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r4, #(53 + 1) + do_it le, tt + movle xl, #0 + bicle xh, xh, #0x7fffffff + RETLDM "r4, r5, r6" le + + @ Find out proper shift value. + rsb r4, r4, #0 + subs r4, r4, #32 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. + add r4, r4, #20 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r5 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r5 r2 + and r2, xh, #0x80000000 + bic xh, xh, #0x80000000 + adds xl, xl, r3, lsr #31 + shiftop adc xh r2 xh lsr r4 r6 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. Then round. 
+1: rsb r4, r4, #12 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r4 + shift1 lsr, xl, xl, r5 + shiftop orr xl xl xh lsl r4 r2 + bic xh, xh, #0x7fffffff + adds xl, xl, r3, lsr #31 + adc xh, xh, #0 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. +2: rsb r5, r4, #32 + shiftop orr lr lr xl lsl r5 r2 + shift1 lsr, r3, xl, r4 + shiftop orr r3 r3 xh lsl r5 r2 + shift1 lsr, xl, xh, r4 + bic xh, xh, #0x7fffffff + shiftop bic xl xl xh lsr r4 r2 + add xl, xl, r3, lsr #31 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r4, #0 + bne 2f + and r6, xh, #0x80000000 +1: movs xl, xl, lsl #1 + adc xh, xh, xh + tst xh, #0x00100000 + do_it eq + subeq r4, r4, #1 + beq 1b + orr xh, xh, r6 + teq r5, #0 + do_it ne + RETc(ne) +2: and r6, yh, #0x80000000 +3: movs yl, yl, lsl #1 + adc yh, yh, yh + tst yh, #0x00100000 + do_it eq + subeq r5, r5, #1 + beq 3b + orr yh, yh, r6 + RET + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + teq r4, ip + and r5, ip, yh, lsr #20 + do_it ne + teqne r5, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor xh, xh, yh + and xh, xh, #0x80000000 + mov xl, #0 + RETLDM "r4, r5, r6" + +1: @ One or both args are INF or NAN. + orrs r6, xl, xh, lsl #1 + do_it eq, te + moveq xl, yl + moveq xh, yh + COND(orr,s,ne) r6, yl, yh, lsl #1 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r4, ip + bne 1f + orrs r6, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN * -> NAN +1: teq r5, ip + bne LSYM(Lml_i) + orrs r6, yl, yh, lsl #12 + do_it ne, t + movne xl, yl + movne xh, yh + bne LSYM(Lml_n) @ * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor xh, xh, yh + + @ Overflow: return INF (sign already in xh). +LSYM(Lml_o): + and xh, xh, #0x80000000 + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5, r6" + + @ Return a quiet NAN. +LSYM(Lml_n): + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f80000 + RETLDM "r4, r5, r6" + + FUNC_END aeabi_dmul + FUNC_END muldf3 + +ARM_FUNC_START divdf3 +ARM_FUNC_ALIAS aeabi_ddiv divdf3 + + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Ldv_s) + + @ Substract divisor exponent from dividend''s. + sub r4, r4, r5 + + @ Preserve final sign into lr. + eor lr, xh, yh + + @ Convert mantissa to unsigned integer. + @ Dividend -> r5-r6, divisor -> yh-yl. + orrs r5, yl, yh, lsl #12 + mov xh, xh, lsl #12 + beq LSYM(Ldv_1) + mov yh, yh, lsl #12 + mov r5, #0x10000000 + orr yh, r5, yh, lsr #4 + orr yh, yh, yl, lsr #24 + mov yl, yl, lsl #8 + orr r5, r5, xh, lsr #4 + orr r5, r5, xl, lsr #24 + mov r6, xl, lsl #8 + + @ Initialize xh with final sign bit. + and xh, lr, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. 
+ cmp r5, yh + do_it eq + cmpeq r6, yl + adc r4, r4, #(255 - 2) + add r4, r4, #0x300 + bcs 1f + movs yh, yh, lsr #1 + mov yl, yl, rrx +1: + @ Perform first substraction to align result to a nibble. + subs r6, r6, yl + sbc r5, r5, yh + movs yh, yh, lsr #1 + mov yl, yl, rrx + mov xl, #0x00100000 + mov ip, #0x00080000 + + @ The actual division loop. +1: subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #1 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #2 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #3 + + orrs lr, r5, r6 + beq 2f + mov r5, r5, lsl #4 + orr r5, r5, r6, lsr #28 + mov r6, r6, lsl #4 + mov yh, yh, lsl #3 + orr yh, yh, yl, lsr #29 + mov yl, yl, lsl #3 + movs ip, ip, lsr #4 + bne 1b + + @ We are done with a word of the result. + @ Loop again for the low word if this pass was for the high word. + tst xh, #0x00100000 + bne 3f + orr xh, xh, xl + mov xl, #0 + mov ip, #0x80000000 + b 1b +2: + @ Be sure result starts in the high word. + tst xh, #0x00100000 + do_it eq, t + orreq xh, xh, xl + moveq xl, #0 +3: + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + subs ip, r5, yh + do_it eq, t + COND(sub,s,eq) ip, r6, yl + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Division by 0x1p*: shortcut a lot of code. +LSYM(Ldv_1): + and lr, lr, #0x80000000 + orr xh, lr, xh, lsr #12 + adds r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + b LSYM(Lml_u) + + @ Result mightt need to be denormalized: put remainder bits + @ in lr for rounding considerations. +LSYM(Ldv_u): + orr lr, r5, r6 + b LSYM(Lml_u) + + @ One or both arguments is either INF, NAN or zero. +LSYM(Ldv_s): + and r5, ip, yh, lsr #20 + teq r4, ip + do_it eq + teqeq r5, ip + beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN + teq r4, ip + bne 1f + orrs r4, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN / -> NAN + teq r5, ip + bne LSYM(Lml_i) @ INF / -> INF + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r5, ip + bne 2f + orrs r5, yl, yh, lsl #12 + beq LSYM(Lml_z) @ / INF -> 0 + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + @ One or both arguments are 0. + orrs r4, xl, xh, lsl #1 + bne LSYM(Lml_i) @ / 0 -> INF + orrs r5, yl, yh, lsl #1 + bne LSYM(Lml_z) @ 0 / -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_ddiv + FUNC_END divdf3 + +#endif /* L_muldivdf3 */ + +#ifdef L_arm_cmpdf2 + +@ Note: only r0 (return value) and ip are clobbered here. + +ARM_FUNC_START gtdf2 +ARM_FUNC_ALIAS gedf2 gtdf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltdf2 +ARM_FUNC_ALIAS ledf2 ltdf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpdf2 +ARM_FUNC_ALIAS nedf2 cmpdf2 +ARM_FUNC_ALIAS eqdf2 cmpdf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! 
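[Editor's aside -- illustrative addition, not part of the upstream patch.] The shared comparison body below computes a signed three-way result (negative, zero, positive), and the value just pushed in ip is the return code for unordered operands. A hedged sketch of how a caller-side test is typically lowered onto these helpers; the wrapper name is invented, and the NaN behaviour described assumes this implementation's choice of +1 for the ltdf2/ledf2 entry points:

/* __ltdf2 returns a value < 0 only when neither operand is NaN and
   a < b; here the unordered case returns the +1 pushed above, so the
   test is false for NaNs, as C requires.  */
extern int __ltdf2 (double a, double b);

static int
soft_less_than (double a, double b)
{
  return __ltdf2 (a, b) < 0;
}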
+ + @ Trap any INF/NAN first. + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + mov ip, yh, lsl #1 + do_it ne + COND(mvn,s,ne) ip, ip, asr #21 + beq 3f + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 + do_it eq, e + COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 + teqne xh, yh @ or xh == yh + do_it eq, tt + teqeq xl, yl @ and xl == yl + moveq r0, #0 @ then equal. + RETc(eq) + + @ Clear C flag + cmn r0, #0 + + @ Compare sign, + teq xh, yh + + @ Compare values if same sign + do_it pl + cmppl xh, yh + do_it eq + cmpeq xl, yl + + @ Result: + do_it cs, e + movcs r0, yh, asr #31 + mvncc r0, yh, asr #31 + orr r0, r0, #1 + RET + + @ Look for a NAN. +3: mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 4f + orrs ip, xl, xh, lsl #12 + bne 5f @ x is NAN +4: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2b + orrs ip, yl, yh, lsl #12 + beq 2b @ y is not NAN +5: ldr r0, [sp], #4 @ unordered return code + RET + + FUNC_END gedf2 + FUNC_END gtdf2 + FUNC_END ledf2 + FUNC_END ltdf2 + FUNC_END nedf2 + FUNC_END eqdf2 + FUNC_END cmpdf2 + +ARM_FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +ARM_FUNC_START aeabi_cdcmpeq +ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, lr} + ARM_CALL cmpdf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0" + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +ARM_FUNC_START aeabi_dcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmpeq + +ARM_FUNC_START aeabi_dcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmplt + +ARM_FUNC_START aeabi_dcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_dcmple + +ARM_FUNC_START aeabi_dcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_dcmpge + +ARM_FUNC_START aeabi_dcmpgt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. + RETLDM + + FUNC_END aeabi_dcmpgt + +#endif /* L_cmpdf2 */ + +#ifdef L_arm_unorddf2 + +ARM_FUNC_START unorddf2 +ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 + + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 1f + orrs ip, xl, xh, lsl #12 + bne 3f @ x is NAN +1: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2f + orrs ip, yl, yh, lsl #12 + bne 3f @ y is NAN +2: mov r0, #0 @ arguments are ordered. + RET + +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_dcmpun + FUNC_END unorddf2 + +#endif /* L_unorddf2 */ + +#ifdef L_arm_fixdfsi + +ARM_FUNC_START fixdfsi +ARM_FUNC_ALIAS aeabi_d2iz fixdfsi + + @ check exponent range. 
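[Editor's aside -- illustrative addition, not part of the upstream patch.] A hedged C rendering of the truncation fixdfsi performs, beginning with the exponent-range check that follows; the helper name is made up, and NaN is mapped to 0 to match the assembly's choice:

#include <stdint.h>
#include <string.h>

static int32_t
soft_d2iz (double d)
{
  uint64_t bits;
  memcpy (&bits, &d, sizeof bits);
  uint32_t xh = (uint32_t) (bits >> 32);
  uint32_t xl = (uint32_t) bits;
  unsigned field = (xh >> 20) & 0x7ff;        /* biased exponent */
  int exp = (int) field - 1023;
  int neg = xh >> 31;

  if (field == 0x7ff)                         /* INF or NaN */
    return (xl | (xh << 12)) ? 0 : (neg ? INT32_MIN : INT32_MAX);
  if (exp < 0)                                /* |d| < 1.0: truncates to 0 */
    return 0;
  if (exp > 30)                               /* too large for int32_t */
    return neg ? INT32_MIN : INT32_MAX;

  uint64_t mant = (bits & 0x000fffffffffffffULL) | (1ULL << 52);
  uint64_t val  = mant >> (52 - exp);         /* truncate toward zero */
  return neg ? -(int32_t) val : (int32_t) val;
}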
+ mov r2, xh, lsl #1 + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bls 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + tst xh, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ x is NAN. +3: ands r0, xh, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ maximum signed positive si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2iz + FUNC_END fixdfsi + +#endif /* L_fixdfsi */ + +#ifdef L_arm_fixunsdfsi + +ARM_FUNC_START fixunsdfsi +ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi + + @ check exponent range. + movs r2, xh, lsl #1 + bcs 1f @ value is negative + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bmi 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ value is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2uiz + FUNC_END fixunsdfsi + +#endif /* L_fixunsdfsi */ + +#ifdef L_arm_truncdfsf2 + +ARM_FUNC_START truncdfsf2 +ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 + + @ check exponent range. + mov r2, xh, lsl #1 + subs r3, r2, #((1023 - 127) << 21) + do_it cs, t + COND(sub,s,cs) ip, r3, #(1 << 21) + COND(rsb,s,cs) ip, ip, #(254 << 21) + bls 2f @ value is out of range + +1: @ shift and round mantissa + and ip, xh, #0x80000000 + mov r2, xl, lsl #3 + orr xl, ip, xl, lsr #29 + cmp r2, #0x80000000 + adc r0, xl, r3, lsl #2 + do_it eq + biceq r0, r0, #1 + RET + +2: @ either overflow or underflow + tst xh, #0x40000000 + bne 3f @ overflow + + @ check if denormalized value is possible + adds r2, r3, #(23 << 21) + do_it lt, t + andlt r0, xh, #0x80000000 @ too small, return signed 0. + RETc(lt) + + @ denormalize value so we can resume with the code above afterwards. + orr xh, xh, #0x00100000 + mov r2, r2, lsr #21 + rsb r2, r2, #24 + rsb ip, r2, #32 +#if defined(__thumb2__) + lsls r3, xl, ip +#else + movs r3, xl, lsl ip +#endif + shift1 lsr, xl, xl, r2 + do_it ne + orrne xl, xl, #1 @ fold r3 for rounding considerations. + mov r3, xh, lsl #11 + mov r3, r3, lsr #11 + shiftop orr xl xl r3 lsl ip ip + shift1 lsr, r3, r3, r2 + mov r3, r3, lsl #1 + b 1b + +3: @ chech for NAN + mvns r3, r2, asr #21 + bne 5f @ simple overflow + orrs r3, xl, xh, lsl #12 + do_it ne, tt + movne r0, #0x7f000000 + orrne r0, r0, #0x00c00000 + RETc(ne) @ return NAN + +5: @ return INF with sign + and r0, xh, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + FUNC_END aeabi_d2f + FUNC_END truncdfsf2 + +#endif /* L_truncdfsf2 */ diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S new file mode 100644 index 000000000..c93f66d8f --- /dev/null +++ b/gcc/config/arm/ieee754-sf.S @@ -0,0 +1,1060 @@ +/* ieee754-sf.S single-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. 
+ Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + +#ifdef L_arm_negsf2 + +ARM_FUNC_START negsf2 +ARM_FUNC_ALIAS aeabi_fneg negsf2 + + eor r0, r0, #0x80000000 @ flip sign bit + RET + + FUNC_END aeabi_fneg + FUNC_END negsf2 + +#endif + +#ifdef L_arm_addsubsf3 + +ARM_FUNC_START aeabi_frsub + + eor r0, r0, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subsf3 +ARM_FUNC_ALIAS aeabi_fsub subsf3 + + eor r1, r1, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START addsf3 +ARM_FUNC_ALIAS aeabi_fadd addsf3 + +1: @ Look for zeroes, equal values, INF, or NAN. + movs r2, r0, lsl #1 + do_it ne, ttt + COND(mov,s,ne) r3, r1, lsl #1 + teqne r2, r3 + COND(mvn,s,ne) ip, r2, asr #24 + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r2, + @ corresponding arg in r0, and positive exponent difference in r3. + mov r2, r2, lsr #24 + rsbs r3, r2, r3, lsr #24 + do_it gt, ttt + addgt r2, r2, r3 + eorgt r1, r0, r1 + eorgt r0, r1, r0 + eorgt r1, r0, r1 + do_it lt + rsblt r3, r3, #0 + + @ If exponent difference is too large, return largest argument + @ already in r0. We need up to 25 bit to handle proper rounding + @ of 0x1p25 - 1.1. + cmp r3, #25 + do_it hi + RETc(hi) + + @ Convert mantissa to signed integer. + tst r0, #0x80000000 + orr r0, r0, #0x00800000 + bic r0, r0, #0xff000000 + do_it ne + rsbne r0, r0, #0 + tst r1, #0x80000000 + orr r1, r1, #0x00800000 + bic r1, r1, #0xff000000 + do_it ne + rsbne r1, r1, #0 + + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r2, r3 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r2, r2, #1 + + @ Shift and add second arg to first arg in r0. + @ Keep leftover bits into r1. + shiftop adds r0 r0 r1 asr r3 ip + rsb r3, r3, #32 + shift1 lsl, r1, r1, r3 + + @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) + and r3, r0, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + negs r1, r1 + sbc r0, r0, r0, lsl #1 +#else + rsbs r1, r1, #0 + rsc r0, r0, #0 +#endif + + @ Determine how to normalize the result. 
+LSYM(Lad_p): + cmp r0, #0x00800000 + bcc LSYM(Lad_a) + cmp r0, #0x01000000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #1 + + @ Make sure we did not bust our exponent. + cmp r2, #254 + bhs LSYM(Lad_o) + + @ Our result is now properly aligned into r0, remaining bits in r1. + @ Pack final result together. + @ Round with MSB of r1. If halfway between two numbers, round towards + @ LSB of r0 = 0. +LSYM(Lad_e): + cmp r1, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + orr r0, r0, r3 + RET + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs r1, r1, lsl #1 + adc r0, r0, r0 + tst r0, #0x00800000 + sub r2, r2, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since r1 will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + movs ip, r0, lsr #12 + moveq r0, r0, lsl #12 + subeq r2, r2, #12 + tst r0, #0x00ff0000 + moveq r0, r0, lsl #8 + subeq r2, r2, #8 + tst r0, #0x00f00000 + moveq r0, r0, lsl #4 + subeq r2, r2, #4 + tst r0, #0x00c00000 + moveq r0, r0, lsl #2 + subeq r2, r2, #2 + cmp r0, #0x00800000 + movcc r0, r0, lsl #1 + sbcs r2, r2, #0 + +#else + + clz ip, r0 + sub ip, ip, #8 + subs r2, r2, ip + shift1 lsl, r0, r0, ip + +#endif + + @ Final result with sign + @ If exponent negative, denormalize result. + do_it ge, et + addge r0, r0, r2, lsl #23 + rsblt r2, r2, #0 + orrge r0, r0, r3 +#if defined(__thumb2__) + do_it lt, t + lsrlt r0, r0, r2 + orrlt r0, r3, r0 +#else + orrlt r0, r3, r0, lsr r2 +#endif + RET + + @ Fixup and adjust bit position for denormalized arguments. + @ Note that r2 must not remain equal to 0. +LSYM(Lad_d): + teq r2, #0 + eor r1, r1, #0x00800000 + do_it eq, te + eoreq r0, r0, #0x00800000 + addeq r2, r2, #1 + subne r3, r3, #1 + b LSYM(Lad_x) + +LSYM(Lad_s): + mov r3, r1, lsl #1 + + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_i) + + teq r2, r3 + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + teq r2, #0 + do_it eq + moveq r0, r1 + RET + +1: teq r0, r1 + + @ Result is x - x = 0. + do_it ne, t + movne r0, #0 + RETc(ne) + + @ Result is x + x = 2x. + tst r2, #0xff000000 + bne 2f + movs r0, r0, lsl #1 + do_it cs + orrcs r0, r0, #0x80000000 + RET +2: adds r2, r2, #(2 << 24) + do_it cc, t + addcc r0, r0, #(1 << 23) + RETc(cc) + and r3, r0, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr r0, r3, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ At least one of r0/r1 is INF/NAN. 
+ @ if r0 != INF/NAN: return r1 (which is INF/NAN) + @ if r1 != INF/NAN: return r0 (which is INF/NAN) + @ if r0 or r1 is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return r0 (which is INF or -INF) +LSYM(Lad_i): + mvns r2, r2, asr #24 + do_it ne, et + movne r0, r1 + COND(mvn,s,eq) r3, r3, asr #24 + movne r1, r0 + movs r2, r0, lsl #9 + do_it eq, te + COND(mov,s,eq) r3, r1, lsl #9 + teqeq r0, r1 + orrne r0, r0, #0x00400000 @ quiet NAN + RET + + FUNC_END aeabi_frsub + FUNC_END aeabi_fadd + FUNC_END addsf3 + FUNC_END aeabi_fsub + FUNC_END subsf3 + +ARM_FUNC_START floatunsisf +ARM_FUNC_ALIAS aeabi_ui2f floatunsisf + + mov r3, #0 + b 1f + +ARM_FUNC_START floatsisf +ARM_FUNC_ALIAS aeabi_i2f floatsisf + + ands r3, r0, #0x80000000 + do_it mi + rsbmi r0, r0, #0 + +1: movs ip, r0 + do_it eq + RETc(eq) + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23) << 23) + + .ifnc ah, r0 + mov ah, r0 + .endif + mov al, #0 + b 2f + + FUNC_END aeabi_i2f + FUNC_END floatsisf + FUNC_END aeabi_ui2f + FUNC_END floatunsisf + +ARM_FUNC_START floatundisf +ARM_FUNC_ALIAS aeabi_ul2f floatundisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + mov r3, #0 + b 1f + +ARM_FUNC_START floatdisf +ARM_FUNC_ALIAS aeabi_l2f floatdisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + ands r3, ah, #0x80000000 @ sign bit in r3 + bpl 1f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +1: +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0 for backwards + @ compatibility. + str lr, [sp, #-8]! + adr lr, LSYM(f0_ret) +#endif + + movs ip, ah + do_it eq, tt + moveq ip, al + moveq ah, al + moveq al, #0 + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23 + 32) << 23) + do_it eq + subeq r3, r3, #(32 << 23) +2: sub r3, r3, #(1 << 23) + +#if __ARM_ARCH__ < 5 + + mov r2, #23 + cmp ip, #(1 << 16) + do_it hs, t + movhs ip, ip, lsr #16 + subhs r2, r2, #16 + cmp ip, #(1 << 8) + do_it hs, t + movhs ip, ip, lsr #8 + subhs r2, r2, #8 + cmp ip, #(1 << 4) + do_it hs, t + movhs ip, ip, lsr #4 + subhs r2, r2, #4 + cmp ip, #(1 << 2) + do_it hs, e + subhs r2, r2, #2 + sublo r2, r2, ip, lsr #1 + subs r2, r2, ip, lsr #3 + +#else + + clz r2, ip + subs r2, r2, #8 + +#endif + + sub r3, r3, r2, lsl #23 + blt 3f + + shiftop add r3 r3 ah lsl r2 ip + shift1 lsl, ip, al, r2 + rsb r2, r2, #32 + cmp ip, #0x80000000 + shiftop adc r0 r3 al lsr r2 r2 + do_it eq + biceq r0, r0, #1 + RET + +3: add r2, r2, #32 + shift1 lsl, ip, ah, r2 + rsb r2, r2, #32 + orrs al, al, ip, lsl #1 + shiftop adc r0 r3 ah lsr r2 r2 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + +LSYM(f0_ret): + str r0, [sp, #-4]! + ldfs f0, [sp], #4 + RETLDM + +#endif + + FUNC_END floatdisf + FUNC_END aeabi_l2f + FUNC_END floatundisf + FUNC_END aeabi_ul2f + +#endif /* L_addsubsf3 */ + +#ifdef L_arm_muldivsf3 + +ARM_FUNC_START mulsf3 +ARM_FUNC_ALIAS aeabi_fmul mulsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Lml_s) +LSYM(Lml_x): + + @ Add exponents together + add r2, r2, r3 + + @ Determine final sign. 
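[Editor's aside -- illustrative addition, not part of the upstream patch.] The add just above sums the biased exponents and the eor that follows forms the product sign; one bias of 127 is removed further down. A small hedged C sketch of that step over the raw binary32 words (the helper name is invented):

#include <stdint.h>

static uint32_t
mulsf3_sign_and_exp (uint32_t a, uint32_t b, unsigned *exp_sum)
{
  uint32_t sign = (a ^ b) & 0x80000000u;      /* sign of the product */
  unsigned ea = (a >> 23) & 0xff;             /* biased exponents */
  unsigned eb = (b >> 23) & 0xff;
  *exp_sum = ea + eb;                         /* still carries two biases */
  return sign;
}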
+ eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + @ Make up for final alignment. + movs r0, r0, lsl #9 + do_it ne + COND(mov,s,ne) r1, r1, lsl #9 + beq LSYM(Lml_1) + mov r3, #0x08000000 + orr r0, r3, r0, lsr #5 + orr r1, r3, r1, lsr #5 + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r3, which will be restored into r0 later. + and r3, ip, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. + do_push {r3, r4, r5} + mov r4, r0, lsr #16 + mov r5, r1, lsr #16 + bic r0, r0, r4, lsl #16 + bic r1, r1, r5, lsl #16 + mul ip, r4, r5 + mul r3, r0, r1 + mul r0, r5, r0 + mla r0, r4, r1, r0 + adds r3, r3, r0, lsl #16 + adc r1, ip, r0, lsr #16 + do_pop {r0, r4, r5} + +#else + + @ The actual multiplication. + umull r3, r1, r0, r1 + + @ Put final sign in r0. + and r0, ip, #0x80000000 + +#endif + + @ Adjust result upon the MSB position. + cmp r1, #(1 << 23) + do_it cc, tt + movcc r1, r1, lsl #1 + orrcc r1, r1, r3, lsr #31 + movcc r3, r3, lsl #1 + + @ Add sign to result. + orr r0, r0, r1 + + @ Apply exponent bias, check for under/overflow. + sbc r2, r2, #127 + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + teq r0, #0 + and ip, ip, #0x80000000 + do_it eq + moveq r1, r1, lsl #9 + orr r0, ip, r0, lsr #9 + orr r0, r0, r1, lsr #9 + subs r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + @ Under/overflow: fix things up for the code below. + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r2, #(24 + 1) + do_it le, t + bicle r0, r0, #0x7fffffff + RETc(le) + + @ Shift value right, round, etc. + rsb r2, r2, #0 + movs r1, r0, lsl #1 + shift1 lsr, r1, r1, r2 + rsb r2, r2, #32 + shift1 lsl, ip, r0, r2 + movs r0, r1, rrx + adc r0, r0, #0 + orrs r3, r3, ip, lsl #1 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Lml_x) + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + and r3, ip, r1, lsr #23 + teq r2, ip + do_it ne + teqne r3, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor r0, r0, r1 + bic r0, r0, #0x7fffffff + RET + +1: @ One or both args are INF or NAN. + teq r0, #0x0 + do_it ne, ett + teqne r0, #0x80000000 + moveq r0, r1 + teqne r1, #0x0 + teqne r1, #0x80000000 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN * -> NAN +1: teq r3, ip + bne LSYM(Lml_i) + movs r3, r1, lsl #9 + do_it ne + movne r0, r1 + bne LSYM(Lml_n) @ * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor r0, r0, r1 + + @ Overflow: return INF (sign already in r0). 
+LSYM(Lml_o): + and r0, r0, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ Return a quiet NAN. +LSYM(Lml_n): + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00c00000 + RET + + FUNC_END aeabi_fmul + FUNC_END mulsf3 + +ARM_FUNC_START divsf3 +ARM_FUNC_ALIAS aeabi_fdiv divsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Ldv_s) +LSYM(Ldv_x): + + @ Substract divisor exponent from dividend''s + sub r2, r2, r3 + + @ Preserve final sign into ip. + eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ Dividend -> r3, divisor -> r1. + movs r1, r1, lsl #9 + mov r0, r0, lsl #9 + beq LSYM(Ldv_1) + mov r3, #0x10000000 + orr r1, r3, r1, lsr #4 + orr r3, r3, r0, lsr #4 + + @ Initialize r0 (result) with final sign bit. + and r0, ip, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. + cmp r3, r1 + do_it cc + movcc r3, r3, lsl #1 + adc r2, r2, #(127 - 2) + + @ The actual division loop. + mov ip, #0x00800000 +1: cmp r3, r1 + do_it cs, t + subcs r3, r3, r1 + orrcs r0, r0, ip + cmp r3, r1, lsr #1 + do_it cs, t + subcs r3, r3, r1, lsr #1 + orrcs r0, r0, ip, lsr #1 + cmp r3, r1, lsr #2 + do_it cs, t + subcs r3, r3, r1, lsr #2 + orrcs r0, r0, ip, lsr #2 + cmp r3, r1, lsr #3 + do_it cs, t + subcs r3, r3, r1, lsr #3 + orrcs r0, r0, ip, lsr #3 + movs r3, r3, lsl #4 + do_it ne + COND(mov,s,ne) ip, ip, lsr #4 + bne 1b + + @ Check exponent for under/overflow. + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, r1 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Division by 0x1p*: let''s shortcut a lot of code. +LSYM(Ldv_1): + and ip, ip, #0x80000000 + orr r0, ip, r0, lsr #9 + adds r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + b LSYM(Lml_u) + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Ldv_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Ldv_x) + + @ One or both arguments are either INF, NAN, zero or denormalized. +LSYM(Ldv_s): + and r3, ip, r1, lsr #23 + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN / -> NAN + teq r3, ip + bne LSYM(Lml_i) @ INF / -> INF + mov r0, r1 + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r3, ip + bne 2f + movs r3, r1, lsl #9 + beq LSYM(Lml_z) @ / INF -> 0 + mov r0, r1 + b LSYM(Lml_n) @ / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Ldv_d) + @ One or both arguments are zero. + bics r2, r0, #0x80000000 + bne LSYM(Lml_i) @ / 0 -> INF + bics r3, r1, #0x80000000 + bne LSYM(Lml_z) @ 0 / -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_fdiv + FUNC_END divsf3 + +#endif /* L_muldivsf3 */ + +#ifdef L_arm_cmpsf2 + + @ The return value in r0 is + @ + @ 0 if the operands are equal + @ 1 if the first operand is greater than the second, or + @ the operands are unordered and the operation is + @ CMP, LT, LE, NE, or EQ. 
+ @ -1 if the first operand is less than the second, or + @ the operands are unordered and the operation is GT + @ or GE. + @ + @ The Z flag will be set iff the operands are equal. + @ + @ The following registers are clobbered by this function: + @ ip, r0, r1, r2, r3 + +ARM_FUNC_START gtsf2 +ARM_FUNC_ALIAS gesf2 gtsf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltsf2 +ARM_FUNC_ALIAS lesf2 ltsf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpsf2 +ARM_FUNC_ALIAS nesf2 cmpsf2 +ARM_FUNC_ALIAS eqsf2 cmpsf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! + + @ Trap any INF/NAN first. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq 3f + + @ Compare values. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag + do_it ne + teqne r0, r1 @ if not 0 compare sign + do_it pl + COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 + + @ Result: + do_it hi + movhi r0, r1, asr #31 + do_it lo + mvnlo r0, r1, asr #31 + do_it ne + orrne r0, r0, #1 + RET + + @ Look for a NAN. +3: mvns ip, r2, asr #24 + bne 4f + movs ip, r0, lsl #9 + bne 5f @ r0 is NAN +4: mvns ip, r3, asr #24 + bne 2b + movs ip, r1, lsl #9 + beq 2b @ r1 is not NAN +5: ldr r0, [sp], #4 @ return unordered code. + RET + + FUNC_END gesf2 + FUNC_END gtsf2 + FUNC_END lesf2 + FUNC_END ltsf2 + FUNC_END nesf2 + FUNC_END eqsf2 + FUNC_END cmpsf2 + +ARM_FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +ARM_FUNC_START aeabi_cfcmpeq +ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, r1, r2, r3, lr} + ARM_CALL cmpsf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0, r1, r2, r3" + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +ARM_FUNC_START aeabi_fcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmpeq + +ARM_FUNC_START aeabi_fcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmplt + +ARM_FUNC_START aeabi_fcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_fcmple + +ARM_FUNC_START aeabi_fcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_fcmpge + +ARM_FUNC_START aeabi_fcmpgt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. 
+ RETLDM + + FUNC_END aeabi_fcmpgt + +#endif /* L_cmpsf2 */ + +#ifdef L_arm_unordsf2 + +ARM_FUNC_START unordsf2 +ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 + + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + bne 1f + movs ip, r0, lsl #9 + bne 3f @ r0 is NAN +1: mvns ip, r3, asr #24 + bne 2f + movs ip, r1, lsl #9 + bne 3f @ r1 is NAN +2: mov r0, #0 @ arguments are ordered. + RET +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_fcmpun + FUNC_END unordsf2 + +#endif /* L_unordsf2 */ + +#ifdef L_arm_fixsfsi + +ARM_FUNC_START fixsfsi +ARM_FUNC_ALIAS aeabi_f2iz fixsfsi + + @ check exponent range. + mov r2, r0, lsl #1 + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bls 2f @ value is too large + + @ scale value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + tst r0, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: ands r0, r0, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ the maximum signed positive si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2iz + FUNC_END fixsfsi + +#endif /* L_fixsfsi */ + +#ifdef L_arm_fixunssfsi + +ARM_FUNC_START fixunssfsi +ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi + + @ check exponent range. + movs r2, r0, lsl #1 + bcs 1f @ value is negative + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bmi 2f @ value is too large + + @ scale the value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2uiz + FUNC_END fixunssfsi + +#endif /* L_fixunssfsi */ diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md new file mode 100644 index 000000000..887c962ba --- /dev/null +++ b/gcc/config/arm/iterators.md @@ -0,0 +1,405 @@ +;; Code and mode itertator and attribute definitions for the ARM backend +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;;---------------------------------------------------------------------------- +;; Mode iterators +;;---------------------------------------------------------------------------- + +;; A list of modes that are exactly 64 bits in size. This is used to expand +;; some splits that are the same for all modes when operating on ARM +;; registers. 
+(define_mode_iterator ANY64 [DI DF V8QI V4HI V2SI V2SF]) + +(define_mode_iterator ANY128 [V2DI V2DF V16QI V8HI V4SI V4SF]) + +;; A list of integer modes that are up to one word long +(define_mode_iterator QHSI [QI HI SI]) + +;; Integer element sizes implemented by IWMMXT. +(define_mode_iterator VMMX [V2SI V4HI V8QI]) + +;; Integer element sizes for shifts. +(define_mode_iterator VSHFT [V4HI V2SI DI]) + +;; Integer and float modes supported by Neon and IWMMXT. +(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) + +;; Integer and float modes supported by Neon and IWMMXT, except V2DI. +(define_mode_iterator VALLW [V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) + +;; Integer modes supported by Neon and IWMMXT +(define_mode_iterator VINT [V2DI V2SI V4HI V8QI V4SI V8HI V16QI]) + +;; Integer modes supported by Neon and IWMMXT, except V2DI +(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI]) + +;; Double-width vector modes. +(define_mode_iterator VD [V8QI V4HI V2SI V2SF]) + +;; Double-width vector modes plus 64-bit elements. +(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI]) + +;; Double-width vector modes without floating-point elements. +(define_mode_iterator VDI [V8QI V4HI V2SI]) + +;; Quad-width vector modes. +(define_mode_iterator VQ [V16QI V8HI V4SI V4SF]) + +;; Quad-width vector modes plus 64-bit elements. +(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI]) + +;; Quad-width vector modes without floating-point elements. +(define_mode_iterator VQI [V16QI V8HI V4SI]) + +;; Quad-width vector modes, with TImode added, for moves. +(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI]) + +;; Opaque structure types wider than TImode. +(define_mode_iterator VSTRUCT [EI OI CI XI]) + +;; Opaque structure types used in table lookups (except vtbl1/vtbx1). +(define_mode_iterator VTAB [TI EI OI]) + +;; Widenable modes. +(define_mode_iterator VW [V8QI V4HI V2SI]) + +;; Narrowable modes. +(define_mode_iterator VN [V8HI V4SI V2DI]) + +;; All supported vector modes (except singleton DImode). +(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI]) + +;; All supported vector modes (except those with 64-bit integer elements). +(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) + +;; Supported integer vector modes (not 64 bit elements). +(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Supported integer vector modes (not singleton DI) +(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) + +;; Vector modes, including 64-bit integer elements. +(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI]) + +;; Vector modes including 64-bit integer elements, but no floats. +(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) + +;; Vector modes for float->int conversions. +(define_mode_iterator VCVTF [V2SF V4SF]) + +;; Vector modes form int->float conversions. +(define_mode_iterator VCVTI [V2SI V4SI]) + +;; Vector modes for doubleword multiply-accumulate, etc. insns. +(define_mode_iterator VMD [V4HI V2SI V2SF]) + +;; Vector modes for quadword multiply-accumulate, etc. insns. +(define_mode_iterator VMQ [V8HI V4SI V4SF]) + +;; Above modes combined. +(define_mode_iterator VMDQ [V4HI V2SI V2SF V8HI V4SI V4SF]) + +;; As VMD, but integer modes only. +(define_mode_iterator VMDI [V4HI V2SI]) + +;; As VMQ, but integer modes only. +(define_mode_iterator VMQI [V8HI V4SI]) + +;; Above modes combined. 
+(define_mode_iterator VMDQI [V4HI V2SI V8HI V4SI]) + +;; Modes with 8-bit and 16-bit elements. +(define_mode_iterator VX [V8QI V4HI V16QI V8HI]) + +;; Modes with 8-bit elements. +(define_mode_iterator VE [V8QI V16QI]) + +;; Modes with 64-bit elements only. +(define_mode_iterator V64 [DI V2DI]) + +;; Modes with 32-bit elements only. +(define_mode_iterator V32 [V2SI V2SF V4SI V4SF]) + +;; Modes with 8-bit, 16-bit and 32-bit elements. +(define_mode_iterator VU [V16QI V8HI V4SI]) + +;;---------------------------------------------------------------------------- +;; Code iterators +;;---------------------------------------------------------------------------- + +;; A list of condition codes used in compare instructions where +;; the carry flag from the addition is used instead of doing the +;; compare a second time. +(define_code_iterator LTUGEU [ltu geu]) + +;; A list of ... +(define_code_iterator ior_xor [ior xor]) + +;; Operations on two halves of a quadword vector. +(define_code_iterator vqh_ops [plus smin smax umin umax]) + +;; Operations on two halves of a quadword vector, +;; without unsigned variants (for use with *SFmode pattern). +(define_code_iterator vqhs_ops [plus smin smax]) + +;; A list of widening operators +(define_code_iterator SE [sign_extend zero_extend]) + +;;---------------------------------------------------------------------------- +;; Mode attributes +;;---------------------------------------------------------------------------- + +;; Determine element size suffix from vector mode. +(define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")]) + +;; vtbl suffix for NEON vector modes. +(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")]) + +;; (Opposite) mode to convert to/from for NEON mode conversions. +(define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI") + (V4SI "V4SF") (V4SF "V4SI")]) + +;; Define element mode for each vector mode. +(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") + (V4HI "HI") (V8HI "HI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + +;; Element modes for vector extraction, padded up to register size. + +(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI") + (V4HI "SI") (V8HI "SI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + +;; Mode of pair of elements for each vector mode, to define transfer +;; size for structure lane/dup loads and stores. +(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") + (V4HI "SI") (V8HI "SI") + (V2SI "V2SI") (V4SI "V2SI") + (V2SF "V2SF") (V4SF "V2SF") + (DI "V2DI") (V2DI "V2DI")]) + +;; Similar, for three elements. +;; ??? Should we define extra modes so that sizes of all three-element +;; accesses can be accurately represented? +(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI") + (V4HI "V4HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V4SI") + (V2SF "V4SF") (V4SF "V4SF") + (DI "EI") (V2DI "EI")]) + +;; Similar, for four elements. +(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") + (V4HI "V4HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V4SI") + (V2SF "V4SF") (V4SF "V4SF") + (DI "OI") (V2DI "OI")]) + +;; Register width from element mode +(define_mode_attr V_reg [(V8QI "P") (V16QI "q") + (V4HI "P") (V8HI "q") + (V2SI "P") (V4SI "q") + (V2SF "P") (V4SF "q") + (DI "P") (V2DI "q")]) + +;; Wider modes with the same number of elements. +(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")]) + +;; Narrower modes with the same number of elements. 
+(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")]) + +;; Narrower modes with double the number of elements. +(define_mode_attr V_narrow_pack [(V4SI "V8HI") (V8HI "V16QI") (V2DI "V4SI") + (V4HI "V8QI") (V2SI "V4HI") (DI "V2SI")]) + +;; Modes with half the number of equal-sized elements. +(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") + (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF") + (V2DI "DI")]) + +;; Same, but lower-case. +(define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi") + (V4SI "v2si") (V4SF "v2sf") + (V2DI "di")]) + +;; Modes with twice the number of equal-sized elements. +(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI") + (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF") + (DI "V2DI")]) + +;; Same, but lower-case. +(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi") + (V2SI "v4si") (V2SF "v4sf") + (DI "v2di")]) + +;; Modes with double-width elements. +(define_mode_attr V_double_width [(V8QI "V4HI") (V16QI "V8HI") + (V4HI "V2SI") (V8HI "V4SI") + (V2SI "DI") (V4SI "V2DI")]) + +;; Double-sized modes with the same element size. +;; Used for neon_vdup_lane, where the second operand is double-sized +;; even when the first one is quad. +(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI") + (V4SI "V2SI") (V4SF "V2SF") + (V8QI "V8QI") (V4HI "V4HI") + (V2SI "V2SI") (V2SF "V2SF")]) + +;; Mode of result of comparison operations (and bit-select operand 1). +(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") + (V4HI "V4HI") (V8HI "V8HI") + (V2SI "V2SI") (V4SI "V4SI") + (V2SF "V2SI") (V4SF "V4SI") + (DI "DI") (V2DI "V2DI")]) + +;; Get element type from double-width mode, for operations where we +;; don't care about signedness. +(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8") + (V4HI "i16") (V8HI "i16") + (V2SI "i32") (V4SI "i32") + (DI "i64") (V2DI "i64") + (V2SF "f32") (V4SF "f32")]) + +;; Same, but for operations which work on signed values. +(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8") + (V4HI "s16") (V8HI "s16") + (V2SI "s32") (V4SI "s32") + (DI "s64") (V2DI "s64") + (V2SF "f32") (V4SF "f32")]) + +;; Same, but for operations which work on unsigned values. +(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8") + (V4HI "u16") (V8HI "u16") + (V2SI "u32") (V4SI "u32") + (DI "u64") (V2DI "u64") + (V2SF "f32") (V4SF "f32")]) + +;; Element types for extraction of unsigned scalars. +(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8") + (V4HI "u16") (V8HI "u16") + (V2SI "32") (V4SI "32") + (V2SF "32") (V4SF "32")]) + +(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8") + (V4HI "16") (V8HI "16") + (V2SI "32") (V4SI "32") + (DI "64") (V2DI "64") + (V2SF "32") (V4SF "32")]) + +;; Element sizes for duplicating ARM registers to all elements of a vector. +(define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")]) + +;; Opaque integer types for results of pair-forming intrinsics (vtrn, etc.) +(define_mode_attr V_PAIR [(V8QI "TI") (V16QI "OI") + (V4HI "TI") (V8HI "OI") + (V2SI "TI") (V4SI "OI") + (V2SF "TI") (V4SF "OI") + (DI "TI") (V2DI "OI")]) + +;; Same, but lower-case. +(define_mode_attr V_pair [(V8QI "ti") (V16QI "oi") + (V4HI "ti") (V8HI "oi") + (V2SI "ti") (V4SI "oi") + (V2SF "ti") (V4SF "oi") + (DI "ti") (V2DI "oi")]) + +;; Extra suffix on some 64-bit insn names (to avoid collision with standard +;; names which we don't want to define). 
+(define_mode_attr V_suf64 [(V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2SF "") (V4SF "") + (DI "_neon") (V2DI "")]) + + +;; Scalars to be presented to scalar multiplication instructions +;; must satisfy the following constraints. +;; 1. If the mode specifies 16-bit elements, the scalar must be in D0-D7. +;; 2. If the mode specifies 32-bit elements, the scalar must be in D0-D15. + +;; This mode attribute is used to obtain the correct register constraints. + +(define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t") + (V8HI "x") (V4SI "t") (V4SF "t")]) + +;; Predicates used for setting neon_type + +(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") + (V4HI "false") (V8HI "false") + (V2SI "false") (V4SI "false") + (V2SF "true") (V4SF "true") + (DI "false") (V2DI "false")]) + +(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true") + (V4HI "true") (V8HI "true") + (V2SI "false") (V4SI "false") + (V2SF "false") (V4SF "false") + (DI "false") (V2DI "false")]) + + +(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false") + (V4HI "true") (V8HI "false") + (V2SI "true") (V4SI "false") + (V2SF "true") (V4SF "false") + (DI "true") (V2DI "false")]) + +(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") + (V4HI "4") (V8HI "8") + (V2SI "2") (V4SI "4") + (V2SF "2") (V4SF "4") + (DI "1") (V2DI "2") + (DF "1") (V2DF "2")]) + +;; Same as V_widen, but lower-case. +(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")]) + +;; Widen. Result is half the number of elements, but widened to double-width. +(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) + +;; Conditions to be used in extenddi patterns. +(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")]) +(define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6") + (QI "&& arm_arch6")]) +(define_mode_attr qhs_extenddi_op [(SI "s_register_operand") + (HI "nonimmediate_operand") + (QI "nonimmediate_operand")]) +(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) + +;;---------------------------------------------------------------------------- +;; Code attributes +;;---------------------------------------------------------------------------- + +;; Assembler mnemonics for vqh_ops and vqhs_ops iterators. +(define_code_attr VQH_mnem [(plus "vadd") (smin "vmin") (smax "vmax") + (umin "vmin") (umax "vmax")]) + +;; Signs of above, where relevant. +(define_code_attr VQH_sign [(plus "i") (smin "s") (smax "s") (umin "u") + (umax "u")]) + +(define_code_attr cnb [(ltu "CC_C") (geu "CC")]) +(define_code_attr optab [(ltu "ltu") (geu "geu")]) + +;; Assembler mnemonics for signedness of widening operations. +(define_code_attr US [(sign_extend "s") (zero_extend "u")]) diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md new file mode 100644 index 000000000..7f13ae49b --- /dev/null +++ b/gcc/config/arm/iwmmxt.md @@ -0,0 +1,1332 @@ +;; ??? This file needs auditing for thumb2 +;; Patterns for the Intel Wireless MMX technology architecture. +;; Copyright (C) 2003, 2004, 2005, 2007, 2008, 2010 +;; Free Software Foundation, Inc. +;; Contributed by Red Hat. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +(define_insn "iwmmxt_iordi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (ior:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wor%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8")]) + +(define_insn "iwmmxt_xordi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (xor:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wxor%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8")]) + +(define_insn "iwmmxt_anddi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (and:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wand%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8")]) + +(define_insn "iwmmxt_nanddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (and:DI (match_operand:DI 1 "register_operand" "y") + (not:DI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wandn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "*iwmmxt_arm_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, m,y,y,yr,y,yrUy") + (match_operand:DI 1 "di_operand" "rIK,mi,r,y,yr,y,yrUy,y"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* +{ + switch (which_alternative) + { + default: + return output_move_double (operands); + case 0: + return \"#\"; + case 3: + return \"wmov%?\\t%0,%1\"; + case 4: + return \"tmcrr%?\\t%0,%Q1,%R1\"; + case 5: + return \"tmrrc%?\\t%Q0,%R0,%1\"; + case 6: + return \"wldrd%?\\t%0,%1\"; + case 7: + return \"wstrd%?\\t%1,%0\"; + } +}" + [(set_attr "length" "8,8,8,4,4,4,4,4") + (set_attr "type" "*,load1,store2,*,*,*,*,*") + (set_attr "pool_range" "*,1020,*,*,*,*,*,*") + (set_attr "neg_pool_range" "*,1012,*,*,*,*,*,*")] +) + +(define_insn "*iwmmxt_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,rk, m,z,r,?z,Uy,z") + (match_operand:SI 1 "general_operand" "rk, I,K,mi,rk,r,z,Uy,z, z"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: return \"mov\\t%0, %1\"; + case 1: return \"mov\\t%0, %1\"; + case 2: return \"mvn\\t%0, #%B1\"; + case 3: return \"ldr\\t%0, %1\"; + case 4: return \"str\\t%1, %0\"; + case 5: return \"tmcr\\t%0, %1\"; + case 6: return \"tmrc\\t%0, %1\"; + case 7: return arm_output_load_gr (operands); + case 8: return \"wstrw\\t%1, %0\"; + default:return \"wstrw\\t%1, [sp, #-4]!\;wldrw\\t%0, [sp], #4\\t@move CG reg\"; + }" + [(set_attr "type" "*,*,*,load1,store1,*,*,load1,store1,*") + (set_attr "length" "*,*,*,*, *,*,*, 16, *,8") + (set_attr "pool_range" "*,*,*,4096, *,*,*,1024, *,*") + (set_attr "neg_pool_range" "*,*,*,4084, *,*,*, *, 1012,*") + ;; Note - the "predicable" attribute is not allowed to have alternatives. 
+ ;; Since the wSTRw wCx instruction is not predicable, we cannot support + ;; predicating any of the alternatives in this template. Instead, + ;; we do the predication ourselves, in cond_iwmmxt_movsi_insn. + (set_attr "predicable" "no") + ;; Also - we have to pretend that these insns clobber the condition code + ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize + ;; them. + (set_attr "conds" "clob")] +) + +;; Because iwmmxt_movsi_insn is not predicable, we provide the +;; cond_exec version explicitly, with appropriate constraints. + +(define_insn "*cond_iwmmxt_movsi_insn" + [(cond_exec + (match_operator 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") + (const_int 0)]) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,z,r") + (match_operand:SI 1 "general_operand" "rI,K,mi,r,r,z")))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: return \"mov%?\\t%0, %1\"; + case 1: return \"mvn%?\\t%0, #%B1\"; + case 2: return \"ldr%?\\t%0, %1\"; + case 3: return \"str%?\\t%1, %0\"; + case 4: return \"tmcr%?\\t%0, %1\"; + default: return \"tmrc%?\\t%0, %1\"; + }" + [(set_attr "type" "*,*,load1,store1,*,*") + (set_attr "pool_range" "*,*,4096, *,*,*") + (set_attr "neg_pool_range" "*,*,4084, *,*,*")] +) + +(define_insn "mov_internal" + [(set (match_operand:VMMX 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r,?r,?m") + (match_operand:VMMX 1 "general_operand" "y,y,mi,y,r,r,mi,r"))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: return \"wmov%?\\t%0, %1\"; + case 1: return \"wstrd%?\\t%1, %0\"; + case 2: return \"wldrd%?\\t%0, %1\"; + case 3: return \"tmrrc%?\\t%Q0, %R0, %1\"; + case 4: return \"tmcrr%?\\t%0, %Q1, %R1\"; + case 5: return \"#\"; + default: return output_move_double (operands); + }" + [(set_attr "predicable" "yes") + (set_attr "length" "4, 4, 4,4,4,8, 8,8") + (set_attr "type" "*,store1,load1,*,*,*,load1,store1") + (set_attr "pool_range" "*, *, 256,*,*,*, 256,*") + (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")]) + +;; Vector add/subtract + +(define_insn "*add3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (plus:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ssaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddbss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ssaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ssaddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ss_plus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "usaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddbus%?\\t%0, %1, %2" + [(set_attr 
"predicable" "yes")]) + +(define_insn "usaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "usaddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (us_plus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "*sub3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (minus:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "sssubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubbss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "sssubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "sssubv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ss_minus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ussubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubbus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ussubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ussubv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (us_minus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "*mulv4hi3_iwmmxt" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmulul%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "smulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulsm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "umulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 16))))] + 
"TARGET_REALLY_IWMMXT" + "wmulum%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wmacs" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACS))] + "TARGET_REALLY_IWMMXT" + "wmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wmacsz" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACSZ))] + "TARGET_REALLY_IWMMXT" + "wmacsz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wmacu" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACU))] + "TARGET_REALLY_IWMMXT" + "wmacu%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wmacuz" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACUZ))] + "TARGET_REALLY_IWMMXT" + "wmacuz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +;; Same as xordi3, but don't show input operands so that we don't think +;; they are live. +(define_insn "iwmmxt_clrdi" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(const_int 0)] UNSPEC_CLRDI))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes")]) + +;; Seems like cse likes to generate these, so we have to support them. + +(define_insn "*iwmmxt_clrv8qi" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (const_vector:V8QI [(const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes")]) + +(define_insn "*iwmmxt_clrv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (const_vector:V4HI [(const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes")]) + +(define_insn "*iwmmxt_clrv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (const_vector:V2SI [(const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes")]) + +;; Unsigned averages/sum of absolute differences + +(define_insn "iwmmxt_uavgrndv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ashiftrt:V8QI + (plus:V8QI (plus:V8QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) + (const_vector:V8QI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wavg2br%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_uavgrndv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashiftrt:V4HI + (plus:V4HI (plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")) + (const_vector:V4HI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wavg2hr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + + +(define_insn "iwmmxt_uavgv8qi3" + [(set 
(match_operand:V8QI 0 "register_operand" "=y") + (ashiftrt:V8QI (plus:V8QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wavg2b%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_uavgv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashiftrt:V4HI (plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wavg2h%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_psadbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "psadbw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + + +;; Insert/extract/shuffle + +(define_insn "iwmmxt_tinsrb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI (match_operand:V8QI 1 "register_operand" "0") + (vec_duplicate:V8QI + (truncate:QI (match_operand:SI 2 "nonimmediate_operand" "r"))) + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "tinsrb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tinsrh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0") + (vec_duplicate:V4HI + (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "r"))) + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "tinsrh%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tinsrw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI (match_operand:V2SI 1 "register_operand" "0") + (vec_duplicate:V2SI + (match_operand:SI 2 "nonimmediate_operand" "r")) + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "tinsrw%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_textrmub" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_textrmsb" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmsb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_textrmuh" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmuh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_textrmsh" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmsh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +;; There are signed/unsigned variants of this instruction, but they are +;; pointless. 
+(define_insn "iwmmxt_textrmw" + [(set (match_operand:SI 0 "register_operand" "=r") + (vec_select:SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + "TARGET_REALLY_IWMMXT" + "textrmsw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wshufh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_WSHUFH))] + "TARGET_REALLY_IWMMXT" + "wshufh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +;; Mask-generating comparisons +;; +;; Note - you cannot use patterns like these here: +;; +;; (set (match:) (: (match:) (match:))) +;; +;; Because GCC will assume that the truth value (1 or 0) is installed +;; into the entire destination vector, (with the '1' going into the least +;; significant element of the vector). This is not how these instructions +;; behave. +;; +;; Unfortunately the current patterns are illegal. They are SET insns +;; without a SET in them. They work in most cases for ordinary code +;; generation, but there are circumstances where they can cause gcc to fail. +;; XXX - FIXME. + +(define_insn "eqv8qi3" + [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y") + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ)] + "TARGET_REALLY_IWMMXT" + "wcmpeqb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "eqv4hi3" + [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y") + (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ)] + "TARGET_REALLY_IWMMXT" + "wcmpeqh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "eqv2si3" + [(unspec_volatile:V2SI [(match_operand:V2SI 0 "register_operand" "=y") + (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ)] + "TARGET_REALLY_IWMMXT" + "wcmpeqw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "gtuv8qi3" + [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y") + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU)] + "TARGET_REALLY_IWMMXT" + "wcmpgtub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "gtuv4hi3" + [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y") + (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU)] + "TARGET_REALLY_IWMMXT" + "wcmpgtuh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "gtuv2si3" + [(unspec_volatile [(match_operand:V2SI 0 "register_operand" "=y") + (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU)] + "TARGET_REALLY_IWMMXT" + "wcmpgtuw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "gtv8qi3" + [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y") + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT)] + "TARGET_REALLY_IWMMXT" + "wcmpgtsb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "gtv4hi3" + [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y") + (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT)] + "TARGET_REALLY_IWMMXT" + 
"wcmpgtsh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "gtv2si3" + [(unspec_volatile [(match_operand:V2SI 0 "register_operand" "=y") + (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT)] + "TARGET_REALLY_IWMMXT" + "wcmpgtsw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +;; Max/min insns + +(define_insn "*smax3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (smax:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmaxs%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "*umax3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (umax:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmaxu%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "*smin3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (smin:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmins%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "*umin3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (umin:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wminu%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +;; Pack/unpack insns. + +(define_insn "iwmmxt_wpackhss" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y")) + (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wpackwss" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y")) + (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wpackdss" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (ss_truncate:SI (match_operand:DI 1 "register_operand" "y")) + (ss_truncate:SI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackdss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wpackhus" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y")) + (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wpackwus" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (us_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y")) + (us_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wpackdus" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (us_truncate:SI (match_operand:DI 1 "register_operand" "y")) + (us_truncate:SI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackdus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + + +(define_insn "iwmmxt_wunpckihb" + 
[(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (const_int 85)))] + "TARGET_REALLY_IWMMXT" + "wunpckihb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckihh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "TARGET_REALLY_IWMMXT" + "wunpckihh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckihw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 1)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wunpckihw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckilb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (const_int 85)))] + "TARGET_REALLY_IWMMXT" + "wunpckilb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckilh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "TARGET_REALLY_IWMMXT" + "wunpckilh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckilw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 1)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wunpckilw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckehub" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckehub%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckehuh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (zero_extend:V2SI 
+ (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckehuh%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckehuw" + [(set (match_operand:DI 0 "register_operand" "=y") + (zero_extend:DI + (vec_select:SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 1)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckehuw%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckehsb" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (sign_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckehsb%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckehsh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckehsh%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckehsw" + [(set (match_operand:DI 0 "register_operand" "=y") + (sign_extend:DI + (vec_select:SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 1)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckehsw%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckelub" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckelub%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckeluh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (zero_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckeluh%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckeluw" + [(set (match_operand:DI 0 "register_operand" "=y") + (zero_extend:DI + (vec_select:SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 0)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckeluw%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckelsb" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (sign_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckelsb%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckelsh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckelsh%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wunpckelsw" + [(set (match_operand:DI 0 "register_operand" "=y") + (sign_extend:DI + (vec_select:SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 0)]))))] + "TARGET_REALLY_IWMMXT" + "wunpckelsw%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +;; Shifts + +(define_insn "rorv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (rotatert:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:SI 2 "register_operand" "z")))] + "TARGET_REALLY_IWMMXT" + "wrorhg%?\\t%0, %1, 
%2" + [(set_attr "predicable" "yes")]) + +(define_insn "rorv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (rotatert:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:SI 2 "register_operand" "z")))] + "TARGET_REALLY_IWMMXT" + "wrorwg%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "rordi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (rotatert:DI (match_operand:DI 1 "register_operand" "y") + (match_operand:SI 2 "register_operand" "z")))] + "TARGET_REALLY_IWMMXT" + "wrordg%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashr3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y") + (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y") + (match_operand:SI 2 "register_operand" "z")))] + "TARGET_REALLY_IWMMXT" + "wsrag%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "lshr3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y") + (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y") + (match_operand:SI 2 "register_operand" "z")))] + "TARGET_REALLY_IWMMXT" + "wsrlg%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashl3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y") + (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y") + (match_operand:SI 2 "register_operand" "z")))] + "TARGET_REALLY_IWMMXT" + "wsllg%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "rorv4hi3_di" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (rotatert:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wrorh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "rorv2si3_di" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (rotatert:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wrorw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "rordi3_di" + [(set (match_operand:DI 0 "register_operand" "=y") + (rotatert:DI (match_operand:DI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wrord%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashrv4hi3_di" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsrah%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashrv2si3_di" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsraw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashrdi3_di" + [(set (match_operand:DI 0 "register_operand" "=y") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsrad%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "lshrv4hi3_di" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsrlh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "lshrv2si3_di" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" 
"y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsrlw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "lshrdi3_di" + [(set (match_operand:DI 0 "register_operand" "=y") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsrld%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashlv4hi3_di" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashift:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsllh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashlv2si3_di" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ashift:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsllw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "ashldi3_di" + [(set (match_operand:DI 0 "register_operand" "=y") + (ashift:DI (match_operand:DI 1 "register_operand" "y") + (match_operand:DI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wslld%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wmadds" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMADDS))] + "TARGET_REALLY_IWMMXT" + "wmadds%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wmaddu" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMADDU))] + "TARGET_REALLY_IWMMXT" + "wmaddu%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmia" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" "r")))))] + "TARGET_REALLY_IWMMXT" + "tmia%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmiaph" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI (sign_extend:DI + (truncate:HI (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI (match_operand:SI 3 "register_operand" "r")))) + (mult:DI (sign_extend:DI + (truncate:HI (ashiftrt:SI (match_dup 2) (const_int 16)))) + (sign_extend:DI + (truncate:HI (ashiftrt:SI (match_dup 3) (const_int 16))))))))] + "TARGET_REALLY_IWMMXT" + "tmiaph%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmiabb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))] + "TARGET_REALLY_IWMMXT" + "tmiabb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmiatb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI (ashiftrt:SI + (match_operand:SI 2 "register_operand" "r") + (const_int 16)))) + (sign_extend:DI + (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))] + 
"TARGET_REALLY_IWMMXT" + "tmiatb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmiabt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI (ashiftrt:SI + (match_operand:SI 3 "register_operand" "r") + (const_int 16)))))))] + "TARGET_REALLY_IWMMXT" + "tmiabt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmiatt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI (ashiftrt:SI + (match_operand:SI 2 "register_operand" "r") + (const_int 16)))) + (sign_extend:DI + (truncate:HI (ashiftrt:SI + (match_operand:SI 3 "register_operand" "r") + (const_int 16)))))))] + "TARGET_REALLY_IWMMXT" + "tmiatt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tbcstqi" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_duplicate:V8QI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcstb%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tbcsthi" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_duplicate:V4HI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcsth%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tbcstsi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_duplicate:V2SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcstw%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskb%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmovmskh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskh%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmovmskw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskw%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_waccb" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "waccb%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wacch" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "wacch%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_waccw" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "waccw%?\\t%0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_walign" + [(set (match_operand:V8QI 0 "register_operand" "=y,y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y,y") + (match_operand:V8QI 2 "register_operand" "y,y")) 0) + (mult:SI + (match_operand:SI 3 "nonmemory_operand" "i,z") + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "@ + waligni%?\\t%0, %1, 
%2, %3 + walignr%U3%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmrc" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")] + VUNSPEC_TMRC))] + "TARGET_REALLY_IWMMXT" + "tmrc%?\\t%0, %w1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_tmcr" + [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i") + (match_operand:SI 1 "register_operand" "r")] + VUNSPEC_TMCR)] + "TARGET_REALLY_IWMMXT" + "tmcr%?\\t%w0, %1" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wsadb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSAD))] + "TARGET_REALLY_IWMMXT" + "wsadb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wsadh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSAD))] + "TARGET_REALLY_IWMMXT" + "wsadh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wsadbz" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSADZ))] + "TARGET_REALLY_IWMMXT" + "wsadbz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "iwmmxt_wsadhz" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSADZ))] + "TARGET_REALLY_IWMMXT" + "wsadhz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")]) + diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md new file mode 100644 index 000000000..5db4a3269 --- /dev/null +++ b/gcc/config/arm/ldmstm.md @@ -0,0 +1,1191 @@ +/* ARM ldm/stm instruction patterns. This file was automatically generated + using arm-ldmstm.ml. Please do not edit manually. + + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +(define_insn "*ldm4_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 5 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_ldm4_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 5 "s_register_operand" "l"))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4")]) + +(define_insn "*ldm4_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 5))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_ldm4_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&l") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 5))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5" + "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4")]) + +(define_insn "*stm4_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 5 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_ia_update" + [(match_parallel 0 
"store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (match_dup 5)) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "stm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_stm4_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&l") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (match_dup 5)) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5" + "stm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4")]) + +(define_insn "*ldm4_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 16))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(ib%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 16))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "ldm%(ib%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 16))) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(ib%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" 
"store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 16))) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "stm%(ib%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_da" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (match_dup 5)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(da%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (match_dup 5)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "ldm%(da%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -12))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 5)) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(da%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 5)) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN 
(operands[0], 0) == 5" + "stm%(da%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int -16)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(db%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -16)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 4 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "ldm%(db%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -16))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(db%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int -16))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 3 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 4 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "stm%(db%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 6 "memory_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 7 "memory_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq 
(operands, 4, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (parallel + [(set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 6 "memory_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 7 "memory_operand" ""))])] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 4, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 9 "const_int_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 10 "const_int_operand" "")) + (set (match_operand:SI 6 "memory_operand" "") + (match_dup 2)) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 11 "const_int_operand" "")) + (set (match_operand:SI 7 "memory_operand" "") + (match_dup 3))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 9 "const_int_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 10 "const_int_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 11 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 6 "memory_operand" "") + (match_dup 2)) + (set (match_operand:SI 7 "memory_operand" "") + (match_dup 3))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 4 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" "")) + (set (match_operand:SI 6 "memory_operand" "") + (match_operand:SI 2 "s_register_operand" "")) + (set (match_operand:SI 7 "memory_operand" "") + (match_operand:SI 3 "s_register_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_insn "*ldm3_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 4 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_ldm3_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 4 "s_register_operand" "l"))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI 
(match_dup 4) + (const_int 8))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3")]) + +(define_insn "*ldm3_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 4))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_ldm3_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&l") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 4))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3")]) + +(define_insn "*stm3_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 4 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (match_dup 4)) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_stm3_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&l") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (match_dup 4)) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3")]) + +(define_insn "*ldm3_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8)))) + (set (match_operand:SI 
3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 12))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(ib%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 12))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(ib%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 12))) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(ib%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 12))) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(ib%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_da" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (match_dup 4)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(da%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (match_dup 4)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(da%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -8))) + (match_operand:SI 1 "arm_hard_register_operand" "")) 
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 4)) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(da%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 4)) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(da%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(db%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(db%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -12))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(db%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int -12))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 2 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 3 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(db%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_peephole2 + [(set (match_operand:SI 0 
"s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 3, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" ""))])] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 3, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 7 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 2))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 7 "const_int_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 2))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_operand:SI 2 "s_register_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_insn "*ldm2_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 3 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "ldm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_ldm2_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_operand:SI 3 "s_register_operand" "l"))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2" + "ldm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "load2")]) + +(define_insn "*ldm2_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 3))) + (set (match_operand:SI 2 
"arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_ldm2_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&l") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (match_dup 3))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "load2")]) + +(define_insn "*stm2_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 3 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "stm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (match_dup 3)) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*thumb_stm2_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&l") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (match_dup 3)) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "store2")]) + +(define_insn "*ldm2_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 8))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "ldm%(ib%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 8))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(ib%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_ARM && 
XVECLEN (operands[0], 0) == 2" + "stm%(ib%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(ib%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_da" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int -4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (match_dup 3)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "ldm%(da%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (match_dup 3)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(da%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 3)) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "stm%(da%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (match_dup 3)) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(da%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "ldm%(db%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (match_operand:SI 1 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(db%)\t%3!, 
{%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -8))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "stm%(db%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int -8))) + (match_operand:SI 1 "arm_hard_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 2 "arm_hard_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(db%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 2, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 4 "s_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand:SI 6 "s_register_operand" "") + (match_operand:SI 7 "s_register_operand" "")])) + (clobber (reg:CC CC_REGNUM))])] + "(((operands[6] == operands[0] && operands[7] == operands[1]) + || (operands[7] == operands[0] && operands[6] == operands[1])) + && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))" + [(parallel + [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)])) + (clobber (reg:CC CC_REGNUM))])] +{ + if (!gen_ldm_seq (operands, 2, true)) + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + 
(set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "s_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand:SI 6 "s_register_operand" "") + (match_operand:SI 7 "s_register_operand" "")]))] + "(((operands[6] == operands[0] && operands[7] == operands[1]) + || (operands[7] == operands[0] && operands[6] == operands[1])) + && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))" + [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] +{ + if (!gen_ldm_seq (operands, 2, true)) + FAIL; +}) + diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm new file mode 100644 index 000000000..2e76c01df --- /dev/null +++ b/gcc/config/arm/lib1funcs.asm @@ -0,0 +1,1829 @@ +@ libgcc routines for ARM cpu. +@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008, + 2009, 2010 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif /* __ELF__ and __linux__ */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ +/* ------------------------------------------------------------------------ */ + +/* We need to know what prefix to add to function names. */ + +#ifndef __USER_LABEL_PREFIX__ +#error __USER_LABEL_PREFIX__ not defined +#endif + +/* ANSI concatenation macros. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) + +#ifdef __ELF__ +#ifdef __thumb__ +#define __PLT__ /* Not supported in Thumb assembler (for now). */ +#elif defined __vxworks && !defined __PIC__ +#define __PLT__ /* Not supported by the kernel loader. */ +#else +#define __PLT__ (PLT) +#endif +#define TYPE(x) .type SYM(x),function +#define SIZE(x) .size SYM(x), . - SYM(x) +#define LSYM(x) .x +#else +#define __PLT__ +#define TYPE(x) +#define SIZE(x) +#define LSYM(x) x +#endif + +/* Function end macros. Variants for interworking. 
*/ + +#if defined(__ARM_ARCH_2__) +# define __ARM_ARCH__ 2 +#endif + +#if defined(__ARM_ARCH_3__) +# define __ARM_ARCH__ 3 +#endif + +#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ + || defined(__ARM_ARCH_4T__) +/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with + long multiply instructions. That includes v3M. */ +# define __ARM_ARCH__ 4 +#endif + +#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH__ 5 +#endif + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_6M__) +# define __ARM_ARCH__ 6 +#endif + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH__ 7 +#endif + +#ifndef __ARM_ARCH__ +#error Unable to determine architecture. +#endif + +/* There are times when we might prefer Thumb1 code even if ARM code is + permitted, for example, the code might be smaller, or there might be + interworking problems with switching to ARM state if interworking is + disabled. */ +#if (defined(__thumb__) \ + && !defined(__thumb2__) \ + && (!defined(__THUMB_INTERWORK__) \ + || defined (__OPTIMIZE_SIZE__) \ + || defined(__ARM_ARCH_6M__))) +# define __prefer_thumb__ +#endif + +/* How to return from a function call depends on the architecture variant. */ + +#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) + +# define RET bx lr +# define RETc(x) bx##x lr + +/* Special precautions for interworking on armv4t. */ +# if (__ARM_ARCH__ == 4) + +/* Always use bx, not ldr pc. */ +# if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) +# define __INTERWORKING__ +# endif /* __THUMB__ || __THUMB_INTERWORK__ */ + +/* Include thumb stub before arm mode code. 
*/ +# if defined(__thumb__) && !defined(__THUMB_INTERWORK__) +# define __INTERWORKING_STUBS__ +# endif /* __thumb__ && !__THUMB_INTERWORK__ */ + +#endif /* __ARM_ARCH == 4 */ + +#else + +# define RET mov pc, lr +# define RETc(x) mov##x pc, lr + +#endif + +.macro cfi_pop advance, reg, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0xc0 | \reg) /* DW_CFA_restore */ + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_push advance, reg, offset, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0x80 | \reg) /* DW_CFA_offset */ + .uleb128 (\offset / -4) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_start start_label, end_label +#ifdef __ELF__ + .pushsection .debug_frame +LSYM(Lstart_frame): + .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE +LSYM(Lstart_cie): + .4byte 0xffffffff @ CIE Identifier Tag + .byte 0x1 @ CIE Version + .ascii "\0" @ CIE Augmentation + .uleb128 0x1 @ CIE Code Alignment Factor + .sleb128 -4 @ CIE Data Alignment Factor + .byte 0xe @ CIE RA Column + .byte 0xc @ DW_CFA_def_cfa + .uleb128 0xd + .uleb128 0x0 + + .align 2 +LSYM(Lend_cie): + .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length +LSYM(Lstart_fde): + .4byte LSYM(Lstart_frame) @ FDE CIE offset + .4byte \start_label @ FDE initial location + .4byte \end_label-\start_label @ FDE address range + .popsection +#endif +.endm +.macro cfi_end end_label +#ifdef __ELF__ + .pushsection .debug_frame + .align 2 +LSYM(Lend_fde): + .popsection +\end_label: +#endif +.endm + +/* Don't pass dirn, it's there just to get token pasting right. */ + +.macro RETLDM regs=, cond=, unwind=, dirn=ia +#if defined (__INTERWORKING__) + .ifc "\regs","" + ldr\cond lr, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, lr} +# else + ldm\cond\dirn sp!, {\regs, lr} +# endif + .endif + .ifnc "\unwind", "" + /* Mark LR as restored. */ +97: cfi_pop 97b - \unwind, 0xe, 0x0 + .endif + bx\cond lr +#else + /* Caller is responsible for providing IT instruction. */ + .ifc "\regs","" + ldr\cond pc, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, pc} +# else + ldm\cond\dirn sp!, {\regs, pc} +# endif + .endif +#endif +.endm + +/* The Unified assembly syntax allows the same code to be assembled for both + ARM and Thumb-2. However this is only supported by recent gas, so define + a set of macros to allow ARM code on older assemblers. */ +#if defined(__thumb2__) +.macro do_it cond, suffix="" + it\suffix \cond +.endm +.macro shift1 op, arg0, arg1, arg2 + \op \arg0, \arg1, \arg2 +.endm +#define do_push push +#define do_pop pop +#define COND(op1, op2, cond) op1 ## op2 ## cond +/* Perform an arithmetic operation with a variable shift operand. This + requires two instructions and a scratch register on Thumb-2. 
*/ +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \shiftop \tmp, \src2, \shiftreg + \name \dest, \src1, \tmp +.endm +#else +.macro do_it cond, suffix="" +.endm +.macro shift1 op, arg0, arg1, arg2 + mov \arg0, \arg1, \op \arg2 +.endm +#define do_push stmfd sp!, +#define do_pop ldmfd sp!, +#define COND(op1, op2, cond) op1 ## cond ## op2 +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \name \dest, \src1, \src2, \shiftop \shiftreg +.endm +#endif + +#ifdef __ARM_EABI__ +.macro ARM_LDIV0 name signed + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM (__aeabi_idiv0) __PLT__ +.endm +#else +.macro ARM_LDIV0 name signed + str lr, [sp, #-8]! +98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 + bl SYM (__div0) __PLT__ + mov r0, #0 @ About as wrong as it could be. + RETLDM unwind=98b +.endm +#endif + + +#ifdef __ARM_EABI__ +.macro THUMB_LDIV0 name signed +#if defined(__ARM_ARCH_6M__) + .ifc \signed, unsigned + cmp r0, #0 + beq 1f + mov r0, #0 + mvn r0, r0 @ 0xffffffff +1: + .else + cmp r0, #0 + beq 2f + blt 3f + mov r0, #0 + mvn r0, r0 + lsr r0, r0, #1 @ 0x7fffffff + b 2f +3: mov r0, #0x80 + lsl r0, r0, #24 @ 0x80000000 +2: + .endif + push {r0, r1, r2} + ldr r0, 4f + adr r1, 4f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +4: + .word __aeabi_idiv0 - 4b +#elif defined(__thumb2__) + .syntax unified + .ifc \signed, unsigned + cbz r0, 1f + mov r0, #0xffffffff +1: + .else + cmp r0, #0 + do_it gt + movgt r0, #0x7fffffff + do_it lt + movlt r0, #0x80000000 + .endif + b.w SYM(__aeabi_idiv0) __PLT__ +#else + .align 2 + bx pc + nop + .arm + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM(__aeabi_idiv0) __PLT__ + .thumb +#endif +.endm +#else +.macro THUMB_LDIV0 name signed + push { r1, lr } +98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 + bl SYM (__div0) + mov r0, #0 @ About as wrong as it could be. +#if defined (__INTERWORKING__) + pop { r1, r2 } + bx r2 +#else + pop { r1, pc } +#endif +.endm +#endif + +.macro FUNC_END name + SIZE (__\name) +.endm + +.macro DIV_FUNC_END name signed + cfi_start __\name, LSYM(Lend_div0) +LSYM(Ldiv0): +#ifdef __thumb__ + THUMB_LDIV0 \name \signed +#else + ARM_LDIV0 \name \signed +#endif + cfi_end LSYM(Lend_div0) + FUNC_END \name +.endm + +.macro THUMB_FUNC_START name + .globl SYM (\name) + TYPE (\name) + .thumb_func +SYM (\name): +.endm + +/* Function start macros. Variants for ARM and Thumb. */ + +#ifdef __thumb__ +#define THUMB_FUNC .thumb_func +#define THUMB_CODE .force_thumb +# if defined(__thumb2__) +#define THUMB_SYNTAX .syntax divided +# else +#define THUMB_SYNTAX +# endif +#else +#define THUMB_FUNC +#define THUMB_CODE +#define THUMB_SYNTAX +#endif + +.macro FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + THUMB_CODE + THUMB_FUNC + THUMB_SYNTAX +SYM (__\name): +.endm + +/* Special function that will always be coded in ARM assembly, even if + in Thumb-only compilation. */ + +#if defined(__thumb2__) + +/* For Thumb-2 we build everything in thumb mode. */ +.macro ARM_FUNC_START name + FUNC_START \name + .syntax unified +.endm +#define EQUIV .thumb_set +.macro ARM_CALL name + bl __\name +.endm + +#elif defined(__INTERWORKING_STUBS__) + +.macro ARM_FUNC_START name + FUNC_START \name + bx pc + nop + .arm +/* A hook to tell gdb that we've switched to ARM mode. 
Also used to call + directly from other local arm routines. */ +_L__\name: +.endm +#define EQUIV .thumb_set +/* Branch directly to a function declared with ARM_FUNC_START. + Must be called in arm mode. */ +.macro ARM_CALL name + bl _L__\name +.endm + +#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ + +#ifdef __ARM_ARCH_6M__ +#define EQUIV .thumb_set +#else +.macro ARM_FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + .arm +SYM (__\name): +.endm +#define EQUIV .set +.macro ARM_CALL name + bl __\name +.endm +#endif + +#endif + +.macro FUNC_ALIAS new old + .globl SYM (__\new) +#if defined (__thumb__) + .thumb_set SYM (__\new), SYM (__\old) +#else + .set SYM (__\new), SYM (__\old) +#endif +.endm + +#ifndef __ARM_ARCH_6M__ +.macro ARM_FUNC_ALIAS new old + .globl SYM (__\new) + EQUIV SYM (__\new), SYM (__\old) +#if defined(__INTERWORKING_STUBS__) + .set SYM (_L__\new), SYM (_L__\old) +#endif +.endm +#endif + +#ifdef __ARMEB__ +#define xxh r0 +#define xxl r1 +#define yyh r2 +#define yyl r3 +#else +#define xxh r1 +#define xxl r0 +#define yyh r3 +#define yyl r2 +#endif + +#ifdef __ARM_EABI__ +.macro WEAK name + .weak SYM (__\name) +.endm +#endif + +#ifdef __thumb__ +/* Register aliases. */ + +work .req r4 @ XXXX is this safe ? +dividend .req r0 +divisor .req r1 +overdone .req r2 +result .req r2 +curbit .req r3 +#endif +#if 0 +ip .req r12 +sp .req r13 +lr .req r14 +pc .req r15 +#endif + +/* ------------------------------------------------------------------------ */ +/* Bodies of the division and modulo routines. */ +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + +#if defined (__thumb2__) + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsb \curbit, \curbit, #31 + adr \result, 1f + add \curbit, \result, \curbit, lsl #4 + mov \result, #0 + mov pc, \curbit +.p2align 3 +1: + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp.w \dividend, \divisor, lsl #shift + nop.n + adc.w \result, \result, \result + it cs + subcs.w \dividend, \dividend, \divisor, lsl #shift + .endr +#else + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsbs \curbit, \curbit, #31 + addne \curbit, \curbit, \curbit, lsl #1 + mov \result, #0 + addne pc, pc, \curbit, lsl #2 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + adc \result, \result, \result + subcs \dividend, \dividend, \divisor, lsl #shift + .endr +#endif + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else /* __ARM_ARCH__ < 5 */ + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4-bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. 
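[Editorial note, not part of the imported sources.] The ARM_DIV_BODY comments above describe the fallback strategy used when clz is unavailable or the code is built for size: shift the divisor (and a current-bit mask) upward until it no longer fits below the dividend, then decide one quotient bit per compare-and-subtract step on the way back down. The C sketch below is only an illustration of that idea; the name soft_udiv is made up, the real routine tests for a zero divisor before reaching this point, and it also unrolls four bit positions per loop iteration.

#include <stdint.h>

/* Restoring (shift-and-subtract) unsigned division, mirroring the
   pre-ARMv5 path of ARM_DIV_BODY one bit at a time.  */
static uint32_t
soft_udiv (uint32_t dividend, uint32_t divisor)
{
  uint32_t curbit = 1;   /* quotient bit currently carried by divisor */
  uint32_t result = 0;

  if (divisor == 0)
    return 0;            /* the real code branches to the __div0 handler */

  /* Align: shift divisor and curbit up together until the divisor
     would overtake the dividend or overflow.  */
  while (divisor < dividend && !(divisor & 0x80000000u))
    {
      divisor <<= 1;
      curbit <<= 1;
    }

  /* One quotient bit per step on the way back down; the dividend is
     left holding the remainder.  */
  while (curbit != 0)
    {
      if (dividend >= divisor)
        {
          dividend -= divisor;
          result |= curbit;
        }
      divisor >>= 1;
      curbit >>= 1;
    }

  return result;
}

The clz-based path at the top of the macro computes the same alignment distance in a couple of instructions and then jumps into an unrolled compare/subtract ladder instead of looping.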
+1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif /* __ARM_ARCH__ < 5 */ + + @ Division loop +1: cmp \dividend, \divisor + do_it hs, t + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + do_it ne, t + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV2_ORDER divisor, order + +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + rsbs \order, \order, #31 + addne pc, pc, \order, lsl #3 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + subcs \dividend, \dividend, \divisor, lsl #shift + .endr + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else /* __ARM_ARCH__ < 5 */ + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif /* __ARM_ARCH__ < 5 */ + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. 
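[Editorial note, not part of the imported sources.] The ARM_MOD_BODY comment just above ("perform all needed subtractions to keep only the remainder") is the same scheme with the quotient bookkeeping dropped: after aligning the divisor upward, subtract whenever the shifted divisor still fits and step it back down, four comparisons per loop plus a short fix-up for the leftover one to three steps. A one-step-at-a-time C sketch of that behaviour follows; the name soft_umod is hypothetical.

#include <stdint.h>

/* Remainder-only shift-and-subtract, mirroring ARM_MOD_BODY without
   the four-way unrolling.  */
static uint32_t
soft_umod (uint32_t dividend, uint32_t divisor)
{
  int order = 0;   /* how many positions the divisor was shifted up */

  if (divisor == 0)
    return 0;      /* the real code branches to the __div0 handler */

  while (divisor < dividend && !(divisor & 0x80000000u))
    {
      divisor <<= 1;
      order++;
    }

  /* One compare-and-subtract per alignment step; no quotient bits are
     recorded, so only the shrinking dividend matters.  */
  for (; order >= 0; order--)
    {
      if (dividend >= divisor)
        dividend -= divisor;
      divisor >>= 1;
    }

  return dividend;   /* what is left is the remainder */
}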
+ subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro THUMB_DIV_MOD_BODY modulo + @ Load the constant 0x10000000 into our work register. + mov work, #1 + lsl work, #28 +LSYM(Loop1): + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, work + bhs LSYM(Lbignum) + cmp divisor, dividend + bhs LSYM(Lbignum) + lsl divisor, #4 + lsl curbit, #4 + b LSYM(Loop1) +LSYM(Lbignum): + @ Set work to 0x80000000 + lsl work, #3 +LSYM(Loop2): + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, work + bhs LSYM(Loop3) + cmp divisor, dividend + bhs LSYM(Loop3) + lsl divisor, #1 + lsl curbit, #1 + b LSYM(Loop2) +LSYM(Loop3): + @ Test for possible subtractions ... + .if \modulo + @ ... On the final pass, this may subtract too much from the dividend, + @ so keep track of which subtractions are done, we can fix them up + @ afterwards. + mov overdone, #0 + cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + mov ip, curbit + mov work, #1 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + mov ip, curbit + mov work, #2 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + mov ip, curbit + mov work, #3 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover4): + mov ip, curbit + .else + @ ... and note which bits are done in the result. On the final pass, + @ this may subtract too much from the dividend, but the result will be ok, + @ since the "bit" will have been shifted out at the bottom. 
+ cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor + orr result, result, curbit +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + lsr work, curbit, #1 + orr result, work +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + lsr work, curbit, #2 + orr result, work +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + lsr work, curbit, #3 + orr result, work +LSYM(Lover4): + .endif + + cmp dividend, #0 @ Early termination? + beq LSYM(Lover5) + lsr curbit, #4 @ No, any more bits to do? + beq LSYM(Lover5) + lsr divisor, #4 + b LSYM(Loop3) +LSYM(Lover5): + .if \modulo + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + mov work, #0xe + lsl work, #28 + and overdone, work + beq LSYM(Lgot_result) + + @ If we terminated early, because dividend became zero, then the + @ bit in ip will not be in the bottom nibble, and we should not + @ perform the additions below. We must test for this though + @ (rather relying upon the TSTs to prevent the additions) since + @ the bit in ip could be in the top two bits which might then match + @ with one of the smaller RORs. + mov curbit, ip + mov work, #0x7 + tst curbit, work + beq LSYM(Lgot_result) + + mov curbit, ip + mov work, #3 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover6) + lsr work, divisor, #3 + add dividend, work +LSYM(Lover6): + mov curbit, ip + mov work, #2 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover7) + lsr work, divisor, #2 + add dividend, work +LSYM(Lover7): + mov curbit, ip + mov work, #1 + ror curbit, work + tst overdone, curbit + beq LSYM(Lgot_result) + lsr work, divisor, #1 + add dividend, work + .endif +LSYM(Lgot_result): +.endm +/* ------------------------------------------------------------------------ */ +/* Start of the Real Functions */ +/* ------------------------------------------------------------------------ */ +#ifdef L_udivsi3 + +#if defined(__prefer_thumb__) + + FUNC_START udivsi3 + FUNC_ALIAS aeabi_uidiv udivsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(udivsi3_skip_div0_test): + mov curbit, #1 + mov result, #0 + + push { work } + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + pop { work } + RET + +#else /* ARM version/Thumb-2. */ + + ARM_FUNC_START udivsi3 + ARM_FUNC_ALIAS aeabi_uidiv udivsi3 + + /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily + check for division-by-zero a second time. 
*/ +LSYM(udivsi3_skip_div0_test): + subs r2, r1, #1 + do_it eq + RETc(eq) + bcc LSYM(Ldiv0) + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + RET + +11: do_it eq, e + moveq r0, #1 + movne r0, #0 + RET + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + RET + +#endif /* ARM version */ + + DIV_FUNC_END udivsi3 unsigned + +#if defined(__prefer_thumb__) +FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(udivsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(udivsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_uidivmod + +#endif /* L_udivsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_umodsi3 + + FUNC_START umodsi3 + +#ifdef __thumb__ + + cmp divisor, #0 + beq LSYM(Ldiv0) + mov curbit, #1 + cmp dividend, divisor + bhs LSYM(Lover10) + RET + +LSYM(Lover10): + push { work } + + THUMB_DIV_MOD_BODY 1 + + pop { work } + RET + +#else /* ARM version. */ + + subs r2, r1, #1 @ compare divisor with 1 + bcc LSYM(Ldiv0) + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + RETc(ls) + + ARM_MOD_BODY r0, r1, r2, r3 + + RET + +#endif /* ARM version. */ + + DIV_FUNC_END umodsi3 unsigned + +#endif /* L_umodsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_divsi3 + +#if defined(__prefer_thumb__) + + FUNC_START divsi3 + FUNC_ALIAS aeabi_idiv divsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + push { work } + mov work, dividend + eor work, divisor @ Save the sign of the result. + mov ip, work + mov curbit, #1 + mov result, #0 + cmp divisor, #0 + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + mov work, ip + cmp work, #0 + bpl LSYM(Lover12) + neg r0, r0 +LSYM(Lover12): + pop { work } + RET + +#else /* ARM/Thumb-2 version. */ + + ARM_FUNC_START divsi3 + ARM_FUNC_ALIAS aeabi_idiv divsi3 + + cmp r1, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + eor ip, r0, r1 @ save the sign of the result. + do_it mi + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + do_it mi + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + do_it mi + rsbmi r0, r0, #0 + RET + +10: teq ip, r0 @ same sign ? 
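
A brief editorial note on the __aeabi_uidivmod wrapper above (the C model is not part of the imported sources; the names are invented): the AEABI routine returns the quotient in r0 and the remainder in r1, and the remainder is recovered from the quotient with a multiply-and-subtract after the call to __udivsi3 rather than by a second division. The divisor must be non-zero; the beq to LSYM(Ldiv0) handles that case before the division.

struct uidivmod_sketch { unsigned quot; unsigned rem; };

struct uidivmod_sketch
aeabi_uidivmod_sketch (unsigned n, unsigned d)
{
  struct uidivmod_sketch r;
  r.quot = n / d;             /* the bl to __udivsi3 above            */
  r.rem  = n - r.quot * d;    /* mul r3, r2, r0 ; sub r1, r1, r3      */
  return r;                   /* quotient in r0, remainder in r1      */
}
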
+ do_it mi + rsbmi r0, r0, #0 + RET + +11: do_it lo + movlo r0, #0 + do_it eq,t + moveq r0, ip, asr #31 + orreq r0, r0, #1 + RET + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + do_it mi + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END divsi3 signed + +#if defined(__prefer_thumb__) +FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(divsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(divsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_idivmod + +#endif /* L_divsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_modsi3 + + FUNC_START modsi3 + +#ifdef __thumb__ + + mov curbit, #1 + cmp divisor, #0 + beq LSYM(Ldiv0) + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + push { work } + @ Need to save the sign of the dividend, unfortunately, we need + @ work later on. Must do this after saving the original value of + @ the work register, because we will pop this value off first. + push { dividend } + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 1 + + pop { work } + cmp work, #0 + bpl LSYM(Lover12) + neg dividend, dividend +LSYM(Lover12): + pop { work } + RET + +#else /* ARM version. */ + + cmp r1, #0 + beq LSYM(Ldiv0) + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END modsi3 signed + +#endif /* L_modsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_tls + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + FUNC_START aeabi_idiv0 + FUNC_START aeabi_ldiv0 + RET + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_START div0 + RET + FUNC_END div0 +#endif + +#endif /* L_divmodsi_tools */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_lnx +@ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls + +/* Constant taken from . */ +#define SIGFPE 8 + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + ARM_FUNC_START aeabi_idiv0 + ARM_FUNC_START aeabi_ldiv0 +#else + ARM_FUNC_START div0 +#endif + + do_push {r1, lr} + mov r0, #SIGFPE + bl SYM(raise) __PLT__ + RETLDM r1 + +#ifdef __ARM_EABI__ + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_END div0 +#endif + +#endif /* L_dvmd_lnx */ +#ifdef L_clear_cache +#if defined __ARM_EABI__ && defined __linux__ +@ EABI GNU/Linux call to cacheflush syscall. + ARM_FUNC_START clear_cache + do_push {r7} +#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) + movw r7, #2 + movt r7, #0xf +#else + mov r7, #0xf0000 + add r7, r7, #2 +#endif + mov r2, #0 + swi 0 + do_pop {r7} + RET + FUNC_END clear_cache +#else +#error "This is only for ARM EABI GNU/Linux" +#endif +#endif /* L_clear_cache */ +/* ------------------------------------------------------------------------ */ +/* Dword shift operations. 
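
Before the double-word shift helpers below, a C model of what __lshrdi3/__aeabi_llsr compute may help (an editorial sketch, not part of the imported sources). It spells out the two cases that the conditional ARM sequence below selects with movmi/movpl, and it assumes a shift count in the range 0..63; the Thumb-1 version instead exploits the property described in the comment below, that register-specified shifts by 32..255 produce zero.

unsigned long long
lshrdi3_sketch (unsigned long long x, int n)
{
  unsigned int lo = (unsigned int) x;          /* "al" */
  unsigned int hi = (unsigned int) (x >> 32);  /* "ah" */
  unsigned int out_lo, out_hi;

  if (n == 0)
    return x;                                  /* avoid a 32-bit shift by 32 in C */

  if (n < 32)
    {
      out_lo = (lo >> n) | (hi << (32 - n));   /* low word takes bits from both halves */
      out_hi = hi >> n;
    }
  else
    {
      out_lo = hi >> (n - 32);                 /* high half shifted down into the low word */
      out_hi = 0;
    }
  return ((unsigned long long) out_hi << 32) | out_lo;
}
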
*/ +/* All the following Dword shift variants rely on the fact that + shft xxx, Reg + is in fact done as + shft xxx, (Reg & 255) + so for Reg value in (32...63) and (-1...-31) we will get zero (in the + case of logical shifts) or the sign (for asr). */ + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ +#ifndef __symbian__ + +#ifdef L_lshrdi3 + + FUNC_START lshrdi3 + FUNC_ALIAS aeabi_llsr lshrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + lsr ah, r2 + mov ip, r3 + sub r2, #32 + lsr r3, r2 + orr al, r3 + neg r2, r2 + mov r3, ip + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, lsr r2 + RET +#endif + FUNC_END aeabi_llsr + FUNC_END lshrdi3 + +#endif + +#ifdef L_ashrdi3 + + FUNC_START ashrdi3 + FUNC_ALIAS aeabi_lasr ashrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + asr ah, r2 + sub r2, #32 + @ If r2 is negative at this point the following step would OR + @ the sign bit into all of AL. That's not what we want... + bmi 1f + mov ip, r3 + asr r3, r2 + orr al, r3 + mov r3, ip +1: + neg r2, r2 + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, asr r2 + RET +#endif + + FUNC_END aeabi_lasr + FUNC_END ashrdi3 + +#endif + +#ifdef L_ashldi3 + + FUNC_START ashldi3 + FUNC_ALIAS aeabi_llsl ashldi3 + +#ifdef __thumb__ + lsl ah, r2 + mov r3, al + lsl al, r2 + mov ip, r3 + sub r2, #32 + lsl r3, r2 + orr ah, r3 + neg r2, r2 + mov r3, ip + lsr r3, r2 + orr ah, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + orrmi ah, ah, al, lsr ip + mov al, al, lsl r2 + RET +#endif + FUNC_END aeabi_llsl + FUNC_END ashldi3 + +#endif + +#endif /* __symbian__ */ + +#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +#define HAVE_ARM_CLZ 1 +#endif + +#ifdef L_clzsi2 +#if defined(__ARM_ARCH_6M__) +FUNC_START clzsi2 + mov r1, #28 + mov r3, #1 + lsl r3, r3, #16 + cmp r0, r3 /* 0x10000 */ + bcc 2f + lsr r0, r0, #16 + sub r1, r1, #16 +2: lsr r3, r3, #8 + cmp r0, r3 /* #0x100 */ + bcc 2f + lsr r0, r0, #8 + sub r1, r1, #8 +2: lsr r3, r3, #4 + cmp r0, r3 /* #0x10 */ + bcc 2f + lsr r0, r0, #4 + sub r1, r1, #4 +2: adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + bx lr +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 + FUNC_END clzsi2 +#else +ARM_FUNC_START clzsi2 +# if defined(HAVE_ARM_CLZ) + clz r0, r0 + RET +# else + mov r1, #28 + cmp r0, #0x10000 + do_it cs, t + movcs r0, r0, lsr #16 + subcs r1, r1, #16 + cmp r0, #0x100 + do_it cs, t + movcs r0, r0, lsr #8 + subcs r1, r1, #8 + cmp r0, #0x10 + do_it cs, t + movcs r0, r0, lsr #4 + subcs r1, r1, #4 + adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + RET +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 +# endif /* !HAVE_ARM_CLZ */ + FUNC_END clzsi2 +#endif +#endif /* L_clzsi2 */ + +#ifdef L_clzdi2 +#if !defined(HAVE_ARM_CLZ) + +# if defined(__ARM_ARCH_6M__) +FUNC_START clzdi2 + push {r4, lr} +# else +ARM_FUNC_START clzdi2 + do_push {r4, lr} +# endif + cmp xxh, #0 + bne 1f +# ifdef __ARMEB__ + mov r0, xxl + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + bl __clzsi2 +# else + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + mov r0, xxh + bl __clzsi2 +# endif +2: +# 
if defined(__ARM_ARCH_6M__) + pop {r4, pc} +# else + RETLDM r4 +# endif + FUNC_END clzdi2 + +#else /* HAVE_ARM_CLZ */ + +ARM_FUNC_START clzdi2 + cmp xxh, #0 + do_it eq, et + clzeq r0, xxl + clzne r0, xxh + addeq r0, r0, #32 + RET + FUNC_END clzdi2 + +#endif +#endif /* L_clzdi2 */ + +/* ------------------------------------------------------------------------ */ +/* These next two sections are here despite the fact that they contain Thumb + assembler because their presence allows interworked code to be linked even + when the GCC library is this one. */ + +/* Do not build the interworking functions when the target architecture does + not support Thumb instructions. (This can be a multilib option). */ +#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ + || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ + || __ARM_ARCH__ >= 6 + +#if defined L_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code. + The address of function to be called is loaded into a register and then + one of these labels is called via a BL instruction. This puts the + return address into the link register with the bottom bit set, and the + code here switches to the correct mode before executing the function. */ + + .text + .align 0 + .force_thumb + +.macro call_via register + THUMB_FUNC_START _call_via_\register + + bx \register + nop + + SIZE (_call_via_\register) +.endm + + call_via r0 + call_via r1 + call_via r2 + call_via r3 + call_via r4 + call_via r5 + call_via r6 + call_via r7 + call_via r8 + call_via r9 + call_via sl + call_via fp + call_via ip + call_via sp + call_via lr + +#endif /* L_call_via_rX */ + +/* Don't bother with the old interworking routines for Thumb-2. */ +/* ??? Maybe only omit these on "m" variants. */ +#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) + +#if defined L_interwork_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code, + when the target address is in an unknown instruction set. The address + of function to be called is loaded into a register and then one of these + labels is called via a BL instruction. This puts the return address + into the link register with the bottom bit set, and the code here + switches to the correct mode before executing the function. Unfortunately + the target code cannot be relied upon to return via a BX instruction, so + instead we have to store the resturn address on the stack and allow the + called function to return here instead. Upon return we recover the real + return address and use a BX to get back to Thumb mode. + + There are three variations of this code. The first, + _interwork_call_via_rN(), will push the return address onto the + stack and pop it in _arm_return(). It should only be used if all + arguments are passed in registers. + + The second, _interwork_r7_call_via_rN(), instead stores the return + address at [r7, #-4]. It is the caller's responsibility to ensure + that this address is valid and contains no useful data. + + The third, _interwork_r11_call_via_rN(), works in the same way but + uses r11 instead of r7. It is useful if the caller does not really + need a frame pointer. */ + + .text + .align 0 + + .code 32 + .globl _arm_return +LSYM(Lstart_arm_return): + cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) + cfi_push 0, 0xe, -0x8, 0x8 + nop @ This nop is for the benefit of debuggers, so that + @ backtraces will use the correct unwind information. 
+_arm_return: + RETLDM unwind=LSYM(Lstart_arm_return) + cfi_end LSYM(Lend_arm_return) + + .globl _arm_return_r7 +_arm_return_r7: + ldr lr, [r7, #-4] + bx lr + + .globl _arm_return_r11 +_arm_return_r11: + ldr lr, [r11, #-4] + bx lr + +.macro interwork_with_frame frame, register, name, return + .code 16 + + THUMB_FUNC_START \name + + bx pc + nop + + .code 32 + tst \register, #1 + streq lr, [\frame, #-4] + adreq lr, _arm_return_\frame + bx \register + + SIZE (\name) +.endm + +.macro interwork register + .code 16 + + THUMB_FUNC_START _interwork_call_via_\register + + bx pc + nop + + .code 32 + .globl LSYM(Lchange_\register) +LSYM(Lchange_\register): + tst \register, #1 + streq lr, [sp, #-8]! + adreq lr, _arm_return + bx \register + + SIZE (_interwork_call_via_\register) + + interwork_with_frame r7,\register,_interwork_r7_call_via_\register + interwork_with_frame r11,\register,_interwork_r11_call_via_\register +.endm + + interwork r0 + interwork r1 + interwork r2 + interwork r3 + interwork r4 + interwork r5 + interwork r6 + interwork r7 + interwork r8 + interwork r9 + interwork sl + interwork fp + interwork ip + interwork sp + + /* The LR case has to be handled a little differently... */ + .code 16 + + THUMB_FUNC_START _interwork_call_via_lr + + bx pc + nop + + .code 32 + .globl .Lchange_lr +.Lchange_lr: + tst lr, #1 + stmeqdb r13!, {lr, pc} + mov ip, lr + adreq lr, _arm_return + bx ip + + SIZE (_interwork_call_via_lr) + +#endif /* L_interwork_call_via_rX */ +#endif /* !__thumb2__ */ + +/* Functions to support compact pic switch tables in thumb1 state. + All these routines take an index into the table in r0. The + table is at LR & ~1 (but this must be rounded up in the case + of 32-bit entires). They are only permitted to clobber r12 + and r14 and r0 must be preserved on exit. */ +#ifdef L_thumb1_case_sqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_sqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_sqi) +#endif + +#ifdef L_thumb1_case_uqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_uqi) +#endif + +#ifdef L_thumb1_case_shi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_shi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_shi) +#endif + +#ifdef L_thumb1_case_uhi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uhi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_uhi) +#endif + +#ifdef L_thumb1_case_si + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_si + push {r0, r1} + mov r1, lr + adds.n r1, r1, #2 /* Align to word. */ + lsrs r1, r1, #2 + lsls r0, r0, #2 + lsls r1, r1, #2 + ldr r0, [r1, r0] + adds r0, r0, r1 + mov lr, r0 + pop {r0, r1} + mov pc, lr /* We know we were called from thumb code. */ + SIZE (__gnu_thumb1_case_si) +#endif + +#endif /* Arch supports thumb. 
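
An editorial C model of the __gnu_thumb1_case_sqi helper above (not part of the imported sources; the names are invented). As the comment describes, the compiler emits the table of signed byte offsets immediately after the bl that reaches the helper, so the table lives at the return address with the Thumb bit cleared, and execution resumes at lr plus twice the selected entry.

unsigned long
thumb1_case_sqi_target_sketch (unsigned long lr, unsigned long index)
{
  /* The table sits at the return address with the Thumb bit stripped.  */
  const signed char *table = (const signed char *) (lr & ~1ul);

  /* ldrsb sign-extends the entry, lsls #1 doubles it, add lr, lr, r1.  */
  return lr + 2 * (long) table[index];
}
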
*/ + +#ifndef __symbian__ +#ifndef __ARM_ARCH_6M__ +#include "ieee754-df.S" +#include "ieee754-sf.S" +#include "bpabi.S" +#else /* __ARM_ARCH_6M__ */ +#include "bpabi-v6m.S" +#endif /* __ARM_ARCH_6M__ */ +#endif /* !__symbian__ */ diff --git a/gcc/config/arm/libgcc-bpabi.ver b/gcc/config/arm/libgcc-bpabi.ver new file mode 100644 index 000000000..3ba8364dc --- /dev/null +++ b/gcc/config/arm/libgcc-bpabi.ver @@ -0,0 +1,108 @@ +# Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +GCC_3.5 { + # BPABI symbols + __aeabi_cdcmpeq + __aeabi_cdcmple + __aeabi_cdrcmple + __aeabi_cfcmpeq + __aeabi_cfcmple + __aeabi_cfrcmple + __aeabi_d2f + __aeabi_d2iz + __aeabi_d2lz + __aeabi_d2uiz + __aeabi_d2ulz + __aeabi_dadd + __aeabi_dcmpeq + __aeabi_dcmpge + __aeabi_dcmpgt + __aeabi_dcmple + __aeabi_dcmplt + __aeabi_dcmpun + __aeabi_ddiv + __aeabi_dmul + __aeabi_dneg + __aeabi_drsub + __aeabi_dsub + __aeabi_f2d + __aeabi_f2iz + __aeabi_f2lz + __aeabi_f2uiz + __aeabi_f2ulz + __aeabi_fadd + __aeabi_fcmpeq + __aeabi_fcmpge + __aeabi_fcmpgt + __aeabi_fcmple + __aeabi_fcmplt + __aeabi_fcmpun + __aeabi_fdiv + __aeabi_fmul + __aeabi_fneg + __aeabi_frsub + __aeabi_fsub + __aeabi_i2d + __aeabi_i2f + __aeabi_idiv + __aeabi_idiv0 + __aeabi_idivmod + __aeabi_l2d + __aeabi_l2f + __aeabi_lasr + __aeabi_lcmp + __aeabi_ldiv0 + __aeabi_ldivmod + __aeabi_llsl + __aeabi_llsr + __aeabi_lmul + __aeabi_ui2d + __aeabi_ui2f + __aeabi_uidiv + __aeabi_uidivmod + __aeabi_uldivmod + __aeabi_ulcmp + __aeabi_ul2d + __aeabi_ul2f + __aeabi_uread4 + __aeabi_uread8 + __aeabi_uwrite4 + __aeabi_uwrite8 + + # Exception-Handling + # \S 7.5 + _Unwind_Complete + _Unwind_VRS_Get + _Unwind_VRS_Set + _Unwind_VRS_Pop + # \S 9.2 + __aeabi_unwind_cpp_pr0 + __aeabi_unwind_cpp_pr1 + __aeabi_unwind_cpp_pr2 + # The libstdc++ exception-handling personality routine uses this + # GNU-specific entry point. + __gnu_unwind_frame +} + +%exclude { + _Unwind_Backtrace +} +GCC_4.3.0 { + _Unwind_Backtrace +} diff --git a/gcc/config/arm/libunwind.S b/gcc/config/arm/libunwind.S new file mode 100644 index 000000000..48eb592fd --- /dev/null +++ b/gcc/config/arm/libunwind.S @@ -0,0 +1,363 @@ +/* Support functions for the unwinder. + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Paul Brook + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifndef __symbian__ + +#include "lib1funcs.asm" + +.macro UNPREFIX name + .global SYM (\name) + EQUIV SYM (\name), SYM (__\name) +.endm + +#if (__ARM_ARCH__ == 4) +/* Some coprocessors require armv5. We know this code will never be run on + other cpus. Tell gas to allow armv5, but only mark the objects as armv4. + */ +.arch armv5t +#ifdef __ARM_ARCH_4T__ +.object_arch armv4t +#else +.object_arch armv4 +#endif +#endif + +#ifdef __ARM_ARCH_6M__ + +/* r0 points to a 16-word block. Upload these values to the actual core + state. */ +FUNC_START restore_core_regs + mov r1, r0 + add r1, r1, #52 + ldmia r1!, {r3, r4, r5} + sub r3, r3, #4 + mov ip, r3 + str r5, [r3] + mov lr, r4 + /* Restore r8-r11. */ + mov r1, r0 + add r1, r1, #32 + ldmia r1!, {r2, r3, r4, r5} + mov r8, r2 + mov r9, r3 + mov sl, r4 + mov fp, r5 + mov r1, r0 + add r1, r1, #8 + ldmia r1!, {r2, r3, r4, r5, r6, r7} + ldr r1, [r0, #4] + ldr r0, [r0] + mov sp, ip + pop {pc} + FUNC_END restore_core_regs + UNPREFIX restore_core_regs + +/* ARMV6M does not have coprocessors, so these should never be used. */ +FUNC_START gnu_Unwind_Restore_VFP + RET + +/* Store VFR regsters d0-d15 to the address in r0. */ +FUNC_START gnu_Unwind_Save_VFP + RET + +/* Load VFP registers d0-d15 from the address in r0. + Use this to load from FSTMD format. */ +FUNC_START gnu_Unwind_Restore_VFP_D + RET + +/* Store VFP registers d0-d15 to the address in r0. + Use this to store in FLDMD format. */ +FUNC_START gnu_Unwind_Save_VFP_D + RET + +/* Load VFP registers d16-d31 from the address in r0. + Use this to load from FSTMD (=VSTM) format. Needs VFPv3. */ +FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31 + RET + +/* Store VFP registers d16-d31 to the address in r0. + Use this to store in FLDMD (=VLDM) format. Needs VFPv3. */ +FUNC_START gnu_Unwind_Save_VFP_D_16_to_31 + RET + +FUNC_START gnu_Unwind_Restore_WMMXD + RET + +FUNC_START gnu_Unwind_Save_WMMXD + RET + +FUNC_START gnu_Unwind_Restore_WMMXC + RET + +FUNC_START gnu_Unwind_Save_WMMXC + RET + +.macro UNWIND_WRAPPER name nargs + FUNC_START \name + /* Create a phase2_vrs structure. */ + /* Save r0 in the PC slot so we can use it as a scratch register. */ + push {r0} + add r0, sp, #4 + push {r0, lr} /* Push original SP and LR. */ + /* Make space for r8-r12. */ + sub sp, sp, #20 + /* Save low registers. */ + push {r0, r1, r2, r3, r4, r5, r6, r7} + /* Save high registers. */ + add r0, sp, #32 + mov r1, r8 + mov r2, r9 + mov r3, sl + mov r4, fp + mov r5, ip + stmia r0!, {r1, r2, r3, r4, r5} + /* Restore original low register values. 
*/ + add r0, sp, #4 + ldmia r0!, {r1, r2, r3, r4, r5} + /* Restore orginial r0. */ + ldr r0, [sp, #60] + str r0, [sp] + /* Demand-save flags, plus an extra word for alignment. */ + mov r3, #0 + push {r2, r3} + /* Point r1 at the block. Pass r[0..nargs) unchanged. */ + add r\nargs, sp, #4 + + bl SYM (__gnu\name) + + ldr r3, [sp, #64] + add sp, sp, #72 + bx r3 + + FUNC_END \name + UNPREFIX \name +.endm + +#else /* !__ARM_ARCH_6M__ */ + +/* r0 points to a 16-word block. Upload these values to the actual core + state. */ +ARM_FUNC_START restore_core_regs + /* We must use sp as the base register when restoring sp. Push the + last 3 registers onto the top of the current stack to achieve + this. */ + add r1, r0, #52 + ldmia r1, {r3, r4, r5} /* {sp, lr, pc}. */ +#if defined(__thumb2__) + /* Thumb-2 doesn't allow sp in a load-multiple instruction, so push + the target address onto the target stack. This is safe as + we're always returning to somewhere further up the call stack. */ + mov ip, r3 + mov lr, r4 + str r5, [ip, #-4]! +#elif defined(__INTERWORKING__) + /* Restore pc into ip. */ + mov r2, r5 + stmfd sp!, {r2, r3, r4} +#else + stmfd sp!, {r3, r4, r5} +#endif + /* Don't bother restoring ip. */ + ldmia r0, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp} +#if defined(__thumb2__) + /* Pop the return address off the target stack. */ + mov sp, ip + pop {pc} +#elif defined(__INTERWORKING__) + /* Pop the three registers we pushed earlier. */ + ldmfd sp, {ip, sp, lr} + bx ip +#else + ldmfd sp, {sp, lr, pc} +#endif + FUNC_END restore_core_regs + UNPREFIX restore_core_regs + +/* Load VFP registers d0-d15 from the address in r0. + Use this to load from FSTMX format. */ +ARM_FUNC_START gnu_Unwind_Restore_VFP + /* Use the generic coprocessor form so that gas doesn't complain + on soft-float targets. */ + ldc p11,cr0,[r0],{0x21} /* fldmiax r0, {d0-d15} */ + RET + +/* Store VFP registers d0-d15 to the address in r0. + Use this to store in FSTMX format. */ +ARM_FUNC_START gnu_Unwind_Save_VFP + /* Use the generic coprocessor form so that gas doesn't complain + on soft-float targets. */ + stc p11,cr0,[r0],{0x21} /* fstmiax r0, {d0-d15} */ + RET + +/* Load VFP registers d0-d15 from the address in r0. + Use this to load from FSTMD format. */ +ARM_FUNC_START gnu_Unwind_Restore_VFP_D + ldc p11,cr0,[r0],{0x20} /* fldmiad r0, {d0-d15} */ + RET + +/* Store VFP registers d0-d15 to the address in r0. + Use this to store in FLDMD format. */ +ARM_FUNC_START gnu_Unwind_Save_VFP_D + stc p11,cr0,[r0],{0x20} /* fstmiad r0, {d0-d15} */ + RET + +/* Load VFP registers d16-d31 from the address in r0. + Use this to load from FSTMD (=VSTM) format. Needs VFPv3. */ +ARM_FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31 + ldcl p11,cr0,[r0],{0x20} /* vldm r0, {d16-d31} */ + RET + +/* Store VFP registers d16-d31 to the address in r0. + Use this to store in FLDMD (=VLDM) format. Needs VFPv3. */ +ARM_FUNC_START gnu_Unwind_Save_VFP_D_16_to_31 + stcl p11,cr0,[r0],{0x20} /* vstm r0, {d16-d31} */ + RET + +ARM_FUNC_START gnu_Unwind_Restore_WMMXD + /* Use the generic coprocessor form so that gas doesn't complain + on non-iWMMXt targets. 
*/ + ldcl p1, cr0, [r0], #8 /* wldrd wr0, [r0], #8 */ + ldcl p1, cr1, [r0], #8 /* wldrd wr1, [r0], #8 */ + ldcl p1, cr2, [r0], #8 /* wldrd wr2, [r0], #8 */ + ldcl p1, cr3, [r0], #8 /* wldrd wr3, [r0], #8 */ + ldcl p1, cr4, [r0], #8 /* wldrd wr4, [r0], #8 */ + ldcl p1, cr5, [r0], #8 /* wldrd wr5, [r0], #8 */ + ldcl p1, cr6, [r0], #8 /* wldrd wr6, [r0], #8 */ + ldcl p1, cr7, [r0], #8 /* wldrd wr7, [r0], #8 */ + ldcl p1, cr8, [r0], #8 /* wldrd wr8, [r0], #8 */ + ldcl p1, cr9, [r0], #8 /* wldrd wr9, [r0], #8 */ + ldcl p1, cr10, [r0], #8 /* wldrd wr10, [r0], #8 */ + ldcl p1, cr11, [r0], #8 /* wldrd wr11, [r0], #8 */ + ldcl p1, cr12, [r0], #8 /* wldrd wr12, [r0], #8 */ + ldcl p1, cr13, [r0], #8 /* wldrd wr13, [r0], #8 */ + ldcl p1, cr14, [r0], #8 /* wldrd wr14, [r0], #8 */ + ldcl p1, cr15, [r0], #8 /* wldrd wr15, [r0], #8 */ + RET + +ARM_FUNC_START gnu_Unwind_Save_WMMXD + /* Use the generic coprocessor form so that gas doesn't complain + on non-iWMMXt targets. */ + stcl p1, cr0, [r0], #8 /* wstrd wr0, [r0], #8 */ + stcl p1, cr1, [r0], #8 /* wstrd wr1, [r0], #8 */ + stcl p1, cr2, [r0], #8 /* wstrd wr2, [r0], #8 */ + stcl p1, cr3, [r0], #8 /* wstrd wr3, [r0], #8 */ + stcl p1, cr4, [r0], #8 /* wstrd wr4, [r0], #8 */ + stcl p1, cr5, [r0], #8 /* wstrd wr5, [r0], #8 */ + stcl p1, cr6, [r0], #8 /* wstrd wr6, [r0], #8 */ + stcl p1, cr7, [r0], #8 /* wstrd wr7, [r0], #8 */ + stcl p1, cr8, [r0], #8 /* wstrd wr8, [r0], #8 */ + stcl p1, cr9, [r0], #8 /* wstrd wr9, [r0], #8 */ + stcl p1, cr10, [r0], #8 /* wstrd wr10, [r0], #8 */ + stcl p1, cr11, [r0], #8 /* wstrd wr11, [r0], #8 */ + stcl p1, cr12, [r0], #8 /* wstrd wr12, [r0], #8 */ + stcl p1, cr13, [r0], #8 /* wstrd wr13, [r0], #8 */ + stcl p1, cr14, [r0], #8 /* wstrd wr14, [r0], #8 */ + stcl p1, cr15, [r0], #8 /* wstrd wr15, [r0], #8 */ + RET + +ARM_FUNC_START gnu_Unwind_Restore_WMMXC + /* Use the generic coprocessor form so that gas doesn't complain + on non-iWMMXt targets. */ + ldc2 p1, cr8, [r0], #4 /* wldrw wcgr0, [r0], #4 */ + ldc2 p1, cr9, [r0], #4 /* wldrw wcgr1, [r0], #4 */ + ldc2 p1, cr10, [r0], #4 /* wldrw wcgr2, [r0], #4 */ + ldc2 p1, cr11, [r0], #4 /* wldrw wcgr3, [r0], #4 */ + RET + +ARM_FUNC_START gnu_Unwind_Save_WMMXC + /* Use the generic coprocessor form so that gas doesn't complain + on non-iWMMXt targets. */ + stc2 p1, cr8, [r0], #4 /* wstrw wcgr0, [r0], #4 */ + stc2 p1, cr9, [r0], #4 /* wstrw wcgr1, [r0], #4 */ + stc2 p1, cr10, [r0], #4 /* wstrw wcgr2, [r0], #4 */ + stc2 p1, cr11, [r0], #4 /* wstrw wcgr3, [r0], #4 */ + RET + +/* Wrappers to save core registers, then call the real routine. */ + +.macro UNWIND_WRAPPER name nargs + ARM_FUNC_START \name + /* Create a phase2_vrs structure. */ + /* Split reg push in two to ensure the correct value for sp. */ +#if defined(__thumb2__) + mov ip, sp + push {lr} /* PC is ignored. */ + push {ip, lr} /* Push original SP and LR. */ +#else + stmfd sp!, {sp, lr, pc} +#endif + stmfd sp!, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip} + + /* Demand-save flags, plus an extra word for alignment. */ + mov r3, #0 + stmfd sp!, {r2, r3} + + /* Point r1 at the block. Pass r[0..nargs) unchanged. */ + add r\nargs, sp, #4 +#if defined(__thumb__) && !defined(__thumb2__) + /* Switch back to thumb mode to avoid interworking hassle. 
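
An editorial aside on the frame that UNWIND_WRAPPER builds above (not part of the imported sources): the offsets it uses are consistent with a demand-save-flags word followed by the sixteen core registers, with one extra word pushed first purely for 8-byte stack alignment. The structure below is an inference from those offsets, not a copy of the unwinder's real declaration.

struct core_regs_sketch
{
  unsigned int r[16];                 /* r0-r12, sp, lr, pc as saved above   */
};

struct phase2_vrs_sketch
{
  unsigned int demand_save_flags;     /* the zero written via "mov r3, #0"   */
  struct core_regs_sketch core;
};

/* sizeof (struct phase2_vrs_sketch) == 68; with the alignment word the
   wrapper releases 72 bytes ("add sp, sp, #72"), and the saved lr sits 64
   bytes above sp ("ldr r3, [sp, #64]" / "ldr lr, [sp, #64]").  */
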
*/ + adr ip, .L1_\name + orr ip, ip, #1 + bx ip + .thumb +.L1_\name: + bl SYM (__gnu\name) __PLT__ + ldr r3, [sp, #64] + add sp, #72 + bx r3 +#else + bl SYM (__gnu\name) __PLT__ + ldr lr, [sp, #64] + add sp, sp, #72 + RET +#endif + FUNC_END \name + UNPREFIX \name +.endm + +#endif /* !__ARM_ARCH_6M__ */ + +UNWIND_WRAPPER _Unwind_RaiseException 1 +UNWIND_WRAPPER _Unwind_Resume 1 +UNWIND_WRAPPER _Unwind_Resume_or_Rethrow 1 +UNWIND_WRAPPER _Unwind_ForcedUnwind 3 +UNWIND_WRAPPER _Unwind_Backtrace 2 + +#endif /* ndef __symbian__ */ diff --git a/gcc/config/arm/linux-atomic.c b/gcc/config/arm/linux-atomic.c new file mode 100644 index 000000000..57065a6e8 --- /dev/null +++ b/gcc/config/arm/linux-atomic.c @@ -0,0 +1,278 @@ +/* Linux-specific atomic operations for ARM EABI. + Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Kernel helper for compare-and-exchange. */ +typedef int (__kernel_cmpxchg_t) (int oldval, int newval, int *ptr); +#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *) 0xffff0fc0) + +/* Kernel helper for memory barrier. */ +typedef void (__kernel_dmb_t) (void); +#define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0) + +/* Note: we implement byte, short and int versions of atomic operations using + the above kernel helpers, but there is no support for "long long" (64-bit) + operations as yet. */ + +#define HIDDEN __attribute__ ((visibility ("hidden"))) + +#ifdef __ARMEL__ +#define INVERT_MASK_1 0 +#define INVERT_MASK_2 0 +#else +#define INVERT_MASK_1 24 +#define INVERT_MASK_2 16 +#endif + +#define MASK_1 0xffu +#define MASK_2 0xffffu + +#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_fetch_and_##OP##_4 (int *ptr, int val) \ + { \ + int failure, tmp; \ + \ + do { \ + tmp = *ptr; \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return tmp; \ + } + +FETCH_AND_OP_WORD (add, , +) +FETCH_AND_OP_WORD (sub, , -) +FETCH_AND_OP_WORD (or, , |) +FETCH_AND_OP_WORD (and, , &) +FETCH_AND_OP_WORD (xor, , ^) +FETCH_AND_OP_WORD (nand, ~, &) + +#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH +#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH + +/* Implement both __sync__and_fetch and __sync_fetch_and_ for + subword-sized quantities. 
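
An editorial illustration of the subword trick used by the macro defined just below (not part of the imported sources): SUBWORD_SYNC_OP operates on the aligned 32-bit word that contains the byte or halfword and drives the word-sized kernel compare-and-exchange helper in a retry loop. Written out by hand for a little-endian 16-bit fetch-and-add, so INVERT_MASK_2 is 0; the function name is invented and __kernel_cmpxchg is the helper declared earlier in this file.

static unsigned short
fetch_and_add_2_sketch (unsigned short *ptr, unsigned short val)
{
  /* Word that contains *ptr, and where inside it the halfword lives.  */
  int *wordptr = (int *) ((unsigned int) ptr & ~3);
  unsigned int shift = ((unsigned int) ptr & 3) << 3;   /* 0 or 16 on LE */
  unsigned int mask = 0xffffu << shift;
  unsigned int oldval, newval;
  int failure;

  do
    {
      oldval = *wordptr;
      newval = ((((oldval & mask) >> shift) + val) << shift) & mask;
      newval |= oldval & ~mask;          /* leave the other halfword alone */
      failure = __kernel_cmpxchg (oldval, newval, wordptr);
    }
  while (failure != 0);

  return (oldval & mask) >> shift;       /* the "oldval" flavour of the macro */
}
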
*/ + +#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \ + TYPE HIDDEN \ + NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \ + { \ + int *wordptr = (int *) ((unsigned int) ptr & ~3); \ + unsigned int mask, shift, oldval, newval; \ + int failure; \ + \ + shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = ((PFX_OP (((oldval & mask) >> shift) \ + INF_OP (unsigned int) val)) << shift) & mask; \ + newval |= oldval & ~mask; \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (RETURN & mask) >> shift; \ + } + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval) + +#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_##OP##_and_fetch_4 (int *ptr, int val) \ + { \ + int tmp, failure; \ + \ + do { \ + tmp = *ptr; \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return PFX_OP (tmp INF_OP val); \ + } + +OP_AND_FETCH_WORD (add, , +) +OP_AND_FETCH_WORD (sub, , -) +OP_AND_FETCH_WORD (or, , |) +OP_AND_FETCH_WORD (and, , &) +OP_AND_FETCH_WORD (xor, , ^) +OP_AND_FETCH_WORD (nand, ~, &) + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval) + +int HIDDEN +__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int actual_oldval, fail; + + while (1) + { + actual_oldval = *ptr; + + if (__builtin_expect (oldval != actual_oldval, 0)) + return actual_oldval; + + fail = __kernel_cmpxchg (actual_oldval, newval, ptr); + + if (__builtin_expect (!fail, 1)) + return oldval; + } +} + +#define SUBWORD_VAL_CAS(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + int *wordptr = (int *)((unsigned int) ptr & ~3), fail; \ + unsigned int mask, shift, actual_oldval, actual_newval; \ + \ + shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + while (1) \ + { \ + actual_oldval = *wordptr; \ + \ + if (__builtin_expect (((actual_oldval & mask) >> shift) != \ + (unsigned int) oldval, 0)) \ + return (actual_oldval & mask) >> shift; \ + \ + actual_newval = (actual_oldval & ~mask) \ + | (((unsigned int) newval << shift) & mask); \ + \ + fail = __kernel_cmpxchg (actual_oldval, 
actual_newval, \ + wordptr); \ + \ + if (__builtin_expect (!fail, 1)) \ + return oldval; \ + } \ + } + +SUBWORD_VAL_CAS (unsigned short, 2) +SUBWORD_VAL_CAS (unsigned char, 1) + +typedef unsigned char bool; + +bool HIDDEN +__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int failure = __kernel_cmpxchg (oldval, newval, ptr); + return (failure == 0); +} + +#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \ + bool HIDDEN \ + __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + TYPE actual_oldval \ + = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \ + return (oldval == actual_oldval); \ + } + +SUBWORD_BOOL_CAS (unsigned short, 2) +SUBWORD_BOOL_CAS (unsigned char, 1) + +void HIDDEN +__sync_synchronize (void) +{ + __kernel_dmb (); +} + +int HIDDEN +__sync_lock_test_and_set_4 (int *ptr, int val) +{ + int failure, oldval; + + do { + oldval = *ptr; + failure = __kernel_cmpxchg (oldval, val, ptr); + } while (failure != 0); + + return oldval; +} + +#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \ + { \ + int failure; \ + unsigned int oldval, newval, shift, mask; \ + int *wordptr = (int *) ((unsigned int) ptr & ~3); \ + \ + shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = (oldval & ~mask) \ + | (((unsigned int) val << shift) & mask); \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (oldval & mask) >> shift; \ + } + +SUBWORD_TEST_AND_SET (unsigned short, 2) +SUBWORD_TEST_AND_SET (unsigned char, 1) + +#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \ + void HIDDEN \ + __sync_lock_release_##WIDTH (TYPE *ptr) \ + { \ + /* All writes before this point must be seen before we release \ + the lock itself. */ \ + __kernel_dmb (); \ + *ptr = 0; \ + } + +SYNC_LOCK_RELEASE (int, 4) +SYNC_LOCK_RELEASE (short, 2) +SYNC_LOCK_RELEASE (char, 1) diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h new file mode 100644 index 000000000..833005284 --- /dev/null +++ b/gcc/config/arm/linux-eabi.h @@ -0,0 +1,103 @@ +/* Configuration file for ARM GNU/Linux EABI targets. + Copyright (C) 2004, 2005, 2006, 2007, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* On EABI GNU/Linux, we want both the BPABI builtins and the + GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + TARGET_BPABI_CPP_BUILTINS(); \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + ANDROID_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (false) + +/* We default to a soft-float ABI so that binaries can run on all + target hardware. 
*/ +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + +/* We default to the "aapcs-linux" ABI so that enums are int-sized by + default. */ +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX + +/* Default to armv5t so that thumb shared libraries work. + The ARM10TDMI core is the default for armv5t, so set + SUBTARGET_CPU_DEFAULT to achieve this. */ +#undef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#undef TARGET_LINKER_EMULATION +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_LINKER_EMULATION "armelfb_linux_eabi" +#else +#define TARGET_LINKER_EMULATION "armelf_linux_eabi" +#endif + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION + +/* Use ld-linux.so.3 so that it will be possible to run "classic" + GNU/Linux binaries on an EABI system. */ +#undef GLIBC_DYNAMIC_LINKER +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.3" + +/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. */ +#undef LINK_SPEC +#define LINK_SPEC BE8_LINK_SPEC \ + LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ + LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) + +#undef CC1_SPEC +#define CC1_SPEC \ + LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ + GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC) + +#define CC1PLUS_SPEC \ + LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) + +#undef LIB_SPEC +#define LIB_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ + GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + +/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we + do not use -lfloat. */ +#undef LIBGCC_SPEC + +/* Clear the instruction cache from `beg' to `end'. This is + implemented in lib1funcs.asm, so ensure an error if this definition + is used. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) not_used diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h new file mode 100644 index 000000000..81d27bb72 --- /dev/null +++ b/gcc/config/arm/linux-elf.h @@ -0,0 +1,120 @@ +/* Definitions for ARM running Linux-based GNU systems using ELF + Copyright (C) 1993, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Philip Blundell + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* elfos.h should have already been included. Now just override + any conflicting definitions and add any extras. */ + +/* Run-time Target Specification. 
*/ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM GNU/Linux with ELF)", stderr); + +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT MASK_BIG_END +#define TARGET_ENDIAN_OPTION "mbig-endian" +#define TARGET_LINKER_EMULATION "armelfb_linux" +#else +#define TARGET_ENDIAN_DEFAULT 0 +#define TARGET_ENDIAN_OPTION "mlittle-endian" +#define TARGET_LINKER_EMULATION "armelf_linux" +#endif + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (TARGET_ENDIAN_DEFAULT) + +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm6 + +#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION " -p" + +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "mhard-float", "mno-thumb-interwork" } + +/* Now we define the strings used to build the spec file. */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define LIBGCC_SPEC "%{msoft-float:-lfloat} %{mfloat-abi=soft*:-lfloat} -lgcc" + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" + +#define LINUX_TARGET_LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " LINUX_DYNAMIC_LINKER " \ + -X \ + %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + SUBTARGET_EXTRA_LINK_SPEC + +#undef LINK_SPEC +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +/* This is how we tell the assembler that two symbols have the same value. */ +#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \ + do \ + { \ + assemble_name (FILE, NAME1); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME2); \ + fputc ('\n', FILE); \ + } \ + while (0) + +/* NWFPE always understands FPA instructions. */ +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "fpe3" + +/* Call the function profiler with a given profile label. */ +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + fprintf (STREAM, "\tbl\tmcount%s\n", \ + (TARGET_ARM && NEED_PLT_RELOC) ? "(PLT)" : ""); \ +} + +/* The GNU/Linux profiler clobbers the link register. Make sure the + prologue knows to save it. */ +#define PROFILE_HOOK(X) \ + emit_clobber (gen_rtx_REG (SImode, LR_REGNUM)) + +/* The GNU/Linux profiler needs a frame pointer. */ +#define SUBTARGET_FRAME_POINTER_REQUIRED crtl->profile + +/* Add .note.GNU-stack. */ +#undef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc/config/arm/linux-gas.h b/gcc/config/arm/linux-gas.h new file mode 100644 index 000000000..9b6fcde2b --- /dev/null +++ b/gcc/config/arm/linux-gas.h @@ -0,0 +1,56 @@ +/* Definitions of target machine for GNU compiler. + ARM Linux-based GNU systems version. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2004, 2007 + Free Software Foundation, Inc. + Contributed by Russell King . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This is how we tell the assembler that a symbol is weak. + GAS always supports weak symbols. */ + +/* Unsigned chars produces much better code than signed. */ +#define DEFAULT_SIGNED_CHAR 0 + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Use the AAPCS type for wchar_t, or the previous Linux default for + non-AAPCS. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long int") + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. */ +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("a2") = (unsigned long) (END); \ + register unsigned long _flg __asm ("a3") = 0; \ + __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" \ + : "=r" (_beg) \ + : "0" (_beg), "r" (_end), "r" (_flg)); \ +} diff --git a/gcc/config/arm/mmintrin.h b/gcc/config/arm/mmintrin.h new file mode 100644 index 000000000..2cc500de3 --- /dev/null +++ b/gcc/config/arm/mmintrin.h @@ -0,0 +1,1254 @@ +/* Copyright (C) 2002, 2003, 2004, 2009 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _MMINTRIN_H_INCLUDED +#define _MMINTRIN_H_INCLUDED + +/* The data type intended for user use. */ +typedef unsigned long long __m64, __int64; + +/* Internal data types for implementing the intrinsics. */ +typedef int __v2si __attribute__ ((vector_size (8))); +typedef short __v4hi __attribute__ ((vector_size (8))); +typedef char __v8qi __attribute__ ((vector_size (8))); + +/* "Convert" __m64 and __int64 into each other. */ +static __inline __m64 +_mm_cvtsi64_m64 (__int64 __i) +{ + return __i; +} + +static __inline __int64 +_mm_cvtm64_si64 (__m64 __i) +{ + return __i; +} + +static __inline int +_mm_cvtsi64_si32 (__int64 __i) +{ + return __i; +} + +static __inline __int64 +_mm_cvtsi32_si64 (int __i) +{ + return __i; +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with signed saturation. 
*/ +static __inline __m64 +_mm_packs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with signed saturation. */ +static __inline __m64 +_mm_packs_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and + the 64-bit value from M2 into the upper 32-bits of the result, all with + signed saturation for values that do not fit exactly into 32-bits. */ +static __inline __m64 +_mm_packs_pi64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackdss ((long long)__m1, (long long)__m2); +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with unsigned saturation. */ +static __inline __m64 +_mm_packs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Pack the two 32-bit values from M1 into the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with unsigned saturation. */ +static __inline __m64 +_mm_packs_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and + the 64-bit value from M2 into the upper 32-bits of the result, all with + unsigned saturation for values that do not fit exactly into 32-bits. */ +static __inline __m64 +_mm_packs_pu64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackdus ((long long)__m1, (long long)__m2); +} + +/* Interleave the four 8-bit values from the high half of M1 with the four + 8-bit values from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Interleave the two 16-bit values from the high half of M1 with the two + 16-bit values from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Interleave the 32-bit value from the high half of M1 with the 32-bit + value from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihw ((__v2si)__m1, (__v2si)__m2); +} + +/* Interleave the four 8-bit values from the low half of M1 with the four + 8-bit values from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Interleave the two 16-bit values from the low half of M1 with the two + 16-bit values from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Interleave the 32-bit value from the low half of M1 with the 32-bit + value from the low half of M2. 
*/ +static __inline __m64 +_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilw ((__v2si)__m1, (__v2si)__m2); +} + +/* Take the four 8-bit values from the low half of M1, sign extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackel_pi8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsb ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the low half of M1, sign extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackel_pi16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the low half of M1, and return it sign extended + to 64 bits. */ +static __inline __m64 +_mm_unpackel_pi32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the high half of M1, sign extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pi8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsb ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the high half of M1, sign extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pi16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the high half of M1, and return it sign extended + to 64 bits. */ +static __inline __m64 +_mm_unpackeh_pi32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the low half of M1, zero extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackel_pu8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelub ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the low half of M1, zero extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackel_pu16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckeluh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the low half of M1, and return it zero extended + to 64 bits. */ +static __inline __m64 +_mm_unpackel_pu32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckeluw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the high half of M1, zero extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pu8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehub ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the high half of M1, zero extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pu16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehuh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the high half of M1, and return it zero extended + to 64 bits. */ +static __inline __m64 +_mm_unpackeh_pu32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehuw ((__v2si)__m1); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2. */ +static __inline __m64 +_mm_add_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2. 
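+   For instance (hypothetical values, shown only as a sketch): adding
+   _mm_set1_pi16 (1) to _mm_set1_pi16 (0x7FFF) with _mm_add_pi16 wraps
+   each lane around to -32768, whereas the saturating _mm_adds_pi16
+   defined below clamps each lane at 0x7FFF instead.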
*/ +static __inline __m64 +_mm_add_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2. */ +static __inline __m64 +_mm_add_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddw ((__v2si)__m1, (__v2si)__m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddbss ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2 using signed + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddbus ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ +static __inline __m64 +_mm_sub_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ +static __inline __m64 +_mm_sub_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ +static __inline __m64 +_mm_sub_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubw ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed + saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubbss ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + signed saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using + signed saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubbus ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + unsigned saturating arithmetic. 
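+   Sketch with hypothetical values: _mm_subs_pu16 applied to all-3s
+   minus all-7s clamps every lane at 0, while the plain _mm_sub_pi16
+   above would wrap each lane around to 0xFFFC.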
*/ +static __inline __m64 +_mm_subs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. */ +static __inline __m64 +_mm_madd_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmadds ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. */ +static __inline __m64 +_mm_madd_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmaddu ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. */ +static __inline __m64 +_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulsm ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. */ +static __inline __m64 +_mm_mulhi_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulum ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce + the low 16 bits of the results. */ +static __inline __m64 +_mm_mullo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulul ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Shift four 16-bit values in M left by COUNT. */ +static __inline __m64 +_mm_sll_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsllh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_slli_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsllhi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M left by COUNT. */ +static __inline __m64 +_mm_sll_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsllw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_slli_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsllwi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT. */ +static __inline __m64 +_mm_sll_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wslld (__m, __count); +} + +static __inline __m64 +_mm_slli_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wslldi (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrah ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_srai_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrahi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsraw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_srai_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrawi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M right by COUNT; shift in the sign bit. 
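+   Illustrative example (hypothetical operand): arithmetically
+   shifting the value -16 right by 2 with _mm_srai_si64 gives -4,
+   because copies of the sign bit are shifted in; the logical
+   _mm_srli_si64 defined further below shifts in zeros instead.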
*/ +static __inline __m64 +_mm_sra_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrad (__m, __count); +} + +static __inline __m64 +_mm_srai_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsradi (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ +static __inline __m64 +_mm_srl_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrlh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_srli_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrlhi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ +static __inline __m64 +_mm_srl_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrlw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_srli_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrlwi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT; shift in zeros. */ +static __inline __m64 +_mm_srl_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrld (__m, __count); +} + +static __inline __m64 +_mm_srli_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrldi (__m, __count); +} + +/* Rotate four 16-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrorh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_rori_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrorhi ((__v4hi)__m, __count); +} + +/* Rotate two 32-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrorw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_rori_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrorwi ((__v2si)__m, __count); +} + +/* Rotate two 64-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrord (__m, __count); +} + +static __inline __m64 +_mm_rori_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrordi (__m, __count); +} + +/* Bit-wise AND the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_and_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wand (__m1, __m2); +} + +/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the + 64-bit value in M2. */ +static __inline __m64 +_mm_andnot_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wandn (__m1, __m2); +} + +/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_or_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wor (__m1, __m2); +} + +/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_xor_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wxor (__m1, __m2); +} + +/* Compare eight 8-bit values. The result of the comparison is 0xFF if the + test is true and zero if false. */ +static __inline __m64 +_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqb ((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsb ((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtub ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Compare four 16-bit values. The result of the comparison is 0xFFFF if + the test is true and zero if false. 
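+   These all-ones/all-zeros lane masks are handy for branch-free
+   selection; a minimal sketch (variables A and B are hypothetical):
+
+     __m64 mask = _mm_cmpgt_pi16 (a, b);
+     __m64 sel  = _mm_or_si64 (_mm_and_si64 (mask, a),
+                               _mm_andnot_si64 (mask, b));
+
+   which keeps each lane of A where A > B and each lane of B
+   otherwise (the dedicated _mm_max_pi16 below does this directly).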
*/ +static __inline __m64 +_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqh ((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsh ((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtuh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if + the test is true and zero if false. */ +static __inline __m64 +_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqw ((__v2si)__m1, (__v2si)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsw ((__v2si)__m1, (__v2si)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtuw ((__v2si)__m1, (__v2si)__m2); +} + +/* Element-wise multiplication of unsigned 16-bit values __B and __C, followed + by accumulate across all elements and __A. */ +static __inline __m64 +_mm_mac_pu16 (__m64 __A, __m64 __B, __m64 __C) +{ + return __builtin_arm_wmacu (__A, (__v4hi)__B, (__v4hi)__C); +} + +/* Element-wise multiplication of signed 16-bit values __B and __C, followed + by accumulate across all elements and __A. */ +static __inline __m64 +_mm_mac_pi16 (__m64 __A, __m64 __B, __m64 __C) +{ + return __builtin_arm_wmacs (__A, (__v4hi)__B, (__v4hi)__C); +} + +/* Element-wise multiplication of unsigned 16-bit values __B and __C, followed + by accumulate across all elements. */ +static __inline __m64 +_mm_macz_pu16 (__m64 __A, __m64 __B) +{ + return __builtin_arm_wmacuz ((__v4hi)__A, (__v4hi)__B); +} + +/* Element-wise multiplication of signed 16-bit values __B and __C, followed + by accumulate across all elements. */ +static __inline __m64 +_mm_macz_pi16 (__m64 __A, __m64 __B) +{ + return __builtin_arm_wmacsz ((__v4hi)__A, (__v4hi)__B); +} + +/* Accumulate across all unsigned 8-bit values in __A. */ +static __inline __m64 +_mm_acc_pu8 (__m64 __A) +{ + return __builtin_arm_waccb ((__v8qi)__A); +} + +/* Accumulate across all unsigned 16-bit values in __A. */ +static __inline __m64 +_mm_acc_pu16 (__m64 __A) +{ + return __builtin_arm_wacch ((__v4hi)__A); +} + +/* Accumulate across all unsigned 32-bit values in __A. */ +static __inline __m64 +_mm_acc_pu32 (__m64 __A) +{ + return __builtin_arm_waccw ((__v2si)__A); +} + +static __inline __m64 +_mm_mia_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmia (__A, __B, __C); +} + +static __inline __m64 +_mm_miaph_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiaph (__A, __B, __C); +} + +static __inline __m64 +_mm_miabb_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiabb (__A, __B, __C); +} + +static __inline __m64 +_mm_miabt_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiabt (__A, __B, __C); +} + +static __inline __m64 +_mm_miatb_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiatb (__A, __B, __C); +} + +static __inline __m64 +_mm_miatt_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiatt (__A, __B, __C); +} + +/* Extract one of the elements of A and sign extend. The selector N must + be immediate. 
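+   For example (a sketch; the vector contents are hypothetical),
+   _mm_extract_pi16 (v, 2) returns lane 2 of V sign-extended to int.
+   The selector must be a compile-time constant because it is passed
+   straight through to the underlying textrm builtins below.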
*/ +#define _mm_extract_pi8(A, N) __builtin_arm_textrmsb ((__v8qi)(A), (N)) +#define _mm_extract_pi16(A, N) __builtin_arm_textrmsh ((__v4hi)(A), (N)) +#define _mm_extract_pi32(A, N) __builtin_arm_textrmsw ((__v2si)(A), (N)) + +/* Extract one of the elements of A and zero extend. The selector N must + be immediate. */ +#define _mm_extract_pu8(A, N) __builtin_arm_textrmub ((__v8qi)(A), (N)) +#define _mm_extract_pu16(A, N) __builtin_arm_textrmuh ((__v4hi)(A), (N)) +#define _mm_extract_pu32(A, N) __builtin_arm_textrmuw ((__v2si)(A), (N)) + +/* Inserts word D into one of the elements of A. The selector N must be + immediate. */ +#define _mm_insert_pi8(A, D, N) \ + ((__m64) __builtin_arm_tinsrb ((__v8qi)(A), (D), (N))) +#define _mm_insert_pi16(A, D, N) \ + ((__m64) __builtin_arm_tinsrh ((__v4hi)(A), (D), (N))) +#define _mm_insert_pi32(A, D, N) \ + ((__m64) __builtin_arm_tinsrw ((__v2si)(A), (D), (N))) + +/* Compute the element-wise maximum of signed 8-bit values. */ +static __inline __m64 +_mm_max_pi8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsb ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise maximum of signed 16-bit values. */ +static __inline __m64 +_mm_max_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise maximum of signed 32-bit values. */ +static __inline __m64 +_mm_max_pi32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise maximum of unsigned 8-bit values. */ +static __inline __m64 +_mm_max_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxub ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise maximum of unsigned 16-bit values. */ +static __inline __m64 +_mm_max_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxuh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise maximum of unsigned 32-bit values. */ +static __inline __m64 +_mm_max_pu32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxuw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +static __inline __m64 +_mm_min_pi8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsb ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +static __inline __m64 +_mm_min_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise minimum of signed 32-bit values. */ +static __inline __m64 +_mm_min_pi32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise minimum of unsigned 16-bit values. */ +static __inline __m64 +_mm_min_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminub ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise minimum of unsigned 16-bit values. */ +static __inline __m64 +_mm_min_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminuh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise minimum of unsigned 32-bit values. */ +static __inline __m64 +_mm_min_pu32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminuw ((__v2si)__A, (__v2si)__B); +} + +/* Create an 8-bit mask of the signs of 8-bit values. */ +static __inline int +_mm_movemask_pi8 (__m64 __A) +{ + return __builtin_arm_tmovmskb ((__v8qi)__A); +} + +/* Create an 8-bit mask of the signs of 16-bit values. 
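+   Sketch with hypothetical lane values, assuming lane 0 maps to bit 0
+   as with the x86 counterpart: for 16-bit lanes -1, 2, -3, 4 the sign
+   bits are 1, 0, 1, 0, so the result would be 0b0101 = 5.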
*/ +static __inline int +_mm_movemask_pi16 (__m64 __A) +{ + return __builtin_arm_tmovmskh ((__v4hi)__A); +} + +/* Create an 8-bit mask of the signs of 32-bit values. */ +static __inline int +_mm_movemask_pi32 (__m64 __A) +{ + return __builtin_arm_tmovmskw ((__v2si)__A); +} + +/* Return a combination of the four 16-bit values in A. The selector + must be an immediate. */ +#define _mm_shuffle_pi16(A, N) \ + ((__m64) __builtin_arm_wshufh ((__v4hi)(A), (N))) + + +/* Compute the rounded averages of the unsigned 8-bit values in A and B. */ +static __inline __m64 +_mm_avg_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2br ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the rounded averages of the unsigned 16-bit values in A and B. */ +static __inline __m64 +_mm_avg_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2hr ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the averages of the unsigned 8-bit values in A and B. */ +static __inline __m64 +_mm_avg2_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2b ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the averages of the unsigned 16-bit values in A and B. */ +static __inline __m64 +_mm_avg2_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2h ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. Return the value in the lower 16-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sad_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadb ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 16-bit + values in A and B. Return the value in the lower 32-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sad_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. Return the value in the lower 16-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sadz_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 16-bit + values in A and B. Return the value in the lower 32-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sadz_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); +} + +static __inline __m64 +_mm_align_si64 (__m64 __A, __m64 __B, int __C) +{ + return (__m64) __builtin_arm_walign ((__v8qi)__A, (__v8qi)__B, __C); +} + +/* Creates a 64-bit zero. */ +static __inline __m64 +_mm_setzero_si64 (void) +{ + return __builtin_arm_wzero (); +} + +/* Set and Get arbitrary iWMMXt Control registers. + Note only registers 0-3 and 8-11 are currently defined, + the rest are reserved. 
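+   Minimal usage sketch (the value written is hypothetical):
+
+     _mm_setwcx (0x3, 8);
+     int cr = _mm_getwcx (8);
+
+   Only the constant register numbers accepted by the switch
+   statements below are forwarded to the underlying builtins; any
+   other number is silently ignored, or reads back as 0.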
*/ + +static __inline void +_mm_setwcx (const int __value, const int __regno) +{ + switch (__regno) + { + case 0: __builtin_arm_setwcx (__value, 0); break; + case 1: __builtin_arm_setwcx (__value, 1); break; + case 2: __builtin_arm_setwcx (__value, 2); break; + case 3: __builtin_arm_setwcx (__value, 3); break; + case 8: __builtin_arm_setwcx (__value, 8); break; + case 9: __builtin_arm_setwcx (__value, 9); break; + case 10: __builtin_arm_setwcx (__value, 10); break; + case 11: __builtin_arm_setwcx (__value, 11); break; + default: break; + } +} + +static __inline int +_mm_getwcx (const int __regno) +{ + switch (__regno) + { + case 0: return __builtin_arm_getwcx (0); + case 1: return __builtin_arm_getwcx (1); + case 2: return __builtin_arm_getwcx (2); + case 3: return __builtin_arm_getwcx (3); + case 8: return __builtin_arm_getwcx (8); + case 9: return __builtin_arm_getwcx (9); + case 10: return __builtin_arm_getwcx (10); + case 11: return __builtin_arm_getwcx (11); + default: return 0; + } +} + +/* Creates a vector of two 32-bit values; I0 is least significant. */ +static __inline __m64 +_mm_set_pi32 (int __i1, int __i0) +{ + union { + __m64 __q; + struct { + unsigned int __i0; + unsigned int __i1; + } __s; + } __u; + + __u.__s.__i0 = __i0; + __u.__s.__i1 = __i1; + + return __u.__q; +} + +/* Creates a vector of four 16-bit values; W0 is least significant. */ +static __inline __m64 +_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) +{ + unsigned int __i1 = (unsigned short)__w3 << 16 | (unsigned short)__w2; + unsigned int __i0 = (unsigned short)__w1 << 16 | (unsigned short)__w0; + return _mm_set_pi32 (__i1, __i0); + +} + +/* Creates a vector of eight 8-bit values; B0 is least significant. */ +static __inline __m64 +_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, + char __b3, char __b2, char __b1, char __b0) +{ + unsigned int __i1, __i0; + + __i1 = (unsigned char)__b7; + __i1 = __i1 << 8 | (unsigned char)__b6; + __i1 = __i1 << 8 | (unsigned char)__b5; + __i1 = __i1 << 8 | (unsigned char)__b4; + + __i0 = (unsigned char)__b3; + __i0 = __i0 << 8 | (unsigned char)__b2; + __i0 = __i0 << 8 | (unsigned char)__b1; + __i0 = __i0 << 8 | (unsigned char)__b0; + + return _mm_set_pi32 (__i1, __i0); +} + +/* Similar, but with the arguments in reverse order. */ +static __inline __m64 +_mm_setr_pi32 (int __i0, int __i1) +{ + return _mm_set_pi32 (__i1, __i0); +} + +static __inline __m64 +_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3) +{ + return _mm_set_pi16 (__w3, __w2, __w1, __w0); +} + +static __inline __m64 +_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, + char __b4, char __b5, char __b6, char __b7) +{ + return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); +} + +/* Creates a vector of two 32-bit values, both elements containing I. */ +static __inline __m64 +_mm_set1_pi32 (int __i) +{ + return _mm_set_pi32 (__i, __i); +} + +/* Creates a vector of four 16-bit values, all elements containing W. */ +static __inline __m64 +_mm_set1_pi16 (short __w) +{ + unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; + return _mm_set1_pi32 (__i); +} + +/* Creates a vector of four 16-bit values, all elements containing B. */ +static __inline __m64 +_mm_set1_pi8 (char __b) +{ + unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; + unsigned int __i = __w << 16 | __w; + return _mm_set1_pi32 (__i); +} + +/* Convert an integer to a __m64 object. 
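+   As a recap of the constructors above (a sketch with hypothetical
+   arguments): _mm_set_pi8 (7, 6, 5, 4, 3, 2, 1, 0) puts 0 in the
+   least significant byte and 7 in the most significant one, while
+   _mm_setr_pi8 takes the same arguments lowest lane first.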
*/ +static __inline __m64 +_m_from_int (int __a) +{ + return (__m64)__a; +} + +#define _m_packsswb _mm_packs_pi16 +#define _m_packssdw _mm_packs_pi32 +#define _m_packuswb _mm_packs_pu16 +#define _m_packusdw _mm_packs_pu32 +#define _m_packssqd _mm_packs_pi64 +#define _m_packusqd _mm_packs_pu64 +#define _mm_packs_si64 _mm_packs_pi64 +#define _mm_packs_su64 _mm_packs_pu64 +#define _m_punpckhbw _mm_unpackhi_pi8 +#define _m_punpckhwd _mm_unpackhi_pi16 +#define _m_punpckhdq _mm_unpackhi_pi32 +#define _m_punpcklbw _mm_unpacklo_pi8 +#define _m_punpcklwd _mm_unpacklo_pi16 +#define _m_punpckldq _mm_unpacklo_pi32 +#define _m_punpckehsbw _mm_unpackeh_pi8 +#define _m_punpckehswd _mm_unpackeh_pi16 +#define _m_punpckehsdq _mm_unpackeh_pi32 +#define _m_punpckehubw _mm_unpackeh_pu8 +#define _m_punpckehuwd _mm_unpackeh_pu16 +#define _m_punpckehudq _mm_unpackeh_pu32 +#define _m_punpckelsbw _mm_unpackel_pi8 +#define _m_punpckelswd _mm_unpackel_pi16 +#define _m_punpckelsdq _mm_unpackel_pi32 +#define _m_punpckelubw _mm_unpackel_pu8 +#define _m_punpckeluwd _mm_unpackel_pu16 +#define _m_punpckeludq _mm_unpackel_pu32 +#define _m_paddb _mm_add_pi8 +#define _m_paddw _mm_add_pi16 +#define _m_paddd _mm_add_pi32 +#define _m_paddsb _mm_adds_pi8 +#define _m_paddsw _mm_adds_pi16 +#define _m_paddsd _mm_adds_pi32 +#define _m_paddusb _mm_adds_pu8 +#define _m_paddusw _mm_adds_pu16 +#define _m_paddusd _mm_adds_pu32 +#define _m_psubb _mm_sub_pi8 +#define _m_psubw _mm_sub_pi16 +#define _m_psubd _mm_sub_pi32 +#define _m_psubsb _mm_subs_pi8 +#define _m_psubsw _mm_subs_pi16 +#define _m_psubuw _mm_subs_pi32 +#define _m_psubusb _mm_subs_pu8 +#define _m_psubusw _mm_subs_pu16 +#define _m_psubusd _mm_subs_pu32 +#define _m_pmaddwd _mm_madd_pi16 +#define _m_pmadduwd _mm_madd_pu16 +#define _m_pmulhw _mm_mulhi_pi16 +#define _m_pmulhuw _mm_mulhi_pu16 +#define _m_pmullw _mm_mullo_pi16 +#define _m_pmacsw _mm_mac_pi16 +#define _m_pmacuw _mm_mac_pu16 +#define _m_pmacszw _mm_macz_pi16 +#define _m_pmacuzw _mm_macz_pu16 +#define _m_paccb _mm_acc_pu8 +#define _m_paccw _mm_acc_pu16 +#define _m_paccd _mm_acc_pu32 +#define _m_pmia _mm_mia_si64 +#define _m_pmiaph _mm_miaph_si64 +#define _m_pmiabb _mm_miabb_si64 +#define _m_pmiabt _mm_miabt_si64 +#define _m_pmiatb _mm_miatb_si64 +#define _m_pmiatt _mm_miatt_si64 +#define _m_psllw _mm_sll_pi16 +#define _m_psllwi _mm_slli_pi16 +#define _m_pslld _mm_sll_pi32 +#define _m_pslldi _mm_slli_pi32 +#define _m_psllq _mm_sll_si64 +#define _m_psllqi _mm_slli_si64 +#define _m_psraw _mm_sra_pi16 +#define _m_psrawi _mm_srai_pi16 +#define _m_psrad _mm_sra_pi32 +#define _m_psradi _mm_srai_pi32 +#define _m_psraq _mm_sra_si64 +#define _m_psraqi _mm_srai_si64 +#define _m_psrlw _mm_srl_pi16 +#define _m_psrlwi _mm_srli_pi16 +#define _m_psrld _mm_srl_pi32 +#define _m_psrldi _mm_srli_pi32 +#define _m_psrlq _mm_srl_si64 +#define _m_psrlqi _mm_srli_si64 +#define _m_prorw _mm_ror_pi16 +#define _m_prorwi _mm_rori_pi16 +#define _m_prord _mm_ror_pi32 +#define _m_prordi _mm_rori_pi32 +#define _m_prorq _mm_ror_si64 +#define _m_prorqi _mm_rori_si64 +#define _m_pand _mm_and_si64 +#define _m_pandn _mm_andnot_si64 +#define _m_por _mm_or_si64 +#define _m_pxor _mm_xor_si64 +#define _m_pcmpeqb _mm_cmpeq_pi8 +#define _m_pcmpeqw _mm_cmpeq_pi16 +#define _m_pcmpeqd _mm_cmpeq_pi32 +#define _m_pcmpgtb _mm_cmpgt_pi8 +#define _m_pcmpgtub _mm_cmpgt_pu8 +#define _m_pcmpgtw _mm_cmpgt_pi16 +#define _m_pcmpgtuw _mm_cmpgt_pu16 +#define _m_pcmpgtd _mm_cmpgt_pi32 +#define _m_pcmpgtud _mm_cmpgt_pu32 +#define _m_pextrb _mm_extract_pi8 +#define _m_pextrw 
_mm_extract_pi16 +#define _m_pextrd _mm_extract_pi32 +#define _m_pextrub _mm_extract_pu8 +#define _m_pextruw _mm_extract_pu16 +#define _m_pextrud _mm_extract_pu32 +#define _m_pinsrb _mm_insert_pi8 +#define _m_pinsrw _mm_insert_pi16 +#define _m_pinsrd _mm_insert_pi32 +#define _m_pmaxsb _mm_max_pi8 +#define _m_pmaxsw _mm_max_pi16 +#define _m_pmaxsd _mm_max_pi32 +#define _m_pmaxub _mm_max_pu8 +#define _m_pmaxuw _mm_max_pu16 +#define _m_pmaxud _mm_max_pu32 +#define _m_pminsb _mm_min_pi8 +#define _m_pminsw _mm_min_pi16 +#define _m_pminsd _mm_min_pi32 +#define _m_pminub _mm_min_pu8 +#define _m_pminuw _mm_min_pu16 +#define _m_pminud _mm_min_pu32 +#define _m_pmovmskb _mm_movemask_pi8 +#define _m_pmovmskw _mm_movemask_pi16 +#define _m_pmovmskd _mm_movemask_pi32 +#define _m_pshufw _mm_shuffle_pi16 +#define _m_pavgb _mm_avg_pu8 +#define _m_pavgw _mm_avg_pu16 +#define _m_pavg2b _mm_avg2_pu8 +#define _m_pavg2w _mm_avg2_pu16 +#define _m_psadbw _mm_sad_pu8 +#define _m_psadwd _mm_sad_pu16 +#define _m_psadzbw _mm_sadz_pu8 +#define _m_psadzwd _mm_sadz_pu16 +#define _m_paligniq _mm_align_si64 +#define _m_cvt_si2pi _mm_cvtsi64_m64 +#define _m_cvt_pi2si _mm_cvtm64_si64 + +#endif /* _MMINTRIN_H_INCLUDED */ diff --git a/gcc/config/arm/neon-docgen.ml b/gcc/config/arm/neon-docgen.ml new file mode 100644 index 000000000..23e37b498 --- /dev/null +++ b/gcc/config/arm/neon-docgen.ml @@ -0,0 +1,337 @@ +(* ARM NEON documentation generator. + + Copyright (C) 2006, 2007 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-docgen neon.cmo neon-docgen.ml + + Run with: + /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi +*) + +open Neon + +(* The combined "ops" and "reinterp" table. *) +let ops_reinterp = reinterp @ ops + +(* Helper functions for extracting things from the "ops" table. 
*) +let single_opcode desired_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + if opcode = desired_opcode then row :: got_so_far + else got_so_far + ) [] ops_reinterp + +let multiple_opcodes desired_opcodes () = + List.fold_left (fun got_so_far -> + fun desired_opcode -> + (single_opcode desired_opcode ()) @ got_so_far) + [] desired_opcodes + +let ldx_opcode number () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vldx n | Vldx_lane n | Vldx_dup n when n = number -> + row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let stx_opcode number () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vstx n | Vstx_lane n when n = number -> + row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let tbl_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vtbl _ -> row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let tbx_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vtbx _ -> row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +(* The groups of intrinsics. *) +let intrinsic_groups = + [ "Addition", single_opcode Vadd; + "Multiplication", single_opcode Vmul; + "Multiply-accumulate", single_opcode Vmla; + "Multiply-subtract", single_opcode Vmls; + "Subtraction", single_opcode Vsub; + "Comparison (equal-to)", single_opcode Vceq; + "Comparison (greater-than-or-equal-to)", single_opcode Vcge; + "Comparison (less-than-or-equal-to)", single_opcode Vcle; + "Comparison (greater-than)", single_opcode Vcgt; + "Comparison (less-than)", single_opcode Vclt; + "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage; + "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale; + "Comparison (absolute greater-than)", single_opcode Vcagt; + "Comparison (absolute less-than)", single_opcode Vcalt; + "Test bits", single_opcode Vtst; + "Absolute difference", single_opcode Vabd; + "Absolute difference and accumulate", single_opcode Vaba; + "Maximum", single_opcode Vmax; + "Minimum", single_opcode Vmin; + "Pairwise add", single_opcode Vpadd; + "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada; + "Folding maximum", single_opcode Vpmax; + "Folding minimum", single_opcode Vpmin; + "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts]; + "Vector shift left", single_opcode Vshl; + "Vector shift left by constant", single_opcode Vshl_n; + "Vector shift right by constant", single_opcode Vshr_n; + "Vector shift right by constant and accumulate", single_opcode Vsra_n; + "Vector shift right and insert", single_opcode Vsri; + "Vector shift left and insert", single_opcode Vsli; + "Absolute value", single_opcode Vabs; + "Negation", single_opcode Vneg; + "Bitwise not", single_opcode Vmvn; + "Count leading sign bits", single_opcode Vcls; + "Count leading zeros", single_opcode Vclz; + "Count number of set bits", single_opcode Vcnt; + "Reciprocal estimate", single_opcode Vrecpe; + "Reciprocal square-root estimate", single_opcode Vrsqrte; + "Get lanes from a vector", single_opcode Vget_lane; + "Set lanes in a vector", single_opcode Vset_lane; + "Create vector from literal bit pattern", single_opcode Vcreate; + "Set all lanes to the same value", + multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane]; + "Combining 
vectors", single_opcode Vcombine; + "Splitting vectors", multiple_opcodes [Vget_high; Vget_low]; + "Conversions", multiple_opcodes [Vcvt; Vcvt_n]; + "Move, single_opcode narrowing", single_opcode Vmovn; + "Move, single_opcode long", single_opcode Vmovl; + "Table lookup", tbl_opcode; + "Extended table lookup", tbx_opcode; + "Multiply, lane", single_opcode Vmul_lane; + "Long multiply, lane", single_opcode Vmull_lane; + "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane; + "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane; + "Multiply-accumulate, lane", single_opcode Vmla_lane; + "Multiply-subtract, lane", single_opcode Vmls_lane; + "Vector multiply by scalar", single_opcode Vmul_n; + "Vector long multiply by scalar", single_opcode Vmull_n; + "Vector saturating doubling long multiply by scalar", + single_opcode Vqdmull_n; + "Vector saturating doubling multiply high by scalar", + single_opcode Vqdmulh_n; + "Vector multiply-accumulate by scalar", single_opcode Vmla_n; + "Vector multiply-subtract by scalar", single_opcode Vmls_n; + "Vector extract", single_opcode Vext; + "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16]; + "Bit selection", single_opcode Vbsl; + "Transpose elements", single_opcode Vtrn; + "Zip elements", single_opcode Vzip; + "Unzip elements", single_opcode Vuzp; + "Element/structure loads, VLD1 variants", ldx_opcode 1; + "Element/structure stores, VST1 variants", stx_opcode 1; + "Element/structure loads, VLD2 variants", ldx_opcode 2; + "Element/structure stores, VST2 variants", stx_opcode 2; + "Element/structure loads, VLD3 variants", ldx_opcode 3; + "Element/structure stores, VST3 variants", stx_opcode 3; + "Element/structure loads, VLD4 variants", ldx_opcode 4; + "Element/structure stores, VST4 variants", stx_opcode 4; + "Logical operations (AND)", single_opcode Vand; + "Logical operations (OR)", single_opcode Vorr; + "Logical operations (exclusive OR)", single_opcode Veor; + "Logical operations (AND-NOT)", single_opcode Vbic; + "Logical operations (OR-NOT)", single_opcode Vorn; + "Reinterpret casts", single_opcode Vreinterp ] + +(* Given an intrinsic shape, produce a string to document the corresponding + operand shapes. 
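+   For instance (an illustrative guess, not copied from the generated
+   manual): a shape of All (3, Dreg) is rendered as the Texinfo
+   fragment "@var{d0}, @var{d0}, @var{d0}", which later appears in
+   arm-neon-intrinsics.texi as the operand list of the expected
+   instruction.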
*) +let rec analyze_shape shape = + let rec n_things n thing = + match n with + 0 -> [] + | n -> thing :: (n_things (n - 1) thing) + in + let rec analyze_shape_elt reg_no elt = + match elt with + Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}" + | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}" + | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}" + | Immed -> "#@var{0}" + | VecArray (1, elt) -> + let elt_regexp = analyze_shape_elt 0 elt in + "@{" ^ elt_regexp ^ "@}" + | VecArray (n, elt) -> + let rec f m = + match m with + 0 -> [] + | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1)) + in + let ops = List.rev (f n) in + "@{" ^ (commas (fun x -> x) ops "") ^ "@}" + | (PtrTo elt | CstPtrTo elt) -> + "[" ^ (analyze_shape_elt reg_no elt) ^ "]" + | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]" + | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]" + | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]" + | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts)) + in + match shape with + All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) "" + | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^ + ", " ^ (analyze_shape_elt 0 Dreg) + | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^ + (analyze_shape_elt 0 elt) + | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ + ", " ^ (analyze_shape_elt 0 Dreg) + | Wide_noreg elt -> analyze_shape (Long_noreg elt) + | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ + ", " ^ (analyze_shape_elt 0 Qreg) + | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) "" + | By_scalar Dreg -> + analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) + | By_scalar Qreg -> + analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) + | By_scalar _ -> assert false + | Wide_lane -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Wide_scalar -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Pair_result elt -> + let elt_regexp = analyze_shape_elt 0 elt in + let elt_regexp' = analyze_shape_elt 1 elt in + elt_regexp ^ ", " ^ elt_regexp' + | Unary_scalar _ -> "FIXME Unary_scalar" + | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) + | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) + | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) + +(* Document a single intrinsic. *) +let describe_intrinsic first chan + (elt_ty, (_, features, shape, name, munge, _)) = + let c_arity, new_elt_ty = munge shape elt_ty in + let c_types = strings_of_arity c_arity in + Printf.fprintf chan "@itemize @bullet\n"; + let item_code = if first then "@item" else "@itemx" in + Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types) + (intrinsic_name name) (string_of_elt elt_ty); + Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) ""); + if not (List.exists (fun feature -> feature = No_op) features) then + begin + let print_one_insn name = + Printf.fprintf chan "@code{"; + let no_suffix = (new_elt_ty = NoElts) in + let name_with_suffix = + if no_suffix then name + else name ^ "." ^ (string_of_elt_dots new_elt_ty) + in + let possible_operands = analyze_all_shapes features shape + analyze_shape + in + let rec print_one_possible_operand op = + Printf.fprintf chan "%s %s}" name_with_suffix op + in + (* If the intrinsic expands to multiple instructions, we assume + they are all of the same form. 
*) + print_one_possible_operand (List.hd possible_operands) + in + let rec print_insns names = + match names with + [] -> () + | [name] -> print_one_insn name + | name::names -> (print_one_insn name; + Printf.fprintf chan " @emph{or} "; + print_insns names) + in + let insn_names = get_insn_names features name in + Printf.fprintf chan "@*@emph{Form of expected instruction(s):} "; + print_insns insn_names; + Printf.fprintf chan "\n" + end; + Printf.fprintf chan "@end itemize\n"; + Printf.fprintf chan "\n\n" + +(* Document a group of intrinsics. *) +let document_group chan (group_title, group_extractor) = + (* Extract the rows in question from the ops table and then turn them + into a list of intrinsics. *) + let intrinsics = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (_, _, _, _, _, elt_tys) -> + List.fold_left (fun got_so_far' -> + fun elt_ty -> + (elt_ty, row) :: got_so_far') + got_so_far elt_tys + ) [] (group_extractor ()) + in + (* Emit the title for this group. *) + Printf.fprintf chan "@subsubsection %s\n\n" group_title; + (* Emit a description of each intrinsic. *) + List.iter (describe_intrinsic true chan) intrinsics; + (* Close this group. *) + Printf.fprintf chan "\n\n" + +let gnu_header chan = + List.iter (fun s -> Printf.fprintf chan "%s\n" s) [ + "@c Copyright (C) 2006 Free Software Foundation, Inc."; + "@c This is part of the GCC manual."; + "@c For copying conditions, see the file gcc.texi."; + ""; + "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml"; + "@c Please do not edit manually."] + +(* Program entry point. *) +let _ = + if Array.length Sys.argv <> 2 then + failwith "Usage: neon-docgen " + else + let file = Sys.argv.(1) in + try + let chan = open_out file in + gnu_header chan; + List.iter (document_group chan) intrinsic_groups; + close_out chan + with Sys_error sys -> + failwith ("Could not create output file " ^ file ^ ": " ^ sys) diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml new file mode 100644 index 000000000..112c8be6e --- /dev/null +++ b/gcc/config/arm/neon-gen.ml @@ -0,0 +1,416 @@ +(* Auto-generate ARM Neon intrinsics header file. + Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-gen neon.cmo neon-gen.ml + + Run with: + ./neon-gen > arm_neon.h +*) + +open Neon + +(* The format codes used in the following functions are documented at: + http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ + #6_printflikefunctionsforprettyprinting + (one line, remove the backslash.) +*) + +(* Following functions can be used to approximate GNU indentation style. 
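+   The helpers below produce output of roughly this shape in the
+   generated arm_neon.h (a sketch; the body is elided):
+
+     __extension__ static __inline int8x8_t
+     __attribute__ ((__always_inline__))
+     vadd_s8 (int8x8_t __a, int8x8_t __b)
+     {
+       ...
+     }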
*) +let start_function () = + Format.printf "@["; + ref 0 + +let end_function nesting = + match !nesting with + 0 -> Format.printf "@;@;@]" + | _ -> failwith ("Bad nesting (ending function at level " + ^ (string_of_int !nesting) ^ ")") + +let open_braceblock nesting = + begin match !nesting with + 0 -> Format.printf "@,@<0>{@[@," + | _ -> Format.printf "@,@[ @<0>{@[@," + end; + incr nesting + +let close_braceblock nesting = + decr nesting; + match !nesting with + 0 -> Format.printf "@]@,@<0>}" + | _ -> Format.printf "@]@,@<0>}@]" + +let print_function arity fnname body = + let ffmt = start_function () in + Format.printf "__extension__ static __inline "; + let inl = "__attribute__ ((__always_inline__))" in + begin match arity with + Arity0 ret -> + Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname + | Arity1 (ret, arg0) -> + Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname + (string_of_vectype arg0) + | Arity2 (ret, arg0, arg1) -> + Format.printf "%s %s@,%s (%s __a, %s __b)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) + | Arity3 (ret, arg0, arg1, arg2) -> + Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) (string_of_vectype arg2) + | Arity4 (ret, arg0, arg1, arg2, arg3) -> + Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) (string_of_vectype arg2) + (string_of_vectype arg3) + end; + open_braceblock ffmt; + let rec print_lines = function + [] -> () + | [line] -> Format.printf "%s" line + | line::lines -> Format.printf "%s@," line; print_lines lines in + print_lines body; + close_braceblock ffmt; + end_function ffmt + +let return_by_ptr features = List.mem ReturnPtr features + +let union_string num elts base = + let itype = inttype_for_array num elts in + let iname = string_of_inttype itype + and sname = string_of_vectype (T_arrayof (num, elts)) in + Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base + +let rec signed_ctype = function + T_uint8x8 | T_poly8x8 -> T_int8x8 + | T_uint8x16 | T_poly8x16 -> T_int8x16 + | T_uint16x4 | T_poly16x4 -> T_int16x4 + | T_uint16x8 | T_poly16x8 -> T_int16x8 + | T_uint32x2 -> T_int32x2 + | T_uint32x4 -> T_int32x4 + | T_uint64x1 -> T_int64x1 + | T_uint64x2 -> T_int64x2 + (* Cast to types defined by mode in arm.c, not random types pulled in from + the header in use. This fixes incompatible pointer errors when + compiling with C++. *) + | T_uint8 | T_int8 -> T_intQI + | T_uint16 | T_int16 -> T_intHI + | T_uint32 | T_int32 -> T_intSI + | T_uint64 | T_int64 -> T_intDI + | T_float32 -> T_floatSF + | T_poly8 -> T_intQI + | T_poly16 -> T_intHI + | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) + | T_ptrto elt -> T_ptrto (signed_ctype elt) + | T_const elt -> T_const (signed_ctype elt) + | x -> x + +let add_cast ctype cval = + let stype = signed_ctype ctype in + if ctype <> stype then + Printf.sprintf "(%s) %s" (string_of_vectype stype) cval + else + cval + +let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" + +(* Return a tuple of a list of declarations to go at the start of the function, + and a list of statements needed to return THING. 
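+   Concretely, when an array result is returned through the union
+   trick the generated body looks roughly like this (a sketch; the
+   exact builtin name and argument list vary by intrinsic):
+
+     union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+     __rv.__o = __builtin_neon_vtrnv8qi (__a, __b);
+     return __rv.__i;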
*) +let return arity return_by_ptr thing = + match arity with + Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + match ret with + T_arrayof (num, vec) -> + if return_by_ptr then + let sname = string_of_vectype ret in + [Printf.sprintf "%s __rv;" sname], + [thing ^ ";"; "return __rv;"] + else + let uname = union_string num vec "__rv" in + [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"] + | T_void -> [], [thing ^ ";"] + | _ -> + [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"] + +let rec element_type ctype = + match ctype with + T_arrayof (_, v) -> element_type v + | _ -> ctype + +let params return_by_ptr ps = + let pdecls = ref [] in + let ptype t p = + match t with + T_arrayof (num, elts) -> + let uname = union_string num elts (p ^ "u") in + let decl = Printf.sprintf "%s = { %s };" uname p in + pdecls := decl :: !pdecls; + p ^ "u.__o" + | _ -> add_cast t p in + let plist = match ps with + Arity0 _ -> [] + | Arity1 (_, t1) -> [ptype t1 "__a"] + | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"] + | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"] + | Arity4 (_, t1, t2, t3, t4) -> + [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in + match ps with + Arity0 ret | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + if return_by_ptr then + !pdecls, add_cast (T_ptrto (element_type ret)) "&__rv.val[0]" :: plist + else + !pdecls, plist + +let modify_params features plist = + let is_flipped = + List.exists (function Flipped _ -> true | _ -> false) features in + if is_flipped then + match plist with + [ a; b ] -> [ b; a ] + | _ -> + failwith ("Don't know how to flip args " ^ (String.concat ", " plist)) + else + plist + +(* !!! Decide whether to add an extra information word based on the shape + form. *) +let extra_word shape features paramlist bits = + let use_word = + match shape with + All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow + | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm + | Narrow_imm -> true + | _ -> List.mem InfoWord features + in + if use_word then + paramlist @ [string_of_int bits] + else + paramlist + +(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0). + Bit 1 represents floats & polynomials (1), or ordinary integers (0). + Bit 2 represents rounding (1) vs none (0). *) +let infoword_value elttype features = + let bits01 = + match elt_class elttype with + Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001 + | Poly -> 0b010 + | Float -> 0b011 + | _ -> 0b000 + and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in + bits01 lor rounding_bit + +(* "Cast" type operations will throw an exception in mode_of_elt (actually in + elt_width, called from there). Deal with that here, and generate a suffix + with multiple modes (). 
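+   In the ordinary case the suffix is a single mode name, so a call
+   might be printed as, say, __builtin_neon_vaddv8qi; for the
+   mixed-mode conversions the destination and source mode names are
+   simply concatenated. This is an illustrative sketch; the exact
+   spellings come from neon.ml.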
*) +let rec mode_suffix elttype shape = + try + let mode = mode_of_elt elttype shape in + string_of_mode mode + with MixedMode (dst, src) -> + let dstmode = mode_of_elt dst shape + and srcmode = mode_of_elt src shape in + string_of_mode dstmode ^ string_of_mode srcmode + +let print_variant opcode features shape name (ctype, asmtype, elttype) = + let bits = infoword_value elttype features in + let modesuf = mode_suffix elttype shape in + let return_by_ptr = return_by_ptr features in + let pdecls, paramlist = params return_by_ptr ctype in + let paramlist' = modify_params features paramlist in + let paramlist'' = extra_word shape features paramlist' bits in + let parstr = String.concat ", " paramlist'' in + let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)" + (builtin_name features name) modesuf parstr in + let rdecls, stmts = return ctype return_by_ptr builtin in + let body = pdecls @ rdecls @ stmts + and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in + print_function ctype fnname body + +(* When this function processes the element types in the ops table, it rewrites + them in a list of tuples (a,b,c): + a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8) + b : Asm type : a single, processed element type, e.g. P16. This is the + type which should be attached to the asm opcode. + c : Variant type : the unprocessed type for this variant (e.g. in add + instructions which don't care about the sign, b might be i16 and c + might be s16.) +*) + +let print_op (opcode, features, shape, name, munge, types) = + let sorted_types = List.sort compare types in + let munged_types = List.map + (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in + List.iter + (fun variant -> print_variant opcode features shape name variant) + munged_types + +let print_ops ops = + List.iter print_op ops + +(* Output type definitions. Table entries are: + cbase : "C" name for the type. + abase : "ARM" base name for the type (i.e. int in int8x8_t). + esize : element size. + enum : element count. +*) + +let deftypes () = + let typeinfo = [ + (* Doubleword vector types. *) + "__builtin_neon_qi", "int", 8, 8; + "__builtin_neon_hi", "int", 16, 4; + "__builtin_neon_si", "int", 32, 2; + "__builtin_neon_di", "int", 64, 1; + "__builtin_neon_sf", "float", 32, 2; + "__builtin_neon_poly8", "poly", 8, 8; + "__builtin_neon_poly16", "poly", 16, 4; + "__builtin_neon_uqi", "uint", 8, 8; + "__builtin_neon_uhi", "uint", 16, 4; + "__builtin_neon_usi", "uint", 32, 2; + "__builtin_neon_udi", "uint", 64, 1; + + (* Quadword vector types. *) + "__builtin_neon_qi", "int", 8, 16; + "__builtin_neon_hi", "int", 16, 8; + "__builtin_neon_si", "int", 32, 4; + "__builtin_neon_di", "int", 64, 2; + "__builtin_neon_sf", "float", 32, 4; + "__builtin_neon_poly8", "poly", 8, 16; + "__builtin_neon_poly16", "poly", 16, 8; + "__builtin_neon_uqi", "uint", 8, 16; + "__builtin_neon_uhi", "uint", 16, 8; + "__builtin_neon_usi", "uint", 32, 4; + "__builtin_neon_udi", "uint", 64, 2 + ] in + List.iter + (fun (cbase, abase, esize, enum) -> + let attr = + match enum with + 1 -> "" + | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" + (esize * enum / 8) in + Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr) + typeinfo; + Format.print_newline (); + (* Extra types not in . 
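+   For the first doubleword entry the loop above emits, roughly
+   (a sketch of the generated header text):
+
+     typedef __builtin_neon_qi int8x8_t
+       __attribute__ ((__vector_size__ (8)));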
*) + Format.printf "typedef float float32_t;\n"; + Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; + Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" + +(* Output structs containing arrays, for load & store instructions etc. *) + +let arrtypes () = + let typeinfo = [ + "int", 8; "int", 16; + "int", 32; "int", 64; + "uint", 8; "uint", 16; + "uint", 32; "uint", 64; + "float", 32; "poly", 8; + "poly", 16 + ] in + let writestruct elname elsize regsize arrsize = + let elnum = regsize / elsize in + let structname = + Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in + let sfmt = start_function () in + Format.printf "typedef struct %s" structname; + open_braceblock sfmt; + Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize; + close_braceblock sfmt; + Format.printf " %s;" structname; + end_function sfmt; + in + for n = 2 to 4 do + List.iter + (fun (elname, elsize) -> + writestruct elname elsize 64 n; + writestruct elname elsize 128 n) + typeinfo + done + +let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) + +(* Do it. *) + +let _ = + print_lines [ +"/* ARM NEON intrinsics include file. This file is generated automatically"; +" using neon-gen.ml. Please do not edit manually."; +""; +" Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc."; +" Contributed by CodeSourcery."; +""; +" This file is part of GCC."; +""; +" GCC is free software; you can redistribute it and/or modify it"; +" under the terms of the GNU General Public License as published"; +" by the Free Software Foundation; either version 3, or (at your"; +" option) any later version."; +""; +" GCC is distributed in the hope that it will be useful, but WITHOUT"; +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; +" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; +" License for more details."; +""; +" Under Section 7 of GPL version 3, you are granted additional"; +" permissions described in the GCC Runtime Library Exception, version"; +" 3.1, as published by the Free Software Foundation."; +""; +" You should have received a copy of the GNU General Public License and"; +" a copy of the GCC Runtime Library Exception along with this program;"; +" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; +" . */"; +""; +"#ifndef _GCC_ARM_NEON_H"; +"#define _GCC_ARM_NEON_H 1"; +""; +"#ifndef __ARM_NEON__"; +"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h"; +"#else"; +""; +"#ifdef __cplusplus"; +"extern \"C\" {"; +"#endif"; +""; +"#include "; +""]; + deftypes (); + arrtypes (); + Format.print_newline (); + print_ops ops; + Format.print_newline (); + print_ops reinterp; + print_lines [ +"#ifdef __cplusplus"; +"}"; +"#endif"; +"#endif"; +"#endif"] diff --git a/gcc/config/arm/neon-schedgen.ml b/gcc/config/arm/neon-schedgen.ml new file mode 100644 index 000000000..3d9b04422 --- /dev/null +++ b/gcc/config/arm/neon-schedgen.ml @@ -0,0 +1,543 @@ +(* Emission of the core of the Cortex-A8 NEON scheduling description. + Copyright (C) 2007, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . +*) + +(* This scheduling description generator works as follows. + - Each group of instructions has source and destination requirements + specified and a list of cores supported. This is then filtered + and per core scheduler descriptions are generated out. + The reservations generated are prefixed by the name of the + core and the check is performed on the basis of what the tuning + string is. Running this will generate Neon scheduler descriptions + for all cores supported. + + The source requirements may be specified using + Source (the stage at which all source operands not otherwise + described are read), Source_m (the stage at which Rm operands are + read), Source_n (likewise for Rn) and Source_d (likewise for Rd). + - For each group of instructions the earliest stage where a source + operand may be required is calculated. + - Each group of instructions is selected in turn as a producer. + The latencies between this group and every other group are then + calculated, yielding up to four values for each combination: + 1. Producer -> consumer Rn latency + 2. Producer -> consumer Rm latency + 3. Producer -> consumer Rd (as a source) latency + 4. Producer -> consumer worst-case latency. + Value 4 is calculated from the destination availability requirements + of the consumer and the earliest source availability requirements + of the producer. + - The largest Value 4 calculated for the current producer is the + worse-case latency, L, for that instruction group. This value is written + out in a define_insn_reservation for the producer group. + - For each producer and consumer pair, the latencies calculated above + are collated. The average (of up to four values) is calculated and + if this average is different from the worst-case latency, an + unguarded define_bypass construction is issued for that pair. + (For each pair only one define_bypass construction will be emitted, + and at present we do not emit specific guards.) +*) + +let find_with_result fn lst = + let rec scan = function + [] -> raise Not_found + | l::ls -> + match fn l with + Some result -> result + | _ -> scan ls in + scan lst + +let n1 = 1 and n2 = 2 and n3 = 3 and n4 = 4 and n5 = 5 and n6 = 6 + and n7 = 7 and n8 = 8 and n9 = 9 + +type availability = Source of int + | Source_n of int + | Source_m of int + | Source_d of int + | Dest of int + | Dest_n_after of int * int + +type guard = Guard_none | Guard_only_m | Guard_only_n | Guard_only_d + +(* Reservation behaviors. All but the last row here correspond to one + pipeline each. Each constructor will correspond to one + define_reservation. *) +type reservation = + Mul | Mul_2cycle | Mul_4cycle +| Shift | Shift_2cycle +| ALU | ALU_2cycle +| Fmul | Fmul_2cycle +| Fadd | Fadd_2cycle +(* | VFP *) +| Permute of int +| Ls of int +| Fmul_then_fadd | Fmul_then_fadd_2 + +type core = CortexA8 | CortexA9 +let allCores = [CortexA8; CortexA9] +let coreStr = function + CortexA8 -> "cortex_a8" + | CortexA9 -> "cortex_a9" + +let tuneStr = function + CortexA8 -> "cortexa8" + | CortexA9 -> "cortexa9" + + +(* This table must be kept as short as possible by conflating + entries with the same availability behavior. 
+ + First components: instruction group names + Second components: availability requirements, in the order in which + they should appear in the comments in the .md file. + Third components: reservation info + Fourth components: List of supported cores. +*) +let availability_table = [ + (* NEON integer ALU instructions. *) + (* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr + veor vbic vorn ddd qqq *) + "neon_int_1", [Source n2; Dest n3], ALU, allCores; + (* vadd vsub qqd vsub ddd qqq *) + "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU, allCores; + (* vsum vneg dd qq vadd vsub qdd *) + "neon_int_3", [Source n1; Dest n3], ALU, allCores; + (* vabs vceqz vcgez vcbtz vclez vcltz vadh vradh vsbh vrsbh dqq *) + (* vhadd vrhadd vqadd vtst ddd qqq *) + "neon_int_4", [Source n2; Dest n4], ALU, allCores; + (* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *) + "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU, allCores; + (* vqneg vqabs dd qq *) + "neon_vqneg_vqabs", [Source n1; Dest n4], ALU, allCores; + (* vmov vmvn *) + "neon_vmov", [Dest n3], ALU, allCores; + (* vaba *) + "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU, allCores; + "neon_vaba_qqq", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], + ALU_2cycle, allCores; + (* vsma *) + "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU, allCores; + + (* NEON integer multiply instructions. *) + (* vmul, vqdmlh, vqrdmlh *) + (* vmul, vqdmul, qdd 16/8 long 32/16 long *) + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6], + Mul, allCores; + "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)], + Mul_2cycle, allCores; + (* vmul, vqdmul again *) + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar", + [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle, allCores; + (* vmla, vmls *) + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long", + [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul, allCores; + "neon_mla_qqq_8_16", + [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)], + Mul_2cycle, allCores; + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)], + Mul_2cycle, allCores; + "neon_mla_qqq_32_qqd_32_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)], + Mul_4cycle, allCores; + (* vmul, vqdmulh, vqrdmulh *) + (* vmul, vqdmul *) + "neon_mul_ddd_16_scalar_32_16_long_scalar", + [Source_n n2; Source_m n1; Dest n6], Mul, allCores; + "neon_mul_qqd_32_scalar", + [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle, allCores; + (* vmla, vmls *) + (* vmla, vmla, vqdmla, vqdmls *) + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul, allCores; + + (* NEON integer shift instructions. *) + (* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *) + "neon_shift_1", [Source n1; Dest n3], Shift, allCores; + (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow, allCores; + vqshl_vrshl_vqrshl_ddd *) + "neon_shift_2", [Source n1; Dest n4], Shift, allCores; + (* vsli, vsri and vshl for qqq *) + "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle, allCores; + "neon_vshl_ddd", [Source n1; Dest n1], Shift, allCores; + "neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)], + Shift_2cycle, allCores; + "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift, allCores; + + (* NEON floating-point instructions. 
*) + (* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *) + (* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *) + "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd, allCores; + "neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)], + Fadd_2cycle, allCores; + (* vsum, fvmx, vfmn *) + "neon_fp_vsum", [Source n1; Dest n5], Fadd, allCores; + "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul, allCores; + "neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)], + Fmul_2cycle, allCores; + (* vmla, vmls *) + "neon_fp_vmla_ddd", + [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd, allCores; + "neon_fp_vmla_qqq", + [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)], + Fmul_then_fadd_2, allCores; + "neon_fp_vmla_ddd_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd, allCores; + "neon_fp_vmla_qqq_scalar", + [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)], + Fmul_then_fadd_2, allCores; + "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd, allCores; + "neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)], + Fmul_then_fadd_2, allCores; + + (* NEON byte permute instructions. *) + (* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev; vext for dd *) + "neon_bp_simple", [Source n1; Dest n2], Permute 1, allCores; + (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1}, allCores; + similarly for vtbx *) + "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2, allCores; + (* all the rest *) + "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3, allCores; + + (* NEON load/store instructions. *) + "neon_ldr", [Dest n1], Ls 1, allCores; + "neon_str", [Source n1], Ls 1, allCores; + "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2, allCores; + "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3, allCores; + "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2, allCores; + "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3, allCores; + "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4, allCores; + "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2, allCores; + "neon_vst1_3_4_regs", [Source n1], Ls 3, allCores; + "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4, allCores; + "neon_vst3_vst4", [Source n1], Ls 4, allCores; + "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3, allCores; + "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5, allCores; + "neon_vst1_vst2_lane", [Source n1], Ls 2, allCores; + "neon_vst3_vst4_lane", [Source n1], Ls 3, allCores; + "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3, allCores; + + (* NEON register transfer instructions. *) + "neon_mcr", [Dest n2], Permute 1, allCores; + "neon_mcr_2_mcrr", [Dest n2], Permute 2, allCores; + (* MRC instructions are in the .tpl file. *) +] + +(* Augment the tuples in the availability table with an extra component + that describes the earliest stage where a source operand may be + required. (It is also possible that an entry in the table has no + source requirements.) 
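   For example, tracing this over the "neon_vaba" entry above, whose
   availability list is [Source_n n2; Source_m n1; Source_d n3; Dest n6],
   the recorded earliest source stage is 1, taken from Source_m n1; the
   Dest entry plays no part in this calculation.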
*) +let calculate_sources = + List.map (fun (name, avail, res, cores) -> + let earliest_stage = + List.fold_left + (fun cur -> fun info -> + match info with + Source stage + | Source_n stage + | Source_m stage + | Source_d stage -> + (match cur with + None -> Some stage + | Some stage' when stage < stage' -> Some stage + | _ -> cur) + | _ -> cur) None avail + in + (name, avail, res, earliest_stage)) + +(* Find the stage, if any, at the end of which a group produces a result. *) +let find_dest (attr, avail, _, _) = + try + find_with_result + (fun av -> match av with + Dest st -> Some (Some st) + | Dest_n_after (after, st) -> Some (Some (after + st)) + | _ -> None) avail + with Not_found -> None + +(* Find the worst-case latency between a producer and a consumer. *) +let worst_case_latency producer (_, _, _, earliest_required) = + let dest = find_dest producer in + match earliest_required, dest with + None, _ -> + (* The consumer doesn't have any source requirements. *) + None + | _, None -> + (* The producer doesn't produce any results (e.g. a store insn). *) + None + | Some consumed, Some produced -> Some (produced - consumed + 1) + +(* Helper function for below. *) +let latency_calc f producer (_, avail, _, _) = + try + let source_avail = find_with_result f avail in + match find_dest producer with + None -> + (* The producer does not produce a result. *) + Some 0 + | Some produced -> + let latency = produced - source_avail + 1 in + (* Latencies below zero are raised to zero since we don't have + delay slots. *) + if latency < 0 then Some 0 else Some latency + with Not_found -> None + +(* Find any Rm latency between a producer and a consumer. If no + Rm source requirement is explicitly specified for the consumer, + return "positive infinity". Also return "positive infinity" if + the latency matches the supplied worst-case latency for this + producer. *) +let get_m_latency producer consumer = + match latency_calc (fun av -> match av with Source_m stage -> Some stage + | _ -> None) producer consumer + with None -> [] | Some latency -> [(Guard_only_m, latency)] + +(* Likewise for Rn. *) +let get_n_latency producer consumer = + match latency_calc (fun av -> match av with Source_n stage -> Some stage + | _ -> None) producer consumer + with None -> [] | Some latency -> [(Guard_only_n, latency)] + +(* Likewise for Rd. *) +let get_d_latency producer consumer = + match + latency_calc (fun av -> match av with Source_d stage -> Some stage + | _ -> None) producer consumer + with None -> [] | Some latency -> [(Guard_only_d, latency)] + +(* Given a producer and a consumer, work out the latency of the producer + to the consumer in each of the four cases (availability information + permitting) identified at the top of this file. Return the + consumer, the worst-case unguarded latency and any guarded latencies. *) +let calculate_latencies producer consumer = + let worst = worst_case_latency producer consumer in + let m_latency = get_m_latency producer consumer in + let n_latency = get_n_latency producer consumer in + let d_latency = get_d_latency producer consumer in + (consumer, worst, m_latency @ n_latency @ d_latency) + +(* Helper function for below. *) +let pick_latency largest worst guards = + let guards = + match worst with + None -> guards + | Some worst -> (Guard_none, worst) :: guards + in + if List.length guards = 0 then None else + let total_latency = + List.fold_left (fun acc -> fun (_, latency) -> acc + latency) 0 guards + in + let average_latency = (float_of_int total_latency) /. 
+ (float_of_int (List.length guards)) in + let rounded_latency = int_of_float (ceil average_latency) in + if rounded_latency = largest then None + else Some (Guard_none, rounded_latency) + +(* Collate all bypasses for a particular producer as required in + worst_case_latencies_and_bypasses. (By this stage there is a maximum + of one bypass from this producer to any particular consumer listed + in LATENCIES.) Use a hash table to collate bypasses with the + same latency and guard. *) +let collate_bypasses (producer_name, _, _, _) largest latencies core = + let ht = Hashtbl.create 42 in + let keys = ref [] in + List.iter ( + fun ((consumer, _, _, _), worst, guards) -> + (* Find out which latency to use. Ignoring latencies that match + the *overall* worst-case latency for this producer (which will + be in define_insn_reservation), we have to examine: + 1. the latency with no guard between this producer and this + consumer; and + 2. any guarded latency. *) + let guard_latency_opt = pick_latency largest worst guards in + match guard_latency_opt with + None -> () + | Some (guard, latency) -> + begin + (if (try ignore (Hashtbl.find ht (guard, latency)); false + with Not_found -> true) then + keys := (guard, latency) :: !keys); + Hashtbl.add ht (guard, latency) ((coreStr core) ^ "_" ^ consumer) + end + ) latencies; + (* The hash table now has bypasses collated so that ones with the + same latency and guard have the same keys. Walk through all the + keys, extract the associated bypasses, and concatenate the names + of the consumers for each bypass. *) + List.map ( + fun ((guard, latency) as key) -> + let consumers = Hashtbl.find_all ht key in + (producer_name, + String.concat ",\\\n " consumers, + latency, + guard) + ) !keys + +(* For every producer, find the worst-case latency between it and + *any* consumer. Also determine (if such a thing exists) the + lowest-latency bypass from each producer to each consumer. Group + the output in such a way that all bypasses with the same producer + and latency are together, and so that bypasses with the worst-case + latency are ignored. *) +let worst_case_latencies_and_bypasses core = + let rec f (worst_acc, bypasses_acc) prev xs = + match xs with + [] -> (worst_acc, bypasses_acc) + | ((producer_name, producer_avail, res_string, _) as producer)::next -> + (* For this particular producer, work out the latencies between + it and every consumer. *) + let latencies = + List.fold_left (fun acc -> fun consumer -> + (calculate_latencies producer consumer) :: acc) + [] (prev @ xs) + in + (* Now work out what the overall worst case latency was for this + particular producer. *) + match latencies with + [] -> assert false + | _ -> + let comp_fn (_, l1, _) (_, l2, _) = + if l1 > l2 then -1 else if l1 = l2 then 0 else 1 + in + let largest = + match List.hd (List.sort comp_fn latencies) with + (_, None, _) -> 0 (* Producer has no consumers. *) + | (_, Some worst, _) -> worst + in + (* Having got the largest latency, collect all bypasses for + this producer and filter out those with that larger + latency. Record the others for later emission. *) + let bypasses = collate_bypasses producer largest latencies core in + (* Go on to process remaining producers, having noted + the result for this one. *) + f ((producer_name, producer_avail, largest, + res_string) :: worst_acc, + bypasses @ bypasses_acc) + (prev @ [producer]) next + in + f ([], []) [] + +(* Emit a helpful comment for a define_insn_reservation. 
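   For instance, an entry whose availability list is [Source n2; Dest n3]
   (such as neon_int_1 above) comes out as the two comment lines
   ";; Instructions using this reservation read their source operands at N2, and"
   ";; produce a result at N3."
   with the text broken onto a fresh ";;" line every other item so the
   generated comment stays readable.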
*) +let write_comment producer avail = + let seen_source = ref false in + let describe info = + let read = if !seen_source then "" else "read " in + match info with + Source stage -> + seen_source := true; + Printf.printf "%stheir source operands at N%d" read stage + | Source_n stage -> + seen_source := true; + Printf.printf "%stheir (D|Q)n operands at N%d" read stage + | Source_m stage -> + seen_source := true; + Printf.printf "%stheir (D|Q)m operands at N%d" read stage + | Source_d stage -> + Printf.printf "%stheir (D|Q)d operands at N%d" read stage + | Dest stage -> + Printf.printf "produce a result at N%d" stage + | Dest_n_after (after, stage) -> + Printf.printf "produce a result at N%d on cycle %d" stage (after + 1) + in + Printf.printf ";; Instructions using this reservation "; + let rec f infos x = + let sep = if x mod 2 = 1 then "" else "\n;;" in + match infos with + [] -> assert false + | [info] -> describe info; Printf.printf ".\n" + | info::(_::[] as infos) -> + describe info; Printf.printf ", and%s " sep; f infos (x+1) + | info::infos -> describe info; Printf.printf ",%s " sep; f infos (x+1) + in + f avail 0 + + +(* Emit a define_insn_reservation for each producer. The latency + written in will be its worst-case latency. *) +let emit_insn_reservations core = + let corestring = coreStr core in + let tunestring = tuneStr core + in List.iter ( + fun (producer, avail, latency, reservation) -> + write_comment producer avail; + Printf.printf "(define_insn_reservation \"%s_%s\" %d\n" + corestring producer latency; + Printf.printf " (and (eq_attr \"tune\" \"%s\")\n" tunestring; + Printf.printf " (eq_attr \"neon_type\" \"%s\"))\n" producer; + let str = + match reservation with + Mul -> "dp" | Mul_2cycle -> "dp_2" | Mul_4cycle -> "dp_4" + | Shift -> "dp" | Shift_2cycle -> "dp_2" + | ALU -> "dp" | ALU_2cycle -> "dp_2" + | Fmul -> "dp" | Fmul_2cycle -> "dp_2" + | Fadd -> "fadd" | Fadd_2cycle -> "fadd_2" + | Ls 1 -> "ls" + | Ls n -> "ls_" ^ (string_of_int n) + | Permute 1 -> "perm" + | Permute n -> "perm_" ^ (string_of_int n) + | Fmul_then_fadd -> "fmul_then_fadd" + | Fmul_then_fadd_2 -> "fmul_then_fadd_2" + in + Printf.printf " \"%s_neon_%s\")\n\n" corestring str + ) + +(* Given a guard description, return the name of the C function to + be used as the guard for define_bypass. *) +let guard_fn g = + match g with + Guard_only_m -> "arm_neon_only_m_dependency" + | Guard_only_n -> "arm_neon_only_n_dependency" + | Guard_only_d -> "arm_neon_only_d_dependency" + | Guard_none -> assert false + +(* Emit a define_bypass for each bypass. *) +let emit_bypasses core = + List.iter ( + fun (producer, consumers, latency, guard) -> + Printf.printf "(define_bypass %d \"%s_%s\"\n" + latency (coreStr core) producer; + + if guard = Guard_none then + Printf.printf " \"%s\")\n\n" consumers + else + begin + Printf.printf " \"%s\"\n" consumers; + Printf.printf " \"%s\")\n\n" (guard_fn guard) + end + ) + + +let calculate_per_core_availability_table core availability_table = + let table = calculate_sources availability_table in + let worst_cases, bypasses = worst_case_latencies_and_bypasses core table in + emit_insn_reservations core (List.rev worst_cases); + Printf.printf ";; Exceptions to the default latencies.\n\n"; + emit_bypasses core bypasses + +let calculate_core_availability_table core availability_table = +let filter_core = List.filter (fun (_, _, _, cores) + -> List.exists ((=) core) cores) +in calculate_per_core_availability_table core (filter_core availability_table) + + +(* Program entry point. 
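   As a concrete, editor-supplied example of the end-to-end result: for
   neon_int_1 on Cortex-A8 the worst-case latency works out to 3, since the
   result appears at N3 and the earliest source stage required anywhere in
   the filtered table is N1, giving 3 - 1 + 1.  emit_insn_reservations
   therefore prints a define_insn_reservation named "cortex_a8_neon_int_1"
   with latency 3, conditioned on (eq_attr "tune" "cortexa8") and
   (eq_attr "neon_type" "neon_int_1"), using the "cortex_a8_neon_dp"
   reservation.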
*) +let main = + List.map (fun core -> calculate_core_availability_table + core availability_table) allCores diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml new file mode 100644 index 000000000..63fbbbf2c --- /dev/null +++ b/gcc/config/arm/neon-testgen.ml @@ -0,0 +1,283 @@ +(* Auto-generate ARM Neon intrinsics tests. + Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-testgen neon.cmo neon-testgen.ml + + Run with: + cd /path/to/gcc/testsuite/gcc.target/arm/neon + /path/to/neon-testgen +*) + +open Neon + +type c_type_flags = Pointer | Const + +(* Open a test source file. *) +let open_test_file dir name = + try + open_out (dir ^ "/" ^ name ^ ".c") + with Sys_error str -> + failwith ("Could not create test source file " ^ name ^ ": " ^ str) + +(* Emit prologue code to a test source file. *) +let emit_prologue chan test_name = + Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name; + Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n"; + Printf.fprintf chan "/* { dg-do assemble } */\n"; + Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n"; + Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n"; + Printf.fprintf chan "/* { dg-add-options arm_neon } */\n"; + Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"; + Printf.fprintf chan "void test_%s (void)\n{\n" test_name + +(* Emit declarations of local variables that are going to be passed + to an intrinsic, together with one to take a returned value if needed. *) +let emit_automatics chan c_types features = + let emit () = + ignore ( + List.fold_left (fun arg_number -> fun (flags, ty) -> + let pointer_bit = + if List.mem Pointer flags then "*" else "" + in + (* Const arguments to builtins are directly + written in as constants. *) + if not (List.mem Const flags) then + Printf.fprintf chan " %s %sarg%d_%s;\n" + ty pointer_bit arg_number ty; + arg_number + 1) + 0 (List.tl c_types)) + in + match c_types with + (_, return_ty) :: tys -> + if return_ty <> "void" then begin + (* The intrinsic returns a value. We need to do explict register + allocation for vget_low tests or they fail because of copy + elimination. *) + ((if List.mem Fixed_return_reg features then + Printf.fprintf chan " register %s out_%s asm (\"d18\");\n" + return_ty return_ty + else + Printf.fprintf chan " %s out_%s;\n" return_ty return_ty); + emit ()) + end else + (* The intrinsic does not return a value. *) + emit () + | _ -> assert false + +(* Emit code to call an intrinsic. 
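   Putting the prologue, the automatics and this call together, the body of
   each generated test is a handful of declarations plus a single intrinsic
   call.  As a hedged sketch only (the vadd/s8 naming comes from
   intrinsic_name and string_of_elt in neon.ml, which are not shown here,
   and the two-operand int8x8_t arity is assumed from the ops table), the
   emitted function looks like:

     void test_vadds8 (void)
     {
       int8x8_t out_int8x8_t;
       int8x8_t arg0_int8x8_t;
       int8x8_t arg1_int8x8_t;

       out_int8x8_t = vadd_s8 (arg0_int8x8_t, arg1_int8x8_t);
     }
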
*) +let emit_call chan const_valuator c_types name elt_ty = + (if snd (List.hd c_types) <> "void" then + Printf.fprintf chan " out_%s = " (snd (List.hd c_types)) + else + Printf.fprintf chan " "); + Printf.fprintf chan "%s_%s (" (intrinsic_name name) (string_of_elt elt_ty); + let print_arg chan arg_number (flags, ty) = + (* If the argument is of const type, then directly write in the + constant now. *) + if List.mem Const flags then + match const_valuator with + None -> + if List.mem Pointer flags then + Printf.fprintf chan "0" + else + Printf.fprintf chan "1" + | Some f -> Printf.fprintf chan "%s" (string_of_int (f arg_number)) + else + Printf.fprintf chan "arg%d_%s" arg_number ty + in + let rec print_args arg_number tys = + match tys with + [] -> () + | [ty] -> print_arg chan arg_number ty + | ty::tys -> + print_arg chan arg_number ty; + Printf.fprintf chan ", "; + print_args (arg_number + 1) tys + in + print_args 0 (List.tl c_types); + Printf.fprintf chan ");\n" + +(* Emit epilogue code to a test source file. *) +let emit_epilogue chan features regexps = + let no_op = List.exists (fun feature -> feature = No_op) features in + Printf.fprintf chan "}\n\n"; + (if not no_op then + List.iter (fun regexp -> + Printf.fprintf chan + "/* { dg-final { scan-assembler \"%s\" } } */\n" regexp) + regexps + else + () + ); + Printf.fprintf chan "/* { dg-final { cleanup-saved-temps } } */\n" + +(* Check a list of C types to determine which ones are pointers and which + ones are const. *) +let check_types tys = + let tys' = + List.map (fun ty -> + let len = String.length ty in + if len > 2 && String.get ty (len - 2) = ' ' + && String.get ty (len - 1) = '*' + then ([Pointer], String.sub ty 0 (len - 2)) + else ([], ty)) tys + in + List.map (fun (flags, ty) -> + if String.length ty > 6 && String.sub ty 0 6 = "const " + then (Const :: flags, String.sub ty 6 ((String.length ty) - 6)) + else (flags, ty)) tys' + +(* Given an intrinsic shape, produce a regexp that will match + the right-hand sides of instructions generated by an intrinsic of + that shape. 
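   For example, assuming the helper "commas" from neon.ml joins its items
   with a comma and a space (as the Long case below does by hand), the
   shape All (3, Dreg) yields three copies of the Dreg pattern
   \\[dD\\]\\[0-9\\]+ joined that way, which matches operand text such as
   d0, d1, d2 in the assembler output.  test_intrinsic further down then
   prefixes this with the expected mnemonic and some whitespace, and
   suffixes it with the optional writeback/comment pattern, to form the
   final scan-assembler directive.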
*) +let rec analyze_shape shape = + let rec n_things n thing = + match n with + 0 -> [] + | n -> thing :: (n_things (n - 1) thing) + in + let rec analyze_shape_elt elt = + match elt with + Dreg -> "\\[dD\\]\\[0-9\\]+" + | Qreg -> "\\[qQ\\]\\[0-9\\]+" + | Corereg -> "\\[rR\\]\\[0-9\\]+" + | Immed -> "#\\[0-9\\]+" + | VecArray (1, elt) -> + let elt_regexp = analyze_shape_elt elt in + "((\\\\\\{" ^ elt_regexp ^ "\\\\\\})|(" ^ elt_regexp ^ "))" + | VecArray (n, elt) -> + let elt_regexp = analyze_shape_elt elt in + let alt1 = elt_regexp ^ "-" ^ elt_regexp in + let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in + "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" + | (PtrTo elt | CstPtrTo elt) -> + "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]" + | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" + | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")" + in + match shape with + All (n, elt) -> commas analyze_shape_elt (n_things n elt) "" + | Long -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Dreg) ^ + ", " ^ (analyze_shape_elt Dreg) + | Long_noreg elt -> (analyze_shape_elt elt) ^ ", " ^ (analyze_shape_elt elt) + | Wide -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ + ", " ^ (analyze_shape_elt Dreg) + | Wide_noreg elt -> analyze_shape (Long_noreg elt) + | Narrow -> (analyze_shape_elt Dreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ + ", " ^ (analyze_shape_elt Qreg) + | Use_operands elts -> commas analyze_shape_elt (Array.to_list elts) "" + | By_scalar Dreg -> + analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) + | By_scalar Qreg -> + analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) + | By_scalar _ -> assert false + | Wide_lane -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Wide_scalar -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Pair_result elt -> + let elt_regexp = analyze_shape_elt elt in + elt_regexp ^ ", " ^ elt_regexp + | Unary_scalar _ -> "FIXME Unary_scalar" + | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) + | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) + | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) + +(* Generate tests for one intrinsic. *) +let test_intrinsic dir opcode features shape name munge elt_ty = + (* Open the test source file. *) + let test_name = name ^ (string_of_elt elt_ty) in + let chan = open_test_file dir test_name in + (* Work out what argument and return types the intrinsic has. *) + let c_arity, new_elt_ty = munge shape elt_ty in + let c_types = check_types (strings_of_arity c_arity) in + (* Extract any constant valuator (a function specifying what constant + values are to be written into the intrinsic call) from the features + list. *) + let const_valuator = + try + match (List.find (fun feature -> match feature with + Const_valuator _ -> true + | _ -> false) features) with + Const_valuator f -> Some f + | _ -> assert false + with Not_found -> None + in + (* Work out what instruction name(s) to expect. *) + let insns = get_insn_names features name in + let no_suffix = (new_elt_ty = NoElts) in + let insns = + if no_suffix then insns + else List.map (fun insn -> + let suffix = string_of_elt_dots new_elt_ty in + insn ^ "\\." 
^ suffix) insns + in + (* Construct a regexp to match against the expected instruction name(s). *) + let insn_regexp = + match insns with + [] -> assert false + | [insn] -> insn + | _ -> + let rec calc_regexp insns cur_regexp = + match insns with + [] -> cur_regexp + | [insn] -> cur_regexp ^ "(" ^ insn ^ "))" + | insn::insns -> calc_regexp insns (cur_regexp ^ "(" ^ insn ^ ")|") + in calc_regexp insns "(" + in + (* Construct regexps to match against the instructions that this + intrinsic expands to. Watch out for any writeback character and + comments after the instruction. *) + let regexps = List.map (fun regexp -> insn_regexp ^ "\\[ \t\\]+" ^ regexp ^ + "!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n") + (analyze_all_shapes features shape analyze_shape) + in + (* Emit file and function prologues. *) + emit_prologue chan test_name; + (* Emit local variable declarations. *) + emit_automatics chan c_types features; + Printf.fprintf chan "\n"; + (* Emit the call to the intrinsic. *) + emit_call chan const_valuator c_types name elt_ty; + (* Emit the function epilogue and the DejaGNU scan-assembler directives. *) + emit_epilogue chan features regexps; + (* Close the test file. *) + close_out chan + +(* Generate tests for one element of the "ops" table. *) +let test_intrinsic_group dir (opcode, features, shape, name, munge, types) = + List.iter (test_intrinsic dir opcode features shape name munge) types + +(* Program entry point. *) +let _ = + let directory = if Array.length Sys.argv <> 1 then Sys.argv.(1) else "." in + List.iter (test_intrinsic_group directory) (reinterp @ ops) + diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md new file mode 100644 index 000000000..247dc1ff4 --- /dev/null +++ b/gcc/config/arm/neon.md @@ -0,0 +1,5476 @@ +;; ARM NEON coprocessor Machine Description +;; Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Constants for unspecs. 
+(define_constants + [(UNSPEC_ASHIFT_SIGNED 65) + (UNSPEC_ASHIFT_UNSIGNED 66) + (UNSPEC_VABD 69) + (UNSPEC_VABDL 70) + (UNSPEC_VADD 72) + (UNSPEC_VADDHN 73) + (UNSPEC_VADDL 74) + (UNSPEC_VADDW 75) + (UNSPEC_VBSL 78) + (UNSPEC_VCAGE 79) + (UNSPEC_VCAGT 80) + (UNSPEC_VCEQ 81) + (UNSPEC_VCGE 82) + (UNSPEC_VCGT 83) + (UNSPEC_VCLS 84) + (UNSPEC_VCVT 88) + (UNSPEC_VCVT_N 89) + (UNSPEC_VEXT 93) + (UNSPEC_VHADD 97) + (UNSPEC_VHSUB 98) + (UNSPEC_VLD1 99) + (UNSPEC_VLD1_DUP 100) + (UNSPEC_VLD1_LANE 101) + (UNSPEC_VLD2 102) + (UNSPEC_VLD2_DUP 103) + (UNSPEC_VLD2_LANE 104) + (UNSPEC_VLD3 105) + (UNSPEC_VLD3A 106) + (UNSPEC_VLD3B 107) + (UNSPEC_VLD3_DUP 108) + (UNSPEC_VLD3_LANE 109) + (UNSPEC_VLD4 110) + (UNSPEC_VLD4A 111) + (UNSPEC_VLD4B 112) + (UNSPEC_VLD4_DUP 113) + (UNSPEC_VLD4_LANE 114) + (UNSPEC_VMAX 115) + (UNSPEC_VMIN 116) + (UNSPEC_VMLA 117) + (UNSPEC_VMLAL 118) + (UNSPEC_VMLA_LANE 119) + (UNSPEC_VMLAL_LANE 120) + (UNSPEC_VMLS 121) + (UNSPEC_VMLSL 122) + (UNSPEC_VMLS_LANE 123) + (UNSPEC_VMLSL_LANE 124) + (UNSPEC_VMOVL 125) + (UNSPEC_VMOVN 126) + (UNSPEC_VMUL 127) + (UNSPEC_VMULL 128) + (UNSPEC_VMUL_LANE 129) + (UNSPEC_VMULL_LANE 130) + (UNSPEC_VPADAL 135) + (UNSPEC_VPADD 136) + (UNSPEC_VPADDL 137) + (UNSPEC_VPMAX 138) + (UNSPEC_VPMIN 139) + (UNSPEC_VPSMAX 140) + (UNSPEC_VPSMIN 141) + (UNSPEC_VPUMAX 142) + (UNSPEC_VPUMIN 143) + (UNSPEC_VQABS 144) + (UNSPEC_VQADD 145) + (UNSPEC_VQDMLAL 146) + (UNSPEC_VQDMLAL_LANE 147) + (UNSPEC_VQDMLSL 148) + (UNSPEC_VQDMLSL_LANE 149) + (UNSPEC_VQDMULH 150) + (UNSPEC_VQDMULH_LANE 151) + (UNSPEC_VQDMULL 152) + (UNSPEC_VQDMULL_LANE 153) + (UNSPEC_VQMOVN 154) + (UNSPEC_VQMOVUN 155) + (UNSPEC_VQNEG 156) + (UNSPEC_VQSHL 157) + (UNSPEC_VQSHL_N 158) + (UNSPEC_VQSHLU_N 159) + (UNSPEC_VQSHRN_N 160) + (UNSPEC_VQSHRUN_N 161) + (UNSPEC_VQSUB 162) + (UNSPEC_VRECPE 163) + (UNSPEC_VRECPS 164) + (UNSPEC_VREV16 165) + (UNSPEC_VREV32 166) + (UNSPEC_VREV64 167) + (UNSPEC_VRSQRTE 168) + (UNSPEC_VRSQRTS 169) + (UNSPEC_VSHL 171) + (UNSPEC_VSHLL_N 172) + (UNSPEC_VSHL_N 173) + (UNSPEC_VSHR_N 174) + (UNSPEC_VSHRN_N 175) + (UNSPEC_VSLI 176) + (UNSPEC_VSRA_N 177) + (UNSPEC_VSRI 178) + (UNSPEC_VST1 179) + (UNSPEC_VST1_LANE 180) + (UNSPEC_VST2 181) + (UNSPEC_VST2_LANE 182) + (UNSPEC_VST3 183) + (UNSPEC_VST3A 184) + (UNSPEC_VST3B 185) + (UNSPEC_VST3_LANE 186) + (UNSPEC_VST4 187) + (UNSPEC_VST4A 188) + (UNSPEC_VST4B 189) + (UNSPEC_VST4_LANE 190) + (UNSPEC_VSTRUCTDUMMY 191) + (UNSPEC_VSUB 192) + (UNSPEC_VSUBHN 193) + (UNSPEC_VSUBL 194) + (UNSPEC_VSUBW 195) + (UNSPEC_VTBL 196) + (UNSPEC_VTBX 197) + (UNSPEC_VTRN1 198) + (UNSPEC_VTRN2 199) + (UNSPEC_VTST 200) + (UNSPEC_VUZP1 201) + (UNSPEC_VUZP2 202) + (UNSPEC_VZIP1 203) + (UNSPEC_VZIP2 204) + (UNSPEC_MISALIGNED_ACCESS 205) + (UNSPEC_VCLE 206) + (UNSPEC_VCLT 207)]) + + +;; Attribute used to permit string comparisons against in +;; neon_type attribute definitions. 
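;; (For example, the quad_halves_* reduction patterns near the end of this
;; file test (eq_attr "vqh_mnem" "vadd") in their neon_type settings, so the
;; vadd form is classed as neon_int_1 while the vmin/vmax forms fall back to
;; neon_int_5.)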
+(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) + +(define_insn "*neon_mov" + [(set (match_operand:VD 0 "nonimmediate_operand" + "=w,Uv,w, w, ?r,?w,?r,?r, ?Us") + (match_operand:VD 1 "general_operand" + " w,w, Dn,Uvi, w, r, r, Usi,r"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + if (which_alternative == 2) + { + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_move (operands[1], mode, + &operands[1], &width); + + gcc_assert (is_valid != 0); + + if (width == 0) + return "vmov.f32\t%P0, %1 @ "; + else + sprintf (templ, "vmov.i%d\t%%P0, %%1 @ ", width); + + return templ; + } + + /* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp + below must be changed to output_move_neon (which will use the + element/structure loads/stores), and the constraint changed to 'Um' instead + of 'Uv'. */ + + switch (which_alternative) + { + case 0: return "vmov\t%P0, %P1 @ "; + case 1: case 3: return output_move_vfp (operands); + case 2: gcc_unreachable (); + case 4: return "vmov\t%Q0, %R0, %P1 @ "; + case 5: return "vmov\t%P0, %Q1, %R1 @ "; + default: return output_move_double (operands); + } +} + [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*") + (set_attr "type" "*,f_stored,*,f_loadd,*,*,alu,load2,store2") + (set_attr "insn" "*,*,*,*,*,*,mov,*,*") + (set_attr "length" "4,4,4,4,4,4,8,8,8") + (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")]) + +(define_insn "*neon_mov" + [(set (match_operand:VQXMOV 0 "nonimmediate_operand" + "=w,Un,w, w, ?r,?w,?r,?r, ?Us") + (match_operand:VQXMOV 1 "general_operand" + " w,w, Dn,Uni, w, r, r, Usi, r"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + if (which_alternative == 2) + { + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_move (operands[1], mode, + &operands[1], &width); + + gcc_assert (is_valid != 0); + + if (width == 0) + return "vmov.f32\t%q0, %1 @ "; + else + sprintf (templ, "vmov.i%d\t%%q0, %%1 @ ", width); + + return templ; + } + + switch (which_alternative) + { + case 0: return "vmov\t%q0, %q1 @ "; + case 1: case 3: return output_move_neon (operands); + case 2: gcc_unreachable (); + case 4: return "vmov\t%Q0, %R0, %e1 @ \;vmov\t%J0, %K0, %f1"; + case 5: return "vmov\t%e0, %Q1, %R1 @ \;vmov\t%f0, %J1, %K1"; + default: return output_move_quad (operands); + } +} + [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\ + neon_mrrc,neon_mcr_2_mcrr,*,*,*") + (set_attr "type" "*,*,*,*,*,*,alu,load4,store4") + (set_attr "insn" "*,*,*,*,*,*,mov,*,*") + (set_attr "length" "4,8,4,8,8,8,16,8,16") + (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")]) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_NEON" +{ + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (TImode, operands[1]); + } +}) + +(define_expand "mov" + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") + (match_operand:VSTRUCT 1 "general_operand" ""))] + "TARGET_NEON" +{ + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (mode, operands[1]); + } +}) + +(define_insn "*neon_mov" + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" 
"=w,Ut,w") + (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: return "#"; + case 1: case 2: return output_move_neon (operands); + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_ldm_2") + (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) + +(define_split + [(set (match_operand:EI 0 "s_register_operand" "") + (match_operand:EI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (DImode, rdest + 4); + src[1] = gen_rtx_REG (DImode, rsrc + 4); + + neon_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:OI 0 "s_register_operand" "") + (match_operand:OI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + + neon_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:CI 0 "s_register_operand" "") + (match_operand:CI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[3], src[3]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + dest[2] = gen_rtx_REG (TImode, rdest + 8); + src[2] = gen_rtx_REG (TImode, rsrc + 8); + + neon_disambiguate_copy (operands, dest, src, 3); +}) + +(define_split + [(set (match_operand:XI 0 "s_register_operand" "") + (match_operand:XI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[4], src[4]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + dest[2] = gen_rtx_REG (TImode, rdest + 8); + src[2] = gen_rtx_REG (TImode, rsrc + 8); + dest[3] = gen_rtx_REG (TImode, rdest + 12); + src[3] = gen_rtx_REG (TImode, rsrc + 12); + + neon_disambiguate_copy (operands, dest, src, 4); +}) + +(define_expand "movmisalign" + [(set (match_operand:VDQX 0 "nonimmediate_operand" "") + (unspec:VDQX [(match_operand:VDQX 1 "general_operand" "")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + /* This pattern is not permitted to fail during expansion: if both arguments + are non-registers (e.g. memory := constant, which can be created by the + auto-vectorizer), force operand 1 into a register. 
*/ + if (!s_register_operand (operands[0], mode) + && !s_register_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); +}) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VDX 0 "memory_operand" "=Um") + (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vst1.\t{%P1}, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand:VDX 1 "memory_operand" " Um")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vld1.\t{%P0}, %A1" + [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VQX 0 "memory_operand" "=Um") + (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vst1.\t{%q1}, %A0" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand:VQX 1 "memory_operand" " Um")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vld1.\t{%q0}, %A1" + [(set_attr "neon_type" "neon_vld1_1_2_regs")]) + +(define_insn "vec_set_internal" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (vec_merge:VD + (vec_duplicate:VD + (match_operand: 1 "s_register_operand" "r")) + (match_operand:VD 3 "s_register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "TARGET_NEON" +{ + int elt = ffs ((int) INTVAL (operands[2])) - 1; + if (BYTES_BIG_ENDIAN) + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + + return "vmov%?.\t%P0[%c2], %1"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_mcr")]) + +(define_insn "vec_set_internal" + [(set (match_operand:VQ 0 "s_register_operand" "=w") + (vec_merge:VQ + (vec_duplicate:VQ + (match_operand: 1 "s_register_operand" "r")) + (match_operand:VQ 3 "s_register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; + int half_elts = GET_MODE_NUNITS (mode) / 2; + int elt = elem % half_elts; + int hi = (elem / half_elts) * 2; + int regno = REGNO (operands[0]); + + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + + operands[0] = gen_rtx_REG (mode, regno + hi); + operands[2] = GEN_INT (elt); + + return "vmov%?.\t%P0[%c2], %1"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_mcr")] +) + +(define_insn "vec_setv2di_internal" + [(set (match_operand:V2DI 0 "s_register_operand" "=w") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "s_register_operand" "r")) + (match_operand:V2DI 3 "s_register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; + int regno = REGNO (operands[0]) + 2 * elem; + + operands[0] = gen_rtx_REG (DImode, regno); + + return "vmov%?\t%P0, %Q1, %R1"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_mcr_2_mcrr")] +) + +(define_expand "vec_set" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_vec_set_internal (operands[0], operands[1], + GEN_INT 
(elem), operands[0])); + DONE; +}) + +(define_insn "vec_extract" + [(set (match_operand: 0 "s_register_operand" "=r") + (vec_select: + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov%?.\t%0, %P1[%c2]"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "vec_extract" + [(set (match_operand: 0 "s_register_operand" "=r") + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + "TARGET_NEON" +{ + int half_elts = GET_MODE_NUNITS (mode) / 2; + int elt = INTVAL (operands[2]) % half_elts; + int hi = (INTVAL (operands[2]) / half_elts) * 2; + int regno = REGNO (operands[1]); + + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + + operands[1] = gen_rtx_REG (mode, regno + hi); + operands[2] = GEN_INT (elt); + + return "vmov%?.\t%0, %P1[%c2]"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "vec_extractv2di" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (vec_select:DI + (match_operand:V2DI 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); + + operands[1] = gen_rtx_REG (DImode, regno); + + return "vmov%?\t%Q0, %R0, %P1 @ v2di"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_int_1")] +) + +(define_expand "vec_init" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand 1 "" "")] + "TARGET_NEON" +{ + neon_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +;; Doubleword and quadword arithmetic. + +;; NOTE: some other instructions also support 64-bit integer +;; element size, which we could potentially use for "long long" operations. + +(define_insn "*add3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vadd.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_1")))] +) + +(define_insn "adddi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0") + (match_operand:DI 2 "s_register_operand" "w,r,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vadd.i64\t%P0, %P1, %P2"; + case 1: return "#"; + case 2: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "conds" "*,clob,clob") + (set_attr "length" "*,8,8")] +) + +(define_insn "*sub3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "vsub.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_2")))] +) + +(define_insn "subdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0") + (match_operand:DI 2 "s_register_operand" "w,r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vsub.i64\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 2: /* fall through */ + case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_2,*,*,*") + (set_attr "conds" "*,clob,clob,clob") + (set_attr "length" "*,8,8,8")] +) + +(define_insn "*mul3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vmul.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qqq_8_16_32_ddd_32")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] +) + +(define_insn "mul3add_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")) + (match_operand:VDQ 1 "s_register_operand" "0")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vmla.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) + +(define_insn "mul3negadd_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0") + (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w"))))] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "vmls.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) + +(define_insn "ior3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") + (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") + (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vorr\t%0, %1, %2"; + case 1: return neon_output_logic_immediate ("vorr", &operands[2], + mode, 0, VALID_NEON_QREG_MODE (mode)); + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1")] +) + +(define_insn "iordi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") + (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vorr\t%P0, %P1, %P2"; + case 1: return neon_output_logic_immediate ("vorr", &operands[2], + DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: return "#"; + case 3: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") + (set_attr "length" "*,*,8,8")] +) + +;; The concrete forms of the Neon immediate-logic instructions are vbic and +;; vorr. We support the pseudo-instruction vand instead, because that +;; corresponds to the canonical form the middle-end expects to use for +;; immediate bitwise-ANDs. 
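;; As an illustrative sketch (editorial, not taken from the upstream
;; sources): a C fragment such as
;;   uint32x4_t f (uint32x4_t x) { return vandq_u32 (x, vdupq_n_u32 (~0xffu)); }
;; can reach the vector AND pattern below as an AND with a constant vector,
;; and the immediate alternative ultimately executes as a bit-clear (vbic)
;; of the complemented immediate, #255 in this case.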
+ +(define_insn "and3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") + (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") + (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vand\t%0, %1, %2"; + case 1: return neon_output_logic_immediate ("vand", &operands[2], + mode, 1, VALID_NEON_QREG_MODE (mode)); + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1")] +) + +(define_insn "anddi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") + (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vand\t%P0, %P1, %P2"; + case 1: return neon_output_logic_immediate ("vand", &operands[2], + DImode, 1, VALID_NEON_QREG_MODE (DImode)); + case 2: return "#"; + case 3: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") + (set_attr "length" "*,*,8,8")] +) + +(define_insn "orn3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] + "TARGET_NEON" + "vorn\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_1")] +) + +(define_insn "orndi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") + (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0") + (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))] + "TARGET_NEON" + "@ + vorn\t%P0, %P1, %P2 + # + #" + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "length" "*,8,8")] +) + +(define_insn "bic3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))] + "TARGET_NEON" + "vbic\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_1")] +) + +;; Compare to *anddi_notdi_di. 
+(define_insn "bicdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0")) + (match_operand:DI 1 "s_register_operand" "w,0,r")))] + "TARGET_NEON" + "@ + vbic\t%P0, %P1, %P2 + # + #" + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "length" "*,8,8")] +) + +(define_insn "xor3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON" + "veor\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_1")] +) + +(define_insn "xordi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") + (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r") + (match_operand:DI 2 "s_register_operand" "w,r,r")))] + "TARGET_NEON" + "@ + veor\t%P0, %P1, %P2 + # + #" + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "length" "*,8,8")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vmvn\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) + +(define_insn "abs2" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vabs.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_3")))] +) + +(define_insn "neg2" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vneg.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_3")))] +) + +(define_insn "*umin3_neon" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmin.\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) + +(define_insn "*umax3_neon" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmax.\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) + +(define_insn "*smin3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmin.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) + +(define_insn "*smax3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmax.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) + +; TODO: V2DI shifts are current disabled because there are bugs in the +; generic vectorizer code. 
It ends up creating a V2DI constructor with +; SImode elements. + +(define_insn "vashl3" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vshl.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) + +; Used for implementing logical shift-right, which is a left-shift by a negative +; amount, with signed operands. This is essentially the same as ashl3 +; above, but using an unspec in case GCC tries anything tricky with negative +; shift amounts. + +(define_insn "ashl3_signed" + [(set (match_operand:VDQI 0 "s_register_operand" "=w") + (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") + (match_operand:VDQI 2 "s_register_operand" "w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_NEON" + "vshl.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) + +; Used for implementing logical shift-right, which is a left-shift by a negative +; amount, with unsigned operands. + +(define_insn "ashl3_unsigned" + [(set (match_operand:VDQI 0 "s_register_operand" "=w") + (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") + (match_operand:VDQI 2 "s_register_operand" "w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_NEON" + "vshl.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) + +(define_expand "vashr3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "s_register_operand" "")))] + "TARGET_NEON" +{ + rtx neg = gen_reg_rtx (mode); + + emit_insn (gen_neg2 (neg, operands[2])); + emit_insn (gen_ashl3_signed (operands[0], operands[1], neg)); + + DONE; +}) + +(define_expand "vlshr3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "s_register_operand" "")))] + "TARGET_NEON" +{ + rtx neg = gen_reg_rtx (mode); + + emit_insn (gen_neg2 (neg, operands[2])); + emit_insn (gen_ashl3_unsigned (operands[0], operands[1], neg)); + + DONE; +}) + +;; Widening operations + +(define_insn "widen_ssum3" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (sign_extend: + (match_operand:VW 1 "s_register_operand" "%w")) + (match_operand: 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vaddw.\t%q0, %q2, %P1" + [(set_attr "neon_type" "neon_int_3")] +) + +(define_insn "widen_usum3" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (zero_extend: + (match_operand:VW 1 "s_register_operand" "%w")) + (match_operand: 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vaddw.\t%q0, %q2, %P1" + [(set_attr "neon_type" "neon_int_3")] +) + +;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit +;; shift-count granularity. That's good enough for the middle-end's current +;; needs. + +(define_expand "vec_shr_" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand:VDQ 1 "s_register_operand" "") + (match_operand:SI 2 "const_multiple_of_8_operand" "")] + "TARGET_NEON" +{ + rtx zero_reg; + HOST_WIDE_INT num_bits = INTVAL (operands[2]); + const int width = GET_MODE_BITSIZE (mode); + const enum machine_mode bvecmode = (width == 128) ? 
V16QImode : V8QImode; + rtx (*gen_ext) (rtx, rtx, rtx, rtx) = + (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; + + if (num_bits == width) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); + operands[0] = gen_lowpart (bvecmode, operands[0]); + operands[1] = gen_lowpart (bvecmode, operands[1]); + + emit_insn (gen_ext (operands[0], operands[1], zero_reg, + GEN_INT (num_bits / BITS_PER_UNIT))); + DONE; +}) + +(define_expand "vec_shl_" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand:VDQ 1 "s_register_operand" "") + (match_operand:SI 2 "const_multiple_of_8_operand" "")] + "TARGET_NEON" +{ + rtx zero_reg; + HOST_WIDE_INT num_bits = INTVAL (operands[2]); + const int width = GET_MODE_BITSIZE (mode); + const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; + rtx (*gen_ext) (rtx, rtx, rtx, rtx) = + (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; + + if (num_bits == 0) + { + emit_move_insn (operands[0], CONST0_RTX (mode)); + DONE; + } + + num_bits = width - num_bits; + + zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); + operands[0] = gen_lowpart (bvecmode, operands[0]); + operands[1] = gen_lowpart (bvecmode, operands[1]); + + emit_insn (gen_ext (operands[0], zero_reg, operands[1], + GEN_INT (num_bits / BITS_PER_UNIT))); + DONE; +}) + +;; Helpers for quad-word reduction operations + +; Add (or smin, smax...) the low N/2 elements of the N-element vector +; operand[1] to the high N/2 elements of same. Put the result in operand[0], an +; N/2-element vector. + +(define_insn "quad_halves_v4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") + (vqh_ops:V2SI + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SI (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_NEON" + ".32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) + +(define_insn "quad_halves_v4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (vqhs_ops:V2SF + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SF (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_NEON && flag_unsafe_math_optimizations" + ".f32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) + +(define_insn "quad_halves_v8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "+w") + (vqh_ops:V4HI + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (vec_select:V4HI (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] + "TARGET_NEON" + ".16\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) + +(define_insn "quad_halves_v16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "+w") + (vqh_ops:V8QI + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (vec_select:V8QI (match_dup 1) + (parallel 
[(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] + "TARGET_NEON" + ".8\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set (attr "neon_type") + (if_then_else (eq_attr "vqh_mnem" "vadd") + (const_string "neon_int_1") (const_string "neon_int_5")))] +) + +; FIXME: We wouldn't need the following insns if we could write subregs of +; vector registers. Make an attempt at removing unnecessary moves, though +; we're really at the mercy of the register allocator. + +(define_insn "neon_move_lo_quad_" + [(set (match_operand:ANY128 0 "s_register_operand" "+w") + (vec_concat:ANY128 + (match_operand: 1 "s_register_operand" "w") + (vec_select: + (match_dup 0) + (match_operand:ANY128 2 "vect_par_constant_high" ""))))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%e0, %P1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_move_hi_quad_" + [(set (match_operand:ANY128 0 "s_register_operand" "+w") + (vec_concat:ANY128 + (vec_select: + (match_dup 0) + (match_operand:ANY128 2 "vect_par_constant_low" "")) + (match_operand: 1 "s_register_operand" "w")))] + + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%f0, %P1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_expand "move_hi_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" +{ + rtvec v = rtvec_alloc (/2); + rtx t1; + int i; + + for (i=0; i < (/2); i++) + RTVEC_ELT (v, i) = GEN_INT (i); + + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_move_hi_quad_ (operands[0], operands[1], t1)); + + DONE; +}) + +(define_expand "move_lo_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" +{ + rtvec v = rtvec_alloc (/2); + rtx t1; + int i; + + for (i=0; i < (/2); i++) + RTVEC_ELT (v, i) = GEN_INT ((/2) + i); + + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_move_lo_quad_ (operands[0], operands[1], t1)); + + DONE; +}) + +;; Reduction operations + +(define_expand "reduc_splus_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpadd_internal); + DONE; +}) + +(define_expand "reduc_splus_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_plus (step1, operands[1])); + emit_insn (gen_reduc_splus_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_insn "reduc_splus_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w") + (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] + UNSPEC_VPADD))] + "TARGET_NEON" + "vadd.i64\t%e0, %e1, %f1" + [(set_attr "neon_type" "neon_int_1")] +) + +;; NEON does not distinguish between signed and unsigned addition except on +;; widening operations. 
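+; Illustrative sketch (function name is arbitrary): modulo-2^N addition is
+; identical for signed and unsigned elements, so an unsigned sum reduction
+; such as the loop below can simply reuse reduc_splus_<mode>:
+;
+;   #include <stdint.h>
+;   uint32_t sum_u32 (const uint32_t *a, int n)
+;   {
+;     uint32_t s = 0;
+;     for (int i = 0; i < n; i++)
+;       s += a[i];
+;     return s;
+;   }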
+(define_expand "reduc_uplus_" + [(match_operand:VDQI 0 "s_register_operand" "") + (match_operand:VDQI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_reduc_splus_ (operands[0], operands[1])); + DONE; +}) + +(define_expand "reduc_smin_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpsmin); + DONE; +}) + +(define_expand "reduc_smin_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_smin (step1, operands[1])); + emit_insn (gen_reduc_smin_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_smax_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpsmax); + DONE; +}) + +(define_expand "reduc_smax_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_smax (step1, operands[1])); + emit_insn (gen_reduc_smax_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_umin_" + [(match_operand:VDI 0 "s_register_operand" "") + (match_operand:VDI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpumin); + DONE; +}) + +(define_expand "reduc_umin_" + [(match_operand:VQI 0 "s_register_operand" "") + (match_operand:VQI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_umin (step1, operands[1])); + emit_insn (gen_reduc_umin_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_umax_" + [(match_operand:VDI 0 "s_register_operand" "") + (match_operand:VDI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpumax); + DONE; +}) + +(define_expand "reduc_umax_" + [(match_operand:VQI 0 "s_register_operand" "") + (match_operand:VQI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_umax (step1, operands[1])); + emit_insn (gen_reduc_umax_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_insn "neon_vpadd_internal" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPADD))] + "TARGET_NEON" + "vpadd.\t%P0, %P1, %P2" + ;; Assume this schedules like vadd. 
+ [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_1")))] +) + +(define_insn "neon_vpsmin" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPSMIN))] + "TARGET_NEON" + "vpmin.\t%P0, %P1, %P2" + ;; Assume this schedules like vmin. + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) + +(define_insn "neon_vpsmax" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPSMAX))] + "TARGET_NEON" + "vpmax.\t%P0, %P1, %P2" + ;; Assume this schedules like vmax. + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) + +(define_insn "neon_vpumin" + [(set (match_operand:VDI 0 "s_register_operand" "=w") + (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w")] + UNSPEC_VPUMIN))] + "TARGET_NEON" + "vpmin.\t%P0, %P1, %P2" + ;; Assume this schedules like umin. + [(set_attr "neon_type" "neon_int_5")] +) + +(define_insn "neon_vpumax" + [(set (match_operand:VDI 0 "s_register_operand" "=w") + (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w")] + UNSPEC_VPUMAX))] + "TARGET_NEON" + "vpmax.\t%P0, %P1, %P2" + ;; Assume this schedules like umax. + [(set_attr "neon_type" "neon_int_5")] +) + +;; Saturating arithmetic + +; NOTE: Neon supports many more saturating variants of instructions than the +; following, but these are all GCC currently understands. +; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself +; yet either, although these patterns may be used by intrinsics when they're +; added. + +(define_insn "*ss_add_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqadd.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_4")] +) + +(define_insn "*us_add_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (us_plus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqadd.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_4")] +) + +(define_insn "*ss_sub_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqsub.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_5")] +) + +(define_insn "*us_sub_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (us_minus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqsub.\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_5")] +) + +;; Conditional instructions. These are comparisons with conditional moves for +;; vectors. They perform the assignment: +;; +;; Vop0 = (Vop4 Vop5) ? Vop1 : Vop2; +;; +;; where op3 is <, <=, ==, !=, >= or >. Operations are performed +;; element-wise. 
+ +(define_expand "vcond" + [(set (match_operand:VDQW 0 "s_register_operand" "") + (if_then_else:VDQW + (match_operator 3 "arm_comparison_operator" + [(match_operand:VDQW 4 "s_register_operand" "") + (match_operand:VDQW 5 "nonmemory_operand" "")]) + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + rtx mask; + int inverse = 0, immediate_zero = 0; + /* See the description of "magic" bits in the 'T' case of + arm_print_operand. */ + HOST_WIDE_INT magic_word = (mode == V2SFmode || mode == V4SFmode) + ? 3 : 1; + rtx magic_rtx = GEN_INT (magic_word); + + mask = gen_reg_rtx (mode); + + if (operands[5] == CONST0_RTX (mode)) + immediate_zero = 1; + else if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case GE: + emit_insn (gen_neon_vcge (mask, operands[4], operands[5], + magic_rtx)); + break; + + case GT: + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], + magic_rtx)); + break; + + case EQ: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + magic_rtx)); + break; + + case LE: + if (immediate_zero) + emit_insn (gen_neon_vcle (mask, operands[4], operands[5], + magic_rtx)); + else + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], + magic_rtx)); + break; + + case LT: + if (immediate_zero) + emit_insn (gen_neon_vclt (mask, operands[4], operands[5], + magic_rtx)); + else + emit_insn (gen_neon_vcgt (mask, operands[5], operands[4], + magic_rtx)); + break; + + case NE: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + magic_rtx)); + inverse = 1; + break; + + default: + gcc_unreachable (); + } + + if (inverse) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], + operands[2])); + + DONE; +}) + +(define_expand "vcondu" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (if_then_else:VDQIW + (match_operator 3 "arm_comparison_operator" + [(match_operand:VDQIW 4 "s_register_operand" "") + (match_operand:VDQIW 5 "s_register_operand" "")]) + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "s_register_operand" "")))] + "TARGET_NEON" +{ + rtx mask; + int inverse = 0, immediate_zero = 0; + + mask = gen_reg_rtx (mode); + + if (operands[5] == CONST0_RTX (mode)) + immediate_zero = 1; + else if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case GEU: + emit_insn (gen_neon_vcge (mask, operands[4], operands[5], + const0_rtx)); + break; + + case GTU: + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], + const0_rtx)); + break; + + case EQ: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + break; + + case LEU: + if (immediate_zero) + emit_insn (gen_neon_vcle (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], + const0_rtx)); + break; + + case LTU: + if (immediate_zero) + emit_insn (gen_neon_vclt (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcgt (mask, operands[5], operands[4], + const0_rtx)); + break; + + case NE: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + inverse = 1; + break; + + default: + gcc_unreachable (); + } + + if (inverse) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], 
mask, operands[1], + operands[2])); + + DONE; +}) + +;; Patterns for builtins. + +; good for plain vadd, vaddq. + +(define_expand "neon_vadd" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_add3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vadd_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Note that NEON operations don't support the full IEEE 754 standard: in +; particular, denormal values are flushed to zero. This means that GCC cannot +; use those instructions for autovectorization, etc. unless +; -funsafe-math-optimizations is in effect (in which case flush-to-zero +; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h +; header) must work in either case: if -funsafe-math-optimizations is given, +; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics +; expand to unspecs (which may potentially limit the extent to which they might +; be optimized by generic code). + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vadd_unspec" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w")] + UNSPEC_VADD))] + "TARGET_NEON" + "vadd.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_1")))] +) + +; operand 3 represents in bits: +; bit 0: signed (vs unsigned). +; bit 1: rounding (vs none). + +(define_insn "neon_vaddl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDL))] + "TARGET_NEON" + "vaddl.%T3%#\t%q0, %P1, %P2" + [(set_attr "neon_type" "neon_int_3")] +) + +(define_insn "neon_vaddw" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDW))] + "TARGET_NEON" + "vaddw.%T3%#\t%q0, %q1, %P2" + [(set_attr "neon_type" "neon_int_2")] +) + +; vhadd and vrhadd. 
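+; Illustrative sketch of the semantics: vhadd computes (a + b) >> 1 in a
+; widened intermediate, and vrhadd rounds, i.e. (a + b + 1) >> 1.  For
+; example, using the arm_neon.h intrinsics:
+;
+;   #include <arm_neon.h>
+;   uint8x8_t halving_add (uint8x8_t a, uint8x8_t b)
+;   {
+;     return vhadd_u8 (a, b);       /* element-wise (a[i] + b[i]) >> 1 */
+;   }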
+ +(define_insn "neon_vhadd" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VHADD))] + "TARGET_NEON" + "v%O3hadd.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_4")] +) + +(define_insn "neon_vqadd" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQADD))] + "TARGET_NEON" + "vqadd.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_4")] +) + +(define_insn "neon_vaddhn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:VN 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDHN))] + "TARGET_NEON" + "v%O3addhn.\t%P0, %q1, %q2" + [(set_attr "neon_type" "neon_int_4")] +) + +;; We cannot replace this unspec with mul3 because of the odd +;; polynomial multiplication case that can specified by operand 3. +(define_insn "neon_vmul" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMUL))] + "TARGET_NEON" + "vmul.%F3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qqq_8_16_32_ddd_32")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] +) + +(define_expand "neon_vmla" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_mul3add_neon (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_neon_vmla_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. 
+ +(define_insn "neon_vmla_unspec" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0") + (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")] + UNSPEC_VMLA))] + "TARGET_NEON" + "vmla.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) + +(define_insn "neon_vmlal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMLAL))] + "TARGET_NEON" + "vmlal.%T4%#\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +(define_expand "neon_vmls" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_mul3negadd_neon (operands[0], + operands[1], operands[2], operands[3])); + else + emit_insn (gen_neon_vmls_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. 
+ +(define_insn "neon_vmls_unspec" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0") + (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")] + UNSPEC_VMLS))] + "TARGET_NEON" + "vmls.\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd") + (const_string "neon_fp_vmla_qqq")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + (if_then_else + (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_qqq_8_16") + (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] +) + +(define_insn "neon_vmlsl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMLSL))] + "TARGET_NEON" + "vmlsl.%T4%#\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +(define_insn "neon_vqdmulh" + [(set (match_operand:VMDQI 0 "s_register_operand" "=w") + (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w") + (match_operand:VMDQI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQDMULH))] + "TARGET_NEON" + "vq%O3dmulh.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qqq_8_16_32_ddd_32")) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (const_string "neon_mul_qqq_8_16_32_ddd_32"))))] +) + +(define_insn "neon_vqdmlal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMLAL))] + "TARGET_NEON" + "vqdmlal.\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +(define_insn "neon_vqdmlsl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMLSL))] + "TARGET_NEON" + "vqdmlsl.\t%q0, %P2, %P3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +(define_insn "neon_vmull" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:VW 2 
"s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMULL))] + "TARGET_NEON" + "vmull.%T3%#\t%q0, %P1, %P2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) + +(define_insn "neon_vqdmull" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQDMULL))] + "TARGET_NEON" + "vqdmull.\t%q0, %P1, %P2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) + +(define_expand "neon_vsub" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_sub3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vsub_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vsub_unspec" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w")] + UNSPEC_VSUB))] + "TARGET_NEON" + "vsub.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_2")))] +) + +(define_insn "neon_vsubl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBL))] + "TARGET_NEON" + "vsubl.%T3%#\t%q0, %P1, %P2" + [(set_attr "neon_type" "neon_int_2")] +) + +(define_insn "neon_vsubw" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBW))] + "TARGET_NEON" + "vsubw.%T3%#\t%q0, %q1, %P2" + [(set_attr "neon_type" "neon_int_2")] +) + +(define_insn "neon_vqsub" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSUB))] + "TARGET_NEON" + "vqsub.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) + +(define_insn "neon_vhsub" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VHSUB))] + "TARGET_NEON" + "vhsub.%T3%#\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_5")] +) + +(define_insn "neon_vsubhn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:VN 2 "s_register_operand" "w") + (match_operand:SI 3 
"immediate_operand" "i")] + UNSPEC_VSUBHN))] + "TARGET_NEON" + "v%O3subhn.\t%P0, %q1, %q2" + [(set_attr "neon_type" "neon_int_4")] +) + +(define_insn "neon_vceq" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "nonmemory_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCEQ))] + "TARGET_NEON" + "@ + vceq.\t%0, %1, %2 + vceq.\t%0, %1, #0" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_insn "neon_vcge" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "nonmemory_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGE))] + "TARGET_NEON" + "@ + vcge.%T3%#\t%0, %1, %2 + vcge.%T3%#\t%0, %1, #0" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_insn "neon_vcgt" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "nonmemory_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGT))] + "TARGET_NEON" + "@ + vcgt.%T3%#\t%0, %1, %2 + vcgt.%T3%#\t%0, %1, #0" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +;; VCLE and VCLT only support comparisons with immediate zero (register +;; variants are VCGE and VCGT with operands reversed). 
+ +(define_insn "neon_vcle" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "nonmemory_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLE))] + "TARGET_NEON" + "vcle.%T3%#\t%0, %1, #0" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_insn "neon_vclt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "nonmemory_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLT))] + "TARGET_NEON" + "vclt.%T3%#\t%0, %1, #0" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_insn "neon_vcage" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCAGE))] + "TARGET_NEON" + "vacge.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_insn "neon_vcagt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCAGT))] + "TARGET_NEON" + "vacgt.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_insn "neon_vtst" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VTST))] + "TARGET_NEON" + "vtst.\t%0, %1, %2" + [(set_attr "neon_type" "neon_int_4")] +) + +(define_insn "neon_vabd" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VABD))] + "TARGET_NEON" + "vabd.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_insn "neon_vabdl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VABDL))] + "TARGET_NEON" + "vabdl.%T3%#\t%q0, %P1, %P2" + [(set_attr "neon_type" "neon_int_5")] +) + +(define_insn "neon_vaba" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (plus:VDQIW (match_operand:VDQIW 1 "s_register_operand" "0") + (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:VDQIW 3 
"s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABD)))] + "TARGET_NEON" + "vaba.%T4%#\t%0, %2, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vaba") (const_string "neon_vaba_qqq")))] +) + +(define_insn "neon_vabal" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (match_operand: 1 "s_register_operand" "0") + (unspec: [(match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABDL)))] + "TARGET_NEON" + "vabal.%T4%#\t%q0, %P2, %P3" + [(set_attr "neon_type" "neon_vaba")] +) + +(define_insn "neon_vmax" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMAX))] + "TARGET_NEON" + "vmax.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_insn "neon_vmin" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMIN))] + "TARGET_NEON" + "vmin.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")) + (const_string "neon_int_5")))] +) + +(define_expand "neon_vpadd" + [(match_operand:VD 0 "s_register_operand" "=w") + (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + emit_insn (gen_neon_vpadd_internal (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_insn "neon_vpaddl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VPADDL))] + "TARGET_NEON" + "vpaddl.%T2%#\t%0, %1" + ;; Assume this schedules like vaddl. + [(set_attr "neon_type" "neon_int_3")] +) + +(define_insn "neon_vpadal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPADAL))] + "TARGET_NEON" + "vpadal.%T3%#\t%0, %2" + ;; Assume this schedules like vpadd. + [(set_attr "neon_type" "neon_int_1")] +) + +(define_insn "neon_vpmax" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPMAX))] + "TARGET_NEON" + "vpmax.%T3%#\t%0, %1, %2" + ;; Assume this schedules like vmax. 
+ [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) + +(define_insn "neon_vpmin" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPMIN))] + "TARGET_NEON" + "vpmin.%T3%#\t%0, %1, %2" + ;; Assume this schedules like vmin. + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_int_5")))] +) + +(define_insn "neon_vrecps" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VRECPS))] + "TARGET_NEON" + "vrecps.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (const_string "neon_fp_vrecps_vrsqrts_qqq")))] +) + +(define_insn "neon_vrsqrts" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VRSQRTS))] + "TARGET_NEON" + "vrsqrts.\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (const_string "neon_fp_vrecps_vrsqrts_qqq")))] +) + +(define_expand "neon_vabs" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_abs2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vqabs" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQABS))] + "TARGET_NEON" + "vqabs.\t%0, %1" + [(set_attr "neon_type" "neon_vqneg_vqabs")] +) + +(define_expand "neon_vneg" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_neg2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vqneg" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQNEG))] + "TARGET_NEON" + "vqneg.\t%0, %1" + [(set_attr "neon_type" "neon_vqneg_vqabs")] +) + +(define_insn "neon_vcls" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCLS))] + "TARGET_NEON" + "vcls.\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) + +(define_insn "clz2" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vclz.\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) + +(define_expand "neon_vclz" + [(match_operand:VDQIW 0 "s_register_operand" "") + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_clz2 (operands[0], operands[1])); + DONE; +}) + +(define_insn 
"popcount2" + [(set (match_operand:VE 0 "s_register_operand" "=w") + (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcnt.\t%0, %1" + [(set_attr "neon_type" "neon_int_1")] +) + +(define_expand "neon_vcnt" + [(match_operand:VE 0 "s_register_operand" "=w") + (match_operand:VE 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + emit_insn (gen_popcount2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vrecpe" + [(set (match_operand:V32 0 "s_register_operand" "=w") + (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VRECPE))] + "TARGET_NEON" + "vrecpe.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_insn "neon_vrsqrte" + [(set (match_operand:V32 0 "s_register_operand" "=w") + (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VRSQRTE))] + "TARGET_NEON" + "vrsqrte.\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_expand "neon_vmvn" + [(match_operand:VDQIW 0 "s_register_operand" "") + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_one_cmpl2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vget_lane_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select: + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov%?.s\t%0, %P1[%c2]"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lane_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select: + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov%?.u\t%0, %P1[%c2]"; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lane_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (mode) / 2; + unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt_adj); + output_asm_insn ("vmov%?.s\t%0, %P1[%c2]", ops); + + return ""; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lane_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select: + 
(match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (mode) / 2; + unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt_adj); + output_asm_insn ("vmov%?.u\t%0, %P1[%c2]", ops); + + return ""; +} + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_expand "neon_vget_lane" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT magic = INTVAL (operands[3]); + rtx insn; + + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { + /* The intrinsics are defined in terms of a model where the + element ordering in memory is vldm order, whereas the generic + RTL is defined in terms of a model where the element ordering + in memory is array order. Convert the lane number to conform + to this model. */ + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + + if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (mode)) == 32) + insn = gen_vec_extract (operands[0], operands[1], operands[2]); + else + { + if ((magic & 1) != 0) + insn = gen_neon_vget_lane_sext_internal (operands[0], operands[1], + operands[2]); + else + insn = gen_neon_vget_lane_zext_internal (operands[0], operands[1], + operands[2]); + } + emit_insn (insn); + DONE; +}) + +; Operand 3 (info word) is ignored because it does nothing useful with 64-bit +; elements. + +(define_expand "neon_vget_lanedi" + [(match_operand:DI 0 "s_register_operand" "=r") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vget_lanev2di" + [(match_operand:DI 0 "s_register_operand" "=r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 2); + emit_insn (gen_vec_extractv2di (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vset_lane" + [(match_operand:VDQ 0 "s_register_operand" "=w") + (match_operand: 1 "s_register_operand" "r") + (match_operand:VDQ 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + unsigned int elt = INTVAL (operands[3]); + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + } + + emit_insn (gen_vec_set_internal (operands[0], operands[1], + GEN_INT (1 << elt), operands[2])); + DONE; +}) + +; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored. 
+ +(define_expand "neon_vset_lanedi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r") + (match_operand:DI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vcreate" + [(match_operand:VDX 0 "s_register_operand" "") + (match_operand:DI 1 "general_operand" "")] + "TARGET_NEON" +{ + rtx src = gen_lowpart (mode, operands[1]); + emit_move_insn (operands[0], src); + DONE; +}) + +(define_insn "neon_vdup_n" + [(set (match_operand:VX 0 "s_register_operand" "=w") + (vec_duplicate:VX (match_operand: 1 "s_register_operand" "r")))] + "TARGET_NEON" + "vdup%?.\t%0, %1" + ;; Assume this schedules like vmov. + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vdup_n" + [(set (match_operand:V32 0 "s_register_operand" "=w,w") + (vec_duplicate:V32 (match_operand: 1 "s_register_operand" "r,t")))] + "TARGET_NEON" + "@ + vdup%?.\t%0, %1 + vdup%?.\t%0, %y1" + ;; Assume this schedules like vmov. + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_expand "neon_vdup_ndi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r")] + "TARGET_NEON" +{ + emit_move_insn (operands[0], operands[1]); + DONE; +} +) + +(define_insn "neon_vdup_nv2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") + (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] + "TARGET_NEON" + "@ + vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1 + vmov%?\t%e0, %P1\;vmov%?\t%f0, %P1" + [(set_attr "predicable" "yes") + (set_attr "length" "8") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vdup_lane_internal" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (vec_duplicate:VDQW + (vec_select: + (match_operand: 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + if () + return "vdup.\t%P0, %P1[%c2]"; + else + return "vdup.\t%q0, %P1[%c2]"; +} + ;; Assume this schedules like vmov. + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_expand "neon_vdup_lane" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand: 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + if (BYTES_BIG_ENDIAN) + { + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + emit_insn (gen_neon_vdup_lane_internal (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Scalar index is ignored, since only zero is valid here. +(define_expand "neon_vdup_lanedi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +; Likewise for v2di, as the DImode second operand has only a single element. 
+(define_expand "neon_vdup_lanev2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); + DONE; +}) + +;; In this insn, operand 1 should be low, and operand 2 the high part of the +;; dest vector. +;; FIXME: A different implementation of this builtin could make it much +;; more likely that we wouldn't actually need to output anything (we could make +;; it so that the reg allocator puts things in the right places magically +;; instead). Lack of subregs for vectors makes that tricky though, I think. + +(define_insn "neon_vcombine" + [(set (match_operand: 0 "s_register_operand" "=w") + (vec_concat: (match_operand:VDX 1 "s_register_operand" "w") + (match_operand:VDX 2 "s_register_operand" "w")))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src1 = REGNO (operands[1]); + int src2 = REGNO (operands[2]); + rtx destlo; + + if (src1 == dest && src2 == dest + 2) + return ""; + else if (src2 == dest && src1 == dest + 2) + /* Special case of reversed high/low parts. */ + return "vswp\t%P1, %P2"; + + destlo = gen_rtx_REG (mode, dest); + + if (!reg_overlap_mentioned_p (operands[2], destlo)) + { + /* Try to avoid unnecessary moves if part of the result is in the right + place already. */ + if (src1 != dest) + output_asm_insn ("vmov\t%e0, %P1", operands); + if (src2 != dest + 2) + output_asm_insn ("vmov\t%f0, %P2", operands); + } + else + { + if (src2 != dest + 2) + output_asm_insn ("vmov\t%f0, %P2", operands); + if (src1 != dest) + output_asm_insn ("vmov\t%e0, %P1", operands); + } + + return ""; +} + ;; We set the neon_type attribute based on the vmov instructions above. 
+ [(set_attr "length" "8") + (set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "=w") + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv2di" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") + (parallel [(const_int 1)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "=w") + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + 
return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv2di" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") + (parallel [(const_int 0)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vcvt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCVT))] + "TARGET_NEON" + "vcvt.%T2%#32.f32\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_insn "neon_vcvt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCVT))] + "TARGET_NEON" + "vcvt.f32.%T2%#32\t%0, %1" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCVT_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, 33); + return "vcvt.%T3%#32.f32\t%0, %1, %2"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCVT_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, 33); + return "vcvt.f32.%T3%#32\t%0, %1, %2"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vadd_ddd_vabs_dd") + (const_string "neon_fp_vadd_qqq_vabs_qq")))] +) + +(define_insn "neon_vmovn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VMOVN))] + "TARGET_NEON" + "vmovn.\t%P0, %q1" + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vqmovn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQMOVN))] + "TARGET_NEON" + "vqmovn.%T2%#\t%P0, %q1" + [(set_attr "neon_type" "neon_shift_2")] +) + +(define_insn "neon_vqmovun" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" 
"i")] + UNSPEC_VQMOVUN))] + "TARGET_NEON" + "vqmovun.\t%P0, %q1" + [(set_attr "neon_type" "neon_shift_2")] +) + +(define_insn "neon_vmovl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VMOVL))] + "TARGET_NEON" + "vmovl.%T2%#\t%q0, %P1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vmul_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w") + (match_operand:VMD 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMUL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%P0, %P1, %P2[%c3]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmul_ddd") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))))] +) + +(define_insn "neon_vmul_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w") + (match_operand: 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMUL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%q0, %q1, %P2[%c3]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmul_qqd") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (const_string "neon_mul_qqd_32_scalar"))))] +) + +(define_insn "neon_vmull_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMULL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmull.%T4%#\t%q0, %P1, %P2[%c3]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) + +(define_insn "neon_vqdmull_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmull.\t%q0, %P1, %P2[%c3]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) + +(define_insn "neon_vqdmulh_lane" + [(set (match_operand:VMQI 0 "s_register_operand" "=w") + (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w") + (match_operand: 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULH_LANE))] + 
"TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vq%O4dmulh.%T4%#\t%q0, %q1, %P2[%c3]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (const_string "neon_mul_qqd_32_scalar")))] +) + +(define_insn "neon_vqdmulh_lane" + [(set (match_operand:VMDI 0 "s_register_operand" "=w") + (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULH_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vq%O4dmulh.%T4%#\t%P0, %P1, %P2[%c3]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar") + (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] +) + +(define_insn "neon_vmla_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") + (match_operand:VMD 2 "s_register_operand" "w") + (match_operand:VMD 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLA_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%P0, %P2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))] +) + +(define_insn "neon_vmla_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") + (match_operand:VMQ 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLA_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%q0, %q2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_qqq_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (const_string "neon_mla_qqq_32_qqd_32_scalar"))))] +) + +(define_insn "neon_vmlal_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLAL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlal.%T5%#\t%q0, %P2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +(define_insn "neon_vqdmlal_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 
"s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VQDMLAL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlal.\t%q0, %P2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +(define_insn "neon_vmls_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") + (match_operand:VMD 2 "s_register_operand" "w") + (match_operand:VMD 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLS_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%P0, %P2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_ddd_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))] +) + +(define_insn "neon_vmls_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") + (match_operand:VMQ 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLS_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%q0, %q2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_vmla_qqq_scalar") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (const_string "neon_mla_qqq_32_qqd_32_scalar"))))] +) + +(define_insn "neon_vmlsl_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLSL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlsl.%T5%#\t%q0, %P2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +(define_insn "neon_vqdmlsl_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VQDMLSL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlsl.\t%q0, %P2, %P3[%c4]"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") 
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] +) + +; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a +; core register into a temp register, then use a scalar taken from that. This +; isn't an optimal solution if e.g. the scalar has just been read from memory +; or extracted from another vector. The latter case it's currently better to +; use the "_lane" variant, and the former case can probably be implemented +; using vld1_lane, but that hasn't been done yet. + +(define_expand "neon_vmul_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmul_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vmul_n" + [(match_operand:VMQ 0 "s_register_operand" "") + (match_operand:VMQ 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmul_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vmull_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmull_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vqdmull_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmull_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vqdmulh_n" + [(match_operand:VMDI 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmulh_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vqdmulh_n" + [(match_operand:VMQI 0 "s_register_operand" "") + (match_operand:VMQI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmulh_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vmla_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand:VMD 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx 
(mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmla_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmla_n" + [(match_operand:VMQ 0 "s_register_operand" "") + (match_operand:VMQ 1 "s_register_operand" "") + (match_operand:VMQ 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmla_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmlal_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmlal_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vqdmlal_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmlal_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmls_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand:VMD 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmls_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmls_n" + [(match_operand:VMQ 0 "s_register_operand" "") + (match_operand:VMQ 1 "s_register_operand" "") + (match_operand:VMQ 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmls_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmlsl_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmlsl_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vqdmlsl_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx 
(mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmlsl_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_insn "neon_vext" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VEXT))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vext.\t%0, %1, %2, %3"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_2cycle")))] +) + +(define_insn "neon_vrev64" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV64))] + "TARGET_NEON" + "vrev64.\t%0, %1" + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vrev32" + [(set (match_operand:VX 0 "s_register_operand" "=w") + (unspec:VX [(match_operand:VX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV32))] + "TARGET_NEON" + "vrev32.\t%0, %1" + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vrev16" + [(set (match_operand:VE 0 "s_register_operand" "=w") + (unspec:VE [(match_operand:VE 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV16))] + "TARGET_NEON" + "vrev16.\t%0, %1" + [(set_attr "neon_type" "neon_bp_simple")] +) + +; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register +; allocation. For an intrinsic of form: +; rD = vbsl_* (rS, rN, rM) +; We can use any of: +; vbsl rS, rN, rM (if D = S) +; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) +; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) + +(define_insn "neon_vbsl_internal" + [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") + (match_operand:VDQX 2 "s_register_operand" " w,w,0") + (match_operand:VDQX 3 "s_register_operand" " w,0,w")] + UNSPEC_VBSL))] + "TARGET_NEON" + "@ + vbsl\t%0, %2, %3 + vbit\t%0, %2, %1 + vbif\t%0, %3, %1" + [(set_attr "neon_type" "neon_int_1")] +) + +(define_expand "neon_vbsl" + [(set (match_operand:VDQX 0 "s_register_operand" "") + (unspec:VDQX [(match_operand: 1 "s_register_operand" "") + (match_operand:VDQX 2 "s_register_operand" "") + (match_operand:VDQX 3 "s_register_operand" "")] + UNSPEC_VBSL))] + "TARGET_NEON" +{ + /* We can't alias operands together if they have different modes. 
*/ + operands[1] = gen_lowpart (mode, operands[1]); +}) + +(define_insn "neon_vshl" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHL))] + "TARGET_NEON" + "v%O3shl.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_vshl_ddd") + (const_string "neon_shift_3")))] +) + +(define_insn "neon_vqshl" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHL))] + "TARGET_NEON" + "vq%O3shl.%T3%#\t%0, %1, %2" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_shift_2") + (const_string "neon_vqshl_vrshl_vqrshl_qqq")))] +) + +(define_insn "neon_vshr_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHR_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) + 1); + return "v%O3shr.%T3%#\t%0, %1, %2"; +} + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vshrn_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHRN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "v%O3shrn.\t%P0, %q1, %2"; +} + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vqshrn_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHRN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "vq%O3shrn.%T3%#\t%P0, %q1, %2"; +} + [(set_attr "neon_type" "neon_shift_2")] +) + +(define_insn "neon_vqshrun_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHRUN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "vq%O3shrun.%T3%#\t%P0, %q1, %2"; +} + [(set_attr "neon_type" "neon_shift_2")] +) + +(define_insn "neon_vshl_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHL_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vshl.\t%0, %1, %2"; +} + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vqshl_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHL_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return 
"vqshl.%T3%#\t%0, %1, %2"; +} + [(set_attr "neon_type" "neon_shift_2")] +) + +(define_insn "neon_vqshlu_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHLU_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vqshlu.%T3%#\t%0, %1, %2"; +} + [(set_attr "neon_type" "neon_shift_2")] +) + +(define_insn "neon_vshll_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHLL_N))] + "TARGET_NEON" +{ + /* The boundaries are: 0 < imm <= size. */ + neon_const_bounds (operands[2], 0, neon_element_bits (mode) + 1); + return "vshll.%T3%#\t%q0, %P1, %2"; +} + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vsra_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VSRA_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); + return "v%O4sra.%T4%#\t%0, %2, %3"; +} + [(set_attr "neon_type" "neon_vsra_vrsra")] +) + +(define_insn "neon_vsri_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSRI))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); + return "vsri.\t%0, %2, %3"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_shift_1") + (const_string "neon_shift_3")))] +) + +(define_insn "neon_vsli_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSLI))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 0, neon_element_bits (mode)); + return "vsli.\t%0, %2, %3"; +} + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_shift_1") + (const_string "neon_shift_3")))] +) + +(define_insn "neon_vtbl1v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "vtbl.8\t%P0, {%P1}, %P2" + [(set_attr "neon_type" "neon_bp_2cycle")] +) + +(define_insn "neon_vtbl2v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[4]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); + + return ""; +} + [(set_attr "neon_type" "neon_bp_2cycle")] +) + +(define_insn "neon_vtbl3v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:EI 1 
"s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[5]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); + + return ""; +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) + +(define_insn "neon_vtbl4v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[6]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); + ops[5] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); + + return ""; +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) + +(define_insn "neon_vtbx1v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:V8QI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" + "vtbx.8\t%P0, {%P2}, %P3" + [(set_attr "neon_type" "neon_bp_2cycle")] +) + +(define_insn "neon_vtbx2v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:TI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[4]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops); + + return ""; +} + [(set_attr "neon_type" "neon_bp_2cycle")] +) + +(define_insn "neon_vtbx3v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:EI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[5]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops); + + return ""; +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) + +(define_insn "neon_vtbx4v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:OI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[6]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); + ops[5] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); + + return ""; +} + [(set_attr "neon_type" "neon_bp_3cycle")] +) + +(define_insn "neon_vtrn_internal" + [(set (match_operand:VDQW 0 
"s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VTRN2))] + "TARGET_NEON" + "vtrn.\t%0, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_3cycle")))] +) + +(define_expand "neon_vtrn" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vtrn_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn "neon_vzip_internal" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VZIP2))] + "TARGET_NEON" + "vzip.\t%0, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_3cycle")))] +) + +(define_expand "neon_vzip" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vzip_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn "neon_vuzp_internal" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] + UNSPEC_VUZP1)) + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VUZP2))] + "TARGET_NEON" + "vuzp.\t%0, %3" + [(set (attr "neon_type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_bp_simple") + (const_string "neon_bp_3cycle")))] +) + +(define_expand "neon_vuzp" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vuzp_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "neon_vreinterpretv8qi" + [(match_operand:V8QI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4hi" + [(match_operand:V4HI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2si" + [(match_operand:V2SI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2sf" + [(match_operand:V2SF 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretdi" + [(match_operand:DI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; 
+}) + +(define_expand "neon_vreinterpretv16qi" + [(match_operand:V16QI 0 "s_register_operand" "") + (match_operand:VQX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv8hi" + [(match_operand:V8HI 0 "s_register_operand" "") + (match_operand:VQX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4si" + [(match_operand:V4SI 0 "s_register_operand" "") + (match_operand:VQX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4sf" + [(match_operand:V4SF 0 "s_register_operand" "") + (match_operand:VQX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2di" + [(match_operand:V2DI 0 "s_register_operand" "") + (match_operand:VQX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_insn "neon_vld1" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))] + UNSPEC_VLD1))] + "TARGET_NEON" + "vld1.\t%h0, [%1]" + [(set_attr "neon_type" "neon_vld1_1_2_regs")] +) + +(define_insn "neon_vld1_lane" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:VDX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) + return "vld1.\t%P0, [%1]"; + else + return "vld1.\t{%P0[%c3]}, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_int 2)) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld1_vld2_lane")))] +) + +(define_insn "neon_vld1_lane" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:VQX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + operands[3] = GEN_INT (lane); + } + operands[0] = gen_rtx_REG (mode, regno); + if (max == 2) + return "vld1.\t%P0, [%1]"; + else + return "vld1.\t{%P0[%c3]}, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_int 2)) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld1_vld2_lane")))] +) + +(define_insn "neon_vld1_dup" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(mem: (match_operand:SI 1 "s_register_operand" "r"))] + UNSPEC_VLD1_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + return "vld1.\t{%P0[]}, [%1]"; + else + return "vld1.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) + +(define_insn "neon_vld1_dup" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(mem: 
(match_operand:SI 1 "s_register_operand" "r"))] + UNSPEC_VLD1_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 2) + return "vld1.\t{%e0[], %f0[]}, [%1]"; + else + return "vld1.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) + +(define_insn "neon_vst1" + [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r")) + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] + UNSPEC_VST1))] + "TARGET_NEON" + "vst1.\t%h1, [%0]" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]) + +(define_insn "neon_vst1_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (vec_select: + (match_operand:VDX 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) + return "vst1.\t{%P1}, [%0]"; + else + return "vst1.\t{%P1[%c2]}, [%0]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_int 1)) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst1_vst2_lane")))]) + +(define_insn "neon_vst1_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (vec_select: + (match_operand:VQX 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + operands[2] = GEN_INT (lane); + } + operands[1] = gen_rtx_REG (mode, regno); + if (max == 2) + return "vst1.\t{%P1}, [%0]"; + else + return "vst1.\t{%P1[%c2]}, [%0]"; +} + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +) + +(define_insn "neon_vld2" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, [%1]"; + else + return "vld2.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))] +) + +(define_insn "neon_vld2" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" + "vld2.\t%h0, [%1]" + [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")]) + +(define_insn "neon_vld2_lane" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:TI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = operands[1]; + ops[3] = operands[3]; + 
output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, [%2]", ops); + return ""; +} + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +) + +(define_insn "neon_vld2_lane" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = operands[1]; + ops[3] = GEN_INT (lane); + output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, [%2]", ops); + return ""; +} + [(set_attr "neon_type" "neon_vld1_vld2_lane")] +) + +(define_insn "neon_vld2_dup" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + return "vld2.\t{%e0[], %f0[]}, [%1]"; + else + return "vld1.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) + +(define_insn "neon_vst2" + [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r")) + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, [%0]"; + else + return "vst2.\t%h1, [%0]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst1_1_2_regs_vst2_2_regs")))] +) + +(define_insn "neon_vst2" + [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" + "vst2.\t%h1, [%0]" + [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")] +) + +(define_insn "neon_vst2_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (unspec: + [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = operands[2]; + output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, [%0]", ops); + return ""; +} + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +) + +(define_insn "neon_vst2_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (unspec: + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO 
(operands[1]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = GEN_INT (lane); + output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, [%0]", ops); + return ""; +} + [(set_attr "neon_type" "neon_vst1_vst2_lane")] +) + +(define_insn "neon_vld3" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, [%1]"; + else + return "vld3.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld3_vld4")))] +) + +(define_expand "neon_vld3" + [(match_operand:CI 0 "s_register_operand" "=w") + (match_operand:SI 1 "s_register_operand" "+r") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vld3qa (operands[0], operands[0], + operands[1], operands[1])); + emit_insn (gen_neon_vld3qb (operands[0], operands[0], + operands[1], operands[1])); + DONE; +}) + +(define_insn "neon_vld3qa" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) + (match_operand:CI 1 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3A)) + (set (match_operand:SI 2 "s_register_operand" "=r") + (plus:SI (match_dup 3) + (const_int 24)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[2]; + output_asm_insn ("vld3.\t{%P0, %P1, %P2}, [%3]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) + +(define_insn "neon_vld3qb" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2")) + (match_operand:CI 1 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3B)) + (set (match_operand:SI 2 "s_register_operand" "=r") + (plus:SI (match_dup 3) + (const_int 24)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = operands[2]; + output_asm_insn ("vld3.\t{%P0, %P1, %P2}, [%3]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) + +(define_insn "neon_vld3_lane" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:EI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + ops[4] = operands[3]; + output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", + ops); + return ""; +} + [(set_attr 
"neon_type" "neon_vld3_vld4_lane")] +) + +(define_insn "neon_vld3_lane" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:CI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[1]; + ops[4] = GEN_INT (lane); + output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]", + ops); + return ""; +} + [(set_attr "neon_type" "neon_vld3_vld4_lane")] +) + +(define_insn "neon_vld3_dup" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + { + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + output_asm_insn ("vld3.\t{%P0[], %P1[], %P2[]}, [%3]", ops); + return ""; + } + else + return "vld1.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_vld3_vld4_all_lanes") + (const_string "neon_vld1_1_2_regs")))]) + +(define_insn "neon_vst3" + [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r")) + (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, [%0]"; + else + return "vst3.\t%h1, [%0]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst2_4_regs_vst3_vst4")))]) + +(define_expand "neon_vst3" + [(match_operand:SI 0 "s_register_operand" "+r") + (match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vst3qa (operands[0], operands[0], operands[1])); + emit_insn (gen_neon_vst3qb (operands[0], operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vst3qa" + [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) + (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3A)) + (set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (match_dup 1) + (const_int 24)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[2]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + output_asm_insn ("vst3.\t{%P1, %P2, %P3}, [%0]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) + +(define_insn "neon_vst3qb" + [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0")) + (unspec:EI [(match_operand:CI 2 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3B)) + (set (match_operand:SI 0 "s_register_operand" 
"=r") + (plus:SI (match_dup 1) + (const_int 24)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[2]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + output_asm_insn ("vst3.\t{%P1, %P2, %P3}, [%0]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) + +(define_insn "neon_vst3_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (unspec: + [(match_operand:EI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = operands[2]; + output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", + ops); + return ""; +} + [(set_attr "neon_type" "neon_vst3_vst4_lane")] +) + +(define_insn "neon_vst3_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (unspec: + [(match_operand:CI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = GEN_INT (lane); + output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]", + ops); + return ""; +} +[(set_attr "neon_type" "neon_vst3_vst4_lane")]) + +(define_insn "neon_vld4" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, [%1]"; + else + return "vld4.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vld1_1_2_regs") + (const_string "neon_vld3_vld4")))] +) + +(define_expand "neon_vld4" + [(match_operand:XI 0 "s_register_operand" "=w") + (match_operand:SI 1 "s_register_operand" "+r") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vld4qa (operands[0], operands[0], + operands[1], operands[1])); + emit_insn (gen_neon_vld4qb (operands[0], operands[0], + operands[1], operands[1])); + DONE; +}) + +(define_insn "neon_vld4qa" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) + (match_operand:XI 1 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4A)) + (set (match_operand:SI 2 "s_register_operand" "=r") + (plus:SI (match_dup 3) + (const_int 32)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = 
gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[2]; + output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, [%4]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) + +(define_insn "neon_vld4qb" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) + (match_operand:XI 1 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4B)) + (set (match_operand:SI 2 "s_register_operand" "=r") + (plus:SI (match_dup 3) + (const_int 32)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = gen_rtx_REG (DImode, regno + 14); + ops[4] = operands[2]; + output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, [%4]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vld3_vld4")] +) + +(define_insn "neon_vld4_lane" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + ops[5] = operands[3]; + output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", + ops); + return ""; +} + [(set_attr "neon_type" "neon_vld3_vld4_lane")] +) + +(define_insn "neon_vld4_lane" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:XI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[1]; + ops[5] = GEN_INT (lane); + output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]", + ops); + return ""; +} + [(set_attr "neon_type" "neon_vld3_vld4_lane")] +) + +(define_insn "neon_vld4_dup" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(mem: (match_operand:SI 1 "s_register_operand" "r")) + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + { + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + output_asm_insn ("vld4.\t{%P0[], %P1[], %P2[], %P3[]}, [%4]", + ops); + return ""; + } + else + return "vld1.\t%h0, [%1]"; +} + [(set (attr "neon_type") + (if_then_else 
(gt (const_string "") (const_string "1")) + (const_string "neon_vld3_vld4_all_lanes") + (const_string "neon_vld1_1_2_regs")))] +) + +(define_insn "neon_vst4" + [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r")) + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, [%0]"; + else + return "vst4.\t%h1, [%0]"; +} + [(set (attr "neon_type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (const_string "neon_vst2_4_regs_vst3_vst4")))] +) + +(define_expand "neon_vst4" + [(match_operand:SI 0 "s_register_operand" "+r") + (match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vst4qa (operands[0], operands[0], operands[1])); + emit_insn (gen_neon_vst4qb (operands[0], operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vst4qa" + [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) + (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4A)) + (set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (match_dup 1) + (const_int 32)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[2]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, [%0]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) + +(define_insn "neon_vst4qb" + [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0")) + (unspec:OI [(match_operand:XI 2 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4B)) + (set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (match_dup 1) + (const_int 32)))] + "TARGET_NEON" +{ + int regno = REGNO (operands[2]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + ops[4] = gen_rtx_REG (DImode, regno + 14); + output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, [%0]!", ops); + return ""; +} + [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")] +) + +(define_insn "neon_vst4_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (unspec: + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = gen_rtx_REG (DImode, regno + 6); + ops[5] = operands[2]; + output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", + ops); + return ""; +} + [(set_attr "neon_type" "neon_vst3_vst4_lane")] +) + +(define_insn "neon_vst4_lane" + [(set (mem: (match_operand:SI 0 "s_register_operand" "r")) + (unspec: + [(match_operand:XI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] 
UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + ops[5] = GEN_INT (lane); + output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]", + ops); + return ""; +} + [(set_attr "neon_type" "neon_vst3_vst4_lane")] +) + +(define_expand "neon_vand" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_inv_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_and3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vorr" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_ior3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_veor" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_xor3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vbic" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_bic3_neon (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vorn" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_inv_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_orn3_neon (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "neon_vec_unpack_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))))] + "TARGET_NEON" + "vmovl. %q0, %e1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vec_unpack_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))))] + "TARGET_NEON" + "vmovl. 
%q0, %f1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand"))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2); i++) + RTVEC_ELT (v, i) = GEN_INT ((/2) + i); + + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_hi_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_lo_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_insn "neon_vec_mult_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON" + "vmull. %q0, %e1, %e3" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_lo_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + } +) + +(define_insn "neon_vec_mult_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON" + "vmull. %q0, %f1, %f3" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (/2 + i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_hi_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + + } +) + +;; Vectorize for non-neon-quad case +(define_insn "neon_unpack_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (match_operand:VDI 1 "register_operand" "w")))] + "TARGET_NEON" + "vmovl. 
%q0, %P1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; +} +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; +} +) + +(define_insn "neon_vec_mult_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: + (match_operand:VDI 1 "register_operand" "w")) + (SE: + (match_operand:VDI 2 "register_operand" "w"))))] + "TARGET_NEON" + "vmull. %q0, %P1, %P2" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; + + } +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; + + } +) + +;; The case when using all quad registers. +(define_insn "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (truncate: + (match_operand:VN 1 "register_operand" "w")) + (truncate: + (match_operand:VN 2 "register_operand" "w"))))] + "TARGET_NEON" + "vmovn.i\t%e0, %q1\;vmovn.i\t%f0, %q2" + [(set_attr "neon_type" "neon_shift_1") + (set_attr "length" "8")] +) + +;; For the non-quad case. +(define_insn "neon_vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VN 1 "register_operand" "w")))] + "TARGET_NEON" + "vmovn.i\t%P0, %q1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VSHFT 1 "register_operand" "") + (match_operand:VSHFT 2 "register_operand")] + "TARGET_NEON" +{ + rtx tempreg = gen_reg_rtx (mode); + + emit_insn (gen_move_lo_quad_ (tempreg, operands[1])); + emit_insn (gen_move_hi_quad_ (tempreg, operands[2])); + emit_insn (gen_neon_vec_pack_trunc_ (operands[0], tempreg)); + DONE; +}) diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml new file mode 100644 index 000000000..b5b9cab73 --- /dev/null +++ b/gcc/config/arm/neon.ml @@ -0,0 +1,1857 @@ +(* Common code for ARM NEON header file, documentation and test case + generators. + + Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . *) + +(* Shorthand types for vector elements. *) +type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16 + | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts + | Cast of elts * elts | NoElts + +type eltclass = Signed | Unsigned | Float | Poly | Int | Bits + | ConvClass of eltclass * eltclass | NoType + +(* These vector types correspond directly to C types. *) +type vectype = T_int8x8 | T_int8x16 + | T_int16x4 | T_int16x8 + | T_int32x2 | T_int32x4 + | T_int64x1 | T_int64x2 + | T_uint8x8 | T_uint8x16 + | T_uint16x4 | T_uint16x8 + | T_uint32x2 | T_uint32x4 + | T_uint64x1 | T_uint64x2 + | T_float32x2 | T_float32x4 + | T_poly8x8 | T_poly8x16 + | T_poly16x4 | T_poly16x8 + | T_immediate of int * int + | T_int8 | T_int16 + | T_int32 | T_int64 + | T_uint8 | T_uint16 + | T_uint32 | T_uint64 + | T_poly8 | T_poly16 + | T_float32 | T_arrayof of int * vectype + | T_ptrto of vectype | T_const of vectype + | T_void | T_intQI + | T_intHI | T_intSI + | T_intDI | T_floatSF + +(* The meanings of the following are: + TImode : "Tetra", two registers (four words). + EImode : "hExa", three registers (six words). + OImode : "Octa", four registers (eight words). + CImode : "dodeCa", six registers (twelve words). + XImode : "heXadeca", eight registers (sixteen words). +*) + +type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode + +type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt + | PtrTo of shape_elt | CstPtrTo of shape_elt + (* These next ones are used only in the test generator. *) + | Element_of_dreg (* Used for "lane" variants. *) + | Element_of_qreg (* Likewise. *) + | All_elements_of_dreg (* Used for "dup" variants. *) + | Alternatives of shape_elt list (* Used for multiple valid operands *) + +type shape_form = All of int * shape_elt + | Long + | Long_noreg of shape_elt + | Wide + | Wide_noreg of shape_elt + | Narrow + | Long_imm + | Narrow_imm + | Binary_imm of shape_elt + | Use_operands of shape_elt array + | By_scalar of shape_elt + | Unary_scalar of shape_elt + | Wide_lane + | Wide_scalar + | Pair_result of shape_elt + +type arity = Arity0 of vectype + | Arity1 of vectype * vectype + | Arity2 of vectype * vectype * vectype + | Arity3 of vectype * vectype * vectype * vectype + | Arity4 of vectype * vectype * vectype * vectype * vectype + +type vecmode = V8QI | V4HI | V2SI | V2SF | DI + | V16QI | V8HI | V4SI | V4SF | V2DI + | QI | HI | SI | SF + +type opcode = + (* Binary ops. *) + Vadd + | Vmul + | Vmla + | Vmls + | Vsub + | Vceq + | Vcge + | Vcgt + | Vcle + | Vclt + | Vcage + | Vcagt + | Vcale + | Vcalt + | Vtst + | Vabd + | Vaba + | Vmax + | Vmin + | Vpadd + | Vpada + | Vpmax + | Vpmin + | Vrecps + | Vrsqrts + | Vshl + | Vshr_n + | Vshl_n + | Vsra_n + | Vsri + | Vsli + (* Logic binops. *) + | Vand + | Vorr + | Veor + | Vbic + | Vorn + | Vbsl + (* Ops with scalar. *) + | Vmul_lane + | Vmla_lane + | Vmls_lane + | Vmul_n + | Vmla_n + | Vmls_n + | Vmull_n + | Vmull_lane + | Vqdmull_n + | Vqdmull_lane + | Vqdmulh_n + | Vqdmulh_lane + (* Unary ops. *) + | Vabs + | Vneg + | Vcls + | Vclz + | Vcnt + | Vrecpe + | Vrsqrte + | Vmvn + (* Vector extract. 
*) + | Vext + (* Reverse elements. *) + | Vrev64 + | Vrev32 + | Vrev16 + (* Transposition ops. *) + | Vtrn + | Vzip + | Vuzp + (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *) + | Vldx of int + | Vstx of int + | Vldx_lane of int + | Vldx_dup of int + | Vstx_lane of int + (* Set/extract lanes from a vector. *) + | Vget_lane + | Vset_lane + (* Initialize vector from bit pattern. *) + | Vcreate + (* Set all lanes to same value. *) + | Vdup_n + | Vmov_n (* Is this the same? *) + (* Duplicate scalar to all lanes of vector. *) + | Vdup_lane + (* Combine vectors. *) + | Vcombine + (* Get quadword high/low parts. *) + | Vget_high + | Vget_low + (* Convert vectors. *) + | Vcvt + | Vcvt_n + (* Narrow/lengthen vectors. *) + | Vmovn + | Vmovl + (* Table lookup. *) + | Vtbl of int + | Vtbx of int + (* Reinterpret casts. *) + | Vreinterp + +(* Features used for documentation, to distinguish between some instruction + variants, and to signal special requirements (e.g. swapping arguments). *) + +type features = + Halving + | Rounding + | Saturating + | Dst_unsign + | High_half + | Doubling + | Flipped of string (* Builtin name to use with flipped arguments. *) + | InfoWord (* Pass an extra word for signage/rounding etc. (always passed + for All _, Long, Wide, Narrow shape_forms. *) + | ReturnPtr (* Pass explicit pointer to return value as first argument. *) + (* A specification as to the shape of instruction expected upon + disassembly, used if it differs from the shape used to build the + intrinsic prototype. Multiple entries in the constructor's argument + indicate that the intrinsic expands to more than one assembly + instruction, each with a corresponding shape specified here. *) + | Disassembles_as of shape_form list + | Builtin_name of string (* Override the name of the builtin. *) + (* Override the name of the instruction. If more than one name + is specified, it means that the instruction can have any of those + names. *) + | Instruction_name of string list + (* Mark that the intrinsic yields no instructions, or expands to yield + behavior that the test generator cannot test. *) + | No_op + (* Mark that the intrinsic has constant arguments that cannot be set + to the defaults (zero for pointers and one otherwise) in the test + cases. The function supplied must return the integer to be written + into the testcase for the argument number (0-based) supplied to it. *) + | Const_valuator of (int -> int) + | Fixed_return_reg + +exception MixedMode of elts * elts + +let rec elt_width = function + S8 | U8 | P8 | I8 | B8 -> 8 + | S16 | U16 | P16 | I16 | B16 -> 16 + | S32 | F32 | U32 | I32 | B32 -> 32 + | S64 | U64 | I64 | B64 -> 64 + | Conv (a, b) -> + let wa = elt_width a and wb = elt_width b in + if wa = wb then wa else failwith "element width?" 
+ | Cast (a, b) -> raise (MixedMode (a, b)) + | NoElts -> failwith "No elts" + +let rec elt_class = function + S8 | S16 | S32 | S64 -> Signed + | U8 | U16 | U32 | U64 -> Unsigned + | P8 | P16 -> Poly + | F32 -> Float + | I8 | I16 | I32 | I64 -> Int + | B8 | B16 | B32 | B64 -> Bits + | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b) + | NoElts -> NoType + +let elt_of_class_width c w = + match c, w with + Signed, 8 -> S8 + | Signed, 16 -> S16 + | Signed, 32 -> S32 + | Signed, 64 -> S64 + | Float, 32 -> F32 + | Unsigned, 8 -> U8 + | Unsigned, 16 -> U16 + | Unsigned, 32 -> U32 + | Unsigned, 64 -> U64 + | Poly, 8 -> P8 + | Poly, 16 -> P16 + | Int, 8 -> I8 + | Int, 16 -> I16 + | Int, 32 -> I32 + | Int, 64 -> I64 + | Bits, 8 -> B8 + | Bits, 16 -> B16 + | Bits, 32 -> B32 + | Bits, 64 -> B64 + | _ -> failwith "Bad element type" + +(* Return unsigned integer element the same width as argument. *) +let unsigned_of_elt elt = + elt_of_class_width Unsigned (elt_width elt) + +let signed_of_elt elt = + elt_of_class_width Signed (elt_width elt) + +(* Return untyped bits element the same width as argument. *) +let bits_of_elt elt = + elt_of_class_width Bits (elt_width elt) + +let non_signed_variant = function + S8 -> I8 + | S16 -> I16 + | S32 -> I32 + | S64 -> I64 + | U8 -> I8 + | U16 -> I16 + | U32 -> I32 + | U64 -> I64 + | x -> x + +let poly_unsigned_variant v = + let elclass = match elt_class v with + Poly -> Unsigned + | x -> x in + elt_of_class_width elclass (elt_width v) + +let widen_elt elt = + let w = elt_width elt + and c = elt_class elt in + elt_of_class_width c (w * 2) + +let narrow_elt elt = + let w = elt_width elt + and c = elt_class elt in + elt_of_class_width c (w / 2) + +(* If we're trying to find a mode from a "Use_operands" instruction, use the + last vector operand as the dominant mode used to invoke the correct builtin. + We must stick to this rule in neon.md. *) +let find_key_operand operands = + let rec scan opno = + match operands.(opno) with + Qreg -> Qreg + | Dreg -> Dreg + | VecArray (_, Qreg) -> Qreg + | VecArray (_, Dreg) -> Dreg + | _ -> scan (opno-1) + in + scan ((Array.length operands) - 1) + +let rec mode_of_elt elt shape = + let flt = match elt_class elt with + Float | ConvClass(_, Float) -> true | _ -> false in + let idx = + match elt_width elt with + 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3 + | _ -> failwith "Bad element width" + in match shape with + All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg + | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg -> + [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx) + | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg + | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg -> + [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx) + | All (_, (Corereg | PtrTo _ | CstPtrTo _)) -> + [| QI; HI; if flt then SF else SI; DI |].(idx) + | Long | Wide | Wide_lane | Wide_scalar + | Long_imm -> + [| V8QI; V4HI; V2SI; DI |].(idx) + | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx) + | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops))) + | _ -> failwith "invalid shape" + +(* Modify an element type dependent on the shape of the instruction and the + operand number. 
*) + +let shapemap shape no = + let ident = fun x -> x in + match shape with + All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _ + | Binary_imm _ -> ident + | Long | Long_noreg _ | Wide_scalar | Long_imm -> + [| widen_elt; ident; ident |].(no) + | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no) + | Wide_lane -> [| widen_elt; ident; ident; ident |].(no) + | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no) + +(* Register type (D/Q) of an operand, based on shape and operand number. *) + +let regmap shape no = + match shape with + All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg + | Long -> [| Qreg; Dreg; Dreg |].(no) + | Wide -> [| Qreg; Qreg; Dreg |].(no) + | Narrow -> [| Dreg; Qreg; Qreg |].(no) + | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no) + | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no) + | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no) + | Unary_scalar reg -> [| reg; Dreg; Immed |].(no) + | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no) + | Binary_imm reg -> [| reg; reg; Immed |].(no) + | Long_imm -> [| Qreg; Dreg; Immed |].(no) + | Narrow_imm -> [| Dreg; Qreg; Immed |].(no) + | Use_operands these -> these.(no) + +let type_for_elt shape elt no = + let elt = (shapemap shape no) elt in + let reg = regmap shape no in + let rec type_for_reg_elt reg elt = + match reg with + Dreg -> + begin match elt with + S8 -> T_int8x8 + | S16 -> T_int16x4 + | S32 -> T_int32x2 + | S64 -> T_int64x1 + | U8 -> T_uint8x8 + | U16 -> T_uint16x4 + | U32 -> T_uint32x2 + | U64 -> T_uint64x1 + | F32 -> T_float32x2 + | P8 -> T_poly8x8 + | P16 -> T_poly16x4 + | _ -> failwith "Bad elt type" + end + | Qreg -> + begin match elt with + S8 -> T_int8x16 + | S16 -> T_int16x8 + | S32 -> T_int32x4 + | S64 -> T_int64x2 + | U8 -> T_uint8x16 + | U16 -> T_uint16x8 + | U32 -> T_uint32x4 + | U64 -> T_uint64x2 + | F32 -> T_float32x4 + | P8 -> T_poly8x16 + | P16 -> T_poly16x8 + | _ -> failwith "Bad elt type" + end + | Corereg -> + begin match elt with + S8 -> T_int8 + | S16 -> T_int16 + | S32 -> T_int32 + | S64 -> T_int64 + | U8 -> T_uint8 + | U16 -> T_uint16 + | U32 -> T_uint32 + | U64 -> T_uint64 + | P8 -> T_poly8 + | P16 -> T_poly16 + | F32 -> T_float32 + | _ -> failwith "Bad elt type" + end + | Immed -> + T_immediate (0, 0) + | VecArray (num, sub) -> + T_arrayof (num, type_for_reg_elt sub elt) + | PtrTo x -> + T_ptrto (type_for_reg_elt x elt) + | CstPtrTo x -> + T_ptrto (T_const (type_for_reg_elt x elt)) + (* Anything else is solely for the use of the test generator. *) + | _ -> assert false + in + type_for_reg_elt reg elt + +(* Return size of a vector type, in bits. *) +let vectype_size = function + T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1 + | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1 + | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64 + | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2 + | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2 + | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128 + | _ -> raise Not_found + +let inttype_for_array num elttype = + let eltsize = vectype_size elttype in + let numwords = (num * eltsize) / 32 in + match numwords with + 4 -> B_TImode + | 6 -> B_EImode + | 8 -> B_OImode + | 12 -> B_CImode + | 16 -> B_XImode + | _ -> failwith ("no int type for size " ^ string_of_int numwords) + +(* These functions return pairs of (internal, external) types, where "internal" + types are those seen by GCC, and "external" are those seen by the assembler. 
+ These types aren't necessarily the same, since the intrinsics can munge more + than one C type into each assembler opcode. *) + +let make_sign_invariant func shape elt = + let arity, elt' = func shape elt in + arity, non_signed_variant elt' + +(* Don't restrict any types. *) + +let elts_same make_arity shape elt = + let vtype = type_for_elt shape elt in + make_arity vtype, elt + +(* As sign_invar_*, but when sign matters. *) +let elts_same_io_lane = + elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3)) + +let elts_same_io = + elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2)) + +let elts_same_2_lane = + elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3)) + +let elts_same_3 = elts_same_2_lane + +let elts_same_2 = + elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2)) + +let elts_same_1 = + elts_same (fun vtype -> Arity1 (vtype 0, vtype 1)) + +(* Use for signed/unsigned invariant operations (i.e. where the operation + doesn't depend on the sign of the data. *) + +let sign_invar_io_lane = make_sign_invariant elts_same_io_lane +let sign_invar_io = make_sign_invariant elts_same_io +let sign_invar_2_lane = make_sign_invariant elts_same_2_lane +let sign_invar_2 = make_sign_invariant elts_same_2 +let sign_invar_1 = make_sign_invariant elts_same_1 + +(* Sign-sensitive comparison. *) + +let cmp_sign_matters shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in + Arity2 (rtype, vtype 1, vtype 2), elt + +(* Signed/unsigned invariant comparison. *) + +let cmp_sign_invar shape elt = + let shape', elt' = cmp_sign_matters shape elt in + let elt'' = + match non_signed_variant elt' with + P8 -> I8 + | x -> x + in + shape', elt'' + +(* Comparison (VTST) where only the element width matters. *) + +let cmp_bits shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 + and bits_only = bits_of_elt elt in + Arity2 (rtype, vtype 1, vtype 2), bits_only + +let reg_shift shape elt = + let vtype = type_for_elt shape elt + and op2type = type_for_elt shape (signed_of_elt elt) 2 in + Arity2 (vtype 0, vtype 1, op2type), elt + +(* Genericised constant-shift type-generating function. *) + +let const_shift mkimm ?arity ?result shape elt = + let op2type = (shapemap shape 2) elt in + let op2width = elt_width op2type in + let op2 = mkimm op2width + and op1 = type_for_elt shape elt 1 + and r_elt = + match result with + None -> elt + | Some restriction -> restriction elt in + let rtype = type_for_elt shape r_elt 0 in + match arity with + None -> Arity2 (rtype, op1, op2), elt + | Some mkarity -> mkarity rtype op1 op2, elt + +(* Use for immediate right-shifts. *) + +let shift_right shape elt = + const_shift (fun imm -> T_immediate (1, imm)) shape elt + +let shift_right_acc shape elt = + const_shift (fun imm -> T_immediate (1, imm)) + ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt + +(* Use for immediate right-shifts when the operation doesn't care about + signedness. *) + +let shift_right_sign_invar = + make_sign_invariant shift_right + +(* Immediate right-shift; result is unsigned even when operand is signed. *) + +let shift_right_to_uns shape elt = + const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt + shape elt + +(* Immediate left-shift. *) + +let shift_left shape elt = + const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt + +(* Immediate left-shift, unsigned result. 
*) + +let shift_left_to_uns shape elt = + const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt + shape elt + +(* Immediate left-shift, don't care about signs. *) + +let shift_left_sign_invar = + make_sign_invariant shift_left + +(* Shift left/right and insert: only element size matters. *) + +let shift_insert shape elt = + let arity, elt = + const_shift (fun imm -> T_immediate (1, imm)) + ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in + arity, bits_of_elt elt + +(* Get/set lane. *) + +let get_lane shape elt = + let vtype = type_for_elt shape elt in + Arity2 (vtype 0, vtype 1, vtype 2), + (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x) + +let set_lane shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt + +let set_lane_notype shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts + +let create_vector shape elt = + let vtype = type_for_elt shape U64 1 + and rtype = type_for_elt shape elt 0 in + Arity1 (rtype, vtype), elt + +let conv make_arity shape elt = + let edest, esrc = match elt with + Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc + | _ -> failwith "Non-conversion element in conversion" in + let vtype = type_for_elt shape esrc + and rtype = type_for_elt shape edest 0 in + make_arity rtype vtype, elt + +let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1)) +let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2)) + +(* Operation has an unsigned result even if operands are signed. *) + +let dst_unsign make_arity shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in + make_arity rtype vtype, elt + +let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1)) + +let make_bits_only func shape elt = + let arity, elt' = func shape elt in + arity, bits_of_elt elt' + +(* Extend operation. *) + +let extend shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt + +(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned + integer ops respectively, or unsigned for polynomial ops. *) + +let table mkarity shape elt = + let vtype = type_for_elt shape elt in + let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in + mkarity vtype op2, bits_of_elt elt + +let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2)) +let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2)) + +(* Operations where only bits matter. *) + +let bits_1 = make_bits_only elts_same_1 +let bits_2 = make_bits_only elts_same_2 +let bits_3 = make_bits_only elts_same_3 + +(* Store insns. *) +let store_1 shape elt = + let vtype = type_for_elt shape elt in + Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt + +let store_3 shape elt = + let vtype = type_for_elt shape elt in + Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt + +let make_notype func shape elt = + let arity, _ = func shape elt in + arity, NoElts + +let notype_1 = make_notype elts_same_1 +let notype_2 = make_notype elts_same_2 +let notype_3 = make_notype elts_same_3 + +(* Bit-select operations (first operand is unsigned int). *) + +let bit_select shape elt = + let vtype = type_for_elt shape elt + and itype = type_for_elt shape (unsigned_of_elt elt) in + Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts + +(* Common lists of supported element types. 
*) + +let su_8_32 = [S8; S16; S32; U8; U16; U32] +let su_8_64 = S64 :: U64 :: su_8_32 +let su_16_64 = [S16; S32; S64; U16; U32; U64] +let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32 +let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64 + +let ops = + [ + (* Addition. *) + Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32; + Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64]; + Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; + Vadd, [], Long, "vaddl", elts_same_2, su_8_32; + Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; + Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32; + Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32; + Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], + All (3, Dreg), "vRhadd", elts_same_2, su_8_32; + Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], + All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32; + Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64; + Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64; + Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64; + Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half], + Narrow, "vRaddhn", sign_invar_2, su_16_64; + + (* Multiplication. *) + Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32; + Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32; + Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh", + elts_same_2, [S16; S32]; + Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ", + elts_same_2, [S16; S32]; + Vmul, + [Saturating; Rounding; Doubling; High_half; + Instruction_name ["vqrdmulh"]], + All (3, Dreg), "vqRdmulh", + elts_same_2, [S16; S32]; + Vmul, + [Saturating; Rounding; Doubling; High_half; + Instruction_name ["vqrdmulh"]], + All (3, Qreg), "vqRdmulhQ", + elts_same_2, [S16; S32]; + Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32; + Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32]; + + (* Multiply-accumulate. *) + Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32; + Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32; + Vmla, [], Long, "vmlal", elts_same_io, su_8_32; + Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32]; + + (* Multiply-subtract. *) + Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32; + Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32; + Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; + Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; + + (* Subtraction. *) + Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32; + Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64]; + Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; + Vsub, [], Long, "vsubl", elts_same_2, su_8_32; + Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; + Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32; + Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32; + Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64; + Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64; + Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64; + Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half], + Narrow, "vRsubhn", sign_invar_2, su_16_64; + + (* Comparison, equal. *) + Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32; + Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; + + (* Comparison, greater-than or equal. 
*) + Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32; + Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32; + + (* Comparison, less-than or equal. *) + Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, + F32 :: su_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], + All (3, Qreg), "vcleQ", cmp_sign_matters, + F32 :: su_8_32; + + (* Comparison, greater-than. *) + Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32; + Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32; + + (* Comparison, less-than. *) + Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters, + F32 :: su_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"], + All (3, Qreg), "vcltQ", cmp_sign_matters, + F32 :: su_8_32; + + (* Compare absolute greater-than or equal. *) + Vcage, [Instruction_name ["vacge"]], + All (3, Dreg), "vcage", cmp_sign_matters, [F32]; + Vcage, [Instruction_name ["vacge"]], + All (3, Qreg), "vcageQ", cmp_sign_matters, [F32]; + + (* Compare absolute less-than or equal. *) + Vcale, [Instruction_name ["vacge"]; Flipped "vcage"], + All (3, Dreg), "vcale", cmp_sign_matters, [F32]; + Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"], + All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32]; + + (* Compare absolute greater-than or equal. *) + Vcagt, [Instruction_name ["vacgt"]], + All (3, Dreg), "vcagt", cmp_sign_matters, [F32]; + Vcagt, [Instruction_name ["vacgt"]], + All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32]; + + (* Compare absolute less-than or equal. *) + Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"], + All (3, Dreg), "vcalt", cmp_sign_matters, [F32]; + Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"], + All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32]; + + (* Test bits. *) + Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32; + Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32; + + (* Absolute difference. *) + Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32; + Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32; + Vabd, [], Long, "vabdl", elts_same_2, su_8_32; + + (* Absolute difference and accumulate. *) + Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32; + Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32; + Vaba, [], Long, "vabal", elts_same_io, su_8_32; + + (* Max. *) + Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32; + Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32; + + (* Min. *) + Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32; + Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32; + + (* Pairwise add. *) + Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32; + Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32; + Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32; + + (* Pairwise add, widen and accumulate. *) + Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32; + Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32; + + (* Folding maximum, minimum. *) + Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32; + Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32; + + (* Reciprocal step. *) + Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32]; + Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32]; + Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32]; + Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32]; + + (* Vector shift left. 
*) + Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64; + Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64; + Vshl, [Instruction_name ["vrshl"]; Rounding], + All (3, Dreg), "vRshl", reg_shift, su_8_64; + Vshl, [Instruction_name ["vrshl"]; Rounding], + All (3, Qreg), "vRshlQ", reg_shift, su_8_64; + Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64; + Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64; + Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], + All (3, Dreg), "vqRshl", reg_shift, su_8_64; + Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], + All (3, Qreg), "vqRshlQ", reg_shift, su_8_64; + + (* Vector shift right by constant. *) + Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64; + Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64; + Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg, + "vRshr_n", shift_right, su_8_64; + Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg, + "vRshrQ_n", shift_right, su_8_64; + Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64; + Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n", + shift_right_sign_invar, su_16_64; + Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64; + Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm, + "vqRshrn_n", shift_right, su_16_64; + Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n", + shift_right_to_uns, [S16; S32; S64]; + Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding], + Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64]; + + (* Vector shift left by constant. *) + Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64; + Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64; + Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64; + Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64; + Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n", + shift_left_to_uns, [S8; S16; S32; S64]; + Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n", + shift_left_to_uns, [S8; S16; S32; S64]; + Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32; + + (* Vector shift right by constant and accumulate. *) + Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64; + Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64; + Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg, + "vRsra_n", shift_right_acc, su_8_64; + Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg, + "vRsraQ_n", shift_right_acc, su_8_64; + + (* Vector shift right and insert. *) + Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, + P8 :: P16 :: su_8_64; + Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, + P8 :: P16 :: su_8_64; + + (* Vector shift left and insert. *) + Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, + P8 :: P16 :: su_8_64; + Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, + P8 :: P16 :: su_8_64; + + (* Absolute value. *) + Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32]; + Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32]; + Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32]; + Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32]; + + (* Negate. 
*) + Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32]; + Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32]; + Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32]; + Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32]; + + (* Bitwise not. *) + Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32; + Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32; + + (* Count leading sign bits. *) + Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32]; + Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32]; + + (* Count leading zeros. *) + Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32; + Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32; + + (* Count number of set bits. *) + Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8]; + Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8]; + + (* Reciprocal estimate. *) + Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32]; + Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32]; + + (* Reciprocal square-root estimate. *) + Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32]; + Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32]; + + (* Get lanes from a vector. *) + Vget_lane, + [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; + Instruction_name ["vmov"]], + Use_operands [| Corereg; Dreg; Immed |], + "vget_lane", get_lane, pf_su_8_32; + Vget_lane, + [No_op; + InfoWord; + Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Corereg; Dreg; Immed |], + "vget_lane", notype_2, [S64; U64]; + Vget_lane, + [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; + Instruction_name ["vmov"]], + Use_operands [| Corereg; Qreg; Immed |], + "vgetQ_lane", get_lane, pf_su_8_32; + Vget_lane, + [InfoWord; + Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Corereg; Qreg; Immed |], + "vgetQ_lane", notype_2, [S64; U64]; + + (* Set lanes in a vector. *) + Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; + Instruction_name ["vmov"]], + Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", + set_lane, pf_su_8_32; + Vset_lane, [No_op; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", + set_lane_notype, [S64; U64]; + Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; + Instruction_name ["vmov"]], + Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", + set_lane, pf_su_8_32; + Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", + set_lane_notype, [S64; U64]; + + (* Create vector from literal bit pattern. *) + Vcreate, + [No_op], (* Not really, but it can yield various things that are too + hard for the test generator at this time. *) + Use_operands [| Dreg; Corereg |], "vcreate", create_vector, + pf_su_8_64; + + (* Set all lanes to the same value. 
*) + Vdup_n, + [Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, + pf_su_8_32; + Vdup_n, + [No_op; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, + [S64; U64]; + Vdup_n, + [Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, + pf_su_8_32; + Vdup_n, + [No_op; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; + Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1, + [S64; U64]; + + (* These are just aliases for the above. *) + Vmov_n, + [Builtin_name "vdup_n"; + Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Dreg; Corereg |], + "vmov_n", bits_1, pf_su_8_32; + Vmov_n, + [No_op; + Builtin_name "vdup_n"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], + "vmov_n", notype_1, [S64; U64]; + Vmov_n, + [Builtin_name "vdupQ_n"; + Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], + "vmovQ_n", bits_1, pf_su_8_32; + Vmov_n, + [No_op; + Builtin_name "vdupQ_n"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; + Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Qreg; Corereg |], + "vmovQ_n", notype_1, [S64; U64]; + + (* Duplicate, lane version. We can't use Use_operands here because the + rightmost register (always Dreg) would be picked up by find_key_operand, + when we want the leftmost register to be used in this case (otherwise + the modes are indistinguishable in neon.md, etc. *) + Vdup_lane, + [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], + Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; + Vdup_lane, + [No_op; Const_valuator (fun _ -> 0)], + Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; + Vdup_lane, + [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], + Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; + Vdup_lane, + [No_op; Const_valuator (fun _ -> 0)], + Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; + + (* Combining vectors. *) + Vcombine, [No_op], + Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, + pf_su_8_64; + + (* Splitting vectors. *) + Vget_high, [No_op], + Use_operands [| Dreg; Qreg |], "vget_high", + notype_1, pf_su_8_64; + Vget_low, [Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Dreg |]]; + Fixed_return_reg], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, pf_su_8_32; + Vget_low, [No_op], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, [S64; U64]; + + (* Conversions. *) + Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + + (* Move, narrowing. 
*) + Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]], + Narrow, "vmovn", sign_invar_1, su_16_64; + Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating], + Narrow, "vqmovn", elts_same_1, su_16_64; + Vmovn, + [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign], + Narrow, "vqmovun", dst_unsign_1, + [S16; S32; S64]; + + (* Move, long. *) + Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]], + Long, "vmovl", elts_same_1, su_8_32; + + (* Table lookup. *) + Vtbl 1, + [Instruction_name ["vtbl"]; + Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8]; + Vtbl 2, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2, + [U8; S8; P8]; + Vtbl 3, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2, + [U8; S8; P8]; + Vtbl 4, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2, + [U8; S8; P8]; + + (* Extended table lookup. *) + Vtbx 1, + [Instruction_name ["vtbx"]; + Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8]; + Vtbx 2, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io, + [U8; S8; P8]; + Vtbx 3, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io, + [U8; S8; P8]; + Vtbx 4, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io, + [U8; S8; P8]; + + (* Multiply, lane. (note: these were undocumented at the time of + writing). *) + Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane, + [S16; S32; U16; U32; F32]; + Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane, + [S16; S32; U16; U32; F32]; + + (* Multiply-accumulate, lane. *) + Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane, + [S16; S32; U16; U32]; + Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane", + elts_same_io_lane, [S16; S32]; + + (* Multiply-subtract, lane. *) + Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane, + [S16; S32; U16; U32]; + Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane", + elts_same_io_lane, [S16; S32]; + + (* Long multiply, lane. *) + Vmull_lane, [], + Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32]; + + (* Saturating doubling long multiply, lane. *) + Vqdmull_lane, [Saturating; Doubling], + Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32]; + + (* Saturating doubling long multiply high, lane. 
*) + Vqdmulh_lane, [Saturating; Halving], + By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving], + By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving; Rounding; + Instruction_name ["vqrdmulh"]], + By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving; Rounding; + Instruction_name ["vqrdmulh"]], + By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32]; + + (* Vector multiply by scalar. *) + Vmul_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmul_n", + sign_invar_2, [S16; S32; U16; U32; F32]; + Vmul_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n", + sign_invar_2, [S16; S32; U16; U32; F32]; + + (* Vector long multiply by scalar. *) + Vmull_n, [Instruction_name ["vmull"]; + Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]], + Wide_scalar, "vmull_n", + elts_same_2, [S16; S32; U16; U32]; + + (* Vector saturating doubling long multiply by scalar. *) + Vqdmull_n, [Saturating; Doubling; + Disassembles_as [Use_operands [| Qreg; Dreg; + Element_of_dreg |]]], + Wide_scalar, "vqdmull_n", + elts_same_2, [S16; S32]; + + (* Vector saturating doubling long multiply high by scalar. *) + Vqdmulh_n, + [Saturating; Halving; InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], + "vqdmulhQ_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], + "vqdmulh_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; Rounding; InfoWord; + Instruction_name ["vqrdmulh"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], + "vqRdmulhQ_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; Rounding; InfoWord; + Instruction_name ["vqrdmulh"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], + "vqRdmulh_n", elts_same_2, [S16; S32]; + + (* Vector multiply-accumulate by scalar. *) + Vmla_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmla_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmla_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32]; + Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io, + [S16; S32]; + + (* Vector multiply subtract by scalar. *) + Vmls_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmls_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmls_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32]; + Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io, + [S16; S32]; + + (* Vector extract. 
*) + Vext, [Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, + pf_su_8_64; + Vext, [Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, + pf_su_8_64; + + (* Reverse elements. *) + Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32; + Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32; + Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16]; + Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16]; + Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8]; + Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8]; + + (* Bit selection. *) + Vbsl, + [Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, + pf_su_8_64; + Vbsl, + [Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], + Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, + pf_su_8_64; + + (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards + generating good code for intrinsics which return structure types -- + builtins work well by themselves (and understand that the values being + stored on e.g. the stack also reside in registers, so can optimise the + stores away entirely if the results are used immediately), but + intrinsics are very much less efficient. Maybe something can be improved + re: inlining, or tweaking the ABI used for intrinsics (a special call + attribute?). + *) + Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32; + Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32; + + (* Zip elements. *) + Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32; + Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32; + + (* Unzip elements. *) + Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32; + Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32; + + (* Element/structure loads. VLD1 variants. 
*) + Vldx 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, + pf_su_8_64; + Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, + pf_su_8_64; + + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, pf_su_8_32; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, [S64; U64]; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, pf_su_8_32; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, [S64; U64]; + + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, pf_su_8_32; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, [S64; U64]; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, pf_su_8_32; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, [S64; U64]; + + (* VST1 variants. *) + Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg |], "vst1", + store_1, pf_su_8_64; + Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", + store_1, pf_su_8_64; + + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, pf_su_8_32; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, [U64; S64]; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, pf_su_8_32; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, [U64; S64]; + + (* VLD2 variants. 
*) + Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, pf_su_8_32; + Vldx 2, [Instruction_name ["vld1"]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, [S64; U64]; + Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |], + "vld2Q", bits_1, pf_su_8_32; + + Vldx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg; + VecArray (2, Dreg); Immed |], + "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg; + VecArray (2, Qreg); Immed |], + "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 2, + [Disassembles_as [Use_operands + [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, pf_su_8_32; + Vldx_dup 2, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (2, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, [S64; U64]; + + (* VST2 variants. *) + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, pf_su_8_32; + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, [S64; U64]; + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q", + store_1, pf_su_8_32; + + Vstx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* VLD3 variants. 
*) + Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, pf_su_8_32; + Vldx 3, [Instruction_name ["vld1"]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, [S64; U64]; + Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (3, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |], + "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32; + + Vldx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg; + VecArray (3, Dreg); Immed |], + "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg; + VecArray (3, Qreg); Immed |], + "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 3, + [Disassembles_as [Use_operands + [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, pf_su_8_32; + Vldx_dup 3, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (3, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, [S64; U64]; + + (* VST3 variants. *) + Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, pf_su_8_32; + Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, [S64; U64]; + Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (3, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q", + store_1, pf_su_8_32; + + Vstx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* VLD4/VST4 variants. 
*) + Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, pf_su_8_32; + Vldx 4, [Instruction_name ["vld1"]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, [S64; U64]; + Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (4, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |], + "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32; + + Vldx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg; + VecArray (4, Dreg); Immed |], + "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg; + VecArray (4, Qreg); Immed |], + "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 4, + [Disassembles_as [Use_operands + [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, pf_su_8_32; + Vldx_dup 4, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (4, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, [S64; U64]; + + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, pf_su_8_32; + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, [S64; U64]; + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q", + store_1, pf_su_8_32; + + Vstx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* Logical operations. And. *) + Vand, [], All (3, Dreg), "vand", notype_2, su_8_32; + Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64]; + Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64; + + (* Or. *) + Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32; + Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64]; + Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64; + + (* Eor. *) + Veor, [], All (3, Dreg), "veor", notype_2, su_8_32; + Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64]; + Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64; + + (* Bic (And-not). *) + Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32; + Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64]; + Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64; + + (* Or-not. 
*) + Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32; + Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64]; + Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64; + ] + +let reinterp = + let elems = P8 :: P16 :: F32 :: su_8_64 in + List.fold_right + (fun convto acc -> + let types = List.fold_right + (fun convfrom acc -> + if convfrom <> convto then + Cast (convto, convfrom) :: acc + else + acc) + elems + [] + in + let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |], + "vreinterpret", conv_1, types + and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |], + "vreinterpretQ", conv_1, types in + dconv :: qconv :: acc) + elems + [] + +(* Output routines. *) + +let rec string_of_elt = function + S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64" + | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64" + | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64" + | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64" + | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" + | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b + | NoElts -> failwith "No elts" + +let string_of_elt_dots elt = + match elt with + Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b + | _ -> string_of_elt elt + +let string_of_vectype vt = + let rec name affix = function + T_int8x8 -> affix "int8x8" + | T_int8x16 -> affix "int8x16" + | T_int16x4 -> affix "int16x4" + | T_int16x8 -> affix "int16x8" + | T_int32x2 -> affix "int32x2" + | T_int32x4 -> affix "int32x4" + | T_int64x1 -> affix "int64x1" + | T_int64x2 -> affix "int64x2" + | T_uint8x8 -> affix "uint8x8" + | T_uint8x16 -> affix "uint8x16" + | T_uint16x4 -> affix "uint16x4" + | T_uint16x8 -> affix "uint16x8" + | T_uint32x2 -> affix "uint32x2" + | T_uint32x4 -> affix "uint32x4" + | T_uint64x1 -> affix "uint64x1" + | T_uint64x2 -> affix "uint64x2" + | T_float32x2 -> affix "float32x2" + | T_float32x4 -> affix "float32x4" + | T_poly8x8 -> affix "poly8x8" + | T_poly8x16 -> affix "poly8x16" + | T_poly16x4 -> affix "poly16x4" + | T_poly16x8 -> affix "poly16x8" + | T_int8 -> affix "int8" + | T_int16 -> affix "int16" + | T_int32 -> affix "int32" + | T_int64 -> affix "int64" + | T_uint8 -> affix "uint8" + | T_uint16 -> affix "uint16" + | T_uint32 -> affix "uint32" + | T_uint64 -> affix "uint64" + | T_poly8 -> affix "poly8" + | T_poly16 -> affix "poly16" + | T_float32 -> affix "float32" + | T_immediate _ -> "const int" + | T_void -> "void" + | T_intQI -> "__builtin_neon_qi" + | T_intHI -> "__builtin_neon_hi" + | T_intSI -> "__builtin_neon_si" + | T_intDI -> "__builtin_neon_di" + | T_floatSF -> "__builtin_neon_sf" + | T_arrayof (num, base) -> + let basename = name (fun x -> x) base in + affix (Printf.sprintf "%sx%d" basename num) + | T_ptrto x -> + let basename = name affix x in + Printf.sprintf "%s *" basename + | T_const x -> + let basename = name affix x in + Printf.sprintf "const %s" basename + in + name (fun x -> x ^ "_t") vt + +let string_of_inttype = function + B_TImode -> "__builtin_neon_ti" + | B_EImode -> "__builtin_neon_ei" + | B_OImode -> "__builtin_neon_oi" + | B_CImode -> "__builtin_neon_ci" + | B_XImode -> "__builtin_neon_xi" + +let string_of_mode = function + V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf" + | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si" + | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si" + | SF -> "sf" + +(* Use uppercase chars for letters which form part of the intrinsic name, but + should be omitted from the builtin name (the 
info is passed in an extra + argument, instead). *) +let intrinsic_name name = String.lowercase name + +(* Allow the name of the builtin to be overridden by things (e.g. Flipped) + found in the features list. *) +let builtin_name features name = + let name = List.fold_right + (fun el name -> + match el with + Flipped x | Builtin_name x -> x + | _ -> name) + features name in + let islower x = let str = String.make 1 x in (String.lowercase str) = str + and buf = Buffer.create (String.length name) in + String.iter (fun c -> if islower c then Buffer.add_char buf c) name; + Buffer.contents buf + +(* Transform an arity into a list of strings. *) +let strings_of_arity a = + match a with + | Arity0 vt -> [string_of_vectype vt] + | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2] + | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3] + | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3; + string_of_vectype vt4] + | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3; + string_of_vectype vt4; + string_of_vectype vt5] + +(* Suffixes on the end of builtin names that are to be stripped in order + to obtain the name used as an instruction. They are only stripped if + preceded immediately by an underscore. *) +let suffixes_to_strip = [ "n"; "lane"; "dup" ] + +(* Get the possible names of an instruction corresponding to a "name" from the + ops table. This is done by getting the equivalent builtin name and + stripping any suffixes from the list at the top of this file, unless + the features list presents with an Instruction_name entry, in which + case that is used; or unless the features list presents with a Flipped + entry, in which case that is used. If both such entries are present, + the first in the list will be chosen. *) +let get_insn_names features name = + let names = try + begin + match List.find (fun feature -> match feature with + Instruction_name _ -> true + | Flipped _ -> true + | _ -> false) features + with + Instruction_name names -> names + | Flipped name -> [name] + | _ -> assert false + end + with Not_found -> [builtin_name features name] + in + begin + List.map (fun name' -> + try + let underscore = String.rindex name' '_' in + let our_suffix = String.sub name' (underscore + 1) + ((String.length name') - underscore - 1) + in + let rec strip remaining_suffixes = + match remaining_suffixes with + [] -> name' + | s::ss when our_suffix = s -> String.sub name' 0 underscore + | _::ss -> strip ss + in + strip suffixes_to_strip + with (Not_found | Invalid_argument _) -> name') names + end + +(* Apply a function to each element of a list and then comma-separate + the resulting strings. *) +let rec commas f elts acc = + match elts with + [] -> acc + | [elt] -> acc ^ (f elt) + | elt::elts -> + commas f elts (acc ^ (f elt) ^ ", ") + +(* Given a list of features and the shape specified in the "ops" table, apply + a function to each possible shape that the instruction may have. + By default, this is the "shape" entry in "ops". If the features list + contains a Disassembles_as entry, the shapes contained in that entry are + mapped to corresponding outputs and returned in a list. If there is more + than one Disassembles_as entry, only the first is used. 
*) +let analyze_all_shapes features shape f = + try + match List.find (fun feature -> + match feature with Disassembles_as _ -> true + | _ -> false) + features with + Disassembles_as shapes -> List.map f shapes + | _ -> assert false + with Not_found -> [f shape] + diff --git a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h new file mode 100644 index 000000000..9cf186b33 --- /dev/null +++ b/gcc/config/arm/netbsd-elf.h @@ -0,0 +1,157 @@ +/* Definitions of target machine for GNU compiler, NetBSD/arm ELF version. + Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Contributed by Wasabi Systems, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Run-time Target Specification. */ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (NetBSD/arm ELF)", stderr); + +/* arm.h defaults to ARM6 CPU. */ + +/* This defaults us to little-endian. */ +#ifndef TARGET_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT 0 +#endif + +#undef MULTILIB_DEFAULTS + +/* Default it to use ATPCS with soft-VFP. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (MASK_APCS_FRAME \ + | TARGET_ENDIAN_DEFAULT) + +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_ATPCS + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } \ + while (0) + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC + +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC \ + "-matpcs %{fpic|fpie:-k} %{fPIC|fPIE:-k}" + +/* Default to full VFP if -mhard-float is specified. */ +#undef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC \ + "%{mhard-float:{!mfpu=*:-mfpu=vfp}} \ + %{mfloat-abi=hard:{!mfpu=*:-mfpu=vfp}}" + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \ + { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, + +#define NETBSD_ENTRY_POINT "__start" + +#undef LINK_SPEC +#define LINK_SPEC \ + "-X %{mbig-endian:-EB} %{mlittle-endian:-EL} \ + %(netbsd_link_spec)" + +/* Make GCC agree with . */ + +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* We don't have any limit on the length as out debugger is GDB. */ +#undef DBX_CONTIN_LENGTH + +/* NetBSD does its profiling differently to the Acorn compiler. We + don't need a word following the mcount call; and to skip it + requires either an assembly stub or use of fomit-frame-pointer when + compiling the profiling functions. Since we break Acorn CC + compatibility below a little more won't hurt. */ + +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM,LABELNO) \ +{ \ + asm_fprintf (STREAM, "\tmov\t%Rip, %Rlr\n"); \ + asm_fprintf (STREAM, "\tbl\t__mcount%s\n", \ + (TARGET_ARM && NEED_PLT_RELOC) \ + ? 
"(PLT)" : ""); \ +} + +/* VERY BIG NOTE: Change of structure alignment for NetBSD/arm. + There are consequences you should be aware of... + + Normally GCC/arm uses a structure alignment of 32 for compatibility + with armcc. This means that structures are padded to a word + boundary. However this causes problems with bugged NetBSD kernel + code (possibly userland code as well - I have not checked every + binary). The nature of this bugged code is to rely on sizeof() + returning the correct size of various structures rounded to the + nearest byte (SCSI and ether code are two examples, the vm system + is another). This code breaks when the structure alignment is 32 + as sizeof() will report a word=rounded size. By changing the + structure alignment to 8. GCC will conform to what is expected by + NetBSD. + + This has several side effects that should be considered. + 1. Structures will only be aligned to the size of the largest member. + i.e. structures containing only bytes will be byte aligned. + structures containing shorts will be half word aligned. + structures containing ints will be word aligned. + + This means structures should be padded to a word boundary if + alignment of 32 is required for byte structures etc. + + 2. A potential performance penalty may exist if strings are no longer + word aligned. GCC will not be able to use word load/stores to copy + short strings. + + This modification is not encouraged but with the present state of the + NetBSD source tree it is currently the only solution that meets the + requirements. */ + +#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8 + +/* Clear the instruction cache from `BEG' to `END'. This makes a + call to the ARM_SYNC_ICACHE architecture specific syscall. */ +#define CLEAR_INSN_CACHE(BEG, END) \ +do \ + { \ + extern int sysarch(int number, void *args); \ + struct \ + { \ + unsigned int addr; \ + int len; \ + } s; \ + s.addr = (unsigned int)(BEG); \ + s.len = (END) - (BEG); \ + (void) sysarch (0, &s); \ + } \ +while (0) + +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + diff --git a/gcc/config/arm/netbsd.h b/gcc/config/arm/netbsd.h new file mode 100644 index 000000000..4a1adbae9 --- /dev/null +++ b/gcc/config/arm/netbsd.h @@ -0,0 +1,150 @@ +/* NetBSD/arm a.out version. + Copyright (C) 1993, 1994, 1997, 1998, 2003, 2004, 2005, 2007, 2008, 2010 + Free Software Foundation, Inc. + Contributed by Mark Brinicombe (amb@physig.ph.kcl.ac.uk) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Run-time Target Specification. */ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/NetBSD)", stderr); + +/* Unsigned chars produces much better code than signed. */ +#define DEFAULT_SIGNED_CHAR 0 + +/* Since we always use GAS as our assembler we support stabs. */ +#define DBX_DEBUGGING_INFO 1 + +/*#undef ASM_DECLARE_FUNCTION_NAME*/ + +/* ARM6 family default cpu. 
*/ +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm6 + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) + +/* Some defines for CPP. + arm32 is the NetBSD port name, so we always define arm32 and __arm32__. */ +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + NETBSD_OS_CPP_BUILTINS_AOUT(); \ + builtin_define_std ("arm32"); \ + builtin_define_std ("unix"); \ + builtin_define_std ("riscbsd"); \ + } while (0) + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_AOUT }, + +#undef CPP_SPEC +#define CPP_SPEC "\ +%(cpp_cpu_arch) %(cpp_float) %(cpp_endian) %(netbsd_cpp_spec) \ +" + +/* Because TARGET_DEFAULT sets MASK_SOFT_FLOAT */ +#undef CPP_FLOAT_DEFAULT_SPEC +#define CPP_FLOAT_DEFAULT_SPEC "-D__SOFTFP__" + +/* Pass -X to the linker so that it will strip symbols starting with 'L' */ +#undef LINK_SPEC +#define LINK_SPEC "-X %(netbsd_link_spec)" + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* We don't have any limit on the length as out debugger is GDB. */ +#undef DBX_CONTIN_LENGTH + +/* NetBSD does its profiling differently to the Acorn compiler. We + don't need a word following the mcount call; and to skip it + requires either an assembly stub or use of fomit-frame-pointer when + compiling the profiling functions. Since we break Acorn CC + compatibility below a little more won't hurt. */ + +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM,LABELNO) \ +{ \ + fprintf(STREAM, "\tmov\t%sip, %slr\n", REGISTER_PREFIX, REGISTER_PREFIX); \ + fprintf(STREAM, "\tbl\tmcount\n"); \ +} + +/* On the ARM `@' introduces a comment, so we must use something else + for .type directives. */ +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "%%%s" + +/* NetBSD uses the old PCC style aggregate returning conventions. */ +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 1 + +/* Although not normally relevant (since by default, all aggregates + are returned in memory) compiling some parts of libc requires + non-APCS style struct returns. */ +#undef TARGET_RETURN_IN_MEMORY + +/* VERY BIG NOTE : Change of structure alignment for RiscBSD. + There are consequences you should be aware of... + + Normally GCC/arm uses a structure alignment of 32 for compatibility + with armcc. This means that structures are padded to a word + boundary. However this causes problems with bugged NetBSD kernel + code (possibly userland code as well - I have not checked every + binary). The nature of this bugged code is to rely on sizeof() + returning the correct size of various structures rounded to the + nearest byte (SCSI and ether code are two examples, the vm system + is another). This code breaks when the structure alignment is 32 + as sizeof() will report a word=rounded size. By changing the + structure alignment to 8. GCC will conform to what is expected by + NetBSD. + + This has several side effects that should be considered. + 1. Structures will only be aligned to the size of the largest member. + i.e. structures containing only bytes will be byte aligned. + structures containing shorts will be half word aligned. + structures containing ints will be word aligned. + + This means structures should be padded to a word boundary if + alignment of 32 is required for byte structures etc. + + 2. A potential performance penalty may exist if strings are no longer + word aligned. 
GCC will not be able to use word load/stores to copy + short strings. + + This modification is not encouraged but with the present state of the + NetBSD source tree it is currently the only solution that meets the + requirements. */ +#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8 + +/* Clear the instruction cache from `BEG' to `END'. This makes a + call to the ARM32_SYNC_ICACHE architecture specific syscall. */ +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + extern int sysarch(int number, void *args); \ + struct { \ + unsigned int addr; \ + int len; \ + } s; \ + s.addr = (unsigned int)(BEG); \ + s.len = (END) - (BEG); \ + (void)sysarch(0, &s); \ +} diff --git a/gcc/config/arm/pe.c b/gcc/config/arm/pe.c new file mode 100644 index 000000000..3d9efd578 --- /dev/null +++ b/gcc/config/arm/pe.c @@ -0,0 +1,257 @@ +/* Routines for GCC for ARM/pe. + Copyright (C) 1995, 1996, 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2010 + Free Software Foundation, Inc. + Contributed by Doug Evans (dje@cygnus.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "output.h" +#include "flags.h" +#include "tree.h" +#include "expr.h" +#include "diagnostic-core.h" +#include "tm_p.h" + +extern int current_function_anonymous_args; + + +/* Return nonzero if DECL is a dllexport'd object. */ + +tree current_class_type; /* FIXME */ + +int +arm_dllexport_p (tree decl) +{ + tree exp; + + if (TREE_CODE (decl) != VAR_DECL + && TREE_CODE (decl) != FUNCTION_DECL) + return 0; + exp = lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)); + if (exp) + return 1; + + return 0; +} + +/* Return nonzero if DECL is a dllimport'd object. */ + +int +arm_dllimport_p (tree decl) +{ + tree imp; + + if (TREE_CODE (decl) == FUNCTION_DECL + && TARGET_NOP_FUN_DLLIMPORT) + return 0; + + if (TREE_CODE (decl) != VAR_DECL + && TREE_CODE (decl) != FUNCTION_DECL) + return 0; + imp = lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl)); + if (imp) + return 1; + + return 0; +} + +/* Return nonzero if SYMBOL is marked as being dllexport'd. */ + +int +arm_dllexport_name_p (const char *symbol) +{ + return symbol[0] == ARM_PE_FLAG_CHAR && symbol[1] == 'e' && symbol[2] == '.'; +} + +/* Return nonzero if SYMBOL is marked as being dllimport'd. */ + +int +arm_dllimport_name_p (const char *symbol) +{ + return symbol[0] == ARM_PE_FLAG_CHAR && symbol[1] == 'i' && symbol[2] == '.'; +} + +/* Mark a DECL as being dllexport'd. + Note that we override the previous setting (e.g.: dllimport). 
*/ + +void +arm_mark_dllexport (tree decl) +{ + const char * oldname; + char * newname; + rtx rtlname; + tree idp; + + rtlname = XEXP (DECL_RTL (decl), 0); + if (GET_CODE (rtlname) == MEM) + rtlname = XEXP (rtlname, 0); + gcc_assert (GET_CODE (rtlname) == SYMBOL_REF); + oldname = XSTR (rtlname, 0); + + if (arm_dllimport_name_p (oldname)) + oldname += 9; + else if (arm_dllexport_name_p (oldname)) + return; /* already done */ + + newname = XALLOCAVEC (char, strlen (oldname) + 4); + sprintf (newname, "%ce.%s", ARM_PE_FLAG_CHAR, oldname); + + /* We pass newname through get_identifier to ensure it has a unique + address. RTL processing can sometimes peek inside the symbol ref + and compare the string's addresses to see if two symbols are + identical. */ + /* ??? At least I think that's why we do this. */ + idp = get_identifier (newname); + + XEXP (DECL_RTL (decl), 0) = + gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp)); +} + +/* Mark a DECL as being dllimport'd. */ + +void +arm_mark_dllimport (tree decl) +{ + const char * oldname; + char * newname; + tree idp; + rtx rtlname, newrtl; + + rtlname = XEXP (DECL_RTL (decl), 0); + + if (GET_CODE (rtlname) == MEM) + rtlname = XEXP (rtlname, 0); + gcc_assert (GET_CODE (rtlname) == SYMBOL_REF); + oldname = XSTR (rtlname, 0); + + gcc_assert (!arm_dllexport_name_p (oldname)); + if (arm_dllimport_name_p (oldname)) + return; /* already done */ + + /* ??? One can well ask why we're making these checks here, + and that would be a good question. */ + + /* Imported variables can't be initialized. */ + if (TREE_CODE (decl) == VAR_DECL + && !DECL_VIRTUAL_P (decl) + && DECL_INITIAL (decl)) + { + error ("initialized variable %q+D is marked dllimport", decl); + return; + } + /* Nor can they be static. */ + if (TREE_CODE (decl) == VAR_DECL + /* ??? Is this test for vtables needed? */ + && !DECL_VIRTUAL_P (decl) + && 0 /*???*/) + { + error ("static variable %q+D is marked dllimport", decl); + return; + } + + /* `extern' needn't be specified with dllimport. + Specify `extern' now and hope for the best. Sigh. */ + if (TREE_CODE (decl) == VAR_DECL + /* ??? Is this test for vtables needed? */ + && !DECL_VIRTUAL_P (decl)) + { + DECL_EXTERNAL (decl) = 1; + TREE_PUBLIC (decl) = 1; + } + + newname = XALLOCAVEC (char, strlen (oldname) + 11); + sprintf (newname, "%ci.__imp_%s", ARM_PE_FLAG_CHAR, oldname); + + /* We pass newname through get_identifier to ensure it has a unique + address. RTL processing can sometimes peek inside the symbol ref + and compare the string's addresses to see if two symbols are + identical. */ + /* ??? At least I think that's why we do this. */ + idp = get_identifier (newname); + + newrtl = gen_rtx_MEM (Pmode, + gen_rtx_SYMBOL_REF (Pmode, + IDENTIFIER_POINTER (idp))); + XEXP (DECL_RTL (decl), 0) = newrtl; +} + +void +arm_pe_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED) +{ + /* This bit is copied from arm_encode_section_info. */ + if (optimize > 0 && TREE_CONSTANT (decl)) + SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; + + /* Mark the decl so we can tell from the rtl whether the object is + dllexport'd or dllimport'd. */ + if (arm_dllexport_p (decl)) + arm_mark_dllexport (decl); + else if (arm_dllimport_p (decl)) + arm_mark_dllimport (decl); + /* It might be that DECL has already been marked as dllimport, but a + subsequent definition nullified that. The attribute is gone but + DECL_RTL still has @i.__imp_foo. We need to remove that. 
*/ + else if ((TREE_CODE (decl) == FUNCTION_DECL + || TREE_CODE (decl) == VAR_DECL) + && DECL_RTL (decl) != NULL_RTX + && GET_CODE (DECL_RTL (decl)) == MEM + && GET_CODE (XEXP (DECL_RTL (decl), 0)) == MEM + && GET_CODE (XEXP (XEXP (DECL_RTL (decl), 0), 0)) == SYMBOL_REF + && arm_dllimport_name_p (XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0))) + { + const char *oldname = XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0); + tree idp = get_identifier (oldname + 9); + rtx newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp)); + + XEXP (DECL_RTL (decl), 0) = newrtl; + + /* We previously set TREE_PUBLIC and DECL_EXTERNAL. + ??? We leave these alone for now. */ + } +} + +void +arm_pe_unique_section (tree decl, int reloc) +{ + int len; + const char * name; + char * string; + const char * prefix; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = arm_strip_name_encoding (name); + + /* The object is put in, for example, section .text$foo. + The linker will then ultimately place them in .text + (everything from the $ on is stripped). */ + if (TREE_CODE (decl) == FUNCTION_DECL) + prefix = ".text$"; + else if (decl_readonly_section (decl, reloc)) + prefix = ".rdata$"; + else + prefix = ".data$"; + len = strlen (name) + strlen (prefix); + string = XALLOCAVEC (char, len + 1); + sprintf (string, "%s%s", prefix, name); + + DECL_SECTION_NAME (decl) = build_string (len, string); +} diff --git a/gcc/config/arm/pe.h b/gcc/config/arm/pe.h new file mode 100644 index 000000000..009c4fe43 --- /dev/null +++ b/gcc/config/arm/pe.h @@ -0,0 +1,148 @@ +/* Definitions of target machine for GNU compiler, for ARM with PE obj format. + Copyright (C) 1995, 1996, 1999, 2000, 2002, 2003, 2004, 2005, 2007 + Free Software Foundation, Inc. + Contributed by Doug Evans (dje@cygnus.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Enable PE specific code. */ +#define ARM_PE 1 + +#define ARM_PE_FLAG_CHAR '@' + +/* Ensure that @x. will be stripped from the function name. */ +#undef SUBTARGET_NAME_ENCODING_LENGTHS +#define SUBTARGET_NAME_ENCODING_LENGTHS \ + case ARM_PE_FLAG_CHAR: return 3; + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + + +/* Run-time Target Specification. */ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/pe)", stderr) + +/* Get tree.c to declare a target-specific specialization of + merge_decl_attributes. */ +#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1 + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__pe__" + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_NOP_FUN_DLLIMPORT) + +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork" } + +#undef WCHAR_TYPE +#define WCHAR_TYPE "short unsigned int" +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 16 + +/* r11 is fixed. 
*/ +#undef SUBTARGET_CONDITIONAL_REGISTER_USAGE +#define SUBTARGET_CONDITIONAL_REGISTER_USAGE \ + fixed_regs [11] = 1; \ + call_used_regs [11] = 1; + + +/* PE/COFF uses explicit import from shared libraries. */ +#define MULTIPLE_SYMBOL_SPACES 1 + +#define TARGET_ASM_UNIQUE_SECTION arm_pe_unique_section +#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section + +#define SUPPORTS_ONE_ONLY 1 + +/* Switch into a generic section. */ +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION default_pe_asm_named_section + +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + +/* Output a reference to a label. */ +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + asm_fprintf (STREAM, "%U%s", arm_strip_name_encoding (NAME)) + +/* Output a function definition label. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ + do \ + { \ + if (arm_dllexport_name_p (NAME)) \ + { \ + drectve_section (); \ + fprintf (STREAM, "\t.ascii \" -export:%s\"\n", \ + arm_strip_name_encoding (NAME)); \ + switch_to_section (function_section (DECL)); \ + } \ + ARM_DECLARE_FUNCTION_NAME (STREAM, NAME, DECL); \ + if (TARGET_THUMB) \ + fprintf (STREAM, "\t.code 16\n"); \ + ASM_OUTPUT_LABEL (STREAM, NAME); \ + } \ + while (0) + +/* Output a common block. */ +#undef ASM_OUTPUT_COMMON +#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \ + do \ + { \ + if (arm_dllexport_name_p (NAME)) \ + { \ + drectve_section (); \ + fprintf ((STREAM), "\t.ascii \" -export:%s\"\n",\ + arm_strip_name_encoding (NAME)); \ + } \ + if (! arm_dllimport_name_p (NAME)) \ + { \ + fprintf ((STREAM), "\t.comm\t"); \ + assemble_name ((STREAM), (NAME)); \ + asm_fprintf ((STREAM), ", %d\t%@ %d\n", \ + (int)(ROUNDED), (int)(SIZE)); \ + } \ + } \ + while (0) + +/* Output the label for an initialized variable. */ +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \ + do \ + { \ + if (arm_dllexport_name_p (NAME)) \ + { \ + section *save_section = in_section; \ + drectve_section (); \ + fprintf (STREAM, "\t.ascii \" -export:%s\"\n",\ + arm_strip_name_encoding (NAME)); \ + switch_to_section (save_section); \ + } \ + ASM_OUTPUT_LABEL ((STREAM), (NAME)); \ + } \ + while (0) + +/* Support the ctors/dtors and other sections. */ + +#define DRECTVE_SECTION_ASM_OP "\t.section .drectve" + +#define drectve_section() \ + (fprintf (asm_out_file, "%s\n", DRECTVE_SECTION_ASM_OP), \ + in_section = NULL) diff --git a/gcc/config/arm/pe.opt b/gcc/config/arm/pe.opt new file mode 100644 index 000000000..560a52a81 --- /dev/null +++ b/gcc/config/arm/pe.opt @@ -0,0 +1,23 @@ +; PE-specific options for the ARM port + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
+ +mnop-fun-dllimport +Target Report Mask(NOP_FUN_DLLIMPORT) +Ignore dllimport attribute for functions diff --git a/gcc/config/arm/pr-support.c b/gcc/config/arm/pr-support.c new file mode 100644 index 000000000..deee661e2 --- /dev/null +++ b/gcc/config/arm/pr-support.c @@ -0,0 +1,401 @@ +/* ARM EABI compliant unwinding routines + Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc. + Contributed by Paul Brook + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "unwind.h" + +/* We add a prototype for abort here to avoid creating a dependency on + target headers. */ +extern void abort (void); + +typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */ + +/* Misc constants. */ +#define R_IP 12 +#define R_SP 13 +#define R_LR 14 +#define R_PC 15 + +#define uint32_highbit (((_uw) 1) << 31) + +void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp); + +/* Unwind descriptors. */ + +typedef struct +{ + _uw16 length; + _uw16 offset; +} EHT16; + +typedef struct +{ + _uw length; + _uw offset; +} EHT32; + +/* Calculate the address encoded by a 31-bit self-relative offset at address + P. Copy of routine in unwind-arm.c. */ + +static inline _uw +selfrel_offset31 (const _uw *p) +{ + _uw offset; + + offset = *p; + /* Sign extend to 32 bits. */ + if (offset & (1 << 30)) + offset |= 1u << 31; + + return offset + (_uw) p; +} + + +/* Personality routine helper functions. */ + +#define CODE_FINISH (0xb0) + +/* Return the next byte of unwinding information, or CODE_FINISH if there is + no data remaining. */ +static inline _uw8 +next_unwind_byte (__gnu_unwind_state * uws) +{ + _uw8 b; + + if (uws->bytes_left == 0) + { + /* Load another word */ + if (uws->words_left == 0) + return CODE_FINISH; /* Nothing left. */ + uws->words_left--; + uws->data = *(uws->next++); + uws->bytes_left = 3; + } + else + uws->bytes_left--; + + /* Extract the most significant byte. */ + b = (uws->data >> 24) & 0xff; + uws->data <<= 8; + return b; +} + +/* Execute the unwinding instructions described by UWS. */ +_Unwind_Reason_Code +__gnu_unwind_execute (_Unwind_Context * context, __gnu_unwind_state * uws) +{ + _uw op; + int set_pc; + _uw reg; + + set_pc = 0; + for (;;) + { + op = next_unwind_byte (uws); + if (op == CODE_FINISH) + { + /* If we haven't already set pc then copy it from lr. */ + if (!set_pc) + { + _Unwind_VRS_Get (context, _UVRSC_CORE, R_LR, _UVRSD_UINT32, + ®); + _Unwind_VRS_Set (context, _UVRSC_CORE, R_PC, _UVRSD_UINT32, + ®); + set_pc = 1; + } + /* Drop out of the loop. */ + break; + } + if ((op & 0x80) == 0) + { + /* vsp = vsp +- (imm6 << 2 + 4). 
*/ + _uw offset; + + offset = ((op & 0x3f) << 2) + 4; + _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, ®); + if (op & 0x40) + reg -= offset; + else + reg += offset; + _Unwind_VRS_Set (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, ®); + continue; + } + + if ((op & 0xf0) == 0x80) + { + op = (op << 8) | next_unwind_byte (uws); + if (op == 0x8000) + { + /* Refuse to unwind. */ + return _URC_FAILURE; + } + /* Pop r4-r15 under mask. */ + op = (op << 4) & 0xfff0; + if (_Unwind_VRS_Pop (context, _UVRSC_CORE, op, _UVRSD_UINT32) + != _UVRSR_OK) + return _URC_FAILURE; + if (op & (1 << R_PC)) + set_pc = 1; + continue; + } + if ((op & 0xf0) == 0x90) + { + op &= 0xf; + if (op == 13 || op == 15) + /* Reserved. */ + return _URC_FAILURE; + /* vsp = r[nnnn]. */ + _Unwind_VRS_Get (context, _UVRSC_CORE, op, _UVRSD_UINT32, ®); + _Unwind_VRS_Set (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, ®); + continue; + } + if ((op & 0xf0) == 0xa0) + { + /* Pop r4-r[4+nnn], [lr]. */ + _uw mask; + + mask = (0xff0 >> (7 - (op & 7))) & 0xff0; + if (op & 8) + mask |= (1 << R_LR); + if (_Unwind_VRS_Pop (context, _UVRSC_CORE, mask, _UVRSD_UINT32) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xf0) == 0xb0) + { + /* op == 0xb0 already handled. */ + if (op == 0xb1) + { + op = next_unwind_byte (uws); + if (op == 0 || ((op & 0xf0) != 0)) + /* Spare. */ + return _URC_FAILURE; + /* Pop r0-r4 under mask. */ + if (_Unwind_VRS_Pop (context, _UVRSC_CORE, op, _UVRSD_UINT32) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if (op == 0xb2) + { + /* vsp = vsp + 0x204 + (uleb128 << 2). */ + int shift; + + _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, + ®); + op = next_unwind_byte (uws); + shift = 2; + while (op & 0x80) + { + reg += ((op & 0x7f) << shift); + shift += 7; + op = next_unwind_byte (uws); + } + reg += ((op & 0x7f) << shift) + 0x204; + _Unwind_VRS_Set (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, + ®); + continue; + } + if (op == 0xb3) + { + /* Pop VFP registers with fldmx. */ + op = next_unwind_byte (uws); + op = ((op & 0xf0) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_VFPX) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xfc) == 0xb4) + { + /* Pop FPA E[4]-E[4+nn]. */ + op = 0x40000 | ((op & 3) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_FPA, op, _UVRSD_FPAX) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + /* op & 0xf8 == 0xb8. */ + /* Pop VFP D[8]-D[8+nnn] with fldmx. */ + op = 0x80000 | ((op & 7) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_VFPX) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xf0) == 0xc0) + { + if (op == 0xc6) + { + /* Pop iWMMXt D registers. */ + op = next_unwind_byte (uws); + op = ((op & 0xf0) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_WMMXD, op, _UVRSD_UINT64) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if (op == 0xc7) + { + op = next_unwind_byte (uws); + if (op == 0 || (op & 0xf0) != 0) + /* Spare. */ + return _URC_FAILURE; + /* Pop iWMMXt wCGR{3,2,1,0} under mask. */ + if (_Unwind_VRS_Pop (context, _UVRSC_WMMXC, op, _UVRSD_UINT32) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if ((op & 0xf8) == 0xc0) + { + /* Pop iWMMXt wR[10]-wR[10+nnn]. */ + op = 0xa0000 | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_WMMXD, op, _UVRSD_UINT64) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + if (op == 0xc8) + { +#ifndef __VFP_FP__ + /* Pop FPA registers. 
*/ + op = next_unwind_byte (uws); + op = ((op & 0xf0) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_FPA, op, _UVRSD_FPAX) + != _UVRSR_OK) + return _URC_FAILURE; + continue; +#else + /* Pop VFPv3 registers D[16+ssss]-D[16+ssss+cccc] with vldm. */ + op = next_unwind_byte (uws); + op = (((op & 0xf0) + 16) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_DOUBLE) + != _UVRSR_OK) + return _URC_FAILURE; + continue; +#endif + } + if (op == 0xc9) + { + /* Pop VFP registers with fldmd. */ + op = next_unwind_byte (uws); + op = ((op & 0xf0) << 12) | ((op & 0xf) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_DOUBLE) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + /* Spare. */ + return _URC_FAILURE; + } + if ((op & 0xf8) == 0xd0) + { + /* Pop VFP D[8]-D[8+nnn] with fldmd. */ + op = 0x80000 | ((op & 7) + 1); + if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_DOUBLE) + != _UVRSR_OK) + return _URC_FAILURE; + continue; + } + /* Spare. */ + return _URC_FAILURE; + } + return _URC_OK; +} + + +/* Execute the unwinding instructions associated with a frame. UCBP and + CONTEXT are the current exception object and virtual CPU state + respectively. */ + +_Unwind_Reason_Code +__gnu_unwind_frame (_Unwind_Control_Block * ucbp, _Unwind_Context * context) +{ + _uw *ptr; + __gnu_unwind_state uws; + + ptr = (_uw *) ucbp->pr_cache.ehtp; + /* Skip over the personality routine address. */ + ptr++; + /* Setup the unwinder state. */ + uws.data = (*ptr) << 8; + uws.next = ptr + 1; + uws.bytes_left = 3; + uws.words_left = ((*ptr) >> 24) & 0xff; + + return __gnu_unwind_execute (context, &uws); +} + +/* Get the _Unwind_Control_Block from an _Unwind_Context. */ + +static inline _Unwind_Control_Block * +unwind_UCB_from_context (_Unwind_Context * context) +{ + return (_Unwind_Control_Block *) _Unwind_GetGR (context, R_IP); +} + +/* Get the start address of the function being unwound. */ + +_Unwind_Ptr +_Unwind_GetRegionStart (_Unwind_Context * context) +{ + _Unwind_Control_Block *ucbp; + + ucbp = unwind_UCB_from_context (context); + return (_Unwind_Ptr) ucbp->pr_cache.fnstart; +} + +/* Find the Language specific exception data. */ + +void * +_Unwind_GetLanguageSpecificData (_Unwind_Context * context) +{ + _Unwind_Control_Block *ucbp; + _uw *ptr; + + /* Get a pointer to the exception table entry. */ + ucbp = unwind_UCB_from_context (context); + ptr = (_uw *) ucbp->pr_cache.ehtp; + /* Skip the personality routine address. */ + ptr++; + /* Skip the unwind opcodes. */ + ptr += (((*ptr) >> 24) & 0xff) + 1; + + return ptr; +} + + +/* These two should never be used. */ + +_Unwind_Ptr +_Unwind_GetDataRelBase (_Unwind_Context *context __attribute__ ((unused))) +{ + abort (); +} + +_Unwind_Ptr +_Unwind_GetTextRelBase (_Unwind_Context *context __attribute__ ((unused))) +{ + abort (); +} diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md new file mode 100644 index 000000000..e34b46da0 --- /dev/null +++ b/gcc/config/arm/predicates.md @@ -0,0 +1,688 @@ +;; Predicate definitions for ARM and Thumb +;; Copyright (C) 2004, 2007, 2008, 2010 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_predicate "s_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + /* We don't consider registers whose class is NO_REGS + to be a register operand. */ + /* XXX might have to check for lo regs only for thumb ??? */ + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) != NO_REGS)); +}) + +;; Any hard register. +(define_predicate "arm_hard_register_operand" + (match_code "reg") +{ + return REGNO (op) < FIRST_PSEUDO_REGISTER; +}) + +;; A low register. +(define_predicate "low_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) <= LAST_LO_REGNUM"))) + +;; A low register or const_int. +(define_predicate "low_reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "low_register_operand"))) + +;; Any core register, or any pseudo. */ +(define_predicate "arm_general_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return (GET_CODE (op) == REG + && (REGNO (op) <= LAST_ARM_REGNUM + || REGNO (op) >= FIRST_PSEUDO_REGISTER)); +}) + +(define_predicate "f_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + /* We don't consider registers whose class is NO_REGS + to be a register operand. */ + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == FPA_REGS)); +}) + +(define_predicate "vfp_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + /* We don't consider registers whose class is NO_REGS + to be a register operand. */ + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == VFP_D0_D7_REGS + || REGNO_REG_CLASS (REGNO (op)) == VFP_LO_REGS + || (TARGET_VFPD32 + && REGNO_REG_CLASS (REGNO (op)) == VFP_REGS))); +}) + +(define_special_predicate "subreg_lowpart_operator" + (and (match_code "subreg") + (match_test "subreg_lowpart_p (op)"))) + +;; Reg, subreg(reg) or const_int. +(define_predicate "reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "s_register_operand"))) + +(define_predicate "arm_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (INTVAL (op))"))) + +;; A constant value which fits into two instructions, each taking +;; an arithmetic constant operand for one of the words. 
+(define_predicate "arm_immediate_di_operand" + (and (match_code "const_int,const_double") + (match_test "arm_const_double_by_immediates (op)"))) + +(define_predicate "arm_neg_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (-INTVAL (op))"))) + +(define_predicate "arm_not_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (~INTVAL (op))"))) + +(define_predicate "const0_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 0"))) + +;; Something valid on the RHS of an ARM data-processing instruction +(define_predicate "arm_rhs_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "arm_immediate_operand"))) + +(define_predicate "arm_rhsm_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "memory_operand"))) + +(define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) + (match_operand 0 "const_int_operand"))) + +(define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +(define_predicate "arm_addimm_operand" + (ior (match_operand 0 "arm_immediate_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +(define_predicate "arm_not_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_not_immediate_operand"))) + +(define_predicate "arm_di_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "arm_immediate_di_operand"))) + +;; True if the operand is a memory reference which contains an +;; offsettable address. +(define_predicate "offsettable_memory_operand" + (and (match_code "mem") + (match_test + "offsettable_address_p (reload_completed | reload_in_progress, + mode, XEXP (op, 0))"))) + +;; True if the operand is a memory operand that does not have an +;; automodified base register (and thus will not generate output reloads). +(define_predicate "call_memory_operand" + (and (match_code "mem") + (and (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) + != RTX_AUTOINC") + (match_operand 0 "memory_operand")))) + +(define_predicate "arm_reload_memory_operand" + (and (match_code "mem,reg,subreg") + (match_test "(!CONSTANT_P (op) + && (true_regnum(op) == -1 + || (GET_CODE (op) == REG + && REGNO (op) >= FIRST_PSEUDO_REGISTER)))"))) + +;; True for valid operands for the rhs of an floating point insns. +;; Allows regs or certain consts on FPA, just regs for everything else. +(define_predicate "arm_float_rhs_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_double") + (match_test "TARGET_FPA && arm_const_double_rtx (op)")))) + +(define_predicate "arm_float_add_operand" + (ior (match_operand 0 "arm_float_rhs_operand") + (and (match_code "const_double") + (match_test "TARGET_FPA && neg_const_double_rtx_ok_for_fpa (op)")))) + +(define_predicate "vfp_compare_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_double") + (match_test "arm_const_double_rtx (op)")))) + +(define_predicate "arm_float_compare_operand" + (if_then_else (match_test "TARGET_VFP") + (match_operand 0 "vfp_compare_operand") + (match_operand 0 "arm_float_rhs_operand"))) + +;; True for valid index operands. 
+(define_predicate "index_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_operand 0 "immediate_operand") + (match_test "(GET_CODE (op) != CONST_INT + || (INTVAL (op) < 4096 && INTVAL (op) > -4096))")))) + +;; True for operators that can be combined with a shift in ARM state. +(define_special_predicate "shiftable_operator" + (and (match_code "plus,minus,ior,xor,and") + (match_test "mode == GET_MODE (op)"))) + +;; True for logical binary operators. +(define_special_predicate "logical_binary_operator" + (and (match_code "ior,xor,and") + (match_test "mode == GET_MODE (op)"))) + +;; True for commutative operators +(define_special_predicate "commutative_binary_operator" + (and (match_code "ior,xor,and,plus") + (match_test "mode == GET_MODE (op)"))) + +;; True for shift operators. +(define_special_predicate "shift_operator" + (and (ior (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "rotate") + (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_code "ashift,ashiftrt,lshiftrt,rotatert")) + (match_test "mode == GET_MODE (op)"))) + +;; True for MULT, to identify which variant of shift_operator is in use. +(define_special_predicate "mult_operator" + (match_code "mult")) + +;; True for operators that have 16-bit thumb variants. */ +(define_special_predicate "thumb_16bit_operator" + (match_code "plus,minus,and,ior,xor")) + +;; True for EQ & NE +(define_special_predicate "equality_operator" + (match_code "eq,ne")) + +;; True for integer comparisons and, if FP is active, for comparisons +;; other than LTGT or UNEQ. +(define_special_predicate "arm_comparison_operator" + (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu") + (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT + && (TARGET_FPA || TARGET_VFP)") + (match_code "unordered,ordered,unlt,unle,unge,ungt")))) + +(define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +(define_special_predicate "noov_comparison_operator" + (match_code "lt,ge,eq,ne")) + +(define_special_predicate "minmax_operator" + (and (match_code "smin,smax,umin,umax") + (match_test "mode == GET_MODE (op)"))) + +(define_special_predicate "cc_register" + (and (match_code "reg") + (and (match_test "REGNO (op) == CC_REGNUM") + (ior (match_test "mode == GET_MODE (op)") + (match_test "mode == VOIDmode && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC"))))) + +(define_special_predicate "dominant_cc_register" + (match_code "reg") +{ + if (mode == VOIDmode) + { + mode = GET_MODE (op); + + if (GET_MODE_CLASS (mode) != MODE_CC) + return false; + } + + return (cc_register (op, mode) + && (mode == CC_DNEmode + || mode == CC_DEQmode + || mode == CC_DLEmode + || mode == CC_DLTmode + || mode == CC_DGEmode + || mode == CC_DGTmode + || mode == CC_DLEUmode + || mode == CC_DLTUmode + || mode == CC_DGEUmode + || mode == CC_DGTUmode)); +}) + +(define_special_predicate "arm_extendqisi_mem_op" + (and (match_operand 0 "memory_operand") + (match_test "arm_legitimate_address_outer_p (mode, XEXP (op, 0), + SIGN_EXTEND, 0)"))) + +(define_special_predicate "arm_reg_or_extendqisi_mem_op" + (ior (match_operand 0 "arm_extendqisi_mem_op") + (match_operand 0 "s_register_operand"))) + +(define_predicate "power_of_two_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT value = INTVAL (op) & 0xffffffff; + + return value != 0 && (value & (value - 1)) == 0; +}) + +(define_predicate "nonimmediate_di_operand" + (match_code "reg,subreg,mem") 
+{ + if (s_register_operand (op, mode)) + return true; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return GET_CODE (op) == MEM && memory_address_p (DImode, XEXP (op, 0)); +}) + +(define_predicate "di_operand" + (ior (match_code "const_int,const_double") + (and (match_code "reg,subreg,mem") + (match_operand 0 "nonimmediate_di_operand")))) + +(define_predicate "nonimmediate_soft_df_operand" + (match_code "reg,subreg,mem") +{ + if (s_register_operand (op, mode)) + return true; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return GET_CODE (op) == MEM && memory_address_p (DFmode, XEXP (op, 0)); +}) + +(define_predicate "soft_df_operand" + (ior (match_code "const_double") + (and (match_code "reg,subreg,mem") + (match_operand 0 "nonimmediate_soft_df_operand")))) + +(define_predicate "const_shift_operand" + (and (match_code "const_int") + (ior (match_operand 0 "power_of_two_operand") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 32")))) + + +(define_special_predicate "load_multiple_operation" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + unsigned dest_regno; + rtx src_addr; + HOST_WIDE_INT i = 1, base = 0; + HOST_WIDE_INT offset = 0; + rtx elt; + bool addr_reg_loaded = false; + bool update = false; + + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || !REG_P (SET_DEST (XVECEXP (op, 0, 0)))) + return false; + + /* Check to see if this might be a write-back. */ + if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS) + { + i++; + base = 1; + update = true; + + /* Now check it more carefully. */ + if (GET_CODE (SET_DEST (elt)) != REG + || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG + || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT + || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4) + return false; + } + + /* Perform a quick check so we don't blow up below. */ + if (count <= i + || GET_CODE (XVECEXP (op, 0, i - 1)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG + || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM) + return false; + + dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1))); + src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0); + if (GET_CODE (src_addr) == PLUS) + { + if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT) + return false; + offset = INTVAL (XEXP (src_addr, 1)); + src_addr = XEXP (src_addr, 0); + } + if (!REG_P (src_addr)) + return false; + + for (; i < count; i++) + { + elt = XVECEXP (op, 0, i); + + if (GET_CODE (elt) != SET + || GET_CODE (SET_DEST (elt)) != REG + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) <= dest_regno + || GET_CODE (SET_SRC (elt)) != MEM + || GET_MODE (SET_SRC (elt)) != SImode + || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS + || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr) + || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT + || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4) + && (!REG_P (XEXP (SET_SRC (elt), 0)) + || offset + (i - base) * 4 != 0))) + return false; + dest_regno = REGNO (SET_DEST (elt)); + if (dest_regno == REGNO (src_addr)) + addr_reg_loaded = true; + } + /* For Thumb, we only have updating instructions. If the pattern does + not describe an update, it must be because the address register is + in the list of loaded registers - on the hardware, this has the effect + of overriding the update. 
*/ + if (update && addr_reg_loaded) + return false; + if (TARGET_THUMB1) + return update || addr_reg_loaded; + return true; +}) + +(define_special_predicate "store_multiple_operation" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + unsigned src_regno; + rtx dest_addr; + HOST_WIDE_INT i = 1, base = 0, offset = 0; + rtx elt; + + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET) + return false; + + /* Check to see if this might be a write-back. */ + if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS) + { + i++; + base = 1; + + /* Now check it more carefully. */ + if (GET_CODE (SET_DEST (elt)) != REG + || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG + || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT + || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4) + return false; + } + + /* Perform a quick check so we don't blow up below. */ + if (count <= i + || GET_CODE (XVECEXP (op, 0, i - 1)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != MEM + || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != REG) + return false; + + src_regno = REGNO (SET_SRC (XVECEXP (op, 0, i - 1))); + dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, i - 1)), 0); + + if (GET_CODE (dest_addr) == PLUS) + { + if (GET_CODE (XEXP (dest_addr, 1)) != CONST_INT) + return false; + offset = INTVAL (XEXP (dest_addr, 1)); + dest_addr = XEXP (dest_addr, 0); + } + if (!REG_P (dest_addr)) + return false; + + for (; i < count; i++) + { + elt = XVECEXP (op, 0, i); + + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != REG + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) <= src_regno + || GET_CODE (SET_DEST (elt)) != MEM + || GET_MODE (SET_DEST (elt)) != SImode + || ((GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS + || !rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr) + || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT + || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != offset + (i - base) * 4) + && (!REG_P (XEXP (SET_DEST (elt), 0)) + || offset + (i - base) * 4 != 0))) + return false; + src_regno = REGNO (SET_SRC (elt)); + } + + return true; +}) + +(define_special_predicate "multi_register_push" + (match_code "parallel") +{ + if ((GET_CODE (XVECEXP (op, 0, 0)) != SET) + || (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC) + || (XINT (SET_SRC (XVECEXP (op, 0, 0)), 1) != UNSPEC_PUSH_MULT)) + return false; + + return true; +}) + +;;------------------------------------------------------------------------- +;; +;; Thumb predicates +;; + +(define_predicate "thumb1_cmp_operand" + (ior (and (match_code "reg,subreg") + (match_operand 0 "s_register_operand")) + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 256")))) + +(define_predicate "thumb1_cmpneg_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) < 0 && INTVAL (op) > -256"))) + +;; Return TRUE if a result can be stored in OP without clobbering the +;; condition code register. Prior to reload we only accept a +;; register. After reload we have to be able to handle memory as +;; well, since a pseudo may not get a hard reg and reload cannot +;; handle output-reloads on jump insns. + +;; We could possibly handle mem before reload as well, but that might +;; complicate things with the need to handle increment +;; side-effects. 
+(define_predicate "thumb_cbrch_target_operand" + (and (match_code "reg,subreg,mem") + (ior (match_operand 0 "s_register_operand") + (and (match_test "reload_in_progress || reload_completed") + (match_operand 0 "memory_operand"))))) + +;;------------------------------------------------------------------------- +;; +;; MAVERICK predicates +;; + +(define_predicate "cirrus_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return (GET_CODE (op) == REG + && (REGNO_REG_CLASS (REGNO (op)) == CIRRUS_REGS + || REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS)); +}) + +(define_predicate "cirrus_fp_register" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == CIRRUS_REGS)); +}) + +(define_predicate "cirrus_shift_const" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 64"))) + + +;; Neon predicates + +(define_predicate "const_multiple_of_8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT val = INTVAL (op); + return (val & 7) == 0; +}) + +(define_predicate "imm_for_neon_mov_operand" + (match_code "const_vector") +{ + return neon_immediate_valid_for_move (op, mode, NULL, NULL); +}) + +(define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); +}) + +(define_predicate "imm_for_neon_inv_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL)); +}) + +(define_predicate "neon_logic_op2" + (ior (match_operand 0 "imm_for_neon_logic_operand") + (match_operand 0 "s_register_operand"))) + +(define_predicate "neon_inv_logic_op2" + (ior (match_operand 0 "imm_for_neon_inv_logic_operand") + (match_operand 0 "s_register_operand"))) + +;; TODO: We could check lane numbers more precisely based on the mode. +(define_predicate "neon_lane_number" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15"))) +;; Predicates for named expanders that overlap multiple ISAs. + +(define_predicate "cmpdi_operand" + (if_then_else (match_test "TARGET_HARD_FLOAT && TARGET_MAVERICK") + (and (match_test "TARGET_ARM") + (match_operand 0 "cirrus_fp_register")) + (and (match_test "TARGET_32BIT") + (match_operand 0 "arm_di_operand")))) + +;; True if the operand is memory reference suitable for a ldrex/strex. +(define_predicate "arm_sync_memory_operand" + (and (match_operand 0 "memory_operand") + (match_code "reg" "0"))) + +;; Predicates for parallel expanders based on mode. 
+(define_special_predicate "vect_par_constant_high" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != (base/2) + i) + return false; + } + return true; +}) + +(define_special_predicate "vect_par_constant_low" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != i) + return false; + } + return true; +}) + +(define_special_predicate "add_operator" + (match_code "plus")) diff --git a/gcc/config/arm/rtems-eabi.h b/gcc/config/arm/rtems-eabi.h new file mode 100644 index 000000000..ced98a91b --- /dev/null +++ b/gcc/config/arm/rtems-eabi.h @@ -0,0 +1,29 @@ +/* Definitions for RTEMS based ARM systems using EABI. + Copyright (C) 2011 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define HAS_INIT_SECTION + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + TARGET_BPABI_CPP_BUILTINS(); \ + } while (0) diff --git a/gcc/config/arm/rtems-elf.h b/gcc/config/arm/rtems-elf.h new file mode 100644 index 000000000..dade74b15 --- /dev/null +++ b/gcc/config/arm/rtems-elf.h @@ -0,0 +1,45 @@ +/* Definitions for RTEMS based ARM systems using ELF + Copyright (C) 2000, 2002, 2005, 2007, 2008 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Run-time Target Specification. 
*/ +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/ELF RTEMS)", stderr); + +#define HAS_INIT_SECTION + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + } while (0) + +/* + * The default in gcc now is soft-float, but gcc misses it to + * pass it to the assembler. + */ +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "\ + %{!mhard-float: %{!msoft-float:-mfpu=softfpa}}" + +/* + * The default includes --start-group and --end-group which conflicts + * with how this used to be defined. + */ +#undef LINK_GCC_C_SEQUENCE_SPEC diff --git a/gcc/config/arm/semi.h b/gcc/config/arm/semi.h new file mode 100644 index 000000000..1e35710c9 --- /dev/null +++ b/gcc/config/arm/semi.h @@ -0,0 +1,75 @@ +/* Definitions of target machine for GNU compiler. ARM on semi-hosted platform + Copyright (C) 1994, 1995, 1996, 1997, 2001, 2004, 2005, 2007 + Free Software Foundation, Inc. + Contributed by Richard Earnshaw (richard.earnshaw@arm.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define STARTFILE_SPEC "crt0.o%s" + +#ifndef LIB_SPEC +#define LIB_SPEC "-lc" +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__semi__" +#endif + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} -X" +#endif + +#ifndef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/semi-hosted)", stderr); +#endif + +#ifndef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD +#endif + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, +#endif + +#ifndef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "" +#endif + +/* The compiler supports PIC code generation, even though the binutils + may not. If we are asked to compile position independent code, we + always pass -k to the assembler. If it doesn't recognize it, then + it will barf, which probably means that it doesn't know how to + assemble PIC code. This is what we want, since otherwise tools + may incorrectly assume we support PIC compilation even if the + binutils can't. */ +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{fpic|fpie: -k} %{fPIC|fPIE: -k} \ +%{mbig-endian:-EB} \ +%{mcpu=*:-mcpu=%*} \ +%{march=*:-march=%*} \ +%{mapcs-float:-mfloat} \ +%{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \ +%{mfloat-abi=*} %{mfpu=*} \ +%{mthumb-interwork:-mthumb-interwork} \ +%(subtarget_extra_asm_spec)" +#endif diff --git a/gcc/config/arm/sfp-machine.h b/gcc/config/arm/sfp-machine.h new file mode 100644 index 000000000..a89d05a00 --- /dev/null +++ b/gcc/config/arm/sfp-machine.h @@ -0,0 +1,105 @@ +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +/* The type of the result of a floating point comparison. 
This must + match `__libgcc_cmp_return__' in GCC for the target. */ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1) +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_H 0 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 + +/* Someone please check this. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +#if defined __ARMEB__ +# define __BYTE_ORDER __BIG_ENDIAN +#else +# define __BYTE_ORDER __LITTLE_ENDIAN +#endif + + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); + +#ifdef __ARM_EABI__ +/* Rename functions to their EABI names. */ +/* The comparison functions need wrappers for EABI semantics, so + leave them unmolested. */ +#define __negsf2 __aeabi_fneg +#define __subsf3 __aeabi_fsub +#define __addsf3 __aeabi_fadd +#define __floatunsisf __aeabi_ui2f +#define __floatsisf __aeabi_i2f +#define __floatundisf __aeabi_ul2f +#define __floatdisf __aeabi_l2f +#define __mulsf3 __aeabi_fmul +#define __divsf3 __aeabi_fdiv +#define __unordsf2 __aeabi_fcmpun +#define __fixsfsi __aeabi_f2iz +#define __fixunssfsi __aeabi_f2uiz +#define __fixsfdi __aeabi_f2lz +#define __fixunssfdi __aeabi_f2ulz +#define __floatdisf __aeabi_l2f + +#define __negdf2 __aeabi_dneg +#define __subdf3 __aeabi_dsub +#define __adddf3 __aeabi_dadd +#define __floatunsidf __aeabi_ui2d +#define __floatsidf __aeabi_i2d +#define __extendsfdf2 __aeabi_f2d +#define __truncdfsf2 __aeabi_d2f +#define __floatundidf __aeabi_ul2d +#define __floatdidf __aeabi_l2d +#define __muldf3 __aeabi_dmul +#define __divdf3 __aeabi_ddiv +#define __unorddf2 __aeabi_dcmpun +#define __fixdfsi __aeabi_d2iz +#define __fixunsdfsi __aeabi_d2uiz +#define __fixdfdi __aeabi_d2lz +#define __fixunsdfdi __aeabi_d2ulz +#define __floatdidf __aeabi_l2d +#define __extendhfsf2 __gnu_h2f_ieee +#define __truncsfhf2 __gnu_f2h_ieee + +#endif /* __ARM_EABI__ */ diff --git a/gcc/config/arm/symbian.h b/gcc/config/arm/symbian.h new file mode 100644 index 000000000..ff233a89f --- /dev/null +++ b/gcc/config/arm/symbian.h @@ -0,0 +1,105 @@ +/* Configuration file for Symbian OS on ARM processors. + Copyright (C) 2004, 2005, 2007, 2008 + Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Do not expand builtin functions (unless explicitly prefixed with + "__builtin"). Symbian OS code relies on properties of the standard + library that go beyond those guaranteed by the ANSI/ISO standard. + For example, "memcpy" works even with overlapping memory, like + "memmove". We cannot simply set flag_no_builtin in arm.c because + (a) flag_no_builtin is not declared in language-independent code, + and (b) that would prevent users from explicitly overriding the + default with -fbuiltin, which may sometimes be useful. + + Make all symbols hidden by default. Symbian OS expects that all + exported symbols will be explicitly marked with + "__declspec(dllexport)". + + Enumeration types use 4 bytes, even if the enumerals are small, + unless explicitly overridden. + + The wchar_t type is a 2-byte type, unless explicitly + overridden. */ +#define CC1_SPEC \ + "%{!fbuiltin:%{!fno-builtin:-fno-builtin}} " \ + "%{!fvisibility=*:-fvisibility=hidden} " \ + "%{!fshort-enums:%{!fno-short-enums:-fno-short-enums}} " \ + "%{!fshort-wchar:%{!fno-short-wchar:-fshort-wchar}} " +#define CC1PLUS_SPEC CC1_SPEC + +/* Symbian OS does not use crt*.o, unlike the generic unknown-elf + configuration. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "" + +/* Do not link with any libraries by default. On Symbian OS, the user + must supply all required libraries on the command line. */ +#undef LIB_SPEC +#define LIB_SPEC "" + +/* Support the "dllimport" attribute. */ +#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1 + +/* Symbian OS assumes ARM V5 or above. Since -march=armv5 is + equivalent to making the ARM 10TDMI core the default, we can set + SUBTARGET_CPU_DEFAULT and get an equivalent effect. */ +#undef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi + +/* The assembler should assume VFP FPU format, and armv5t. */ +#undef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC \ + "%{!mfpu=*:-mfpu=vfp} %{!mcpu=*:%{!march=*:-march=armv5t}}" + +/* SymbianOS provides the BPABI routines in a separate library. + Therefore, we do not need to define any of them in libgcc. */ +#undef RENAME_LIBRARY +#define RENAME_LIBRARY(GCC_NAME, AEABI_NAME) /* empty */ + +/* Define the __symbian__ macro. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + /* Include the default BPABI stuff. */ \ + TARGET_BPABI_CPP_BUILTINS (); \ + /* Symbian OS does not support merging symbols across DLL \ + boundaries. */ \ + builtin_define ("__GXX_MERGED_TYPEINFO_NAMES=0"); \ + builtin_define ("__symbian__"); \ + } \ + while (false) + +/* On SymbianOS, these sections are not writable, so we use "a", + rather than "aw", for the section attributes. 
*/ +#undef ARM_EABI_CTORS_SECTION_OP +#define ARM_EABI_CTORS_SECTION_OP \ + "\t.section\t.init_array,\"a\",%init_array" +#undef ARM_EABI_DTORS_SECTION_OP +#define ARM_EABI_DTORS_SECTION_OP \ + "\t.section\t.fini_array,\"a\",%fini_array" + +/* SymbianOS cannot merge entities with vague linkage at runtime. */ +#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P false + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md new file mode 100644 index 000000000..689a235c1 --- /dev/null +++ b/gcc/config/arm/sync.md @@ -0,0 +1,602 @@ +;; Machine description for ARM processor synchronization primitives. +;; Copyright (C) 2010 Free Software Foundation, Inc. +;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; ARMV6 introduced ldrex and strex instruction. These instruction +;; access SI width data. In order to implement synchronization +;; primitives for the narrower QI and HI modes we insert appropriate +;; AND/OR sequences into the synchronization loop to mask out the +;; relevant component of an SI access. + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + "TARGET_HAVE_MEMORY_BARRIER" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_expand "sync_compare_and_swapsi" + [(set (match_operand:SI 0 "s_register_operand") + (unspec_volatile:SI [(match_operand:SI 1 "memory_operand") + (match_operand:SI 2 "s_register_operand") + (match_operand:SI 3 "s_register_operand")] + VUNSPEC_SYNC_COMPARE_AND_SWAP))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omrn; + generator.u.omrn = gen_arm_sync_compare_and_swapsi; + arm_expand_sync (SImode, &generator, operands[0], operands[1], operands[2], + operands[3]); + DONE; + }) + +(define_mode_iterator NARROW [QI HI]) + +(define_expand "sync_compare_and_swap" + [(set (match_operand:NARROW 0 "s_register_operand") + (unspec_volatile:NARROW [(match_operand:NARROW 1 "memory_operand") + (match_operand:NARROW 2 "s_register_operand") + (match_operand:NARROW 3 "s_register_operand")] + VUNSPEC_SYNC_COMPARE_AND_SWAP))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omrn; + generator.u.omrn = gen_arm_sync_compare_and_swap; + arm_expand_sync (mode, &generator, operands[0], operands[1], + operands[2], operands[3]); + DONE; + }) + +(define_expand "sync_lock_test_and_setsi" + [(match_operand:SI 0 "s_register_operand") + (match_operand:SI 1 "memory_operand") + (match_operand:SI 2 "s_register_operand")] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_lock_test_and_setsi; + 
arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, + operands[2]); + DONE; + }) + +(define_expand "sync_lock_test_and_set" + [(match_operand:NARROW 0 "s_register_operand") + (match_operand:NARROW 1 "memory_operand") + (match_operand:NARROW 2 "s_register_operand")] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_lock_test_and_set; + arm_expand_sync (mode, &generator, operands[0], operands[1], NULL, + operands[2]); + DONE; + }) + +(define_code_iterator syncop [plus minus ior xor and]) + +(define_code_attr sync_optab [(ior "ior") + (xor "xor") + (and "and") + (plus "add") + (minus "sub")]) + +(define_code_attr sync_clobber [(ior "=&r") + (and "=&r") + (xor "X") + (plus "X") + (minus "X")]) + +(define_code_attr sync_t2_reqd [(ior "4") + (and "4") + (xor "*") + (plus "*") + (minus "*")]) + +(define_expand "sync_si" + [(match_operand:SI 0 "memory_operand") + (match_operand:SI 1 "s_register_operand") + (syncop:SI (match_dup 0) (match_dup 1))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_si; + arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); + DONE; + }) + +(define_expand "sync_nandsi" + [(match_operand:SI 0 "memory_operand") + (match_operand:SI 1 "s_register_operand") + (not:SI (and:SI (match_dup 0) (match_dup 1)))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_nandsi; + arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]); + DONE; + }) + +(define_expand "sync_" + [(match_operand:NARROW 0 "memory_operand") + (match_operand:NARROW 1 "s_register_operand") + (syncop:NARROW (match_dup 0) (match_dup 1))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_; + arm_expand_sync (mode, &generator, NULL, operands[0], NULL, + operands[1]); + DONE; + }) + +(define_expand "sync_nand" + [(match_operand:NARROW 0 "memory_operand") + (match_operand:NARROW 1 "s_register_operand") + (not:NARROW (and:NARROW (match_dup 0) (match_dup 1)))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_nand; + arm_expand_sync (mode, &generator, NULL, operands[0], NULL, + operands[1]); + DONE; + }) + +(define_expand "sync_new_si" + [(match_operand:SI 0 "s_register_operand") + (match_operand:SI 1 "memory_operand") + (match_operand:SI 2 "s_register_operand") + (syncop:SI (match_dup 1) (match_dup 2))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_si; + arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, + operands[2]); + DONE; + }) + +(define_expand "sync_new_nandsi" + [(match_operand:SI 0 "s_register_operand") + (match_operand:SI 1 "memory_operand") + (match_operand:SI 2 "s_register_operand") + (not:SI (and:SI (match_dup 1) (match_dup 2)))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = 
gen_arm_sync_new_nandsi; + arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, + operands[2]); + DONE; + }) + +(define_expand "sync_new_" + [(match_operand:NARROW 0 "s_register_operand") + (match_operand:NARROW 1 "memory_operand") + (match_operand:NARROW 2 "s_register_operand") + (syncop:NARROW (match_dup 1) (match_dup 2))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_; + arm_expand_sync (mode, &generator, operands[0], operands[1], + NULL, operands[2]); + DONE; + }) + +(define_expand "sync_new_nand" + [(match_operand:NARROW 0 "s_register_operand") + (match_operand:NARROW 1 "memory_operand") + (match_operand:NARROW 2 "s_register_operand") + (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_new_nand; + arm_expand_sync (mode, &generator, operands[0], operands[1], + NULL, operands[2]); + DONE; + }); + +(define_expand "sync_old_si" + [(match_operand:SI 0 "s_register_operand") + (match_operand:SI 1 "memory_operand") + (match_operand:SI 2 "s_register_operand") + (syncop:SI (match_dup 1) (match_dup 2))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_old_si; + arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, + operands[2]); + DONE; + }) + +(define_expand "sync_old_nandsi" + [(match_operand:SI 0 "s_register_operand") + (match_operand:SI 1 "memory_operand") + (match_operand:SI 2 "s_register_operand") + (not:SI (and:SI (match_dup 1) (match_dup 2)))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_old_nandsi; + arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL, + operands[2]); + DONE; + }) + +(define_expand "sync_old_" + [(match_operand:NARROW 0 "s_register_operand") + (match_operand:NARROW 1 "memory_operand") + (match_operand:NARROW 2 "s_register_operand") + (syncop:NARROW (match_dup 1) (match_dup 2))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_old_; + arm_expand_sync (mode, &generator, operands[0], operands[1], + NULL, operands[2]); + DONE; + }) + +(define_expand "sync_old_nand" + [(match_operand:NARROW 0 "s_register_operand") + (match_operand:NARROW 1 "memory_operand") + (match_operand:NARROW 2 "s_register_operand") + (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + struct arm_sync_generator generator; + generator.op = arm_sync_generator_omn; + generator.u.omn = gen_arm_sync_old_nand; + arm_expand_sync (mode, &generator, operands[0], operands[1], + NULL, operands[2]); + DONE; + }) + +(define_insn "arm_sync_compare_and_swapsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI + [(match_operand:SI 1 "arm_sync_memory_operand" "+Q") + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "s_register_operand" "r")] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + (set (reg:CC CC_REGNUM) 
(unspec_volatile:CC [(match_dup 1)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + ] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_required_value" "2") + (set_attr "sync_new_value" "3") + (set_attr "sync_t1" "0") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_compare_and_swap" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q") + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "s_register_operand" "r")] + VUNSPEC_SYNC_COMPARE_AND_SWAP))) + (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)] + VUNSPEC_SYNC_COMPARE_AND_SWAP)) + ] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_required_value" "2") + (set_attr "sync_new_value" "3") + (set_attr "sync_t1" "0") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_lock_test_and_setsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (match_operand:SI 1 "arm_sync_memory_operand" "+Q")) + (set (match_dup 1) + (unspec_volatile:SI [(match_operand:SI 2 "s_register_operand" "r")] + VUNSPEC_SYNC_LOCK)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_release_barrier" "no") + (set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "0") + (set_attr "sync_t2" "3") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_lock_test_and_set" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")] + VUNSPEC_SYNC_LOCK)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_release_barrier" "no") + (set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "0") + (set_attr "sync_t2" "3") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_new_si" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI + (match_operand:SI 1 "arm_sync_memory_operand" "+Q") + (match_operand:SI 2 "s_register_operand" "r")) + ] + VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) + (unspec_volatile:SI [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "0") + (set_attr "sync_t2" "3") + (set_attr "sync_op" "") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_new_nandsi" + [(set (match_operand:SI 0 
"s_register_operand" "=&r") + (unspec_volatile:SI [(not:SI (and:SI + (match_operand:SI 1 "arm_sync_memory_operand" "+Q") + (match_operand:SI 2 "s_register_operand" "r"))) + ] + VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) + (unspec_volatile:SI [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "0") + (set_attr "sync_t2" "3") + (set_attr "sync_op" "nand") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_new_" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI + (zero_extend:SI + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) + (match_operand:SI 2 "s_register_operand" "r")) + ] + VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "0") + (set_attr "sync_t2" "3") + (set_attr "sync_op" "") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_new_nand" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI + [(not:SI + (and:SI + (zero_extend:SI + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) + (match_operand:SI 2 "s_register_operand" "r"))) + ] VUNSPEC_SYNC_NEW_OP)) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_NEW_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "0") + (set_attr "sync_t2" "3") + (set_attr "sync_op" "nand") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_old_si" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI + (match_operand:SI 1 "arm_sync_memory_operand" "+Q") + (match_operand:SI 2 "s_register_operand" "r")) + ] + VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) + (unspec_volatile:SI [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "3") + (set_attr "sync_t2" "") + (set_attr "sync_op" "") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_old_nandsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(not:SI (and:SI + (match_operand:SI 1 "arm_sync_memory_operand" "+Q") + (match_operand:SI 2 "s_register_operand" "r"))) + ] + VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) + (unspec_volatile:SI [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 
"=&r")) + (clobber (match_scratch:SI 4 "=&r"))] + "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "3") + (set_attr "sync_t2" "4") + (set_attr "sync_op" "nand") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_old_" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(syncop:SI + (zero_extend:SI + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) + (match_operand:SI 2 "s_register_operand" "r")) + ] + VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "3") + (set_attr "sync_t2" "") + (set_attr "sync_op" "") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "arm_sync_old_nand" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(not:SI (and:SI + (zero_extend:SI + (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")) + (match_operand:SI 2 "s_register_operand" "r"))) + ] + VUNSPEC_SYNC_OLD_OP)) + (set (match_dup 1) + (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)] + VUNSPEC_SYNC_OLD_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] + "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_sync_insn (insn, operands); + } + [(set_attr "sync_result" "0") + (set_attr "sync_memory" "1") + (set_attr "sync_new_value" "2") + (set_attr "sync_t1" "3") + (set_attr "sync_t2" "4") + (set_attr "sync_op" "nand") + (set_attr "conds" "clob") + (set_attr "predicable" "no")]) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + "TARGET_HAVE_MEMORY_BARRIER" + { + return arm_output_memory_barrier (operands); + } + [(set_attr "length" "4") + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) + diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm new file mode 100644 index 000000000..33d7e19f7 --- /dev/null +++ b/gcc/config/arm/t-arm @@ -0,0 +1,66 @@ +# Rules common to all arm targets +# +# Copyright (C) 2004, 2005, 2007, 2008, 2009, 2010 +# Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \ + $(srcdir)/config/arm/predicates.md \ + $(srcdir)/config/arm/arm-generic.md \ + $(srcdir)/config/arm/arm1020e.md \ + $(srcdir)/config/arm/arm1026ejs.md \ + $(srcdir)/config/arm/arm1136jfs.md \ + $(srcdir)/config/arm/fa526.md \ + $(srcdir)/config/arm/fa606te.md \ + $(srcdir)/config/arm/fa626te.md \ + $(srcdir)/config/arm/fmp626.md \ + $(srcdir)/config/arm/fa726te.md \ + $(srcdir)/config/arm/arm926ejs.md \ + $(srcdir)/config/arm/cirrus.md \ + $(srcdir)/config/arm/fpa.md \ + $(srcdir)/config/arm/vec-common.md \ + $(srcdir)/config/arm/iwmmxt.md \ + $(srcdir)/config/arm/vfp.md \ + $(srcdir)/config/arm/neon.md \ + $(srcdir)/config/arm/thumb2.md + +LIB1ASMSRC = arm/lib1funcs.asm +LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ + _thumb1_case_uhi _thumb1_case_si +s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ + s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) + +$(srcdir)/config/arm/arm-tune.md: $(srcdir)/config/arm/gentune.sh \ + $(srcdir)/config/arm/arm-cores.def + $(SHELL) $(srcdir)/config/arm/gentune.sh \ + $(srcdir)/config/arm/arm-cores.def > \ + $(srcdir)/config/arm/arm-tune.md + +arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(INTEGRATE_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) + +arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/arm-c.c diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf new file mode 100644 index 000000000..38c291827 --- /dev/null +++ b/gcc/config/arm/t-arm-elf @@ -0,0 +1,128 @@ +# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, +# 2008, 2010 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# For most CPUs we have an assembly soft-float implementations. +# However this is not true for ARMv6M. Here we want to use the soft-fp C +# implementation. The soft-fp code is only build for ARMv6M. This pulls +# in the asm implementation for other CPUs. 
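As a sketch of the "soft-fp C code only for ARMv6-M" arrangement described above: t-arm-softfp further below sets softfp_wrap_start to '#ifdef __ARM_ARCH_6M__' and softfp_wrap_end to '#endif', so each generated soft-fp translation unit is effectively bracketed as shown here, giving the C routines to the ARMv6-M multilib while other cores keep the assembly functions pulled in by the LIB1ASMFUNCS list that follows. The identifier below is a placeholder, not a real libgcc symbol.

    #ifdef __ARM_ARCH_6M__
    int softfp_routines_are_compiled_here;   /* placeholder for the soft-fp code */
    #endif
    /* For any other architecture the translation unit is empty, so the
       asm implementations from lib1funcs.asm are used instead.  */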
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ + _call_via_rX _interwork_call_via_rX \ + _lshrdi3 _ashrdi3 _ashldi3 \ + _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ + _arm_fixdfsi _arm_fixunsdfsi \ + _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ + _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ + _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \ + _clzsi2 _clzdi2 + +MULTILIB_OPTIONS = marm/mthumb +MULTILIB_DIRNAMES = arm thumb +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = + +#MULTILIB_OPTIONS += mcpu=fa526/mcpu=fa626/mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te +#MULTILIB_DIRNAMES += fa526 fa626 fa606te fa626te fmp626 fa726te +#MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=fa526 *mthumb*/*mcpu=fa626 + +#MULTILIB_OPTIONS += march=armv7 +#MULTILIB_DIRNAMES += thumb2 +#MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7* +#MULTILIB_MATCHES += march?armv7=march?armv7-a +#MULTILIB_MATCHES += march?armv7=march?armv7-r +#MULTILIB_MATCHES += march?armv7=march?armv7-m +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 + +# Not quite true. We can support hard-vfp calling in Thumb2, but how do we +# express that here? Also, we really need architecture v5e or later +# (mcrr etc). +MULTILIB_OPTIONS += mfloat-abi=hard +MULTILIB_DIRNAMES += fpu +MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* +#MULTILIB_EXCEPTIONS += *mcpu=fa526/*mfloat-abi=hard* +#MULTILIB_EXCEPTIONS += *mcpu=fa626/*mfloat-abi=hard* + +# MULTILIB_OPTIONS += mcpu=ep9312 +# MULTILIB_DIRNAMES += ep9312 +# MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312* +# +# MULTILIB_OPTIONS += mlittle-endian/mbig-endian +# MULTILIB_DIRNAMES += le be +# MULTILIB_MATCHES += mbig-endian=mbe mlittle-endian=mle +# +# MULTILIB_OPTIONS += mhard-float/msoft-float +# MULTILIB_DIRNAMES += fpu soft +# MULTILIB_EXCEPTIONS += *mthumb/*mhard-float* +# +# MULTILIB_OPTIONS += mno-thumb-interwork/mthumb-interwork +# MULTILIB_DIRNAMES += normal interwork +# +# MULTILIB_OPTIONS += fno-leading-underscore/fleading-underscore +# MULTILIB_DIRNAMES += elf under +# +# MULTILIB_OPTIONS += mcpu=arm7 +# MULTILIB_DIRNAMES += nofmult +# MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=arm7* +# # Note: the multilib_exceptions matches both -mthumb and +# # -mthumb-interwork +# # +# # We have to match all the arm cpu variants which do not have the +# # multiply instruction and treat them as if the user had specified +# # -mcpu=arm7. Note that in the following the ? is interpreted as +# # an = for the purposes of matching command line options. +# # FIXME: There ought to be a better way to do this. 
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7d +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7di +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm70 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700i +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710c +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7100 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500fe +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm6 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm60 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm600 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm610 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm620 + +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o + +# If EXTRA_MULTILIB_PARTS is not defined above then define EXTRA_PARTS here +# EXTRA_PARTS = crtbegin.o crtend.o crti.o crtn.o + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib + +# Currently there is a bug somewhere in GCC's alias analysis +# or scheduling code that is breaking _fpmul_parts in fp-bit.c. +# Disabling function inlining is a workaround for this problem. +TARGET_LIBGCC2_CFLAGS = -fno-inline + +# Assemble startup files. +$(T)crti.o: $(srcdir)/config/arm/crti.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/arm/crti.asm + +$(T)crtn.o: $(srcdir)/config/arm/crtn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/arm/crtn.asm + diff --git a/gcc/config/arm/t-arm-softfp b/gcc/config/arm/t-arm-softfp new file mode 100644 index 000000000..f9cace97e --- /dev/null +++ b/gcc/config/arm/t-arm-softfp @@ -0,0 +1,29 @@ +# Copyright (C) 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +softfp_float_modes := sf df +softfp_int_modes := si di +softfp_extensions := sfdf +softfp_truncations := dfsf +softfp_machine_header := arm/sfp-machine.h +softfp_exclude_libgcc2 := y +softfp_wrap_start := '\#ifdef __ARM_ARCH_6M__' +softfp_wrap_end := '\#endif' + +# softfp seems to be missing a whole bunch of prototypes. +TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes diff --git a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi new file mode 100644 index 000000000..61da9ec7b --- /dev/null +++ b/gcc/config/arm/t-bpabi @@ -0,0 +1,36 @@ +# Copyright (C) 2004, 2005 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Add the bpabi.S functions. +LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod + +# Add the BPABI C functions. +LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \ + $(srcdir)/config/arm/unaligned-funcs.c + +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c + +UNWIND_H = $(srcdir)/config/arm/unwind-arm.h +LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \ + $(srcdir)/config/arm/libunwind.S \ + $(srcdir)/config/arm/pr-support.c $(srcdir)/unwind-c.c +LIB2ADDEHDEP = $(UNWIND_H) $(srcdir)/config/$(LIB1ASMSRC) + +# Add the BPABI names. +SHLIB_MAPFILES += $(srcdir)/config/arm/libgcc-bpabi.ver + diff --git a/gcc/config/arm/t-linux b/gcc/config/arm/t-linux new file mode 100644 index 000000000..a6fddad50 --- /dev/null +++ b/gcc/config/arm/t-linux @@ -0,0 +1,34 @@ +# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006, +# 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Just for these, we omit the frame pointer since it makes such a big +# difference. +TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fPIC + +LIB1ASMSRC = arm/lib1funcs.asm +LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \ + _arm_addsubdf3 _arm_addsubsf3 + +# MULTILIB_OPTIONS = mhard-float/msoft-float +# MULTILIB_DIRNAMES = hard-float soft-float + +# EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o + +# LIBGCC = stmp-multilib +# INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/arm/t-linux-androideabi b/gcc/config/arm/t-linux-androideabi new file mode 100644 index 000000000..8f1307c55 --- /dev/null +++ b/gcc/config/arm/t-linux-androideabi @@ -0,0 +1,10 @@ +MULTILIB_OPTIONS = march=armv7-a mthumb +MULTILIB_DIRNAMES = armv7-a thumb +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = +MULTILIB_OSDIRNAMES = + +# The "special" multilib can be used to build native applications for Android, +# as opposed to native shared libraries that are then called via JNI. +#MULTILIB_OPTIONS += tno-android-cc +#MULTILIB_DIRNAMES += special diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi new file mode 100644 index 000000000..39de9aefe --- /dev/null +++ b/gcc/config/arm/t-linux-eabi @@ -0,0 +1,43 @@ +# Copyright (C) 2005, 2009, 2010, 2012 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# These functions are included in shared libraries. +TARGET_LIBGCC2_CFLAGS = -fPIC + +# We do not build a Thumb multilib for Linux because the definition of +# CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode. +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = + +#MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te +#MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te +#MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* + +ifneq (,$(findstring gnueabi,$(target))) +ARM_EB = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),eb) +MULTIARCH_DIRNAME = $(call if_multiarch,arm$(ARM_EB)-linux-gnueabi$(if $(filter hard,$(with_float)),hf)) +endif + +# Use a version of div0 which raises SIGFPE, and a special __clear_cache. +LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache + +# Multilib the standard Linux files. Don't include crti.o or crtn.o, +# which are provided by glibc. +EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o + +LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c diff --git a/gcc/config/arm/t-netbsd b/gcc/config/arm/t-netbsd new file mode 100644 index 000000000..22bbbe7dd --- /dev/null +++ b/gcc/config/arm/t-netbsd @@ -0,0 +1,47 @@ +# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, +# 2006 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Just for these, we omit the frame pointer since it makes such a big +# difference. It is then pointless adding debugging. +TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fpic +LIBGCC2_DEBUG_CFLAGS = -g0 +LIB2FUNCS_EXTRA = $(srcdir)/config/floatunsidf.c $(srcdir)/config/floatunsisf.c + +# Build a shared libgcc library. +SHLIB_EXT = .so +SHLIB_NAME = @shlib_base_name@.so +SHLIB_SONAME = @shlib_base_name@.so.1 +SHLIB_OBJS = @shlib_objs@ + +SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \ + -Wl,-soname,$(SHLIB_SONAME) \ + -o $(SHLIB_NAME).tmp @multilib_flags@ $(SHLIB_OBJS) -lc && \ + rm -f $(SHLIB_SONAME) && \ + if [ -f $(SHLIB_NAME) ]; then \ + mv -f $(SHLIB_NAME) $(SHLIB_NAME).backup; \ + else true; fi && \ + mv $(SHLIB_NAME).tmp $(SHLIB_NAME) && \ + $(LN_S) $(SHLIB_NAME) $(SHLIB_SONAME) +# $(slibdir) double quoted to protect it from expansion while building +# libgcc.mk. We want this delayed until actual install time. 
+SHLIB_INSTALL = \ + $$(mkinstalldirs) $$(DESTDIR)$$(slibdir); \ + $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_SONAME); \ + rm -f $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME); \ + $(LN_S) $(SHLIB_SONAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME) diff --git a/gcc/config/arm/t-pe b/gcc/config/arm/t-pe new file mode 100644 index 000000000..626b1d29a --- /dev/null +++ b/gcc/config/arm/t-pe @@ -0,0 +1,52 @@ +# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009, +# 2010 +# Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifndef __ARMEB__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifndef __ARMEB__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c + echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/pe.c + +MULTILIB_OPTIONS = mhard-float mthumb +MULTILIB_DIRNAMES = fpu thumb + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib +TARGET_LIBGCC2_CFLAGS = diff --git a/gcc/config/arm/t-rtems b/gcc/config/arm/t-rtems new file mode 100644 index 000000000..52d14bab0 --- /dev/null +++ b/gcc/config/arm/t-rtems @@ -0,0 +1,10 @@ +# Custom rtems multilibs + +MULTILIB_OPTIONS = marm/mthumb +MULTILIB_DIRNAMES = arm thumb +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = marm=mno-thumb + +MULTILIB_OPTIONS += msoft-float/mhard-float +MULTILIB_DIRNAMES += soft fpu +MULTILIB_EXCEPTIONS += *mthumb/*mhard-float* diff --git a/gcc/config/arm/t-rtems-eabi b/gcc/config/arm/t-rtems-eabi new file mode 100644 index 000000000..f0e714a9b --- /dev/null +++ b/gcc/config/arm/t-rtems-eabi @@ -0,0 +1,8 @@ +# Custom RTEMS EABI multilibs + +MULTILIB_OPTIONS = mthumb march=armv6-m/march=armv7/march=armv7-m +MULTILIB_DIRNAMES = thumb armv6-m armv7 armv7-m +MULTILIB_EXCEPTIONS = march=armv6-m march=armv7 march=armv7-m +MULTILIB_MATCHES = +MULTILIB_EXCLUSIONS = +MULTILIB_OSDIRNAMES = diff --git a/gcc/config/arm/t-strongarm-elf b/gcc/config/arm/t-strongarm-elf new file mode 100644 index 000000000..64d7ca694 --- /dev/null +++ b/gcc/config/arm/t-strongarm-elf @@ -0,0 +1,61 @@ +# Copyright (C) 2000, 2001, 2006, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifndef __ARMEB__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifndef __ARMEB__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c + echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +MULTILIB_OPTIONS = mlittle-endian/mbig-endian mhard-float/msoft-float +MULTILIB_DIRNAMES = le be fpu soft +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = mbig-endian=mbe mlittle-endian=mle +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib + +# Currently there is a bug somewhere in GCC's alias analysis +# or scheduling code that is breaking _fpmul_parts in fp-bit.c. +# Disabling function inlining is a workaround for this problem. +TARGET_LIBGCC2_CFLAGS = -fno-inline + +# Assemble startup files. +$(T)crti.o: $(srcdir)/config/arm/crti.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/arm/crti.asm + +$(T)crtn.o: $(srcdir)/config/arm/crtn.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/arm/crtn.asm diff --git a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian new file mode 100644 index 000000000..4a1476f67 --- /dev/null +++ b/gcc/config/arm/t-symbian @@ -0,0 +1,53 @@ +# Copyright (C) 2004, 2005, 2006, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 + +# These functions have __aeabi equivalents and will never be called by GCC. +# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being +# used -- and we make sure that definitions are not available in lib1funcs.asm, +# either, so they end up undefined. 
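A quick illustration of the point above, before the list that follows (this C snippet is not part of the makefile): on an arm*-*-eabi target a 64-bit division such as this is lowered to a call to the AEABI helper __aeabi_ldivmod rather than to libgcc2's __divdi3, which is why the generic routines named below can safely be left undefined.

    long long div64(long long a, long long b)
    {
        return a / b;   /* emits a call to __aeabi_ldivmod, not __divdi3 */
    }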
+LIB1ASMFUNCS += \ + _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \ + _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \ + _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \ + _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ + _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ + _fixsfsi _fixunssfsi + +# Include the gcc personality routine +UNWIND_H = $(srcdir)/config/arm/unwind-arm.h +LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c +LIB2ADDEHDEP = $(UNWIND_H) + +# Include half-float helpers. +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c + +# Create a multilib for processors with VFP floating-point, and a +# multilib for those without -- using the soft-float ABI in both +# cases. Symbian OS object should be compiled with interworking +# enabled, so there are no separate thumb-mode libraries. +MULTILIB_OPTIONS = mfloat-abi=softfp +MULTILIB_DIRNAMES = softfp + +# There is no C library to link against on Symbian OS -- at least when +# building GCC. +SHLIB_LC = + +# Symbian OS provides its own startup code. +EXTRA_MULTILIB_PARTS= diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks new file mode 100644 index 000000000..af01ac412 --- /dev/null +++ b/gcc/config/arm/t-vxworks @@ -0,0 +1,44 @@ +# Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifndef __ARMEB__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifndef __ARMEB__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +MULTILIB_OPTIONS = \ + mrtp fPIC \ + t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe +MULTILIB_MATCHES = fPIC=fpic +# Don't build -fPIC multilibs for kernel or Thumb code. +MULTILIB_EXCEPTIONS = fPIC* mrtp/fPIC/*t[45]t* diff --git a/gcc/config/arm/t-wince-pe b/gcc/config/arm/t-wince-pe new file mode 100644 index 000000000..165bef200 --- /dev/null +++ b/gcc/config/arm/t-wince-pe @@ -0,0 +1,56 @@ +# Copyright (C) 2003, 2004, 2006, 2008, 2009, 2010 +# Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 + +# We want fine grained libraries, so use the new code to build the +# floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + echo '#ifndef __ARMEB__' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c + echo '#endif' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#ifndef __ARMEB__' > dp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c + echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c + echo '#endif' >> dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/pe.c + +MULTILIB_OPTIONS = mhard-float +MULTILIB_DIRNAMES = fpu +# Note - Thumb multilib omitted because Thumb support for +# arm-wince-pe target does not appear to be working in binutils +# yet... +# MULTILIB_OPTIONS += thumb +# MULTILIB_DIRNAMES += thumb + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib +TARGET_LIBGCC2_CFLAGS = diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md new file mode 100644 index 000000000..1b2fb2d44 --- /dev/null +++ b/gcc/config/arm/thumb2.md @@ -0,0 +1,1121 @@ +;; ARM Thumb-2 Machine Description +;; Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; Note: Thumb-2 is the variant of the Thumb architecture that adds +;; 32-bit encodings of [almost all of] the Arm instruction set. +;; Some old documents refer to the relatively minor interworking +;; changes made in armv5t as "thumb2". These are considered part +;; the 16-bit Thumb-1 instruction set. 
+ +(define_insn "*thumb2_incscc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand:CC 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "s_register_operand" "0,?r")))] + "TARGET_THUMB2" + "@ + it\\t%d2\;add%d2\\t%0, %1, #1 + ite\\t%D2\;mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1" + [(set_attr "conds" "use") + (set_attr "length" "6,10")] +) + +(define_insn "*thumb2_decscc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + (match_operator:SI 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2" + "@ + it\\t%d2\;sub%d2\\t%0, %1, #1 + ite\\t%D2\;mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1" + [(set_attr "conds" "use") + (set_attr "length" "6,10")] +) + +;; Thumb-2 only allows shift by constant on data processing instructions +(define_insn "*thumb_andsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "const_int_operand" "M")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "bic%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "shift" "2") + (set_attr "type" "alu_shift")] +) + +(define_insn "*thumb2_smaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (smax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "@ + cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2 + cmp\\t%1, %2\;it\\tge\;movge\\t%0, %1 + cmp\\t%1, %2\;ite\\tge\;movge\\t%0, %1\;movlt\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "10,10,14")] +) + +(define_insn "*thumb2_sminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (smin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "@ + cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2 + cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %1 + cmp\\t%1, %2\;ite\\tlt\;movlt\\t%0, %1\;movge\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "10,10,14")] +) + +(define_insn "*thumb32_umaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "@ + cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2 + cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %1 + cmp\\t%1, %2\;ite\\tcs\;movcs\\t%0, %1\;movcc\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "10,10,14")] +) + +(define_insn "*thumb2_uminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "@ + cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2 + cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %1 + cmp\\t%1, %2\;ite\\tcc\;movcc\\t%0, %1\;movcs\\t%0, %2" + [(set_attr "conds" "clob") + (set_attr "length" "10,10,14")] +) + +;; Thumb-2 does not have rsc, so use a clever trick with shifter operands. 
+(define_insn "*thumb2_negdi2" + [(set (match_operand:DI 0 "s_register_operand" "=&r,r") + (neg:DI (match_operand:DI 1 "s_register_operand" "?r,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1" + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + +(define_insn "*thumb2_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "@ + cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 + eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + ;; predicable can't be set based on the variant, so left as no + (set_attr "length" "10,8")] +) + +(define_insn "*thumb2_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "@ + cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 + eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + ;; predicable can't be set based on the variant, so left as no + (set_attr "length" "10,8")] +) + +;; We have two alternatives here for memory loads (and similarly for stores) +;; to reflect the fact that the permissible constant pool ranges differ +;; between ldr instructions taking low regs and ldr instructions taking high +;; regs. The high register alternatives are not taken into account when +;; choosing register preferences in order to reflect their expense. +(define_insn "*thumb2_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m") + (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))] + "TARGET_THUMB2 && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + movw%?\\t%0, %1 + ldr%?\\t%0, %1 + ldr%?\\t%0, %1 + str%?\\t%1, %0 + str%?\\t%1, %0" + [(set_attr "type" "*,*,*,*,load1,load1,store1,store1") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,*,1020,4096,*,*") + (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")] +) + +(define_insn "tls_load_dot_plus_four" + [(set (match_operand:SI 0 "register_operand" "=l,l,r,r") + (mem:SI (unspec:SI [(match_operand:SI 2 "register_operand" "0,1,0,1") + (const_int 4) + (match_operand 3 "" "")] + UNSPEC_PIC_BASE))) + (clobber (match_scratch:SI 1 "=X,l,X,r"))] + "TARGET_THUMB2" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[3])); + return \"add\\t%2, %|pc\;ldr%?\\t%0, [%2]\"; + " + [(set_attr "length" "4,4,6,6")] +) + +;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot +;; of the messiness associated with the ARM patterns. 
+(define_insn "*thumb2_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,n,r,m"))] + "TARGET_THUMB2" + "@ + mov%?\\t%0, %1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi + str%(h%)\\t%1, %0\\t%@ movhi + ldr%(h%)\\t%0, %1\\t%@ movhi" + [(set_attr "type" "*,*,store1,load1") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,4096") + (set_attr "neg_pool_range" "*,*,*,250")] +) + +(define_insn "*thumb2_cmpsi_neg_shiftsi" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "r") + (neg:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M")]))))] + "TARGET_THUMB2" + "cmn%?\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "type" "alu_shift")] +) + +(define_insn "*thumb2_mov_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "TARGET_THUMB2" + "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + [(set_attr "conds" "use") + (set_attr "length" "10")] +) + +(define_insn "*thumb2_mov_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2" + "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + [(set_attr "conds" "use") + (set_attr "length" "10")] +) + +(define_insn "*thumb2_mov_notscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2" + "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + [(set_attr "conds" "use") + (set_attr "length" "10")] +) + +(define_insn "*thumb2_movsicc_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K") + (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))] + "TARGET_THUMB2" + "@ + it\\t%D3\;mov%D3\\t%0, %2 + it\\t%D3\;mvn%D3\\t%0, #%B2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%d3\;mvn%d3\\t%0, #%B1 + ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + [(set_attr "length" "6,6,6,6,10,10,10,10") + (set_attr "conds" "use")] +) + +(define_insn "*thumb2_movsfcc_soft_insn" + [(set (match_operand:SF 0 "s_register_operand" "=r,r") + (if_then_else:SF (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,r") + (match_operand:SF 2 "s_register_operand" "r,0")))] + "TARGET_THUMB2 && TARGET_SOFT_FLOAT" + "@ + it\\t%D3\;mov%D3\\t%0, %2 + it\\t%d3\;mov%d3\\t%0, %1" + [(set_attr "length" "6,6") + (set_attr "conds" "use")] +) + +(define_insn "*call_reg_thumb2" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2" + "blx%?\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb2" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber 
(reg:SI LR_REGNUM))] + "TARGET_THUMB2" + "blx\\t%1" + [(set_attr "type" "call")] +) + +(define_insn "*thumb2_indirect_jump" + [(set (pc) + (match_operand:SI 0 "register_operand" "l*r"))] + "TARGET_THUMB2" + "bx\\t%0" + [(set_attr "conds" "clob")] +) +;; Don't define thumb2_load_indirect_jump because we can't guarantee label +;; addresses will have the thumb bit set correctly. + + +(define_insn "*thumb2_and_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_THUMB2" + "ite\\t%D1\;mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1" + [(set_attr "conds" "use") + (set_attr "length" "10")] +) + +(define_insn "*thumb2_ior_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "s_register_operand" "0,?r")))] + "TARGET_THUMB2" + "@ + it\\t%d2\;orr%d2\\t%0, %1, #1 + ite\\t%D2\;mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1" + [(set_attr "conds" "use") + (set_attr "length" "6,10")] +) + +(define_insn "*thumb2_cond_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI (match_operator 3 "equality_operator" + [(match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[3]) == NE) + { + if (which_alternative != 1) + output_asm_insn (\"it\\t%D4\;mov%D4\\t%0, %2\", operands); + if (which_alternative != 0) + output_asm_insn (\"it\\t%d4\;mov%d4\\t%0, %1\", operands); + return \"\"; + } + switch (which_alternative) + { + case 0: + output_asm_insn (\"it\\t%d4\", operands); + break; + case 1: + output_asm_insn (\"it\\t%D4\", operands); + break; + case 2: + output_asm_insn (\"ite\\t%D4\", operands); + break; + default: + abort(); + } + if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%d4\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "use") + (set_attr "length" "6,6,10")] +) + +(define_insn "*thumb2_cond_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 5 "shiftable_operator" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + return \"%i5\\t%0, %1, %2, lsr #31\"; + + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (GET_CODE (operands[5]) == AND) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, #0\", operands); + } + else if (GET_CODE (operands[5]) == MINUS) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands); + } + else if (which_alternative != 0) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + } + else + output_asm_insn (\"it\\t%d4\", operands); + return \"%i5%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "14")] +) + +(define_insn "*thumb2_cond_sub" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + 
(minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + (match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (which_alternative != 0) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + } + else + output_asm_insn (\"it\\t%d4\", operands); + return \"sub%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "10,14")] +) + +(define_insn "*thumb2_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + return \"asr\\t%0, %1, #31\"; + + if (GET_CODE (operands[3]) == NE) + return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0\"; + + output_asm_insn (\"cmp\\t%1, %2\", operands); + output_asm_insn (\"ite\\t%D3\", operands); + output_asm_insn (\"mov%D3\\t%0, #0\", operands); + return \"mvn%d3\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "14")] +) + +(define_insn "*thumb2_movcond" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[5]) == LT + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && GET_CODE (operands[1]) == REG) + { + if (operands[2] == const0_rtx) + return \"and\\t%0, %1, %3, asr #31\"; + return \"ands\\t%0, %1, %3, asr #32\;it\\tcc\;movcc\\t%0, %2\"; + } + else if (which_alternative != 0 && GET_CODE (operands[2]) == REG) + { + if (operands[1] == const0_rtx) + return \"bic\\t%0, %2, %3, asr #31\"; + return \"bics\\t%0, %2, %3, asr #32\;it\\tcs\;movcs\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + + if (GET_CODE (operands[5]) == GE + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && GET_CODE (operands[1]) == REG) + { + if (operands[2] == const0_rtx) + return \"bic\\t%0, %1, %3, asr #31\"; + return \"bics\\t%0, %1, %3, asr #32\;it\\tcs\;movcs\\t%0, %2\"; + } + else if (which_alternative != 0 && GET_CODE (operands[2]) == REG) + { + if (operands[1] == const0_rtx) + return \"and\\t%0, %2, %3, asr #31\"; + return \"ands\\t%0, %2, %3, asr #32\;it\tcc\;movcc\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. 
*/ + } + if (GET_CODE (operands[4]) == CONST_INT + && !const_ok_for_arm (INTVAL (operands[4]))) + output_asm_insn (\"cmn\\t%3, #%n4\", operands); + else + output_asm_insn (\"cmp\\t%3, %4\", operands); + switch (which_alternative) + { + case 0: + output_asm_insn (\"it\\t%D5\", operands); + break; + case 1: + output_asm_insn (\"it\\t%d5\", operands); + break; + case 2: + output_asm_insn (\"ite\\t%d5\", operands); + break; + default: + abort(); + } + if (which_alternative != 0) + output_asm_insn (\"mov%d5\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%D5\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "10,10,14")] +) + +;; Zero and sign extension instructions. + +;; All supported Thumb2 implementations are armv6, so only that case is +;; provided. +(define_insn "*thumb2_extendqisi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + sxtb%?\\t%0, %1 + ldr%(sb%)\\t%0, %1" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,4096") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "*thumb2_zero_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + uxth%?\\t%0, %1 + ldr%(h%)\\t%0, %1" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,4096") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "thumb2_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + uxtb%(%)\\t%0, %1 + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "type" "alu_shift,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,4096") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "thumb2_casesi_internal" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r")) + (use (label_ref (match_dup 2)))])] + "TARGET_THUMB2 && !flag_pic" + "* return thumb2_output_casesi(operands);" + [(set_attr "conds" "clob") + (set_attr "length" "16")] +) + +(define_insn "thumb2_casesi_internal_pic" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (match_scratch:SI 5 "=r")) + (use (label_ref (match_dup 2)))])] + "TARGET_THUMB2 && flag_pic" + "* return thumb2_output_casesi(operands);" + [(set_attr "conds" "clob") + (set_attr "length" "20")] +) + +;; Note: this is not predicable, to avoid issues with linker-generated +;; interworking stubs. 
+(define_insn "*thumb2_return" + [(return)] + "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)" + "* + { + return output_return_instruction (const_true_rtx, TRUE, FALSE); + }" + [(set_attr "type" "load1") + (set_attr "length" "12")] +) + +(define_insn_and_split "thumb2_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "TARGET_THUMB2" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + thumb_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +(define_insn "*thumb2_alusi3_short" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (match_operator:SI 3 "thumb_16bit_operator" + [(match_operand:SI 1 "s_register_operand" "0") + (match_operand:SI 2 "s_register_operand" "l")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed + && GET_CODE(operands[3]) != PLUS + && GET_CODE(operands[3]) != MINUS" + "%I3%!\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "length" "2")] +) + +;; Similarly for 16-bit shift instructions +;; There is no 16-bit rotate by immediate instruction. +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "low_reg_or_int_operand" "")]))] + "TARGET_THUMB2 + && peep2_regno_dead_p(0, CC_REGNUM) + && (CONST_INT_P (operands[2]) || operands[1] == operands[0]) + && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT) + || REG_P(operands[2]))" + [(parallel + [(set (match_dup 0) + (match_op_dup 3 + [(match_dup 1) + (match_dup 2)])) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*thumb2_shiftsi3_short" + [(set (match_operand:SI 0 "low_register_operand" "=l,l") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "low_register_operand" "0,l") + (match_operand:SI 2 "low_reg_or_int_operand" "l,M")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed + && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT) + || REG_P(operands[2]))" + "* return arm_output_shift(operands, 2);" + [(set_attr "predicable" "yes") + (set_attr "shift" "1") + (set_attr "length" "2") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift") + (const_string "alu_shift_reg")))] +) + +;; 16-bit load immediate +(define_peephole2 + [(set (match_operand:QHSI 0 "low_register_operand" "") + (match_operand:QHSI 1 "const_int_operand" ""))] + "TARGET_THUMB2 + && peep2_regno_dead_p(0, CC_REGNUM) + && (unsigned HOST_WIDE_INT) INTVAL(operands[1]) < 256" + [(parallel + [(set (match_dup 0) + (match_dup 1)) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*thumb2_mov_shortim" + [(set (match_operand:QHSI 0 "low_register_operand" "=l") + (match_operand:QHSI 1 "const_int_operand" "I")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "mov%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "2")] +) + +;; 16-bit add/sub immediate +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (plus:SI (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_THUMB2 + && peep2_regno_dead_p(0, CC_REGNUM) + && ((rtx_equal_p(operands[0], operands[1]) + && INTVAL(operands[2]) > -256 && INTVAL(operands[2]) < 256) + || (INTVAL(operands[2]) > -8 && INTVAL(operands[2]) < 8))" + [(parallel + [(set (match_dup 0) + (plus:SI (match_dup 1) + (match_dup 2))) + 
(clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*thumb2_addsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l,l") + (plus:SI (match_operand:SI 1 "low_register_operand" "l,0") + (match_operand:SI 2 "low_reg_or_int_operand" "lPt,Ps"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "* + HOST_WIDE_INT val; + + if (GET_CODE (operands[2]) == CONST_INT) + val = INTVAL(operands[2]); + else + val = 0; + + /* We prefer eg. subs rn, rn, #1 over adds rn, rn, #0xffffffff. */ + if (val < 0 && const_ok_for_arm(ARM_SIGN_EXTEND (-val))) + return \"sub%!\\t%0, %1, #%n2\"; + else + return \"add%!\\t%0, %1, %2\"; + " + [(set_attr "predicable" "yes") + (set_attr "length" "2")] +) + +(define_insn "divsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (div:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_THUMB2 && arm_arch_hwdiv" + "sdiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "insn" "sdiv")] +) + +(define_insn "udivsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (udiv:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_THUMB2 && arm_arch_hwdiv" + "udiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "insn" "udiv")] +) + +(define_insn "*thumb2_subsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (minus:SI (match_operand:SI 1 "low_register_operand" "l") + (match_operand:SI 2 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "sub%!\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "length" "2")] +) + +(define_peephole2 + [(set (match_operand:CC 0 "cc_register" "") + (compare:CC (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_THUMB2 + && peep2_reg_dead_p (1, operands[1]) + && satisfies_constraint_Pw (operands[2])" + [(parallel + [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 3)))])] + "operands[3] = GEN_INT (- INTVAL (operands[2]));" +) + +(define_peephole2 + [(match_scratch:SI 3 "l") + (set (match_operand:CC 0 "cc_register" "") + (compare:CC (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_THUMB2 + && satisfies_constraint_Px (operands[2])" + [(parallel + [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 3) (plus:SI (match_dup 1) (match_dup 4)))])] + "operands[4] = GEN_INT (- INTVAL (operands[2]));" +) + +(define_insn "*thumb2_addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") + (match_operand:SI 2 "arm_add_operand" "lPt,Ps,rIL")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_THUMB2" + "* + HOST_WIDE_INT val; + + if (GET_CODE (operands[2]) == CONST_INT) + val = INTVAL (operands[2]); + else + val = 0; + + if (val < 0 && const_ok_for_arm (ARM_SIGN_EXTEND (-val))) + return \"subs\\t%0, %1, #%n2\"; + else + return \"adds\\t%0, %1, %2\"; + " + [(set_attr "conds" "set") + (set_attr "length" "2,2,4")] +) + +(define_insn "*thumb2_addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 0 "s_register_operand" "l, r") + (match_operand:SI 1 "arm_add_operand" "lPv,rIL")) + (const_int 0)))] + "TARGET_THUMB2" + 
"* + HOST_WIDE_INT val; + + if (GET_CODE (operands[1]) == CONST_INT) + val = INTVAL (operands[1]); + else + val = 0; + + if (val < 0 && const_ok_for_arm (ARM_SIGN_EXTEND (-val))) + return \"cmp\\t%0, #%n1\"; + else + return \"cmn\\t%0, %1\"; + " + [(set_attr "conds" "set") + (set_attr "length" "2,4")] +) + +;; 16-bit encodings of "muls" and "mul". We only use these when +;; optimizing for size since "muls" is slow on all known +;; implementations and since "mul" will be generated by +;; "*arm_mulsi3_v6" anyhow. The assembler will use a 16-bit encoding +;; for "mul" whenever possible anyhow. +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (mult:SI (match_operand:SI 1 "low_register_operand" "") + (match_dup 0)))] + "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" + [(parallel + [(set (match_dup 0) + (mult:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (mult:SI (match_dup 0) + (match_operand:SI 1 "low_register_operand" "")))] + "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)" + [(parallel + [(set (match_dup 0) + (mult:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*thumb2_mulsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (mult:SI (match_operand:SI 1 "low_register_operand" "%0") + (match_operand:SI 2 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && optimize_size && reload_completed" + "mul%!\\t%0, %2, %0" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "insn" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=l") + (mult:SI (match_dup 1) (match_dup 2)))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "insn" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=l"))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "insn" "muls")]) + +(define_insn "*thumb2_cbz" + [(set (pc) (if_then_else + (eq (match_operand:SI 0 "s_register_operand" "l,?r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (get_attr_length (insn) == 2) + return \"cbz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;beq\\t%l1\"; + " + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (eq (symbol_ref ("which_alternative")) (const_int 0))) + (const_int 2) + (const_int 8)))] +) + +(define_insn "*thumb2_cbnz" + [(set (pc) (if_then_else + (ne (match_operand:SI 0 "s_register_operand" "l,?r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (get_attr_length (insn) == 2) + return \"cbnz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;bne\\t%l1\"; + " + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) + (le (minus (match_dup 1) (pc)) (const_int 
128)) + (eq (symbol_ref ("which_alternative")) (const_int 0))) + (const_int 2) + (const_int 8)))] +) + +;; 16-bit complement +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (not:SI (match_operand:SI 1 "low_register_operand" "")))] + "TARGET_THUMB2 + && peep2_regno_dead_p(0, CC_REGNUM)" + [(parallel + [(set (match_dup 0) + (not:SI (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*thumb2_one_cmplsi2_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (not:SI (match_operand:SI 1 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "mvn%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "2")] +) + +;; 16-bit negate +(define_peephole2 + [(set (match_operand:SI 0 "low_register_operand" "") + (neg:SI (match_operand:SI 1 "low_register_operand" "")))] + "TARGET_THUMB2 + && peep2_regno_dead_p(0, CC_REGNUM)" + [(parallel + [(set (match_dup 0) + (neg:SI (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*thumb2_negsi2_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (neg:SI (match_operand:SI 1 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "neg%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "2")] +) + +(define_insn "*orsi_notsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "orn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes")] +) + +(define_insn "*orsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "const_int_operand" "M")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "orn%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "shift" "2") + (set_attr "type" "alu_shift")] +) + +(define_peephole2 + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 1 "low_register_operand" "") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "")) + (const_int 0))) + (match_scratch:SI 3 "l") + (set (pc) + (if_then_else (match_operator:CC_NOOV 4 "equality_operator" + [(match_dup 0) (const_int 0)]) + (match_operand 5 "" "") + (match_operand 6 "" "")))] + "TARGET_THUMB2 + && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32)" + [(parallel [(set (match_dup 0) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (clobber (match_dup 3))]) + (set (pc) + (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)]) + (match_dup 5) (match_dup 6)))] + " + operands[2] = GEN_INT (31 - INTVAL (operands[2])); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? 
LT : GE, + VOIDmode, operands[0], const0_rtx); + ") + +(define_peephole2 + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (const_int 0)) + (const_int 0))) + (match_scratch:SI 3 "l") + (set (pc) + (if_then_else (match_operator:CC_NOOV 4 "equality_operator" + [(match_dup 0) (const_int 0)]) + (match_operand 5 "" "") + (match_operand 6 "" "")))] + "TARGET_THUMB2 + && (INTVAL (operands[2]) > 0 && INTVAL (operands[2]) < 32)" + [(parallel [(set (match_dup 0) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (clobber (match_dup 3))]) + (set (pc) + (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)]) + (match_dup 5) (match_dup 6)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + ") diff --git a/gcc/config/arm/uclinux-eabi.h b/gcc/config/arm/uclinux-eabi.h new file mode 100644 index 000000000..4455288b8 --- /dev/null +++ b/gcc/config/arm/uclinux-eabi.h @@ -0,0 +1,66 @@ +/* Definitions for ARM EABI ucLinux + Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. + Contributed by Paul Brook + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Override settings that are different to the uclinux-elf or + bpabi defaults. */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE | MASK_INTERWORK) + +/* On EABI GNU/Linux, we want both the BPABI builtins and the + GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + TARGET_BPABI_CPP_BUILTINS(); \ + builtin_define ("__uClinux__"); \ + builtin_define ("__gnu_linux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux_eabi -elf2flt" \ + " --pic-veneer --target2=abs" + +/* We default to the "aapcs-linux" ABI so that enums are int-sized by + default. */ +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. 
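+   The asm below passes the start and end addresses in a1 and a2, a zero
+   flags word in a3, and the ARM-private system call number 0xf0002
+   (__ARM_NR_cacheflush) in r7 before issuing "swi 0x0".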
*/ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("a2") = (unsigned long) (END); \ + register unsigned long _flg __asm ("a3") = 0; \ + register unsigned long _scno __asm ("r7") = 0xf0002; \ + __asm __volatile ("swi 0x0 @ sys_cacheflush" \ + : "=r" (_beg) \ + : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); \ +} + diff --git a/gcc/config/arm/uclinux-elf.h b/gcc/config/arm/uclinux-elf.h new file mode 100644 index 000000000..50fd76580 --- /dev/null +++ b/gcc/config/arm/uclinux-elf.h @@ -0,0 +1,88 @@ +/* Definitions for ARM running ucLinux using ELF + Copyright (C) 1999, 2001, 2004, 2005, 2007, 2008 + Free Software Foundation, Inc. + Contributed by Philip Blundell + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* We don't want a PLT. */ +#undef NEED_PLT_RELOC +#define NEED_PLT_RELOC 0 + +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/ELF ucLinux)", stderr); + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE) + +/* NOTE: The remaining definitions in this file are needed because uclinux + does not use config/linux.h. */ + +/* Add GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__uClinux__"); \ + builtin_define ("__gnu_linux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +/* Do not assume anything about header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* The GNU C++ standard library requires that these macros be defined. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux" + +/* Now we define the strings used to build the spec file. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt1%O%s crti%O%s crtbegin%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef CC1_SPEC +#define CC1_SPEC "%{profile:-p}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +#undef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X -elf2flt" + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/arm/unaligned-funcs.c b/gcc/config/arm/unaligned-funcs.c new file mode 100644 index 000000000..4e684f4fc --- /dev/null +++ b/gcc/config/arm/unaligned-funcs.c @@ -0,0 +1,57 @@ +/* EABI unaligned read/write functions. 
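+   These helpers access a misaligned word through the single member of a
+   packed struct, so the compiler is free to emit whatever unaligned or
+   byte-wise access sequence the target supports.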
+ + Copyright (C) 2005, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +int __aeabi_uread4 (void *); +int __aeabi_uwrite4 (int, void *); +long long __aeabi_uread8 (void *); +long long __aeabi_uwrite8 (long long, void *); + +struct __attribute__((packed)) u4 { int data; }; +struct __attribute__((packed)) u8 { long long data; }; + +int +__aeabi_uread4 (void *ptr) +{ + return ((struct u4 *) ptr)->data; +} + +int +__aeabi_uwrite4 (int data, void *ptr) +{ + ((struct u4 *) ptr)->data = data; + return data; +} + +long long +__aeabi_uread8 (void *ptr) +{ + return ((struct u8 *) ptr)->data; +} + +long long +__aeabi_uwrite8 (long long data, void *ptr) +{ + ((struct u8 *) ptr)->data = data; + return data; +} diff --git a/gcc/config/arm/unknown-elf.h b/gcc/config/arm/unknown-elf.h new file mode 100644 index 000000000..b47455ea9 --- /dev/null +++ b/gcc/config/arm/unknown-elf.h @@ -0,0 +1,100 @@ +/* Definitions for non-Linux based ARM systems using ELF + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2007, 2008, 2010 + Free Software Foundation, Inc. + Contributed by Catherine Moore + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* elfos.h should have already been included. Now just override + any conflicting definitions and add any extras. */ + +/* Run-time Target Specification. */ +#ifndef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/ELF)", stderr); +#endif + +/* Default to using software floating point. */ +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (0) +#endif + +/* Now we define the strings used to build the spec file. */ +#define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC + +#define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC UNKNOWN_ELF_ENDFILE_SPEC + +/* The __USES_INITFINI__ define is tested in newlib/libc/sys/arm/crt0.S + to see if it needs to invoked _init() and _fini(). 
*/ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__USES_INITFINI__" + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Return a nonzero value if DECL has a section attribute. */ +#define IN_NAMED_SECTION_P(DECL) \ + ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \ + && DECL_SECTION_NAME (DECL) != NULL_TREE) + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if (IN_NAMED_SECTION_P (DECL)) \ + switch_to_section (get_named_section (DECL, NULL, 0)); \ + else \ + switch_to_section (bss_section); \ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + \ + last_assemble_variable_decl = DECL; \ + ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_SKIP (FILE, SIZE ? (int)(SIZE) : 1); \ + } \ + while (0) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if ((DECL) != NULL && IN_NAMED_SECTION_P (DECL)) \ + switch_to_section (get_named_section (DECL, NULL, 0)); \ + else \ + switch_to_section (bss_section); \ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \ + } \ + while (0) + +#ifndef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm7tdmi +#endif + +/* The libgcc udivmod functions may throw exceptions. If newlib is + configured to support long longs in I/O, then printf will depend on + udivmoddi4, which will depend on the exception unwind routines, + which will depend on abort, which is defined in libc. */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC "--start-group %G %L --end-group" diff --git a/gcc/config/arm/unwind-arm.c b/gcc/config/arm/unwind-arm.c new file mode 100644 index 000000000..2c6e00489 --- /dev/null +++ b/gcc/config/arm/unwind-arm.c @@ -0,0 +1,1263 @@ +/* ARM EABI compliant unwinding routines. + Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc. + Contributed by Paul Brook + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "unwind.h" + +/* We add a prototype for abort here to avoid creating a dependency on + target headers. */ +extern void abort (void); + +/* Definitions for C++ runtime support routines. We make these weak + declarations to avoid pulling in libsupc++ unnecessarily. 
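+   These routines are only reached when a C++ frame's unwind tables refer
+   to them, so a C-only program that links this unwinder need not provide
+   definitions for them.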
*/ +typedef unsigned char bool; + +typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */ + +void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp); +bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp); +bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp, + const type_info *rttip, + bool is_reference, + void **matched_object); + +_Unwind_Ptr __attribute__((weak)) +__gnu_Unwind_Find_exidx (_Unwind_Ptr, int *); + +/* Misc constants. */ +#define R_IP 12 +#define R_SP 13 +#define R_LR 14 +#define R_PC 15 + +#define EXIDX_CANTUNWIND 1 +#define uint32_highbit (((_uw) 1) << 31) + +#define UCB_FORCED_STOP_FN(ucbp) ((ucbp)->unwinder_cache.reserved1) +#define UCB_PR_ADDR(ucbp) ((ucbp)->unwinder_cache.reserved2) +#define UCB_SAVED_CALLSITE_ADDR(ucbp) ((ucbp)->unwinder_cache.reserved3) +#define UCB_FORCED_STOP_ARG(ucbp) ((ucbp)->unwinder_cache.reserved4) + +struct core_regs +{ + _uw r[16]; +}; + +/* We use normal integer types here to avoid the compiler generating + coprocessor instructions. */ +struct vfp_regs +{ + _uw64 d[16]; + _uw pad; +}; + +struct vfpv3_regs +{ + /* Always populated via VSTM, so no need for the "pad" field from + vfp_regs (which is used to store the format word for FSTMX). */ + _uw64 d[16]; +}; + +struct fpa_reg +{ + _uw w[3]; +}; + +struct fpa_regs +{ + struct fpa_reg f[8]; +}; + +struct wmmxd_regs +{ + _uw64 wd[16]; +}; + +struct wmmxc_regs +{ + _uw wc[4]; +}; + +/* Unwind descriptors. */ + +typedef struct +{ + _uw16 length; + _uw16 offset; +} EHT16; + +typedef struct +{ + _uw length; + _uw offset; +} EHT32; + +/* The ABI specifies that the unwind routines may only use core registers, + except when actually manipulating coprocessor state. This allows + us to write one implementation that works on all platforms by + demand-saving coprocessor registers. + + During unwinding we hold the coprocessor state in the actual hardware + registers and allocate demand-save areas for use during phase1 + unwinding. */ + +typedef struct +{ + /* The first fields must be the same as a phase2_vrs. */ + _uw demand_save_flags; + struct core_regs core; + _uw prev_sp; /* Only valid during forced unwinding. */ + struct vfp_regs vfp; + struct vfpv3_regs vfp_regs_16_to_31; + struct fpa_regs fpa; + struct wmmxd_regs wmmxd; + struct wmmxc_regs wmmxc; +} phase1_vrs; + +#define DEMAND_SAVE_VFP 1 /* VFP state has been saved if not set */ +#define DEMAND_SAVE_VFP_D 2 /* VFP state is for FLDMD/FSTMD if set */ +#define DEMAND_SAVE_VFP_V3 4 /* VFPv3 state for regs 16 .. 31 has + been saved if not set */ +#define DEMAND_SAVE_WMMXD 8 /* iWMMXt data registers have been + saved if not set. */ +#define DEMAND_SAVE_WMMXC 16 /* iWMMXt control registers have been + saved if not set. */ + +/* This must match the structure created by the assembly wrappers. */ +typedef struct +{ + _uw demand_save_flags; + struct core_regs core; +} phase2_vrs; + + +/* An exception index table entry. */ + +typedef struct __EIT_entry +{ + _uw fnoffset; + _uw content; +} __EIT_entry; + +/* Assembly helper functions. */ + +/* Restore core register state. Never returns. */ +void __attribute__((noreturn)) restore_core_regs (struct core_regs *); + + +/* Coprocessor register state manipulation functions. */ + +/* Routines for FLDMX/FSTMX format... 
*/ +void __gnu_Unwind_Save_VFP (struct vfp_regs * p); +void __gnu_Unwind_Restore_VFP (struct vfp_regs * p); +void __gnu_Unwind_Save_WMMXD (struct wmmxd_regs * p); +void __gnu_Unwind_Restore_WMMXD (struct wmmxd_regs * p); +void __gnu_Unwind_Save_WMMXC (struct wmmxc_regs * p); +void __gnu_Unwind_Restore_WMMXC (struct wmmxc_regs * p); + +/* ...and those for FLDMD/FSTMD format... */ +void __gnu_Unwind_Save_VFP_D (struct vfp_regs * p); +void __gnu_Unwind_Restore_VFP_D (struct vfp_regs * p); + +/* ...and those for VLDM/VSTM format, saving/restoring only registers + 16 through 31. */ +void __gnu_Unwind_Save_VFP_D_16_to_31 (struct vfpv3_regs * p); +void __gnu_Unwind_Restore_VFP_D_16_to_31 (struct vfpv3_regs * p); + +/* Restore coprocessor state after phase1 unwinding. */ +static void +restore_non_core_regs (phase1_vrs * vrs) +{ + if ((vrs->demand_save_flags & DEMAND_SAVE_VFP) == 0) + { + if (vrs->demand_save_flags & DEMAND_SAVE_VFP_D) + __gnu_Unwind_Restore_VFP_D (&vrs->vfp); + else + __gnu_Unwind_Restore_VFP (&vrs->vfp); + } + + if ((vrs->demand_save_flags & DEMAND_SAVE_VFP_V3) == 0) + __gnu_Unwind_Restore_VFP_D_16_to_31 (&vrs->vfp_regs_16_to_31); + + if ((vrs->demand_save_flags & DEMAND_SAVE_WMMXD) == 0) + __gnu_Unwind_Restore_WMMXD (&vrs->wmmxd); + if ((vrs->demand_save_flags & DEMAND_SAVE_WMMXC) == 0) + __gnu_Unwind_Restore_WMMXC (&vrs->wmmxc); +} + +/* A better way to do this would probably be to compare the absolute address + with a segment relative relocation of the same symbol. */ + +extern int __text_start; +extern int __data_start; + +/* The exception index table location. */ +extern __EIT_entry __exidx_start; +extern __EIT_entry __exidx_end; + +/* ABI defined personality routines. */ +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0 (_Unwind_State, + _Unwind_Control_Block *, _Unwind_Context *);// __attribute__((weak)); +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1 (_Unwind_State, + _Unwind_Control_Block *, _Unwind_Context *) __attribute__((weak)); +extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2 (_Unwind_State, + _Unwind_Control_Block *, _Unwind_Context *) __attribute__((weak)); + +/* ABI defined routine to store a virtual register to memory. */ + +_Unwind_VRS_Result _Unwind_VRS_Get (_Unwind_Context *context, + _Unwind_VRS_RegClass regclass, + _uw regno, + _Unwind_VRS_DataRepresentation representation, + void *valuep) +{ + phase1_vrs *vrs = (phase1_vrs *) context; + + switch (regclass) + { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 + || regno > 15) + return _UVRSR_FAILED; + *(_uw *) valuep = vrs->core.r[regno]; + return _UVRSR_OK; + + case _UVRSC_VFP: + case _UVRSC_FPA: + case _UVRSC_WMMXD: + case _UVRSC_WMMXC: + return _UVRSR_NOT_IMPLEMENTED; + + default: + return _UVRSR_FAILED; + } +} + + +/* ABI defined function to load a virtual register from memory. */ + +_Unwind_VRS_Result _Unwind_VRS_Set (_Unwind_Context *context, + _Unwind_VRS_RegClass regclass, + _uw regno, + _Unwind_VRS_DataRepresentation representation, + void *valuep) +{ + phase1_vrs *vrs = (phase1_vrs *) context; + + switch (regclass) + { + case _UVRSC_CORE: + if (representation != _UVRSD_UINT32 + || regno > 15) + return _UVRSR_FAILED; + + vrs->core.r[regno] = *(_uw *) valuep; + return _UVRSR_OK; + + case _UVRSC_VFP: + case _UVRSC_FPA: + case _UVRSC_WMMXD: + case _UVRSC_WMMXC: + return _UVRSR_NOT_IMPLEMENTED; + + default: + return _UVRSR_FAILED; + } +} + + +/* ABI defined function to pop registers off the stack. 
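+   For _UVRSC_CORE the low 16 bits of DISCRIMINATOR are a register mask;
+   for _UVRSC_VFP and _UVRSC_WMMXD the top halfword is the first register
+   and the low halfword the count, while _UVRSC_WMMXC takes a four-bit
+   mask.  As a purely illustrative call, a personality routine popping
+   r4-r7 and r14 would use
+
+     _Unwind_VRS_Pop (context, _UVRSC_CORE, 0x40f0, _UVRSD_UINT32);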
*/ + +_Unwind_VRS_Result _Unwind_VRS_Pop (_Unwind_Context *context, + _Unwind_VRS_RegClass regclass, + _uw discriminator, + _Unwind_VRS_DataRepresentation representation) +{ + phase1_vrs *vrs = (phase1_vrs *) context; + + switch (regclass) + { + case _UVRSC_CORE: + { + _uw *ptr; + _uw mask; + int i; + + if (representation != _UVRSD_UINT32) + return _UVRSR_FAILED; + + mask = discriminator & 0xffff; + ptr = (_uw *) vrs->core.r[R_SP]; + /* Pop the requested registers. */ + for (i = 0; i < 16; i++) + { + if (mask & (1 << i)) + vrs->core.r[i] = *(ptr++); + } + /* Writeback the stack pointer value if it wasn't restored. */ + if ((mask & (1 << R_SP)) == 0) + vrs->core.r[R_SP] = (_uw) ptr; + } + return _UVRSR_OK; + + case _UVRSC_VFP: + { + _uw start = discriminator >> 16; + _uw count = discriminator & 0xffff; + struct vfp_regs tmp; + struct vfpv3_regs tmp_16_to_31; + int tmp_count; + _uw *sp; + _uw *dest; + int num_vfpv3_regs = 0; + + /* We use an approximation here by bounding _UVRSD_DOUBLE + register numbers at 32 always, since we can't detect if + VFPv3 isn't present (in such a case the upper limit is 16). */ + if ((representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) + || start + count > (representation == _UVRSD_VFPX ? 16 : 32) + || (representation == _UVRSD_VFPX && start >= 16)) + return _UVRSR_FAILED; + + /* Check if we're being asked to pop VFPv3-only registers + (numbers 16 through 31). */ + if (start >= 16) + num_vfpv3_regs = count; + else if (start + count > 16) + num_vfpv3_regs = start + count - 16; + + if (num_vfpv3_regs && representation != _UVRSD_DOUBLE) + return _UVRSR_FAILED; + + /* Demand-save coprocessor registers for stage1. */ + if (start < 16 && (vrs->demand_save_flags & DEMAND_SAVE_VFP)) + { + vrs->demand_save_flags &= ~DEMAND_SAVE_VFP; + + if (representation == _UVRSD_DOUBLE) + { + /* Save in FLDMD/FSTMD format. */ + vrs->demand_save_flags |= DEMAND_SAVE_VFP_D; + __gnu_Unwind_Save_VFP_D (&vrs->vfp); + } + else + { + /* Save in FLDMX/FSTMX format. */ + vrs->demand_save_flags &= ~DEMAND_SAVE_VFP_D; + __gnu_Unwind_Save_VFP (&vrs->vfp); + } + } + + if (num_vfpv3_regs > 0 + && (vrs->demand_save_flags & DEMAND_SAVE_VFP_V3)) + { + vrs->demand_save_flags &= ~DEMAND_SAVE_VFP_V3; + __gnu_Unwind_Save_VFP_D_16_to_31 (&vrs->vfp_regs_16_to_31); + } + + /* Restore the registers from the stack. Do this by saving the + current VFP registers to a memory area, moving the in-memory + values into that area, and restoring from the whole area. + For _UVRSD_VFPX we assume FSTMX standard format 1. */ + if (representation == _UVRSD_VFPX) + __gnu_Unwind_Save_VFP (&tmp); + else + { + /* Save registers 0 .. 15 if required. */ + if (start < 16) + __gnu_Unwind_Save_VFP_D (&tmp); + + /* Save VFPv3 registers 16 .. 31 if required. */ + if (num_vfpv3_regs) + __gnu_Unwind_Save_VFP_D_16_to_31 (&tmp_16_to_31); + } + + /* Work out how many registers below register 16 need popping. */ + tmp_count = num_vfpv3_regs > 0 ? 16 - start : count; + + /* Copy registers below 16, if needed. + The stack address is only guaranteed to be word aligned, so + we can't use doubleword copies. */ + sp = (_uw *) vrs->core.r[R_SP]; + if (tmp_count > 0) + { + tmp_count *= 2; + dest = (_uw *) &tmp.d[start]; + while (tmp_count--) + *(dest++) = *(sp++); + } + + /* Copy VFPv3 registers numbered >= 16, if needed. */ + if (num_vfpv3_regs > 0) + { + /* num_vfpv3_regs is needed below, so copy it. */ + int tmp_count_2 = num_vfpv3_regs * 2; + int vfpv3_start = start < 16 ? 
16 : start; + + dest = (_uw *) &tmp_16_to_31.d[vfpv3_start - 16]; + while (tmp_count_2--) + *(dest++) = *(sp++); + } + + /* Skip the format word space if using FLDMX/FSTMX format. */ + if (representation == _UVRSD_VFPX) + sp++; + + /* Set the new stack pointer. */ + vrs->core.r[R_SP] = (_uw) sp; + + /* Reload the registers. */ + if (representation == _UVRSD_VFPX) + __gnu_Unwind_Restore_VFP (&tmp); + else + { + /* Restore registers 0 .. 15 if required. */ + if (start < 16) + __gnu_Unwind_Restore_VFP_D (&tmp); + + /* Restore VFPv3 registers 16 .. 31 if required. */ + if (num_vfpv3_regs > 0) + __gnu_Unwind_Restore_VFP_D_16_to_31 (&tmp_16_to_31); + } + } + return _UVRSR_OK; + + case _UVRSC_FPA: + return _UVRSR_NOT_IMPLEMENTED; + + case _UVRSC_WMMXD: + { + _uw start = discriminator >> 16; + _uw count = discriminator & 0xffff; + struct wmmxd_regs tmp; + _uw *sp; + _uw *dest; + + if ((representation != _UVRSD_UINT64) || start + count > 16) + return _UVRSR_FAILED; + + if (vrs->demand_save_flags & DEMAND_SAVE_WMMXD) + { + /* Demand-save resisters for stage1. */ + vrs->demand_save_flags &= ~DEMAND_SAVE_WMMXD; + __gnu_Unwind_Save_WMMXD (&vrs->wmmxd); + } + + /* Restore the registers from the stack. Do this by saving the + current WMMXD registers to a memory area, moving the in-memory + values into that area, and restoring from the whole area. */ + __gnu_Unwind_Save_WMMXD (&tmp); + + /* The stack address is only guaranteed to be word aligned, so + we can't use doubleword copies. */ + sp = (_uw *) vrs->core.r[R_SP]; + dest = (_uw *) &tmp.wd[start]; + count *= 2; + while (count--) + *(dest++) = *(sp++); + + /* Set the new stack pointer. */ + vrs->core.r[R_SP] = (_uw) sp; + + /* Reload the registers. */ + __gnu_Unwind_Restore_WMMXD (&tmp); + } + return _UVRSR_OK; + + case _UVRSC_WMMXC: + { + int i; + struct wmmxc_regs tmp; + _uw *sp; + + if ((representation != _UVRSD_UINT32) || discriminator > 16) + return _UVRSR_FAILED; + + if (vrs->demand_save_flags & DEMAND_SAVE_WMMXC) + { + /* Demand-save resisters for stage1. */ + vrs->demand_save_flags &= ~DEMAND_SAVE_WMMXC; + __gnu_Unwind_Save_WMMXC (&vrs->wmmxc); + } + + /* Restore the registers from the stack. Do this by saving the + current WMMXC registers to a memory area, moving the in-memory + values into that area, and restoring from the whole area. */ + __gnu_Unwind_Save_WMMXC (&tmp); + + sp = (_uw *) vrs->core.r[R_SP]; + for (i = 0; i < 4; i++) + if (discriminator & (1 << i)) + tmp.wc[i] = *(sp++); + + /* Set the new stack pointer. */ + vrs->core.r[R_SP] = (_uw) sp; + + /* Reload the registers. */ + __gnu_Unwind_Restore_WMMXC (&tmp); + } + return _UVRSR_OK; + + default: + return _UVRSR_FAILED; + } +} + + +/* Core unwinding functions. */ + +/* Calculate the address encoded by a 31-bit self-relative offset at address + P. */ +static inline _uw +selfrel_offset31 (const _uw *p) +{ + _uw offset; + + offset = *p; + /* Sign extend to 32 bits. */ + if (offset & (1 << 30)) + offset |= 1u << 31; + else + offset &= ~(1u << 31); + + return offset + (_uw) p; +} + + +/* Perform a binary search for RETURN_ADDRESS in TABLE. The table contains + NREC entries. 
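+   The entries must be sorted by increasing fnoffset (the linker keeps
+   .ARM.exidx in this order), and each fnoffset is decoded with
+   selfrel_offset31 above; for example a word holding 0x7ffffff8 at
+   address P encodes P - 8.  The result is the entry covering
+   RETURN_ADDRESS (each entry covers from its own function start up to
+   the next entry's), or a null pointer if the address precedes the
+   first entry.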
*/ + +static const __EIT_entry * +search_EIT_table (const __EIT_entry * table, int nrec, _uw return_address) +{ + _uw next_fn; + _uw this_fn; + int n, left, right; + + if (nrec == 0) + return (__EIT_entry *) 0; + + left = 0; + right = nrec - 1; + + while (1) + { + n = (left + right) / 2; + this_fn = selfrel_offset31 (&table[n].fnoffset); + if (n != nrec - 1) + next_fn = selfrel_offset31 (&table[n + 1].fnoffset) - 1; + else + next_fn = (_uw)0 - 1; + + if (return_address < this_fn) + { + if (n == left) + return (__EIT_entry *) 0; + right = n - 1; + } + else if (return_address <= next_fn) + return &table[n]; + else + left = n + 1; + } +} + +/* Find the exception index table eintry for the given address. + Fill in the relevant fields of the UCB. + Returns _URC_FAILURE if an error occurred, _URC_OK on success. */ + +static _Unwind_Reason_Code +get_eit_entry (_Unwind_Control_Block *ucbp, _uw return_address) +{ + const __EIT_entry * eitp; + int nrec; + + /* The return address is the address of the instruction following the + call instruction (plus one in thumb mode). If this was the last + instruction in the function the address will lie in the following + function. Subtract 2 from the address so that it points within the call + instruction itself. */ + return_address -= 2; + + if (__gnu_Unwind_Find_exidx) + { + eitp = (const __EIT_entry *) __gnu_Unwind_Find_exidx (return_address, + &nrec); + if (!eitp) + { + UCB_PR_ADDR (ucbp) = 0; + return _URC_FAILURE; + } + } + else + { + eitp = &__exidx_start; + nrec = &__exidx_end - &__exidx_start; + } + + eitp = search_EIT_table (eitp, nrec, return_address); + + if (!eitp) + { + UCB_PR_ADDR (ucbp) = 0; + return _URC_FAILURE; + } + ucbp->pr_cache.fnstart = selfrel_offset31 (&eitp->fnoffset); + + /* Can this frame be unwound at all? */ + if (eitp->content == EXIDX_CANTUNWIND) + { + UCB_PR_ADDR (ucbp) = 0; + return _URC_END_OF_STACK; + } + + /* Obtain the address of the "real" __EHT_Header word. */ + + if (eitp->content & uint32_highbit) + { + /* It is immediate data. */ + ucbp->pr_cache.ehtp = (_Unwind_EHT_Header *)&eitp->content; + ucbp->pr_cache.additional = 1; + } + else + { + /* The low 31 bits of the content field are a self-relative + offset to an _Unwind_EHT_Entry structure. */ + ucbp->pr_cache.ehtp = + (_Unwind_EHT_Header *) selfrel_offset31 (&eitp->content); + ucbp->pr_cache.additional = 0; + } + + /* Discover the personality routine address. */ + if (*ucbp->pr_cache.ehtp & (1u << 31)) + { + /* One of the predefined standard routines. */ + _uw idx = (*(_uw *) ucbp->pr_cache.ehtp >> 24) & 0xf; + if (idx == 0) + UCB_PR_ADDR (ucbp) = (_uw) &__aeabi_unwind_cpp_pr0; + else if (idx == 1) + UCB_PR_ADDR (ucbp) = (_uw) &__aeabi_unwind_cpp_pr1; + else if (idx == 2) + UCB_PR_ADDR (ucbp) = (_uw) &__aeabi_unwind_cpp_pr2; + else + { /* Failed */ + UCB_PR_ADDR (ucbp) = 0; + return _URC_FAILURE; + } + } + else + { + /* Execute region offset to PR */ + UCB_PR_ADDR (ucbp) = selfrel_offset31 (ucbp->pr_cache.ehtp); + } + return _URC_OK; +} + + +/* Perform phase2 unwinding. VRS is the initial virtual register state. */ + +static void __attribute__((noreturn)) +unwind_phase2 (_Unwind_Control_Block * ucbp, phase2_vrs * vrs) +{ + _Unwind_Reason_Code pr_result; + + do + { + /* Find the entry for this routine. */ + if (get_eit_entry (ucbp, vrs->core.r[R_PC]) != _URC_OK) + abort (); + + UCB_SAVED_CALLSITE_ADDR (ucbp) = vrs->core.r[R_PC]; + + /* Call the pr to decide what to do. 
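+	 The personality routine answers _URC_CONTINUE_UNWIND to move on
+	 to the caller's frame, or _URC_INSTALL_CONTEXT once it has
+	 written the landing pad state into the VRS; anything else is
+	 treated as a fatal error below.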
*/ + pr_result = ((personality_routine) UCB_PR_ADDR (ucbp)) + (_US_UNWIND_FRAME_STARTING, ucbp, (_Unwind_Context *) vrs); + } + while (pr_result == _URC_CONTINUE_UNWIND); + + if (pr_result != _URC_INSTALL_CONTEXT) + abort(); + + restore_core_regs (&vrs->core); +} + +/* Perform phase2 forced unwinding. */ + +static _Unwind_Reason_Code +unwind_phase2_forced (_Unwind_Control_Block *ucbp, phase2_vrs *entry_vrs, + int resuming) +{ + _Unwind_Stop_Fn stop_fn = (_Unwind_Stop_Fn) UCB_FORCED_STOP_FN (ucbp); + void *stop_arg = (void *)UCB_FORCED_STOP_ARG (ucbp); + _Unwind_Reason_Code pr_result = 0; + /* We use phase1_vrs here even though we do not demand save, for the + prev_sp field. */ + phase1_vrs saved_vrs, next_vrs; + + /* Save the core registers. */ + saved_vrs.core = entry_vrs->core; + /* We don't need to demand-save the non-core registers, because we + unwind in a single pass. */ + saved_vrs.demand_save_flags = 0; + + /* Unwind until we reach a propagation barrier. */ + do + { + _Unwind_State action; + _Unwind_Reason_Code entry_code; + _Unwind_Reason_Code stop_code; + + /* Find the entry for this routine. */ + entry_code = get_eit_entry (ucbp, saved_vrs.core.r[R_PC]); + + if (resuming) + { + action = _US_UNWIND_FRAME_RESUME | _US_FORCE_UNWIND; + resuming = 0; + } + else + action = _US_UNWIND_FRAME_STARTING | _US_FORCE_UNWIND; + + if (entry_code == _URC_OK) + { + UCB_SAVED_CALLSITE_ADDR (ucbp) = saved_vrs.core.r[R_PC]; + + next_vrs = saved_vrs; + + /* Call the pr to decide what to do. */ + pr_result = ((personality_routine) UCB_PR_ADDR (ucbp)) + (action, ucbp, (void *) &next_vrs); + + saved_vrs.prev_sp = next_vrs.core.r[R_SP]; + } + else + { + /* Treat any failure as the end of unwinding, to cope more + gracefully with missing EH information. Mixed EH and + non-EH within one object will usually result in failure, + because the .ARM.exidx tables do not indicate the end + of the code to which they apply; but mixed EH and non-EH + shared objects should return an unwind failure at the + entry of a non-EH shared object. */ + action |= _US_END_OF_STACK; + + saved_vrs.prev_sp = saved_vrs.core.r[R_SP]; + } + + stop_code = stop_fn (1, action, ucbp->exception_class, ucbp, + (void *)&saved_vrs, stop_arg); + if (stop_code != _URC_NO_REASON) + return _URC_FAILURE; + + if (entry_code != _URC_OK) + return entry_code; + + saved_vrs = next_vrs; + } + while (pr_result == _URC_CONTINUE_UNWIND); + + if (pr_result != _URC_INSTALL_CONTEXT) + { + /* Some sort of failure has occurred in the pr and probably the + pr returned _URC_FAILURE. */ + return _URC_FAILURE; + } + + restore_core_regs (&saved_vrs.core); +} + +/* This is a very limited implementation of _Unwind_GetCFA. It returns + the stack pointer as it is about to be unwound, and is only valid + while calling the stop function during forced unwinding. If the + current personality routine result is going to run a cleanup, this + will not be the CFA; but when the frame is really unwound, it will + be. */ + +_Unwind_Word +_Unwind_GetCFA (_Unwind_Context *context) +{ + return ((phase1_vrs *) context)->prev_sp; +} + +/* Perform phase1 unwinding. UCBP is the exception being thrown, and + entry_VRS is the register state on entry to _Unwind_RaiseException. */ + +_Unwind_Reason_Code +__gnu_Unwind_RaiseException (_Unwind_Control_Block *, phase2_vrs *); + +_Unwind_Reason_Code +__gnu_Unwind_RaiseException (_Unwind_Control_Block * ucbp, + phase2_vrs * entry_vrs) +{ + phase1_vrs saved_vrs; + _Unwind_Reason_Code pr_result; + + /* Set the pc to the call site. 
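+     We get here through a call from the raising frame (via the assembly
+     wrapper), so the saved LR identifies the frame where the phase 1
+     handler search must begin.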
*/ + entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR]; + + /* Save the core registers. */ + saved_vrs.core = entry_vrs->core; + /* Set demand-save flags. */ + saved_vrs.demand_save_flags = ~(_uw) 0; + + /* Unwind until we reach a propagation barrier. */ + do + { + /* Find the entry for this routine. */ + if (get_eit_entry (ucbp, saved_vrs.core.r[R_PC]) != _URC_OK) + return _URC_FAILURE; + + /* Call the pr to decide what to do. */ + pr_result = ((personality_routine) UCB_PR_ADDR (ucbp)) + (_US_VIRTUAL_UNWIND_FRAME, ucbp, (void *) &saved_vrs); + } + while (pr_result == _URC_CONTINUE_UNWIND); + + /* We've unwound as far as we want to go, so restore the original + register state. */ + restore_non_core_regs (&saved_vrs); + if (pr_result != _URC_HANDLER_FOUND) + { + /* Some sort of failure has occurred in the pr and probably the + pr returned _URC_FAILURE. */ + return _URC_FAILURE; + } + + unwind_phase2 (ucbp, entry_vrs); +} + +/* Resume unwinding after a cleanup has been run. UCBP is the exception + being thrown and ENTRY_VRS is the register state on entry to + _Unwind_Resume. */ +_Unwind_Reason_Code +__gnu_Unwind_ForcedUnwind (_Unwind_Control_Block *, + _Unwind_Stop_Fn, void *, phase2_vrs *); + +_Unwind_Reason_Code +__gnu_Unwind_ForcedUnwind (_Unwind_Control_Block *ucbp, + _Unwind_Stop_Fn stop_fn, void *stop_arg, + phase2_vrs *entry_vrs) +{ + UCB_FORCED_STOP_FN (ucbp) = (_uw) stop_fn; + UCB_FORCED_STOP_ARG (ucbp) = (_uw) stop_arg; + + /* Set the pc to the call site. */ + entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR]; + + return unwind_phase2_forced (ucbp, entry_vrs, 0); +} + +_Unwind_Reason_Code +__gnu_Unwind_Resume (_Unwind_Control_Block *, phase2_vrs *); + +_Unwind_Reason_Code +__gnu_Unwind_Resume (_Unwind_Control_Block * ucbp, phase2_vrs * entry_vrs) +{ + _Unwind_Reason_Code pr_result; + + /* Recover the saved address. */ + entry_vrs->core.r[R_PC] = UCB_SAVED_CALLSITE_ADDR (ucbp); + + if (UCB_FORCED_STOP_FN (ucbp)) + { + unwind_phase2_forced (ucbp, entry_vrs, 1); + + /* We can't return failure at this point. */ + abort (); + } + + /* Call the cached PR. */ + pr_result = ((personality_routine) UCB_PR_ADDR (ucbp)) + (_US_UNWIND_FRAME_RESUME, ucbp, (_Unwind_Context *) entry_vrs); + + switch (pr_result) + { + case _URC_INSTALL_CONTEXT: + /* Upload the registers to enter the landing pad. */ + restore_core_regs (&entry_vrs->core); + + case _URC_CONTINUE_UNWIND: + /* Continue unwinding the next frame. */ + unwind_phase2 (ucbp, entry_vrs); + + default: + abort (); + } +} + +_Unwind_Reason_Code +__gnu_Unwind_Resume_or_Rethrow (_Unwind_Control_Block *, phase2_vrs *); + +_Unwind_Reason_Code +__gnu_Unwind_Resume_or_Rethrow (_Unwind_Control_Block * ucbp, + phase2_vrs * entry_vrs) +{ + if (!UCB_FORCED_STOP_FN (ucbp)) + return __gnu_Unwind_RaiseException (ucbp, entry_vrs); + + /* Set the pc to the call site. */ + entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR]; + /* Continue unwinding the next frame. */ + return unwind_phase2_forced (ucbp, entry_vrs, 0); +} + +/* Clean up an exception object when unwinding is complete. */ +void +_Unwind_Complete (_Unwind_Control_Block * ucbp __attribute__((unused))) +{ +} + + +/* Get the _Unwind_Control_Block from an _Unwind_Context. */ + +static inline _Unwind_Control_Block * +unwind_UCB_from_context (_Unwind_Context * context) +{ + return (_Unwind_Control_Block *) _Unwind_GetGR (context, R_IP); +} + + +/* Free an exception. 
*/ + +void +_Unwind_DeleteException (_Unwind_Exception * exc) +{ + if (exc->exception_cleanup) + (*exc->exception_cleanup) (_URC_FOREIGN_EXCEPTION_CAUGHT, exc); +} + + +/* Perform stack backtrace through unwind data. */ +_Unwind_Reason_Code +__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument, + phase2_vrs * entry_vrs); +_Unwind_Reason_Code +__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument, + phase2_vrs * entry_vrs) +{ + phase1_vrs saved_vrs; + _Unwind_Reason_Code code; + + _Unwind_Control_Block ucb; + _Unwind_Control_Block *ucbp = &ucb; + + /* Set the pc to the call site. */ + entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR]; + + /* Save the core registers. */ + saved_vrs.core = entry_vrs->core; + /* Set demand-save flags. */ + saved_vrs.demand_save_flags = ~(_uw) 0; + + do + { + /* Find the entry for this routine. */ + if (get_eit_entry (ucbp, saved_vrs.core.r[R_PC]) != _URC_OK) + { + code = _URC_FAILURE; + break; + } + + /* The dwarf unwinder assumes the context structure holds things + like the function and LSDA pointers. The ARM implementation + caches these in the exception header (UCB). To avoid + rewriting everything we make the virtual IP register point at + the UCB. */ + _Unwind_SetGR((_Unwind_Context *)&saved_vrs, 12, (_Unwind_Ptr) ucbp); + + /* Call trace function. */ + if ((*trace) ((_Unwind_Context *) &saved_vrs, trace_argument) + != _URC_NO_REASON) + { + code = _URC_FAILURE; + break; + } + + /* Call the pr to decide what to do. */ + code = ((personality_routine) UCB_PR_ADDR (ucbp)) + (_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, + ucbp, (void *) &saved_vrs); + } + while (code != _URC_END_OF_STACK + && code != _URC_FAILURE); + + restore_non_core_regs (&saved_vrs); + return code; +} + + +/* Common implementation for ARM ABI defined personality routines. + ID is the index of the personality routine, other arguments are as defined + by __aeabi_unwind_cpp_pr{0,1,2}. */ + +static _Unwind_Reason_Code +__gnu_unwind_pr_common (_Unwind_State state, + _Unwind_Control_Block *ucbp, + _Unwind_Context *context, + int id) +{ + __gnu_unwind_state uws; + _uw *data; + _uw offset; + _uw len; + _uw rtti_count; + int phase2_call_unexpected_after_unwind = 0; + int in_range = 0; + int forced_unwind = state & _US_FORCE_UNWIND; + + state &= _US_ACTION_MASK; + + data = (_uw *) ucbp->pr_cache.ehtp; + uws.data = *(data++); + uws.next = data; + if (id == 0) + { + uws.data <<= 8; + uws.words_left = 0; + uws.bytes_left = 3; + } + else + { + uws.words_left = (uws.data >> 16) & 0xff; + uws.data <<= 16; + uws.bytes_left = 2; + data += uws.words_left; + } + + /* Restore the saved pointer. */ + if (state == _US_UNWIND_FRAME_RESUME) + data = (_uw *) ucbp->cleanup_cache.bitpattern[0]; + + if ((ucbp->pr_cache.additional & 1) == 0) + { + /* Process descriptors. */ + while (*data) + { + _uw addr; + _uw fnstart; + + if (id == 2) + { + len = ((EHT32 *) data)->length; + offset = ((EHT32 *) data)->offset; + data += 2; + } + else + { + len = ((EHT16 *) data)->length; + offset = ((EHT16 *) data)->offset; + data++; + } + + fnstart = ucbp->pr_cache.fnstart + (offset & ~1); + addr = _Unwind_GetGR (context, R_PC); + in_range = (fnstart <= addr && addr < fnstart + (len & ~1)); + + switch (((offset & 1) << 1) | (len & 1)) + { + case 0: + /* Cleanup. */ + if (state != _US_VIRTUAL_UNWIND_FRAME + && in_range) + { + /* Cleanup in range, and we are running cleanups. */ + _uw lp; + + /* Landing pad address is 31-bit pc-relative offset. 
*/ + lp = selfrel_offset31 (data); + data++; + /* Save the exception data pointer. */ + ucbp->cleanup_cache.bitpattern[0] = (_uw) data; + if (!__cxa_begin_cleanup (ucbp)) + return _URC_FAILURE; + /* Setup the VRS to enter the landing pad. */ + _Unwind_SetGR (context, R_PC, lp); + return _URC_INSTALL_CONTEXT; + } + /* Cleanup not in range, or we are in stage 1. */ + data++; + break; + + case 1: + /* Catch handler. */ + if (state == _US_VIRTUAL_UNWIND_FRAME) + { + if (in_range) + { + /* Check for a barrier. */ + _uw rtti; + bool is_reference = (data[0] & uint32_highbit) != 0; + void *matched; + + /* Check for no-throw areas. */ + if (data[1] == (_uw) -2) + return _URC_FAILURE; + + /* The thrown object immediately follows the ECB. */ + matched = (void *)(ucbp + 1); + if (data[1] != (_uw) -1) + { + /* Match a catch specification. */ + rtti = _Unwind_decode_target2 ((_uw) &data[1]); + if (!__cxa_type_match (ucbp, (type_info *) rtti, + is_reference, + &matched)) + matched = (void *)0; + } + + if (matched) + { + ucbp->barrier_cache.sp = + _Unwind_GetGR (context, R_SP); + ucbp->barrier_cache.bitpattern[0] = (_uw) matched; + ucbp->barrier_cache.bitpattern[1] = (_uw) data; + return _URC_HANDLER_FOUND; + } + } + /* Handler out of range, or not matched. */ + } + else if (ucbp->barrier_cache.sp == _Unwind_GetGR (context, R_SP) + && ucbp->barrier_cache.bitpattern[1] == (_uw) data) + { + /* Matched a previous propagation barrier. */ + _uw lp; + + /* Setup for entry to the handler. */ + lp = selfrel_offset31 (data); + _Unwind_SetGR (context, R_PC, lp); + _Unwind_SetGR (context, 0, (_uw) ucbp); + return _URC_INSTALL_CONTEXT; + } + /* Catch handler not matched. Advance to the next descriptor. */ + data += 2; + break; + + case 2: + rtti_count = data[0] & 0x7fffffff; + /* Exception specification. */ + if (state == _US_VIRTUAL_UNWIND_FRAME) + { + if (in_range && (!forced_unwind || !rtti_count)) + { + /* Match against the exception specification. */ + _uw i; + _uw rtti; + void *matched; + + for (i = 0; i < rtti_count; i++) + { + matched = (void *)(ucbp + 1); + rtti = _Unwind_decode_target2 ((_uw) &data[i + 1]); + if (__cxa_type_match (ucbp, (type_info *) rtti, 0, + &matched)) + break; + } + + if (i == rtti_count) + { + /* Exception does not match the spec. */ + ucbp->barrier_cache.sp = + _Unwind_GetGR (context, R_SP); + ucbp->barrier_cache.bitpattern[0] = (_uw) matched; + ucbp->barrier_cache.bitpattern[1] = (_uw) data; + return _URC_HANDLER_FOUND; + } + } + /* Handler out of range, or exception is permitted. */ + } + else if (ucbp->barrier_cache.sp == _Unwind_GetGR (context, R_SP) + && ucbp->barrier_cache.bitpattern[1] == (_uw) data) + { + /* Matched a previous propagation barrier. */ + _uw lp; + /* Record the RTTI list for __cxa_call_unexpected. */ + ucbp->barrier_cache.bitpattern[1] = rtti_count; + ucbp->barrier_cache.bitpattern[2] = 0; + ucbp->barrier_cache.bitpattern[3] = 4; + ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1]; + + if (data[0] & uint32_highbit) + phase2_call_unexpected_after_unwind = 1; + else + { + data += rtti_count + 1; + /* Setup for entry to the handler. */ + lp = selfrel_offset31 (data); + data++; + _Unwind_SetGR (context, R_PC, lp); + _Unwind_SetGR (context, 0, (_uw) ucbp); + return _URC_INSTALL_CONTEXT; + } + } + if (data[0] & uint32_highbit) + data++; + data += rtti_count + 1; + break; + + default: + /* Should never happen. */ + return _URC_FAILURE; + } + /* Finished processing this descriptor. 
*/ + } + } + + if (__gnu_unwind_execute (context, &uws) != _URC_OK) + return _URC_FAILURE; + + if (phase2_call_unexpected_after_unwind) + { + /* Enter __cxa_unexpected as if called from the call site. */ + _Unwind_SetGR (context, R_LR, _Unwind_GetGR (context, R_PC)); + _Unwind_SetGR (context, R_PC, (_uw) &__cxa_call_unexpected); + return _URC_INSTALL_CONTEXT; + } + + return _URC_CONTINUE_UNWIND; +} + + +/* ABI defined personality routine entry points. */ + +_Unwind_Reason_Code +__aeabi_unwind_cpp_pr0 (_Unwind_State state, + _Unwind_Control_Block *ucbp, + _Unwind_Context *context) +{ + return __gnu_unwind_pr_common (state, ucbp, context, 0); +} + +_Unwind_Reason_Code +__aeabi_unwind_cpp_pr1 (_Unwind_State state, + _Unwind_Control_Block *ucbp, + _Unwind_Context *context) +{ + return __gnu_unwind_pr_common (state, ucbp, context, 1); +} + +_Unwind_Reason_Code +__aeabi_unwind_cpp_pr2 (_Unwind_State state, + _Unwind_Control_Block *ucbp, + _Unwind_Context *context) +{ + return __gnu_unwind_pr_common (state, ucbp, context, 2); +} diff --git a/gcc/config/arm/unwind-arm.h b/gcc/config/arm/unwind-arm.h new file mode 100644 index 000000000..a9ba1267a --- /dev/null +++ b/gcc/config/arm/unwind-arm.h @@ -0,0 +1,281 @@ +/* Header file for the ARM EABI unwinder + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 + Free Software Foundation, Inc. + Contributed by Paul Brook + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Language-independent unwinder header public defines. This contains both + ABI defined objects, and GNU support routines. 
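+   As a rough sketch of how a language runtime drives this interface (the
+   names raise_one and my_cleanup and the class string are invented for
+   the example and appear nowhere in the sources), raising an exception
+   amounts to:
+
+     static void my_cleanup (_Unwind_Reason_Code r, _Unwind_Control_Block *u)
+     {
+       __builtin_free (u);
+     }
+
+     void raise_one (void)
+     {
+       _Unwind_Control_Block *ucb = __builtin_calloc (1, sizeof *ucb);
+       __builtin_memcpy (ucb->exception_class, "HYPC\0\0\0\0", 8);
+       ucb->exception_cleanup = my_cleanup;
+       _Unwind_RaiseException (ucb);
+       __builtin_abort ();
+     }
+
+   _Unwind_RaiseException only returns when no handler is found, in which
+   case the runtime typically terminates.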
*/ + +#ifndef UNWIND_ARM_H +#define UNWIND_ARM_H + +#define __ARM_EABI_UNWINDER__ 1 + +#ifdef __cplusplus +extern "C" { +#endif + typedef unsigned _Unwind_Word __attribute__((__mode__(__word__))); + typedef signed _Unwind_Sword __attribute__((__mode__(__word__))); + typedef unsigned _Unwind_Ptr __attribute__((__mode__(__pointer__))); + typedef unsigned _Unwind_Internal_Ptr __attribute__((__mode__(__pointer__))); + typedef _Unwind_Word _uw; + typedef unsigned _uw64 __attribute__((mode(__DI__))); + typedef unsigned _uw16 __attribute__((mode(__HI__))); + typedef unsigned _uw8 __attribute__((mode(__QI__))); + + typedef enum + { + _URC_OK = 0, /* operation completed successfully */ + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8, + _URC_FAILURE = 9 /* unspecified failure of some kind */ + } + _Unwind_Reason_Code; + + typedef enum + { + _US_VIRTUAL_UNWIND_FRAME = 0, + _US_UNWIND_FRAME_STARTING = 1, + _US_UNWIND_FRAME_RESUME = 2, + _US_ACTION_MASK = 3, + _US_FORCE_UNWIND = 8, + _US_END_OF_STACK = 16 + } + _Unwind_State; + + /* Provided only for for compatibility with existing code. */ + typedef int _Unwind_Action; +#define _UA_SEARCH_PHASE 1 +#define _UA_CLEANUP_PHASE 2 +#define _UA_HANDLER_FRAME 4 +#define _UA_FORCE_UNWIND 8 +#define _UA_END_OF_STACK 16 +#define _URC_NO_REASON _URC_OK + + typedef struct _Unwind_Control_Block _Unwind_Control_Block; + typedef struct _Unwind_Context _Unwind_Context; + typedef _uw _Unwind_EHT_Header; + + + /* UCB: */ + + struct _Unwind_Control_Block + { + char exception_class[8]; + void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *); + /* Unwinder cache, private fields for the unwinder's use */ + struct + { + _uw reserved1; /* Forced unwind stop fn, 0 if not forced */ + _uw reserved2; /* Personality routine address */ + _uw reserved3; /* Saved callsite address */ + _uw reserved4; /* Forced unwind stop arg */ + _uw reserved5; + } + unwinder_cache; + /* Propagation barrier cache (valid after phase 1): */ + struct + { + _uw sp; + _uw bitpattern[5]; + } + barrier_cache; + /* Cleanup cache (preserved over cleanup): */ + struct + { + _uw bitpattern[4]; + } + cleanup_cache; + /* Pr cache (for pr's benefit): */ + struct + { + _uw fnstart; /* function start address */ + _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */ + _uw additional; /* additional data */ + _uw reserved1; + } + pr_cache; + long long int :0; /* Force alignment to 8-byte boundary */ + }; + + /* Virtual Register Set*/ + + typedef enum + { + _UVRSC_CORE = 0, /* integer register */ + _UVRSC_VFP = 1, /* vfp */ + _UVRSC_FPA = 2, /* fpa */ + _UVRSC_WMMXD = 3, /* Intel WMMX data register */ + _UVRSC_WMMXC = 4 /* Intel WMMX control register */ + } + _Unwind_VRS_RegClass; + + typedef enum + { + _UVRSD_UINT32 = 0, + _UVRSD_VFPX = 1, + _UVRSD_FPAX = 2, + _UVRSD_UINT64 = 3, + _UVRSD_FLOAT = 4, + _UVRSD_DOUBLE = 5 + } + _Unwind_VRS_DataRepresentation; + + typedef enum + { + _UVRSR_OK = 0, + _UVRSR_NOT_IMPLEMENTED = 1, + _UVRSR_FAILED = 2 + } + _Unwind_VRS_Result; + + /* Frame unwinding state. */ + typedef struct + { + /* The current word (bytes packed msb first). */ + _uw data; + /* Pointer to the next word of data. */ + _uw *next; + /* The number of bytes left in this word. */ + _uw8 bytes_left; + /* The number of words pointed to by ptr. 
*/ + _uw8 words_left; + } + __gnu_unwind_state; + + typedef _Unwind_Reason_Code (*personality_routine) (_Unwind_State, + _Unwind_Control_Block *, _Unwind_Context *); + + _Unwind_VRS_Result _Unwind_VRS_Set(_Unwind_Context *, _Unwind_VRS_RegClass, + _uw, _Unwind_VRS_DataRepresentation, + void *); + + _Unwind_VRS_Result _Unwind_VRS_Get(_Unwind_Context *, _Unwind_VRS_RegClass, + _uw, _Unwind_VRS_DataRepresentation, + void *); + + _Unwind_VRS_Result _Unwind_VRS_Pop(_Unwind_Context *, _Unwind_VRS_RegClass, + _uw, _Unwind_VRS_DataRepresentation); + + + /* Support functions for the PR. */ +#define _Unwind_Exception _Unwind_Control_Block + typedef char _Unwind_Exception_Class[8]; + + void * _Unwind_GetLanguageSpecificData (_Unwind_Context *); + _Unwind_Ptr _Unwind_GetRegionStart (_Unwind_Context *); + + /* These two should never be used. */ + _Unwind_Ptr _Unwind_GetDataRelBase (_Unwind_Context *); + _Unwind_Ptr _Unwind_GetTextRelBase (_Unwind_Context *); + + /* Interface functions: */ + _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Control_Block *ucbp); + void __attribute__((noreturn)) _Unwind_Resume(_Unwind_Control_Block *ucbp); + _Unwind_Reason_Code _Unwind_Resume_or_Rethrow (_Unwind_Control_Block *ucbp); + + typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn) + (int, _Unwind_Action, _Unwind_Exception_Class, + _Unwind_Control_Block *, struct _Unwind_Context *, void *); + _Unwind_Reason_Code _Unwind_ForcedUnwind (_Unwind_Control_Block *, + _Unwind_Stop_Fn, void *); + /* @@@ Use unwind data to perform a stack backtrace. The trace callback + is called for every stack frame in the call chain, but no cleanup + actions are performed. */ + typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn) (_Unwind_Context *, void *); + _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, + void*); + + _Unwind_Word _Unwind_GetCFA (struct _Unwind_Context *); + void _Unwind_Complete(_Unwind_Control_Block *ucbp); + void _Unwind_DeleteException (_Unwind_Exception *); + + _Unwind_Reason_Code __gnu_unwind_frame (_Unwind_Control_Block *, + _Unwind_Context *); + _Unwind_Reason_Code __gnu_unwind_execute (_Unwind_Context *, + __gnu_unwind_state *); + + /* Decode an R_ARM_TARGET2 relocation. */ + static inline _Unwind_Word + _Unwind_decode_target2 (_Unwind_Word ptr) + { + _Unwind_Word tmp; + + tmp = *(_Unwind_Word *) ptr; + /* Zero values are always NULL. */ + if (!tmp) + return 0; + +#if (defined(linux) && !defined(__uClinux__)) || defined(__NetBSD__) + /* Pc-relative indirect. */ + tmp += ptr; + tmp = *(_Unwind_Word *) tmp; +#elif defined(__symbian__) || defined(__uClinux__) + /* Absolute pointer. Nothing more to do. */ +#else + /* Pc-relative pointer. */ + tmp += ptr; +#endif + return tmp; + } + + static inline _Unwind_Word + _Unwind_GetGR (_Unwind_Context *context, int regno) + { + _uw val; + _Unwind_VRS_Get (context, _UVRSC_CORE, regno, _UVRSD_UINT32, &val); + return val; + } + + /* Return the address of the instruction, not the actual IP value. */ +#define _Unwind_GetIP(context) \ + (_Unwind_GetGR (context, 15) & ~(_Unwind_Word)1) + +#define _Unwind_GetIPInfo(context, ip_before_insn) \ + (*ip_before_insn = 0, _Unwind_GetGR (context, 15) & ~(_Unwind_Word)1) + + static inline void + _Unwind_SetGR (_Unwind_Context *context, int regno, _Unwind_Word val) + { + _Unwind_VRS_Set (context, _UVRSC_CORE, regno, _UVRSD_UINT32, &val); + } + + /* The dwarf unwinder doesn't understand arm/thumb state. We assume the + landing pad uses the same instruction set as the call site. 
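+   Bit 0 of the value is the Thumb bit: _Unwind_SetIP below keeps that
+   bit from the current r15, while _Unwind_GetIP above masks it off.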
 */
+#define _Unwind_SetIP(context, val) \
+  _Unwind_SetGR (context, 15, val | (_Unwind_GetGR (context, 15) & 1))
+
+/* leb128 type numbers have a potentially unlimited size.
+   The target of the following definitions of _sleb128_t and _uleb128_t
+   is to have efficient data types large enough to hold the leb128 type
+   numbers used in the unwind code.  */
+typedef long _sleb128_t;
+typedef unsigned long _uleb128_t;
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* defined UNWIND_ARM_H */
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
new file mode 100644
index 000000000..c27c41411
--- /dev/null
+++ b/gcc/config/arm/vec-common.md
@@ -0,0 +1,110 @@
+;; Machine Description for shared bits common to IWMMXT and Neon.
+;; Copyright (C) 2006, 2007, 2010 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Vector Moves
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
+        (match_operand:VALL 1 "general_operand" ""))]
+  "TARGET_NEON
+   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+  if (can_create_pseudo_p ())
+    {
+      if (GET_CODE (operands[0]) != REG)
+        operands[1] = force_reg (<MODE>mode, operands[1]);
+      else if (TARGET_NEON && CONSTANT_P (operands[1]))
+        {
+          operands[1] = neon_make_constant (operands[1]);
+          gcc_assert (operands[1] != NULL_RTX);
+        }
+    }
+})
+
+;; Vector arithmetic.  Expanders are blank, then unnamed insns implement
+;; patterns separately for IWMMXT and Neon.
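The arithmetic expanders that follow are deliberately empty: they only gate which modes are accepted, and the actual work is done by the separate IWMMXT and Neon patterns mentioned above. As a rough illustration of the kind of source they service, the C sketch below (not part of the upstream file; the type and function names are arbitrary) uses GCC's generic vector extension, which reaches mov<mode>/add<mode>3 with <MODE> = V4HI when built with e.g. -mfpu=neon -mfloat-abi=softfp, or on an iWMMXt target:

    /* Illustrative sketch only, not part of vec-common.md.  */
    typedef short v4hi __attribute__ ((vector_size (8)));

    v4hi
    add_v4hi (v4hi a, v4hi b)
    {
      return a + b;    /* expanded through the add<mode>3 pattern that follows */
    }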
+
+(define_expand "add<mode>3"
+  [(set (match_operand:VALL 0 "s_register_operand" "")
+        (plus:VALL (match_operand:VALL 1 "s_register_operand" "")
+                   (match_operand:VALL 2 "s_register_operand" "")))]
+  "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+                    || flag_unsafe_math_optimizations))
+   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "sub<mode>3"
+  [(set (match_operand:VALL 0 "s_register_operand" "")
+        (minus:VALL (match_operand:VALL 1 "s_register_operand" "")
+                    (match_operand:VALL 2 "s_register_operand" "")))]
+  "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+                    || flag_unsafe_math_optimizations))
+   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "mul<mode>3"
+  [(set (match_operand:VALLW 0 "s_register_operand" "")
+        (mult:VALLW (match_operand:VALLW 1 "s_register_operand" "")
+                    (match_operand:VALLW 2 "s_register_operand" "")))]
+  "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+                    || flag_unsafe_math_optimizations))
+   || (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)"
+{
+})
+
+(define_expand "smin<mode>3"
+  [(set (match_operand:VALLW 0 "s_register_operand" "")
+        (smin:VALLW (match_operand:VALLW 1 "s_register_operand" "")
+                    (match_operand:VALLW 2 "s_register_operand" "")))]
+  "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+                    || flag_unsafe_math_optimizations))
+   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "umin<mode>3"
+  [(set (match_operand:VINTW 0 "s_register_operand" "")
+        (umin:VINTW (match_operand:VINTW 1 "s_register_operand" "")
+                    (match_operand:VINTW 2 "s_register_operand" "")))]
+  "TARGET_NEON
+   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "smax<mode>3"
+  [(set (match_operand:VALLW 0 "s_register_operand" "")
+        (smax:VALLW (match_operand:VALLW 1 "s_register_operand" "")
+                    (match_operand:VALLW 2 "s_register_operand" "")))]
+  "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+                    || flag_unsafe_math_optimizations))
+   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "umax<mode>3"
+  [(set (match_operand:VINTW 0 "s_register_operand" "")
+        (umax:VINTW (match_operand:VINTW 1 "s_register_operand" "")
+                    (match_operand:VINTW 2 "s_register_operand" "")))]
+  "TARGET_NEON
+   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
new file mode 100644
index 000000000..1ac2d0c2d
--- /dev/null
+++ b/gcc/config/arm/vfp.md
@@ -0,0 +1,1153 @@
+;; ARM VFP instruction patterns
+;; Copyright (C) 2003, 2005, 2006, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.  */
+
+;; Additional register numbers
+(define_constants
+  [(VFPCC_REGNUM 127)]
+)
+
+;; The VFP "type" attributes differ from those used in the FPA model.
+;; fcpys Single precision cpy.
+;; ffariths Single precision abs, neg.
+;; ffarithd Double precision abs, neg, cpy. +;; fadds Single precision add/sub. +;; faddd Double precision add/sub. +;; fconsts Single precision load immediate. +;; fconstd Double precision load immediate. +;; fcmps Single precision comparison. +;; fcmpd Double precision comparison. +;; fmuls Single precision multiply. +;; fmuld Double precision multiply. +;; fmacs Single precision multiply-accumulate. +;; fmacd Double precision multiply-accumulate. +;; fdivs Single precision sqrt or division. +;; fdivd Double precision sqrt or division. +;; f_flag fmstat operation +;; f_load[sd] Floating point load from memory. +;; f_store[sd] Floating point store to memory. +;; f_2_r Transfer vfp to arm reg. +;; r_2_f Transfer arm to vfp reg. +;; f_cvt Convert floating<->integral + +;; SImode moves +;; ??? For now do not allow loading constants into vfp regs. This causes +;; problems because small constants get converted into adds. +(define_insn "*arm_movsi_vfp" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))] + "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: case 1: + return \"mov%?\\t%0, %1\"; + case 2: + return \"mvn%?\\t%0, #%B1\"; + case 3: + return \"movw%?\\t%0, %1\"; + case 4: + return \"ldr%?\\t%0, %1\"; + case 5: + return \"str%?\\t%1, %0\"; + case 6: + return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 7: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 8: + return \"fcpys%?\\t%0, %1\\t%@ int\"; + case 9: case 10: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] +) + +;; See thumb2.md:thumb2_movsi_insn for an explanation of the split +;; high/low register alternatives for loads and stores here. 
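These SImode patterns (the ARM variant above and the Thumb-2 variant that follows) include alternatives that transfer values between core and VFP registers. A typical way to reach those alternatives is an int-to-float conversion; the C sketch below is illustrative only (the function name is arbitrary, and the exact instructions depend on the -mfpu/-mfloat-abi settings and optimization level):

    /* Illustrative sketch only.  On a hard-float VFP configuration the
       SImode operand is first moved into an S register (fmsr, type
       "r_2_f") and then converted by fsitos; the reverse direction uses
       ftosizs followed by fmrs ("f_2_r").  */
    float
    int_to_float (int x)
    {
      return (float) x;
    }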
+(define_insn "*thumb2_movsi_vfp" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] + "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: case 1: + return \"mov%?\\t%0, %1\"; + case 2: + return \"mvn%?\\t%0, #%B1\"; + case 3: + return \"movw%?\\t%0, %1\"; + case 4: + case 5: + return \"ldr%?\\t%0, %1\"; + case 6: + case 7: + return \"str%?\\t%1, %0\"; + case 8: + return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 9: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 10: + return \"fcpys%?\\t%0, %1\\t%@ int\"; + case 11: case 12: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,1020,4096,*,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] +) + + +;; DImode moves + +(define_insn "*arm_movdi_vfp" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv") + (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + return \"#\"; + case 1: + case 2: + return output_move_double (operands); + case 3: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 4: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 5: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 6: case 7: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) + (eq_attr "alternative" "5") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") + (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,1020,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")] +) + +(define_insn "*thumb2_movdi_vfp" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv") + (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "* + switch (which_alternative) + { + case 0: case 1: case 2: + return (output_move_double (operands)); + case 3: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 4: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 5: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 6: case 7: + return output_move_vfp (operands); + default: + abort (); + } + " + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) + (eq_attr "alternative" "5") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") + (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "pool_range" "*,4096,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")] +) + +;; 
HFmode moves +(define_insn "*movhf_vfp_neon" + [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* S register from memory */ + return \"vld1.16\\t{%z0}, %A1\"; + case 1: /* memory from S register */ + return \"vst1.16\\t{%z1}, %A0\"; + case 2: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 3: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 4: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 5: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 6: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 7: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 8: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*") + (set_attr "length" "4,4,4,4,4,4,4,4,8")] +) + +;; FP16 without element load/store instructions. +(define_insn "*movhf_vfp" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 2: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 3: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 4: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 5: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 6: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "length" "4,4,4,4,4,4,8")] +) + + +;; SFmode moves +;; Disparage the w<->r cases because reloading an invalid address is +;; preferable to loading the value via integer registers. 
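Before the SFmode patterns themselves, a short illustration of their constant alternatives: the "Dv" alternative in the *movsf_vfp pattern that follows covers only the small set of immediates that fconsts can encode, while everything else goes through the literal pool. The C sketch is illustrative only (arbitrary function names; it assumes a VFPv3-class FPU such as -mfpu=vfp3 and some optimization):

    /* Illustrative sketch only.  1.0f fits the VFP immediate encoding and
       can be materialised with fconsts (type "fconsts"); 0.1f does not and
       is typically loaded from the literal pool (type "f_loads").  */
    float one (void)   { return 1.0f; }
    float tenth (void) { return 0.1f; }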
+ +(define_insn "*movsf_vfp" + [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r") + (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( s_register_operand (operands[0], SFmode) + || s_register_operand (operands[1], SFmode))" + "* + switch (which_alternative) + { + case 0: + return \"fmsr%?\\t%0, %1\"; + case 1: + return \"fmrs%?\\t%0, %1\"; + case 2: + return \"fconsts%?\\t%0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: + return \"ldr%?\\t%0, %1\\t%@ float\"; + case 6: + return \"str%?\\t%1, %0\\t%@ float\"; + case 7: + return \"fcpys%?\\t%0, %1\"; + case 8: + return \"mov%?\\t%0, %1\\t%@ float\"; + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" + "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") + (set_attr "insn" "*,*,*,*,*,*,*,*,mov") + (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] +) + +(define_insn "*thumb2_movsf_vfp" + [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t ,Uv,r ,m,t,r") + (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP + && ( s_register_operand (operands[0], SFmode) + || s_register_operand (operands[1], SFmode))" + "* + switch (which_alternative) + { + case 0: + return \"fmsr%?\\t%0, %1\"; + case 1: + return \"fmrs%?\\t%0, %1\"; + case 2: + return \"fconsts%?\\t%0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: + return \"ldr%?\\t%0, %1\\t%@ float\"; + case 6: + return \"str%?\\t%1, %0\\t%@ float\"; + case 7: + return \"fcpys%?\\t%0, %1\"; + case 8: + return \"mov%?\\t%0, %1\\t%@ float\"; + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" + "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") + (set_attr "insn" "*,*,*,*,*,*,*,*,mov") + (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] +) + + +;; DFmode moves + +(define_insn "*movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: + return \"fmdrr%?\\t%P0, %Q1, %R1\"; + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: + return output_move_double (operands); + case 5: case 6: + return output_move_vfp (operands); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; + case 8: + return \"#\"; + default: + gcc_unreachable (); + } + } + " + [(set_attr "type" + "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") + (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") + (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")] +) + +(define_insn "*thumb2_movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r") + 
(match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "* + { + switch (which_alternative) + { + case 0: + return \"fmdrr%?\\t%P0, %Q1, %R1\"; + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: case 8: + return output_move_double (operands); + case 5: case 6: + return output_move_vfp (operands); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; + default: + abort (); + } + } + " + [(set_attr "type" + "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*") + (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") + (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*") + (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")] +) + + +;; Conditional move patterns + +(define_insn "*movsfcc_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") + (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcpys%D3\\t%0, %2 + fcpys%d3\\t%0, %1 + fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 + fmsr%D3\\t%0, %2 + fmsr%d3\\t%0, %1 + fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 + fmrs%D3\\t%0, %2 + fmrs%d3\\t%0, %1 + fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,4,4,8,4,4,8") + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] +) + +(define_insn "*thumb2_movsfcc_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") + (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + it\\t%D3\;fcpys%D3\\t%0, %2 + it\\t%d3\;fcpys%d3\\t%0, %1 + ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 + it\\t%D3\;fmsr%D3\\t%0, %2 + it\\t%d3\;fmsr%d3\\t%0, %1 + ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 + it\\t%D3\;fmrs%D3\\t%0, %2 + it\\t%d3\;fmrs%d3\\t%0, %1 + ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] +) + +(define_insn "*movdfcc_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcpyd%D3\\t%P0, %P2 + fcpyd%d3\\t%P0, %P1 + fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 + fmdrr%D3\\t%P0, %Q2, %R2 + fmdrr%d3\\t%P0, %Q1, %R1 + fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 + fmrrd%D3\\t%Q0, %R0, %P2 + fmrrd%d3\\t%Q0, %R0, %P1 + fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" 
"4,4,8,4,4,8,4,4,8") + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] +) + +(define_insn "*thumb2_movdfcc_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + it\\t%D3\;fcpyd%D3\\t%P0, %P2 + it\\t%d3\;fcpyd%d3\\t%P0, %P1 + ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 + it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2 + it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1 + ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 + it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2 + it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1 + ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] +) + + +;; Sign manipulation functions + +(define_insn "*abssf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (abs:SF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fabss%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "ffariths")] +) + +(define_insn "*absdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (abs:DF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fabsd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "ffarithd")] +) + +(define_insn "*negsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,?r") + (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fnegs%?\\t%0, %1 + eor%?\\t%0, %1, #-2147483648" + [(set_attr "predicable" "yes") + (set_attr "type" "ffariths")] +) + +(define_insn_and_split "*negdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,?r,?r") + (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fnegd%?\\t%P0, %P1 + # + #" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed + && arm_general_register_operand (operands[0], DFmode)" + [(set (match_dup 0) (match_dup 1))] + " + if (REGNO (operands[0]) == REGNO (operands[1])) + { + operands[0] = gen_highpart (SImode, operands[0]); + operands[1] = gen_rtx_XOR (SImode, operands[0], GEN_INT (0x80000000)); + } + else + { + rtx in_hi, in_lo, out_hi, out_lo; + + in_hi = gen_rtx_XOR (SImode, gen_highpart (SImode, operands[1]), + GEN_INT (0x80000000)); + in_lo = gen_lowpart (SImode, operands[1]); + out_hi = gen_highpart (SImode, operands[0]); + out_lo = gen_lowpart (SImode, operands[0]); + + if (REGNO (in_lo) == REGNO (out_hi)) + { + emit_insn (gen_rtx_SET (SImode, out_lo, in_lo)); + operands[0] = out_hi; + operands[1] = in_hi; + } + else + { + emit_insn (gen_rtx_SET (SImode, out_hi, in_hi)); + operands[0] = out_lo; + operands[1] = in_lo; + } + } + " + [(set_attr "predicable" "yes") + (set_attr "length" "4,4,8") + (set_attr "type" "ffarithd")] +) + + +;; Arithmetic insns + +(define_insn "*addsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (plus:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && 
TARGET_VFP" + "fadds%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "fadds")] +) + +(define_insn "*adddf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (plus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "faddd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "faddd")] +) + + +(define_insn "*subsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsubs%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "fadds")] +) + +(define_insn "*subdf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsubd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "faddd")] +) + + +;; Division insns + +(define_insn "*divsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "+t") + (div:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fdivs%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "fdivs")] +) + +(define_insn "*divdf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "+w") + (div:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fdivd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "fdivd")] +) + + +;; Multiplication insns + +(define_insn "*mulsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "+t") + (mult:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmuls%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "fmuls")] +) + +(define_insn "*muldf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "+w") + (mult:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "fmuld")] +) + + +(define_insn "*mulsf3negsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "+t") + (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmuls%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "fmuls")] +) + +(define_insn "*muldf3negdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "+w") + (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "type" "fmuld")] +) + + +;; Multiply-accumulate insns + +;; 0 = 1 * 2 + 0 +(define_insn "*mulsf3addsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" 
"0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3adddf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (plus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +) + +;; 0 = 1 * 2 - 0 +(define_insn "*mulsf3subsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3subdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +) + +;; 0 = -(1 * 2) + 0 +(define_insn "*mulsf3negsfaddsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (match_operand:SF 1 "s_register_operand" "0") + (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +) + +(define_insn "*fmuldf3negdfadddf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (match_operand:DF 1 "s_register_operand" "0") + (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +) + + +;; 0 = -(1 * 2) - 0 +(define_insn "*mulsf3negsfsubsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (mult:SF + (neg:SF (match_operand:SF 2 "s_register_operand" "t")) + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3negdfsubdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (mult:DF + (neg:DF (match_operand:DF 2 "s_register_operand" "w")) + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "type" "fmacd")] +) + + +;; Conversion routines + +(define_insn "*extendsfdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtds%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "*truncdfsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_truncate:SF (match_operand:DF 1 
"s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtsd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f32.f16\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f16.f32\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "*truncsisf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "ftosizs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "*truncsidf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftosizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + + +(define_insn "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "ftouizs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftouizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + + +(define_insn "*floatsisf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float:SF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsitos%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "*floatsidf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (float:DF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fuitos%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "floatunssidf2" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fuitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + + +;; Sqrt insns. 
+ +(define_insn "*sqrtsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsqrts%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "fdivs")] +) + +(define_insn "*sqrtdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsqrtd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "type" "fdivd")] +) + + +;; Patterns to split/copy vfp condition flags. + +(define_insn "*movcc_vfp" + [(set (reg CC_REGNUM) + (reg VFPCC_REGNUM))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmstat%?" + [(set_attr "conds" "set") + (set_attr "type" "f_flag")] +) + +(define_insn_and_split "*cmpsf_split_vfp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:SF 0 "s_register_operand" "t") + (match_operand:SF 1 "vfp_compare_operand" "tG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_dup 0) + (match_dup 1))) + (set (reg:CCFP CC_REGNUM) + (reg:CCFP VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpsf_trap_split_vfp" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t") + (match_operand:SF 1 "vfp_compare_operand" "tG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_dup 0) + (match_dup 1))) + (set (reg:CCFPE CC_REGNUM) + (reg:CCFPE VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpdf_split_vfp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_dup 0) + (match_dup 1))) + (set (reg:CCFP CC_REGNUM) + (reg:CCFP VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpdf_trap_split_vfp" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_dup 0) + (match_dup 1))) + (set (reg:CCFPE CC_REGNUM) + (reg:CCFPE VFPCC_REGNUM))] + "" +) + + +;; Comparison patterns + +(define_insn "*cmpsf_vfp" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t") + (match_operand:SF 1 "vfp_compare_operand" "t,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcmps%?\\t%0, %1 + fcmpzs%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "type" "fcmps")] +) + +(define_insn "*cmpsf_trap_vfp" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t,t") + (match_operand:SF 1 "vfp_compare_operand" "t,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcmpes%?\\t%0, %1 + fcmpezs%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "type" "fcmps")] +) + +(define_insn "*cmpdf_vfp" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] + 
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmpd%?\\t%P0, %P1 + fcmpzd%?\\t%P0" + [(set_attr "predicable" "yes") + (set_attr "type" "fcmpd")] +) + +(define_insn "*cmpdf_trap_vfp" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmped%?\\t%P0, %P1 + fcmpezd%?\\t%P0" + [(set_attr "predicable" "yes") + (set_attr "type" "fcmpd")] +) + + +;; Store multiple insn used in function prologue. + +(define_insn "*push_multi_vfp" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") + (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")] + UNSPEC_PUSH_MULT))])] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "* return vfp_output_fstmd (operands);" + [(set_attr "type" "f_stored")] +) + + +;; Unimplemented insns: +;; fldm* +;; fstm* +;; fmdhr et al (VFPv1) +;; Support for xD (single precision only) variants. +;; fmrrs, fmsrr diff --git a/gcc/config/arm/vfp11.md b/gcc/config/arm/vfp11.md new file mode 100644 index 000000000..8f863fd70 --- /dev/null +++ b/gcc/config/arm/vfp11.md @@ -0,0 +1,92 @@ +;; ARM VFP11 pipeline description +;; Copyright (C) 2003, 2005, 2007, 2008 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "vfp11") + +;; There are 3 pipelines in the VFP11 unit. +;; +;; - A 8-stage FMAC pipeline (7 execute + writeback) with forward from +;; fourth stage for simple operations. +;; +;; - A 5-stage DS pipeline (4 execute + writeback) for divide/sqrt insns. +;; These insns also uses first execute stage of FMAC pipeline. +;; +;; - A 4-stage LS pipeline (execute + 2 memory + writeback) with forward from +;; second memory stage for loads. + +;; We do not model Write-After-Read hazards. +;; We do not do write scheduling with the arm core, so it is only necessary +;; to model the first stage of each pipeline +;; ??? Need to model LS pipeline properly for load/store multiple? +;; We do not model fmstat properly. This could be done by modeling pipelines +;; properly and defining an absence set between a dummy fmstat unit and all +;; other vfp units. 
+ +(define_cpu_unit "fmac" "vfp11") + +(define_cpu_unit "ds" "vfp11") + +(define_cpu_unit "vfp_ls" "vfp11") + +(define_cpu_unit "fmstat" "vfp11") + +(exclusion_set "fmac,ds" "fmstat") + +(define_insn_reservation "vfp_ffarith" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) + "fmac") + +(define_insn_reservation "vfp_farith" 8 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs")) + "fmac") + +(define_insn_reservation "vfp_fmul" 9 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fmuld,fmacd")) + "fmac*2") + +(define_insn_reservation "vfp_fdivs" 19 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fdivs")) + "ds*15") + +(define_insn_reservation "vfp_fdivd" 33 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fdivd")) + "fmac+ds*29") + +;; Moves to/from arm regs also use the load/store pipeline. +(define_insn_reservation "vfp_fload" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_loads,f_loadd,r_2_f")) + "vfp_ls") + +(define_insn_reservation "vfp_fstore" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_stores,f_stored,f_2_r")) + "vfp_ls") + +(define_insn_reservation "vfp_to_cpsr" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_flag")) + "fmstat,vfp_ls*3") + diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h new file mode 100644 index 000000000..3ceaed903 --- /dev/null +++ b/gcc/config/arm/vxworks.h @@ -0,0 +1,113 @@ +/* Definitions of target machine for GCC, + for ARM with targetting the VXWorks run time environment. + Copyright (C) 1999, 2000, 2003, 2004, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + + Contributed by: Mike Stump + Brought up to date by CodeSourcery, LLC. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + if (TARGET_BIG_END) \ + builtin_define ("ARMEB"); \ + else \ + builtin_define ("ARMEL"); \ + \ + if (arm_arch_xscale) \ + builtin_define ("CPU=XSCALE"); \ + else if (arm_arch5) \ + builtin_define ("CPU=ARMARCH5"); \ + else if (arm_arch4) \ + { \ + if (thumb_code) \ + builtin_define ("CPU=ARMARCH4_T"); \ + else \ + builtin_define ("CPU=ARMARCH4"); \ + } \ + VXWORKS_OS_CPP_BUILTINS (); \ + } while (0) + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS + +/* Subsume the arm/elf.h definition, and add RTP hooks. 
*/ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__ELF__" VXWORKS_ADDITIONAL_CPP_SPEC + +#undef CC1_SPEC +#define CC1_SPEC \ +"%{tstrongarm:-mlittle-endian -mcpu=strongarm ; \ + t4: -mlittle-endian -march=armv4 ; \ + t4be: -mbig-endian -march=armv4 ; \ + t4t: -mthumb -mthumb-interwork -mlittle-endian -march=armv4t ; \ + t4tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv4t ; \ + t5: -mlittle-endian -march=armv5 ; \ + t5be: -mbig-endian -march=armv5 ; \ + t5t: -mthumb -mthumb-interwork -mlittle-endian -march=armv5 ; \ + t5tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv5 ; \ + txscale: -mlittle-endian -mcpu=xscale ; \ + txscalebe: -mbig-endian -mcpu=xscale ; \ + : -march=armv4}" + +/* Pass -EB for big-endian targets. */ +#define VXWORKS_ENDIAN_SPEC \ + "%{mbig-endian|t4be|t4tbe|t5be|t5tbe|txscalebe:-EB}" + +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC VXWORKS_ENDIAN_SPEC + +#undef LINK_SPEC +#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_ENDIAN_SPEC + +#undef LIB_SPEC +#define LIB_SPEC VXWORKS_LIB_SPEC + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC + +#undef TARGET_VERSION +#define TARGET_VERSION fputs (" (ARM/VxWorks)", stderr); + +/* There is no default multilib. */ +#undef MULTILIB_DEFAULTS + +#define FPUTYPE_DEFAULT "vfp" + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER + +/* We want to be compatible with a version of "2.96" at one point in + the past before this macro was changed. */ +#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8 + +/* The kernel loader does not allow relocations to overflow, so we + cannot allow arbitrary relocation addends in kernel modules or RTP + executables. Also, the dynamic loader uses the resolved relocation + value to distinguish references to the text and data segments, so we + cannot allow arbitrary offsets for shared libraries either. */ +#undef ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P +#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1 + +#undef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/arm/vxworks.opt b/gcc/config/arm/vxworks.opt new file mode 100644 index 000000000..bc8478391 --- /dev/null +++ b/gcc/config/arm/vxworks.opt @@ -0,0 +1,60 @@ +; ARM VxWorks options. + +; Copyright (C) 2011 +; Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +t4 +Driver + +t4be +Driver + +t4t +Driver + +t4tbe +Driver + +t5 +Driver + +t5be +Driver + +t5t +Driver + +t5tbe +Driver + +tstrongarm +Driver + +txscale +Driver + +txscalebe +Driver + +; This comment is to ensure we retain the blank line above. 
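Taken together, the CC1_SPEC in vxworks.h above and the -t* switches declared here act as multilib selectors: the driver rewrites, say, -t4tbe into -mthumb -mthumb-interwork -mbig-endian -march=armv4t and passes -EB on to the assembler and linker, while TARGET_OS_CPP_BUILTINS exposes the choice to the preprocessor. The C sketch below is illustrative only (the macro and variable names are arbitrary, and how VxWorks system headers consume the CPU=... builtin is an assumption, not something this patch defines):

    /* Illustrative sketch only.  Code built with -t4tbe sees ARMEB and
       CPU=ARMARCH4_T from TARGET_OS_CPP_BUILTINS above; little-endian
       multilibs see ARMEL instead.  */
    #if defined (ARMEB)
    # define VX_ENDIAN_NAME "big"
    #else
    # define VX_ENDIAN_NAME "little"
    #endif

    const char *vx_endian_name = VX_ENDIAN_NAME;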
diff --git a/gcc/config/arm/wince-pe.h b/gcc/config/arm/wince-pe.h new file mode 100644 index 000000000..ffaa0c6c3 --- /dev/null +++ b/gcc/config/arm/wince-pe.h @@ -0,0 +1,26 @@ +/* Definitions of target machine for GNU compiler, for ARM with WINCE-PE obj format. + Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc. + Contributed by Nick Clifton + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_NOP_FUN_DLLIMPORT) + +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork" } diff --git a/gcc/config/avr/avr-c.c b/gcc/config/avr/avr-c.c new file mode 100644 index 000000000..05e8e8b30 --- /dev/null +++ b/gcc/config/avr/avr-c.c @@ -0,0 +1,85 @@ +/* Copyright (C) 2009, 2010 + Free Software Foundation, Inc. + Contributed by Anatoly Sokolov (aesok@post.ru) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "cpplib.h" +#include "tree.h" +#include "c-family/c-common.h" + +/* Not included in avr.c since this requires C front end. */ + +/* Worker function for TARGET_CPU_CPP_BUILTINS. 
*/ + +void +avr_cpu_cpp_builtins (struct cpp_reader *pfile) +{ + builtin_define_std ("AVR"); + + if (avr_current_arch->macro) + cpp_define (pfile, avr_current_arch->macro); + if (avr_extra_arch_macro) + cpp_define (pfile, avr_extra_arch_macro); + if (avr_current_arch->have_elpm) + cpp_define (pfile, "__AVR_HAVE_RAMPZ__"); + if (avr_current_arch->have_elpm) + cpp_define (pfile, "__AVR_HAVE_ELPM__"); + if (avr_current_arch->have_elpmx) + cpp_define (pfile, "__AVR_HAVE_ELPMX__"); + if (avr_current_arch->have_movw_lpmx) + { + cpp_define (pfile, "__AVR_HAVE_MOVW__"); + cpp_define (pfile, "__AVR_HAVE_LPMX__"); + } + if (avr_current_arch->asm_only) + cpp_define (pfile, "__AVR_ASM_ONLY__"); + if (avr_current_arch->have_mul) + { + cpp_define (pfile, "__AVR_ENHANCED__"); + cpp_define (pfile, "__AVR_HAVE_MUL__"); + } + if (avr_current_arch->have_jmp_call) + { + cpp_define (pfile, "__AVR_MEGA__"); + cpp_define (pfile, "__AVR_HAVE_JMP_CALL__"); + } + if (avr_current_arch->have_eijmp_eicall) + { + cpp_define (pfile, "__AVR_HAVE_EIJMP_EICALL__"); + cpp_define (pfile, "__AVR_3_BYTE_PC__"); + } + else + { + cpp_define (pfile, "__AVR_2_BYTE_PC__"); + } + + if (avr_current_device->short_sp) + cpp_define (pfile, "__AVR_HAVE_8BIT_SP__"); + else + cpp_define (pfile, "__AVR_HAVE_16BIT_SP__"); + + if (TARGET_NO_INTERRUPTS) + cpp_define (pfile, "__NO_INTERRUPTS__"); +} + diff --git a/gcc/config/avr/avr-devices.c b/gcc/config/avr/avr-devices.c new file mode 100755 index 000000000..91ca95e0f --- /dev/null +++ b/gcc/config/avr/avr-devices.c @@ -0,0 +1,229 @@ +/* Copyright (C) 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Anatoly Sokolov (aesok@post.ru) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* List of all known AVR MCU architectures. */ + +const struct base_arch_s avr_arch_types[] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, NULL, "avr2" }, /* unknown device specified */ + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=1", "avr1" }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=2", "avr2" }, + { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=25", "avr25" }, + { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=3", "avr3" }, + { 0, 0, 1, 0, 1, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=31", "avr31" }, + { 0, 0, 1, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=35", "avr35" }, + { 0, 1, 0, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=4", "avr4" }, + { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=5", "avr5" }, + { 0, 1, 1, 1, 1, 1, 0, 0, 0, 0x0060, "__AVR_ARCH__=51", "avr51" }, + { 0, 1, 1, 1, 1, 1, 1, 0, 0, 0x0060, "__AVR_ARCH__=6", "avr6" } +}; + +/* List of all known AVR MCU types - if updated, it has to be kept + in sync in several places (FIXME: is there a better way?): + - here; + - t-avr (MULTILIB_MATCHES); + - gas/config/tc-avr.c; + - avr-libc. */ + +const struct mcu_type_s avr_mcu_types[] = { + /* Classic, <= 8K. 
*/ + { "avr2", ARCH_AVR2, NULL, 0, 0x0060, "s8515" }, + { "at90s2313", ARCH_AVR2, "__AVR_AT90S2313__", 1, 0x0060, "s2313" }, + { "at90s2323", ARCH_AVR2, "__AVR_AT90S2323__", 1, 0x0060, "s2323" }, + { "at90s2333", ARCH_AVR2, "__AVR_AT90S2333__", 1, 0x0060, "s2333" }, + { "at90s2343", ARCH_AVR2, "__AVR_AT90S2343__", 1, 0x0060, "s2343" }, + { "attiny22", ARCH_AVR2, "__AVR_ATtiny22__", 1, 0x0060, "tn22" }, + { "attiny26", ARCH_AVR2, "__AVR_ATtiny26__", 1, 0x0060, "tn26" }, + { "at90s4414", ARCH_AVR2, "__AVR_AT90S4414__", 0, 0x0060, "s4414" }, + { "at90s4433", ARCH_AVR2, "__AVR_AT90S4433__", 1, 0x0060, "s4433" }, + { "at90s4434", ARCH_AVR2, "__AVR_AT90S4434__", 0, 0x0060, "s4434" }, + { "at90s8515", ARCH_AVR2, "__AVR_AT90S8515__", 0, 0x0060, "s8515" }, + { "at90c8534", ARCH_AVR2, "__AVR_AT90C8534__", 0, 0x0060, "c8534" }, + { "at90s8535", ARCH_AVR2, "__AVR_AT90S8535__", 0, 0x0060, "s8535" }, + /* Classic + MOVW, <= 8K. */ + { "avr25", ARCH_AVR25, NULL, 0, 0x0060, "tn85" }, + { "ata6289", ARCH_AVR25, "__AVR_ATA6289__", 0, 0x0100, "a6289" }, + { "attiny13", ARCH_AVR25, "__AVR_ATtiny13__", 1, 0x0060, "tn13" }, + { "attiny13a", ARCH_AVR25, "__AVR_ATtiny13A__", 1, 0x0060, "tn13a" }, + { "attiny2313", ARCH_AVR25, "__AVR_ATtiny2313__", 1, 0x0060, "tn2313" }, + { "attiny2313a", ARCH_AVR25, "__AVR_ATtiny2313A__", 1, 0x0060, "tn2313a" }, + { "attiny24", ARCH_AVR25, "__AVR_ATtiny24__", 1, 0x0060, "tn24" }, + { "attiny24a", ARCH_AVR25, "__AVR_ATtiny24A__", 1, 0x0060, "tn24a" }, + { "attiny4313", ARCH_AVR25, "__AVR_ATtiny4313__", 0, 0x0060, "tn4313" }, + { "attiny44", ARCH_AVR25, "__AVR_ATtiny44__", 0, 0x0060, "tn44" }, + { "attiny44a", ARCH_AVR25, "__AVR_ATtiny44A__", 0, 0x0060, "tn44a" }, + { "attiny84", ARCH_AVR25, "__AVR_ATtiny84__", 0, 0x0060, "tn84" }, + { "attiny84a", ARCH_AVR25, "__AVR_ATtiny84A__", 0, 0x0060, "tn84" }, + { "attiny25", ARCH_AVR25, "__AVR_ATtiny25__", 1, 0x0060, "tn25" }, + { "attiny45", ARCH_AVR25, "__AVR_ATtiny45__", 0, 0x0060, "tn45" }, + { "attiny85", ARCH_AVR25, "__AVR_ATtiny85__", 0, 0x0060, "tn85" }, + { "attiny261", ARCH_AVR25, "__AVR_ATtiny261__", 1, 0x0060, "tn261" }, + { "attiny261a", ARCH_AVR25, "__AVR_ATtiny261A__", 1, 0x0060, "tn261a" }, + { "attiny461", ARCH_AVR25, "__AVR_ATtiny461__", 0, 0x0060, "tn461" }, + { "attiny461a", ARCH_AVR25, "__AVR_ATtiny461A__", 0, 0x0060, "tn461a" }, + { "attiny861", ARCH_AVR25, "__AVR_ATtiny861__", 0, 0x0060, "tn861" }, + { "attiny861a", ARCH_AVR25, "__AVR_ATtiny861A__", 0, 0x0060, "tn861a" }, + { "attiny43u", ARCH_AVR25, "__AVR_ATtiny43U__", 0, 0x0060, "tn43u" }, + { "attiny87", ARCH_AVR25, "__AVR_ATtiny87__", 0, 0x0100, "tn87" }, + { "attiny48", ARCH_AVR25, "__AVR_ATtiny48__", 0, 0x0100, "tn48" }, + { "attiny88", ARCH_AVR25, "__AVR_ATtiny88__", 0, 0x0100, "tn88" }, + { "at86rf401", ARCH_AVR25, "__AVR_AT86RF401__", 0, 0x0060, "86401" }, + /* Classic, > 8K, <= 64K. */ + { "avr3", ARCH_AVR3, NULL, 0, 0x0060, "43355" }, + { "at43usb355", ARCH_AVR3, "__AVR_AT43USB355__", 0, 0x0060, "43355" }, + { "at76c711", ARCH_AVR3, "__AVR_AT76C711__", 0, 0x0060, "76711" }, + /* Classic, == 128K. */ + { "avr31", ARCH_AVR31, NULL, 0, 0x0060, "m103" }, + { "atmega103", ARCH_AVR31, "__AVR_ATmega103__", 0, 0x0060, "m103" }, + { "at43usb320", ARCH_AVR31, "__AVR_AT43USB320__", 0, 0x0060, "43320" }, + /* Classic + MOVW + JMP/CALL. 
*/ + { "avr35", ARCH_AVR35, NULL, 0, 0x0100, "usb162" }, + { "at90usb82", ARCH_AVR35, "__AVR_AT90USB82__", 0, 0x0100, "usb82" }, + { "at90usb162", ARCH_AVR35, "__AVR_AT90USB162__", 0, 0x0100, "usb162" }, + { "atmega8u2", ARCH_AVR35, "__AVR_ATmega8U2__", 0, 0x0100, "m8u2" }, + { "atmega16u2", ARCH_AVR35, "__AVR_ATmega16U2__", 0, 0x0100, "m16u2" }, + { "atmega32u2", ARCH_AVR35, "__AVR_ATmega32U2__", 0, 0x0100, "m32u2" }, + { "attiny167", ARCH_AVR35, "__AVR_ATtiny167__", 0, 0x0100, "tn167" }, + /* Enhanced, <= 8K. */ + { "avr4", ARCH_AVR4, NULL, 0, 0x0060, "m8" }, + { "atmega8", ARCH_AVR4, "__AVR_ATmega8__", 0, 0x0060, "m8" }, + { "atmega48", ARCH_AVR4, "__AVR_ATmega48__", 0, 0x0100, "m48" }, + { "atmega48a", ARCH_AVR4, "__AVR_ATmega48A__", 0, 0x0100, "m48a" }, + { "atmega48p", ARCH_AVR4, "__AVR_ATmega48P__", 0, 0x0100, "m48p" }, + { "atmega88", ARCH_AVR4, "__AVR_ATmega88__", 0, 0x0100, "m88" }, + { "atmega88a", ARCH_AVR4, "__AVR_ATmega88A__", 0, 0x0100, "m88a" }, + { "atmega88p", ARCH_AVR4, "__AVR_ATmega88P__", 0, 0x0100, "m88p" }, + { "atmega88pa", ARCH_AVR4, "__AVR_ATmega88PA__", 0, 0x0100, "m88pa" }, + { "atmega8515", ARCH_AVR4, "__AVR_ATmega8515__", 0, 0x0060, "m8515" }, + { "atmega8535", ARCH_AVR4, "__AVR_ATmega8535__", 0, 0x0060, "m8535" }, + { "atmega8hva", ARCH_AVR4, "__AVR_ATmega8HVA__", 0, 0x0100, "m8hva" }, + { "at90pwm1", ARCH_AVR4, "__AVR_AT90PWM1__", 0, 0x0100, "90pwm1" }, + { "at90pwm2", ARCH_AVR4, "__AVR_AT90PWM2__", 0, 0x0100, "90pwm2" }, + { "at90pwm2b", ARCH_AVR4, "__AVR_AT90PWM2B__", 0, 0x0100, "90pwm2b" }, + { "at90pwm3", ARCH_AVR4, "__AVR_AT90PWM3__", 0, 0x0100, "90pwm3" }, + { "at90pwm3b", ARCH_AVR4, "__AVR_AT90PWM3B__", 0, 0x0100, "90pwm3b" }, + { "at90pwm81", ARCH_AVR4, "__AVR_AT90PWM81__", 0, 0x0100, "90pwm81" }, + /* Enhanced, > 8K, <= 64K. 
*/ + { "avr5", ARCH_AVR5, NULL, 0, 0x0060, "m16" }, + { "atmega16", ARCH_AVR5, "__AVR_ATmega16__", 0, 0x0060, "m16" }, + { "atmega16a", ARCH_AVR5, "__AVR_ATmega16A__", 0, 0x0060, "m16a" }, + { "atmega161", ARCH_AVR5, "__AVR_ATmega161__", 0, 0x0060, "m161" }, + { "atmega162", ARCH_AVR5, "__AVR_ATmega162__", 0, 0x0100, "m162" }, + { "atmega163", ARCH_AVR5, "__AVR_ATmega163__", 0, 0x0060, "m163" }, + { "atmega164a", ARCH_AVR5, "__AVR_ATmega164A__", 0, 0x0100, "m164a" }, + { "atmega164p", ARCH_AVR5, "__AVR_ATmega164P__", 0, 0x0100, "m164p" }, + { "atmega165", ARCH_AVR5, "__AVR_ATmega165__", 0, 0x0100, "m165" }, + { "atmega165a", ARCH_AVR5, "__AVR_ATmega165A__", 0, 0x0100, "m165a" }, + { "atmega165p", ARCH_AVR5, "__AVR_ATmega165P__", 0, 0x0100, "m165p" }, + { "atmega168", ARCH_AVR5, "__AVR_ATmega168__", 0, 0x0100, "m168" }, + { "atmega168a", ARCH_AVR5, "__AVR_ATmega168A__", 0, 0x0100, "m168a" }, + { "atmega168p", ARCH_AVR5, "__AVR_ATmega168P__", 0, 0x0100, "m168p" }, + { "atmega169", ARCH_AVR5, "__AVR_ATmega169__", 0, 0x0100, "m169" }, + { "atmega169a", ARCH_AVR5, "__AVR_ATmega169A__", 0, 0x0100, "m169a" }, + { "atmega169p", ARCH_AVR5, "__AVR_ATmega169P__", 0, 0x0100, "m169p" }, + { "atmega169pa", ARCH_AVR5, "__AVR_ATmega169PA__", 0, 0x0100, "m169pa" }, + { "atmega32", ARCH_AVR5, "__AVR_ATmega32__", 0, 0x0060, "m32" }, + { "atmega323", ARCH_AVR5, "__AVR_ATmega323__", 0, 0x0060, "m323" }, + { "atmega324a", ARCH_AVR5, "__AVR_ATmega324A__", 0, 0x0100, "m324a" }, + { "atmega324p", ARCH_AVR5, "__AVR_ATmega324P__", 0, 0x0100, "m324p" }, + { "atmega324pa", ARCH_AVR5, "__AVR_ATmega324PA__", 0, 0x0100, "m324pa" }, + { "atmega325", ARCH_AVR5, "__AVR_ATmega325__", 0, 0x0100, "m325" }, + { "atmega325a", ARCH_AVR5, "__AVR_ATmega325A__", 0, 0x0100, "m325a" }, + { "atmega325p", ARCH_AVR5, "__AVR_ATmega325P__", 0, 0x0100, "m325p" }, + { "atmega3250", ARCH_AVR5, "__AVR_ATmega3250__", 0, 0x0100, "m3250" }, + { "atmega3250a", ARCH_AVR5, "__AVR_ATmega3250A__", 0, 0x0100, "m3250a" }, + { "atmega3250p", ARCH_AVR5, "__AVR_ATmega3250P__", 0, 0x0100, "m3250p" }, + { "atmega328", ARCH_AVR5, "__AVR_ATmega328__", 0, 0x0100, "m328" }, + { "atmega328p", ARCH_AVR5, "__AVR_ATmega328P__", 0, 0x0100, "m328p" }, + { "atmega329", ARCH_AVR5, "__AVR_ATmega329__", 0, 0x0100, "m329" }, + { "atmega329a", ARCH_AVR5, "__AVR_ATmega329A__", 0, 0x0100, "m329a" }, + { "atmega329p", ARCH_AVR5, "__AVR_ATmega329P__", 0, 0x0100, "m329p" }, + { "atmega329pa", ARCH_AVR5, "__AVR_ATmega329PA__", 0, 0x0100, "m329pa" }, + { "atmega3290", ARCH_AVR5, "__AVR_ATmega3290__", 0, 0x0100, "m3290" }, + { "atmega3290a", ARCH_AVR5, "__AVR_ATmega3290A__", 0, 0x0100, "m3290a" }, + { "atmega3290p", ARCH_AVR5, "__AVR_ATmega3290P__", 0, 0x0100, "m3290p" }, + { "atmega406", ARCH_AVR5, "__AVR_ATmega406__", 0, 0x0100, "m406" }, + { "atmega64", ARCH_AVR5, "__AVR_ATmega64__", 0, 0x0100, "m64" }, + { "atmega640", ARCH_AVR5, "__AVR_ATmega640__", 0, 0x0200, "m640" }, + { "atmega644", ARCH_AVR5, "__AVR_ATmega644__", 0, 0x0100, "m644" }, + { "atmega644a", ARCH_AVR5, "__AVR_ATmega644A__", 0, 0x0100, "m644a" }, + { "atmega644p", ARCH_AVR5, "__AVR_ATmega644P__", 0, 0x0100, "m644p" }, + { "atmega644pa", ARCH_AVR5, "__AVR_ATmega644PA__", 0, 0x0100, "m644pa" }, + { "atmega645", ARCH_AVR5, "__AVR_ATmega645__", 0, 0x0100, "m645" }, + { "atmega645a", ARCH_AVR5, "__AVR_ATmega645A__", 0, 0x0100, "m645a" }, + { "atmega645p", ARCH_AVR5, "__AVR_ATmega645P__", 0, 0x0100, "m645p" }, + { "atmega6450", ARCH_AVR5, "__AVR_ATmega6450__", 0, 0x0100, "m6450" }, + { "atmega6450a", ARCH_AVR5, 
"__AVR_ATmega6450A__", 0, 0x0100, "m6450a" }, + { "atmega6450p", ARCH_AVR5, "__AVR_ATmega6450P__", 0, 0x0100, "m6450p" }, + { "atmega649", ARCH_AVR5, "__AVR_ATmega649__", 0, 0x0100, "m649" }, + { "atmega649a", ARCH_AVR5, "__AVR_ATmega649A__", 0, 0x0100, "m649a" }, + { "atmega649p", ARCH_AVR5, "__AVR_ATmega649P__", 0, 0x0100, "m649p" }, + { "atmega6490", ARCH_AVR5, "__AVR_ATmega6490__", 0, 0x0100, "m6490" }, + { "atmega16hva", ARCH_AVR5, "__AVR_ATmega16HVA__", 0, 0x0100, "m16hva" }, + { "atmega16hva2", ARCH_AVR5, "__AVR_ATmega16HVA2__", 0, 0x0100, "m16hva2" }, + { "atmega16hvb", ARCH_AVR5, "__AVR_ATmega16HVB__", 0, 0x0100, "m16hvb" }, + { "atmega32hvb", ARCH_AVR5, "__AVR_ATmega32HVB__", 0, 0x0100, "m32hvb" }, + { "atmega64hve", ARCH_AVR5, "__AVR_ATmega64HVE__", 0, 0x0100, "m64hve" }, + { "at90can32", ARCH_AVR5, "__AVR_AT90CAN32__", 0, 0x0100, "can32" }, + { "at90can64", ARCH_AVR5, "__AVR_AT90CAN64__", 0, 0x0100, "can64" }, + { "at90pwm216", ARCH_AVR5, "__AVR_AT90PWM216__", 0, 0x0100, "90pwm216" }, + { "at90pwm316", ARCH_AVR5, "__AVR_AT90PWM316__", 0, 0x0100, "90pwm316" }, + { "atmega32c1", ARCH_AVR5, "__AVR_ATmega32C1__", 0, 0x0100, "m32c1" }, + { "atmega64c1", ARCH_AVR5, "__AVR_ATmega64C1__", 0, 0x0100, "m64c1" }, + { "atmega16m1", ARCH_AVR5, "__AVR_ATmega16M1__", 0, 0x0100, "m16m1" }, + { "atmega32m1", ARCH_AVR5, "__AVR_ATmega32M1__", 0, 0x0100, "m32m1" }, + { "atmega64m1", ARCH_AVR5, "__AVR_ATmega64M1__", 0, 0x0100, "m64m1" }, + { "atmega16u4", ARCH_AVR5, "__AVR_ATmega16U4__", 0, 0x0100, "m16u4" }, + { "atmega32u4", ARCH_AVR5, "__AVR_ATmega32U4__", 0, 0x0100, "m32u4" }, + { "atmega32u6", ARCH_AVR5, "__AVR_ATmega32U6__", 0, 0x0100, "m32u6" }, + { "at90scr100", ARCH_AVR5, "__AVR_AT90SCR100__", 0, 0x0100, "90scr100" }, + { "at90usb646", ARCH_AVR5, "__AVR_AT90USB646__", 0, 0x0100, "usb646" }, + { "at90usb647", ARCH_AVR5, "__AVR_AT90USB647__", 0, 0x0100, "usb647" }, + { "at94k", ARCH_AVR5, "__AVR_AT94K__", 0, 0x0060, "at94k" }, + { "m3000", ARCH_AVR5, "__AVR_M3000__", 0, 0x1000, "m3000" }, + /* Enhanced, == 128K. */ + { "avr51", ARCH_AVR51, NULL, 0, 0x0100, "m128" }, + { "atmega128", ARCH_AVR51, "__AVR_ATmega128__", 0, 0x0100, "m128" }, + { "atmega1280", ARCH_AVR51, "__AVR_ATmega1280__", 0, 0x0200, "m1280" }, + { "atmega1281", ARCH_AVR51, "__AVR_ATmega1281__", 0, 0x0200, "m1281" }, + { "atmega1284p", ARCH_AVR51, "__AVR_ATmega1284P__", 0, 0x0100, "m1284p" }, + { "atmega128rfa1", ARCH_AVR51, "__AVR_ATmega128RFA1__", 0, 0x0200, "m128rfa1" }, + { "at90can128", ARCH_AVR51, "__AVR_AT90CAN128__", 0, 0x0100, "can128" }, + { "at90usb1286", ARCH_AVR51, "__AVR_AT90USB1286__", 0, 0x0100, "usb1286" }, + { "at90usb1287", ARCH_AVR51, "__AVR_AT90USB1287__", 0, 0x0100, "usb1287" }, + /* 3-Byte PC. */ + { "avr6", ARCH_AVR6, NULL, 0, 0x0200, "m2561" }, + { "atmega2560", ARCH_AVR6, "__AVR_ATmega2560__", 0, 0x0200, "m2560" }, + { "atmega2561", ARCH_AVR6, "__AVR_ATmega2561__", 0, 0x0200, "m2561" }, + /* Assembler only. */ + { "avr1", ARCH_AVR1, NULL, 0, 0x0060, "s1200" }, + { "at90s1200", ARCH_AVR1, "__AVR_AT90S1200__", 0, 0x0060, "s1200" }, + { "attiny11", ARCH_AVR1, "__AVR_ATtiny11__", 0, 0x0060, "tn11" }, + { "attiny12", ARCH_AVR1, "__AVR_ATtiny12__", 0, 0x0060, "tn12" }, + { "attiny15", ARCH_AVR1, "__AVR_ATtiny15__", 0, 0x0060, "tn15" }, + { "attiny28", ARCH_AVR1, "__AVR_ATtiny28__", 0, 0x0060, "tn28" }, + /* End of list. 
*/ + { NULL, ARCH_UNKNOWN, NULL, 0, 0, NULL } +}; + diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h new file mode 100644 index 000000000..06c9254fd --- /dev/null +++ b/gcc/config/avr/avr-protos.h @@ -0,0 +1,121 @@ +/* Prototypes for exported functions defined in avr.c + + Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +extern int function_arg_regno_p (int r); +extern void avr_cpu_cpp_builtins (struct cpp_reader * pfile); +extern int avr_ret_register (void); +extern enum reg_class avr_regno_reg_class (int r); +extern void asm_globalize_label (FILE *file, const char *name); +extern void avr_asm_declare_function_name (FILE *, const char *, tree); +extern void order_regs_for_local_alloc (void); +extern int avr_initial_elimination_offset (int from, int to); +extern int avr_simple_epilogue (void); +extern void gas_output_limited_string (FILE *file, const char *str); +extern void gas_output_ascii (FILE *file, const char *str, size_t length); +extern int avr_hard_regno_rename_ok (unsigned int, unsigned int); +extern rtx avr_return_addr_rtx (int count, rtx tem); + +#ifdef TREE_CODE +extern void asm_output_external (FILE *file, tree decl, char *name); +extern int avr_progmem_p (tree decl, tree attributes); + +#ifdef RTX_CODE /* inside TREE_CODE */ +extern void init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, + rtx libname, tree fndecl); +#endif /* RTX_CODE inside TREE_CODE */ + +#endif /* TREE_CODE */ + +#ifdef RTX_CODE +extern void asm_output_external_libcall (FILE *file, rtx symref); +extern int compare_diff_p (rtx insn); +extern const char *output_movqi (rtx insn, rtx operands[], int *l); +extern const char *output_movhi (rtx insn, rtx operands[], int *l); +extern const char *out_movqi_r_mr (rtx insn, rtx op[], int *l); +extern const char *out_movqi_mr_r (rtx insn, rtx op[], int *l); +extern const char *out_movhi_r_mr (rtx insn, rtx op[], int *l); +extern const char *out_movhi_mr_r (rtx insn, rtx op[], int *l); +extern const char *out_movsi_r_mr (rtx insn, rtx op[], int *l); +extern const char *out_movsi_mr_r (rtx insn, rtx op[], int *l); +extern const char *output_movsisf (rtx insn, rtx operands[], int *l); +extern const char *out_tstsi (rtx insn, rtx src, int *l); +extern const char *out_tsthi (rtx insn, rtx src, int *l); +extern const char *ret_cond_branch (rtx x, int len, int reverse); + +extern const char *ashlqi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashlhi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashlsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *ashrqi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashrhi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashrsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *lshrqi3_out (rtx 
insn, rtx operands[], int *len); +extern const char *lshrhi3_out (rtx insn, rtx operands[], int *len); +extern const char *lshrsi3_out (rtx insn, rtx operands[], int *len); +extern bool avr_rotate_bytes (rtx operands[]); + +extern void expand_prologue (void); +extern void expand_epilogue (void); +extern int avr_epilogue_uses (int regno); + +extern void avr_output_bld (rtx operands[], int bit_nr); +extern void avr_output_addr_vec_elt (FILE *stream, int value); +extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]); + +extern int extra_constraint_Q (rtx x); +extern int adjust_insn_length (rtx insn, int len); +extern rtx avr_libcall_value (enum machine_mode mode); +extern const char *output_reload_inhi (rtx insn, rtx *operands, int *len); +extern const char *output_reload_insisf (rtx insn, rtx *operands, int *len); +extern enum reg_class secondary_input_reload_class (enum reg_class, + enum machine_mode, + rtx); +extern void notice_update_cc (rtx body, rtx insn); +extern void print_operand (FILE *file, rtx x, int code); +extern void print_operand_address (FILE *file, rtx addr); +extern int reg_unused_after (rtx insn, rtx reg); +extern int _reg_unused_after (rtx insn, rtx reg); +extern int avr_jump_mode (rtx x, rtx insn); +extern int byte_immediate_operand (rtx op, enum machine_mode mode); +extern int test_hard_reg_class (enum reg_class rclass, rtx x); +extern int jump_over_one_insn_p (rtx insn, rtx dest); + +extern int avr_hard_regno_mode_ok (int regno, enum machine_mode mode); +extern void final_prescan_insn (rtx insn, rtx *operand, int num_operands); +extern int avr_simplify_comparison_p (enum machine_mode mode, + RTX_CODE op, rtx x); +extern RTX_CODE avr_normalize_condition (RTX_CODE condition); +extern int compare_eq_p (rtx insn); +extern void out_shift_with_cnt (const char *templ, rtx insn, + rtx operands[], int *len, int t_len); +extern rtx avr_incoming_return_addr_rtx (void); +#endif /* RTX_CODE */ + +#ifdef HAVE_MACHINE_MODES +extern int class_max_nregs (enum reg_class rclass, enum machine_mode mode); +#endif /* HAVE_MACHINE_MODES */ + +#ifdef REAL_VALUE_TYPE +extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n); +#endif diff --git a/gcc/config/avr/avr-stdint.h b/gcc/config/avr/avr-stdint.h new file mode 100644 index 000000000..c3ec3ce9f --- /dev/null +++ b/gcc/config/avr/avr-stdint.h @@ -0,0 +1,66 @@ +/* Definitions for types on systems using newlib. + Copyright (C) 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* + The intention of this file is to supply definitions that work with + avr-gcc's -mint8 that sets int to an 8-bit type. + + This file is intended to yield the same results as newlib-stdint.h, + but there are some differences to newlib-stdint.h: + + - AVR is an 8-bit architecture that cannot access 16-bit values + atomically, this SIG_ATOMIC_TYPE is "char". + + - For the same reason, [u]int_fast8_t is defined as 8-bit type. 
+ +*/ + +#define SIG_ATOMIC_TYPE "char" + +#define INT8_TYPE "signed char" +#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "short int" : "long int") +#define INT32_TYPE (INT_TYPE_SIZE == 16 ? "long int" : "long long int") +#define INT64_TYPE (INT_TYPE_SIZE == 16 ? "long long int" : 0) +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "short unsigned int" : "long unsigned int") +#define UINT32_TYPE (INT_TYPE_SIZE == 16 ? "long unsigned int" : "long long unsigned int") +#define UINT64_TYPE (INT_TYPE_SIZE == 16 ? "long long unsigned int" : 0) + +#define INT_LEAST8_TYPE INT8_TYPE +#define INT_LEAST16_TYPE INT16_TYPE +#define INT_LEAST32_TYPE INT32_TYPE +#define INT_LEAST64_TYPE INT64_TYPE +#define UINT_LEAST8_TYPE UINT8_TYPE +#define UINT_LEAST16_TYPE UINT16_TYPE +#define UINT_LEAST32_TYPE UINT32_TYPE +#define UINT_LEAST64_TYPE UINT64_TYPE + +#define INT_FAST8_TYPE INT8_TYPE +#define INT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "int" : INT16_TYPE) +#define INT_FAST32_TYPE INT32_TYPE +#define INT_FAST64_TYPE INT64_TYPE +#define UINT_FAST8_TYPE UINT8_TYPE +#define UINT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : UINT16_TYPE) +#define UINT_FAST32_TYPE UINT32_TYPE +#define UINT_FAST64_TYPE UINT64_TYPE + +#define INTPTR_TYPE PTRDIFF_TYPE +#ifndef UINTPTR_TYPE +#define UINTPTR_TYPE SIZE_TYPE +#endif diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c new file mode 100644 index 000000000..e60857980 --- /dev/null +++ b/gcc/config/avr/avr.c @@ -0,0 +1,6416 @@ +/* Subroutines for insn-output.c for ATMEL AVR micro controllers + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008, + 2009, 2010 Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "reload.h" +#include "tree.h" +#include "output.h" +#include "expr.h" +#include "diagnostic-core.h" +#include "obstack.h" +#include "function.h" +#include "recog.h" +#include "ggc.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "params.h" +#include "df.h" + +/* Maximal allowed offset for an address in the LD command */ +#define MAX_LD_OFFSET(MODE) (64 - (signed)GET_MODE_SIZE (MODE)) + +static void avr_option_override (void); +static int avr_naked_function_p (tree); +static int interrupt_function_p (tree); +static int signal_function_p (tree); +static int avr_OS_task_function_p (tree); +static int avr_OS_main_function_p (tree); +static int avr_regs_to_save (HARD_REG_SET *); +static int get_sequence_length (rtx insns); +static int sequent_regs_live (void); +static const char *ptrreg_to_str (int); +static const char *cond_string (enum rtx_code); +static int avr_num_arg_regs (enum machine_mode, const_tree); + +static RTX_CODE compare_condition (rtx insn); +static rtx avr_legitimize_address (rtx, rtx, enum machine_mode); +static int compare_sign_p (rtx insn); +static tree avr_handle_progmem_attribute (tree *, tree, tree, int, bool *); +static tree avr_handle_fndecl_attribute (tree *, tree, tree, int, bool *); +static tree avr_handle_fntype_attribute (tree *, tree, tree, int, bool *); +static bool avr_assemble_integer (rtx, unsigned int, int); +static void avr_file_start (void); +static void avr_file_end (void); +static bool avr_legitimate_address_p (enum machine_mode, rtx, bool); +static void avr_asm_function_end_prologue (FILE *); +static void avr_asm_function_begin_epilogue (FILE *); +static bool avr_cannot_modify_jumps_p (void); +static rtx avr_function_value (const_tree, const_tree, bool); +static void avr_insert_attributes (tree, tree *); +static void avr_asm_init_sections (void); +static unsigned int avr_section_type_flags (tree, const char *, int); +static void avr_encode_section_info (tree, rtx, int); +static void avr_reorg (void); +static void avr_asm_out_ctor (rtx, int); +static void avr_asm_out_dtor (rtx, int); +static int avr_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +static int avr_memory_move_cost (enum machine_mode, reg_class_t, bool); +static int avr_operand_rtx_cost (rtx, enum machine_mode, enum rtx_code, bool); +static bool avr_rtx_costs (rtx, int, int, int *, bool); +static int avr_address_cost (rtx, bool); +static bool avr_return_in_memory (const_tree, const_tree); +static struct machine_function * avr_init_machine_status (void); +static rtx avr_builtin_setjmp_frame_value (void); +static bool avr_hard_regno_scratch_ok (unsigned int); +static unsigned int avr_case_values_threshold (void); +static bool avr_frame_pointer_required_p (void); +static bool avr_can_eliminate (const int, const int); +static bool avr_allocate_stack_slots_for_args (void); +static bool avr_class_likely_spilled_p (reg_class_t c); +static rtx avr_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static void avr_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static void avr_help (void); + +/* Allocate registers from r25 to r8 for parameters for function calls. 
*/ +#define FIRST_CUM_REG 26 + +/* Temporary register RTX (gen_rtx_REG (QImode, TMP_REGNO)) */ +static GTY(()) rtx tmp_reg_rtx; + +/* Zeroed register RTX (gen_rtx_REG (QImode, ZERO_REGNO)) */ +static GTY(()) rtx zero_reg_rtx; + +/* AVR register names {"r0", "r1", ..., "r31"} */ +static const char *const avr_regnames[] = REGISTER_NAMES; + +/* Preprocessor macros to define depending on MCU type. */ +const char *avr_extra_arch_macro; + +/* Current architecture. */ +const struct base_arch_s *avr_current_arch; + +/* Current device. */ +const struct mcu_type_s *avr_current_device; + +section *progmem_section; + +/* AVR attributes. */ +static const struct attribute_spec avr_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { "progmem", 0, 0, false, false, false, avr_handle_progmem_attribute }, + { "signal", 0, 0, true, false, false, avr_handle_fndecl_attribute }, + { "interrupt", 0, 0, true, false, false, avr_handle_fndecl_attribute }, + { "naked", 0, 0, false, true, true, avr_handle_fntype_attribute }, + { "OS_task", 0, 0, false, true, true, avr_handle_fntype_attribute }, + { "OS_main", 0, 0, false, true, true, avr_handle_fntype_attribute }, + { NULL, 0, 0, false, false, false, NULL } +}; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options avr_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Initialize the GCC target structure. */ +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.long\t" +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER avr_assemble_integer +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START avr_file_start +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END avr_file_end + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE avr_asm_function_end_prologue +#undef TARGET_ASM_FUNCTION_BEGIN_EPILOGUE +#define TARGET_ASM_FUNCTION_BEGIN_EPILOGUE avr_asm_function_begin_epilogue +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE avr_function_value +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE avr_attribute_table +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES avr_insert_attributes +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS avr_section_type_flags +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO avr_encode_section_info +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST avr_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST avr_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS avr_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST avr_address_cost +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG avr_reorg +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG avr_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE avr_function_arg_advance + +#undef 
TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS avr_legitimize_address + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY avr_return_in_memory + +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + +#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE +#define TARGET_BUILTIN_SETJMP_FRAME_VALUE avr_builtin_setjmp_frame_value + +#undef TARGET_HARD_REGNO_SCRATCH_OK +#define TARGET_HARD_REGNO_SCRATCH_OK avr_hard_regno_scratch_ok +#undef TARGET_CASE_VALUES_THRESHOLD +#define TARGET_CASE_VALUES_THRESHOLD avr_case_values_threshold + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P avr_legitimate_address_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED avr_frame_pointer_required_p +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE avr_can_eliminate + +#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS +#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS avr_allocate_stack_slots_for_args + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P avr_class_likely_spilled_p + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE avr_option_override + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE avr_option_optimization_table + +#undef TARGET_CANNOT_MODIFY_JUMPS_P +#define TARGET_CANNOT_MODIFY_JUMPS_P avr_cannot_modify_jumps_p + +#undef TARGET_HELP +#define TARGET_HELP avr_help + +#undef TARGET_EXCEPT_UNWIND_INFO +#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info + +struct gcc_target targetm = TARGET_INITIALIZER; + +static void +avr_option_override (void) +{ + const struct mcu_type_s *t; + + flag_delete_null_pointer_checks = 0; + + for (t = avr_mcu_types; t->name; t++) + if (strcmp (t->name, avr_mcu_name) == 0) + break; + + if (!t->name) + { + error ("unrecognized argument to -mmcu= option: %qs", avr_mcu_name); + inform (input_location, "See --target-help for supported MCUs"); + } + + avr_current_device = t; + avr_current_arch = &avr_arch_types[avr_current_device->arch]; + avr_extra_arch_macro = avr_current_device->macro; + + tmp_reg_rtx = gen_rtx_REG (QImode, TMP_REGNO); + zero_reg_rtx = gen_rtx_REG (QImode, ZERO_REGNO); + + init_machine_status = avr_init_machine_status; +} + +/* Implement TARGET_HELP */ +/* Report extra information for --target-help */ + +static void +avr_help (void) +{ + const struct mcu_type_s *t; + const char * const indent = " "; + int len; + + /* Give a list of MCUs that are accepted by -mmcu=* . + Note that MCUs supported by the compiler might differ from + MCUs supported by binutils. */ + + len = strlen (indent); + printf ("Known MCU names:\n%s", indent); + + /* Print a blank-separated list of all supported MCUs */ + + for (t = avr_mcu_types; t->name; t++) + { + printf ("%s ", t->name); + len += 1 + strlen (t->name); + + /* Break long lines */ + + if (len > 66 && (t+1)->name) + { + printf ("\n%s", indent); + len = strlen (indent); + } + } + + printf ("\n\n"); +} + +/* return register class from register number. 
*/ + +static const enum reg_class reg_class_tab[]={ + GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS, + GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS, + GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS,GENERAL_REGS, + GENERAL_REGS, /* r0 - r15 */ + LD_REGS,LD_REGS,LD_REGS,LD_REGS,LD_REGS,LD_REGS,LD_REGS, + LD_REGS, /* r16 - 23 */ + ADDW_REGS,ADDW_REGS, /* r24,r25 */ + POINTER_X_REGS,POINTER_X_REGS, /* r26,27 */ + POINTER_Y_REGS,POINTER_Y_REGS, /* r28,r29 */ + POINTER_Z_REGS,POINTER_Z_REGS, /* r30,r31 */ + STACK_REG,STACK_REG /* SPL,SPH */ +}; + +/* Function to set up the backend function structure. */ + +static struct machine_function * +avr_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Return register class for register R. */ + +enum reg_class +avr_regno_reg_class (int r) +{ + if (r <= 33) + return reg_class_tab[r]; + return ALL_REGS; +} + +/* Return nonzero if FUNC is a naked function. */ + +static int +avr_naked_function_p (tree func) +{ + tree a; + + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + a = lookup_attribute ("naked", TYPE_ATTRIBUTES (TREE_TYPE (func))); + return a != NULL_TREE; +} + +/* Return nonzero if FUNC is an interrupt function as specified + by the "interrupt" attribute. */ + +static int +interrupt_function_p (tree func) +{ + tree a; + + if (TREE_CODE (func) != FUNCTION_DECL) + return 0; + + a = lookup_attribute ("interrupt", DECL_ATTRIBUTES (func)); + return a != NULL_TREE; +} + +/* Return nonzero if FUNC is a signal function as specified + by the "signal" attribute. */ + +static int +signal_function_p (tree func) +{ + tree a; + + if (TREE_CODE (func) != FUNCTION_DECL) + return 0; + + a = lookup_attribute ("signal", DECL_ATTRIBUTES (func)); + return a != NULL_TREE; +} + +/* Return nonzero if FUNC is a OS_task function. */ + +static int +avr_OS_task_function_p (tree func) +{ + tree a; + + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + a = lookup_attribute ("OS_task", TYPE_ATTRIBUTES (TREE_TYPE (func))); + return a != NULL_TREE; +} + +/* Return nonzero if FUNC is a OS_main function. */ + +static int +avr_OS_main_function_p (tree func) +{ + tree a; + + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + a = lookup_attribute ("OS_main", TYPE_ATTRIBUTES (TREE_TYPE (func))); + return a != NULL_TREE; +} + +/* Return the number of hard registers to push/pop in the prologue/epilogue + of the current function, and optionally store these registers in SET. */ + +static int +avr_regs_to_save (HARD_REG_SET *set) +{ + int reg, count; + int int_or_sig_p = (interrupt_function_p (current_function_decl) + || signal_function_p (current_function_decl)); + + if (set) + CLEAR_HARD_REG_SET (*set); + count = 0; + + /* No need to save any registers if the function never returns or + is have "OS_task" or "OS_main" attribute. */ + if (TREE_THIS_VOLATILE (current_function_decl) + || cfun->machine->is_OS_task + || cfun->machine->is_OS_main) + return 0; + + for (reg = 0; reg < 32; reg++) + { + /* Do not push/pop __tmp_reg__, __zero_reg__, as well as + any global register variables. 
*/ + if (fixed_regs[reg]) + continue; + + if ((int_or_sig_p && !current_function_is_leaf && call_used_regs[reg]) + || (df_regs_ever_live_p (reg) + && (int_or_sig_p || !call_used_regs[reg]) + && !(frame_pointer_needed + && (reg == REG_Y || reg == (REG_Y+1))))) + { + if (set) + SET_HARD_REG_BIT (*set, reg); + count++; + } + } + return count; +} + + +/* Implement `TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS' */ + +static bool +avr_allocate_stack_slots_for_args (void) +{ + return !cfun->machine->is_naked; +} + + +/* Return true if register FROM can be eliminated via register TO. */ + +bool +avr_can_eliminate (const int from, const int to) +{ + return ((from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) + || ((from == FRAME_POINTER_REGNUM + || from == FRAME_POINTER_REGNUM + 1) + && !frame_pointer_needed)); +} + +/* Compute offset between arg_pointer and frame_pointer. */ + +int +avr_initial_elimination_offset (int from, int to) +{ + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return 0; + else + { + int offset = frame_pointer_needed ? 2 : 0; + int avr_pc_size = AVR_HAVE_EIJMP_EICALL ? 3 : 2; + + offset += avr_regs_to_save (NULL); + return get_frame_size () + (avr_pc_size) + 1 + offset; + } +} + +/* Actual start of frame is virtual_stack_vars_rtx this is offset from + frame pointer by +STARTING_FRAME_OFFSET. + Using saved frame = virtual_stack_vars_rtx - STARTING_FRAME_OFFSET + avoids creating add/sub of offset in nonlocal goto and setjmp. */ + +rtx avr_builtin_setjmp_frame_value (void) +{ + return gen_rtx_MINUS (Pmode, virtual_stack_vars_rtx, + gen_int_mode (STARTING_FRAME_OFFSET, Pmode)); +} + +/* Return contents of MEM at frame pointer + stack size + 1 (+2 if 3 byte PC). + This is return address of function. */ +rtx +avr_return_addr_rtx (int count, rtx tem) +{ + rtx r; + + /* Can only return this functions return address. Others not supported. */ + if (count) + return NULL; + + if (AVR_3_BYTE_PC) + { + r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+2"); + warning (0, "'builtin_return_address' contains only 2 bytes of address"); + } + else + r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+1"); + + r = gen_rtx_PLUS (Pmode, tem, r); + r = gen_frame_mem (Pmode, memory_address (Pmode, r)); + r = gen_rtx_ROTATE (HImode, r, GEN_INT (8)); + return r; +} + +/* Return 1 if the function epilogue is just a single "ret". */ + +int +avr_simple_epilogue (void) +{ + return (! frame_pointer_needed + && get_frame_size () == 0 + && avr_regs_to_save (NULL) == 0 + && ! interrupt_function_p (current_function_decl) + && ! signal_function_p (current_function_decl) + && ! avr_naked_function_p (current_function_decl) + && ! TREE_THIS_VOLATILE (current_function_decl)); +} + +/* This function checks sequence of live registers. */ + +static int +sequent_regs_live (void) +{ + int reg; + int live_seq=0; + int cur_seq=0; + + for (reg = 0; reg < 18; ++reg) + { + if (fixed_regs[reg]) + { + /* Don't recognize sequences that contain global register + variables. */ + + if (live_seq != 0) + return 0; + else + continue; + } + + if (!call_used_regs[reg]) + { + if (df_regs_ever_live_p (reg)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + } + } + + if (!frame_pointer_needed) + { + if (df_regs_ever_live_p (REG_Y)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + + if (df_regs_ever_live_p (REG_Y+1)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + } + else + { + cur_seq += 2; + live_seq += 2; + } + return (cur_seq == live_seq) ? 
live_seq : 0; +} + +/* Obtain the length sequence of insns. */ + +int +get_sequence_length (rtx insns) +{ + rtx insn; + int length; + + for (insn = insns, length = 0; insn; insn = NEXT_INSN (insn)) + length += get_attr_length (insn); + + return length; +} + +/* Implement INCOMING_RETURN_ADDR_RTX. */ + +rtx +avr_incoming_return_addr_rtx (void) +{ + /* The return address is at the top of the stack. Note that the push + was via post-decrement, which means the actual address is off by one. */ + return gen_frame_mem (HImode, plus_constant (stack_pointer_rtx, 1)); +} + +/* Helper for expand_prologue. Emit a push of a byte register. */ + +static void +emit_push_byte (unsigned regno, bool frame_related_p) +{ + rtx mem, reg, insn; + + mem = gen_rtx_POST_DEC (HImode, stack_pointer_rtx); + mem = gen_frame_mem (QImode, mem); + reg = gen_rtx_REG (QImode, regno); + + insn = emit_insn (gen_rtx_SET (VOIDmode, mem, reg)); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + + cfun->machine->stack_usage++; +} + + +/* Output function prologue. */ + +void +expand_prologue (void) +{ + int live_seq; + HARD_REG_SET set; + int minimize; + HOST_WIDE_INT size = get_frame_size(); + rtx insn; + + /* Init cfun->machine. */ + cfun->machine->is_naked = avr_naked_function_p (current_function_decl); + cfun->machine->is_interrupt = interrupt_function_p (current_function_decl); + cfun->machine->is_signal = signal_function_p (current_function_decl); + cfun->machine->is_OS_task = avr_OS_task_function_p (current_function_decl); + cfun->machine->is_OS_main = avr_OS_main_function_p (current_function_decl); + cfun->machine->stack_usage = 0; + + /* Prologue: naked. */ + if (cfun->machine->is_naked) + { + return; + } + + avr_regs_to_save (&set); + live_seq = sequent_regs_live (); + minimize = (TARGET_CALL_PROLOGUES + && !cfun->machine->is_interrupt + && !cfun->machine->is_signal + && !cfun->machine->is_OS_task + && !cfun->machine->is_OS_main + && live_seq); + + if (cfun->machine->is_interrupt || cfun->machine->is_signal) + { + /* Enable interrupts. */ + if (cfun->machine->is_interrupt) + emit_insn (gen_enable_interrupt ()); + + /* Push zero reg. */ + emit_push_byte (ZERO_REGNO, true); + + /* Push tmp reg. */ + emit_push_byte (TMP_REGNO, true); + + /* Push SREG. */ + /* ??? There's no dwarf2 column reserved for SREG. */ + emit_move_insn (tmp_reg_rtx, gen_rtx_MEM (QImode, GEN_INT (SREG_ADDR))); + emit_push_byte (TMP_REGNO, false); + + /* Push RAMPZ. */ + /* ??? There's no dwarf2 column reserved for RAMPZ. */ + if (AVR_HAVE_RAMPZ + && TEST_HARD_REG_BIT (set, REG_Z) + && TEST_HARD_REG_BIT (set, REG_Z + 1)) + { + emit_move_insn (tmp_reg_rtx, + gen_rtx_MEM (QImode, GEN_INT (RAMPZ_ADDR))); + emit_push_byte (TMP_REGNO, false); + } + + /* Clear zero reg. */ + emit_move_insn (zero_reg_rtx, const0_rtx); + + /* Prevent any attempt to delete the setting of ZERO_REG! */ + emit_use (zero_reg_rtx); + } + if (minimize && (frame_pointer_needed + || (AVR_2_BYTE_PC && live_seq > 6) + || live_seq > 7)) + { + int first_reg, reg, offset; + + emit_move_insn (gen_rtx_REG (HImode, REG_X), + gen_int_mode (size, HImode)); + + insn = emit_insn (gen_call_prologue_saves + (gen_int_mode (live_seq, HImode), + gen_int_mode (size + live_seq, HImode))); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Describe the effect of the unspec_volatile call to prologue_saves. + Note that this formulation assumes that add_reg_note pushes the + notes to the front. Thus we build them in the reverse order of + how we want dwarf2out to process them. 
*/ + + /* The function does always set frame_pointer_rtx, but whether that + is going to be permanent in the function is frame_pointer_needed. */ + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, + (frame_pointer_needed + ? frame_pointer_rtx : stack_pointer_rtx), + plus_constant (stack_pointer_rtx, + -(size + live_seq)))); + + /* Note that live_seq always contains r28+r29, but the other + registers to be saved are all below 18. */ + first_reg = 18 - (live_seq - 2); + + for (reg = 29, offset = -live_seq + 1; + reg >= first_reg; + reg = (reg == 28 ? 17 : reg - 1), ++offset) + { + rtx m, r; + + m = gen_rtx_MEM (QImode, plus_constant (stack_pointer_rtx, offset)); + r = gen_rtx_REG (QImode, reg); + add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, m, r)); + } + + cfun->machine->stack_usage += size + live_seq; + } + else + { + int reg; + for (reg = 0; reg < 32; ++reg) + if (TEST_HARD_REG_BIT (set, reg)) + emit_push_byte (reg, true); + + if (frame_pointer_needed) + { + if (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main)) + { + /* Push frame pointer. Always be consistent about the + ordering of pushes -- epilogue_restores expects the + register pair to be pushed low byte first. */ + emit_push_byte (REG_Y, true); + emit_push_byte (REG_Y + 1, true); + } + + if (!size) + { + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + /* Creating a frame can be done by direct manipulation of the + stack or via the frame pointer. These two methods are: + fp=sp + fp-=size + sp=fp + OR + sp-=size + fp=sp + the optimum method depends on function type, stack and frame size. + To avoid a complex logic, both methods are tested and shortest + is selected. */ + rtx myfp; + rtx fp_plus_insns; + + if (AVR_HAVE_8BIT_SP) + { + /* The high byte (r29) doesn't change. Prefer 'subi' + (1 cycle) over 'sbiw' (2 cycles, same size). */ + myfp = gen_rtx_REG (QImode, FRAME_POINTER_REGNUM); + } + else + { + /* Normal sized addition. */ + myfp = frame_pointer_rtx; + } + + /* Method 1-Adjust frame pointer. */ + start_sequence (); + + /* Normally the dwarf2out frame-related-expr interpreter does + not expect to have the CFA change once the frame pointer is + set up. Thus we avoid marking the move insn below and + instead indicate that the entire operation is complete after + the frame pointer subtraction is done. */ + + emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + + insn = emit_move_insn (myfp, plus_constant (myfp, -size)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, frame_pointer_rtx, + plus_constant (stack_pointer_rtx, + -size))); + + /* Copy to stack pointer. Note that since we've already + changed the CFA to the frame pointer this operation + need not be annotated at all. */ + if (AVR_HAVE_8BIT_SP) + { + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + } + else if (TARGET_NO_INTERRUPTS + || cfun->machine->is_signal + || cfun->machine->is_OS_main) + { + emit_insn (gen_movhi_sp_r_irq_off (stack_pointer_rtx, + frame_pointer_rtx)); + } + else if (cfun->machine->is_interrupt) + { + emit_insn (gen_movhi_sp_r_irq_on (stack_pointer_rtx, + frame_pointer_rtx)); + } + else + { + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + } + + fp_plus_insns = get_insns (); + end_sequence (); + + /* Method 2-Adjust Stack pointer. 
*/ + if (size <= 6) + { + rtx sp_plus_insns; + + start_sequence (); + + insn = plus_constant (stack_pointer_rtx, -size); + insn = emit_move_insn (stack_pointer_rtx, insn); + RTX_FRAME_RELATED_P (insn) = 1; + + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + sp_plus_insns = get_insns (); + end_sequence (); + + /* Use shortest method. */ + if (get_sequence_length (sp_plus_insns) + < get_sequence_length (fp_plus_insns)) + emit_insn (sp_plus_insns); + else + emit_insn (fp_plus_insns); + } + else + emit_insn (fp_plus_insns); + + cfun->machine->stack_usage += size; + } + } + } + + if (flag_stack_usage) + current_function_static_stack_size = cfun->machine->stack_usage; +} + +/* Output summary at end of function prologue. */ + +static void +avr_asm_function_end_prologue (FILE *file) +{ + if (cfun->machine->is_naked) + { + fputs ("/* prologue: naked */\n", file); + } + else + { + if (cfun->machine->is_interrupt) + { + fputs ("/* prologue: Interrupt */\n", file); + } + else if (cfun->machine->is_signal) + { + fputs ("/* prologue: Signal */\n", file); + } + else + fputs ("/* prologue: function */\n", file); + } + fprintf (file, "/* frame size = " HOST_WIDE_INT_PRINT_DEC " */\n", + get_frame_size()); + fprintf (file, "/* stack size = %d */\n", + cfun->machine->stack_usage); + /* Create symbol stack offset here so all functions have it. Add 1 to stack + usage for offset so that SP + .L__stack_offset = return address. */ + fprintf (file, ".L__stack_usage = %d\n", cfun->machine->stack_usage); +} + + +/* Implement EPILOGUE_USES. */ + +int +avr_epilogue_uses (int regno ATTRIBUTE_UNUSED) +{ + if (reload_completed + && cfun->machine + && (cfun->machine->is_interrupt || cfun->machine->is_signal)) + return 1; + return 0; +} + +/* Helper for expand_epilogue. Emit a pop of a byte register. */ + +static void +emit_pop_byte (unsigned regno) +{ + rtx mem, reg; + + mem = gen_rtx_PRE_INC (HImode, stack_pointer_rtx); + mem = gen_frame_mem (QImode, mem); + reg = gen_rtx_REG (QImode, regno); + + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); +} + +/* Output RTL epilogue. */ + +void +expand_epilogue (void) +{ + int reg; + int live_seq; + HARD_REG_SET set; + int minimize; + HOST_WIDE_INT size = get_frame_size(); + + /* epilogue: naked */ + if (cfun->machine->is_naked) + { + emit_jump_insn (gen_return ()); + return; + } + + avr_regs_to_save (&set); + live_seq = sequent_regs_live (); + minimize = (TARGET_CALL_PROLOGUES + && !cfun->machine->is_interrupt + && !cfun->machine->is_signal + && !cfun->machine->is_OS_task + && !cfun->machine->is_OS_main + && live_seq); + + if (minimize && (frame_pointer_needed || live_seq > 4)) + { + if (frame_pointer_needed) + { + /* Get rid of frame. */ + emit_move_insn(frame_pointer_rtx, + gen_rtx_PLUS (HImode, frame_pointer_rtx, + gen_int_mode (size, HImode))); + } + else + { + emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + } + + emit_insn (gen_epilogue_restores (gen_int_mode (live_seq, HImode))); + } + else + { + if (frame_pointer_needed) + { + if (size) + { + /* Try two methods to adjust stack and select shortest. */ + rtx myfp; + rtx fp_plus_insns; + + if (AVR_HAVE_8BIT_SP) + { + /* The high byte (r29) doesn't change - prefer 'subi' + (1 cycle) over 'sbiw' (2 cycles, same size). */ + myfp = gen_rtx_REG (QImode, FRAME_POINTER_REGNUM); + } + else + { + /* Normal sized addition. */ + myfp = frame_pointer_rtx; + } + + /* Method 1-Adjust frame pointer. 
*/ + start_sequence (); + + emit_move_insn (myfp, plus_constant (myfp, size)); + + /* Copy to stack pointer. */ + if (AVR_HAVE_8BIT_SP) + { + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + } + else if (TARGET_NO_INTERRUPTS + || cfun->machine->is_signal) + { + emit_insn (gen_movhi_sp_r_irq_off (stack_pointer_rtx, + frame_pointer_rtx)); + } + else if (cfun->machine->is_interrupt) + { + emit_insn (gen_movhi_sp_r_irq_on (stack_pointer_rtx, + frame_pointer_rtx)); + } + else + { + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + } + + fp_plus_insns = get_insns (); + end_sequence (); + + /* Method 2-Adjust Stack pointer. */ + if (size <= 5) + { + rtx sp_plus_insns; + + start_sequence (); + + emit_move_insn (stack_pointer_rtx, + plus_constant (stack_pointer_rtx, size)); + + sp_plus_insns = get_insns (); + end_sequence (); + + /* Use shortest method. */ + if (get_sequence_length (sp_plus_insns) + < get_sequence_length (fp_plus_insns)) + emit_insn (sp_plus_insns); + else + emit_insn (fp_plus_insns); + } + else + emit_insn (fp_plus_insns); + } + if (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main)) + { + /* Restore previous frame_pointer. See expand_prologue for + rationale for not using pophi. */ + emit_pop_byte (REG_Y + 1); + emit_pop_byte (REG_Y); + } + } + + /* Restore used registers. */ + for (reg = 31; reg >= 0; --reg) + if (TEST_HARD_REG_BIT (set, reg)) + emit_pop_byte (reg); + + if (cfun->machine->is_interrupt || cfun->machine->is_signal) + { + /* Restore RAMPZ using tmp reg as scratch. */ + if (AVR_HAVE_RAMPZ + && TEST_HARD_REG_BIT (set, REG_Z) + && TEST_HARD_REG_BIT (set, REG_Z + 1)) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (gen_rtx_MEM (QImode, GEN_INT (RAMPZ_ADDR)), + tmp_reg_rtx); + } + + /* Restore SREG using tmp reg as scratch. */ + emit_pop_byte (TMP_REGNO); + + emit_move_insn (gen_rtx_MEM (QImode, GEN_INT (SREG_ADDR)), + tmp_reg_rtx); + + /* Restore tmp REG. */ + emit_pop_byte (TMP_REGNO); + + /* Restore zero REG. */ + emit_pop_byte (ZERO_REGNO); + } + + emit_jump_insn (gen_return ()); + } +} + +/* Output summary messages at beginning of function epilogue. */ + +static void +avr_asm_function_begin_epilogue (FILE *file) +{ + fprintf (file, "/* epilogue start */\n"); +} + + +/* Implement TARGET_CANNOT_MODITY_JUMPS_P */ + +static bool +avr_cannot_modify_jumps_p (void) +{ + + /* Naked Functions must not have any instructions after + their epilogue, see PR42240 */ + + if (reload_completed + && cfun->machine + && cfun->machine->is_naked) + { + return true; + } + + return false; +} + + +/* Return nonzero if X (an RTX) is a legitimate memory address on the target + machine for a memory operand of mode MODE. */ + +bool +avr_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + enum reg_class r = NO_REGS; + + if (TARGET_ALL_DEBUG) + { + fprintf (stderr, "mode: (%s) %s %s %s %s:", + GET_MODE_NAME(mode), + strict ? "(strict)": "", + reload_completed ? "(reload_completed)": "", + reload_in_progress ? "(reload_in_progress)": "", + reg_renumber ? "(reg_renumber)" : ""); + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) >= 0 + && INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode) + && reg_renumber + ) + fprintf (stderr, "(r%d ---> r%d)", REGNO (XEXP (x, 0)), + true_regnum (XEXP (x, 0))); + debug_rtx (x); + } + + if (REG_P (x) && (strict ? 
REG_OK_FOR_BASE_STRICT_P (x) + : REG_OK_FOR_BASE_NOSTRICT_P (x))) + r = POINTER_REGS; + else if (CONSTANT_ADDRESS_P (x)) + r = ALL_REGS; + else if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) >= 0) + { + int fit = INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode); + if (fit) + { + if (! strict + || REGNO (XEXP (x,0)) == REG_X + || REGNO (XEXP (x,0)) == REG_Y + || REGNO (XEXP (x,0)) == REG_Z) + r = BASE_POINTER_REGS; + if (XEXP (x,0) == frame_pointer_rtx + || XEXP (x,0) == arg_pointer_rtx) + r = BASE_POINTER_REGS; + } + else if (frame_pointer_needed && XEXP (x,0) == frame_pointer_rtx) + r = POINTER_Y_REGS; + } + else if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC) + && REG_P (XEXP (x, 0)) + && (strict ? REG_OK_FOR_BASE_STRICT_P (XEXP (x, 0)) + : REG_OK_FOR_BASE_NOSTRICT_P (XEXP (x, 0)))) + { + r = POINTER_REGS; + } + if (TARGET_ALL_DEBUG) + { + fprintf (stderr, " ret = %c\n", r + '0'); + } + return r == NO_REGS ? 0 : (int)r; +} + +/* Attempts to replace X with a valid + memory address for an operand of mode MODE */ + +rtx +avr_legitimize_address (rtx x, rtx oldx, enum machine_mode mode) +{ + x = oldx; + if (TARGET_ALL_DEBUG) + { + fprintf (stderr, "legitimize_address mode: %s", GET_MODE_NAME(mode)); + debug_rtx (oldx); + } + + if (GET_CODE (oldx) == PLUS + && REG_P (XEXP (oldx,0))) + { + if (REG_P (XEXP (oldx,1))) + x = force_reg (GET_MODE (oldx), oldx); + else if (GET_CODE (XEXP (oldx, 1)) == CONST_INT) + { + int offs = INTVAL (XEXP (oldx,1)); + if (frame_pointer_rtx != XEXP (oldx,0)) + if (offs > MAX_LD_OFFSET (mode)) + { + if (TARGET_ALL_DEBUG) + fprintf (stderr, "force_reg (big offset)\n"); + x = force_reg (GET_MODE (oldx), oldx); + } + } + } + return x; +} + + +/* Return a pointer register name as a string. */ + +static const char * +ptrreg_to_str (int regno) +{ + switch (regno) + { + case REG_X: return "X"; + case REG_Y: return "Y"; + case REG_Z: return "Z"; + default: + output_operand_lossage ("address operand requires constraint for X, Y, or Z register"); + } + return NULL; +} + +/* Return the condition name as a string. + Used in conditional jump constructing */ + +static const char * +cond_string (enum rtx_code code) +{ + switch (code) + { + case NE: + return "ne"; + case EQ: + return "eq"; + case GE: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return "pl"; + else + return "ge"; + case LT: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return "mi"; + else + return "lt"; + case GEU: + return "sh"; + case LTU: + return "lo"; + default: + gcc_unreachable (); + } +} + +/* Output ADDR to FILE as address. */ + +void +print_operand_address (FILE *file, rtx addr) +{ + switch (GET_CODE (addr)) + { + case REG: + fprintf (file, ptrreg_to_str (REGNO (addr))); + break; + + case PRE_DEC: + fprintf (file, "-%s", ptrreg_to_str (REGNO (XEXP (addr, 0)))); + break; + + case POST_INC: + fprintf (file, "%s+", ptrreg_to_str (REGNO (XEXP (addr, 0)))); + break; + + default: + if (CONSTANT_ADDRESS_P (addr) + && text_segment_operand (addr, VOIDmode)) + { + rtx x = XEXP (addr,0); + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x,1)) == CONST_INT) + { + /* Assembler gs() will implant word address. Make offset + a byte offset inside gs() for assembler. This is + needed because the more logical (constant+gs(sym)) is not + accepted by gas. For 128K and lower devices this is ok. For + large devices it will create a Trampoline to offset from symbol + which may not be what the user really wanted. 
*/ + fprintf (file, "gs("); + output_addr_const (file, XEXP (x,0)); + fprintf (file,"+" HOST_WIDE_INT_PRINT_DEC ")", 2 * INTVAL (XEXP (x,1))); + if (AVR_3_BYTE_PC) + if (warning (0, "pointer offset from symbol maybe incorrect")) + { + output_addr_const (stderr, addr); + fprintf(stderr,"\n"); + } + } + else + { + fprintf (file, "gs("); + output_addr_const (file, addr); + fprintf (file, ")"); + } + } + else + output_addr_const (file, addr); + } +} + + +/* Output X as assembler operand to file FILE. */ + +void +print_operand (FILE *file, rtx x, int code) +{ + int abcd = 0; + + if (code >= 'A' && code <= 'D') + abcd = code - 'A'; + + if (code == '~') + { + if (!AVR_HAVE_JMP_CALL) + fputc ('r', file); + } + else if (code == '!') + { + if (AVR_HAVE_EIJMP_EICALL) + fputc ('e', file); + } + else if (REG_P (x)) + { + if (x == zero_reg_rtx) + fprintf (file, "__zero_reg__"); + else + fprintf (file, reg_names[true_regnum (x) + abcd]); + } + else if (GET_CODE (x) == CONST_INT) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) + abcd); + else if (GET_CODE (x) == MEM) + { + rtx addr = XEXP (x,0); + if (code == 'm') + { + if (!CONSTANT_P (addr)) + fatal_insn ("bad address, not a constant):", addr); + /* Assembler template with m-code is data - not progmem section */ + if (text_segment_operand (addr, VOIDmode)) + if (warning ( 0, "accessing data memory with program memory address")) + { + output_addr_const (stderr, addr); + fprintf(stderr,"\n"); + } + output_addr_const (file, addr); + } + else if (code == 'o') + { + if (GET_CODE (addr) != PLUS) + fatal_insn ("bad address, not (reg+disp):", addr); + + print_operand (file, XEXP (addr, 1), 0); + } + else if (code == 'p' || code == 'r') + { + if (GET_CODE (addr) != POST_INC && GET_CODE (addr) != PRE_DEC) + fatal_insn ("bad address, not post_inc or pre_dec:", addr); + + if (code == 'p') + print_operand_address (file, XEXP (addr, 0)); /* X, Y, Z */ + else + print_operand (file, XEXP (addr, 0), 0); /* r26, r28, r30 */ + } + else if (GET_CODE (addr) == PLUS) + { + print_operand_address (file, XEXP (addr,0)); + if (REGNO (XEXP (addr, 0)) == REG_X) + fatal_insn ("internal compiler error. Bad address:" + ,addr); + fputc ('+', file); + print_operand (file, XEXP (addr,1), code); + } + else + print_operand_address (file, addr); + } + else if (code == 'x') + { + /* Constant progmem address - like used in jmp or call */ + if (0 == text_segment_operand (x, VOIDmode)) + if (warning ( 0, "accessing program memory with data memory address")) + { + output_addr_const (stderr, x); + fprintf(stderr,"\n"); + } + /* Use normal symbol for direct address no linker trampoline needed */ + output_addr_const (file, x); + } + else if (GET_CODE (x) == CONST_DOUBLE) + { + long val; + REAL_VALUE_TYPE rv; + if (GET_MODE (x) != SFmode) + fatal_insn ("internal compiler error. Unknown mode:", x); + REAL_VALUE_FROM_CONST_DOUBLE (rv, x); + REAL_VALUE_TO_TARGET_SINGLE (rv, val); + fprintf (file, "0x%lx", val); + } + else if (code == 'j') + fputs (cond_string (GET_CODE (x)), file); + else if (code == 'k') + fputs (cond_string (reverse_condition (GET_CODE (x))), file); + else + print_operand_address (file, x); +} + +/* Update the condition code in the INSN. */ + +void +notice_update_cc (rtx body ATTRIBUTE_UNUSED, rtx insn) +{ + rtx set; + + switch (get_attr_cc (insn)) + { + case CC_NONE: + /* Insn does not affect CC at all. 
*/ + break; + + case CC_SET_N: + CC_STATUS_INIT; + break; + + case CC_SET_ZN: + set = single_set (insn); + CC_STATUS_INIT; + if (set) + { + cc_status.flags |= CC_NO_OVERFLOW; + cc_status.value1 = SET_DEST (set); + } + break; + + case CC_SET_CZN: + /* Insn sets the Z,N,C flags of CC to recog_operand[0]. + The V flag may or may not be known but that's ok because + alter_cond will change tests to use EQ/NE. */ + set = single_set (insn); + CC_STATUS_INIT; + if (set) + { + cc_status.value1 = SET_DEST (set); + cc_status.flags |= CC_OVERFLOW_UNUSABLE; + } + break; + + case CC_COMPARE: + set = single_set (insn); + CC_STATUS_INIT; + if (set) + cc_status.value1 = SET_SRC (set); + break; + + case CC_CLOBBER: + /* Insn doesn't leave CC in a usable state. */ + CC_STATUS_INIT; + + /* Correct CC for the ashrqi3 with the shift count as CONST_INT != 6 */ + set = single_set (insn); + if (set) + { + rtx src = SET_SRC (set); + + if (GET_CODE (src) == ASHIFTRT + && GET_MODE (src) == QImode) + { + rtx x = XEXP (src, 1); + + if (CONST_INT_P (x) + && IN_RANGE (INTVAL (x), 1, 5)) + { + cc_status.value1 = SET_DEST (set); + cc_status.flags |= CC_OVERFLOW_UNUSABLE; + } + } + } + break; + } +} + +/* Return maximum number of consecutive registers of + class CLASS needed to hold a value of mode MODE. */ + +int +class_max_nregs (enum reg_class rclass ATTRIBUTE_UNUSED,enum machine_mode mode) +{ + return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD); +} + +/* Choose mode for jump insn: + 1 - relative jump in range -63 <= x <= 62 ; + 2 - relative jump in range -2046 <= x <= 2045 ; + 3 - absolute jump (only for ATmega[16]03). */ + +int +avr_jump_mode (rtx x, rtx insn) +{ + int dest_addr = INSN_ADDRESSES (INSN_UID (GET_CODE (x) == LABEL_REF + ? XEXP (x, 0) : x)); + int cur_addr = INSN_ADDRESSES (INSN_UID (insn)); + int jump_distance = cur_addr - dest_addr; + + if (-63 <= jump_distance && jump_distance <= 62) + return 1; + else if (-2046 <= jump_distance && jump_distance <= 2045) + return 2; + else if (AVR_HAVE_JMP_CALL) + return 3; + + return 2; +} + +/* return an AVR condition jump commands. + X is a comparison RTX. + LEN is a number returned by avr_jump_mode function. + if REVERSE nonzero then condition code in X must be reversed. */ + +const char * +ret_cond_branch (rtx x, int len, int reverse) +{ + RTX_CODE cond = reverse ? reverse_condition (GET_CODE (x)) : GET_CODE (x); + + switch (cond) + { + case GT: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return (len == 1 ? (AS1 (breq,.+2) CR_TAB + AS1 (brpl,%0)) : + len == 2 ? (AS1 (breq,.+4) CR_TAB + AS1 (brmi,.+2) CR_TAB + AS1 (rjmp,%0)) : + (AS1 (breq,.+6) CR_TAB + AS1 (brmi,.+4) CR_TAB + AS1 (jmp,%0))); + + else + return (len == 1 ? (AS1 (breq,.+2) CR_TAB + AS1 (brge,%0)) : + len == 2 ? (AS1 (breq,.+4) CR_TAB + AS1 (brlt,.+2) CR_TAB + AS1 (rjmp,%0)) : + (AS1 (breq,.+6) CR_TAB + AS1 (brlt,.+4) CR_TAB + AS1 (jmp,%0))); + case GTU: + return (len == 1 ? (AS1 (breq,.+2) CR_TAB + AS1 (brsh,%0)) : + len == 2 ? (AS1 (breq,.+4) CR_TAB + AS1 (brlo,.+2) CR_TAB + AS1 (rjmp,%0)) : + (AS1 (breq,.+6) CR_TAB + AS1 (brlo,.+4) CR_TAB + AS1 (jmp,%0))); + case LE: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return (len == 1 ? (AS1 (breq,%0) CR_TAB + AS1 (brmi,%0)) : + len == 2 ? (AS1 (breq,.+2) CR_TAB + AS1 (brpl,.+2) CR_TAB + AS1 (rjmp,%0)) : + (AS1 (breq,.+2) CR_TAB + AS1 (brpl,.+4) CR_TAB + AS1 (jmp,%0))); + else + return (len == 1 ? (AS1 (breq,%0) CR_TAB + AS1 (brlt,%0)) : + len == 2 ? 
(AS1 (breq,.+2) CR_TAB + AS1 (brge,.+2) CR_TAB + AS1 (rjmp,%0)) : + (AS1 (breq,.+2) CR_TAB + AS1 (brge,.+4) CR_TAB + AS1 (jmp,%0))); + case LEU: + return (len == 1 ? (AS1 (breq,%0) CR_TAB + AS1 (brlo,%0)) : + len == 2 ? (AS1 (breq,.+2) CR_TAB + AS1 (brsh,.+2) CR_TAB + AS1 (rjmp,%0)) : + (AS1 (breq,.+2) CR_TAB + AS1 (brsh,.+4) CR_TAB + AS1 (jmp,%0))); + default: + if (reverse) + { + switch (len) + { + case 1: + return AS1 (br%k1,%0); + case 2: + return (AS1 (br%j1,.+2) CR_TAB + AS1 (rjmp,%0)); + default: + return (AS1 (br%j1,.+4) CR_TAB + AS1 (jmp,%0)); + } + } + else + { + switch (len) + { + case 1: + return AS1 (br%j1,%0); + case 2: + return (AS1 (br%k1,.+2) CR_TAB + AS1 (rjmp,%0)); + default: + return (AS1 (br%k1,.+4) CR_TAB + AS1 (jmp,%0)); + } + } + } + return ""; +} + +/* Predicate function for immediate operand which fits to byte (8bit) */ + +int +byte_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return (GET_CODE (op) == CONST_INT + && INTVAL (op) <= 0xff && INTVAL (op) >= 0); +} + +/* Output insn cost for next insn. */ + +void +final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED, + int num_operands ATTRIBUTE_UNUSED) +{ + if (TARGET_ALL_DEBUG) + { + fprintf (asm_out_file, "/* DEBUG: cost = %d. */\n", + rtx_cost (PATTERN (insn), INSN, !optimize_size)); + } +} + +/* Return 0 if undefined, 1 if always true or always false. */ + +int +avr_simplify_comparison_p (enum machine_mode mode, RTX_CODE op, rtx x) +{ + unsigned int max = (mode == QImode ? 0xff : + mode == HImode ? 0xffff : + mode == SImode ? 0xffffffff : 0); + if (max && op && GET_CODE (x) == CONST_INT) + { + if (unsigned_condition (op) != op) + max >>= 1; + + if (max != (INTVAL (x) & max) + && INTVAL (x) != 0xff) + return 1; + } + return 0; +} + + +/* Returns nonzero if REGNO is the number of a hard + register in which function arguments are sometimes passed. */ + +int +function_arg_regno_p(int r) +{ + return (r >= 8 && r <= 25); +} + +/* Initializing the variable cum for the state at the beginning + of the argument list. */ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname, + tree fndecl ATTRIBUTE_UNUSED) +{ + cum->nregs = 18; + cum->regno = FIRST_CUM_REG; + if (!libname && stdarg_p (fntype)) + cum->nregs = 0; +} + +/* Returns the number of registers to allocate for a function argument. */ + +static int +avr_num_arg_regs (enum machine_mode mode, const_tree type) +{ + int size; + + if (mode == BLKmode) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + /* Align all function arguments to start in even-numbered registers. + Odd-sized arguments leave holes above them. */ + + return (size + 1) & ~1; +} + +/* Controls whether a function argument is passed + in a register, and which register. */ + +static rtx +avr_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int bytes = avr_num_arg_regs (mode, type); + + if (cum->nregs && bytes <= cum->nregs) + return gen_rtx_REG (mode, cum->regno - bytes); + + return NULL_RTX; +} + +/* Update the summarizer variable CUM to advance past an argument + in the argument list. 
*/ + +static void +avr_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int bytes = avr_num_arg_regs (mode, type); + + cum->nregs -= bytes; + cum->regno -= bytes; + + if (cum->nregs <= 0) + { + cum->nregs = 0; + cum->regno = FIRST_CUM_REG; + } +} + +/*********************************************************************** + Functions for outputting various mov's for a various modes +************************************************************************/ +const char * +output_movqi (rtx insn, rtx operands[], int *l) +{ + int dummy; + rtx dest = operands[0]; + rtx src = operands[1]; + int *real_l = l; + + if (!l) + l = &dummy; + + *l = 1; + + if (register_operand (dest, QImode)) + { + if (register_operand (src, QImode)) /* mov r,r */ + { + if (test_hard_reg_class (STACK_REG, dest)) + return AS2 (out,%0,%1); + else if (test_hard_reg_class (STACK_REG, src)) + return AS2 (in,%0,%1); + + return AS2 (mov,%0,%1); + } + else if (CONSTANT_P (src)) + { + if (test_hard_reg_class (LD_REGS, dest)) /* ldi d,i */ + return AS2 (ldi,%0,lo8(%1)); + + if (GET_CODE (src) == CONST_INT) + { + if (src == const0_rtx) /* mov r,L */ + return AS1 (clr,%0); + else if (src == const1_rtx) + { + *l = 2; + return (AS1 (clr,%0) CR_TAB + AS1 (inc,%0)); + } + else if (src == constm1_rtx) + { + /* Immediate constants -1 to any register */ + *l = 2; + return (AS1 (clr,%0) CR_TAB + AS1 (dec,%0)); + } + else + { + int bit_nr = exact_log2 (INTVAL (src)); + + if (bit_nr >= 0) + { + *l = 3; + if (!real_l) + output_asm_insn ((AS1 (clr,%0) CR_TAB + "set"), operands); + if (!real_l) + avr_output_bld (operands, bit_nr); + + return ""; + } + } + } + + /* Last resort, larger than loading from memory. */ + *l = 4; + return (AS2 (mov,__tmp_reg__,r31) CR_TAB + AS2 (ldi,r31,lo8(%1)) CR_TAB + AS2 (mov,%0,r31) CR_TAB + AS2 (mov,r31,__tmp_reg__)); + } + else if (GET_CODE (src) == MEM) + return out_movqi_r_mr (insn, operands, real_l); /* mov r,m */ + } + else if (GET_CODE (dest) == MEM) + { + const char *templ; + + if (src == const0_rtx) + operands[1] = zero_reg_rtx; + + templ = out_movqi_mr_r (insn, operands, real_l); + + if (!real_l) + output_asm_insn (templ, operands); + + operands[1] = src; + } + return ""; +} + + +const char * +output_movhi (rtx insn, rtx operands[], int *l) +{ + int dummy; + rtx dest = operands[0]; + rtx src = operands[1]; + int *real_l = l; + + if (!l) + l = &dummy; + + if (register_operand (dest, HImode)) + { + if (register_operand (src, HImode)) /* mov r,r */ + { + if (test_hard_reg_class (STACK_REG, dest)) + { + if (AVR_HAVE_8BIT_SP) + return *l = 1, AS2 (out,__SP_L__,%A1); + /* Use simple load of stack pointer if no interrupts are + used. 
*/ + else if (TARGET_NO_INTERRUPTS) + return *l = 2, (AS2 (out,__SP_H__,%B1) CR_TAB + AS2 (out,__SP_L__,%A1)); + *l = 5; + return (AS2 (in,__tmp_reg__,__SREG__) CR_TAB + "cli" CR_TAB + AS2 (out,__SP_H__,%B1) CR_TAB + AS2 (out,__SREG__,__tmp_reg__) CR_TAB + AS2 (out,__SP_L__,%A1)); + } + else if (test_hard_reg_class (STACK_REG, src)) + { + *l = 2; + return (AS2 (in,%A0,__SP_L__) CR_TAB + AS2 (in,%B0,__SP_H__)); + } + + if (AVR_HAVE_MOVW) + { + *l = 1; + return (AS2 (movw,%0,%1)); + } + else + { + *l = 2; + return (AS2 (mov,%A0,%A1) CR_TAB + AS2 (mov,%B0,%B1)); + } + } + else if (CONSTANT_P (src)) + { + if (test_hard_reg_class (LD_REGS, dest)) /* ldi d,i */ + { + *l = 2; + return (AS2 (ldi,%A0,lo8(%1)) CR_TAB + AS2 (ldi,%B0,hi8(%1))); + } + + if (GET_CODE (src) == CONST_INT) + { + if (src == const0_rtx) /* mov r,L */ + { + *l = 2; + return (AS1 (clr,%A0) CR_TAB + AS1 (clr,%B0)); + } + else if (src == const1_rtx) + { + *l = 3; + return (AS1 (clr,%A0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (inc,%A0)); + } + else if (src == constm1_rtx) + { + /* Immediate constants -1 to any register */ + *l = 3; + return (AS1 (clr,%0) CR_TAB + AS1 (dec,%A0) CR_TAB + AS2 (mov,%B0,%A0)); + } + else + { + int bit_nr = exact_log2 (INTVAL (src)); + + if (bit_nr >= 0) + { + *l = 4; + if (!real_l) + output_asm_insn ((AS1 (clr,%A0) CR_TAB + AS1 (clr,%B0) CR_TAB + "set"), operands); + if (!real_l) + avr_output_bld (operands, bit_nr); + + return ""; + } + } + + if ((INTVAL (src) & 0xff) == 0) + { + *l = 5; + return (AS2 (mov,__tmp_reg__,r31) CR_TAB + AS1 (clr,%A0) CR_TAB + AS2 (ldi,r31,hi8(%1)) CR_TAB + AS2 (mov,%B0,r31) CR_TAB + AS2 (mov,r31,__tmp_reg__)); + } + else if ((INTVAL (src) & 0xff00) == 0) + { + *l = 5; + return (AS2 (mov,__tmp_reg__,r31) CR_TAB + AS2 (ldi,r31,lo8(%1)) CR_TAB + AS2 (mov,%A0,r31) CR_TAB + AS1 (clr,%B0) CR_TAB + AS2 (mov,r31,__tmp_reg__)); + } + } + + /* Last resort, equal to loading from memory. 
*/ + *l = 6; + return (AS2 (mov,__tmp_reg__,r31) CR_TAB + AS2 (ldi,r31,lo8(%1)) CR_TAB + AS2 (mov,%A0,r31) CR_TAB + AS2 (ldi,r31,hi8(%1)) CR_TAB + AS2 (mov,%B0,r31) CR_TAB + AS2 (mov,r31,__tmp_reg__)); + } + else if (GET_CODE (src) == MEM) + return out_movhi_r_mr (insn, operands, real_l); /* mov r,m */ + } + else if (GET_CODE (dest) == MEM) + { + const char *templ; + + if (src == const0_rtx) + operands[1] = zero_reg_rtx; + + templ = out_movhi_mr_r (insn, operands, real_l); + + if (!real_l) + output_asm_insn (templ, operands); + + operands[1] = src; + return ""; + } + fatal_insn ("invalid insn:", insn); + return ""; +} + +const char * +out_movqi_r_mr (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx x = XEXP (src, 0); + int dummy; + + if (!l) + l = &dummy; + + if (CONSTANT_ADDRESS_P (x)) + { + if (CONST_INT_P (x) && INTVAL (x) == SREG_ADDR) + { + *l = 1; + return AS2 (in,%0,__SREG__); + } + if (optimize > 0 && io_address_operand (x, QImode)) + { + *l = 1; + return AS2 (in,%0,%m1-0x20); + } + *l = 2; + return AS2 (lds,%0,%m1); + } + /* memory access by reg+disp */ + else if (GET_CODE (x) == PLUS + && REG_P (XEXP (x,0)) + && GET_CODE (XEXP (x,1)) == CONST_INT) + { + if ((INTVAL (XEXP (x,1)) - GET_MODE_SIZE (GET_MODE (src))) >= 63) + { + int disp = INTVAL (XEXP (x,1)); + if (REGNO (XEXP (x,0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return *l = 3, (AS2 (adiw,r28,%o1-63) CR_TAB + AS2 (ldd,%0,Y+63) CR_TAB + AS2 (sbiw,r28,%o1-63)); + + return *l = 5, (AS2 (subi,r28,lo8(-%o1)) CR_TAB + AS2 (sbci,r29,hi8(-%o1)) CR_TAB + AS2 (ld,%0,Y) CR_TAB + AS2 (subi,r28,lo8(%o1)) CR_TAB + AS2 (sbci,r29,hi8(%o1))); + } + else if (REGNO (XEXP (x,0)) == REG_X) + { + /* This is a paranoid case LEGITIMIZE_RELOAD_ADDRESS must exclude + it but I have this situation with extremal optimizing options. */ + if (reg_overlap_mentioned_p (dest, XEXP (x,0)) + || reg_unused_after (insn, XEXP (x,0))) + return *l = 2, (AS2 (adiw,r26,%o1) CR_TAB + AS2 (ld,%0,X)); + + return *l = 3, (AS2 (adiw,r26,%o1) CR_TAB + AS2 (ld,%0,X) CR_TAB + AS2 (sbiw,r26,%o1)); + } + *l = 1; + return AS2 (ldd,%0,%1); + } + *l = 1; + return AS2 (ld,%0,%1); +} + +const char * +out_movhi_r_mr (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + /* "volatile" forces reading low byte first, even if less efficient, + for correct operation with 16-bit I/O registers. 
*/ + int mem_volatile_p = MEM_VOLATILE_P (src); + int tmp; + + if (!l) + l = &tmp; + + if (reg_base > 0) + { + if (reg_dest == reg_base) /* R = (R) */ + { + *l = 3; + return (AS2 (ld,__tmp_reg__,%1+) CR_TAB + AS2 (ld,%B0,%1) CR_TAB + AS2 (mov,%A0,__tmp_reg__)); + } + else if (reg_base == REG_X) /* (R26) */ + { + if (reg_unused_after (insn, base)) + { + *l = 2; + return (AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X)); + } + *l = 3; + return (AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X) CR_TAB + AS2 (sbiw,r26,1)); + } + else /* (R) */ + { + *l = 2; + return (AS2 (ld,%A0,%1) CR_TAB + AS2 (ldd,%B0,%1+1)); + } + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + int reg_base = true_regnum (XEXP (base, 0)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return *l = 4, (AS2 (adiw,r28,%o1-62) CR_TAB + AS2 (ldd,%A0,Y+62) CR_TAB + AS2 (ldd,%B0,Y+63) CR_TAB + AS2 (sbiw,r28,%o1-62)); + + return *l = 6, (AS2 (subi,r28,lo8(-%o1)) CR_TAB + AS2 (sbci,r29,hi8(-%o1)) CR_TAB + AS2 (ld,%A0,Y) CR_TAB + AS2 (ldd,%B0,Y+1) CR_TAB + AS2 (subi,r28,lo8(%o1)) CR_TAB + AS2 (sbci,r29,hi8(%o1))); + } + if (reg_base == REG_X) + { + /* This is a paranoid case. LEGITIMIZE_RELOAD_ADDRESS must exclude + it but I have this situation with extremal + optimization options. */ + + *l = 4; + if (reg_base == reg_dest) + return (AS2 (adiw,r26,%o1) CR_TAB + AS2 (ld,__tmp_reg__,X+) CR_TAB + AS2 (ld,%B0,X) CR_TAB + AS2 (mov,%A0,__tmp_reg__)); + + return (AS2 (adiw,r26,%o1) CR_TAB + AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X) CR_TAB + AS2 (sbiw,r26,%o1+1)); + } + + if (reg_base == reg_dest) + { + *l = 3; + return (AS2 (ldd,__tmp_reg__,%A1) CR_TAB + AS2 (ldd,%B0,%B1) CR_TAB + AS2 (mov,%A0,__tmp_reg__)); + } + + *l = 2; + return (AS2 (ldd,%A0,%A1) CR_TAB + AS2 (ldd,%B0,%B1)); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + { + if (reg_overlap_mentioned_p (dest, XEXP (base, 0))) + fatal_insn ("incorrect insn:", insn); + + if (mem_volatile_p) + { + if (REGNO (XEXP (base, 0)) == REG_X) + { + *l = 4; + return (AS2 (sbiw,r26,2) CR_TAB + AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X) CR_TAB + AS2 (sbiw,r26,1)); + } + else + { + *l = 3; + return (AS2 (sbiw,%r1,2) CR_TAB + AS2 (ld,%A0,%p1) CR_TAB + AS2 (ldd,%B0,%p1+1)); + } + } + + *l = 2; + return (AS2 (ld,%B0,%1) CR_TAB + AS2 (ld,%A0,%1)); + } + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + if (reg_overlap_mentioned_p (dest, XEXP (base, 0))) + fatal_insn ("incorrect insn:", insn); + + *l = 2; + return (AS2 (ld,%A0,%1) CR_TAB + AS2 (ld,%B0,%1)); + } + else if (CONSTANT_ADDRESS_P (base)) + { + if (optimize > 0 && io_address_operand (base, HImode)) + { + *l = 2; + return (AS2 (in,%A0,%m1-0x20) CR_TAB + AS2 (in,%B0,%m1+1-0x20)); + } + *l = 4; + return (AS2 (lds,%A0,%m1) CR_TAB + AS2 (lds,%B0,%m1+1)); + } + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +const char * +out_movsi_r_mr (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + int tmp; + + if (!l) + l = &tmp; + + if (reg_base > 0) + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_dest == REG_X) + /* "ld r26,-X" is undefined */ + return *l=7, (AS2 (adiw,r26,3) CR_TAB + AS2 (ld,r29,X) CR_TAB + AS2 (ld,r28,-X) CR_TAB + AS2 (ld,__tmp_reg__,-X) CR_TAB + AS2 (sbiw,r26,1) CR_TAB + AS2 (ld,r26,X) CR_TAB + AS2 (mov,r27,__tmp_reg__)); + 
else if (reg_dest == REG_X - 2) + return *l=5, (AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X+) CR_TAB + AS2 (ld,__tmp_reg__,X+) CR_TAB + AS2 (ld,%D0,X) CR_TAB + AS2 (mov,%C0,__tmp_reg__)); + else if (reg_unused_after (insn, base)) + return *l=4, (AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X+) CR_TAB + AS2 (ld,%C0,X+) CR_TAB + AS2 (ld,%D0,X)); + else + return *l=5, (AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X+) CR_TAB + AS2 (ld,%C0,X+) CR_TAB + AS2 (ld,%D0,X) CR_TAB + AS2 (sbiw,r26,3)); + } + else + { + if (reg_dest == reg_base) + return *l=5, (AS2 (ldd,%D0,%1+3) CR_TAB + AS2 (ldd,%C0,%1+2) CR_TAB + AS2 (ldd,__tmp_reg__,%1+1) CR_TAB + AS2 (ld,%A0,%1) CR_TAB + AS2 (mov,%B0,__tmp_reg__)); + else if (reg_base == reg_dest + 2) + return *l=5, (AS2 (ld ,%A0,%1) CR_TAB + AS2 (ldd,%B0,%1+1) CR_TAB + AS2 (ldd,__tmp_reg__,%1+2) CR_TAB + AS2 (ldd,%D0,%1+3) CR_TAB + AS2 (mov,%C0,__tmp_reg__)); + else + return *l=4, (AS2 (ld ,%A0,%1) CR_TAB + AS2 (ldd,%B0,%1+1) CR_TAB + AS2 (ldd,%C0,%1+2) CR_TAB + AS2 (ldd,%D0,%1+3)); + } + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return *l = 6, (AS2 (adiw,r28,%o1-60) CR_TAB + AS2 (ldd,%A0,Y+60) CR_TAB + AS2 (ldd,%B0,Y+61) CR_TAB + AS2 (ldd,%C0,Y+62) CR_TAB + AS2 (ldd,%D0,Y+63) CR_TAB + AS2 (sbiw,r28,%o1-60)); + + return *l = 8, (AS2 (subi,r28,lo8(-%o1)) CR_TAB + AS2 (sbci,r29,hi8(-%o1)) CR_TAB + AS2 (ld,%A0,Y) CR_TAB + AS2 (ldd,%B0,Y+1) CR_TAB + AS2 (ldd,%C0,Y+2) CR_TAB + AS2 (ldd,%D0,Y+3) CR_TAB + AS2 (subi,r28,lo8(%o1)) CR_TAB + AS2 (sbci,r29,hi8(%o1))); + } + + reg_base = true_regnum (XEXP (base, 0)); + if (reg_base == REG_X) + { + /* R = (X + d) */ + if (reg_dest == REG_X) + { + *l = 7; + /* "ld r26,-X" is undefined */ + return (AS2 (adiw,r26,%o1+3) CR_TAB + AS2 (ld,r29,X) CR_TAB + AS2 (ld,r28,-X) CR_TAB + AS2 (ld,__tmp_reg__,-X) CR_TAB + AS2 (sbiw,r26,1) CR_TAB + AS2 (ld,r26,X) CR_TAB + AS2 (mov,r27,__tmp_reg__)); + } + *l = 6; + if (reg_dest == REG_X - 2) + return (AS2 (adiw,r26,%o1) CR_TAB + AS2 (ld,r24,X+) CR_TAB + AS2 (ld,r25,X+) CR_TAB + AS2 (ld,__tmp_reg__,X+) CR_TAB + AS2 (ld,r27,X) CR_TAB + AS2 (mov,r26,__tmp_reg__)); + + return (AS2 (adiw,r26,%o1) CR_TAB + AS2 (ld,%A0,X+) CR_TAB + AS2 (ld,%B0,X+) CR_TAB + AS2 (ld,%C0,X+) CR_TAB + AS2 (ld,%D0,X) CR_TAB + AS2 (sbiw,r26,%o1+3)); + } + if (reg_dest == reg_base) + return *l=5, (AS2 (ldd,%D0,%D1) CR_TAB + AS2 (ldd,%C0,%C1) CR_TAB + AS2 (ldd,__tmp_reg__,%B1) CR_TAB + AS2 (ldd,%A0,%A1) CR_TAB + AS2 (mov,%B0,__tmp_reg__)); + else if (reg_dest == reg_base - 2) + return *l=5, (AS2 (ldd,%A0,%A1) CR_TAB + AS2 (ldd,%B0,%B1) CR_TAB + AS2 (ldd,__tmp_reg__,%C1) CR_TAB + AS2 (ldd,%D0,%D1) CR_TAB + AS2 (mov,%C0,__tmp_reg__)); + return *l=4, (AS2 (ldd,%A0,%A1) CR_TAB + AS2 (ldd,%B0,%B1) CR_TAB + AS2 (ldd,%C0,%C1) CR_TAB + AS2 (ldd,%D0,%D1)); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return *l=4, (AS2 (ld,%D0,%1) CR_TAB + AS2 (ld,%C0,%1) CR_TAB + AS2 (ld,%B0,%1) CR_TAB + AS2 (ld,%A0,%1)); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return *l=4, (AS2 (ld,%A0,%1) CR_TAB + AS2 (ld,%B0,%1) CR_TAB + AS2 (ld,%C0,%1) CR_TAB + AS2 (ld,%D0,%1)); + else if (CONSTANT_ADDRESS_P (base)) + return *l=8, (AS2 (lds,%A0,%m1) CR_TAB + AS2 (lds,%B0,%m1+1) CR_TAB + AS2 (lds,%C0,%m1+2) CR_TAB + AS2 (lds,%D0,%m1+3)); + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +const char * +out_movsi_mr_r 
(rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + int tmp; + + if (!l) + l = &tmp; + + if (CONSTANT_ADDRESS_P (base)) + return *l=8,(AS2 (sts,%m0,%A1) CR_TAB + AS2 (sts,%m0+1,%B1) CR_TAB + AS2 (sts,%m0+2,%C1) CR_TAB + AS2 (sts,%m0+3,%D1)); + if (reg_base > 0) /* (r) */ + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_src == REG_X) + { + /* "st X+,r26" is undefined */ + if (reg_unused_after (insn, base)) + return *l=6, (AS2 (mov,__tmp_reg__,r27) CR_TAB + AS2 (st,X,r26) CR_TAB + AS2 (adiw,r26,1) CR_TAB + AS2 (st,X+,__tmp_reg__) CR_TAB + AS2 (st,X+,r28) CR_TAB + AS2 (st,X,r29)); + else + return *l=7, (AS2 (mov,__tmp_reg__,r27) CR_TAB + AS2 (st,X,r26) CR_TAB + AS2 (adiw,r26,1) CR_TAB + AS2 (st,X+,__tmp_reg__) CR_TAB + AS2 (st,X+,r28) CR_TAB + AS2 (st,X,r29) CR_TAB + AS2 (sbiw,r26,3)); + } + else if (reg_base == reg_src + 2) + { + if (reg_unused_after (insn, base)) + return *l=7, (AS2 (mov,__zero_reg__,%C1) CR_TAB + AS2 (mov,__tmp_reg__,%D1) CR_TAB + AS2 (st,%0+,%A1) CR_TAB + AS2 (st,%0+,%B1) CR_TAB + AS2 (st,%0+,__zero_reg__) CR_TAB + AS2 (st,%0,__tmp_reg__) CR_TAB + AS1 (clr,__zero_reg__)); + else + return *l=8, (AS2 (mov,__zero_reg__,%C1) CR_TAB + AS2 (mov,__tmp_reg__,%D1) CR_TAB + AS2 (st,%0+,%A1) CR_TAB + AS2 (st,%0+,%B1) CR_TAB + AS2 (st,%0+,__zero_reg__) CR_TAB + AS2 (st,%0,__tmp_reg__) CR_TAB + AS1 (clr,__zero_reg__) CR_TAB + AS2 (sbiw,r26,3)); + } + return *l=5, (AS2 (st,%0+,%A1) CR_TAB + AS2 (st,%0+,%B1) CR_TAB + AS2 (st,%0+,%C1) CR_TAB + AS2 (st,%0,%D1) CR_TAB + AS2 (sbiw,r26,3)); + } + else + return *l=4, (AS2 (st,%0,%A1) CR_TAB + AS2 (std,%0+1,%B1) CR_TAB + AS2 (std,%0+2,%C1) CR_TAB + AS2 (std,%0+3,%D1)); + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return *l = 6, (AS2 (adiw,r28,%o0-60) CR_TAB + AS2 (std,Y+60,%A1) CR_TAB + AS2 (std,Y+61,%B1) CR_TAB + AS2 (std,Y+62,%C1) CR_TAB + AS2 (std,Y+63,%D1) CR_TAB + AS2 (sbiw,r28,%o0-60)); + + return *l = 8, (AS2 (subi,r28,lo8(-%o0)) CR_TAB + AS2 (sbci,r29,hi8(-%o0)) CR_TAB + AS2 (st,Y,%A1) CR_TAB + AS2 (std,Y+1,%B1) CR_TAB + AS2 (std,Y+2,%C1) CR_TAB + AS2 (std,Y+3,%D1) CR_TAB + AS2 (subi,r28,lo8(%o0)) CR_TAB + AS2 (sbci,r29,hi8(%o0))); + } + if (reg_base == REG_X) + { + /* (X + d) = R */ + if (reg_src == REG_X) + { + *l = 9; + return (AS2 (mov,__tmp_reg__,r26) CR_TAB + AS2 (mov,__zero_reg__,r27) CR_TAB + AS2 (adiw,r26,%o0) CR_TAB + AS2 (st,X+,__tmp_reg__) CR_TAB + AS2 (st,X+,__zero_reg__) CR_TAB + AS2 (st,X+,r28) CR_TAB + AS2 (st,X,r29) CR_TAB + AS1 (clr,__zero_reg__) CR_TAB + AS2 (sbiw,r26,%o0+3)); + } + else if (reg_src == REG_X - 2) + { + *l = 9; + return (AS2 (mov,__tmp_reg__,r26) CR_TAB + AS2 (mov,__zero_reg__,r27) CR_TAB + AS2 (adiw,r26,%o0) CR_TAB + AS2 (st,X+,r24) CR_TAB + AS2 (st,X+,r25) CR_TAB + AS2 (st,X+,__tmp_reg__) CR_TAB + AS2 (st,X,__zero_reg__) CR_TAB + AS1 (clr,__zero_reg__) CR_TAB + AS2 (sbiw,r26,%o0+3)); + } + *l = 6; + return (AS2 (adiw,r26,%o0) CR_TAB + AS2 (st,X+,%A1) CR_TAB + AS2 (st,X+,%B1) CR_TAB + AS2 (st,X+,%C1) CR_TAB + AS2 (st,X,%D1) CR_TAB + AS2 (sbiw,r26,%o0+3)); + } + return *l=4, (AS2 (std,%A0,%A1) CR_TAB + AS2 (std,%B0,%B1) CR_TAB + AS2 (std,%C0,%C1) CR_TAB + AS2 (std,%D0,%D1)); + } + else if (GET_CODE (base) == PRE_DEC) /* 
(--R) */ + return *l=4, (AS2 (st,%0,%D1) CR_TAB + AS2 (st,%0,%C1) CR_TAB + AS2 (st,%0,%B1) CR_TAB + AS2 (st,%0,%A1)); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return *l=4, (AS2 (st,%0,%A1) CR_TAB + AS2 (st,%0,%B1) CR_TAB + AS2 (st,%0,%C1) CR_TAB + AS2 (st,%0,%D1)); + fatal_insn ("unknown move insn:",insn); + return ""; +} + +const char * +output_movsisf(rtx insn, rtx operands[], int *l) +{ + int dummy; + rtx dest = operands[0]; + rtx src = operands[1]; + int *real_l = l; + + if (!l) + l = &dummy; + + if (register_operand (dest, VOIDmode)) + { + if (register_operand (src, VOIDmode)) /* mov r,r */ + { + if (true_regnum (dest) > true_regnum (src)) + { + if (AVR_HAVE_MOVW) + { + *l = 2; + return (AS2 (movw,%C0,%C1) CR_TAB + AS2 (movw,%A0,%A1)); + } + *l = 4; + return (AS2 (mov,%D0,%D1) CR_TAB + AS2 (mov,%C0,%C1) CR_TAB + AS2 (mov,%B0,%B1) CR_TAB + AS2 (mov,%A0,%A1)); + } + else + { + if (AVR_HAVE_MOVW) + { + *l = 2; + return (AS2 (movw,%A0,%A1) CR_TAB + AS2 (movw,%C0,%C1)); + } + *l = 4; + return (AS2 (mov,%A0,%A1) CR_TAB + AS2 (mov,%B0,%B1) CR_TAB + AS2 (mov,%C0,%C1) CR_TAB + AS2 (mov,%D0,%D1)); + } + } + else if (CONSTANT_P (src)) + { + if (test_hard_reg_class (LD_REGS, dest)) /* ldi d,i */ + { + *l = 4; + return (AS2 (ldi,%A0,lo8(%1)) CR_TAB + AS2 (ldi,%B0,hi8(%1)) CR_TAB + AS2 (ldi,%C0,hlo8(%1)) CR_TAB + AS2 (ldi,%D0,hhi8(%1))); + } + + if (GET_CODE (src) == CONST_INT) + { + const char *const clr_op0 = + AVR_HAVE_MOVW ? (AS1 (clr,%A0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS2 (movw,%C0,%A0)) + : (AS1 (clr,%A0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%D0)); + + if (src == const0_rtx) /* mov r,L */ + { + *l = AVR_HAVE_MOVW ? 3 : 4; + return clr_op0; + } + else if (src == const1_rtx) + { + if (!real_l) + output_asm_insn (clr_op0, operands); + *l = AVR_HAVE_MOVW ? 4 : 5; + return AS1 (inc,%A0); + } + else if (src == constm1_rtx) + { + /* Immediate constants -1 to any register */ + if (AVR_HAVE_MOVW) + { + *l = 4; + return (AS1 (clr,%A0) CR_TAB + AS1 (dec,%A0) CR_TAB + AS2 (mov,%B0,%A0) CR_TAB + AS2 (movw,%C0,%A0)); + } + *l = 5; + return (AS1 (clr,%A0) CR_TAB + AS1 (dec,%A0) CR_TAB + AS2 (mov,%B0,%A0) CR_TAB + AS2 (mov,%C0,%A0) CR_TAB + AS2 (mov,%D0,%A0)); + } + else + { + int bit_nr = exact_log2 (INTVAL (src)); + + if (bit_nr >= 0) + { + *l = AVR_HAVE_MOVW ? 5 : 6; + if (!real_l) + { + output_asm_insn (clr_op0, operands); + output_asm_insn ("set", operands); + } + if (!real_l) + avr_output_bld (operands, bit_nr); + + return ""; + } + } + } + + /* Last resort, better than loading from memory. 
*/ + *l = 10; + return (AS2 (mov,__tmp_reg__,r31) CR_TAB + AS2 (ldi,r31,lo8(%1)) CR_TAB + AS2 (mov,%A0,r31) CR_TAB + AS2 (ldi,r31,hi8(%1)) CR_TAB + AS2 (mov,%B0,r31) CR_TAB + AS2 (ldi,r31,hlo8(%1)) CR_TAB + AS2 (mov,%C0,r31) CR_TAB + AS2 (ldi,r31,hhi8(%1)) CR_TAB + AS2 (mov,%D0,r31) CR_TAB + AS2 (mov,r31,__tmp_reg__)); + } + else if (GET_CODE (src) == MEM) + return out_movsi_r_mr (insn, operands, real_l); /* mov r,m */ + } + else if (GET_CODE (dest) == MEM) + { + const char *templ; + + if (src == const0_rtx) + operands[1] = zero_reg_rtx; + + templ = out_movsi_mr_r (insn, operands, real_l); + + if (!real_l) + output_asm_insn (templ, operands); + + operands[1] = src; + return ""; + } + fatal_insn ("invalid insn:", insn); + return ""; +} + +const char * +out_movqi_mr_r (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx x = XEXP (dest, 0); + int dummy; + + if (!l) + l = &dummy; + + if (CONSTANT_ADDRESS_P (x)) + { + if (CONST_INT_P (x) && INTVAL (x) == SREG_ADDR) + { + *l = 1; + return AS2 (out,__SREG__,%1); + } + if (optimize > 0 && io_address_operand (x, QImode)) + { + *l = 1; + return AS2 (out,%m0-0x20,%1); + } + *l = 2; + return AS2 (sts,%m0,%1); + } + /* memory access by reg+disp */ + else if (GET_CODE (x) == PLUS + && REG_P (XEXP (x,0)) + && GET_CODE (XEXP (x,1)) == CONST_INT) + { + if ((INTVAL (XEXP (x,1)) - GET_MODE_SIZE (GET_MODE (dest))) >= 63) + { + int disp = INTVAL (XEXP (x,1)); + if (REGNO (XEXP (x,0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return *l = 3, (AS2 (adiw,r28,%o0-63) CR_TAB + AS2 (std,Y+63,%1) CR_TAB + AS2 (sbiw,r28,%o0-63)); + + return *l = 5, (AS2 (subi,r28,lo8(-%o0)) CR_TAB + AS2 (sbci,r29,hi8(-%o0)) CR_TAB + AS2 (st,Y,%1) CR_TAB + AS2 (subi,r28,lo8(%o0)) CR_TAB + AS2 (sbci,r29,hi8(%o0))); + } + else if (REGNO (XEXP (x,0)) == REG_X) + { + if (reg_overlap_mentioned_p (src, XEXP (x, 0))) + { + if (reg_unused_after (insn, XEXP (x,0))) + return *l = 3, (AS2 (mov,__tmp_reg__,%1) CR_TAB + AS2 (adiw,r26,%o0) CR_TAB + AS2 (st,X,__tmp_reg__)); + + return *l = 4, (AS2 (mov,__tmp_reg__,%1) CR_TAB + AS2 (adiw,r26,%o0) CR_TAB + AS2 (st,X,__tmp_reg__) CR_TAB + AS2 (sbiw,r26,%o0)); + } + else + { + if (reg_unused_after (insn, XEXP (x,0))) + return *l = 2, (AS2 (adiw,r26,%o0) CR_TAB + AS2 (st,X,%1)); + + return *l = 3, (AS2 (adiw,r26,%o0) CR_TAB + AS2 (st,X,%1) CR_TAB + AS2 (sbiw,r26,%o0)); + } + } + *l = 1; + return AS2 (std,%0,%1); + } + *l = 1; + return AS2 (st,%0,%1); +} + +const char * +out_movhi_mr_r (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + /* "volatile" forces writing high byte first, even if less efficient, + for correct operation with 16-bit I/O registers. */ + int mem_volatile_p = MEM_VOLATILE_P (dest); + int tmp; + + if (!l) + l = &tmp; + if (CONSTANT_ADDRESS_P (base)) + { + if (optimize > 0 && io_address_operand (base, HImode)) + { + *l = 2; + return (AS2 (out,%m0+1-0x20,%B1) CR_TAB + AS2 (out,%m0-0x20,%A1)); + } + return *l = 4, (AS2 (sts,%m0+1,%B1) CR_TAB + AS2 (sts,%m0,%A1)); + } + if (reg_base > 0) + { + if (reg_base == REG_X) + { + if (reg_src == REG_X) + { + /* "st X+,r26" and "st -X,r26" are undefined. 
*/ + if (!mem_volatile_p && reg_unused_after (insn, src)) + return *l=4, (AS2 (mov,__tmp_reg__,r27) CR_TAB + AS2 (st,X,r26) CR_TAB + AS2 (adiw,r26,1) CR_TAB + AS2 (st,X,__tmp_reg__)); + else + return *l=5, (AS2 (mov,__tmp_reg__,r27) CR_TAB + AS2 (adiw,r26,1) CR_TAB + AS2 (st,X,__tmp_reg__) CR_TAB + AS2 (sbiw,r26,1) CR_TAB + AS2 (st,X,r26)); + } + else + { + if (!mem_volatile_p && reg_unused_after (insn, base)) + return *l=2, (AS2 (st,X+,%A1) CR_TAB + AS2 (st,X,%B1)); + else + return *l=3, (AS2 (adiw,r26,1) CR_TAB + AS2 (st,X,%B1) CR_TAB + AS2 (st,-X,%A1)); + } + } + else + return *l=2, (AS2 (std,%0+1,%B1) CR_TAB + AS2 (st,%0,%A1)); + } + else if (GET_CODE (base) == PLUS) + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return *l = 4, (AS2 (adiw,r28,%o0-62) CR_TAB + AS2 (std,Y+63,%B1) CR_TAB + AS2 (std,Y+62,%A1) CR_TAB + AS2 (sbiw,r28,%o0-62)); + + return *l = 6, (AS2 (subi,r28,lo8(-%o0)) CR_TAB + AS2 (sbci,r29,hi8(-%o0)) CR_TAB + AS2 (std,Y+1,%B1) CR_TAB + AS2 (st,Y,%A1) CR_TAB + AS2 (subi,r28,lo8(%o0)) CR_TAB + AS2 (sbci,r29,hi8(%o0))); + } + if (reg_base == REG_X) + { + /* (X + d) = R */ + if (reg_src == REG_X) + { + *l = 7; + return (AS2 (mov,__tmp_reg__,r26) CR_TAB + AS2 (mov,__zero_reg__,r27) CR_TAB + AS2 (adiw,r26,%o0+1) CR_TAB + AS2 (st,X,__zero_reg__) CR_TAB + AS2 (st,-X,__tmp_reg__) CR_TAB + AS1 (clr,__zero_reg__) CR_TAB + AS2 (sbiw,r26,%o0)); + } + *l = 4; + return (AS2 (adiw,r26,%o0+1) CR_TAB + AS2 (st,X,%B1) CR_TAB + AS2 (st,-X,%A1) CR_TAB + AS2 (sbiw,r26,%o0)); + } + return *l=2, (AS2 (std,%B0,%B1) CR_TAB + AS2 (std,%A0,%A1)); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return *l=2, (AS2 (st,%0,%B1) CR_TAB + AS2 (st,%0,%A1)); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + if (mem_volatile_p) + { + if (REGNO (XEXP (base, 0)) == REG_X) + { + *l = 4; + return (AS2 (adiw,r26,1) CR_TAB + AS2 (st,X,%B1) CR_TAB + AS2 (st,-X,%A1) CR_TAB + AS2 (adiw,r26,2)); + } + else + { + *l = 3; + return (AS2 (std,%p0+1,%B1) CR_TAB + AS2 (st,%p0,%A1) CR_TAB + AS2 (adiw,%r0,2)); + } + } + + *l = 2; + return (AS2 (st,%0,%A1) CR_TAB + AS2 (st,%0,%B1)); + } + fatal_insn ("unknown move insn:",insn); + return ""; +} + +/* Return 1 if frame pointer for current function required. */ + +bool +avr_frame_pointer_required_p (void) +{ + return (cfun->calls_alloca + || crtl->args.info.nregs == 0 + || get_frame_size () > 0); +} + +/* Returns the condition of compare insn INSN, or UNKNOWN. */ + +static RTX_CODE +compare_condition (rtx insn) +{ + rtx next = next_real_insn (insn); + RTX_CODE cond = UNKNOWN; + if (next && GET_CODE (next) == JUMP_INSN) + { + rtx pat = PATTERN (next); + rtx src = SET_SRC (pat); + rtx t = XEXP (src, 0); + cond = GET_CODE (t); + } + return cond; +} + +/* Returns nonzero if INSN is a tst insn that only tests the sign. */ + +static int +compare_sign_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == GE || cond == LT); +} + +/* Returns nonzero if the next insn is a JUMP_INSN with a condition + that needs to be swapped (GT, GTU, LE, LEU). */ + +int +compare_diff_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == GT || cond == GTU || cond == LE || cond == LEU) ? cond : 0; +} + +/* Returns nonzero if INSN is a compare insn with the EQ or NE condition. 
*/ + +int +compare_eq_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == EQ || cond == NE); +} + + +/* Output test instruction for HImode. */ + +const char * +out_tsthi (rtx insn, rtx op, int *l) +{ + if (compare_sign_p (insn)) + { + if (l) *l = 1; + return AS1 (tst,%B0); + } + if (reg_unused_after (insn, op) + && compare_eq_p (insn)) + { + /* Faster than sbiw if we can clobber the operand. */ + if (l) *l = 1; + return "or %A0,%B0"; + } + if (test_hard_reg_class (ADDW_REGS, op)) + { + if (l) *l = 1; + return AS2 (sbiw,%0,0); + } + if (l) *l = 2; + return (AS2 (cp,%A0,__zero_reg__) CR_TAB + AS2 (cpc,%B0,__zero_reg__)); +} + + +/* Output test instruction for SImode. */ + +const char * +out_tstsi (rtx insn, rtx op, int *l) +{ + if (compare_sign_p (insn)) + { + if (l) *l = 1; + return AS1 (tst,%D0); + } + if (test_hard_reg_class (ADDW_REGS, op)) + { + if (l) *l = 3; + return (AS2 (sbiw,%A0,0) CR_TAB + AS2 (cpc,%C0,__zero_reg__) CR_TAB + AS2 (cpc,%D0,__zero_reg__)); + } + if (l) *l = 4; + return (AS2 (cp,%A0,__zero_reg__) CR_TAB + AS2 (cpc,%B0,__zero_reg__) CR_TAB + AS2 (cpc,%C0,__zero_reg__) CR_TAB + AS2 (cpc,%D0,__zero_reg__)); +} + + +/* Generate asm equivalent for various shifts. + Shift count is a CONST_INT, MEM or REG. + This only handles cases that are not already + carefully hand-optimized in ?sh??i3_out. */ + +void +out_shift_with_cnt (const char *templ, rtx insn, rtx operands[], + int *len, int t_len) +{ + rtx op[10]; + char str[500]; + int second_label = 1; + int saved_in_tmp = 0; + int use_zero_reg = 0; + + op[0] = operands[0]; + op[1] = operands[1]; + op[2] = operands[2]; + op[3] = operands[3]; + str[0] = 0; + + if (len) + *len = 1; + + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int count = INTVAL (operands[2]); + int max_len = 10; /* If larger than this, always use a loop. */ + + if (count <= 0) + { + if (len) + *len = 0; + return; + } + + if (count < 8 && !scratch) + use_zero_reg = 1; + + if (optimize_size) + max_len = t_len + (scratch ? 3 : (use_zero_reg ? 4 : 5)); + + if (t_len * count <= max_len) + { + /* Output shifts inline with no loop - faster. */ + if (len) + *len = t_len * count; + else + { + while (count-- > 0) + output_asm_insn (templ, op); + } + + return; + } + + if (scratch) + { + if (!len) + strcat (str, AS2 (ldi,%3,%2)); + } + else if (use_zero_reg) + { + /* Hack to save one word: use __zero_reg__ as loop counter. + Set one bit, then shift in a loop until it is 0 again. */ + + op[3] = zero_reg_rtx; + if (len) + *len = 2; + else + strcat (str, ("set" CR_TAB + AS2 (bld,%3,%2-1))); + } + else + { + /* No scratch register available, use one from LD_REGS (saved in + __tmp_reg__) that doesn't overlap with registers to shift. */ + + op[3] = gen_rtx_REG (QImode, + ((true_regnum (operands[0]) - 1) & 15) + 16); + op[4] = tmp_reg_rtx; + saved_in_tmp = 1; + + if (len) + *len = 3; /* Includes "mov %3,%4" after the loop. 
*/ + else + strcat (str, (AS2 (mov,%4,%3) CR_TAB + AS2 (ldi,%3,%2))); + } + + second_label = 0; + } + else if (GET_CODE (operands[2]) == MEM) + { + rtx op_mov[10]; + + op[3] = op_mov[0] = tmp_reg_rtx; + op_mov[1] = op[2]; + + if (len) + out_movqi_r_mr (insn, op_mov, len); + else + output_asm_insn (out_movqi_r_mr (insn, op_mov, NULL), op_mov); + } + else if (register_operand (operands[2], QImode)) + { + if (reg_unused_after (insn, operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + { + op[3] = op[2]; + } + else + { + op[3] = tmp_reg_rtx; + if (!len) + strcat (str, (AS2 (mov,%3,%2) CR_TAB)); + } + } + else + fatal_insn ("bad shift insn:", insn); + + if (second_label) + { + if (len) + ++*len; + else + strcat (str, AS1 (rjmp,2f)); + } + + if (len) + *len += t_len + 2; /* template + dec + brXX */ + else + { + strcat (str, "\n1:\t"); + strcat (str, templ); + strcat (str, second_label ? "\n2:\t" : "\n\t"); + strcat (str, use_zero_reg ? AS1 (lsr,%3) : AS1 (dec,%3)); + strcat (str, CR_TAB); + strcat (str, second_label ? AS1 (brpl,1b) : AS1 (brne,1b)); + if (saved_in_tmp) + strcat (str, (CR_TAB AS2 (mov,%3,%4))); + output_asm_insn (str, op); + } +} + + +/* 8bit shift left ((char)x << i) */ + +const char * +ashlqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 8) + break; + + *len = 1; + return AS1 (clr,%0); + + case 1: + *len = 1; + return AS1 (lsl,%0); + + case 2: + *len = 2; + return (AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0)); + + case 3: + *len = 3; + return (AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0)); + + case 4: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 2; + return (AS1 (swap,%0) CR_TAB + AS2 (andi,%0,0xf0)); + } + *len = 4; + return (AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0)); + + case 5: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 3; + return (AS1 (swap,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS2 (andi,%0,0xe0)); + } + *len = 5; + return (AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0)); + + case 6: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 4; + return (AS1 (swap,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS2 (andi,%0,0xc0)); + } + *len = 6; + return (AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0) CR_TAB + AS1 (lsl,%0)); + + case 7: + *len = 3; + return (AS1 (ror,%0) CR_TAB + AS1 (clr,%0) CR_TAB + AS1 (ror,%0)); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. 
Incorrect shift:", insn); + + out_shift_with_cnt (AS1 (lsl,%0), + insn, operands, len, 1); + return ""; +} + + +/* 16bit shift left ((short)x << i) */ + +const char * +ashlhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 16) + break; + + *len = 2; + return (AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + + case 4: + if (optimize_size && scratch) + break; /* 5 */ + if (ldi_ok) + { + *len = 6; + return (AS1 (swap,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS2 (andi,%B0,0xf0) CR_TAB + AS2 (eor,%B0,%A0) CR_TAB + AS2 (andi,%A0,0xf0) CR_TAB + AS2 (eor,%B0,%A0)); + } + if (scratch) + { + *len = 7; + return (AS1 (swap,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS2 (ldi,%3,0xf0) CR_TAB + "and %B0,%3" CR_TAB + AS2 (eor,%B0,%A0) CR_TAB + "and %A0,%3" CR_TAB + AS2 (eor,%B0,%A0)); + } + break; /* optimize_size ? 6 : 8 */ + + case 5: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + if (ldi_ok) + { + *len = 8; + return (AS1 (lsl,%A0) CR_TAB + AS1 (rol,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS2 (andi,%B0,0xf0) CR_TAB + AS2 (eor,%B0,%A0) CR_TAB + AS2 (andi,%A0,0xf0) CR_TAB + AS2 (eor,%B0,%A0)); + } + if (scratch) + { + *len = 9; + return (AS1 (lsl,%A0) CR_TAB + AS1 (rol,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS2 (ldi,%3,0xf0) CR_TAB + "and %B0,%3" CR_TAB + AS2 (eor,%B0,%A0) CR_TAB + "and %A0,%3" CR_TAB + AS2 (eor,%B0,%A0)); + } + break; /* 10 */ + + case 6: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + *len = 9; + return (AS1 (clr,__tmp_reg__) CR_TAB + AS1 (lsr,%B0) CR_TAB + AS1 (ror,%A0) CR_TAB + AS1 (ror,__tmp_reg__) CR_TAB + AS1 (lsr,%B0) CR_TAB + AS1 (ror,%A0) CR_TAB + AS1 (ror,__tmp_reg__) CR_TAB + AS2 (mov,%B0,%A0) CR_TAB + AS2 (mov,%A0,__tmp_reg__)); + + case 7: + *len = 5; + return (AS1 (lsr,%B0) CR_TAB + AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (ror,%B0) CR_TAB + AS1 (ror,%A0)); + + case 8: + return *len = 2, (AS2 (mov,%B0,%A1) CR_TAB + AS1 (clr,%A0)); + + case 9: + *len = 3; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (lsl,%B0)); + + case 10: + *len = 4; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0)); + + case 11: + *len = 5; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0)); + + case 12: + if (ldi_ok) + { + *len = 4; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS2 (andi,%B0,0xf0)); + } + if (scratch) + { + *len = 5; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS2 (ldi,%3,0xf0) CR_TAB + "and %B0,%3"); + } + *len = 6; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0)); + + case 13: + if (ldi_ok) + { + *len = 5; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (andi,%B0,0xe0)); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return (AS2 (ldi,%3,0x20) CR_TAB + AS2 (mul,%A0,%3) CR_TAB + AS2 (mov,%B0,r0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (optimize_size && scratch) + break; /* 5 */ + if (scratch) + { + *len = 6; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 
(clr,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (ldi,%3,0xe0) CR_TAB + "and %B0,%3"); + } + if (AVR_HAVE_MUL) + { + *len = 6; + return ("set" CR_TAB + AS2 (bld,r1,5) CR_TAB + AS2 (mul,%A0,r1) CR_TAB + AS2 (mov,%B0,r0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (clr,__zero_reg__)); + } + *len = 7; + return (AS2 (mov,%B0,%A0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (lsl,%B0)); + + case 14: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return (AS2 (ldi,%B0,0x40) CR_TAB + AS2 (mul,%A0,%B0) CR_TAB + AS2 (mov,%B0,r0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return (AS2 (ldi,%3,0x40) CR_TAB + AS2 (mul,%A0,%3) CR_TAB + AS2 (mov,%B0,r0) CR_TAB + AS1 (clr,%A0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (optimize_size && ldi_ok) + { + *len = 5; + return (AS2 (mov,%B0,%A0) CR_TAB + AS2 (ldi,%A0,6) "\n1:\t" + AS1 (lsl,%B0) CR_TAB + AS1 (dec,%A0) CR_TAB + AS1 (brne,1b)); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return (AS1 (clr,%B0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (ror,%B0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (ror,%B0) CR_TAB + AS1 (clr,%A0)); + + case 15: + *len = 4; + return (AS1 (clr,%B0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (ror,%B0) CR_TAB + AS1 (clr,%A0)); + } + len = t; + } + out_shift_with_cnt ((AS1 (lsl,%A0) CR_TAB + AS1 (rol,%B0)), + insn, operands, len, 2); + return ""; +} + + +/* 32bit shift left ((long)x << i) */ + +const char * +ashlsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 32) + break; + + if (AVR_HAVE_MOVW) + return *len = 3, (AS1 (clr,%D0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS2 (movw,%A0,%C0)); + *len = 4; + return (AS1 (clr,%D0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len = 4; + if (reg0 >= reg1) + return (AS2 (mov,%D0,%C1) CR_TAB + AS2 (mov,%C0,%B1) CR_TAB + AS2 (mov,%B0,%A1) CR_TAB + AS1 (clr,%A0)); + else + return (AS1 (clr,%A0) CR_TAB + AS2 (mov,%B0,%A1) CR_TAB + AS2 (mov,%C0,%B1) CR_TAB + AS2 (mov,%D0,%C1)); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + if (reg0 + 2 == reg1) + return *len = 2, (AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + if (AVR_HAVE_MOVW) + return *len = 3, (AS2 (movw,%C0,%A1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + else + return *len = 4, (AS2 (mov,%C0,%A1) CR_TAB + AS2 (mov,%D0,%B1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + } + + case 24: + *len = 4; + return (AS2 (mov,%D0,%A1) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + + case 31: + *len = 6; + return (AS1 (clr,%D0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (ror,%D0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + } + len = t; + } + out_shift_with_cnt ((AS1 (lsl,%A0) CR_TAB + AS1 (rol,%B0) CR_TAB + AS1 (rol,%C0) CR_TAB + AS1 (rol,%D0)), + insn, operands, len, 4); + return ""; +} + +/* 8bit arithmetic shift right ((signed char)x >> i) */ + +const char * +ashrqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 1: + *len = 1; + return AS1 (asr,%0); + + case 2: + *len = 2; 
+ return (AS1 (asr,%0) CR_TAB + AS1 (asr,%0)); + + case 3: + *len = 3; + return (AS1 (asr,%0) CR_TAB + AS1 (asr,%0) CR_TAB + AS1 (asr,%0)); + + case 4: + *len = 4; + return (AS1 (asr,%0) CR_TAB + AS1 (asr,%0) CR_TAB + AS1 (asr,%0) CR_TAB + AS1 (asr,%0)); + + case 5: + *len = 5; + return (AS1 (asr,%0) CR_TAB + AS1 (asr,%0) CR_TAB + AS1 (asr,%0) CR_TAB + AS1 (asr,%0) CR_TAB + AS1 (asr,%0)); + + case 6: + *len = 4; + return (AS2 (bst,%0,6) CR_TAB + AS1 (lsl,%0) CR_TAB + AS2 (sbc,%0,%0) CR_TAB + AS2 (bld,%0,0)); + + default: + if (INTVAL (operands[2]) < 8) + break; + + /* fall through */ + + case 7: + *len = 2; + return (AS1 (lsl,%0) CR_TAB + AS2 (sbc,%0,%0)); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. Incorrect shift:", insn); + + out_shift_with_cnt (AS1 (asr,%0), + insn, operands, len, 1); + return ""; +} + + +/* 16bit arithmetic shift right ((signed short)x >> i) */ + +const char * +ashrhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 4: + case 5: + /* XXX try to optimize this too? */ + break; + + case 6: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + *len = 8; + return (AS2 (mov,__tmp_reg__,%A0) CR_TAB + AS2 (mov,%A0,%B0) CR_TAB + AS1 (lsl,__tmp_reg__) CR_TAB + AS1 (rol,%A0) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (lsl,__tmp_reg__) CR_TAB + AS1 (rol,%A0) CR_TAB + AS1 (rol,%B0)); + + case 7: + *len = 4; + return (AS1 (lsl,%A0) CR_TAB + AS2 (mov,%A0,%B0) CR_TAB + AS1 (rol,%A0) CR_TAB + AS2 (sbc,%B0,%B0)); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1) + return *len = 3, (AS2 (mov,%A0,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%B0,%B0)); + else + return *len = 4, (AS2 (mov,%A0,%B1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS2 (sbrc,%A0,7) CR_TAB + AS1 (dec,%B0)); + } + + case 9: + *len = 4; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (asr,%A0)); + + case 10: + *len = 5; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0)); + + case 11: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return (AS2 (ldi,%A0,0x20) CR_TAB + AS2 (muls,%B0,%A0) CR_TAB + AS2 (mov,%A0,r1) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0)); + + case 12: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return (AS2 (ldi,%A0,0x10) CR_TAB + AS2 (muls,%B0,%A0) CR_TAB + AS2 (mov,%A0,r1) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 7; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0)); + + case 13: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return (AS2 (ldi,%A0,0x08) CR_TAB + AS2 (muls,%B0,%A0) CR_TAB + AS2 (mov,%A0,r1) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (optimize_size) + break; /* scratch ? 
5 : 7 */ + *len = 8; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0) CR_TAB + AS1 (asr,%A0)); + + case 14: + *len = 5; + return (AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%A0,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS2 (mov,%B0,%A0) CR_TAB + AS1 (rol,%A0)); + + default: + if (INTVAL (operands[2]) < 16) + break; + + /* fall through */ + + case 15: + return *len = 3, (AS1 (lsl,%B0) CR_TAB + AS2 (sbc,%A0,%A0) CR_TAB + AS2 (mov,%B0,%A0)); + } + len = t; + } + out_shift_with_cnt ((AS1 (asr,%B0) CR_TAB + AS1 (ror,%A0)), + insn, operands, len, 2); + return ""; +} + + +/* 32bit arithmetic shift right ((signed long)x >> i) */ + +const char * +ashrsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len=6; + if (reg0 <= reg1) + return (AS2 (mov,%A0,%B1) CR_TAB + AS2 (mov,%B0,%C1) CR_TAB + AS2 (mov,%C0,%D1) CR_TAB + AS1 (clr,%D0) CR_TAB + AS2 (sbrc,%C0,7) CR_TAB + AS1 (dec,%D0)); + else + return (AS1 (clr,%D0) CR_TAB + AS2 (sbrc,%D1,7) CR_TAB + AS1 (dec,%D0) CR_TAB + AS2 (mov,%C0,%D1) CR_TAB + AS2 (mov,%B0,%C1) CR_TAB + AS2 (mov,%A0,%B1)); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1 + 2) + return *len = 4, (AS1 (clr,%D0) CR_TAB + AS2 (sbrc,%B0,7) CR_TAB + AS1 (com,%D0) CR_TAB + AS2 (mov,%C0,%D0)); + if (AVR_HAVE_MOVW) + return *len = 5, (AS2 (movw,%A0,%C1) CR_TAB + AS1 (clr,%D0) CR_TAB + AS2 (sbrc,%B0,7) CR_TAB + AS1 (com,%D0) CR_TAB + AS2 (mov,%C0,%D0)); + else + return *len = 6, (AS2 (mov,%B0,%D1) CR_TAB + AS2 (mov,%A0,%C1) CR_TAB + AS1 (clr,%D0) CR_TAB + AS2 (sbrc,%B0,7) CR_TAB + AS1 (com,%D0) CR_TAB + AS2 (mov,%C0,%D0)); + } + + case 24: + return *len = 6, (AS2 (mov,%A0,%D1) CR_TAB + AS1 (clr,%D0) CR_TAB + AS2 (sbrc,%A0,7) CR_TAB + AS1 (com,%D0) CR_TAB + AS2 (mov,%B0,%D0) CR_TAB + AS2 (mov,%C0,%D0)); + + default: + if (INTVAL (operands[2]) < 32) + break; + + /* fall through */ + + case 31: + if (AVR_HAVE_MOVW) + return *len = 4, (AS1 (lsl,%D0) CR_TAB + AS2 (sbc,%A0,%A0) CR_TAB + AS2 (mov,%B0,%A0) CR_TAB + AS2 (movw,%C0,%A0)); + else + return *len = 5, (AS1 (lsl,%D0) CR_TAB + AS2 (sbc,%A0,%A0) CR_TAB + AS2 (mov,%B0,%A0) CR_TAB + AS2 (mov,%C0,%A0) CR_TAB + AS2 (mov,%D0,%A0)); + } + len = t; + } + out_shift_with_cnt ((AS1 (asr,%D0) CR_TAB + AS1 (ror,%C0) CR_TAB + AS1 (ror,%B0) CR_TAB + AS1 (ror,%A0)), + insn, operands, len, 4); + return ""; +} + +/* 8bit logic shift right ((unsigned char)x >> i) */ + +const char * +lshrqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 8) + break; + + *len = 1; + return AS1 (clr,%0); + + case 1: + *len = 1; + return AS1 (lsr,%0); + + case 2: + *len = 2; + return (AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0)); + case 3: + *len = 3; + return (AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0)); + + case 4: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len=2; + return (AS1 (swap,%0) CR_TAB + AS2 (andi,%0,0x0f)); + } + *len = 4; + return (AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0)); + + case 5: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 3; + 
return (AS1 (swap,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS2 (andi,%0,0x7)); + } + *len = 5; + return (AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0)); + + case 6: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 4; + return (AS1 (swap,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS2 (andi,%0,0x3)); + } + *len = 6; + return (AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0) CR_TAB + AS1 (lsr,%0)); + + case 7: + *len = 3; + return (AS1 (rol,%0) CR_TAB + AS1 (clr,%0) CR_TAB + AS1 (rol,%0)); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. Incorrect shift:", insn); + + out_shift_with_cnt (AS1 (lsr,%0), + insn, operands, len, 1); + return ""; +} + +/* 16bit logic shift right ((unsigned short)x >> i) */ + +const char * +lshrhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 16) + break; + + *len = 2; + return (AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + + case 4: + if (optimize_size && scratch) + break; /* 5 */ + if (ldi_ok) + { + *len = 6; + return (AS1 (swap,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS2 (andi,%A0,0x0f) CR_TAB + AS2 (eor,%A0,%B0) CR_TAB + AS2 (andi,%B0,0x0f) CR_TAB + AS2 (eor,%A0,%B0)); + } + if (scratch) + { + *len = 7; + return (AS1 (swap,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS2 (ldi,%3,0x0f) CR_TAB + "and %A0,%3" CR_TAB + AS2 (eor,%A0,%B0) CR_TAB + "and %B0,%3" CR_TAB + AS2 (eor,%A0,%B0)); + } + break; /* optimize_size ? 6 : 8 */ + + case 5: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + if (ldi_ok) + { + *len = 8; + return (AS1 (lsr,%B0) CR_TAB + AS1 (ror,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS2 (andi,%A0,0x0f) CR_TAB + AS2 (eor,%A0,%B0) CR_TAB + AS2 (andi,%B0,0x0f) CR_TAB + AS2 (eor,%A0,%B0)); + } + if (scratch) + { + *len = 9; + return (AS1 (lsr,%B0) CR_TAB + AS1 (ror,%A0) CR_TAB + AS1 (swap,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS2 (ldi,%3,0x0f) CR_TAB + "and %A0,%3" CR_TAB + AS2 (eor,%A0,%B0) CR_TAB + "and %B0,%3" CR_TAB + AS2 (eor,%A0,%B0)); + } + break; /* 10 */ + + case 6: + if (optimize_size) + break; /* scratch ? 
5 : 6 */ + *len = 9; + return (AS1 (clr,__tmp_reg__) CR_TAB + AS1 (lsl,%A0) CR_TAB + AS1 (rol,%B0) CR_TAB + AS1 (rol,__tmp_reg__) CR_TAB + AS1 (lsl,%A0) CR_TAB + AS1 (rol,%B0) CR_TAB + AS1 (rol,__tmp_reg__) CR_TAB + AS2 (mov,%A0,%B0) CR_TAB + AS2 (mov,%B0,__tmp_reg__)); + + case 7: + *len = 5; + return (AS1 (lsl,%A0) CR_TAB + AS2 (mov,%A0,%B0) CR_TAB + AS1 (rol,%A0) CR_TAB + AS2 (sbc,%B0,%B0) CR_TAB + AS1 (neg,%B0)); + + case 8: + return *len = 2, (AS2 (mov,%A0,%B1) CR_TAB + AS1 (clr,%B0)); + + case 9: + *len = 3; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (lsr,%A0)); + + case 10: + *len = 4; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0)); + + case 11: + *len = 5; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0)); + + case 12: + if (ldi_ok) + { + *len = 4; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS2 (andi,%A0,0x0f)); + } + if (scratch) + { + *len = 5; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS2 (ldi,%3,0x0f) CR_TAB + "and %A0,%3"); + } + *len = 6; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0)); + + case 13: + if (ldi_ok) + { + *len = 5; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS2 (andi,%A0,0x07)); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return (AS2 (ldi,%3,0x08) CR_TAB + AS2 (mul,%B0,%3) CR_TAB + AS2 (mov,%A0,r1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (optimize_size && scratch) + break; /* 5 */ + if (scratch) + { + *len = 6; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (swap,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS2 (ldi,%3,0x07) CR_TAB + "and %A0,%3"); + } + if (AVR_HAVE_MUL) + { + *len = 6; + return ("set" CR_TAB + AS2 (bld,r1,3) CR_TAB + AS2 (mul,%B0,r1) CR_TAB + AS2 (mov,%A0,r1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,__zero_reg__)); + } + *len = 7; + return (AS2 (mov,%A0,%B0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0) CR_TAB + AS1 (lsr,%A0)); + + case 14: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return (AS2 (ldi,%A0,0x04) CR_TAB + AS2 (mul,%B0,%A0) CR_TAB + AS2 (mov,%A0,r1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return (AS2 (ldi,%3,0x04) CR_TAB + AS2 (mul,%B0,%3) CR_TAB + AS2 (mov,%A0,r1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,__zero_reg__)); + } + if (optimize_size && ldi_ok) + { + *len = 5; + return (AS2 (mov,%A0,%B0) CR_TAB + AS2 (ldi,%B0,6) "\n1:\t" + AS1 (lsr,%A0) CR_TAB + AS1 (dec,%B0) CR_TAB + AS1 (brne,1b)); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return (AS1 (clr,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (rol,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (rol,%A0) CR_TAB + AS1 (clr,%B0)); + + case 15: + *len = 4; + return (AS1 (clr,%A0) CR_TAB + AS1 (lsl,%B0) CR_TAB + AS1 (rol,%A0) CR_TAB + AS1 (clr,%B0)); + } + len = t; + } + out_shift_with_cnt ((AS1 (lsr,%B0) CR_TAB + AS1 (ror,%A0)), + insn, operands, len, 2); + return ""; +} + +/* 32bit logic shift right ((unsigned int)x >> i) */ + +const char * +lshrsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch 
(INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 32) + break; + + if (AVR_HAVE_MOVW) + return *len = 3, (AS1 (clr,%D0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS2 (movw,%A0,%C0)); + *len = 4; + return (AS1 (clr,%D0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%A0)); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len = 4; + if (reg0 <= reg1) + return (AS2 (mov,%A0,%B1) CR_TAB + AS2 (mov,%B0,%C1) CR_TAB + AS2 (mov,%C0,%D1) CR_TAB + AS1 (clr,%D0)); + else + return (AS1 (clr,%D0) CR_TAB + AS2 (mov,%C0,%D1) CR_TAB + AS2 (mov,%B0,%C1) CR_TAB + AS2 (mov,%A0,%B1)); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1 + 2) + return *len = 2, (AS1 (clr,%C0) CR_TAB + AS1 (clr,%D0)); + if (AVR_HAVE_MOVW) + return *len = 3, (AS2 (movw,%A0,%C1) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%D0)); + else + return *len = 4, (AS2 (mov,%B0,%D1) CR_TAB + AS2 (mov,%A0,%C1) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%D0)); + } + + case 24: + return *len = 4, (AS2 (mov,%A0,%D1) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%D0)); + + case 31: + *len = 6; + return (AS1 (clr,%A0) CR_TAB + AS2 (sbrc,%D0,7) CR_TAB + AS1 (inc,%A0) CR_TAB + AS1 (clr,%B0) CR_TAB + AS1 (clr,%C0) CR_TAB + AS1 (clr,%D0)); + } + len = t; + } + out_shift_with_cnt ((AS1 (lsr,%D0) CR_TAB + AS1 (ror,%C0) CR_TAB + AS1 (ror,%B0) CR_TAB + AS1 (ror,%A0)), + insn, operands, len, 4); + return ""; +} + +/* Create RTL split patterns for byte sized rotate expressions. This + produces a series of move instructions and considers overlap situations. + Overlapping non-HImode operands need a scratch register. */ + +bool +avr_rotate_bytes (rtx operands[]) +{ + int i, j; + enum machine_mode mode = GET_MODE (operands[0]); + bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]); + bool same_reg = rtx_equal_p (operands[0], operands[1]); + int num = INTVAL (operands[2]); + rtx scratch = operands[3]; + /* Work out if byte or word move is needed. Odd byte rotates need QImode. + Word move if no scratch is needed, otherwise use size of scratch. */ + enum machine_mode move_mode = QImode; + int move_size, offset, size; + + if (num & 0xf) + move_mode = QImode; + else if ((mode == SImode && !same_reg) || !overlapped) + move_mode = HImode; + else + move_mode = GET_MODE (scratch); + + /* Force DI rotate to use QI moves since other DI moves are currently split + into QI moves so forward propagation works better. */ + if (mode == DImode) + move_mode = QImode; + /* Make scratch smaller if needed. */ + if (SCRATCH != GET_CODE (scratch) + && HImode == GET_MODE (scratch) + && QImode == move_mode) + scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0); + + move_size = GET_MODE_SIZE (move_mode); + /* Number of bytes/words to rotate. */ + offset = (num >> 3) / move_size; + /* Number of moves needed. */ + size = GET_MODE_SIZE (mode) / move_size; + /* Himode byte swap is special case to avoid a scratch register. */ + if (mode == HImode && same_reg) + { + /* HImode byte swap, using xor. This is as quick as using scratch. 
*/ + rtx src, dst; + src = simplify_gen_subreg (move_mode, operands[1], mode, 0); + dst = simplify_gen_subreg (move_mode, operands[0], mode, 1); + if (!rtx_equal_p (dst, src)) + { + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + emit_move_insn (src, gen_rtx_XOR (QImode, src, dst)); + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + } + } + else + { +#define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode) */ + /* Create linked list of moves to determine move order. */ + struct { + rtx src, dst; + int links; + } move[MAX_SIZE + 8]; + int blocked, moves; + + gcc_assert (size <= MAX_SIZE); + /* Generate list of subreg moves. */ + for (i = 0; i < size; i++) + { + int from = i; + int to = (from + offset) % size; + move[i].src = simplify_gen_subreg (move_mode, operands[1], + mode, from * move_size); + move[i].dst = simplify_gen_subreg (move_mode, operands[0], + mode, to * move_size); + move[i].links = -1; + } + /* Mark dependence where a dst of one move is the src of another move. + The first move is a conflict as it must wait until second is + performed. We ignore moves to self - we catch this later. */ + if (overlapped) + for (i = 0; i < size; i++) + if (reg_overlap_mentioned_p (move[i].dst, operands[1])) + for (j = 0; j < size; j++) + if (j != i && rtx_equal_p (move[j].src, move[i].dst)) + { + /* The dst of move i is the src of move j. */ + move[i].links = j; + break; + } + + blocked = -1; + moves = 0; + /* Go through move list and perform non-conflicting moves. As each + non-overlapping move is made, it may remove other conflicts + so the process is repeated until no conflicts remain. */ + do + { + blocked = -1; + moves = 0; + /* Emit move where dst is not also a src or we have used that + src already. */ + for (i = 0; i < size; i++) + if (move[i].src != NULL_RTX) + { + if (move[i].links == -1 + || move[move[i].links].src == NULL_RTX) + { + moves++; + /* Ignore NOP moves to self. */ + if (!rtx_equal_p (move[i].dst, move[i].src)) + emit_move_insn (move[i].dst, move[i].src); + + /* Remove conflict from list. */ + move[i].src = NULL_RTX; + } + else + blocked = i; + } + + /* Check for deadlock. This is when no moves occurred and we have + at least one blocked move. */ + if (moves == 0 && blocked != -1) + { + /* Need to use scratch register to break deadlock. + Add move to put dst of blocked move into scratch. + When this move occurs, it will break chain deadlock. + The scratch register is substituted for real move. */ + + gcc_assert (SCRATCH != GET_CODE (scratch)); + + move[size].src = move[blocked].dst; + move[size].dst = scratch; + /* Scratch move is never blocked. */ + move[size].links = -1; + /* Make sure we have valid link. */ + gcc_assert (move[blocked].links != -1); + /* Replace src of blocking move with scratch reg. */ + move[move[blocked].links].src = scratch; + /* Make dependent on scratch move occuring. */ + move[blocked].links = size; + size=size+1; + } + } + while (blocked != -1); + } + return true; +} + +/* Modifies the length assigned to instruction INSN + LEN is the initially computed length of the insn. 
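The move-list bookkeeping above amounts to rotating an array of byte slots while breaking each dependency cycle with one spare location. A compact C restatement of that idea, not the exact worklist algorithm used above; array indices stand in for subreg byte positions and the local tmp plays the role of the scratch register. Names are illustrative.

    #include <stdint.h>

    /* Rotate SIZE byte slots upward by OFFSET positions, so the value in
       slot i ends up in slot (i + offset) % size.  Each dependency cycle
       is broken by parking one value in tmp.  Assumes 0 < offset < size.  */
    static void
    rotate_slots (uint8_t *slot, int size, int offset)
    {
      int moved = 0;

      for (int start = 0; moved < size; start++)
        {
          uint8_t tmp = slot[start];     /* break the cycle starting here */
          int to = start;

          for (;;)
            {
              int from = (to - offset + size) % size;
              if (from == start)
                break;
              slot[to] = slot[from];     /* this move is now unblocked */
              to = from;
              moved++;
            }

          slot[to] = tmp;                /* complete the cycle */
          moved++;
        }
    }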
*/ + +int +adjust_insn_length (rtx insn, int len) +{ + rtx patt = PATTERN (insn); + rtx set; + + if (GET_CODE (patt) == SET) + { + rtx op[10]; + op[1] = SET_SRC (patt); + op[0] = SET_DEST (patt); + if (general_operand (op[1], VOIDmode) + && general_operand (op[0], VOIDmode)) + { + switch (GET_MODE (op[0])) + { + case QImode: + output_movqi (insn, op, &len); + break; + case HImode: + output_movhi (insn, op, &len); + break; + case SImode: + case SFmode: + output_movsisf (insn, op, &len); + break; + default: + break; + } + } + else if (op[0] == cc0_rtx && REG_P (op[1])) + { + switch (GET_MODE (op[1])) + { + case HImode: out_tsthi (insn, op[1], &len); break; + case SImode: out_tstsi (insn, op[1], &len); break; + default: break; + } + } + else if (GET_CODE (op[1]) == AND) + { + if (GET_CODE (XEXP (op[1],1)) == CONST_INT) + { + HOST_WIDE_INT mask = INTVAL (XEXP (op[1],1)); + if (GET_MODE (op[1]) == SImode) + len = (((mask & 0xff) != 0xff) + + ((mask & 0xff00) != 0xff00) + + ((mask & 0xff0000L) != 0xff0000L) + + ((mask & 0xff000000L) != 0xff000000L)); + else if (GET_MODE (op[1]) == HImode) + len = (((mask & 0xff) != 0xff) + + ((mask & 0xff00) != 0xff00)); + } + } + else if (GET_CODE (op[1]) == IOR) + { + if (GET_CODE (XEXP (op[1],1)) == CONST_INT) + { + HOST_WIDE_INT mask = INTVAL (XEXP (op[1],1)); + if (GET_MODE (op[1]) == SImode) + len = (((mask & 0xff) != 0) + + ((mask & 0xff00) != 0) + + ((mask & 0xff0000L) != 0) + + ((mask & 0xff000000L) != 0)); + else if (GET_MODE (op[1]) == HImode) + len = (((mask & 0xff) != 0) + + ((mask & 0xff00) != 0)); + } + } + } + set = single_set (insn); + if (set) + { + rtx op[10]; + + op[1] = SET_SRC (set); + op[0] = SET_DEST (set); + + if (GET_CODE (patt) == PARALLEL + && general_operand (op[1], VOIDmode) + && general_operand (op[0], VOIDmode)) + { + if (XVECLEN (patt, 0) == 2) + op[2] = XVECEXP (patt, 0, 1); + + switch (GET_MODE (op[0])) + { + case QImode: + len = 2; + break; + case HImode: + output_reload_inhi (insn, op, &len); + break; + case SImode: + case SFmode: + output_reload_insisf (insn, op, &len); + break; + default: + break; + } + } + else if (GET_CODE (op[1]) == ASHIFT + || GET_CODE (op[1]) == ASHIFTRT + || GET_CODE (op[1]) == LSHIFTRT) + { + rtx ops[10]; + ops[0] = op[0]; + ops[1] = XEXP (op[1],0); + ops[2] = XEXP (op[1],1); + switch (GET_CODE (op[1])) + { + case ASHIFT: + switch (GET_MODE (op[0])) + { + case QImode: ashlqi3_out (insn,ops,&len); break; + case HImode: ashlhi3_out (insn,ops,&len); break; + case SImode: ashlsi3_out (insn,ops,&len); break; + default: break; + } + break; + case ASHIFTRT: + switch (GET_MODE (op[0])) + { + case QImode: ashrqi3_out (insn,ops,&len); break; + case HImode: ashrhi3_out (insn,ops,&len); break; + case SImode: ashrsi3_out (insn,ops,&len); break; + default: break; + } + break; + case LSHIFTRT: + switch (GET_MODE (op[0])) + { + case QImode: lshrqi3_out (insn,ops,&len); break; + case HImode: lshrhi3_out (insn,ops,&len); break; + case SImode: lshrsi3_out (insn,ops,&len); break; + default: break; + } + break; + default: + break; + } + } + } + return len; +} + +/* Return nonzero if register REG dead after INSN. */ + +int +reg_unused_after (rtx insn, rtx reg) +{ + return (dead_or_set_p (insn, reg) + || (REG_P(reg) && _reg_unused_after (insn, reg))); +} + +/* Return nonzero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels. It may live past calls or jumps though. 
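The AND/IOR length estimates above just count the bytes of the constant that actually require an instruction. A small C restatement of the SImode AND case; the IOR case is identical with a test against zero instead of 0xff. The function name is illustrative.

    #include <stdint.h>

    /* Number of andi-style instructions needed to AND a 32-bit register
       with MASK: a byte of all ones leaves that byte untouched, so it
       contributes nothing to the length.  */
    static int
    si_and_length (uint32_t mask)
    {
      int len = 0;

      for (int i = 0; i < 4; i++)
        if (((mask >> (8 * i)) & 0xff) != 0xff)
          len++;

      return len;
    }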
*/ + +int +_reg_unused_after (rtx insn, rtx reg) +{ + enum rtx_code code; + rtx set; + + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + set = single_set (insn); + if (set && GET_CODE (SET_DEST (set)) != MEM + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + + while ((insn = NEXT_INSN (insn))) + { + rtx set; + code = GET_CODE (insn); + +#if 0 + /* If this is a label that existed before reload, then the register + if dead here. However, if this is a label added by reorg, then + the register may still be live here. We can't tell the difference, + so we just ignore labels completely. */ + if (code == CODE_LABEL) + return 1; + /* else */ +#endif + + if (!INSN_P (insn)) + continue; + + if (code == JUMP_INSN) + return 0; + + /* If this is a sequence, we must handle them all at once. + We could have for instance a call that sets the target register, + and an insn in a delay slot that uses the register. In this case, + we must return 0. */ + else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) + { + int i; + int retval = 0; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx this_insn = XVECEXP (PATTERN (insn), 0, i); + rtx set = single_set (this_insn); + + if (GET_CODE (this_insn) == CALL_INSN) + code = CALL_INSN; + else if (GET_CODE (this_insn) == JUMP_INSN) + { + if (INSN_ANNULLED_BRANCH_P (this_insn)) + return 0; + code = JUMP_INSN; + } + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + { + if (GET_CODE (SET_DEST (set)) != MEM) + retval = 1; + else + return 0; + } + if (set == 0 + && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) + return 0; + } + if (retval == 1) + return 1; + else if (code == JUMP_INSN) + return 0; + } + + if (code == CALL_INSN) + { + rtx tem; + for (tem = CALL_INSN_FUNCTION_USAGE (insn); tem; tem = XEXP (tem, 1)) + if (GET_CODE (XEXP (tem, 0)) == USE + && REG_P (XEXP (XEXP (tem, 0), 0)) + && reg_overlap_mentioned_p (reg, XEXP (XEXP (tem, 0), 0))) + return 0; + if (call_used_regs[REGNO (reg)]) + return 1; + } + + set = single_set (insn); + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return GET_CODE (SET_DEST (set)) != MEM; + if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return 0; + } + return 1; +} + +/* Target hook for assembling integer objects. The AVR version needs + special handling for references to certain labels. */ + +static bool +avr_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + if (size == POINTER_SIZE / BITS_PER_UNIT && aligned_p + && text_segment_operand (x, VOIDmode) ) + { + fputs ("\t.word\tgs(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")\n", asm_out_file); + return true; + } + return default_assemble_integer (x, size, aligned_p); +} + +/* Worker function for ASM_DECLARE_FUNCTION_NAME. */ + +void +avr_asm_declare_function_name (FILE *file, const char *name, tree decl) +{ + + /* If the function has the 'signal' or 'interrupt' attribute, test to + make sure that the name of the function is "__vector_NN" so as to + catch when the user misspells the interrupt vector name. 
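Concretely, the check that follows compares the assembler name against the "__vector" prefix. An illustrative pair of declarations, with hypothetical identifiers, showing what passes and what draws the warning:

    /* Accepted: the vector-table naming convention the check expects.  */
    void __vector_16 (void) __attribute__ ((signal));

    /* Warned about: carries the signal attribute but is not named
       "__vector_NN", so it will not be referenced from the vector table.  */
    void my_timer_handler (void) __attribute__ ((signal));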
*/ + + if (cfun->machine->is_interrupt) + { + if (strncmp (name, "__vector", strlen ("__vector")) != 0) + { + warning_at (DECL_SOURCE_LOCATION (decl), 0, + "%qs appears to be a misspelled interrupt handler", + name); + } + } + else if (cfun->machine->is_signal) + { + if (strncmp (name, "__vector", strlen ("__vector")) != 0) + { + warning_at (DECL_SOURCE_LOCATION (decl), 0, + "%qs appears to be a misspelled signal handler", + name); + } + } + + ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); + ASM_OUTPUT_LABEL (file, name); +} + +/* The routine used to output NUL terminated strings. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable, especially for targets like the i386 + (where the only alternative is to output character sequences as + comma separated lists of numbers). */ + +void +gas_output_limited_string(FILE *file, const char *str) +{ + const unsigned char *_limited_str = (const unsigned char *) str; + unsigned ch; + fprintf (file, "%s\"", STRING_ASM_OP); + for (; (ch = *_limited_str); _limited_str++) + { + int escape; + switch (escape = ESCAPES[ch]) + { + case 0: + putc (ch, file); + break; + case 1: + fprintf (file, "\\%03o", ch); + break; + default: + putc ('\\', file); + putc (escape, file); + break; + } + } + fprintf (file, "\"\n"); +} + +/* The routine used to output sequences of byte values. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable. Note that if we find subparts of the + character sequence which end with NUL (and which are shorter than + STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */ + +void +gas_output_ascii(FILE *file, const char *str, size_t length) +{ + const unsigned char *_ascii_bytes = (const unsigned char *) str; + const unsigned char *limit = _ascii_bytes + length; + unsigned bytes_in_chunk = 0; + for (; _ascii_bytes < limit; _ascii_bytes++) + { + const unsigned char *p; + if (bytes_in_chunk >= 60) + { + fprintf (file, "\"\n"); + bytes_in_chunk = 0; + } + for (p = _ascii_bytes; p < limit && *p != '\0'; p++) + continue; + if (p < limit && (p - _ascii_bytes) <= (signed)STRING_LIMIT) + { + if (bytes_in_chunk > 0) + { + fprintf (file, "\"\n"); + bytes_in_chunk = 0; + } + gas_output_limited_string (file, (const char*)_ascii_bytes); + _ascii_bytes = p; + } + else + { + int escape; + unsigned ch; + if (bytes_in_chunk == 0) + fprintf (file, "\t.ascii\t\""); + switch (escape = ESCAPES[ch = *_ascii_bytes]) + { + case 0: + putc (ch, file); + bytes_in_chunk++; + break; + case 1: + fprintf (file, "\\%03o", ch); + bytes_in_chunk += 4; + break; + default: + putc ('\\', file); + putc (escape, file); + bytes_in_chunk += 2; + break; + } + } + } + if (bytes_in_chunk > 0) + fprintf (file, "\"\n"); +} + +/* Return value is nonzero if pseudos that have been + assigned to registers of class CLASS would likely be spilled + because registers of CLASS are needed for spill registers. */ + +static bool +avr_class_likely_spilled_p (reg_class_t c) +{ + return (c != ALL_REGS && c != ADDW_REGS); +} + +/* Valid attributes: + progmem - put data to program memory; + signal - make a function to be hardware interrupt. After function + prologue interrupts are disabled; + interrupt - make a function to be hardware interrupt. 
After function + prologue interrupts are enabled; + naked - don't generate function prologue/epilogue and `ret' command. + + Only `progmem' attribute valid for type. */ + +/* Handle a "progmem" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +avr_handle_progmem_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (DECL_P (*node)) + { + if (TREE_CODE (*node) == TYPE_DECL) + { + /* This is really a decl attribute, not a type attribute, + but try to handle it for GCC 3.0 backwards compatibility. */ + + tree type = TREE_TYPE (*node); + tree attr = tree_cons (name, args, TYPE_ATTRIBUTES (type)); + tree newtype = build_type_attribute_variant (type, attr); + + TYPE_MAIN_VARIANT (newtype) = TYPE_MAIN_VARIANT (type); + TREE_TYPE (*node) = newtype; + *no_add_attrs = true; + } + else if (TREE_STATIC (*node) || DECL_EXTERNAL (*node)) + { + *no_add_attrs = false; + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + } + + return NULL_TREE; +} + +/* Handle an attribute requiring a FUNCTION_DECL; arguments as in + struct attribute_spec.handler. */ + +static tree +avr_handle_fndecl_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +static tree +avr_handle_fntype_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Look for attribute `progmem' in DECL + if found return 1, otherwise 0. */ + +int +avr_progmem_p (tree decl, tree attributes) +{ + tree a; + + if (TREE_CODE (decl) != VAR_DECL) + return 0; + + if (NULL_TREE + != lookup_attribute ("progmem", attributes)) + return 1; + + a=decl; + do + a = TREE_TYPE(a); + while (TREE_CODE (a) == ARRAY_TYPE); + + if (a == error_mark_node) + return 0; + + if (NULL_TREE != lookup_attribute ("progmem", TYPE_ATTRIBUTES (a))) + return 1; + + return 0; +} + +/* Add the section attribute if the variable is in progmem. */ + +static void +avr_insert_attributes (tree node, tree *attributes) +{ + if (TREE_CODE (node) == VAR_DECL + && (TREE_STATIC (node) || DECL_EXTERNAL (node)) + && avr_progmem_p (node, *attributes)) + { + tree node0 = node; + + /* For C++, we have to peel arrays in order to get correct + determination of readonlyness. */ + + do + node0 = TREE_TYPE (node0); + while (TREE_CODE (node0) == ARRAY_TYPE); + + if (error_mark_node == node0) + return; + + if (TYPE_READONLY (node0)) + { + static const char dsec[] = ".progmem.data"; + + *attributes = tree_cons (get_identifier ("section"), + build_tree_list (NULL, build_string (strlen (dsec), dsec)), + *attributes); + } + else + { + error ("variable %q+D must be const in order to be put into" + " read-only section by means of %<__attribute__((progmem))%>", + node); + } + } +} + +/* A get_unnamed_section callback for switching to progmem_section. */ + +static void +avr_output_progmem_section_asm_op (const void *arg ATTRIBUTE_UNUSED) +{ + fprintf (asm_out_file, + "\t.section .progmem.gcc_sw_table, \"%s\", @progbits\n", + AVR_HAVE_JMP_CALL ? 
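To make the attribute handlers above concrete, a few illustrative user-level declarations they would act on; identifiers are hypothetical, and the behaviour notes restate the attribute list above.

    /* progmem data must be const so it can live in the read-only
       .progmem.data section; a non-const declaration is rejected above.  */
    const char greeting[] __attribute__ ((progmem)) = "hello";

    /* signal: hardware interrupt handler, interrupts stay disabled.  */
    void __vector_10 (void) __attribute__ ((signal));

    /* interrupt: hardware interrupt handler, interrupts are re-enabled
       right after the prologue.  */
    void __vector_11 (void) __attribute__ ((interrupt));

    /* naked: no prologue, epilogue or ret is generated.  */
    void reset_stub (void) __attribute__ ((naked));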
"a" : "ax"); + /* Should already be aligned, this is just to be safe if it isn't. */ + fprintf (asm_out_file, "\t.p2align 1\n"); +} + +/* Implement TARGET_ASM_INIT_SECTIONS. */ + +static void +avr_asm_init_sections (void) +{ + progmem_section = get_unnamed_section (AVR_HAVE_JMP_CALL ? 0 : SECTION_CODE, + avr_output_progmem_section_asm_op, + NULL); + readonly_data_section = data_section; +} + +static unsigned int +avr_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + + if (strncmp (name, ".noinit", 7) == 0) + { + if (decl && TREE_CODE (decl) == VAR_DECL + && DECL_INITIAL (decl) == NULL_TREE) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in the " + ".noinit section"); + } + + if (0 == strncmp (name, ".progmem.data", strlen (".progmem.data"))) + flags &= ~SECTION_WRITE; + + return flags; +} + + +/* Implement `TARGET_ENCODE_SECTION_INFO'. */ + +static void +avr_encode_section_info (tree decl, rtx rtl, int new_decl_p) +{ + /* In avr_handle_progmem_attribute, DECL_INITIAL is not yet + readily available, see PR34734. So we postpone the warning + about uninitialized data in program memory section until here. */ + + if (new_decl_p + && decl && DECL_P (decl) + && NULL_TREE == DECL_INITIAL (decl) + && !DECL_EXTERNAL (decl) + && avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + { + warning (OPT_Wuninitialized, + "uninitialized variable %q+D put into " + "program memory area", decl); + } + + default_encode_section_info (decl, rtl, new_decl_p); +} + + +/* Outputs some appropriate text to go at the start of an assembler + file. */ + +static void +avr_file_start (void) +{ + if (avr_current_arch->asm_only) + error ("MCU %qs supported for assembler only", avr_mcu_name); + + default_file_start (); + + fputs ("__SREG__ = 0x3f\n" + "__SP_H__ = 0x3e\n" + "__SP_L__ = 0x3d\n", asm_out_file); + + fputs ("__tmp_reg__ = 0\n" + "__zero_reg__ = 1\n", asm_out_file); + + /* FIXME: output these only if there is anything in the .data / .bss + sections - some code size could be saved by not linking in the + initialization code from libgcc if one or both sections are empty. */ + fputs ("\t.global __do_copy_data\n", asm_out_file); + fputs ("\t.global __do_clear_bss\n", asm_out_file); +} + +/* Outputs to the stdio stream FILE some + appropriate text to go at the end of an assembler file. */ + +static void +avr_file_end (void) +{ +} + +/* Choose the order in which to allocate hard registers for + pseudo-registers local to a basic block. + + Store the desired register order in the array `reg_alloc_order'. + Element 0 should be the register to allocate first; element 1, the + next register; and so on. */ + +void +order_regs_for_local_alloc (void) +{ + unsigned int i; + static const int order_0[] = { + 24,25, + 18,19, + 20,21, + 22,23, + 30,31, + 26,27, + 28,29, + 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2, + 0,1, + 32,33,34,35 + }; + static const int order_1[] = { + 18,19, + 20,21, + 22,23, + 24,25, + 30,31, + 26,27, + 28,29, + 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2, + 0,1, + 32,33,34,35 + }; + static const int order_2[] = { + 25,24, + 23,22, + 21,20, + 19,18, + 30,31, + 26,27, + 28,29, + 17,16, + 15,14,13,12,11,10,9,8,7,6,5,4,3,2, + 1,0, + 32,33,34,35 + }; + + const int *order = (TARGET_ORDER_1 ? order_1 : + TARGET_ORDER_2 ? 
order_2 : + order_0); + for (i=0; i < ARRAY_SIZE (order_0); ++i) + reg_alloc_order[i] = order[i]; +} + + +/* Implement `TARGET_REGISTER_MOVE_COST' */ + +static int +avr_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + return (from == STACK_REG ? 6 + : to == STACK_REG ? 12 + : 2); +} + + +/* Implement `TARGET_MEMORY_MOVE_COST' */ + +static int +avr_memory_move_cost (enum machine_mode mode, reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return (mode == QImode ? 2 + : mode == HImode ? 4 + : mode == SImode ? 8 + : mode == SFmode ? 8 + : 16); +} + + +/* Mutually recursive subroutine of avr_rtx_cost for calculating the + cost of an RTX operand given its context. X is the rtx of the + operand, MODE is its mode, and OUTER is the rtx_code of this + operand's parent operator. */ + +static int +avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer, + bool speed) +{ + enum rtx_code code = GET_CODE (x); + int total; + + switch (code) + { + case REG: + case SUBREG: + return 0; + + case CONST_INT: + case CONST_DOUBLE: + return COSTS_N_INSNS (GET_MODE_SIZE (mode)); + + default: + break; + } + + total = 0; + avr_rtx_costs (x, code, outer, &total, speed); + return total; +} + +/* The AVR backend's rtx_cost function. X is rtx expression whose cost + is to be calculated. Return true if the complete cost has been + computed, and false if subexpressions should be scanned. In either + case, *TOTAL contains the cost result. */ + +static bool +avr_rtx_costs (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, int *total, + bool speed) +{ + enum rtx_code code = (enum rtx_code) codearg; + enum machine_mode mode = GET_MODE (x); + HOST_WIDE_INT val; + + switch (code) + { + case CONST_INT: + case CONST_DOUBLE: + /* Immediate constants are as cheap as registers. 
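The memory-move cost hook above boils down to a simple size-based table; a compact restatement, with the numbers copied from the hook rather than measured, and an illustrative function name.

    /* Two cost units per byte moved for the modes the hook lists, with
       everything larger lumped together at 16, mirroring
       avr_memory_move_cost above.  */
    static int
    avr_mem_cost_sketch (int mode_size)
    {
      switch (mode_size)
        {
        case 1:  return 2;    /* QImode */
        case 2:  return 4;    /* HImode */
        case 4:  return 8;    /* SImode / SFmode */
        default: return 16;   /* anything larger */
        }
    }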
*/ + *total = 0; + return true; + + case MEM: + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + return true; + + case NEG: + switch (mode) + { + case QImode: + case SFmode: + *total = COSTS_N_INSNS (1); + break; + + case HImode: + *total = COSTS_N_INSNS (3); + break; + + case SImode: + *total = COSTS_N_INSNS (7); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case ABS: + switch (mode) + { + case QImode: + case SFmode: + *total = COSTS_N_INSNS (1); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case NOT: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case ZERO_EXTEND: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + - GET_MODE_SIZE (GET_MODE (XEXP (x, 0)))); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case SIGN_EXTEND: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + 2 + - GET_MODE_SIZE (GET_MODE (XEXP (x, 0)))); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case PLUS: + switch (mode) + { + case QImode: + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (2); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (2); + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (4); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case MINUS: + case AND: + case IOR: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + return true; + + case XOR: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + return true; + + case MULT: + switch (mode) + { + case QImode: + if (AVR_HAVE_MUL) + *total = COSTS_N_INSNS (!speed ? 3 : 4); + else if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + return false; + break; + + case HImode: + if (AVR_HAVE_MUL) + *total = COSTS_N_INSNS (!speed ? 7 : 10); + else if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + return false; + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 
2 : 1); + else + return false; + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + return true; + + case ROTATE: + switch (mode) + { + case QImode: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 4) + *total = COSTS_N_INSNS (1); + + break; + + case HImode: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 8) + *total = COSTS_N_INSNS (3); + + break; + + case SImode: + if (CONST_INT_P (XEXP (x, 1))) + switch (INTVAL (XEXP (x, 1))) + { + case 8: + case 24: + *total = COSTS_N_INSNS (5); + break; + case 16: + *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 6); + break; + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case ASHIFT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 7) + *total = COSTS_N_INSNS (3); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + *total = COSTS_N_INSNS (2); + break; + case 9: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 3: + case 10: + case 15: + *total = COSTS_N_INSNS (4); + break; + case 7: + case 11: + case 12: + *total = COSTS_N_INSNS (5); + break; + case 4: + *total = COSTS_N_INSNS (!speed ? 5 : 8); + break; + case 6: + *total = COSTS_N_INSNS (!speed ? 5 : 9); + break; + case 5: + *total = COSTS_N_INSNS (!speed ? 5 : 10); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 24: + *total = COSTS_N_INSNS (3); + break; + case 1: + case 8: + case 16: + *total = COSTS_N_INSNS (4); + break; + case 31: + *total = COSTS_N_INSNS (6); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + default: + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case ASHIFTRT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 6) + *total = COSTS_N_INSNS (4); + else if (val == 7) + *total = COSTS_N_INSNS (2); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 
5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (2); + break; + case 15: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 7: + case 8: + case 9: + *total = COSTS_N_INSNS (4); + break; + case 10: + case 14: + *total = COSTS_N_INSNS (5); + break; + case 11: + *total = COSTS_N_INSNS (!speed ? 5 : 6); + break; + case 12: + *total = COSTS_N_INSNS (!speed ? 5 : 7); + break; + case 6: + case 13: + *total = COSTS_N_INSNS (!speed ? 5 : 8); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (4); + break; + case 8: + case 16: + case 24: + *total = COSTS_N_INSNS (6); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + case 31: + *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 5); + break; + default: + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case LSHIFTRT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 7) + *total = COSTS_N_INSNS (3); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + *total = COSTS_N_INSNS (2); + break; + case 9: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 10: + case 15: + *total = COSTS_N_INSNS (4); + break; + case 7: + case 11: + *total = COSTS_N_INSNS (5); + break; + case 3: + case 12: + case 13: + case 14: + *total = COSTS_N_INSNS (!speed ? 5 : 6); + break; + case 4: + *total = COSTS_N_INSNS (!speed ? 5 : 7); + break; + case 5: + case 6: + *total = COSTS_N_INSNS (!speed ? 5 : 9); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (4); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + case 8: + case 16: + case 24: + *total = COSTS_N_INSNS (4); + break; + case 31: + *total = COSTS_N_INSNS (6); + break; + default: + *total = COSTS_N_INSNS (!speed ? 
7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + case COMPARE: + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + break; + + case HImode: + *total = COSTS_N_INSNS (2); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + else if (INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (1); + break; + + case SImode: + *total = COSTS_N_INSNS (4); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed); + else if (INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (3); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); + return true; + + default: + break; + } + return false; +} + +/* Calculate the cost of a memory address. */ + +static int +avr_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) +{ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x,1)) == CONST_INT + && (REG_P (XEXP (x,0)) || GET_CODE (XEXP (x,0)) == SUBREG) + && INTVAL (XEXP (x,1)) >= 61) + return 18; + if (CONSTANT_ADDRESS_P (x)) + { + if (optimize > 0 && io_address_operand (x, QImode)) + return 2; + return 4; + } + return 4; +} + +/* Test for extra memory constraint 'Q'. + It's a memory address based on Y or Z pointer with valid displacement. */ + +int +extra_constraint_Q (rtx x) +{ + if (GET_CODE (XEXP (x,0)) == PLUS + && REG_P (XEXP (XEXP (x,0), 0)) + && GET_CODE (XEXP (XEXP (x,0), 1)) == CONST_INT + && (INTVAL (XEXP (XEXP (x,0), 1)) + <= MAX_LD_OFFSET (GET_MODE (x)))) + { + rtx xx = XEXP (XEXP (x,0), 0); + int regno = REGNO (xx); + if (TARGET_ALL_DEBUG) + { + fprintf (stderr, ("extra_constraint:\n" + "reload_completed: %d\n" + "reload_in_progress: %d\n"), + reload_completed, reload_in_progress); + debug_rtx (x); + } + if (regno >= FIRST_PSEUDO_REGISTER) + return 1; /* allocate pseudos */ + else if (regno == REG_Z || regno == REG_Y) + return 1; /* strictly check */ + else if (xx == frame_pointer_rtx + || xx == arg_pointer_rtx) + return 1; /* XXX frame & arg pointer checks */ + } + return 0; +} + +/* Convert condition code CONDITION to the valid AVR condition code. */ + +RTX_CODE +avr_normalize_condition (RTX_CODE condition) +{ + switch (condition) + { + case GT: + return GE; + case GTU: + return GEU; + case LE: + return LT; + case LEU: + return LTU; + default: + gcc_unreachable (); + } +} + +/* This function optimizes conditional jumps. */ + +static void +avr_reorg (void) +{ + rtx insn, pattern; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (! (GET_CODE (insn) == INSN + || GET_CODE (insn) == CALL_INSN + || GET_CODE (insn) == JUMP_INSN) + || !single_set (insn)) + continue; + + pattern = PATTERN (insn); + + if (GET_CODE (pattern) == PARALLEL) + pattern = XVECEXP (pattern, 0, 0); + if (GET_CODE (pattern) == SET + && SET_DEST (pattern) == cc0_rtx + && compare_diff_p (insn)) + { + if (GET_CODE (SET_SRC (pattern)) == COMPARE) + { + /* Now we work under compare insn. 
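In source terms, the normalization above, applied together with bumping the constant by one in the reorg pass that follows, turns strict comparisons into the non-strict forms the hardware compare handles directly. A small illustration with hypothetical function names:

    #include <stdint.h>

    /* GT is rewritten as GE against the constant plus one; both functions
       compute the same predicate, and the second matches the form the
       comparison is normalized into.  */
    static int is_large     (uint8_t x) { return x > 5;  }
    static int is_large_alt (uint8_t x) { return x >= 6; }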
*/ + + pattern = SET_SRC (pattern); + if (true_regnum (XEXP (pattern,0)) >= 0 + && true_regnum (XEXP (pattern,1)) >= 0 ) + { + rtx x = XEXP (pattern,0); + rtx next = next_real_insn (insn); + rtx pat = PATTERN (next); + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + PUT_CODE (t, swap_condition (GET_CODE (t))); + XEXP (pattern,0) = XEXP (pattern,1); + XEXP (pattern,1) = x; + INSN_CODE (next) = -1; + } + else if (true_regnum (XEXP (pattern, 0)) >= 0 + && XEXP (pattern, 1) == const0_rtx) + { + /* This is a tst insn, we can reverse it. */ + rtx next = next_real_insn (insn); + rtx pat = PATTERN (next); + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + + PUT_CODE (t, swap_condition (GET_CODE (t))); + XEXP (pattern, 1) = XEXP (pattern, 0); + XEXP (pattern, 0) = const0_rtx; + INSN_CODE (next) = -1; + INSN_CODE (insn) = -1; + } + else if (true_regnum (XEXP (pattern,0)) >= 0 + && GET_CODE (XEXP (pattern,1)) == CONST_INT) + { + rtx x = XEXP (pattern,1); + rtx next = next_real_insn (insn); + rtx pat = PATTERN (next); + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + enum machine_mode mode = GET_MODE (XEXP (pattern, 0)); + + if (avr_simplify_comparison_p (mode, GET_CODE (t), x)) + { + XEXP (pattern, 1) = gen_int_mode (INTVAL (x) + 1, mode); + PUT_CODE (t, avr_normalize_condition (GET_CODE (t))); + INSN_CODE (next) = -1; + INSN_CODE (insn) = -1; + } + } + } + } + } +} + +/* Returns register number for function return value.*/ + +int +avr_ret_register (void) +{ + return 24; +} + +/* Create an RTX representing the place where a + library function returns a value of mode MODE. */ + +rtx +avr_libcall_value (enum machine_mode mode) +{ + int offs = GET_MODE_SIZE (mode); + if (offs < 2) + offs = 2; + return gen_rtx_REG (mode, RET_REGISTER + 2 - offs); +} + +/* Create an RTX representing the place where a + function returns a value of data type VALTYPE. */ + +rtx +avr_function_value (const_tree type, + const_tree func ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + unsigned int offs; + + if (TYPE_MODE (type) != BLKmode) + return avr_libcall_value (TYPE_MODE (type)); + + offs = int_size_in_bytes (type); + if (offs < 2) + offs = 2; + if (offs > 2 && offs < GET_MODE_SIZE (SImode)) + offs = GET_MODE_SIZE (SImode); + else if (offs > GET_MODE_SIZE (SImode) && offs < GET_MODE_SIZE (DImode)) + offs = GET_MODE_SIZE (DImode); + + return gen_rtx_REG (BLKmode, RET_REGISTER + 2 - offs); +} + +int +test_hard_reg_class (enum reg_class rclass, rtx x) +{ + int regno = true_regnum (x); + if (regno < 0) + return 0; + + if (TEST_HARD_REG_CLASS (rclass, regno)) + return 1; + + return 0; +} + + +int +jump_over_one_insn_p (rtx insn, rtx dest) +{ + int uid = INSN_UID (GET_CODE (dest) == LABEL_REF + ? XEXP (dest, 0) + : dest); + int jump_addr = INSN_ADDRESSES (INSN_UID (insn)); + int dest_addr = INSN_ADDRESSES (uid); + return dest_addr - jump_addr == get_attr_length (insn) + 1; +} + +/* Returns 1 if a value of mode MODE can be stored starting with hard + register number REGNO. On the enhanced core, anything larger than + 1 byte must start in even numbered register for "movw" to work + (this way we don't have to check for odd registers everywhere). */ + +int +avr_hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + /* NOTE: 8-bit values must not be disallowed for R28 or R29. + Disallowing QI et al. in these regs might lead to code like + (set (subreg:QI (reg:HI 28) n) ...) + which will result in wrong code because reload does not + handle SUBREGs of hard regsisters like this. + This could be fixed in reload. 
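Working the return-register formula above through the common sizes may help; with RET_REGISTER being r24, the start register is 24 + 2 - size, so return values always end at r25. A sketch of the arithmetic, with an illustrative function name:

    /* Start register for a return value of SIZE bytes, mirroring
       avr_libcall_value above (RET_REGISTER is r24).  */
    static int
    return_start_reg (int size)
    {
      if (size < 2)
        size = 2;
      return 24 + 2 - size;   /* QI/HI -> r24, SI/SF -> r22, DI -> r18 */
    }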
However, it appears + that fixing reload is not wanted by reload people. */ + + /* Any GENERAL_REGS register can hold 8-bit values. */ + + if (GET_MODE_SIZE (mode) == 1) + return 1; + + /* FIXME: Ideally, the following test is not needed. + However, it turned out that it can reduce the number + of spill fails. AVR and it's poor endowment with + address registers is extreme stress test for reload. */ + + if (GET_MODE_SIZE (mode) >= 4 + && regno >= REG_X) + return 0; + + /* All modes larger than 8 bits should start in an even register. */ + + return !(regno & 1); +} + +const char * +output_reload_inhi (rtx insn ATTRIBUTE_UNUSED, rtx *operands, int *len) +{ + int tmp; + if (!len) + len = &tmp; + + if (GET_CODE (operands[1]) == CONST_INT) + { + int val = INTVAL (operands[1]); + if ((val & 0xff) == 0) + { + *len = 3; + return (AS2 (mov,%A0,__zero_reg__) CR_TAB + AS2 (ldi,%2,hi8(%1)) CR_TAB + AS2 (mov,%B0,%2)); + } + else if ((val & 0xff00) == 0) + { + *len = 3; + return (AS2 (ldi,%2,lo8(%1)) CR_TAB + AS2 (mov,%A0,%2) CR_TAB + AS2 (mov,%B0,__zero_reg__)); + } + else if ((val & 0xff) == ((val & 0xff00) >> 8)) + { + *len = 3; + return (AS2 (ldi,%2,lo8(%1)) CR_TAB + AS2 (mov,%A0,%2) CR_TAB + AS2 (mov,%B0,%2)); + } + } + *len = 4; + return (AS2 (ldi,%2,lo8(%1)) CR_TAB + AS2 (mov,%A0,%2) CR_TAB + AS2 (ldi,%2,hi8(%1)) CR_TAB + AS2 (mov,%B0,%2)); +} + + +const char * +output_reload_insisf (rtx insn ATTRIBUTE_UNUSED, rtx *operands, int *len) +{ + rtx src = operands[1]; + int cnst = (GET_CODE (src) == CONST_INT); + + if (len) + { + if (cnst) + *len = 4 + ((INTVAL (src) & 0xff) != 0) + + ((INTVAL (src) & 0xff00) != 0) + + ((INTVAL (src) & 0xff0000) != 0) + + ((INTVAL (src) & 0xff000000) != 0); + else + *len = 8; + + return ""; + } + + if (cnst && ((INTVAL (src) & 0xff) == 0)) + output_asm_insn (AS2 (mov, %A0, __zero_reg__), operands); + else + { + output_asm_insn (AS2 (ldi, %2, lo8(%1)), operands); + output_asm_insn (AS2 (mov, %A0, %2), operands); + } + if (cnst && ((INTVAL (src) & 0xff00) == 0)) + output_asm_insn (AS2 (mov, %B0, __zero_reg__), operands); + else + { + output_asm_insn (AS2 (ldi, %2, hi8(%1)), operands); + output_asm_insn (AS2 (mov, %B0, %2), operands); + } + if (cnst && ((INTVAL (src) & 0xff0000) == 0)) + output_asm_insn (AS2 (mov, %C0, __zero_reg__), operands); + else + { + output_asm_insn (AS2 (ldi, %2, hlo8(%1)), operands); + output_asm_insn (AS2 (mov, %C0, %2), operands); + } + if (cnst && ((INTVAL (src) & 0xff000000) == 0)) + output_asm_insn (AS2 (mov, %D0, __zero_reg__), operands); + else + { + output_asm_insn (AS2 (ldi, %2, hhi8(%1)), operands); + output_asm_insn (AS2 (mov, %D0, %2), operands); + } + return ""; +} + +void +avr_output_bld (rtx operands[], int bit_nr) +{ + static char s[] = "bld %A0,0"; + + s[5] = 'A' + (bit_nr >> 3); + s[8] = '0' + (bit_nr & 7); + output_asm_insn (s, operands); +} + +void +avr_output_addr_vec_elt (FILE *stream, int value) +{ + switch_to_section (progmem_section); + if (AVR_HAVE_JMP_CALL) + fprintf (stream, "\t.word gs(.L%d)\n", value); + else + fprintf (stream, "\trjmp .L%d\n", value); +} + +/* Returns true if SCRATCH are safe to be allocated as a scratch + registers (for a define_peephole2) in the current function. */ + +bool +avr_hard_regno_scratch_ok (unsigned int regno) +{ + /* Interrupt functions can only use registers that have already been saved + by the prologue, even if they would normally be call-clobbered. 
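One of the shortcuts in the HImode constant-load helper above, spelled out with a concrete (illustrative) constant:

    /* For operands[1] == 0x1200 the low byte is zero, so the helper emits
           mov %A0,__zero_reg__
           ldi %2,hi8(%1)
           mov %B0,%2
       (three instructions), whereas a constant with two distinct nonzero
       bytes needs the full four-instruction ldi/mov, ldi/mov fallback.  */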
*/ + + if ((cfun->machine->is_interrupt || cfun->machine->is_signal) + && !df_regs_ever_live_p (regno)) + return false; + + /* Don't allow hard registers that might be part of the frame pointer. + Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM + and don't care for a frame pointer that spans more than one register. */ + + if ((!reload_completed || frame_pointer_needed) + && (regno == REG_Y || regno == REG_Y + 1)) + { + return false; + } + + return true; +} + +/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ + +int +avr_hard_regno_rename_ok (unsigned int old_reg, + unsigned int new_reg) +{ + /* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. */ + + if ((cfun->machine->is_interrupt || cfun->machine->is_signal) + && !df_regs_ever_live_p (new_reg)) + return 0; + + /* Don't allow hard registers that might be part of the frame pointer. + Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM + and don't care for a frame pointer that spans more than one register. */ + + if ((!reload_completed || frame_pointer_needed) + && (old_reg == REG_Y || old_reg == REG_Y + 1 + || new_reg == REG_Y || new_reg == REG_Y + 1)) + { + return 0; + } + + return 1; +} + +/* Output a branch that tests a single bit of a register (QI, HI, SI or DImode) + or memory location in the I/O space (QImode only). + + Operand 0: comparison operator (must be EQ or NE, compare bit to zero). + Operand 1: register operand to test, or CONST_INT memory address. + Operand 2: bit number. + Operand 3: label to jump to if the test is true. */ + +const char * +avr_out_sbxx_branch (rtx insn, rtx operands[]) +{ + enum rtx_code comp = GET_CODE (operands[0]); + int long_jump = (get_attr_length (insn) >= 4); + int reverse = long_jump || jump_over_one_insn_p (insn, operands[3]); + + if (comp == GE) + comp = EQ; + else if (comp == LT) + comp = NE; + + if (reverse) + comp = reverse_condition (comp); + + if (GET_CODE (operands[1]) == CONST_INT) + { + if (INTVAL (operands[1]) < 0x40) + { + if (comp == EQ) + output_asm_insn (AS2 (sbis,%m1-0x20,%2), operands); + else + output_asm_insn (AS2 (sbic,%m1-0x20,%2), operands); + } + else + { + output_asm_insn (AS2 (in,__tmp_reg__,%m1-0x20), operands); + if (comp == EQ) + output_asm_insn (AS2 (sbrs,__tmp_reg__,%2), operands); + else + output_asm_insn (AS2 (sbrc,__tmp_reg__,%2), operands); + } + } + else /* GET_CODE (operands[1]) == REG */ + { + if (GET_MODE (operands[1]) == QImode) + { + if (comp == EQ) + output_asm_insn (AS2 (sbrs,%1,%2), operands); + else + output_asm_insn (AS2 (sbrc,%1,%2), operands); + } + else /* HImode or SImode */ + { + static char buf[] = "sbrc %A1,0"; + int bit_nr = INTVAL (operands[2]); + buf[3] = (comp == EQ) ? 's' : 'c'; + buf[6] = 'A' + (bit_nr >> 3); + buf[9] = '0' + (bit_nr & 7); + output_asm_insn (buf, operands); + } + } + + if (long_jump) + return (AS1 (rjmp,.+4) CR_TAB + AS1 (jmp,%x3)); + if (!reverse) + return AS1 (rjmp,%x3); + return ""; +} + +/* Worker function for TARGET_ASM_CONSTRUCTOR. */ + +static void +avr_asm_out_ctor (rtx symbol, int priority) +{ + fputs ("\t.global __do_global_ctors\n", asm_out_file); + default_ctor_section_asm_out_constructor (symbol, priority); +} + +/* Worker function for TARGET_ASM_DESTRUCTOR. 
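A note on the 0x40 bound and the %m1-0x20 adjustment in the bit-test output above, since the two address spaces are easy to confuse:

    /* Constant addresses here are data-space addresses.  On classic AVR
       cores the I/O registers appear in data space at an offset of 0x20,
       so data addresses 0x20..0x3f correspond to I/O addresses 0x00..0x1f,
       which is the only range the single-bit skip instructions sbis/sbic
       can reach.  Anything above that is first read into __tmp_reg__ and
       tested with sbrs/sbrc, as in the else branch above.  */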
*/ + +static void +avr_asm_out_dtor (rtx symbol, int priority) +{ + fputs ("\t.global __do_global_dtors\n", asm_out_file); + default_dtor_section_asm_out_destructor (symbol, priority); +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +static bool +avr_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (TYPE_MODE (type) == BLKmode) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + return (size == -1 || size > 8); + } + else + return false; +} + +/* Worker function for CASE_VALUES_THRESHOLD. */ + +unsigned int avr_case_values_threshold (void) +{ + return (!AVR_HAVE_JMP_CALL || TARGET_CALL_PROLOGUES) ? 8 : 17; +} + +#include "gt-avr.h" diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h new file mode 100644 index 000000000..efe782df7 --- /dev/null +++ b/gcc/config/avr/avr.h @@ -0,0 +1,835 @@ +/* Definitions of target machine for GNU compiler, + for ATMEL AVR at90s8515, ATmega103/103L, ATmega603/603L microcontrollers. + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, + 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Names to predefine in the preprocessor for this target machine. */ + +struct base_arch_s { + /* Assembler only. */ + int asm_only; + + /* Core have 'MUL*' instructions. */ + int have_mul; + + /* Core have 'CALL' and 'JMP' instructions. */ + int have_jmp_call; + + /* Core have 'MOVW' and 'LPM Rx,Z' instructions. */ + int have_movw_lpmx; + + /* Core have 'ELPM' instructions. */ + int have_elpm; + + /* Core have 'ELPM Rx,Z' instructions. */ + int have_elpmx; + + /* Core have 'EICALL' and 'EIJMP' instructions. */ + int have_eijmp_eicall; + + /* Reserved for xmega architecture. */ + int reserved; + + /* Reserved for xmega architecture. */ + int reserved2; + + /* Default start of data section address for architecture. */ + int default_data_section_start; + + const char *const macro; + + /* Architecture name. */ + const char *const arch_name; +}; + +/* These names are used as the index into the avr_arch_types[] table + above. */ + +enum avr_arch +{ + ARCH_UNKNOWN, + ARCH_AVR1, + ARCH_AVR2, + ARCH_AVR25, + ARCH_AVR3, + ARCH_AVR31, + ARCH_AVR35, + ARCH_AVR4, + ARCH_AVR5, + ARCH_AVR51, + ARCH_AVR6 +}; + +struct mcu_type_s { + /* Device name. */ + const char *const name; + + /* Index in avr_arch_types[]. */ + int arch; + + /* Must lie outside user's namespace. NULL == no macro. */ + const char *const macro; + + /* Stack pointer have 8 bits width. */ + int short_sp; + + /* Start of data section. */ + int data_section_start; + + /* Name of device library. */ + const char *const library_name; +}; + +/* Preprocessor macros to define depending on MCU type. 
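A hypothetical entry in the device table, laid out against the mcu_type_s fields above; the concrete values are illustrative and not copied from avr-mcus.def.

    /* { name, arch index, predefined macro, short_sp,
         data section start, library name }  */
    static const struct mcu_type_s example_device =
      { "atmega128", ARCH_AVR51, "__AVR_ATmega128__", 0, 0x0100, "m128" };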
*/ +extern const char *avr_extra_arch_macro; +extern const struct base_arch_s *avr_current_arch; +extern const struct mcu_type_s *avr_current_device; +extern const struct mcu_type_s avr_mcu_types[]; +extern const struct base_arch_s avr_arch_types[]; + +#define TARGET_CPU_CPP_BUILTINS() avr_cpu_cpp_builtins (pfile) + +#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) +extern GTY(()) section *progmem_section; +#endif + +#define AVR_HAVE_JMP_CALL (avr_current_arch->have_jmp_call && !TARGET_SHORT_CALLS) +#define AVR_HAVE_MUL (avr_current_arch->have_mul) +#define AVR_HAVE_MOVW (avr_current_arch->have_movw_lpmx) +#define AVR_HAVE_LPMX (avr_current_arch->have_movw_lpmx) +#define AVR_HAVE_RAMPZ (avr_current_arch->have_elpm) +#define AVR_HAVE_EIJMP_EICALL (avr_current_arch->have_eijmp_eicall) +#define AVR_HAVE_8BIT_SP (avr_current_device->short_sp || TARGET_TINY_STACK) + +#define AVR_2_BYTE_PC (!AVR_HAVE_EIJMP_EICALL) +#define AVR_3_BYTE_PC (AVR_HAVE_EIJMP_EICALL) + +#define TARGET_VERSION fprintf (stderr, " (GNU assembler syntax)"); + +#define BITS_BIG_ENDIAN 0 +#define BYTES_BIG_ENDIAN 0 +#define WORDS_BIG_ENDIAN 0 + +#ifdef IN_LIBGCC2 +/* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits). */ +#define UNITS_PER_WORD 4 +#else +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 1 +#endif + +#define POINTER_SIZE 16 + + +/* Maximum sized of reasonable data type + DImode or Dfmode ... */ +#define MAX_FIXED_MODE_SIZE 32 + +#define PARM_BOUNDARY 8 + +#define FUNCTION_BOUNDARY 8 + +#define EMPTY_FIELD_BOUNDARY 8 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 8 + +#define MAX_OFILE_ALIGNMENT (32768 * 8) + +#define TARGET_VTABLE_ENTRY_ALIGN 8 + +#define STRICT_ALIGNMENT 0 + +#define INT_TYPE_SIZE (TARGET_INT8 ? 8 : 16) +#define SHORT_TYPE_SIZE (INT_TYPE_SIZE == 8 ? INT_TYPE_SIZE : 16) +#define LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 16 : 32) +#define LONG_LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 32 : 64) +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 32 +#define LONG_DOUBLE_TYPE_SIZE 32 + +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE (INT_TYPE_SIZE == 8 ? "long unsigned int" : "unsigned int") +#define PTRDIFF_TYPE (INT_TYPE_SIZE == 8 ? 
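The type-size macros above pin down the C data model. A quick compile-time restatement of their consequences for a default (non -mint8) AVR build, using the negative-array-size trick as a poor man's static assertion; the struct name is illustrative and the checks only hold when compiling for this target.

    /* Each array gets a negative size, i.e. a compile error, if the
       corresponding size macro above were changed.  */
    struct avr_type_size_checks
    {
      char int_is_16_bits     [sizeof (int)    == 2 ? 1 : -1];
      char long_is_32_bits    [sizeof (long)   == 4 ? 1 : -1];
      char double_is_32_bits  [sizeof (double) == 4 ? 1 : -1];
      char pointer_is_16_bits [sizeof (void *) == 2 ? 1 : -1];
    };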
"long int" :"int") + +#define WCHAR_TYPE_SIZE 16 + +#define FIRST_PSEUDO_REGISTER 36 + +#define FIXED_REGISTERS {\ + 1,1,/* r0 r1 */\ + 0,0,/* r2 r3 */\ + 0,0,/* r4 r5 */\ + 0,0,/* r6 r7 */\ + 0,0,/* r8 r9 */\ + 0,0,/* r10 r11 */\ + 0,0,/* r12 r13 */\ + 0,0,/* r14 r15 */\ + 0,0,/* r16 r17 */\ + 0,0,/* r18 r19 */\ + 0,0,/* r20 r21 */\ + 0,0,/* r22 r23 */\ + 0,0,/* r24 r25 */\ + 0,0,/* r26 r27 */\ + 0,0,/* r28 r29 */\ + 0,0,/* r30 r31 */\ + 1,1,/* STACK */\ + 1,1 /* arg pointer */ } + +#define CALL_USED_REGISTERS { \ + 1,1,/* r0 r1 */ \ + 0,0,/* r2 r3 */ \ + 0,0,/* r4 r5 */ \ + 0,0,/* r6 r7 */ \ + 0,0,/* r8 r9 */ \ + 0,0,/* r10 r11 */ \ + 0,0,/* r12 r13 */ \ + 0,0,/* r14 r15 */ \ + 0,0,/* r16 r17 */ \ + 1,1,/* r18 r19 */ \ + 1,1,/* r20 r21 */ \ + 1,1,/* r22 r23 */ \ + 1,1,/* r24 r25 */ \ + 1,1,/* r26 r27 */ \ + 0,0,/* r28 r29 */ \ + 1,1,/* r30 r31 */ \ + 1,1,/* STACK */ \ + 1,1 /* arg pointer */ } + +#define REG_ALLOC_ORDER { \ + 24,25, \ + 18,19, \ + 20,21, \ + 22,23, \ + 30,31, \ + 26,27, \ + 28,29, \ + 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2, \ + 0,1, \ + 32,33,34,35 \ + } + +#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc () + + +#define HARD_REGNO_NREGS(REGNO, MODE) ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +#define HARD_REGNO_MODE_OK(REGNO, MODE) avr_hard_regno_mode_ok(REGNO, MODE) + +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +enum reg_class { + NO_REGS, + R0_REG, /* r0 */ + POINTER_X_REGS, /* r26 - r27 */ + POINTER_Y_REGS, /* r28 - r29 */ + POINTER_Z_REGS, /* r30 - r31 */ + STACK_REG, /* STACK */ + BASE_POINTER_REGS, /* r28 - r31 */ + POINTER_REGS, /* r26 - r31 */ + ADDW_REGS, /* r24 - r31 */ + SIMPLE_LD_REGS, /* r16 - r23 */ + LD_REGS, /* r16 - r31 */ + NO_LD_REGS, /* r0 - r15 */ + GENERAL_REGS, /* r0 - r31 */ + ALL_REGS, LIM_REG_CLASSES +}; + + +#define N_REG_CLASSES (int)LIM_REG_CLASSES + +#define REG_CLASS_NAMES { \ + "NO_REGS", \ + "R0_REG", /* r0 */ \ + "POINTER_X_REGS", /* r26 - r27 */ \ + "POINTER_Y_REGS", /* r28 - r29 */ \ + "POINTER_Z_REGS", /* r30 - r31 */ \ + "STACK_REG", /* STACK */ \ + "BASE_POINTER_REGS", /* r28 - r31 */ \ + "POINTER_REGS", /* r26 - r31 */ \ + "ADDW_REGS", /* r24 - r31 */ \ + "SIMPLE_LD_REGS", /* r16 - r23 */ \ + "LD_REGS", /* r16 - r31 */ \ + "NO_LD_REGS", /* r0 - r15 */ \ + "GENERAL_REGS", /* r0 - r31 */ \ + "ALL_REGS" } + +#define REG_CLASS_CONTENTS { \ + {0x00000000,0x00000000}, /* NO_REGS */ \ + {0x00000001,0x00000000}, /* R0_REG */ \ + {3 << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \ + {3 << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \ + {3 << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \ + {0x00000000,0x00000003}, /* STACK_REG, STACK */ \ + {(3 << REG_Y) | (3 << REG_Z), \ + 0x00000000}, /* BASE_POINTER_REGS, r28 - r31 */ \ + {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), \ + 0x00000000}, /* POINTER_REGS, r26 - r31 */ \ + {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), \ + 0x00000000}, /* ADDW_REGS, r24 - r31 */ \ + {0x00ff0000,0x00000000}, /* SIMPLE_LD_REGS r16 - r23 */ \ + {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16), \ + 0x00000000}, /* LD_REGS, r16 - r31 */ \ + {0x0000ffff,0x00000000}, /* NO_LD_REGS r0 - r15 */ \ + {0xffffffff,0x00000000}, /* GENERAL_REGS, r0 - r31 */ \ + {0xffffffff,0x00000003} /* ALL_REGS */ \ +} + +#define REGNO_REG_CLASS(R) avr_regno_reg_class(R) + +/* The following macro defines cover classes for Integrated Register + Allocator. 
Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, LIM_REG_CLASSES \ +} + +#define BASE_REG_CLASS (reload_completed ? BASE_POINTER_REGS : POINTER_REGS) + +#define INDEX_REG_CLASS NO_REGS + +#define REGNO_OK_FOR_BASE_P(r) (((r) < FIRST_PSEUDO_REGISTER \ + && ((r) == REG_X \ + || (r) == REG_Y \ + || (r) == REG_Z \ + || (r) == ARG_POINTER_REGNUM)) \ + || (reg_renumber \ + && (reg_renumber[r] == REG_X \ + || reg_renumber[r] == REG_Y \ + || reg_renumber[r] == REG_Z \ + || (reg_renumber[r] \ + == ARG_POINTER_REGNUM)))) + +#define REGNO_OK_FOR_INDEX_P(NUM) 0 + +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +#define CLASS_MAX_NREGS(CLASS, MODE) class_max_nregs (CLASS, MODE) + +#define STACK_PUSH_CODE POST_DEC + +#define STACK_GROWS_DOWNWARD + +#define STARTING_FRAME_OFFSET 1 + +#define STACK_POINTER_OFFSET 1 + +#define FIRST_PARM_OFFSET(FUNDECL) 0 + +#define STACK_BOUNDARY 8 + +#define STACK_POINTER_REGNUM 32 + +#define FRAME_POINTER_REGNUM REG_Y + +#define ARG_POINTER_REGNUM 34 + +#define STATIC_CHAIN_REGNUM 2 + +#define ELIMINABLE_REGS { \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ + ,{FRAME_POINTER_REGNUM+1,STACK_POINTER_REGNUM+1}} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + OFFSET = avr_initial_elimination_offset (FROM, TO) + +#define RETURN_ADDR_RTX(count, tem) avr_return_addr_rtx (count, tem) + +/* Don't use Push rounding. expr.c: emit_single_push_insn is broken + for POST_DEC targets (PR27386). */ +/*#define PUSH_ROUNDING(NPUSHED) (NPUSHED)*/ + +typedef struct avr_args { + int nregs; /* # registers available for passing */ + int regno; /* next available register number */ +} CUMULATIVE_ARGS; + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + init_cumulative_args (&(CUM), FNTYPE, LIBNAME, FNDECL) + +#define FUNCTION_ARG_REGNO_P(r) function_arg_regno_p(r) + +extern int avr_reg_order[]; + +#define RET_REGISTER avr_ret_register () + +#define LIBCALL_VALUE(MODE) avr_libcall_value (MODE) + +#define FUNCTION_VALUE_REGNO_P(N) ((int) (N) == RET_REGISTER) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define EPILOGUE_USES(REGNO) avr_epilogue_uses(REGNO) + +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 + +#define MAX_REGS_PER_ADDRESS 1 + +#define REG_OK_FOR_BASE_NOSTRICT_P(X) \ + (REGNO (X) >= FIRST_PSEUDO_REGISTER || REG_OK_FOR_BASE_STRICT_P(X)) + +#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +/* LEGITIMIZE_RELOAD_ADDRESS will allow register R26/27 to be used, where it + is no worse than normal base pointers R28/29 and R30/31. For example: + If base offset is greater than 63 bytes or for R++ or --R addressing. 
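The 63-byte figure in the comment above comes from the 6-bit displacement field of the ldd/std instructions. An illustrative C fragment, with hypothetical struct and field names, where the second access falls outside the base-plus-displacement range and therefore needs the kind of address reload described below:

    struct packet
    {
      char header[8];
      char payload[80];
    };

    /* p->header[2] sits at offset 2 and fits a single displacement load;
       p->payload[70] sits at offset 78, beyond the 0..63 range, so the
       base address has to be adjusted or reloaded into a pointer register
       before the byte can be read.  */
    static char near_byte (const struct packet *p) { return p->header[2];   }
    static char far_byte  (const struct packet *p) { return p->payload[70]; }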
*/ + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ +do { \ + if (1&&(GET_CODE (X) == POST_INC || GET_CODE (X) == PRE_DEC)) \ + { \ + push_reload (XEXP (X,0), XEXP (X,0), &XEXP (X,0), &XEXP (X,0), \ + POINTER_REGS, GET_MODE (X),GET_MODE (X) , 0, 0, \ + OPNUM, RELOAD_OTHER); \ + goto WIN; \ + } \ + if (GET_CODE (X) == PLUS \ + && REG_P (XEXP (X, 0)) \ + && reg_equiv_constant[REGNO (XEXP (X, 0))] == 0 \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && INTVAL (XEXP (X, 1)) >= 1) \ + { \ + int fit = INTVAL (XEXP (X, 1)) <= (64 - GET_MODE_SIZE (MODE)); \ + if (fit) \ + { \ + if (reg_equiv_address[REGNO (XEXP (X, 0))] != 0) \ + { \ + int regno = REGNO (XEXP (X, 0)); \ + rtx mem = make_memloc (X, regno); \ + push_reload (XEXP (mem,0), NULL, &XEXP (mem,0), NULL, \ + POINTER_REGS, Pmode, VOIDmode, 0, 0, \ + 1, ADDR_TYPE (TYPE)); \ + push_reload (mem, NULL_RTX, &XEXP (X, 0), NULL, \ + BASE_POINTER_REGS, GET_MODE (X), VOIDmode, 0, 0, \ + OPNUM, TYPE); \ + goto WIN; \ + } \ + } \ + else if (! (frame_pointer_needed && XEXP (X,0) == frame_pointer_rtx)) \ + { \ + push_reload (X, NULL_RTX, &X, NULL, \ + POINTER_REGS, GET_MODE (X), VOIDmode, 0, 0, \ + OPNUM, TYPE); \ + goto WIN; \ + } \ + } \ +} while(0) + +#define LEGITIMATE_CONSTANT_P(X) 1 + +#define BRANCH_COST(speed_p, predictable_p) 0 + +#define SLOW_BYTE_ACCESS 0 + +#define NO_FUNCTION_CSE + +#define TEXT_SECTION_ASM_OP "\t.text" + +#define DATA_SECTION_ASM_OP "\t.data" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/* Define the pseudo-ops used to switch to the .ctors and .dtors sections. + There are no shared libraries on this target, and these sections are + placed in the read-only program memory, so they are not writable. */ + +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"a\",@progbits" + +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"a\",@progbits" + +#define TARGET_ASM_CONSTRUCTOR avr_asm_out_ctor + +#define TARGET_ASM_DESTRUCTOR avr_asm_out_dtor + +#define SUPPORTS_INIT_PRIORITY 0 + +#define JUMP_TABLES_IN_TEXT_SECTION 0 + +#define ASM_COMMENT_START " ; " + +#define ASM_APP_ON "/* #APP */\n" + +#define ASM_APP_OFF "/* #NOAPP */\n" + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section +#define TARGET_ASM_INIT_SECTIONS avr_asm_init_sections + +#define ASM_OUTPUT_ASCII(FILE, P, SIZE) gas_output_ascii (FILE,P,SIZE) + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '\n' || ((C) == '$')) + +#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \ +do { \ + fputs ("\t.comm ", (STREAM)); \ + assemble_name ((STREAM), (NAME)); \ + fprintf ((STREAM), ",%lu,1\n", (unsigned long)(SIZE)); \ +} while (0) + +#define ASM_OUTPUT_BSS(FILE, DECL, NAME, SIZE, ROUNDED) \ + asm_output_bss ((FILE), (DECL), (NAME), (SIZE), (ROUNDED)) + +#define ASM_OUTPUT_LOCAL(STREAM, NAME, SIZE, ROUNDED) \ +do { \ + fputs ("\t.lcomm ", (STREAM)); \ + assemble_name ((STREAM), (NAME)); \ + fprintf ((STREAM), ",%d\n", (int)(SIZE)); \ +} while (0) + +#undef TYPE_ASM_OP +#undef SIZE_ASM_OP +#undef WEAK_ASM_OP +#define TYPE_ASM_OP "\t.type\t" +#define SIZE_ASM_OP "\t.size\t" +#define WEAK_ASM_OP "\t.weak\t" +/* Define the strings used for the special svr4 .type and .size directives. + These strings generally do not vary from one system running svr4 to + another, but if a given system (e.g. m88k running svr) needs to use + different pseudo-op names for these, they may be overridden in the + file which includes this one. 
*/ + + +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "@%s" +/* The following macro defines the format used to output the second + operand of the .type assembler directive. Different svr4 assemblers + expect various different forms for this operand. The one given here + is just a default. You may need to override it in your machine- + specific tm.h file (depending upon the particulars of your assembler). */ + +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ +avr_asm_declare_function_name ((FILE), (NAME), (DECL)) + +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do { \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ + } while (0) + +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ +do { \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, \ + int_size_in_bytes (TREE_TYPE (DECL))); \ + } \ + ASM_OUTPUT_LABEL(FILE, NAME); \ +} while (0) + +#undef ASM_FINISH_DECLARE_OBJECT +#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \ +do { \ + const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + HOST_WIDE_INT size; \ + if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \ + && ! AT_END && TOP_LEVEL \ + && DECL_INITIAL (DECL) == error_mark_node \ + && !size_directive_output) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ + } \ + } while (0) + + +#define ESCAPES \ +"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" +/* A table of bytes codes used by the ASM_OUTPUT_ASCII and + ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table + corresponds to a particular byte value [0..255]. For any + given byte value, if the value in the corresponding table + position is zero, the given character can be output directly. + If the table value is 1, the byte must be output as a \ooo + octal escape. If the tables value is anything else, then the + byte value should be output as a \ followed by the value + in the table. Note that we can use standard UN*X escape + sequences for many control characters, but we don't use + \a to represent BEL because some svr4 assemblers (e.g. on + the i386) don't know about that. Also, we don't use \v + since some versions of gas, such as 2.2 did not accept it. */ + +#define STRING_LIMIT ((unsigned) 64) +#define STRING_ASM_OP "\t.string\t" +/* Some svr4 assemblers have a limit on the number of characters which + can appear in the operand of a .string directive. If your assembler + has such a limitation, you should define STRING_LIMIT to reflect that + limit. Note that at least some svr4 assemblers have a limit on the + actual number of bytes in the double-quoted string, and that they + count each character in an escape sequence as one byte. Thus, an + escape sequence like \377 would count as four bytes. 
+ + If your target assembler doesn't support the .string directive, you + should define this to zero. */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP ".global\t" + +#define SET_ASM_OP "\t.set\t" + +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do \ + { \ + fputs ("\t.weak\t", (FILE)); \ + assemble_name ((FILE), (NAME)); \ + fputc ('\n', (FILE)); \ + } \ + while (0) + +#define SUPPORTS_WEAK 1 + +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ +sprintf (STRING, "*.%s%lu", PREFIX, (unsigned long)(NUM)) + +#define HAS_INIT_SECTION 1 + +#define REGISTER_NAMES { \ + "r0","r1","r2","r3","r4","r5","r6","r7", \ + "r8","r9","r10","r11","r12","r13","r14","r15", \ + "r16","r17","r18","r19","r20","r21","r22","r23", \ + "r24","r25","r26","r27","r28","r29","r30","r31", \ + "__SP_L__","__SP_H__","argL","argH"} + +#define FINAL_PRESCAN_INSN(insn, operand, nop) final_prescan_insn (insn, operand,nop) + +#define PRINT_OPERAND(STREAM, X, CODE) print_operand (STREAM, X, CODE) + +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) ((CODE) == '~' || (CODE) == '!') + +#define PRINT_OPERAND_ADDRESS(STREAM, X) print_operand_address(STREAM, X) + +#define USER_LABEL_PREFIX "" + +#define ASSEMBLER_DIALECT AVR_HAVE_MOVW + +#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ +{ \ + gcc_assert (REGNO < 32); \ + fprintf (STREAM, "\tpush\tr%d", REGNO); \ +} + +#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ +{ \ + gcc_assert (REGNO < 32); \ + fprintf (STREAM, "\tpop\tr%d", REGNO); \ +} + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + avr_output_addr_vec_elt(STREAM, VALUE) + +#define ASM_OUTPUT_CASE_LABEL(STREAM, PREFIX, NUM, TABLE) \ + (switch_to_section (progmem_section), \ + (*targetm.asm_out.internal_label) (STREAM, PREFIX, NUM)) + +#define ASM_OUTPUT_SKIP(STREAM, N) \ +fprintf (STREAM, "\t.skip %lu,0\n", (unsigned long)(N)) + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do { \ + if ((POWER) > 1) \ + fprintf (STREAM, "\t.p2align\t%d\n", POWER); \ + } while (0) + +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + default_elf_asm_output_external (FILE, DECL, NAME) + +#define CASE_VECTOR_MODE HImode + +#undef WORD_REGISTER_OPERATIONS + +#define MOVE_MAX 4 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define Pmode HImode + +#define FUNCTION_MODE HImode + +#define DOLLARS_IN_IDENTIFIERS 0 + +#define NO_DOLLAR_IN_LABEL 1 + +#define TRAMPOLINE_SIZE 4 + +/* Store in cc_status the expressions + that the condition codes will describe + after execution of an instruction whose pattern is EXP. + Do not alter them if the instruction would not alter the cc's. */ + +#define NOTICE_UPDATE_CC(EXP, INSN) notice_update_cc(EXP, INSN) + +/* The add insns don't set overflow in a usable way. */ +#define CC_OVERFLOW_UNUSABLE 01000 +/* The mov,and,or,xor insns don't set carry. That's ok though as the + Z bit is all we need when doing unsigned comparisons on the result of + these insns (since they're always with 0). However, conditions.h has + CC_NO_OVERFLOW defined for this purpose. Rename it to something more + understandable. */ +#define CC_NO_CARRY CC_NO_OVERFLOW + + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. 
*/ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + fprintf (FILE, "/* profiler %d */", (LABELNO)) + +#define ADJUST_INSN_LENGTH(INSN, LENGTH) (LENGTH =\ + adjust_insn_length (INSN, LENGTH)) + +extern const char *avr_device_to_arch (int argc, const char **argv); +extern const char *avr_device_to_data_start (int argc, const char **argv); +extern const char *avr_device_to_startfiles (int argc, const char **argv); +extern const char *avr_device_to_devicelib (int argc, const char **argv); + +#define EXTRA_SPEC_FUNCTIONS \ + { "device_to_arch", avr_device_to_arch }, \ + { "device_to_data_start", avr_device_to_data_start }, \ + { "device_to_startfile", avr_device_to_startfiles }, \ + { "device_to_devicelib", avr_device_to_devicelib }, + +#define CPP_SPEC "" + +#define CC1_SPEC "" + +#define CC1PLUS_SPEC "%{!frtti:-fno-rtti} \ + %{!fenforce-eh-specs:-fno-enforce-eh-specs} \ + %{!fexceptions:-fno-exceptions}" +/* A C string constant that tells the GCC driver program options to + pass to `cc1plus'. */ + +#define ASM_SPEC "%{mmcu=avr25:-mmcu=avr2;mmcu=avr35:-mmcu=avr3;mmcu=avr31:-mmcu=avr3;mmcu=avr51:-mmcu=avr5;\ +mmcu=*:-mmcu=%*}" + +#define LINK_SPEC "\ +%{mrelax:--relax\ + %{mpmem-wrap-around:%{mmcu=at90usb8*:--pmem-wrap-around=8k}\ + %{mmcu=atmega16*:--pmem-wrap-around=16k}\ + %{mmcu=atmega32*|\ + mmcu=at90can32*:--pmem-wrap-around=32k}\ + %{mmcu=atmega64*|\ + mmcu=at90can64*|\ + mmcu=at90usb64*:--pmem-wrap-around=64k}}}\ +%:device_to_arch(%{mmcu=*:%*})\ +%:device_to_data_start(%{mmcu=*:%*})" + +#define LIB_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lc }}}}}" + +#define LIBSTDCXX "gcc" +/* No libstdc++ for now. Empty string doesn't work. */ + +#define LIBGCC_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lgcc }}}}}" + +#define STARTFILE_SPEC "%:device_to_startfile(%{mmcu=*:%*})" + +#define ENDFILE_SPEC "" + +/* This is the default without any -mmcu=* option (AT90S*). */ +#define MULTILIB_DEFAULTS { "mmcu=avr2" } + +#define TEST_HARD_REG_CLASS(CLASS, REGNO) \ + TEST_HARD_REG_BIT (reg_class_contents[ (int) (CLASS)], REGNO) + +/* Note that the other files fail to use these + in some of the places where they should. */ + +#if defined(__STDC__) || defined(ALMOST_STDC) +#define AS2(a,b,c) #a " " #b "," #c +#define AS2C(b,c) " " #b "," #c +#define AS3(a,b,c,d) #a " " #b "," #c "," #d +#define AS1(a,b) #a " " #b +#else +#define AS1(a,b) "a b" +#define AS2(a,b,c) "a b,c" +#define AS2C(b,c) " b,c" +#define AS3(a,b,c,d) "a b,c,d" +#endif +#define OUT_AS1(a,b) output_asm_insn (AS1(a,b), operands) +#define OUT_AS2(a,b,c) output_asm_insn (AS2(a,b,c), operands) +#define CR_TAB "\n\t" + +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + +#define DWARF2_DEBUGGING_INFO 1 + +#define DWARF2_ADDR_SIZE 4 + +#define OBJECT_FORMAT_ELF + +#define INCOMING_RETURN_ADDR_RTX avr_incoming_return_addr_rtx () +#define INCOMING_FRAME_SP_OFFSET (AVR_3_BYTE_PC ? 3 : 2) + +/* The caller's stack pointer value immediately before the call + is one byte below the first argument. */ +#define ARG_POINTER_CFA_OFFSET(FNDECL) -1 + +#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ + avr_hard_regno_rename_ok (OLD_REG, NEW_REG) + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +struct GTY(()) machine_function +{ + /* 'true' - if current function is a naked function. */ + int is_naked; + + /* 'true' - if current function is an interrupt function + as specified by the "interrupt" attribute. 
*/ + int is_interrupt; + + /* 'true' - if current function is a signal function + as specified by the "signal" attribute. */ + int is_signal; + + /* 'true' - if current function is a 'task' function + as specified by the "OS_task" attribute. */ + int is_OS_task; + + /* 'true' - if current function is a 'main' function + as specified by the "OS_main" attribute. */ + int is_OS_main; + + /* Current function stack size. */ + int stack_usage; +}; diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md new file mode 100644 index 000000000..1fc6fee57 --- /dev/null +++ b/gcc/config/avr/avr.md @@ -0,0 +1,3248 @@ +;; Machine description for GNU compiler, +;; for ATMEL AVR micro controllers. +;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008, +;; 2009, 2010 Free Software Foundation, Inc. +;; Contributed by Denis Chertykov (chertykov@gmail.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Special characters after '%': +;; A No effect (add 0). +;; B Add 1 to REG number, MEM address or CONST_INT. +;; C Add 2. +;; D Add 3. +;; j Branch condition. +;; k Reverse branch condition. +;;..m..Constant Direct Data memory address. +;; o Displacement for (mem (plus (reg) (const_int))) operands. +;; p POST_INC or PRE_DEC address as a pointer (X, Y, Z) +;; r POST_INC or PRE_DEC address as a register (r26, r28, r30) +;;..x..Constant Direct Program memory address. +;; ~ Output 'r' if not AVR_HAVE_JMP_CALL. +;; ! Output 'e' if AVR_HAVE_EIJMP_EICALL. + +;; UNSPEC usage: +;; 0 Length of a string, see "strlenhi". +;; 1 Jump by register pair Z or by table addressed by Z, see "casesi". + +(define_constants + [(REG_X 26) + (REG_Y 28) + (REG_Z 30) + (REG_W 24) + (REG_SP 32) + (TMP_REGNO 0) ; temporary register r0 + (ZERO_REGNO 1) ; zero register r1 + + (SREG_ADDR 0x5F) + (RAMPZ_ADDR 0x5B) + + (UNSPEC_STRLEN 0) + (UNSPEC_INDEX_JMP 1) + (UNSPEC_SEI 2) + (UNSPEC_CLI 3) + + (UNSPECV_PROLOGUE_SAVES 0) + (UNSPECV_EPILOGUE_RESTORES 1) + (UNSPECV_WRITE_SP_IRQ_ON 2) + (UNSPECV_WRITE_SP_IRQ_OFF 3) + (UNSPECV_GOTO_RECEIVER 4)]) + +(include "predicates.md") +(include "constraints.md") + +;; Condition code settings. +(define_attr "cc" "none,set_czn,set_zn,set_n,compare,clobber" + (const_string "none")) + +(define_attr "type" "branch,branch1,arith,xcall" + (const_string "arith")) + +(define_attr "mcu_have_movw" "yes,no" + (const (if_then_else (symbol_ref "AVR_HAVE_MOVW") + (const_string "yes") + (const_string "no")))) + +(define_attr "mcu_mega" "yes,no" + (const (if_then_else (symbol_ref "AVR_HAVE_JMP_CALL") + (const_string "yes") + (const_string "no")))) + + +;; The size of instructions in bytes. 
+;; XXX may depend from "cc" + +(define_attr "length" "" + (cond [(eq_attr "type" "branch") + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -63)) + (le (minus (pc) (match_dup 0)) + (const_int 62))) + (const_int 1) + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -2045)) + (le (minus (pc) (match_dup 0)) + (const_int 2045))) + (const_int 2) + (const_int 3))) + (eq_attr "type" "branch1") + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -62)) + (le (minus (pc) (match_dup 0)) + (const_int 61))) + (const_int 2) + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -2044)) + (le (minus (pc) (match_dup 0)) + (const_int 2043))) + (const_int 3) + (const_int 4))) + (eq_attr "type" "xcall") + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 1) + (const_int 2))] + (const_int 2))) + +;; Define mode iterator +(define_mode_iterator QISI [(QI "") (HI "") (SI "")]) +(define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")]) +(define_mode_iterator HIDI [(HI "") (SI "") (DI "")]) +(define_mode_iterator HISI [(HI "") (SI "")]) + +;;======================================================================== +;; The following is used by nonlocal_goto and setjmp. +;; The receiver pattern will create no instructions since internally +;; virtual_stack_vars = hard_frame_pointer + 1 so the RTL become R28=R28 +;; This avoids creating add/sub offsets in frame_pointer save/resore. +;; The 'null' receiver also avoids problems with optimisation +;; not recognising incoming jmp and removing code that resets frame_pointer. +;; The code derived from builtins.c. + +(define_expand "nonlocal_goto_receiver" + [(set (reg:HI REG_Y) + (unspec_volatile:HI [(const_int 0)] UNSPECV_GOTO_RECEIVER))] + "" + { + emit_move_insn (virtual_stack_vars_rtx, + gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, + gen_int_mode (STARTING_FRAME_OFFSET, + Pmode))); + /* This might change the hard frame pointer in ways that aren't + apparent to early optimization passes, so force a clobber. */ + emit_clobber (hard_frame_pointer_rtx); + DONE; + }) + + +;; Defining nonlocal_goto_receiver means we must also define this. 
+;; even though its function is identical to that in builtins.c + +(define_expand "nonlocal_goto" + [ + (use (match_operand 0 "general_operand")) + (use (match_operand 1 "general_operand")) + (use (match_operand 2 "general_operand")) + (use (match_operand 3 "general_operand")) + ] + "" +{ + rtx r_label = copy_to_reg (operands[1]); + rtx r_fp = operands[3]; + rtx r_sp = operands[2]; + + emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); + + emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); + + emit_move_insn (hard_frame_pointer_rtx, r_fp); + emit_stack_restore (SAVE_NONLOCAL, r_sp); + + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + emit_indirect_jump (r_label); + + DONE; +}) + + +(define_insn "*pushqi" + [(set (mem:QI (post_dec:HI (reg:HI REG_SP))) + (match_operand:QI 0 "reg_or_0_operand" "r,L"))] + "" + "@ + push %0 + push __zero_reg__" + [(set_attr "length" "1,1")]) + +(define_insn "*pushhi" + [(set (mem:HI (post_dec:HI (reg:HI REG_SP))) + (match_operand:HI 0 "reg_or_0_operand" "r,L"))] + "" + "@ + push %B0\;push %A0 + push __zero_reg__\;push __zero_reg__" + [(set_attr "length" "2,2")]) + +(define_insn "*pushsi" + [(set (mem:SI (post_dec:HI (reg:HI REG_SP))) + (match_operand:SI 0 "reg_or_0_operand" "r,L"))] + "" + "@ + push %D0\;push %C0\;push %B0\;push %A0 + push __zero_reg__\;push __zero_reg__\;push __zero_reg__\;push __zero_reg__" + [(set_attr "length" "4,4")]) + +(define_insn "*pushsf" + [(set (mem:SF (post_dec:HI (reg:HI REG_SP))) + (match_operand:SF 0 "register_operand" "r"))] + "" + "push %D0 + push %C0 + push %B0 + push %A0" + [(set_attr "length" "4")]) + +;;======================================================================== +;; move byte +;; The last alternative (any immediate constant to any register) is +;; very expensive. It should be optimized by peephole2 if a scratch +;; register is available, but then that register could just as well be +;; allocated for the variable we are loading. But, most of NO_LD_REGS +;; are call-saved registers, and most of LD_REGS are call-used registers, +;; so this may still be a win for registers live across function calls. + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "/* One of the ops has to be in a register. */ + if (!register_operand(operand0, QImode) + && ! (register_operand(operand1, QImode) || const0_rtx == operand1)) + operands[1] = copy_to_mode_reg(QImode, operand1); + ") + +(define_insn "*movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,d,Qm,r,q,r,*r") + (match_operand:QI 1 "general_operand" "rL,i,rL,Qm,r,q,i"))] + "(register_operand (operands[0],QImode) + || register_operand (operands[1], QImode) || const0_rtx == operands[1])" + "* return output_movqi (insn, operands, NULL);" + [(set_attr "length" "1,1,5,5,1,1,4") + (set_attr "cc" "none,none,clobber,clobber,none,none,clobber")]) + +;; This is used in peephole2 to optimize loading immediate constants +;; if a scratch register from LD_REGS happens to be available. 
+ +(define_insn "*reload_inqi" + [(set (match_operand:QI 0 "register_operand" "=l") + (match_operand:QI 1 "immediate_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + "ldi %2,lo8(%1) + mov %0,%2" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +(define_peephole2 + [(match_scratch:QI 2 "d") + (set (match_operand:QI 0 "l_register_operand" "") + (match_operand:QI 1 "immediate_operand" ""))] + "(operands[1] != const0_rtx + && operands[1] != const1_rtx + && operands[1] != constm1_rtx)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (match_dup 2))])] + "") + +;;============================================================================ +;; move word (16 bit) + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + /* One of the ops has to be in a register. */ + if (!register_operand(operand0, HImode) + && !(register_operand(operand1, HImode) || const0_rtx == operands[1])) + { + operands[1] = copy_to_mode_reg(HImode, operand1); + } +}") + +(define_insn "*movhi_sp" + [(set (match_operand:HI 0 "register_operand" "=q,r") + (match_operand:HI 1 "register_operand" "r,q"))] + "((stack_register_operand(operands[0], HImode) && register_operand (operands[1], HImode)) + || (register_operand (operands[0], HImode) && stack_register_operand(operands[1], HImode)))" + "* return output_movhi (insn, operands, NULL);" + [(set_attr "length" "5,2") + (set_attr "cc" "none,none")]) + +(define_insn "movhi_sp_r_irq_off" + [(set (match_operand:HI 0 "stack_register_operand" "=q") + (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r")] + UNSPECV_WRITE_SP_IRQ_OFF))] + "" + "out __SP_H__, %B1 + out __SP_L__, %A1" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +(define_insn "movhi_sp_r_irq_on" + [(set (match_operand:HI 0 "stack_register_operand" "=q") + (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r")] + UNSPECV_WRITE_SP_IRQ_ON))] + "" + "cli + out __SP_H__, %B1 + sei + out __SP_L__, %A1" + [(set_attr "length" "4") + (set_attr "cc" "none")]) + +(define_peephole2 + [(match_scratch:QI 2 "d") + (set (match_operand:HI 0 "l_register_operand" "") + (match_operand:HI 1 "immediate_operand" ""))] + "(operands[1] != const0_rtx + && operands[1] != constm1_rtx)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (match_dup 2))])] + "") + +;; '*' because it is not used in rtl generation, only in above peephole +(define_insn "*reload_inhi" + [(set (match_operand:HI 0 "register_operand" "=r") + (match_operand:HI 1 "immediate_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + "* return output_reload_inhi (insn, operands, NULL);" + [(set_attr "length" "4") + (set_attr "cc" "none")]) + +(define_insn "*movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,d,*r,q,r") + (match_operand:HI 1 "general_operand" "rL,m,rL,i,i,r,q"))] + "(register_operand (operands[0],HImode) + || register_operand (operands[1],HImode) || const0_rtx == operands[1])" + "* return output_movhi (insn, operands, NULL);" + [(set_attr "length" "2,6,7,2,6,5,2") + (set_attr "cc" "none,clobber,clobber,none,clobber,none,none")]) + +(define_peephole2 ; movw + [(set (match_operand:QI 0 "even_register_operand" "") + (match_operand:QI 1 "even_register_operand" "")) + (set (match_operand:QI 2 "odd_register_operand" "") + (match_operand:QI 3 "odd_register_operand" ""))] + "(AVR_HAVE_MOVW + && REGNO (operands[0]) == REGNO (operands[2]) - 1 
+ && REGNO (operands[1]) == REGNO (operands[3]) - 1)" + [(set (match_dup 4) (match_dup 5))] + { + operands[4] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[5] = gen_rtx_REG (HImode, REGNO (operands[1])); + }) + +(define_peephole2 ; movw_r + [(set (match_operand:QI 0 "odd_register_operand" "") + (match_operand:QI 1 "odd_register_operand" "")) + (set (match_operand:QI 2 "even_register_operand" "") + (match_operand:QI 3 "even_register_operand" ""))] + "(AVR_HAVE_MOVW + && REGNO (operands[2]) == REGNO (operands[0]) - 1 + && REGNO (operands[3]) == REGNO (operands[1]) - 1)" + [(set (match_dup 4) (match_dup 5))] + { + operands[4] = gen_rtx_REG (HImode, REGNO (operands[2])); + operands[5] = gen_rtx_REG (HImode, REGNO (operands[3])); + }) + +;;========================================================================== +;; move double word (32 bit) + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + " +{ + /* One of the ops has to be in a register. */ + if (!register_operand (operand0, SImode) + && !(register_operand (operand1, SImode) || const0_rtx == operand1)) + { + operands[1] = copy_to_mode_reg (SImode, operand1); + } +}") + + + +(define_peephole2 ; movsi_lreg_const + [(match_scratch:QI 2 "d") + (set (match_operand:SI 0 "l_register_operand" "") + (match_operand:SI 1 "immediate_operand" "")) + (match_dup 2)] + "(operands[1] != const0_rtx + && operands[1] != constm1_rtx)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (match_dup 2))])] + "") + +;; '*' because it is not used in rtl generation. +(define_insn "*reload_insi" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "immediate_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + "* return output_reload_insisf (insn, operands, NULL);" + [(set_attr "length" "8") + (set_attr "cc" "none")]) + + +(define_insn "*movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,Qm,!d,r") + (match_operand:SI 1 "general_operand" "r,L,Qm,rL,i,i"))] + "(register_operand (operands[0],SImode) + || register_operand (operands[1],SImode) || const0_rtx == operands[1])" + "* return output_movsisf (insn, operands, NULL);" + [(set_attr "length" "4,4,8,9,4,10") + (set_attr "cc" "none,set_zn,clobber,clobber,none,clobber")]) + +;; fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +;; move floating point numbers (32 bit) + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + " +{ + /* One of the ops has to be in a register. 
*/ + if (!register_operand (operand1, SFmode) + && !register_operand (operand0, SFmode)) + { + operands[1] = copy_to_mode_reg (SFmode, operand1); + } +}") + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r,Qm,!d,r") + (match_operand:SF 1 "general_operand" "r,G,Qm,r,F,F"))] + "register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode)" + "* return output_movsisf (insn, operands, NULL);" + [(set_attr "length" "4,4,8,9,4,10") + (set_attr "cc" "none,set_zn,clobber,clobber,none,clobber")]) + +;;========================================================================= +;; move string (like memcpy) +;; implement as RTL loop + +(define_expand "movmemhi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:HI 2 "const_int_operand" "")) + (use (match_operand:HI 3 "const_int_operand" ""))])] + "" + "{ + int prob; + HOST_WIDE_INT count; + enum machine_mode mode; + rtx label = gen_label_rtx (); + rtx loop_reg; + rtx jump; + + /* Copy pointers into new psuedos - they will be changed. */ + rtx addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + rtx addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + + /* Create rtx for tmp register - we use this as scratch. */ + rtx tmp_reg_rtx = gen_rtx_REG (QImode, TMP_REGNO); + + if (GET_CODE (operands[2]) != CONST_INT) + FAIL; + + count = INTVAL (operands[2]); + if (count <= 0) + FAIL; + + /* Work out branch probability for latter use. */ + prob = REG_BR_PROB_BASE - REG_BR_PROB_BASE / count; + + /* See if constant fit 8 bits. */ + mode = (count < 0x100) ? QImode : HImode; + /* Create loop counter register. */ + loop_reg = copy_to_mode_reg (mode, gen_int_mode (count, mode)); + + /* Now create RTL code for move loop. */ + /* Label at top of loop. */ + emit_label (label); + + /* Move one byte into scratch and inc pointer. */ + emit_move_insn (tmp_reg_rtx, gen_rtx_MEM (QImode, addr1)); + emit_move_insn (addr1, gen_rtx_PLUS (Pmode, addr1, const1_rtx)); + + /* Move to mem and inc pointer. */ + emit_move_insn (gen_rtx_MEM (QImode, addr0), tmp_reg_rtx); + emit_move_insn (addr0, gen_rtx_PLUS (Pmode, addr0, const1_rtx)); + + /* Decrement count. */ + emit_move_insn (loop_reg, gen_rtx_PLUS (mode, loop_reg, constm1_rtx)); + + /* Compare with zero and jump if not equal. */ + emit_cmp_and_jump_insns (loop_reg, const0_rtx, NE, NULL_RTX, mode, 1, + label); + /* Set jump probability based on loop count. */ + jump = get_last_insn (); + add_reg_note (jump, REG_BR_PROB, GEN_INT (prob)); + DONE; +}") + +;; =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 +;; memset (%0, %2, %1) + +(define_expand "setmemhi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand 2 "const_int_operand" "")) + (use (match_operand:HI 1 "const_int_operand" "")) + (use (match_operand:HI 3 "const_int_operand" "n")) + (clobber (match_scratch:HI 4 "")) + (clobber (match_dup 5))])] + "" + "{ + rtx addr0; + int cnt8; + enum machine_mode mode; + + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (GET_CODE (operands[1]) != CONST_INT) + FAIL; + + cnt8 = byte_immediate_operand (operands[1], GET_MODE (operands[1])); + mode = cnt8 ? 
QImode : HImode; + operands[5] = gen_rtx_SCRATCH (mode); + operands[1] = copy_to_mode_reg (mode, + gen_int_mode (INTVAL (operands[1]), mode)); + addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + operands[0] = gen_rtx_MEM (BLKmode, addr0); +}") + +(define_insn "*clrmemqi" + [(set (mem:BLK (match_operand:HI 0 "register_operand" "e")) + (const_int 0)) + (use (match_operand:QI 1 "register_operand" "r")) + (use (match_operand:QI 2 "const_int_operand" "n")) + (clobber (match_scratch:HI 3 "=0")) + (clobber (match_scratch:QI 4 "=&1"))] + "" + "st %a0+,__zero_reg__ + dec %1 + brne .-6" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "*clrmemhi" + [(set (mem:BLK (match_operand:HI 0 "register_operand" "e,e")) + (const_int 0)) + (use (match_operand:HI 1 "register_operand" "!w,d")) + (use (match_operand:HI 2 "const_int_operand" "n,n")) + (clobber (match_scratch:HI 3 "=0,0")) + (clobber (match_scratch:HI 4 "=&1,&1"))] + "" + "*{ + if (which_alternative==0) + return (AS2 (st,%a0+,__zero_reg__) CR_TAB + AS2 (sbiw,%A1,1) CR_TAB + AS1 (brne,.-6)); + else + return (AS2 (st,%a0+,__zero_reg__) CR_TAB + AS2 (subi,%A1,1) CR_TAB + AS2 (sbci,%B1,0) CR_TAB + AS1 (brne,.-8)); +}" + [(set_attr "length" "3,4") + (set_attr "cc" "clobber,clobber")]) + +(define_expand "strlenhi" + [(set (match_dup 4) + (unspec:HI [(match_operand:BLK 1 "memory_operand" "") + (match_operand:QI 2 "const_int_operand" "") + (match_operand:HI 3 "immediate_operand" "")] + UNSPEC_STRLEN)) + (set (match_dup 4) (plus:HI (match_dup 4) + (const_int -1))) + (set (match_operand:HI 0 "register_operand" "") + (minus:HI (match_dup 4) + (match_dup 5)))] + "" + "{ + rtx addr; + if (! (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 0)) + FAIL; + addr = copy_to_mode_reg (Pmode, XEXP (operands[1],0)); + operands[1] = gen_rtx_MEM (BLKmode, addr); + operands[5] = addr; + operands[4] = gen_reg_rtx (HImode); +}") + +(define_insn "*strlenhi" + [(set (match_operand:HI 0 "register_operand" "=e") + (unspec:HI [(mem:BLK (match_operand:HI 1 "register_operand" "%0")) + (const_int 0) + (match_operand:HI 2 "immediate_operand" "i")] + UNSPEC_STRLEN))] + "" + "ld __tmp_reg__,%a0+ + tst __tmp_reg__ + brne .-6" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +; add bytes + +(define_insn "addqi3" + [(set (match_operand:QI 0 "register_operand" "=r,d,r,r") + (plus:QI (match_operand:QI 1 "register_operand" "%0,0,0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i,P,N")))] + "" + "@ + add %0,%2 + subi %0,lo8(-(%2)) + inc %0 + dec %0" + [(set_attr "length" "1,1,1,1") + (set_attr "cc" "set_czn,set_czn,set_zn,set_zn")]) + + +(define_expand "addhi3" + [(set (match_operand:HI 0 "register_operand" "") + (plus:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "nonmemory_operand" "")))] + "" + " +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + short tmp = INTVAL (operands[2]); + operands[2] = GEN_INT(tmp); + } +}") + + +(define_insn "*addhi3_zero_extend" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (zero_extend:HI + (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "0")))] + "" + "add %A0,%1 + adc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "*addhi3_zero_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (match_operand:HI 1 "register_operand" "%0") + (zero_extend:HI + (match_operand:QI 2 "register_operand" 
"r"))))] + "" + "add %A0,%2 + adc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "*addhi3_sp_R_pc2" + [(set (match_operand:HI 1 "stack_register_operand" "=q") + (plus:HI (match_operand:HI 2 "stack_register_operand" "q") + (match_operand:HI 0 "avr_sp_immediate_operand" "R")))] + "AVR_2_BYTE_PC" + "*{ + if (CONST_INT_P (operands[0])) + { + switch(INTVAL (operands[0])) + { + case -6: + return \"rcall .\" CR_TAB + \"rcall .\" CR_TAB + \"rcall .\"; + case -5: + return \"rcall .\" CR_TAB + \"rcall .\" CR_TAB + \"push __tmp_reg__\"; + case -4: + return \"rcall .\" CR_TAB + \"rcall .\"; + case -3: + return \"rcall .\" CR_TAB + \"push __tmp_reg__\"; + case -2: + return \"rcall .\"; + case -1: + return \"push __tmp_reg__\"; + case 0: + return \"\"; + case 1: + return \"pop __tmp_reg__\"; + case 2: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + case 3: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + case 4: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + case 5: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + } + } + return \"bug\"; + }" + [(set (attr "length") + (cond [(eq (const_int -6) (symbol_ref "INTVAL (operands[0])")) (const_int 3) + (eq (const_int -5) (symbol_ref "INTVAL (operands[0])")) (const_int 3) + (eq (const_int -4) (symbol_ref "INTVAL (operands[0])")) (const_int 2) + (eq (const_int -3) (symbol_ref "INTVAL (operands[0])")) (const_int 2) + (eq (const_int -2) (symbol_ref "INTVAL (operands[0])")) (const_int 1) + (eq (const_int -1) (symbol_ref "INTVAL (operands[0])")) (const_int 1) + (eq (const_int 0) (symbol_ref "INTVAL (operands[0])")) (const_int 0) + (eq (const_int 1) (symbol_ref "INTVAL (operands[0])")) (const_int 1) + (eq (const_int 2) (symbol_ref "INTVAL (operands[0])")) (const_int 2) + (eq (const_int 3) (symbol_ref "INTVAL (operands[0])")) (const_int 3) + (eq (const_int 4) (symbol_ref "INTVAL (operands[0])")) (const_int 4) + (eq (const_int 5) (symbol_ref "INTVAL (operands[0])")) (const_int 5)] + (const_int 0)))]) + +(define_insn "*addhi3_sp_R_pc3" + [(set (match_operand:HI 1 "stack_register_operand" "=q") + (plus:HI (match_operand:HI 2 "stack_register_operand" "q") + (match_operand:QI 0 "avr_sp_immediate_operand" "R")))] + "AVR_3_BYTE_PC" + "*{ + if (CONST_INT_P (operands[0])) + { + switch(INTVAL (operands[0])) + { + case -6: + return \"rcall .\" CR_TAB + \"rcall .\"; + case -5: + return \"rcall .\" CR_TAB + \"push __tmp_reg__\" CR_TAB + \"push __tmp_reg__\"; + case -4: + return \"rcall .\" CR_TAB + \"push __tmp_reg__\"; + case -3: + return \"rcall .\"; + case -2: + return \"push __tmp_reg__\" CR_TAB + \"push __tmp_reg__\"; + case -1: + return \"push __tmp_reg__\"; + case 0: + return \"\"; + case 1: + return \"pop __tmp_reg__\"; + case 2: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + case 3: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + case 4: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + case 5: + return \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\" CR_TAB + \"pop __tmp_reg__\"; + } + } + return \"bug\"; + }" + [(set (attr "length") + (cond [(eq (const_int -6) (symbol_ref "INTVAL (operands[0])")) (const_int 2) + (eq (const_int 
-5) (symbol_ref "INTVAL (operands[0])")) (const_int 3) + (eq (const_int -4) (symbol_ref "INTVAL (operands[0])")) (const_int 2) + (eq (const_int -3) (symbol_ref "INTVAL (operands[0])")) (const_int 1) + (eq (const_int -2) (symbol_ref "INTVAL (operands[0])")) (const_int 2) + (eq (const_int -1) (symbol_ref "INTVAL (operands[0])")) (const_int 1) + (eq (const_int 0) (symbol_ref "INTVAL (operands[0])")) (const_int 0) + (eq (const_int 1) (symbol_ref "INTVAL (operands[0])")) (const_int 1) + (eq (const_int 2) (symbol_ref "INTVAL (operands[0])")) (const_int 2) + (eq (const_int 3) (symbol_ref "INTVAL (operands[0])")) (const_int 3) + (eq (const_int 4) (symbol_ref "INTVAL (operands[0])")) (const_int 4) + (eq (const_int 5) (symbol_ref "INTVAL (operands[0])")) (const_int 5)] + (const_int 0)))]) + +(define_insn "*addhi3" + [(set (match_operand:HI 0 "register_operand" "=r,!w,!w,d,r,r") + (plus:HI + (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:HI 2 "nonmemory_operand" "r,I,J,i,P,N")))] + "" + "@ + add %A0,%A2\;adc %B0,%B2 + adiw %A0,%2 + sbiw %A0,%n2 + subi %A0,lo8(-(%2))\;sbci %B0,hi8(-(%2)) + sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__ + sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__" + [(set_attr "length" "2,1,1,2,3,3") + (set_attr "cc" "set_n,set_czn,set_czn,set_czn,set_n,set_n")]) + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,!w,!w,d,r,r") + (plus:SI + (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:SI 2 "nonmemory_operand" "r,I,J,i,P,N")))] + "" + "@ + add %A0,%A2\;adc %B0,%B2\;adc %C0,%C2\;adc %D0,%D2 + adiw %0,%2\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__ + sbiw %0,%n2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__ + subi %0,lo8(-(%2))\;sbci %B0,hi8(-(%2))\;sbci %C0,hlo8(-(%2))\;sbci %D0,hhi8(-(%2)) + sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__ + sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" + [(set_attr "length" "4,3,3,4,5,5") + (set_attr "cc" "set_n,set_n,set_czn,set_czn,set_n,set_n")]) + +(define_insn "*addsi3_zero_extend" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (zero_extend:SI + (match_operand:QI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "0")))] + "" + "add %A0,%1 + adc %B0,__zero_reg__ + adc %C0,__zero_reg__ + adc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +;----------------------------------------------------------------------------- +; sub bytes +(define_insn "subqi3" + [(set (match_operand:QI 0 "register_operand" "=r,d") + (minus:QI (match_operand:QI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i")))] + "" + "@ + sub %0,%2 + subi %0,lo8(%2)" + [(set_attr "length" "1,1") + (set_attr "cc" "set_czn,set_czn")]) + +(define_insn "subhi3" + [(set (match_operand:HI 0 "register_operand" "=r,d") + (minus:HI (match_operand:HI 1 "register_operand" "0,0") + (match_operand:HI 2 "nonmemory_operand" "r,i")))] + "" + "@ + sub %A0,%A2\;sbc %B0,%B2 + subi %A0,lo8(%2)\;sbci %B0,hi8(%2)" + [(set_attr "length" "2,2") + (set_attr "cc" "set_czn,set_czn")]) + +(define_insn "*subhi3_zero_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI + (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2 + sbc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 
"register_operand" "=r,d") + (minus:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "r,i")))] + "" + "@ + sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2 + subi %A0,lo8(%2)\;sbci %B0,hi8(%2)\;sbci %C0,hlo8(%2)\;sbci %D0,hhi8(%2)" + [(set_attr "length" "4,4") + (set_attr "cc" "set_czn,set_czn")]) + +(define_insn "*subsi3_zero_extend" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (zero_extend:SI + (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2 + sbc %B0,__zero_reg__ + sbc %C0,__zero_reg__ + sbc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +;****************************************************************************** +; mul + +(define_expand "mulqi3" + [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" "")))] + "" + "{ + if (!AVR_HAVE_MUL) + { + emit_insn (gen_mulqi3_call (operands[0], operands[1], operands[2])); + DONE; + } +}") + +(define_insn "*mulqi3_enh" + [(set (match_operand:QI 0 "register_operand" "=r") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%2 + mov %0,r0 + clr r1" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_expand "mulqi3_call" + [(set (reg:QI 24) (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 22) (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22))]) + (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))] + "" + "") + +(define_insn "*mulqi3_call" + [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22))] + "!AVR_HAVE_MUL" + "%~call __mulqi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "mulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "d"))))] + "AVR_HAVE_MUL" + "muls %1,%2 + movw %0,r0 + clr r1" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "umulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%2 + movw %0,r0 + clr r1" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "register_operand" "")))] + "" + " +{ + if (!AVR_HAVE_MUL) + { + emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2])); + DONE; + } +}") + +(define_insn "*mulhi3_enh" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (match_operand:HI 1 "register_operand" "r") + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %A1,%A2 + movw %0,r0 + mul %A1,%B2 + add %B0,r0 + mul %B1,%A2 + add %B0,r0 + clr r1" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_expand "mulhi3_call" + [(set (reg:HI 24) (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 22) (match_operand:HI 2 "register_operand" "")) + (parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 22)) + (clobber (reg:QI 21))]) + (set (match_operand:HI 0 
"register_operand" "") (reg:HI 24))] + "" + "") + +(define_insn "*mulhi3_call" + [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 22)) + (clobber (reg:QI 21))] + "!AVR_HAVE_MUL" + "%~call __mulhi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Operand 2 (reg:SI 18) not clobbered on the enhanced core. +;; All call-used registers clobbered otherwise - normal library call. +(define_expand "mulsi3" + [(set (reg:SI 22) (match_operand:SI 1 "register_operand" "")) + (set (reg:SI 18) (match_operand:SI 2 "register_operand" "")) + (parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_operand:SI 0 "register_operand" "") (reg:SI 22))] + "AVR_HAVE_MUL" + "") + +(define_insn "*mulsi3_call" + [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "AVR_HAVE_MUL" + "%~call __mulsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % +; divmod + +;; Generate libgcc.S calls ourselves, because: +;; - we know exactly which registers are clobbered (for QI and HI +;; modes, some of the call-used registers are preserved) +;; - we get both the quotient and the remainder at no extra cost +;; - we split the patterns only after the first CSE passes because +;; CSE has problems to operate on hard regs. +;; +(define_insn_and_split "divmodqi4" + [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "") + (div:QI (match_operand:QI 1 "pseudo_register_operand" "") + (match_operand:QI 2 "pseudo_register_operand" ""))) + (set (match_operand:QI 3 "pseudo_register_operand" "") + (mod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23)) + (clobber (reg:QI 24)) + (clobber (reg:QI 25))])] + "" + "this divmodqi4 pattern should have been splitted;" + "" + [(set (reg:QI 24) (match_dup 1)) + (set (reg:QI 22) (match_dup 2)) + (parallel [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23))]) + (set (match_dup 0) (reg:QI 24)) + (set (match_dup 3) (reg:QI 25))] + "") + +(define_insn "*divmodqi4_call" + [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23))] + "" + "%~call __divmodqi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodqi4" + [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "") + (udiv:QI (match_operand:QI 1 "pseudo_register_operand" "") + (match_operand:QI 2 "pseudo_register_operand" ""))) + (set (match_operand:QI 3 "pseudo_register_operand" "") + (umod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23)) + (clobber (reg:QI 24)) + (clobber (reg:QI 25))])] + "" + "this udivmodqi4 pattern should have been splitted;" + "" + [(set (reg:QI 24) (match_dup 1)) + (set (reg:QI 22) (match_dup 2)) + (parallel [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 23))]) + (set (match_dup 0) (reg:QI 24)) + (set (match_dup 3) (reg:QI 25))] + "") + +(define_insn "*udivmodqi4_call" + [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 23))] + "" + "%~call __udivmodqi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + 
+(define_insn_and_split "divmodhi4" + [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "") + (div:HI (match_operand:HI 1 "pseudo_register_operand" "") + (match_operand:HI 2 "pseudo_register_operand" ""))) + (set (match_operand:HI 3 "pseudo_register_operand" "") + (mod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22)) + (clobber (reg:HI 24)) + (clobber (reg:HI 26))])] + "" + "this should have been splitted;" + "" + [(set (reg:HI 24) (match_dup 1)) + (set (reg:HI 22) (match_dup 2)) + (parallel [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_dup 0) (reg:HI 22)) + (set (match_dup 3) (reg:HI 24))] + "") + +(define_insn "*divmodhi4_call" + [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __divmodhi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodhi4" + [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "") + (udiv:HI (match_operand:HI 1 "pseudo_register_operand" "") + (match_operand:HI 2 "pseudo_register_operand" ""))) + (set (match_operand:HI 3 "pseudo_register_operand" "") + (umod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22)) + (clobber (reg:HI 24)) + (clobber (reg:HI 26))])] + "" + "this udivmodhi4 pattern should have been splitted.;" + "" + [(set (reg:HI 24) (match_dup 1)) + (set (reg:HI 22) (match_dup 2)) + (parallel [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_dup 0) (reg:HI 22)) + (set (match_dup 3) (reg:HI 24))] + "") + +(define_insn "*udivmodhi4_call" + [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __udivmodhi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "divmodsi4" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (div:SI (match_operand:SI 1 "pseudo_register_operand" "") + (match_operand:SI 2 "pseudo_register_operand" ""))) + (set (match_operand:SI 3 "pseudo_register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI 18)) + (clobber (reg:SI 22)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))])] + "" + "this divmodsi4 pattern should have been splitted;" + "" + [(set (reg:SI 22) (match_dup 1)) + (set (reg:SI 18) (match_dup 2)) + (parallel [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) (reg:SI 18)) + (set (match_dup 3) (reg:SI 22))] + "") + +(define_insn "*divmodsi4_call" + [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __divmodsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodsi4" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (udiv:SI (match_operand:SI 1 "pseudo_register_operand" "") + (match_operand:SI 2 "pseudo_register_operand" ""))) + (set (match_operand:SI 3 "pseudo_register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI 18)) + (clobber 
(reg:SI 22)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))])] + "" + "this udivmodsi4 pattern should have been splitted;" + "" + [(set (reg:SI 22) (match_dup 1)) + (set (reg:SI 18) (match_dup 2)) + (parallel [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) (reg:SI 18)) + (set (match_dup 3) (reg:SI 22))] + "") + +(define_insn "*udivmodsi4_call" + [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __udivmodsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& +; and + +(define_insn "andqi3" + [(set (match_operand:QI 0 "register_operand" "=r,d") + (and:QI (match_operand:QI 1 "register_operand" "%0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i")))] + "" + "@ + and %0,%2 + andi %0,lo8(%2)" + [(set_attr "length" "1,1") + (set_attr "cc" "set_zn,set_zn")]) + +(define_insn "andhi3" + [(set (match_operand:HI 0 "register_operand" "=r,d,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0") + (match_operand:HI 2 "nonmemory_operand" "r,i,M"))) + (clobber (match_scratch:QI 3 "=X,X,&d"))] + "" +{ + if (which_alternative==0) + return ("and %A0,%A2" CR_TAB + "and %B0,%B2"); + else if (which_alternative==1) + { + if (GET_CODE (operands[2]) == CONST_INT) + { + int mask = INTVAL (operands[2]); + if ((mask & 0xff) != 0xff) + output_asm_insn (AS2 (andi,%A0,lo8(%2)), operands); + if ((mask & 0xff00) != 0xff00) + output_asm_insn (AS2 (andi,%B0,hi8(%2)), operands); + return ""; + } + return (AS2 (andi,%A0,lo8(%2)) CR_TAB + AS2 (andi,%B0,hi8(%2))); + } + return (AS2 (ldi,%3,lo8(%2)) CR_TAB + "and %A0,%3" CR_TAB + AS1 (clr,%B0)); +} + [(set_attr "length" "2,2,3") + (set_attr "cc" "set_n,clobber,set_n")]) + +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=r,d") + (and:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "r,i")))] + "" +{ + if (which_alternative==0) + return ("and %0,%2" CR_TAB + "and %B0,%B2" CR_TAB + "and %C0,%C2" CR_TAB + "and %D0,%D2"); + else if (which_alternative==1) + { + if (GET_CODE (operands[2]) == CONST_INT) + { + HOST_WIDE_INT mask = INTVAL (operands[2]); + if ((mask & 0xff) != 0xff) + output_asm_insn (AS2 (andi,%A0,lo8(%2)), operands); + if ((mask & 0xff00) != 0xff00) + output_asm_insn (AS2 (andi,%B0,hi8(%2)), operands); + if ((mask & 0xff0000L) != 0xff0000L) + output_asm_insn (AS2 (andi,%C0,hlo8(%2)), operands); + if ((mask & 0xff000000L) != 0xff000000L) + output_asm_insn (AS2 (andi,%D0,hhi8(%2)), operands); + return ""; + } + return (AS2 (andi, %A0,lo8(%2)) CR_TAB + AS2 (andi, %B0,hi8(%2)) CR_TAB + AS2 (andi, %C0,hlo8(%2)) CR_TAB + AS2 (andi, %D0,hhi8(%2))); + } + return "bug"; +} + [(set_attr "length" "4,4") + (set_attr "cc" "set_n,clobber")]) + +(define_peephole2 ; andi + [(set (match_operand:QI 0 "d_register_operand" "") + (and:QI (match_dup 0) + (match_operand:QI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:QI (match_dup 0) + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + { + operands[1] = GEN_INT (INTVAL (operands[1]) & INTVAL (operands[2])); + }) + +;;||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +;; ior + +(define_insn "iorqi3" + [(set (match_operand:QI 0 
"register_operand" "=r,d") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i")))] + "" + "@ + or %0,%2 + ori %0,lo8(%2)" + [(set_attr "length" "1,1") + (set_attr "cc" "set_zn,set_zn")]) + +(define_insn "iorhi3" + [(set (match_operand:HI 0 "register_operand" "=r,d") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0") + (match_operand:HI 2 "nonmemory_operand" "r,i")))] + "" +{ + if (which_alternative==0) + return ("or %A0,%A2" CR_TAB + "or %B0,%B2"); + if (GET_CODE (operands[2]) == CONST_INT) + { + int mask = INTVAL (operands[2]); + if (mask & 0xff) + output_asm_insn (AS2 (ori,%A0,lo8(%2)), operands); + if (mask & 0xff00) + output_asm_insn (AS2 (ori,%B0,hi8(%2)), operands); + return ""; + } + return (AS2 (ori,%0,lo8(%2)) CR_TAB + AS2 (ori,%B0,hi8(%2))); +} + [(set_attr "length" "2,2") + (set_attr "cc" "set_n,clobber")]) + +(define_insn "*iorhi3_clobber" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0") + (match_operand:HI 2 "immediate_operand" "M,i"))) + (clobber (match_scratch:QI 3 "=&d,&d"))] + "" + "@ + ldi %3,lo8(%2)\;or %A0,%3 + ldi %3,lo8(%2)\;or %A0,%3\;ldi %3,hi8(%2)\;or %B0,%3" + [(set_attr "length" "2,4") + (set_attr "cc" "clobber,set_n")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=r,d") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "r,i")))] + "" +{ + if (which_alternative==0) + return ("or %0,%2" CR_TAB + "or %B0,%B2" CR_TAB + "or %C0,%C2" CR_TAB + "or %D0,%D2"); + if (GET_CODE (operands[2]) == CONST_INT) + { + HOST_WIDE_INT mask = INTVAL (operands[2]); + if (mask & 0xff) + output_asm_insn (AS2 (ori,%A0,lo8(%2)), operands); + if (mask & 0xff00) + output_asm_insn (AS2 (ori,%B0,hi8(%2)), operands); + if (mask & 0xff0000L) + output_asm_insn (AS2 (ori,%C0,hlo8(%2)), operands); + if (mask & 0xff000000L) + output_asm_insn (AS2 (ori,%D0,hhi8(%2)), operands); + return ""; + } + return (AS2 (ori, %A0,lo8(%2)) CR_TAB + AS2 (ori, %B0,hi8(%2)) CR_TAB + AS2 (ori, %C0,hlo8(%2)) CR_TAB + AS2 (ori, %D0,hhi8(%2))); +} + [(set_attr "length" "4,4") + (set_attr "cc" "set_n,clobber")]) + +(define_insn "*iorsi3_clobber" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "immediate_operand" "M,i"))) + (clobber (match_scratch:QI 3 "=&d,&d"))] + "" + "@ + ldi %3,lo8(%2)\;or %A0,%3 + ldi %3,lo8(%2)\;or %A0,%3\;ldi %3,hi8(%2)\;or %B0,%3\;ldi %3,hlo8(%2)\;or %C0,%3\;ldi %3,hhi8(%2)\;or %D0,%3" + [(set_attr "length" "2,8") + (set_attr "cc" "clobber,set_n")]) + +;;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +;; xor + +(define_insn "xorqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (xor:QI (match_operand:QI 1 "register_operand" "%0") + (match_operand:QI 2 "register_operand" "r")))] + "" + "eor %0,%2" + [(set_attr "length" "1") + (set_attr "cc" "set_zn")]) + +(define_insn "xorhi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (xor:HI (match_operand:HI 1 "register_operand" "%0") + (match_operand:HI 2 "register_operand" "r")))] + "" + "eor %0,%2 + eor %B0,%B2" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r")))] + "" + "eor %0,%2 + eor %B0,%B2 + eor %C0,%C2 + eor %D0,%D2" + [(set_attr "length" 
"4") + (set_attr "cc" "set_n")]) + +;; swap swap swap swap swap swap swap swap swap swap swap swap swap swap swap +;; swap + +(define_expand "rotlqi3" + [(set (match_operand:QI 0 "register_operand" "") + (rotate:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + " +{ + if (!CONST_INT_P (operands[2]) || (INTVAL (operands[2]) != 4)) + FAIL; +}") + +(define_insn "*rotlqi3_4" + [(set (match_operand:QI 0 "register_operand" "=r") + (rotate:QI (match_operand:QI 1 "register_operand" "0") + (const_int 4)))] + "" + "swap %0" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; Split all rotates of HI,SI and DImode registers where rotation is by +;; a whole number of bytes. The split creates the appropriate moves and +;; considers all overlap situations. DImode is split before reload. + +;; HImode does not need scratch. Use attribute for this constraint. +;; Use QI scratch for DI mode as this is often split into byte sized operands. + +(define_mode_attr rotx [(DI "&r,&r,X") (SI "&r,&r,X") (HI "X,X,X")]) +(define_mode_attr rotsmode [(DI "QI") (SI "HI") (HI "QI")]) + +(define_expand "rotl3" + [(parallel [(set (match_operand:HIDI 0 "register_operand" "") + (rotate:HIDI (match_operand:HIDI 1 "register_operand" "") + (match_operand:VOID 2 "const_int_operand" ""))) + (clobber (match_operand 3 ""))])] + "" + { + if (CONST_INT_P (operands[2]) + && 0 == INTVAL (operands[2]) % 8) + { + if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16) + operands[3] = gen_rtx_SCRATCH (mode); + else + operands[3] = gen_rtx_SCRATCH (QImode); + } + else + FAIL; + }) + + +;; Overlapping non-HImode registers often (but not always) need a scratch. +;; The best we can do is use early clobber alternative "#&r" so that +;; completely non-overlapping operands dont get a scratch but # so register +;; allocation does not prefer non-overlapping. + + +; Split word aligned rotates using scratch that is mode dependent. +(define_insn_and_split "*rotw" + [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r") + (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r") + (match_operand 2 "const_int_operand" "n,n,n"))) + (clobber (match_scratch: 3 "="))] + "AVR_HAVE_MOVW + && CONST_INT_P (operands[2]) + && 0 == INTVAL (operands[2]) % 16" + "#" + "&& (reload_completed || mode == DImode)" + [(const_int 0)] + { + avr_rotate_bytes (operands); + DONE; + }) + + +; Split byte aligned rotates using scratch that is always QI mode. 
+(define_insn_and_split "*rotb" + [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r") + (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r") + (match_operand 2 "const_int_operand" "n,n,n"))) + (clobber (match_scratch:QI 3 "="))] + "CONST_INT_P (operands[2]) + && (8 == INTVAL (operands[2]) % 16 + || (!AVR_HAVE_MOVW + && 0 == INTVAL (operands[2]) % 16))" + "#" + "&& (reload_completed || mode == DImode)" + [(const_int 0)] + { + avr_rotate_bytes (operands); + DONE; + }) + + +;;<< << << << << << << << << << << << << << << << << << << << << << << << << << +;; arithmetic shift left + +(define_expand "ashlqi3" + [(set (match_operand:QI 0 "register_operand" "") + (ashift:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "general_operand" "")))] + "" + "") + +(define_split ; ashlqi3_const4 + [(set (match_operand:QI 0 "d_register_operand" "") + (ashift:QI (match_dup 0) + (const_int 4)))] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (and:QI (match_dup 0) (const_int -16)))] + "") + +(define_split ; ashlqi3_const5 + [(set (match_operand:QI 0 "d_register_operand" "") + (ashift:QI (match_dup 0) + (const_int 5)))] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 1))) + (set (match_dup 0) (and:QI (match_dup 0) (const_int -32)))] + "") + +(define_split ; ashlqi3_const6 + [(set (match_operand:QI 0 "d_register_operand" "") + (ashift:QI (match_dup 0) + (const_int 6)))] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 2))) + (set (match_dup 0) (and:QI (match_dup 0) (const_int -64)))] + "") + +(define_insn "*ashlqi3" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,!d,r,r") + (ashift:QI (match_operand:QI 1 "register_operand" "0,0,0,0,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,K,n,n,Qm")))] + "" + "* return ashlqi3_out (insn, operands, NULL);" + [(set_attr "length" "5,0,1,2,4,6,9") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")]) + +(define_insn "ashlhi3" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r,r") + (ashift:HI (match_operand:HI 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))] + "" + "* return ashlhi3_out (insn, operands, NULL);" + [(set_attr "length" "6,0,2,2,4,10,10") + (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")]) + +(define_insn "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r") + (ashift:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))] + "" + "* return ashlsi3_out (insn, operands, NULL);" + [(set_attr "length" "8,0,4,4,8,10,12") + (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. 
+ +(define_peephole2 ; ashlqi3_l_const4 + [(set (match_operand:QI 0 "l_register_operand" "") + (ashift:QI (match_dup 0) + (const_int 4))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 1) (const_int -16)) + (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + "") + +(define_peephole2 ; ashlqi3_l_const5 + [(set (match_operand:QI 0 "l_register_operand" "") + (ashift:QI (match_dup 0) + (const_int 5))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 1))) + (set (match_dup 1) (const_int -32)) + (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + "") + +(define_peephole2 ; ashlqi3_l_const6 + [(set (match_operand:QI 0 "l_register_operand" "") + (ashift:QI (match_dup 0) + (const_int 6))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 2))) + (set (match_dup 1) (const_int -64)) + (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + "") + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:HI 0 "register_operand" "") + (ashift:HI (match_operand:HI 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) (ashift:HI (match_dup 1) (match_dup 2))) + (clobber (match_dup 3))])] + "") + +(define_insn "*ashlhi3_const" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (ashift:HI (match_operand:HI 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + "* return ashlhi3_out (insn, operands, NULL);" + [(set_attr "length" "0,2,2,4,10") + (set_attr "cc" "none,set_n,clobber,set_n,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) + (clobber (match_dup 3))])] + "") + +(define_insn "*ashlsi3_const" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (ashift:SI (match_operand:SI 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + "* return ashlsi3_out (insn, operands, NULL);" + [(set_attr "length" "0,4,4,10") + (set_attr "cc" "none,set_n,clobber,clobber")]) + +;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> +;; arithmetic shift right + +(define_insn "ashrqi3" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r,r") + (ashiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,K,n,Qm")))] + "" + "* return ashrqi3_out (insn, operands, NULL);" + [(set_attr "length" "5,0,1,2,5,9") + (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber")]) + +(define_insn "ashrhi3" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r,r") + (ashiftrt:HI (match_operand:HI 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))] + "" + "* return ashrhi3_out (insn, operands, NULL);" + [(set_attr "length" "6,0,2,4,4,10,10") + (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" 
"=r,r,r,r,r,r,r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))] + "" + "* return ashrsi3_out (insn, operands, NULL);" + [(set_attr "length" "8,0,4,6,8,10,12") + (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:HI 0 "register_operand" "") + (ashiftrt:HI (match_operand:HI 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) (ashiftrt:HI (match_dup 1) (match_dup 2))) + (clobber (match_dup 3))])] + "") + +(define_insn "*ashrhi3_const" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (ashiftrt:HI (match_operand:HI 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + "* return ashrhi3_out (insn, operands, NULL);" + [(set_attr "length" "0,2,4,4,10") + (set_attr "cc" "none,clobber,set_n,clobber,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (match_dup 2))) + (clobber (match_dup 3))])] + "") + +(define_insn "*ashrsi3_const" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + "* return ashrsi3_out (insn, operands, NULL);" + [(set_attr "length" "0,4,4,10") + (set_attr "cc" "none,clobber,set_n,clobber")]) + +;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> +;; logical shift right + +(define_expand "lshrqi3" + [(set (match_operand:QI 0 "register_operand" "") + (lshiftrt:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "general_operand" "")))] + "" + "") + +(define_split ; lshrqi3_const4 + [(set (match_operand:QI 0 "d_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 4)))] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (and:QI (match_dup 0) (const_int 15)))] + "") + +(define_split ; lshrqi3_const5 + [(set (match_operand:QI 0 "d_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 5)))] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 1))) + (set (match_dup 0) (and:QI (match_dup 0) (const_int 7)))] + "") + +(define_split ; lshrqi3_const6 + [(set (match_operand:QI 0 "d_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 6)))] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 2))) + (set (match_dup 0) (and:QI (match_dup 0) (const_int 3)))] + "") + +(define_insn "*lshrqi3" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,!d,r,r") + (lshiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0,0,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,K,n,n,Qm")))] + "" + "* return lshrqi3_out (insn, operands, NULL);" + [(set_attr "length" "5,0,1,2,4,6,9") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")]) + +(define_insn "lshrhi3" + [(set 
(match_operand:HI 0 "register_operand" "=r,r,r,r,r,r,r") + (lshiftrt:HI (match_operand:HI 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))] + "" + "* return lshrhi3_out (insn, operands, NULL);" + [(set_attr "length" "6,0,2,2,4,10,10") + (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")]) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))] + "" + "* return lshrsi3_out (insn, operands, NULL);" + [(set_attr "length" "8,0,4,4,8,10,12") + (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. + +(define_peephole2 ; lshrqi3_l_const4 + [(set (match_operand:QI 0 "l_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 4))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 1) (const_int 15)) + (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + "") + +(define_peephole2 ; lshrqi3_l_const5 + [(set (match_operand:QI 0 "l_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 5))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 1))) + (set (match_dup 1) (const_int 7)) + (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + "") + +(define_peephole2 ; lshrqi3_l_const6 + [(set (match_operand:QI 0 "l_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 6))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4))) + (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 2))) + (set (match_dup 1) (const_int 3)) + (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + "") + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:HI 0 "register_operand" "") + (lshiftrt:HI (match_operand:HI 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) (lshiftrt:HI (match_dup 1) (match_dup 2))) + (clobber (match_dup 3))])] + "") + +(define_insn "*lshrhi3_const" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (lshiftrt:HI (match_operand:HI 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + "* return lshrhi3_out (insn, operands, NULL);" + [(set_attr "length" "0,2,2,4,10") + (set_attr "cc" "none,clobber,clobber,clobber,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 1) (match_dup 2))) + (clobber (match_dup 3))])] + "") + +(define_insn "*lshrsi3_const" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + "* return lshrsi3_out (insn, operands, NULL);" + [(set_attr "length" "0,4,4,10") + (set_attr "cc" "none,clobber,clobber,clobber")]) + +;; abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) +;; abs + 
+(define_insn "absqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (abs:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "sbrc %0,7 + neg %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=d,r") + (abs:SF (match_operand:SF 1 "register_operand" "0,0")))] + "" + "@ + andi %D0,0x7f + clt\;bld %D0,7" + [(set_attr "length" "1,2") + (set_attr "cc" "set_n,clobber")]) + +;; 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x +;; neg + +(define_insn "negqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (neg:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "neg %0" + [(set_attr "length" "1") + (set_attr "cc" "set_zn")]) + +(define_insn "neghi2" + [(set (match_operand:HI 0 "register_operand" "=!d,r,&r") + (neg:HI (match_operand:HI 1 "register_operand" "0,0,r")))] + "" + "@ + com %B0\;neg %A0\;sbci %B0,lo8(-1) + com %B0\;neg %A0\;sbc %B0,__zero_reg__\;inc %B0 + clr %A0\;clr %B0\;sub %A0,%A1\;sbc %B0,%B1" + [(set_attr "length" "3,4,4") + (set_attr "cc" "set_czn,set_n,set_czn")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=!d,r,&r") + (neg:SI (match_operand:SI 1 "register_operand" "0,0,r")))] + "" + "@ + com %D0\;com %C0\;com %B0\;neg %A0\;sbci %B0,lo8(-1)\;sbci %C0,lo8(-1)\;sbci %D0,lo8(-1) + com %D0\;com %C0\;com %B0\;com %A0\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__ + clr %A0\;clr %B0\;{clr %C0\;clr %D0|movw %C0,%A0}\;sub %A0,%A1\;sbc %B0,%B1\;sbc %C0,%C1\;sbc %D0,%D1" + [(set_attr_alternative "length" + [(const_int 7) + (const_int 8) + (if_then_else (eq_attr "mcu_have_movw" "yes") + (const_int 7) + (const_int 8))]) + (set_attr "cc" "set_czn,set_n,set_czn")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=d,r") + (neg:SF (match_operand:SF 1 "register_operand" "0,0")))] + "" + "@ + subi %D0,0x80 + bst %D0,7\;com %D0\;bld %D0,7\;com %D0" + [(set_attr "length" "1,4") + (set_attr "cc" "set_n,set_n")]) + +;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+;; not + +(define_insn "one_cmplqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (not:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "com %0" + [(set_attr "length" "1") + (set_attr "cc" "set_czn")]) + +(define_insn "one_cmplhi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (not:HI (match_operand:HI 1 "register_operand" "0")))] + "" + "com %0 + com %B0" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "0")))] + "" + "com %0 + com %B0 + com %C0 + com %D0" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x +;; sign extend + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %0,7\;com %B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0" + [(set_attr "length" "3,4") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0" + [(set_attr "length" "5,6") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))] + "" + "@ + clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0 + {mov %A0,%A1\;mov %B0,%B1|movw %A0,%A1}\;clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0" + [(set_attr_alternative "length" + [(const_int 4) + (if_then_else (eq_attr "mcu_have_movw" "yes") + (const_int 5) + (const_int 6))]) + (set_attr "cc" "set_n,set_n")]) + +;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x +;; zero extend + +(define_insn_and_split "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] +{ + unsigned int low_off = subreg_lowpart_offset (QImode, HImode); + unsigned int high_off = subreg_highpart_offset (QImode, HImode); + + operands[2] = simplify_gen_subreg (QImode, operands[0], HImode, low_off); + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, high_off); +}) + +(define_insn_and_split "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (zero_extend:HI (match_dup 1))) + (set (match_dup 3) (const_int 0))] +{ + unsigned int low_off = subreg_lowpart_offset (HImode, SImode); + unsigned int high_off = subreg_highpart_offset (HImode, SImode); + + operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, low_off); + operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off); +}) + +(define_insn_and_split "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] +{ + unsigned int low_off = subreg_lowpart_offset (HImode, SImode); + unsigned int high_off = subreg_highpart_offset (HImode, SImode); + + operands[2] = 
simplify_gen_subreg (HImode, operands[0], SImode, low_off); + operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off); +}) + +(define_insn_and_split "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (zero_extend:SI (match_dup 1))) + (set (match_dup 3) (const_int 0))] +{ + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); +}) + +(define_insn_and_split "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (zero_extend:SI (match_dup 1))) + (set (match_dup 3) (const_int 0))] +{ + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); +}) + +(define_insn_and_split "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] +{ + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); +}) + +;;<=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=> +;; compare + +; Optimize negated tests into reverse compare if overflow is undefined. +(define_insn "*negated_tstqi" + [(set (cc0) + (compare (neg:QI (match_operand:QI 0 "register_operand" "r")) + (const_int 0)))] + "(!flag_wrapv && !flag_trapv && flag_strict_overflow)" + "cp __zero_reg__,%0" + [(set_attr "cc" "compare") + (set_attr "length" "1")]) + +(define_insn "*reversed_tstqi" + [(set (cc0) + (compare (const_int 0) + (match_operand:QI 0 "register_operand" "r")))] + "" + "cp __zero_reg__,%0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +(define_insn "*negated_tsthi" + [(set (cc0) + (compare (neg:HI (match_operand:HI 0 "register_operand" "r")) + (const_int 0)))] + "(!flag_wrapv && !flag_trapv && flag_strict_overflow)" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +;; Leave here the clobber used by the cmphi pattern for simplicity, even +;; though it is unused, because this pattern is synthesized by avr_reorg. 
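+;; The transformation behind these negated/reversed test patterns: comparing
+;; -x against zero is replaced by comparing zero against x, e.g. for HImode
+;;     cp  __zero_reg__,%A0
+;;     cpc __zero_reg__,%B0
+;; which saves materializing the negation.  It is only enabled when signed
+;; overflow is undefined (the flag_strict_overflow test), since for the most
+;; negative value the negation would otherwise have to wrap.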
+(define_insn "*reversed_tsthi" + [(set (cc0) + (compare (const_int 0) + (match_operand:HI 0 "register_operand" "r"))) + (clobber (match_scratch:QI 1 "=X"))] + "" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +(define_insn "*negated_tstsi" + [(set (cc0) + (compare (neg:SI (match_operand:SI 0 "register_operand" "r")) + (const_int 0)))] + "(!flag_wrapv && !flag_trapv && flag_strict_overflow)" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0 + cpc __zero_reg__,%C0 + cpc __zero_reg__,%D0" + [(set_attr "cc" "compare") + (set_attr "length" "4")]) + +(define_insn "*reversed_tstsi" + [(set (cc0) + (compare (const_int 0) + (match_operand:SI 0 "register_operand" "r"))) + (clobber (match_scratch:QI 1 "=X"))] + "" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0 + cpc __zero_reg__,%C0 + cpc __zero_reg__,%D0" + [(set_attr "cc" "compare") + (set_attr "length" "4")]) + + +(define_insn "*cmpqi" + [(set (cc0) + (compare (match_operand:QI 0 "register_operand" "r,r,d") + (match_operand:QI 1 "nonmemory_operand" "L,r,i")))] + "" + "@ + tst %0 + cp %0,%1 + cpi %0,lo8(%1)" + [(set_attr "cc" "compare,compare,compare") + (set_attr "length" "1,1,1")]) + +(define_insn "*cmpqi_sign_extend" + [(set (cc0) + (compare (sign_extend:HI + (match_operand:QI 0 "register_operand" "d")) + (match_operand:HI 1 "const_int_operand" "n")))] + "INTVAL (operands[1]) >= -128 && INTVAL (operands[1]) <= 127" + "cpi %0,lo8(%1)" + [(set_attr "cc" "compare") + (set_attr "length" "1")]) + +(define_insn "*cmphi" + [(set (cc0) + (compare (match_operand:HI 0 "register_operand" "!w,r,r,d,d,r,r") + (match_operand:HI 1 "nonmemory_operand" "L,L,r,M,i,M,i"))) + (clobber (match_scratch:QI 2 "=X,X,X,X,&d,&d,&d"))] + "" + "*{ + switch (which_alternative) + { + case 0: case 1: + return out_tsthi (insn, operands[0], NULL); + + case 2: + return (AS2 (cp,%A0,%A1) CR_TAB + AS2 (cpc,%B0,%B1)); + case 3: + if (reg_unused_after (insn, operands[0]) + && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 63 + && test_hard_reg_class (ADDW_REGS, operands[0])) + return AS2 (sbiw,%0,%1); + else + return (AS2 (cpi,%0,%1) CR_TAB + AS2 (cpc,%B0,__zero_reg__)); + case 4: + if (reg_unused_after (insn, operands[0])) + return (AS2 (subi,%0,lo8(%1)) CR_TAB + AS2 (sbci,%B0,hi8(%1))); + else + return (AS2 (ldi, %2,hi8(%1)) CR_TAB + AS2 (cpi, %A0,lo8(%1)) CR_TAB + AS2 (cpc, %B0,%2)); + case 5: + return (AS2 (ldi, %2,lo8(%1)) CR_TAB + AS2 (cp, %A0,%2) CR_TAB + AS2 (cpc, %B0,__zero_reg__)); + + case 6: + return (AS2 (ldi, %2,lo8(%1)) CR_TAB + AS2 (cp, %A0,%2) CR_TAB + AS2 (ldi, %2,hi8(%1)) CR_TAB + AS2 (cpc, %B0,%2)); + } + return \"bug\"; +}" + [(set_attr "cc" "compare,compare,compare,compare,compare,compare,compare") + (set_attr "length" "1,2,2,2,3,3,4")]) + + +(define_insn "*cmpsi" + [(set (cc0) + (compare (match_operand:SI 0 "register_operand" "r,r,d,d,r,r") + (match_operand:SI 1 "nonmemory_operand" "L,r,M,i,M,i"))) + (clobber (match_scratch:QI 2 "=X,X,X,&d,&d,&d"))] + "" + "*{ + switch (which_alternative) + { + case 0: + return out_tstsi (insn, operands[0], NULL); + + case 1: + return (AS2 (cp,%A0,%A1) CR_TAB + AS2 (cpc,%B0,%B1) CR_TAB + AS2 (cpc,%C0,%C1) CR_TAB + AS2 (cpc,%D0,%D1)); + case 2: + if (reg_unused_after (insn, operands[0]) + && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 63 + && test_hard_reg_class (ADDW_REGS, operands[0])) + return (AS2 (sbiw,%0,%1) CR_TAB + AS2 (cpc,%C0,__zero_reg__) CR_TAB + AS2 (cpc,%D0,__zero_reg__)); + else + return (AS2 (cpi,%A0,lo8(%1)) CR_TAB + AS2 (cpc,%B0,__zero_reg__) 
CR_TAB + AS2 (cpc,%C0,__zero_reg__) CR_TAB + AS2 (cpc,%D0,__zero_reg__)); + case 3: + if (reg_unused_after (insn, operands[0])) + return (AS2 (subi,%A0,lo8(%1)) CR_TAB + AS2 (sbci,%B0,hi8(%1)) CR_TAB + AS2 (sbci,%C0,hlo8(%1)) CR_TAB + AS2 (sbci,%D0,hhi8(%1))); + else + return (AS2 (cpi, %A0,lo8(%1)) CR_TAB + AS2 (ldi, %2,hi8(%1)) CR_TAB + AS2 (cpc, %B0,%2) CR_TAB + AS2 (ldi, %2,hlo8(%1)) CR_TAB + AS2 (cpc, %C0,%2) CR_TAB + AS2 (ldi, %2,hhi8(%1)) CR_TAB + AS2 (cpc, %D0,%2)); + case 4: + return (AS2 (ldi,%2,lo8(%1)) CR_TAB + AS2 (cp,%A0,%2) CR_TAB + AS2 (cpc,%B0,__zero_reg__) CR_TAB + AS2 (cpc,%C0,__zero_reg__) CR_TAB + AS2 (cpc,%D0,__zero_reg__)); + case 5: + return (AS2 (ldi, %2,lo8(%1)) CR_TAB + AS2 (cp, %A0,%2) CR_TAB + AS2 (ldi, %2,hi8(%1)) CR_TAB + AS2 (cpc, %B0,%2) CR_TAB + AS2 (ldi, %2,hlo8(%1)) CR_TAB + AS2 (cpc, %C0,%2) CR_TAB + AS2 (ldi, %2,hhi8(%1)) CR_TAB + AS2 (cpc, %D0,%2)); + } + return \"bug\"; +}" + [(set_attr "cc" "compare,compare,compare,compare,compare,compare") + (set_attr "length" "4,4,4,7,5,8")]) + + +;; ---------------------------------------------------------------------- +;; JUMP INSTRUCTIONS +;; ---------------------------------------------------------------------- +;; Conditional jump instructions + +(define_expand "cbranchsi4" + [(parallel [(set (cc0) + (compare (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:QI 4 ""))]) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "") + +(define_expand "cbranchhi4" + [(parallel [(set (cc0) + (compare (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:QI 4 ""))]) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "") + +(define_expand "cbranchqi4" + [(set (cc0) + (compare (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "") + + +;; Test a single bit in a QI/HI/SImode register. +;; Combine will create zero extract patterns for single bit tests. +;; permit any mode in source pattern by using VOIDmode. + +(define_insn "*sbrx_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:QIDI + (match_operand:VOID 1 "register_operand" "r") + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "* return avr_out_sbxx_branch (insn, operands);" + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Same test based on Bitwise AND RTL. Keep this incase gcc changes patterns. +;; or for old peepholes. 
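+;; Roughly, either form lets a test such as
+;;     if (x & 0x40) ...                       /* x of a QImode type */
+;; branch via a single register-bit skip, for instance
+;;     sbrs r24,6
+;;     rjmp .L_skip
+;; (register and label are placeholders); avr_out_sbxx_branch chooses
+;; between SBRC and SBRS from the EQ/NE polarity and falls back to JMP on
+;; large devices when the target is out of RJMP range.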
+;; Fixme - bitwise Mask will not work for DImode + +(define_insn "*sbrx_and_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(and:QISI + (match_operand:QISI 1 "register_operand" "r") + (match_operand:QISI 2 "single_one_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + HOST_WIDE_INT bitnumber; + bitnumber = exact_log2 (GET_MODE_MASK (mode) & INTVAL (operands[2])); + operands[2] = GEN_INT (bitnumber); + return avr_out_sbxx_branch (insn, operands); +} + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Convert sign tests to bit 7/15/31 tests that match the above insns. +(define_peephole2 + [(set (cc0) (compare (match_operand:QI 0 "register_operand" "") + (const_int 0))) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (zero_extract:HI (match_dup 0) + (const_int 1) + (const_int 7)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") + +(define_peephole2 + [(set (cc0) (compare (match_operand:QI 0 "register_operand" "") + (const_int 0))) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (zero_extract:HI (match_dup 0) + (const_int 1) + (const_int 7)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:HI 2 ""))]) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (and:HI (match_dup 0) (const_int -32768)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:HI 2 ""))]) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (and:HI (match_dup 0) (const_int -32768)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:SI 2 ""))]) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (and:SI (match_dup 0) (match_dup 2)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "operands[2] = GEN_INT (-2147483647 - 1);") + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:SI 2 ""))]) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (and:SI (match_dup 0) (match_dup 2)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "operands[2] = GEN_INT (-2147483647 - 1);") + +;; ************************************************************************ +;; Implementation of conditional jumps here. 
+;; Compare with 0 (test) jumps +;; ************************************************************************ + +(define_insn "branch" + [(set (pc) + (if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* + return ret_cond_branch (operands[1], avr_jump_mode (operands[0],insn), 0);" + [(set_attr "type" "branch") + (set_attr "cc" "clobber")]) + +;; **************************************************************** +;; AVR does not have following conditional jumps: LE,LEU,GT,GTU. +;; Convert them all to proper jumps. +;; ****************************************************************/ + +(define_insn "difficult_branch" + [(set (pc) + (if_then_else (match_operator 1 "difficult_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* + return ret_cond_branch (operands[1], avr_jump_mode (operands[0],insn), 0);" + [(set_attr "type" "branch1") + (set_attr "cc" "clobber")]) + +;; revers branch + +(define_insn "rvbranch" + [(set (pc) + (if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "* + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1);" + [(set_attr "type" "branch1") + (set_attr "cc" "clobber")]) + +(define_insn "difficult_rvbranch" + [(set (pc) + (if_then_else (match_operator 1 "difficult_comparison_operator" + [(cc0) + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "* + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1);" + [(set_attr "type" "branch") + (set_attr "cc" "clobber")]) + +;; ************************************************************************** +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "*{ + if (AVR_HAVE_JMP_CALL && get_attr_length (insn) != 1) + return AS1 (jmp,%x0); + return AS1 (rjmp,%x0); +}" + [(set (attr "length") + (if_then_else (match_operand 0 "symbol_ref_operand" "") + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 1) + (const_int 2)) + (if_then_else (and (ge (minus (pc) (match_dup 0)) (const_int -2047)) + (le (minus (pc) (match_dup 0)) (const_int 2047))) + (const_int 1) + (const_int 2)))) + (set_attr "cc" "none")]) + +;; call + +(define_expand "call" + [(call (match_operand:HI 0 "call_insn_operand" "") + (match_operand:HI 1 "general_operand" ""))] + ;; Operand 1 not used on the AVR. + "" + "") + +;; call value + +(define_expand "call_value" + [(set (match_operand 0 "register_operand" "") + (call (match_operand:HI 1 "call_insn_operand" "") + (match_operand:HI 2 "general_operand" "")))] + ;; Operand 2 not used on the AVR. + "" + "") + +(define_insn "call_insn" + [(call (mem:HI (match_operand:HI 0 "nonmemory_operand" "!z,*r,s,n")) + (match_operand:HI 1 "general_operand" "X,X,X,X"))] +;; We don't need in saving Z register because r30,r31 is a call used registers + ;; Operand 1 not used on the AVR. 
+ "(register_operand (operands[0], HImode) || CONSTANT_P (operands[0]))" + "*{ + if (which_alternative==0) + return \"%!icall\"; + else if (which_alternative==1) + { + if (AVR_HAVE_MOVW) + return (AS2 (movw, r30, %0) CR_TAB + \"%!icall\"); + else + return (AS2 (mov, r30, %A0) CR_TAB + AS2 (mov, r31, %B0) CR_TAB + \"%!icall\"); + } + else if (which_alternative==2) + return AS1(%~call,%x0); + return (AS2 (ldi,r30,lo8(%0)) CR_TAB + AS2 (ldi,r31,hi8(%0)) CR_TAB + \"%!icall\"); +}" + [(set_attr "cc" "clobber,clobber,clobber,clobber") + (set_attr_alternative "length" + [(const_int 1) + (if_then_else (eq_attr "mcu_have_movw" "yes") + (const_int 2) + (const_int 3)) + (if_then_else (eq_attr "mcu_mega" "yes") + (const_int 2) + (const_int 1)) + (const_int 3)])]) + +(define_insn "call_value_insn" + [(set (match_operand 0 "register_operand" "=r,r,r,r") + (call (mem:HI (match_operand:HI 1 "nonmemory_operand" "!z,*r,s,n")) +;; We don't need in saving Z register because r30,r31 is a call used registers + (match_operand:HI 2 "general_operand" "X,X,X,X")))] + ;; Operand 2 not used on the AVR. + "(register_operand (operands[0], VOIDmode) || CONSTANT_P (operands[0]))" + "*{ + if (which_alternative==0) + return \"%!icall\"; + else if (which_alternative==1) + { + if (AVR_HAVE_MOVW) + return (AS2 (movw, r30, %1) CR_TAB + \"%!icall\"); + else + return (AS2 (mov, r30, %A1) CR_TAB + AS2 (mov, r31, %B1) CR_TAB + \"%!icall\"); + } + else if (which_alternative==2) + return AS1(%~call,%x1); + return (AS2 (ldi, r30, lo8(%1)) CR_TAB + AS2 (ldi, r31, hi8(%1)) CR_TAB + \"%!icall\"); +}" + [(set_attr "cc" "clobber,clobber,clobber,clobber") + (set_attr_alternative "length" + [(const_int 1) + (if_then_else (eq_attr "mcu_have_movw" "yes") + (const_int 2) + (const_int 3)) + (if_then_else (eq_attr "mcu_mega" "yes") + (const_int 2) + (const_int 1)) + (const_int 3)])]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +; indirect jump + +(define_expand "indirect_jump" + [(set (pc) (match_operand:HI 0 "nonmemory_operand" ""))] + "" + " if ((!AVR_HAVE_JMP_CALL) && !register_operand(operand0, HImode)) + { + operands[0] = copy_to_mode_reg(HImode, operand0); + }" +) + +; indirect jump +(define_insn "*jcindirect_jump" + [(set (pc) (match_operand:HI 0 "immediate_operand" "i"))] + "" + "@ + %~jmp %x0" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; +(define_insn "*njcindirect_jump" + [(set (pc) (match_operand:HI 0 "register_operand" "!z,*r"))] + "!AVR_HAVE_EIJMP_EICALL" + "@ + ijmp + push %A0\;push %B0\;ret" + [(set_attr "length" "1,3") + (set_attr "cc" "none,none")]) + +(define_insn "*indirect_jump_avr6" + [(set (pc) (match_operand:HI 0 "register_operand" "z"))] + "AVR_HAVE_EIJMP_EICALL" + "eijmp" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; table jump + +;; Table made from "rjmp" instructions for <=8K devices. +(define_insn "*tablejump_rjmp" + [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r")] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_dup 0))] + "(!AVR_HAVE_JMP_CALL) && (!AVR_HAVE_EIJMP_EICALL)" + "@ + ijmp + push %A0\;push %B0\;ret" + [(set_attr "length" "1,3") + (set_attr "cc" "none,none")]) + +;; Not a prologue, but similar idea - move the common piece of code to libgcc. 
+(define_insn "*tablejump_lib" + [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "z")] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_dup 0))] + "AVR_HAVE_JMP_CALL && TARGET_CALL_PROLOGUES" + "%~jmp __tablejump2__" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + +(define_insn "*tablejump_enh" + [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "z")] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_dup 0))] + "AVR_HAVE_JMP_CALL && AVR_HAVE_LPMX" + "lsl r30 + rol r31 + lpm __tmp_reg__,Z+ + lpm r31,Z + mov r30,__tmp_reg__ + %!ijmp" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +(define_insn "*tablejump" + [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "z")] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_dup 0))] + "AVR_HAVE_JMP_CALL && !AVR_HAVE_EIJMP_EICALL" + "lsl r30 + rol r31 + lpm + inc r30 + push r0 + lpm + push r0 + ret" + [(set_attr "length" "8") + (set_attr "cc" "clobber")]) + +(define_expand "casesi" + [(set (match_dup 6) + (minus:HI (subreg:HI (match_operand:SI 0 "register_operand" "") 0) + (match_operand:HI 1 "register_operand" ""))) + (parallel [(set (cc0) + (compare (match_dup 6) + (match_operand:HI 2 "register_operand" ""))) + (clobber (match_scratch:QI 9 ""))]) + + (set (pc) + (if_then_else (gtu (cc0) + (const_int 0)) + (label_ref (match_operand 4 "" "")) + (pc))) + + (set (match_dup 6) + (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" "")))) + + (parallel [(set (pc) (unspec:HI [(match_dup 6)] UNSPEC_INDEX_JMP)) + (use (label_ref (match_dup 3))) + (clobber (match_dup 6))])] + "" + " +{ + operands[6] = gen_reg_rtx (HImode); +}") + + +;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +;; This instruction sets Z flag + +(define_insn "sez" + [(set (cc0) (const_int 0))] + "" + "sez" + [(set_attr "length" "1") + (set_attr "cc" "compare")]) + +;; Clear/set/test a single bit in I/O address space. + +(define_insn "*cbi" + [(set (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (and:QI (mem:QI (match_dup 0)) + (match_operand:QI 1 "single_zero_operand" "n")))] + "(optimize > 0)" +{ + operands[2] = GEN_INT (exact_log2 (~INTVAL (operands[1]) & 0xff)); + return AS2 (cbi,%m0-0x20,%2); +} + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +(define_insn "*sbi" + [(set (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (ior:QI (mem:QI (match_dup 0)) + (match_operand:QI 1 "single_one_operand" "n")))] + "(optimize > 0)" +{ + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1]) & 0xff)); + return AS2 (sbi,%m0-0x20,%2); +} + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; Lower half of the I/O space - use sbic/sbis directly. +(define_insn "*sbix_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:HI + (mem:QI (match_operand 1 "low_io_address_operand" "n")) + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "(optimize > 0)" + "* return avr_out_sbxx_branch (insn, operands);" + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Tests of bit 7 are pessimized to sign tests, so we need this too... 
+(define_insn "*sbix_branch_bit7" + [(set (pc) + (if_then_else + (match_operator 0 "gelt_operator" + [(mem:QI (match_operand 1 "low_io_address_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "(optimize > 0)" +{ + operands[3] = operands[2]; + operands[2] = GEN_INT (7); + return avr_out_sbxx_branch (insn, operands); +} + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046)) + (le (minus (pc) (match_dup 2)) (const_int 2046))) + (const_int 2) + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Upper half of the I/O space - read port to __tmp_reg__ and use sbrc/sbrs. +(define_insn "*sbix_branch_tmp" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:HI + (mem:QI (match_operand 1 "high_io_address_operand" "n")) + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "(optimize > 0)" + "* return avr_out_sbxx_branch (insn, operands);" + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2045))) + (const_int 3) + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 3) + (const_int 5)))) + (set_attr "cc" "clobber")]) + +(define_insn "*sbix_branch_tmp_bit7" + [(set (pc) + (if_then_else + (match_operator 0 "gelt_operator" + [(mem:QI (match_operand 1 "high_io_address_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "(optimize > 0)" +{ + operands[3] = operands[2]; + operands[2] = GEN_INT (7); + return avr_out_sbxx_branch (insn, operands); +} + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046)) + (le (minus (pc) (match_dup 2)) (const_int 2045))) + (const_int 3) + (if_then_else (eq_attr "mcu_mega" "no") + (const_int 3) + (const_int 5)))) + (set_attr "cc" "clobber")]) + +;; ************************* Peepholes ******************************** + +(define_peephole + [(set (match_operand:SI 0 "d_register_operand" "") + (plus:SI (match_dup 0) + (const_int -1))) + (parallel + [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (ne (cc0) (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "* +{ + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn (AS2 (sbiw,%0,1) CR_TAB + AS2 (sbc,%C0,__zero_reg__) CR_TAB + AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands); + else + output_asm_insn (AS2 (subi,%A0,1) CR_TAB + AS2 (sbc,%B0,__zero_reg__) CR_TAB + AS2 (sbc,%C0,__zero_reg__) CR_TAB + AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands); + switch (avr_jump_mode (operands[2],insn)) + { + case 1: + return AS1 (brcc,%2); + case 2: + return (AS1 (brcs,.+2) CR_TAB + AS1 (rjmp,%2)); + } + return (AS1 (brcs,.+4) CR_TAB + AS1 (jmp,%2)); +}") + +(define_peephole + [(set (match_operand:HI 0 "d_register_operand" "") + (plus:HI (match_dup 0) + (const_int -1))) + (parallel + [(set (cc0) + (compare (match_dup 0) + (const_int 65535))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (ne (cc0) (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "* +{ + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn (AS2 (sbiw,%0,1), operands); + else + output_asm_insn (AS2 (subi,%A0,1) CR_TAB + AS2 (sbc,%B0,__zero_reg__) \"\\n\", 
operands); + switch (avr_jump_mode (operands[2],insn)) + { + case 1: + return AS1 (brcc,%2); + case 2: + return (AS1 (brcs,.+2) CR_TAB + AS1 (rjmp,%2)); + } + return (AS1 (brcs,.+4) CR_TAB + AS1 (jmp,%2)); +}") + +(define_peephole + [(set (match_operand:QI 0 "d_register_operand" "") + (plus:QI (match_dup 0) + (const_int -1))) + (set (cc0) + (compare (match_dup 0) + (const_int -1))) + (set (pc) + (if_then_else (ne (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "* +{ + CC_STATUS_INIT; + cc_status.value1 = operands[0]; + cc_status.flags |= CC_OVERFLOW_UNUSABLE; + output_asm_insn (AS2 (subi,%A0,1), operands); + switch (avr_jump_mode (operands[1],insn)) + { + case 1: + return AS1 (brcc,%1); + case 2: + return (AS1 (brcs,.+2) CR_TAB + AS1 (rjmp,%1)); + } + return (AS1 (brcs,.+4) CR_TAB + AS1 (jmp,%1)); +}") + +(define_peephole + [(set (cc0) + (compare (match_operand:QI 0 "register_operand" "") + (const_int 0))) + (set (pc) + (if_then_else (eq (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "jump_over_one_insn_p (insn, operands[1])" + "cpse %0,__zero_reg__") + +(define_peephole + [(set (cc0) + (compare (match_operand:QI 0 "register_operand" "") + (match_operand:QI 1 "register_operand" ""))) + (set (pc) + (if_then_else (eq (cc0) (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "jump_over_one_insn_p (insn, operands[2])" + "cpse %0,%1") + +;;pppppppppppppppppppppppppppppppppppppppppppppppppppp +;;prologue/epilogue support instructions + +(define_insn "popqi" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (pre_inc:HI (reg:HI REG_SP))))] + "" + "pop %0" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +;; Enable Interrupts +(define_insn "enable_interrupt" + [(unspec [(const_int 0)] UNSPEC_SEI)] + "" + "sei" + [(set_attr "length" "1") + (set_attr "cc" "none") + ]) + +;; Disable Interrupts +(define_insn "disable_interrupt" + [(unspec [(const_int 0)] UNSPEC_CLI)] + "" + "cli" + [(set_attr "length" "1") + (set_attr "cc" "none") + ]) + +;; Library prologue saves +(define_insn "call_prologue_saves" + [(unspec_volatile:HI [(const_int 0)] UNSPECV_PROLOGUE_SAVES) + (match_operand:HI 0 "immediate_operand" "") + (set (reg:HI REG_SP) (minus:HI + (reg:HI REG_SP) + (match_operand:HI 1 "immediate_operand" ""))) + (use (reg:HI REG_X)) + (clobber (reg:HI REG_Z))] + "" + "ldi r30,lo8(gs(1f)) + ldi r31,hi8(gs(1f)) + %~jmp __prologue_saves__+((18 - %0) * 2) +1:" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "mcu_mega" "yes") + (const_int 6) + (const_int 5))]) + (set_attr "cc" "clobber") + ]) + +; epilogue restores using library +(define_insn "epilogue_restores" + [(unspec_volatile:QI [(const_int 0)] UNSPECV_EPILOGUE_RESTORES) + (set (reg:HI REG_Y ) (plus:HI + (reg:HI REG_Y) + (match_operand:HI 0 "immediate_operand" ""))) + (set (reg:HI REG_SP) (reg:HI REG_Y)) + (clobber (reg:QI REG_Z))] + "" + "ldi r30, lo8(%0) + %~jmp __epilogue_restores__ + ((18 - %0) * 2)" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "mcu_mega" "yes") + (const_int 3) + (const_int 2))]) + (set_attr "cc" "clobber") + ]) + +; return +(define_insn "return" + [(return)] + "reload_completed && avr_simple_epilogue ()" + "ret" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_epilogue" + [(return)] + "(reload_completed + && cfun->machine + && !(cfun->machine->is_interrupt || cfun->machine->is_signal) + && !cfun->machine->is_naked)" + "ret" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn 
"return_from_interrupt_epilogue" + [(return)] + "(reload_completed + && cfun->machine + && (cfun->machine->is_interrupt || cfun->machine->is_signal) + && !cfun->machine->is_naked)" + "reti" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_naked_epilogue" + [(return)] + "(reload_completed + && cfun->machine + && cfun->machine->is_naked)" + "" + [(set_attr "cc" "none") + (set_attr "length" "0")]) + +(define_expand "prologue" + [(const_int 0)] + "" + " + { + expand_prologue (); + DONE; + }") + +(define_expand "epilogue" + [(const_int 0)] + "" + " + { + expand_epilogue (); + DONE; + }") diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt new file mode 100644 index 000000000..d9c3c0f27 --- /dev/null +++ b/gcc/config/avr/avr.opt @@ -0,0 +1,60 @@ +; Options for the ATMEL AVR port of the compiler. + +; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mcall-prologues +Target Report Mask(CALL_PROLOGUES) +Use subroutines for function prologues and epilogues + +mmcu= +Target RejectNegative Joined Var(avr_mcu_name) Init("avr2") +-mmcu=MCU Select the target MCU + +mdeb +Target Report Undocumented Mask(ALL_DEBUG) + +mint8 +Target Report Mask(INT8) +Use an 8-bit 'int' type + +mno-interrupts +Target Report RejectNegative Mask(NO_INTERRUPTS) +Change the stack pointer without disabling interrupts + +morder1 +Target Report Undocumented Mask(ORDER_1) + +morder2 +Target Report Undocumented Mask(ORDER_2) + +mshort-calls +Target Report Mask(SHORT_CALLS) +Use rjmp/rcall (limited range) on >8K devices + +mtiny-stack +Target Report Mask(TINY_STACK) +Change only the low 8 bits of the stack pointer + +mrelax +Target Report +Relax branches + +mpmem-wrap-around +Target Report +Make the linker relaxation machine assume that a program counter wrap-around occurs. diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md new file mode 100644 index 000000000..2ac8833bd --- /dev/null +++ b/gcc/config/avr/constraints.md @@ -0,0 +1,109 @@ +;; Constraint definitions for ATMEL AVR micro controllers. +;; Copyright (C) 2006, 2007 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +;; Register constraints + +(define_register_constraint "t" "R0_REG" + "Temporary register r0") + +(define_register_constraint "b" "BASE_POINTER_REGS" + "Base pointer registers (r28--r31)") + +(define_register_constraint "e" "POINTER_REGS" + "Pointer registers (r26--r31)") + +(define_register_constraint "w" "ADDW_REGS" + "Registers from r24 to r31. These registers + can be used in @samp{adiw} command.") + +(define_register_constraint "d" "LD_REGS" + "Registers from r16 to r31.") + +(define_register_constraint "l" "NO_LD_REGS" + "Registers from r0 to r15.") + +(define_register_constraint "a" "SIMPLE_LD_REGS" + "Registers from r16 to r23.") + +(define_register_constraint "x" "POINTER_X_REGS" + "Register pair X (r27:r26).") + +(define_register_constraint "y" "POINTER_Y_REGS" + "Register pair Y (r29:r28).") + +(define_register_constraint "z" "POINTER_Z_REGS" + "Register pair Z (r31:r30).") + +(define_register_constraint "q" "STACK_REG" + "Stack pointer register (SPH:SPL).") + +(define_constraint "I" + "Integer constant in the range 0 @dots{} 63." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 63"))) + +(define_constraint "J" + "Integer constant in the range -63 @dots{} 0." + (and (match_code "const_int") + (match_test "ival <= 0 && ival >= -63"))) + +(define_constraint "K" + "Integer constant 2." + (and (match_code "const_int") + (match_test "ival == 2"))) + +(define_constraint "L" + "Zero." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "M" + "Integer constant in the range 0 @dots{} 0xff." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 0xff"))) + +(define_constraint "N" + "Constant integer @minus{}1." + (and (match_code "const_int") + (match_test "ival == -1"))) + +(define_constraint "O" + "Constant integer 8, 16, or 24." + (and (match_code "const_int") + (match_test "ival == 8 || ival == 16 || ival == 24"))) + +(define_constraint "P" + "Constant integer 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "G" + "Constant float 0." + (and (match_code "const_double") + (match_test "op == CONST0_RTX (SFmode)"))) + +(define_constraint "R" + "Integer constant in the range -6 @dots{} 5." + (and (match_code "const_int") + (match_test "ival >= -6 && ival <= 5"))) + +(define_memory_constraint "Q" + "A memory address based on Y or Z pointer with displacement." + (and (match_code "mem") + (match_test "extra_constraint_Q (op)"))) diff --git a/gcc/config/avr/driver-avr.c b/gcc/config/avr/driver-avr.c new file mode 100755 index 000000000..6ab0bb822 --- /dev/null +++ b/gcc/config/avr/driver-avr.c @@ -0,0 +1,114 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009, 2010 Free Software Foundation, Inc. + Contributed by Anatoly Sokolov + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Current architecture. 
*/ +const struct base_arch_s *avr_current_arch = NULL; + +/* Current device. */ +const struct mcu_type_s *avr_current_device = NULL; + +/* Initialize avr_current_arch and avr_current_device variables. */ + +static void +avr_set_current_device (const char *name) +{ + + if (NULL != avr_current_arch) + return; + + for (avr_current_device = avr_mcu_types; avr_current_device->name; + avr_current_device++) + { + if (strcmp (avr_current_device->name, name) == 0) + break; + } + + avr_current_arch = &avr_arch_types[avr_current_device->arch]; +} + +/* Returns command line parameters that describe the device architecture. */ + +const char * +avr_device_to_arch (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-m ", avr_current_arch->arch_name, NULL); +} + +/* Returns command line parameters that describe start of date section. */ + +const char * +avr_device_to_data_start (int argc, const char **argv) +{ + unsigned long data_section_start; + char data_section_start_str[16]; + + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + if (avr_current_device->data_section_start + == avr_current_arch->default_data_section_start) + return NULL; + + data_section_start = 0x800000 + avr_current_device->data_section_start; + + snprintf (data_section_start_str, sizeof(data_section_start_str) - 1, + "0x%lX", data_section_start); + + return concat ("-Tdata ", data_section_start_str, NULL); +} + +/* Returns command line parameters that describe the device startfile. */ + +const char * +avr_device_to_startfiles (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("crt", avr_current_device->library_name, ".o%s", NULL); +} + +/* Returns command line parameters that describe the device library. */ + +const char * +avr_device_to_devicelib (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-l", avr_current_device->library_name, NULL); +} + diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S new file mode 100644 index 000000000..ac8e5cd94 --- /dev/null +++ b/gcc/config/avr/libgcc.S @@ -0,0 +1,901 @@ +/* -*- Mode: Asm -*- */ +/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009 + Free Software Foundation, Inc. + Contributed by Denis Chertykov + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define __zero_reg__ r1 +#define __tmp_reg__ r0 +#define __SREG__ 0x3f +#define __SP_H__ 0x3e +#define __SP_L__ 0x3d +#define __RAMPZ__ 0x3B +#define __EIND__ 0x3C + +/* Most of the functions here are called directly from avr.md + patterns, instead of using the standard libcall mechanisms. 
+ This can make better code because GCC knows exactly which + of the call-used registers (not all of them) are clobbered. */ + + .section .text.libgcc, "ax", @progbits + + .macro mov_l r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + movw \r_dest, \r_src +#else + mov \r_dest, \r_src +#endif + .endm + + .macro mov_h r_dest, r_src +#if defined (__AVR_HAVE_MOVW__) + ; empty +#else + mov \r_dest, \r_src +#endif + .endm + +/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ +#if !defined (__AVR_HAVE_MUL__) +/******************************************************* + Multiplication 8 x 8 +*******************************************************/ +#if defined (L_mulqi3) + +#define r_arg2 r22 /* multiplicand */ +#define r_arg1 r24 /* multiplier */ +#define r_res __tmp_reg__ /* result */ + + .global __mulqi3 + .func __mulqi3 +__mulqi3: + clr r_res ; clear result +__mulqi3_loop: + sbrc r_arg1,0 + add r_res,r_arg2 + add r_arg2,r_arg2 ; shift multiplicand + breq __mulqi3_exit ; while multiplicand != 0 + lsr r_arg1 ; + brne __mulqi3_loop ; exit if multiplier = 0 +__mulqi3_exit: + mov r_arg1,r_res ; result to return register + ret + +#undef r_arg2 +#undef r_arg1 +#undef r_res + +.endfunc +#endif /* defined (L_mulqi3) */ + +#if defined (L_mulqihi3) + .global __mulqihi3 + .func __mulqihi3 +__mulqihi3: + clr r25 + sbrc r24, 7 + dec r25 + clr r23 + sbrc r22, 7 + dec r22 + rjmp __mulhi3 + .endfunc +#endif /* defined (L_mulqihi3) */ + +#if defined (L_umulqihi3) + .global __umulqihi3 + .func __umulqihi3 +__umulqihi3: + clr r25 + clr r23 + rjmp __mulhi3 + .endfunc +#endif /* defined (L_umulqihi3) */ + +/******************************************************* + Multiplication 16 x 16 +*******************************************************/ +#if defined (L_mulhi3) +#define r_arg1L r24 /* multiplier Low */ +#define r_arg1H r25 /* multiplier High */ +#define r_arg2L r22 /* multiplicand Low */ +#define r_arg2H r23 /* multiplicand High */ +#define r_resL __tmp_reg__ /* result Low */ +#define r_resH r21 /* result High */ + + .global __mulhi3 + .func __mulhi3 +__mulhi3: + clr r_resH ; clear result + clr r_resL ; clear result +__mulhi3_loop: + sbrs r_arg1L,0 + rjmp __mulhi3_skip1 + add r_resL,r_arg2L ; result + multiplicand + adc r_resH,r_arg2H +__mulhi3_skip1: + add r_arg2L,r_arg2L ; shift multiplicand + adc r_arg2H,r_arg2H + + cp r_arg2L,__zero_reg__ + cpc r_arg2H,__zero_reg__ + breq __mulhi3_exit ; while multiplicand != 0 + + lsr r_arg1H ; gets LSB of multiplier + ror r_arg1L + sbiw r_arg1L,0 + brne __mulhi3_loop ; exit if multiplier = 0 +__mulhi3_exit: + mov r_arg1H,r_resH ; result to return register + mov r_arg1L,r_resL + ret + +#undef r_arg1L +#undef r_arg1H +#undef r_arg2L +#undef r_arg2H +#undef r_resL +#undef r_resH + +.endfunc +#endif /* defined (L_mulhi3) */ +#endif /* !defined (__AVR_HAVE_MUL__) */ + +#if defined (L_mulhisi3) + .global __mulhisi3 + .func __mulhisi3 +__mulhisi3: + mov_l r18, r24 + mov_h r19, r25 + clr r24 + sbrc r23, 7 + dec r24 + mov r25, r24 + clr r20 + sbrc r19, 7 + dec r20 + mov r21, r20 + rjmp __mulsi3 + .endfunc +#endif /* defined (L_mulhisi3) */ + +#if defined (L_umulhisi3) + .global __umulhisi3 + .func __umulhisi3 +__umulhisi3: + mov_l r18, r24 + mov_h r19, r25 + clr r24 + clr r25 + clr r20 + clr r21 + rjmp __mulsi3 + .endfunc +#endif /* defined (L_umulhisi3) */ + +#if defined (L_mulsi3) +/******************************************************* + Multiplication 32 x 32 +*******************************************************/ +#define r_arg1L r22 /* multiplier Low */ 
+#define r_arg1H r23 +#define r_arg1HL r24 +#define r_arg1HH r25 /* multiplier High */ + + +#define r_arg2L r18 /* multiplicand Low */ +#define r_arg2H r19 +#define r_arg2HL r20 +#define r_arg2HH r21 /* multiplicand High */ + +#define r_resL r26 /* result Low */ +#define r_resH r27 +#define r_resHL r30 +#define r_resHH r31 /* result High */ + + + .global __mulsi3 + .func __mulsi3 +__mulsi3: +#if defined (__AVR_HAVE_MUL__) + mul r_arg1L, r_arg2L + movw r_resL, r0 + mul r_arg1H, r_arg2H + movw r_resHL, r0 + mul r_arg1HL, r_arg2L + add r_resHL, r0 + adc r_resHH, r1 + mul r_arg1L, r_arg2HL + add r_resHL, r0 + adc r_resHH, r1 + mul r_arg1HH, r_arg2L + add r_resHH, r0 + mul r_arg1HL, r_arg2H + add r_resHH, r0 + mul r_arg1H, r_arg2HL + add r_resHH, r0 + mul r_arg1L, r_arg2HH + add r_resHH, r0 + clr r_arg1HH ; use instead of __zero_reg__ to add carry + mul r_arg1H, r_arg2L + add r_resH, r0 + adc r_resHL, r1 + adc r_resHH, r_arg1HH ; add carry + mul r_arg1L, r_arg2H + add r_resH, r0 + adc r_resHL, r1 + adc r_resHH, r_arg1HH ; add carry + movw r_arg1L, r_resL + movw r_arg1HL, r_resHL + clr r1 ; __zero_reg__ clobbered by "mul" + ret +#else + clr r_resHH ; clear result + clr r_resHL ; clear result + clr r_resH ; clear result + clr r_resL ; clear result +__mulsi3_loop: + sbrs r_arg1L,0 + rjmp __mulsi3_skip1 + add r_resL,r_arg2L ; result + multiplicand + adc r_resH,r_arg2H + adc r_resHL,r_arg2HL + adc r_resHH,r_arg2HH +__mulsi3_skip1: + add r_arg2L,r_arg2L ; shift multiplicand + adc r_arg2H,r_arg2H + adc r_arg2HL,r_arg2HL + adc r_arg2HH,r_arg2HH + + lsr r_arg1HH ; gets LSB of multiplier + ror r_arg1HL + ror r_arg1H + ror r_arg1L + brne __mulsi3_loop + sbiw r_arg1HL,0 + cpc r_arg1H,r_arg1L + brne __mulsi3_loop ; exit if multiplier = 0 +__mulsi3_exit: + mov_h r_arg1HH,r_resHH ; result to return register + mov_l r_arg1HL,r_resHL + mov_h r_arg1H,r_resH + mov_l r_arg1L,r_resL + ret +#endif /* defined (__AVR_HAVE_MUL__) */ +#undef r_arg1L +#undef r_arg1H +#undef r_arg1HL +#undef r_arg1HH + + +#undef r_arg2L +#undef r_arg2H +#undef r_arg2HL +#undef r_arg2HH + +#undef r_resL +#undef r_resH +#undef r_resHL +#undef r_resHH + +.endfunc +#endif /* defined (L_mulsi3) */ + +/******************************************************* + Division 8 / 8 => (result + remainder) +*******************************************************/ +#define r_rem r25 /* remainder */ +#define r_arg1 r24 /* dividend, quotient */ +#define r_arg2 r22 /* divisor */ +#define r_cnt r23 /* loop count */ + +#if defined (L_udivmodqi4) + .global __udivmodqi4 + .func __udivmodqi4 +__udivmodqi4: + sub r_rem,r_rem ; clear remainder and carry + ldi r_cnt,9 ; init loop counter + rjmp __udivmodqi4_ep ; jump to entry point +__udivmodqi4_loop: + rol r_rem ; shift dividend into remainder + cp r_rem,r_arg2 ; compare remainder & divisor + brcs __udivmodqi4_ep ; remainder <= divisor + sub r_rem,r_arg2 ; restore remainder +__udivmodqi4_ep: + rol r_arg1 ; shift dividend (with CARRY) + dec r_cnt ; decrement loop counter + brne __udivmodqi4_loop + com r_arg1 ; complement result + ; because C flag was complemented in loop + ret + .endfunc +#endif /* defined (L_udivmodqi4) */ + +#if defined (L_divmodqi4) + .global __divmodqi4 + .func __divmodqi4 +__divmodqi4: + bst r_arg1,7 ; store sign of dividend + mov __tmp_reg__,r_arg1 + eor __tmp_reg__,r_arg2; r0.7 is sign of result + sbrc r_arg1,7 + neg r_arg1 ; dividend negative : negate + sbrc r_arg2,7 + neg r_arg2 ; divisor negative : negate + rcall __udivmodqi4 ; do the unsigned div/mod + brtc __divmodqi4_1 + neg r_rem ; 
correct remainder sign +__divmodqi4_1: + sbrc __tmp_reg__,7 + neg r_arg1 ; correct result sign +__divmodqi4_exit: + ret + .endfunc +#endif /* defined (L_divmodqi4) */ + +#undef r_rem +#undef r_arg1 +#undef r_arg2 +#undef r_cnt + + +/******************************************************* + Division 16 / 16 => (result + remainder) +*******************************************************/ +#define r_remL r26 /* remainder Low */ +#define r_remH r27 /* remainder High */ + +/* return: remainder */ +#define r_arg1L r24 /* dividend Low */ +#define r_arg1H r25 /* dividend High */ + +/* return: quotient */ +#define r_arg2L r22 /* divisor Low */ +#define r_arg2H r23 /* divisor High */ + +#define r_cnt r21 /* loop count */ + +#if defined (L_udivmodhi4) + .global __udivmodhi4 + .func __udivmodhi4 +__udivmodhi4: + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + ldi r_cnt,17 ; init loop counter + rjmp __udivmodhi4_ep ; jump to entry point +__udivmodhi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + brcs __udivmodhi4_ep ; remainder < divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H +__udivmodhi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + dec r_cnt ; decrement loop counter + brne __udivmodhi4_loop + com r_arg1L + com r_arg1H +; div/mod results to return registers, as for the div() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + ret + .endfunc +#endif /* defined (L_udivmodhi4) */ + +#if defined (L_divmodhi4) + .global __divmodhi4 + .func __divmodhi4 +__divmodhi4: + .global _div +_div: + bst r_arg1H,7 ; store sign of dividend + mov __tmp_reg__,r_arg1H + eor __tmp_reg__,r_arg2H ; r0.7 is sign of result + rcall __divmodhi4_neg1 ; dividend negative : negate + sbrc r_arg2H,7 + rcall __divmodhi4_neg2 ; divisor negative : negate + rcall __udivmodhi4 ; do the unsigned div/mod + rcall __divmodhi4_neg1 ; correct remainder sign + tst __tmp_reg__ + brpl __divmodhi4_exit +__divmodhi4_neg2: + com r_arg2H + neg r_arg2L ; correct divisor/result sign + sbci r_arg2H,0xff +__divmodhi4_exit: + ret +__divmodhi4_neg1: + brtc __divmodhi4_exit + com r_arg1H + neg r_arg1L ; correct dividend/remainder sign + sbci r_arg1H,0xff + ret + .endfunc +#endif /* defined (L_divmodhi4) */ + +#undef r_remH +#undef r_remL + +#undef r_arg1H +#undef r_arg1L + +#undef r_arg2H +#undef r_arg2L + +#undef r_cnt + +/******************************************************* + Division 32 / 32 => (result + remainder) +*******************************************************/ +#define r_remHH r31 /* remainder High */ +#define r_remHL r30 +#define r_remH r27 +#define r_remL r26 /* remainder Low */ + +/* return: remainder */ +#define r_arg1HH r25 /* dividend High */ +#define r_arg1HL r24 +#define r_arg1H r23 +#define r_arg1L r22 /* dividend Low */ + +/* return: quotient */ +#define r_arg2HH r21 /* divisor High */ +#define r_arg2HL r20 +#define r_arg2H r19 +#define r_arg2L r18 /* divisor Low */ + +#define r_cnt __zero_reg__ /* loop count (0 after the loop!) 
*/ + +#if defined (L_udivmodsi4) + .global __udivmodsi4 + .func __udivmodsi4 +__udivmodsi4: + ldi r_remL, 33 ; init loop counter + mov r_cnt, r_remL + sub r_remL,r_remL + sub r_remH,r_remH ; clear remainder and carry + mov_l r_remHL, r_remL + mov_h r_remHH, r_remH + rjmp __udivmodsi4_ep ; jump to entry point +__udivmodsi4_loop: + rol r_remL ; shift dividend into remainder + rol r_remH + rol r_remHL + rol r_remHH + cp r_remL,r_arg2L ; compare remainder & divisor + cpc r_remH,r_arg2H + cpc r_remHL,r_arg2HL + cpc r_remHH,r_arg2HH + brcs __udivmodsi4_ep ; remainder <= divisor + sub r_remL,r_arg2L ; restore remainder + sbc r_remH,r_arg2H + sbc r_remHL,r_arg2HL + sbc r_remHH,r_arg2HH +__udivmodsi4_ep: + rol r_arg1L ; shift dividend (with CARRY) + rol r_arg1H + rol r_arg1HL + rol r_arg1HH + dec r_cnt ; decrement loop counter + brne __udivmodsi4_loop + ; __zero_reg__ now restored (r_cnt == 0) + com r_arg1L + com r_arg1H + com r_arg1HL + com r_arg1HH +; div/mod results to return registers, as for the ldiv() function + mov_l r_arg2L, r_arg1L ; quotient + mov_h r_arg2H, r_arg1H + mov_l r_arg2HL, r_arg1HL + mov_h r_arg2HH, r_arg1HH + mov_l r_arg1L, r_remL ; remainder + mov_h r_arg1H, r_remH + mov_l r_arg1HL, r_remHL + mov_h r_arg1HH, r_remHH + ret + .endfunc +#endif /* defined (L_udivmodsi4) */ + +#if defined (L_divmodsi4) + .global __divmodsi4 + .func __divmodsi4 +__divmodsi4: + bst r_arg1HH,7 ; store sign of dividend + mov __tmp_reg__,r_arg1HH + eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result + rcall __divmodsi4_neg1 ; dividend negative : negate + sbrc r_arg2HH,7 + rcall __divmodsi4_neg2 ; divisor negative : negate + rcall __udivmodsi4 ; do the unsigned div/mod + rcall __divmodsi4_neg1 ; correct remainder sign + rol __tmp_reg__ + brcc __divmodsi4_exit +__divmodsi4_neg2: + com r_arg2HH + com r_arg2HL + com r_arg2H + neg r_arg2L ; correct divisor/quotient sign + sbci r_arg2H,0xff + sbci r_arg2HL,0xff + sbci r_arg2HH,0xff +__divmodsi4_exit: + ret +__divmodsi4_neg1: + brtc __divmodsi4_exit + com r_arg1HH + com r_arg1HL + com r_arg1H + neg r_arg1L ; correct dividend/remainder sign + sbci r_arg1H, 0xff + sbci r_arg1HL,0xff + sbci r_arg1HH,0xff + ret + .endfunc +#endif /* defined (L_divmodsi4) */ + +/********************************** + * This is a prologue subroutine + **********************************/ +#if defined (L_prologue) + + .global __prologue_saves__ + .func __prologue_saves__ +__prologue_saves__: + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + push r16 + push r17 + push r28 + push r29 + in r28,__SP_L__ + in r29,__SP_H__ + sub r28,r26 + sbc r29,r27 + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +.endfunc +#endif /* defined (L_prologue) */ + +/* + * This is an epilogue subroutine + */ +#if defined (L_epilogue) + + .global __epilogue_restores__ + .func __epilogue_restores__ +__epilogue_restores__: + ldd r2,Y+18 + ldd r3,Y+17 + ldd r4,Y+16 + ldd r5,Y+15 + ldd r6,Y+14 + ldd r7,Y+13 + ldd r8,Y+12 + ldd r9,Y+11 + ldd r10,Y+10 + ldd r11,Y+9 + ldd r12,Y+8 + ldd r13,Y+7 + ldd r14,Y+6 + ldd r15,Y+5 + ldd r16,Y+4 + ldd r17,Y+3 + ldd r26,Y+2 + ldd r27,Y+1 + add r28,r30 + adc r29,__zero_reg__ + in __tmp_reg__,__SREG__ + cli + out __SP_H__,r29 + out __SREG__,__tmp_reg__ + out __SP_L__,r28 + mov_l r28, r26 + mov_h r29, r27 + ret +.endfunc +#endif /* defined (L_epilogue) */ + +#ifdef L_exit 
+ .section .fini9,"ax",@progbits + .global _exit + .func _exit +_exit: + .weak exit +exit: + .endfunc + + /* Code from .fini8 ... .fini1 sections inserted by ld script. */ + + .section .fini0,"ax",@progbits + cli +__stop_program: + rjmp __stop_program +#endif /* defined (L_exit) */ + +#ifdef L_cleanup + .weak _cleanup + .func _cleanup +_cleanup: + ret +.endfunc +#endif /* defined (L_cleanup) */ + +#ifdef L_tablejump + .global __tablejump2__ + .func __tablejump2__ +__tablejump2__: + lsl r30 + rol r31 + .global __tablejump__ +__tablejump__: +#if defined (__AVR_HAVE_LPMX__) + lpm __tmp_reg__, Z+ + lpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#else + lpm + adiw r30, 1 + push r0 + lpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif + .endfunc +#endif /* defined (L_tablejump) */ + +#ifdef L_copy_data + .section .init4,"ax",@progbits + .global __do_copy_data +__do_copy_data: +#if defined(__AVR_HAVE_ELPMX__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start) + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm r0, Z+ + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + ldi r16, hh8(__data_load_start - 0x10000) +.L__do_copy_data_carry: + inc r16 + out __RAMPZ__, r16 + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: + elpm + st X+, r0 + adiw r30, 1 + brcs .L__do_copy_data_carry +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) + ldi r17, hi8(__data_end) + ldi r26, lo8(__data_start) + ldi r27, hi8(__data_start) + ldi r30, lo8(__data_load_start) + ldi r31, hi8(__data_load_start) + rjmp .L__do_copy_data_start +.L__do_copy_data_loop: +#if defined (__AVR_HAVE_LPMX__) + lpm r0, Z+ +#else + lpm + adiw r30, 1 +#endif + st X+, r0 +.L__do_copy_data_start: + cpi r26, lo8(__data_end) + cpc r27, r17 + brne .L__do_copy_data_loop +#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ +#endif /* L_copy_data */ + +/* __do_clear_bss is only necessary if there is anything in .bss section. */ + +#ifdef L_clear_bss + .section .init4,"ax",@progbits + .global __do_clear_bss +__do_clear_bss: + ldi r17, hi8(__bss_end) + ldi r26, lo8(__bss_start) + ldi r27, hi8(__bss_start) + rjmp .do_clear_bss_start +.do_clear_bss_loop: + st X+, __zero_reg__ +.do_clear_bss_start: + cpi r26, lo8(__bss_end) + cpc r27, r17 + brne .do_clear_bss_loop +#endif /* L_clear_bss */ + +/* __do_global_ctors and __do_global_dtors are only necessary + if there are any constructors/destructors. 
*/ + +#if defined (__AVR_HAVE_JMP_CALL__) +#define XCALL call +#else +#define XCALL rcall +#endif + +#ifdef L_ctors + .section .init6,"ax",@progbits + .global __do_global_ctors +#if defined(__AVR_HAVE_RAMPZ__) +__do_global_ctors: + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + ldi r16, hh8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + ldi r24, hh8(__ctors_start) + cpc r16, r24 + brne .L__do_global_ctors_loop +#else +__do_global_ctors: + ldi r17, hi8(__ctors_start) + ldi r28, lo8(__ctors_end) + ldi r29, hi8(__ctors_end) + rjmp .L__do_global_ctors_start +.L__do_global_ctors_loop: + sbiw r28, 2 + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ +.L__do_global_ctors_start: + cpi r28, lo8(__ctors_start) + cpc r29, r17 + brne .L__do_global_ctors_loop +#endif /* defined(__AVR_HAVE_RAMPZ__) */ +#endif /* L_ctors */ + +#ifdef L_dtors + .section .fini6,"ax",@progbits + .global __do_global_dtors +#if defined(__AVR_HAVE_RAMPZ__) +__do_global_dtors: + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + ldi r16, hh8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + sbiw r28, 2 + sbc r16, __zero_reg__ + mov_h r31, r29 + mov_l r30, r28 + out __RAMPZ__, r16 + XCALL __tablejump_elpm__ +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + ldi r24, hh8(__dtors_end) + cpc r16, r24 + brne .L__do_global_dtors_loop +#else +__do_global_dtors: + ldi r17, hi8(__dtors_end) + ldi r28, lo8(__dtors_start) + ldi r29, hi8(__dtors_start) + rjmp .L__do_global_dtors_start +.L__do_global_dtors_loop: + mov_h r31, r29 + mov_l r30, r28 + XCALL __tablejump__ + adiw r28, 2 +.L__do_global_dtors_start: + cpi r28, lo8(__dtors_end) + cpc r29, r17 + brne .L__do_global_dtors_loop +#endif /* defined(__AVR_HAVE_RAMPZ__) */ +#endif /* L_dtors */ + +#ifdef L_tablejump_elpm + .global __tablejump_elpm__ + .func __tablejump_elpm__ +__tablejump_elpm__: +#if defined (__AVR_HAVE_ELPM__) +#if defined (__AVR_HAVE_LPMX__) + elpm __tmp_reg__, Z+ + elpm r31, Z + mov r30, __tmp_reg__ +#if defined (__AVR_HAVE_EIJMP_EICALL__) + eijmp +#else + ijmp +#endif + +#else + elpm + adiw r30, 1 + push r0 + elpm + push r0 +#if defined (__AVR_HAVE_EIJMP_EICALL__) + in __tmp_reg__, __EIND__ + push __tmp_reg__ +#endif + ret +#endif +#endif /* defined (__AVR_HAVE_ELPM__) */ + .endfunc +#endif /* defined (L_tablejump_elpm) */ + diff --git a/gcc/config/avr/predicates.md b/gcc/config/avr/predicates.md new file mode 100755 index 000000000..9a3473bf8 --- /dev/null +++ b/gcc/config/avr/predicates.md @@ -0,0 +1,140 @@ +;; Predicate definitions for ATMEL AVR micro controllers. +;; Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Registers from r0 to r15. +(define_predicate "l_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) <= 15"))) + +;; Registers from r16 to r31. +(define_predicate "d_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) >= 16 && REGNO (op) <= 31"))) + +(define_predicate "even_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) <= 31") + (match_test "(REGNO (op) & 1) == 0")))) + +(define_predicate "odd_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) <= 31") + (match_test "(REGNO (op) & 1) != 0")))) + +;; SP register. +(define_predicate "stack_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_SP"))) + +;; Return true if OP is a valid address for lower half of I/O space. +(define_predicate "low_io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE((INTVAL (op)), 0x20, 0x3F)"))) + +;; Return true if OP is a valid address for high half of I/O space. +(define_predicate "high_io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE((INTVAL (op)), 0x40, 0x5F)"))) + +;; Return true if OP is a valid address of I/O space. +(define_predicate "io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE((INTVAL (op)), 0x20, (0x60 - GET_MODE_SIZE(mode)))"))) + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_double") + (match_test "op == CONST0_RTX (mode)"))) + +;; Returns true if OP is either the constant zero or a register. +(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Returns 1 if OP is a SYMBOL_REF. +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +;; Return true if OP is a text segment reference. +;; This is needed for program memory address expressions. +(define_predicate "text_segment_operand" + (match_code "code_label,label_ref,symbol_ref,plus,const") +{ + switch (GET_CODE (op)) + { + case CODE_LABEL: + return true; + case LABEL_REF : + return true; + case SYMBOL_REF : + return SYMBOL_REF_FUNCTION_P (op); + case PLUS : + /* Assume canonical format of symbol + constant. + Fall through. */ + case CONST : + return text_segment_operand (XEXP (op, 0), VOIDmode); + default : + return false; + } +}) + +;; Return true if OP is a constant that contains only one 1 in its +;; binary representation. +(define_predicate "single_one_operand" + (and (match_code "const_int") + (match_test "exact_log2(INTVAL (op) & GET_MODE_MASK (mode)) >= 0"))) + +;; Return true if OP is a constant that contains only one 0 in its +;; binary representation. 
+(define_predicate "single_zero_operand" + (and (match_code "const_int") + (match_test "exact_log2(~INTVAL (op) & GET_MODE_MASK (mode)) >= 0"))) + +;; +(define_predicate "avr_sp_immediate_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= -6 && INTVAL (op) <= 5"))) + +;; True for EQ & NE +(define_predicate "eqne_operator" + (match_code "eq,ne")) + +;; True for GE & LT +(define_predicate "gelt_operator" + (match_code "ge,lt")) + +;; True for GT, GTU, LE & LEU +(define_predicate "difficult_comparison_operator" + (match_code "gt,gtu,le,leu")) + +;; False for GT, GTU, LE & LEU +(define_predicate "simple_comparison_operator" + (and (match_operand 0 "comparison_operator") + (not (match_code "gt,gtu,le,leu")))) + +;; Return true if OP is a valid call operand. +(define_predicate "call_insn_operand" + (and (match_code "mem") + (ior (match_test "register_operand (XEXP (op, 0), mode)") + (match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))")))) + +;; True for register that is pseudo register. +(define_predicate "pseudo_register_operand" + (and (match_code "reg") + (match_test "!HARD_REGISTER_P (op)"))) diff --git a/gcc/config/avr/rtems.h b/gcc/config/avr/rtems.h new file mode 100644 index 000000000..efd8aface --- /dev/null +++ b/gcc/config/avr/rtems.h @@ -0,0 +1,28 @@ +/* Definitions for rtems targeting a AVR using ELF. + Copyright (C) 2004, 2007 Free Software Foundation, Inc. + Contributed by Ralf Corsepius (ralf.corsepius@rtems.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Specify predefined symbols in preprocessor. */ + +#define TARGET_OS_CPP_BUILTINS() \ +do { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ +} while (0) diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr new file mode 100644 index 000000000..18769ebb2 --- /dev/null +++ b/gcc/config/avr/t-avr @@ -0,0 +1,225 @@ +# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, +# 2009, 2010 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +driver-avr.o: $(srcdir)/config/avr/driver-avr.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr-devices.o: $(srcdir)/config/avr/avr-devices.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + +avr-c.o: $(srcdir)/config/avr/avr-c.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(C_COMMON_H) + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + + + +LIB1ASMSRC = avr/libgcc.S +LIB1ASMFUNCS = \ + _mulqi3 \ + _mulhi3 \ + _mulsi3 \ + _udivmodqi4 \ + _divmodqi4 \ + _udivmodhi4 \ + _divmodhi4 \ + _udivmodsi4 \ + _divmodsi4 \ + _prologue \ + _epilogue \ + _exit \ + _cleanup \ + _tablejump \ + _tablejump_elpm \ + _copy_data \ + _clear_bss \ + _ctors \ + _dtors + +# We do not have the DF type. +# Most of the C functions in libgcc2 use almost all registers, +# so use -mcall-prologues for smaller code size. +TARGET_LIBGCC2_CFLAGS = -DDF=SF -Dinhibit_libc -mcall-prologues -Os + +fp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/avr/t-avr + echo '#define FLOAT' > fp-bit.c + echo '#define FLOAT_ONLY' >> fp-bit.c + echo '#define CMPtype QItype' >> fp-bit.c + echo '#define DF SF' >> fp-bit.c + echo '#define DI SI' >> fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c + echo '#define SMALL_MACHINE' >> fp-bit.c + echo 'typedef int QItype __attribute__ ((mode (QI)));' >> fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +FPBIT = fp-bit.c + +MULTILIB_OPTIONS = mmcu=avr2/mmcu=avr25/mmcu=avr3/mmcu=avr31/mmcu=avr35/mmcu=avr4/mmcu=avr5/mmcu=avr51/mmcu=avr6 +MULTILIB_DIRNAMES = avr2 avr25 avr3 avr31 avr35 avr4 avr5 avr51 avr6 + +# The many avr2 matches are not listed here - this is the default. +MULTILIB_MATCHES = \ + mmcu?avr25=mmcu?ata6289 \ + mmcu?avr25=mmcu?attiny13 \ + mmcu?avr25=mmcu?attiny13a \ + mmcu?avr25=mmcu?attiny2313 \ + mmcu?avr25=mmcu?attiny2313a \ + mmcu?avr25=mmcu?attiny4313 \ + mmcu?avr25=mmcu?attiny24 \ + mmcu?avr25=mmcu?attiny24a \ + mmcu?avr25=mmcu?attiny44 \ + mmcu?avr25=mmcu?attiny44a \ + mmcu?avr25=mmcu?attiny84 \ + mmcu?avr25=mmcu?attiny84a \ + mmcu?avr25=mmcu?attiny25 \ + mmcu?avr25=mmcu?attiny45 \ + mmcu?avr25=mmcu?attiny85 \ + mmcu?avr25=mmcu?attiny261 \ + mmcu?avr25=mmcu?attiny261a \ + mmcu?avr25=mmcu?attiny461 \ + mmcu?avr25=mmcu?attiny461a \ + mmcu?avr25=mmcu?attiny861 \ + mmcu?avr25=mmcu?attiny861a \ + mmcu?avr25=mmcu?attiny43u \ + mmcu?avr25=mmcu?attiny87 \ + mmcu?avr25=mmcu?attiny48 \ + mmcu?avr25=mmcu?attiny88 \ + mmcu?avr25=mmcu?at86rf401 \ + mmcu?avr3=mmcu?at43usb355 \ + mmcu?avr3=mmcu?at76c711 \ + mmcu?avr31=mmcu?atmega103 \ + mmcu?avr31=mmcu?at43usb320 \ + mmcu?avr35=mmcu?at90usb82 \ + mmcu?avr35=mmcu?at90usb162 \ + mmcu?avr35=mmcu?atmega8u2 \ + mmcu?avr35=mmcu?atmega16u2 \ + mmcu?avr35=mmcu?atmega32u2 \ + mmcu?avr35=mmcu?attiny167 \ + mmcu?avr4=mmcu?atmega48 \ + mmcu?avr4=mmcu?atmega48a \ + mmcu?avr4=mmcu?atmega48p \ + mmcu?avr4=mmcu?atmega8 \ + mmcu?avr4=mmcu?atmega8515 \ + mmcu?avr4=mmcu?atmega8535 \ + mmcu?avr4=mmcu?atmega88 \ + mmcu?avr4=mmcu?atmega88a \ + mmcu?avr4=mmcu?atmega88p \ + mmcu?avr4=mmcu?atmega88pa \ + mmcu?avr4=mmcu?atmega8hva \ + mmcu?avr4=mmcu?at90pwm1 \ + mmcu?avr4=mmcu?at90pwm2 \ + mmcu?avr4=mmcu?at90pwm2b \ + mmcu?avr4=mmcu?at90pwm3 \ + mmcu?avr4=mmcu?at90pwm3b \ + mmcu?avr4=mmcu?at90pwm81 \ + mmcu?avr5=mmcu?atmega16 \ + mmcu?avr5=mmcu?atmega16a \ + mmcu?avr5=mmcu?atmega161 \ + mmcu?avr5=mmcu?atmega162 \ + mmcu?avr5=mmcu?atmega163 \ + mmcu?avr5=mmcu?atmega164a \ + mmcu?avr5=mmcu?atmega164p \ + 
mmcu?avr5=mmcu?atmega165 \ + mmcu?avr5=mmcu?atmega165a \ + mmcu?avr5=mmcu?atmega165p \ + mmcu?avr5=mmcu?atmega168 \ + mmcu?avr5=mmcu?atmega168a \ + mmcu?avr5=mmcu?atmega168p \ + mmcu?avr5=mmcu?atmega169 \ + mmcu?avr5=mmcu?atmega169a \ + mmcu?avr5=mmcu?atmega169p \ + mmcu?avr5=mmcu?atmega169pa \ + mmcu?avr5=mmcu?atmega32 \ + mmcu?avr5=mmcu?atmega323 \ + mmcu?avr5=mmcu?atmega324a \ + mmcu?avr5=mmcu?atmega324p \ + mmcu?avr5=mmcu?atmega324pa \ + mmcu?avr5=mmcu?atmega325 \ + mmcu?avr5=mmcu?atmega325a \ + mmcu?avr5=mmcu?atmega325p \ + mmcu?avr5=mmcu?atmega3250 \ + mmcu?avr5=mmcu?atmega3250a \ + mmcu?avr5=mmcu?atmega3250p \ + mmcu?avr5=mmcu?atmega328 \ + mmcu?avr5=mmcu?atmega328p \ + mmcu?avr5=mmcu?atmega329 \ + mmcu?avr5=mmcu?atmega329a \ + mmcu?avr5=mmcu?atmega329p \ + mmcu?avr5=mmcu?atmega329pa \ + mmcu?avr5=mmcu?atmega3290 \ + mmcu?avr5=mmcu?atmega3290a \ + mmcu?avr5=mmcu?atmega3290p \ + mmcu?avr5=mmcu?atmega406 \ + mmcu?avr5=mmcu?atmega64 \ + mmcu?avr5=mmcu?atmega640 \ + mmcu?avr5=mmcu?atmega644 \ + mmcu?avr5=mmcu?atmega644a \ + mmcu?avr5=mmcu?atmega644p \ + mmcu?avr5=mmcu?atmega644pa \ + mmcu?avr5=mmcu?atmega645 \ + mmcu?avr5=mmcu?atmega645a \ + mmcu?avr5=mmcu?atmega645p \ + mmcu?avr5=mmcu?atmega6450 \ + mmcu?avr5=mmcu?atmega6450a \ + mmcu?avr5=mmcu?atmega6450p \ + mmcu?avr5=mmcu?atmega649 \ + mmcu?avr5=mmcu?atmega649a \ + mmcu?avr5=mmcu?atmega649p \ + mmcu?avr5=mmcu?atmega6490 \ + mmcu?avr5=mmcu?atmega6490a \ + mmcu?avr5=mmcu?atmega6490p \ + mmcu?avr5=mmcu?atmega16hva \ + mmcu?avr5=mmcu?atmega16hva2 \ + mmcu?avr5=mmcu?atmega16hvb \ + mmcu?avr5=mmcu?atmega32hvb \ + mmcu?avr5=mmcu?atmega64hve \ + mmcu?avr5=mmcu?at90can32 \ + mmcu?avr5=mmcu?at90can64 \ + mmcu?avr5=mmcu?at90pwm216 \ + mmcu?avr5=mmcu?at90pwm316 \ + mmcu?avr5=mmcu?atmega32c1 \ + mmcu?avr5=mmcu?atmega64c1 \ + mmcu?avr5=mmcu?atmega16m1 \ + mmcu?avr5=mmcu?atmega32m1 \ + mmcu?avr5=mmcu?atmega64m1 \ + mmcu?avr5=mmcu?atmega16u4 \ + mmcu?avr5=mmcu?atmega32u4 \ + mmcu?avr5=mmcu?atmega32u6 \ + mmcu?avr5=mmcu?at90scr100 \ + mmcu?avr5=mmcu?at90usb646 \ + mmcu?avr5=mmcu?at90usb647 \ + mmcu?avr5=mmcu?at94k \ + mmcu?avr5=mmcu?m3000 \ + mmcu?avr51=mmcu?atmega128 \ + mmcu?avr51=mmcu?atmega1280 \ + mmcu?avr51=mmcu?atmega1281 \ + mmcu?avr51=mmcu?atmega1284p \ + mmcu?avr51=mmcu?atmega128rfa1 \ + mmcu?avr51=mmcu?at90can128 \ + mmcu?avr51=mmcu?at90usb1286 \ + mmcu?avr51=mmcu?at90usb1287 \ + mmcu?avr6=mmcu?atmega2560 \ + mmcu?avr6=mmcu?atmega2561 + +MULTILIB_EXCEPTIONS = + +LIBGCC = stmp-multilib +INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/avr/t-rtems b/gcc/config/avr/t-rtems new file mode 100644 index 000000000..a3ef8bd80 --- /dev/null +++ b/gcc/config/avr/t-rtems @@ -0,0 +1,3 @@ +# Multilibs for avr RTEMS targets. + +# ATM, this is just a stub diff --git a/gcc/config/bfin/bfin-modes.def b/gcc/config/bfin/bfin-modes.def new file mode 100644 index 000000000..27459cc13 --- /dev/null +++ b/gcc/config/bfin/bfin-modes.def @@ -0,0 +1,28 @@ +/* Definitions of target machine for GNU compiler, for Blackfin. + Copyright (C) 2005, 2007 Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* PDImode for the 40-bit accumulators. */ +PARTIAL_INT_MODE (DI); + +/* Two of those - covering both accumulators for vector multiplications. */ +VECTOR_MODE (INT, PDI, 2); + +VECTOR_MODE (INT, HI, 2); /* V2HI */ +VECTOR_MODE (INT, SI, 2); /* V2SI - occasionally used. */ diff --git a/gcc/config/bfin/bfin-protos.h b/gcc/config/bfin/bfin-protos.h new file mode 100644 index 000000000..1e85e16ff --- /dev/null +++ b/gcc/config/bfin/bfin-protos.h @@ -0,0 +1,122 @@ +/* Prototypes for Blackfin functions used in the md file & elsewhere. + Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Function prototypes that cannot exist in bfin.h due to dependency + complications. */ +#ifndef GCC_BFIN_PROTOS_H +#define GCC_BFIN_PROTOS_H + +/* For the anomaly 05-00-0245 */ +#define WA_SPECULATIVE_LOADS 0x00000001 +#define ENABLE_WA_SPECULATIVE_LOADS \ + (bfin_workarounds & WA_SPECULATIVE_LOADS) + +/* For the anomaly 05-00-0244 */ +#define WA_SPECULATIVE_SYNCS 0x00000002 +#define ENABLE_WA_SPECULATIVE_SYNCS \ + (bfin_workarounds & WA_SPECULATIVE_SYNCS) + +/* For the anomaly 05-00-0371 */ +#define WA_RETS 0x00000004 +#define ENABLE_WA_RETS \ + (bfin_workarounds & WA_RETS) + +/* For the anomaly 05-00-0426 */ +#define WA_INDIRECT_CALLS 0x00000008 +#define ENABLE_WA_INDIRECT_CALLS \ + ((bfin_workarounds & WA_INDIRECT_CALLS) && !TARGET_ICPLB) + +#define WA_05000257 0x00000010 +#define ENABLE_WA_05000257 \ + (bfin_workarounds & WA_05000257) + +#define WA_05000283 0x00000020 +#define ENABLE_WA_05000283 \ + (bfin_workarounds & WA_05000283) + +#define WA_05000315 0x00000040 +#define ENABLE_WA_05000315 \ + (bfin_workarounds & WA_05000315) + +/* For the anomaly 05-00-0312 */ +#define WA_LOAD_LCREGS 0x00000080 +#define ENABLE_WA_LOAD_LCREGS \ + (bfin_workarounds & WA_LOAD_LCREGS) + +#define WA_05000074 0x00000100 +#define ENABLE_WA_05000074 \ + (bfin_workarounds & WA_05000074) + +#define Mmode enum machine_mode + +extern bool function_arg_regno_p (int); + +extern const char *output_load_immediate (rtx *); +extern const char *output_casesi_internal (rtx *); +extern char *bfin_asm_long (void); +extern char *bfin_asm_short (void); +extern int log2constp (unsigned HOST_WIDE_INT); + +extern bool bfin_legitimate_constant_p (rtx); +extern int hard_regno_mode_ok (int, Mmode); +extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx); +extern HOST_WIDE_INT bfin_initial_elimination_offset (int, int); + +extern int effective_address_32bit_p (rtx, Mmode); +extern int symbolic_reference_mentioned_p (rtx); +extern rtx bfin_gen_compare (rtx, Mmode); +extern bool expand_move (rtx *, Mmode); +extern void bfin_expand_call (rtx, rtx, rtx, rtx, int); +extern bool bfin_longcall_p (rtx, int); +extern bool 
bfin_dsp_memref_p (rtx); +extern bool bfin_expand_movmem (rtx, rtx, rtx, rtx); + +extern int bfin_register_move_cost (enum machine_mode, enum reg_class, + enum reg_class); +extern int bfin_memory_move_cost (enum machine_mode, enum reg_class, int in); +extern enum reg_class secondary_input_reload_class (enum reg_class, Mmode, + rtx); +extern enum reg_class secondary_output_reload_class (enum reg_class, Mmode, + rtx); +extern char *section_asm_op_1 (SECT_ENUM_T); +extern char *section_asm_op (SECT_ENUM_T); +extern void print_operand (FILE *, rtx, char); +extern void print_address_operand (FILE *, rtx); +extern void split_di (rtx [], int, rtx [], rtx []); +extern int split_load_immediate (rtx []); +extern void emit_pic_move (rtx *, Mmode); +extern void asm_conditional_branch (rtx, rtx *, int, int); +extern rtx bfin_gen_compare (rtx, Mmode); + +extern unsigned bfin_local_alignment (tree, unsigned); +extern rtx bfin_va_arg (tree, tree); + +extern void bfin_expand_prologue (void); +extern void bfin_expand_epilogue (int, int, bool); +extern int push_multiple_operation (rtx, Mmode); +extern int pop_multiple_operation (rtx, Mmode); +extern void output_push_multiple (rtx, rtx *); +extern void output_pop_multiple (rtx, rtx *); +extern int bfin_hard_regno_rename_ok (unsigned int, unsigned int); +extern rtx bfin_return_addr_rtx (int); +extern void bfin_hardware_loop (void); +#undef Mmode + +#endif + diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c new file mode 100644 index 000000000..60cd09eff --- /dev/null +++ b/gcc/config/bfin/bfin.c @@ -0,0 +1,6695 @@ +/* The Blackfin code generation auxiliary output file. + Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "tree.h" +#include "flags.h" +#include "except.h" +#include "function.h" +#include "input.h" +#include "target.h" +#include "target-def.h" +#include "expr.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "optabs.h" +#include "ggc.h" +#include "integrate.h" +#include "cgraph.h" +#include "langhooks.h" +#include "bfin-protos.h" +#include "tm-preds.h" +#include "tm-constrs.h" +#include "gt-bfin.h" +#include "basic-block.h" +#include "cfglayout.h" +#include "timevar.h" +#include "df.h" +#include "sel-sched.h" + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +struct GTY(()) machine_function +{ + /* Set if we are notified by the doloop pass that a hardware loop + was created. */ + int has_hardware_loops; + + /* Set if we create a memcpy pattern that uses loop registers. 
*/ + int has_loopreg_clobber; +}; + +/* RTX for condition code flag register and RETS register */ +extern GTY(()) rtx bfin_cc_rtx; +extern GTY(()) rtx bfin_rets_rtx; +rtx bfin_cc_rtx, bfin_rets_rtx; + +int max_arg_registers = 0; + +/* Arrays used when emitting register names. */ +const char *short_reg_names[] = SHORT_REGISTER_NAMES; +const char *high_reg_names[] = HIGH_REGISTER_NAMES; +const char *dregs_pair_names[] = DREGS_PAIR_NAMES; +const char *byte_reg_names[] = BYTE_REGISTER_NAMES; + +static int arg_regs[] = FUNCTION_ARG_REGISTERS; +static int ret_regs[] = FUNCTION_RETURN_REGISTERS; + +/* Nonzero if -mshared-library-id was given. */ +static int bfin_lib_id_given; + +/* -mcpu support */ +bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN; + +/* -msi-revision support. There are three special values: + -1 -msi-revision=none. + 0xffff -msi-revision=any. */ +int bfin_si_revision; + +/* The workarounds enabled */ +unsigned int bfin_workarounds = 0; + +struct bfin_cpu +{ + const char *name; + bfin_cpu_t type; + int si_revision; + unsigned int workarounds; +}; + +struct bfin_cpu bfin_cpus[] = +{ + {"bf512", BFIN_CPU_BF512, 0x0000, + WA_SPECULATIVE_LOADS | WA_05000074}, + + {"bf514", BFIN_CPU_BF514, 0x0000, + WA_SPECULATIVE_LOADS | WA_05000074}, + + {"bf516", BFIN_CPU_BF516, 0x0000, + WA_SPECULATIVE_LOADS | WA_05000074}, + + {"bf518", BFIN_CPU_BF518, 0x0000, + WA_SPECULATIVE_LOADS | WA_05000074}, + + {"bf522", BFIN_CPU_BF522, 0x0002, + WA_SPECULATIVE_LOADS | WA_05000074}, + {"bf522", BFIN_CPU_BF522, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + {"bf522", BFIN_CPU_BF522, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + + {"bf523", BFIN_CPU_BF523, 0x0002, + WA_SPECULATIVE_LOADS | WA_05000074}, + {"bf523", BFIN_CPU_BF523, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + {"bf523", BFIN_CPU_BF523, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + + {"bf524", BFIN_CPU_BF524, 0x0002, + WA_SPECULATIVE_LOADS | WA_05000074}, + {"bf524", BFIN_CPU_BF524, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + {"bf524", BFIN_CPU_BF524, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + + {"bf525", BFIN_CPU_BF525, 0x0002, + WA_SPECULATIVE_LOADS | WA_05000074}, + {"bf525", BFIN_CPU_BF525, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + {"bf525", BFIN_CPU_BF525, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + + {"bf526", BFIN_CPU_BF526, 0x0002, + WA_SPECULATIVE_LOADS | WA_05000074}, + {"bf526", BFIN_CPU_BF526, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + {"bf526", BFIN_CPU_BF526, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + + {"bf527", BFIN_CPU_BF527, 0x0002, + WA_SPECULATIVE_LOADS | WA_05000074}, + {"bf527", BFIN_CPU_BF527, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + {"bf527", BFIN_CPU_BF527, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074}, + + {"bf531", BFIN_CPU_BF531, 0x0006, + WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074}, + {"bf531", BFIN_CPU_BF531, 0x0005, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 + | WA_LOAD_LCREGS | WA_05000074}, + {"bf531", BFIN_CPU_BF531, 0x0004, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + {"bf531", BFIN_CPU_BF531, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf532", BFIN_CPU_BF532, 0x0006, + WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074}, 
+ {"bf532", BFIN_CPU_BF532, 0x0005, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 + | WA_LOAD_LCREGS | WA_05000074}, + {"bf532", BFIN_CPU_BF532, 0x0004, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + {"bf532", BFIN_CPU_BF532, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf533", BFIN_CPU_BF533, 0x0006, + WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074}, + {"bf533", BFIN_CPU_BF533, 0x0005, + WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 + | WA_LOAD_LCREGS | WA_05000074}, + {"bf533", BFIN_CPU_BF533, 0x0004, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + {"bf533", BFIN_CPU_BF533, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf534", BFIN_CPU_BF534, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074}, + {"bf534", BFIN_CPU_BF534, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + {"bf534", BFIN_CPU_BF534, 0x0001, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf536", BFIN_CPU_BF536, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074}, + {"bf536", BFIN_CPU_BF536, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + {"bf536", BFIN_CPU_BF536, 0x0001, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf537", BFIN_CPU_BF537, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074}, + {"bf537", BFIN_CPU_BF537, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + {"bf537", BFIN_CPU_BF537, 0x0001, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf538", BFIN_CPU_BF538, 0x0005, + WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074}, + {"bf538", BFIN_CPU_BF538, 0x0004, + WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074}, + {"bf538", BFIN_CPU_BF538, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS + | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074}, + {"bf538", BFIN_CPU_BF538, 0x0002, + WA_SPECULATIVE_LOADS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf539", BFIN_CPU_BF539, 0x0005, + WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074}, + {"bf539", BFIN_CPU_BF539, 0x0004, + WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074}, + {"bf539", BFIN_CPU_BF539, 0x0003, + WA_SPECULATIVE_LOADS | WA_RETS + | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074}, + {"bf539", BFIN_CPU_BF539, 0x0002, + WA_SPECULATIVE_LOADS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf542m", BFIN_CPU_BF542M, 0x0003, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + + {"bf542", BFIN_CPU_BF542, 0x0002, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf542", 
BFIN_CPU_BF542, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf542", BFIN_CPU_BF542, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf544m", BFIN_CPU_BF544M, 0x0003, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + + {"bf544", BFIN_CPU_BF544, 0x0002, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf544", BFIN_CPU_BF544, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf544", BFIN_CPU_BF544, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf547m", BFIN_CPU_BF547M, 0x0003, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + + {"bf547", BFIN_CPU_BF547, 0x0002, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf547", BFIN_CPU_BF547, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf547", BFIN_CPU_BF547, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf548m", BFIN_CPU_BF548M, 0x0003, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + + {"bf548", BFIN_CPU_BF548, 0x0002, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf548", BFIN_CPU_BF548, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf548", BFIN_CPU_BF548, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf549m", BFIN_CPU_BF549M, 0x0003, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + + {"bf549", BFIN_CPU_BF549, 0x0002, + WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf549", BFIN_CPU_BF549, 0x0001, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074}, + {"bf549", BFIN_CPU_BF549, 0x0000, + WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS + | WA_05000074}, + + {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS + | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074}, + {"bf561", BFIN_CPU_BF561, 0x0003, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + {"bf561", BFIN_CPU_BF561, 0x0002, + WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS + | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS + | WA_05000074}, + + {NULL, BFIN_CPU_UNKNOWN, 0, 0} +}; + +int splitting_for_sched, splitting_loops; + +static void +bfin_globalize_label (FILE *stream, const char *name) +{ + fputs (".global ", stream); + assemble_name (stream, name); + fputc (';',stream); + fputc ('\n',stream); +} + +static void +output_file_start (void) +{ + FILE *file = asm_out_file; + int i; + + fprintf (file, ".file \"%s\";\n", input_filename); + + for (i = 0; arg_regs[i] >= 0; i++) + ; + max_arg_registers = i; /* how many arg reg used */ +} + +/* Examine machine-dependent attributes of function type FUNTYPE and return its + type. See the definition of E_FUNKIND. */ + +static e_funkind +funkind (const_tree funtype) +{ + tree attrs = TYPE_ATTRIBUTES (funtype); + if (lookup_attribute ("interrupt_handler", attrs)) + return INTERRUPT_HANDLER; + else if (lookup_attribute ("exception_handler", attrs)) + return EXCPT_HANDLER; + else if (lookup_attribute ("nmi_handler", attrs)) + return NMI_HANDLER; + else + return SUBROUTINE; +} + +/* Legitimize PIC addresses. If the address is already position-independent, + we return ORIG. Newly generated position-independent addresses go into a + reg. 
This is REG if nonzero, otherwise we allocate register(s) as + necessary. PICREG is the register holding the pointer to the PIC offset + table. */ + +static rtx +legitimize_pic_address (rtx orig, rtx reg, rtx picreg) +{ + rtx addr = orig; + rtx new_rtx = orig; + + if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF) + { + int unspec; + rtx tmp; + + if (TARGET_ID_SHARED_LIBRARY) + unspec = UNSPEC_MOVE_PIC; + else if (GET_CODE (addr) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (addr)) + unspec = UNSPEC_FUNCDESC_GOT17M4; + else + unspec = UNSPEC_MOVE_FDPIC; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec); + new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp)); + + emit_move_insn (reg, new_rtx); + if (picreg == pic_offset_table_rtx) + crtl->uses_pic_offset_table = 1; + return reg; + } + + else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS) + { + rtx base; + + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (XEXP (addr, 0) == picreg) + return orig; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + base = legitimize_pic_address (XEXP (addr, 0), reg, picreg); + addr = legitimize_pic_address (XEXP (addr, 1), + base == reg ? NULL_RTX : reg, + picreg); + + if (GET_CODE (addr) == CONST_INT) + { + gcc_assert (! reload_in_progress && ! reload_completed); + addr = force_reg (Pmode, addr); + } + + if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0)); + addr = XEXP (addr, 1); + } + + return gen_rtx_PLUS (Pmode, base, addr); + } + + return new_rtx; +} + +/* Stack frame layout. */ + +/* For a given REGNO, determine whether it must be saved in the function + prologue. IS_INTHANDLER specifies whether we're generating a normal + prologue or an interrupt/exception one. */ +static bool +must_save_p (bool is_inthandler, unsigned regno) +{ + if (D_REGNO_P (regno)) + { + bool is_eh_return_reg = false; + if (crtl->calls_eh_return) + { + unsigned j; + for (j = 0; ; j++) + { + unsigned test = EH_RETURN_DATA_REGNO (j); + if (test == INVALID_REGNUM) + break; + if (test == regno) + is_eh_return_reg = true; + } + } + + return (is_eh_return_reg + || (df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno]))); + } + else if (P_REGNO_P (regno)) + { + return ((df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno])) + || (is_inthandler + && (ENABLE_WA_05000283 || ENABLE_WA_05000315) + && regno == REG_P5) + || (!TARGET_FDPIC + && regno == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY && !current_function_is_leaf)))); + } + else + return ((is_inthandler || !call_used_regs[regno]) + && (df_regs_ever_live_p (regno) + || (!leaf_function_p () && call_used_regs[regno]))); + +} + +/* Compute the number of DREGS to save with a push_multiple operation. + This could include registers that aren't modified in the function, + since push_multiple only takes a range of registers. + If IS_INTHANDLER, then everything that is live must be saved, even + if normally call-clobbered. + If CONSECUTIVE, return the number of registers we can save in one + instruction with a push/pop multiple instruction. 
*/ + +static int +n_dregs_to_save (bool is_inthandler, bool consecutive) +{ + int count = 0; + unsigned i; + + for (i = REG_R7 + 1; i-- != REG_R0;) + { + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + } + return count; +} + +/* Like n_dregs_to_save, but compute number of PREGS to save. */ + +static int +n_pregs_to_save (bool is_inthandler, bool consecutive) +{ + int count = 0; + unsigned i; + + for (i = REG_P5 + 1; i-- != REG_P0;) + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + return count; +} + +/* Determine if we are going to save the frame pointer in the prologue. */ + +static bool +must_save_fp_p (void) +{ + return df_regs_ever_live_p (REG_FP); +} + +/* Determine if we are going to save the RETS register. */ +static bool +must_save_rets_p (void) +{ + return df_regs_ever_live_p (REG_RETS); +} + +static bool +stack_frame_needed_p (void) +{ + /* EH return puts a new return address into the frame using an + address relative to the frame pointer. */ + if (crtl->calls_eh_return) + return true; + return frame_pointer_needed; +} + +/* Emit code to save registers in the prologue. SAVEALL is nonzero if we + must save all registers; this is used for interrupt handlers. + SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing + this for an interrupt (or exception) handler. */ + +static void +expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler) +{ + rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); + rtx predec = gen_rtx_MEM (SImode, predec1); + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int dregno, pregno; + int total_consec = ndregs_consec + npregs_consec; + int i, d_to_save; + + if (saveall || is_inthandler) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT)); + + RTX_FRAME_RELATED_P (insn) = 1; + for (dregno = REG_LT0; dregno <= REG_LB1; dregno++) + if (! 
current_function_is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber + || (ENABLE_WA_05000257 + && (dregno == REG_LC0 || dregno == REG_LC1))) + { + insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (total_consec != 0) + { + rtx insn; + rtx val = GEN_INT (-total_consec * 4); + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2)); + + XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val), + UNSPEC_PUSH_MULTIPLE); + XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, + spreg, + val)); + RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1; + d_to_save = ndregs_consec; + dregno = REG_R7 + 1 - ndregs_consec; + pregno = REG_P5 + 1 - npregs_consec; + for (i = 0; i < total_consec; i++) + { + rtx memref = gen_rtx_MEM (word_mode, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (- i * 4 - 4))); + rtx subpat; + if (d_to_save > 0) + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + dregno++)); + d_to_save--; + } + else + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + pregno++)); + } + XVECEXP (pat, 0, i + 1) = subpat; + RTX_FRAME_RELATED_P (subpat) = 1; + } + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + + for (dregno = REG_R0; ndregs != ndregs_consec; dregno++) + { + if (must_save_p (is_inthandler, dregno)) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno)); + RTX_FRAME_RELATED_P (insn) = 1; + ndregs--; + } + } + for (pregno = REG_P0; npregs != npregs_consec; pregno++) + { + if (must_save_p (is_inthandler, pregno)) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno)); + RTX_FRAME_RELATED_P (insn) = 1; + npregs--; + } + } + for (i = REG_P7 + 1; i < REG_CC; i++) + if (saveall + || (is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + rtx insn; + if (i == REG_A0 || i == REG_A1) + insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1), + gen_rtx_REG (PDImode, i)); + else + insn = emit_move_insn (predec, gen_rtx_REG (SImode, i)); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Emit code to restore registers in the epilogue. SAVEALL is nonzero if we + must save all registers; this is used for interrupt handlers. + SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing + this for an interrupt (or exception) handler. */ + +static void +expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler) +{ + rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); + rtx postinc = gen_rtx_MEM (SImode, postinc1); + + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int total_consec = ndregs_consec + npregs_consec; + int i, regno; + rtx insn; + + /* A slightly crude technique to stop flow from trying to delete "dead" + insns. 
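For the consecutive run of registers, expand_prologue_reg_save above builds a single PARALLEL: as the RTL is written, the stack pointer drops by 4 bytes per register and the i-th register of the run is stored at SP - 4*i - 4, data registers first and pointer registers after them. The stand-alone sketch below just prints that layout for an assumed run of three D registers and two P registers; the register list is an example, not taken from a real function.

#include <stdio.h>

int main (void)
{
  /* Assumed consecutive run: three data regs, then two pointer regs.  */
  const char *regs[] = { "R5", "R6", "R7", "P4", "P5" };
  int total = 5;

  printf ("SP adjustment: %d bytes\n", -4 * total);
  for (int i = 0; i < total; i++)
    printf ("  [SP%+d] = %s\n", -4 * i - 4, regs[i]);
  return 0;
}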
*/ + MEM_VOLATILE_P (postinc) = 1; + + for (i = REG_CC - 1; i > REG_P7; i--) + if (saveall + || (is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + if (i == REG_A0 || i == REG_A1) + { + rtx mem = gen_rtx_MEM (PDImode, postinc1); + MEM_VOLATILE_P (mem) = 1; + emit_move_insn (gen_rtx_REG (PDImode, i), mem); + } + else + emit_move_insn (gen_rtx_REG (SImode, i), postinc); + } + + regno = REG_P5 - npregs_consec; + for (; npregs != npregs_consec; regno--) + { + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + npregs--; + } + } + regno = REG_R7 - ndregs_consec; + for (; ndregs != ndregs_consec; regno--) + { + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + ndregs--; + } + } + + if (total_consec != 0) + { + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1)); + XVECEXP (pat, 0, 0) + = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (total_consec * 4))); + + if (npregs_consec > 0) + regno = REG_P5 + 1; + else + regno = REG_R7 + 1; + + for (i = 0; i < total_consec; i++) + { + rtx addr = (i > 0 + ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4)) + : spreg); + rtx memref = gen_rtx_MEM (word_mode, addr); + + regno--; + XVECEXP (pat, 0, i + 1) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref); + + if (npregs_consec > 0) + { + if (--npregs_consec == 0) + regno = REG_R7 + 1; + } + } + + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (saveall || is_inthandler) + { + for (regno = REG_LB1; regno >= REG_LT0; regno--) + if (! current_function_is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber + || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1))) + emit_move_insn (gen_rtx_REG (SImode, regno), postinc); + + emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc); + } +} + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. + + CUM is as above. + + MODE and TYPE are the mode and type of the current parameter. + + PRETEND_SIZE is a variable that should be set to the amount of stack + that must be pushed by the prolog to pretend that our caller pushed + it. + + Normally, this macro will push all remaining incoming registers on the + stack and set PRETEND_SIZE to the length of the registers pushed. + + Blackfin specific : + - VDSP C compiler manual (our ABI) says that a variable args function + should save the R0, R1 and R2 registers in the stack. + - The caller will always leave space on the stack for the + arguments that are passed in registers, so we dont have + to leave any extra space. + - now, the vastart pointer can access all arguments from the stack. */ + +static void +setup_incoming_varargs (CUMULATIVE_ARGS *cum, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, int *pretend_size, + int no_rtl) +{ + rtx mem; + int i; + + if (no_rtl) + return; + + /* The move for named arguments will be generated automatically by the + compiler. We need to generate the move rtx for the unnamed arguments + if they are in the first 3 words. We assume at least 1 named argument + exists, so we never generate [ARGP] = R0 here. 
*/ + + for (i = cum->words + 1; i < max_arg_registers; i++) + { + mem = gen_rtx_MEM (Pmode, + plus_constant (arg_pointer_rtx, (i * UNITS_PER_WORD))); + emit_move_insn (mem, gen_rtx_REG (Pmode, i)); + } + + *pretend_size = 0; +} + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may + be accessed via the stack pointer) in functions that seem suitable. */ + +static bool +bfin_frame_pointer_required (void) +{ + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + + if (fkind != SUBROUTINE) + return true; + + /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used, + so we have to override it for non-leaf functions. */ + if (TARGET_OMIT_LEAF_FRAME_POINTER && ! current_function_is_leaf) + return true; + + return false; +} + +/* Return the number of registers pushed during the prologue. */ + +static int +n_regs_saved_by_prologue (void) +{ + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + bool is_inthandler = fkind != SUBROUTINE; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE + || (is_inthandler && !current_function_is_leaf)); + int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false); + int n = ndregs + npregs; + int i; + + if (all || stack_frame_needed_p ()) + n += 2; + else + { + if (must_save_fp_p ()) + n++; + if (must_save_rets_p ()) + n++; + } + + if (fkind != SUBROUTINE || all) + { + /* Increment once for ASTAT. */ + n++; + if (! current_function_is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber) + { + n += 6; + } + } + + if (fkind != SUBROUTINE) + { + /* RETE/X/N. */ + if (lookup_attribute ("nesting", attrs)) + n++; + } + + for (i = REG_P7 + 1; i < REG_CC; i++) + if (all + || (fkind != SUBROUTINE + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + n += i == REG_A0 || i == REG_A1 ? 2 : 1; + + return n; +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. + + All other eliminations are valid. */ + +static bool +bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true); +} + +/* Return the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +HOST_WIDE_INT +bfin_initial_elimination_offset (int from, int to) +{ + HOST_WIDE_INT offset = 0; + + if (from == ARG_POINTER_REGNUM) + offset = n_regs_saved_by_prologue () * 4; + + if (to == STACK_POINTER_REGNUM) + { + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + offset += crtl->outgoing_args_size; + else if (crtl->outgoing_args_size) + offset += FIXED_STACK_AREA; + + offset += get_frame_size (); + } + + return offset; +} + +/* Emit code to load a constant CONSTANT into register REG; setting + RTX_FRAME_RELATED_P on all insns we generate if RELATED is true. + Make sure that the insns we generate need not be split. 
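bfin_initial_elimination_offset above computes the distance between the two registers being eliminated: eliminating the argument pointer skips the prologue save area (4 bytes per saved register), and eliminating towards the stack pointer additionally adds the outgoing-argument area (bumped up to FIXED_STACK_AREA whenever any outgoing arguments exist) plus the frame size. The stand-alone restatement below mirrors that arithmetic; the FIXED_STACK_AREA value and the sample inputs are placeholders for the sketch, not values read from the port headers.

#include <stdio.h>

#define FIXED_STACK_AREA 12     /* placeholder value for the sketch */

static long elimination_offset (int from_arg_pointer, int to_stack_pointer,
                                int regs_saved, long outgoing_args,
                                long frame_size)
{
  long offset = 0;

  if (from_arg_pointer)
    offset = regs_saved * 4L;           /* skip the prologue save area */

  if (to_stack_pointer)
    {
      /* Outgoing-argument area: at least FIXED_STACK_AREA if any is used.  */
      if (outgoing_args >= FIXED_STACK_AREA)
        offset += outgoing_args;
      else if (outgoing_args)
        offset += FIXED_STACK_AREA;
      offset += frame_size;
    }
  return offset;
}

int main (void)
{
  /* Three saved registers, 8 bytes of outgoing args, 16-byte frame.  */
  printf ("AP->SP offset: %ld bytes\n", elimination_offset (1, 1, 3, 8, 16));
  return 0;
}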
*/ + +static void +frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related) +{ + rtx insn; + rtx cst = GEN_INT (constant); + + if (constant >= -32768 && constant < 65536) + insn = emit_move_insn (reg, cst); + else + { + /* We don't call split_load_immediate here, since dwarf2out.c can get + confused about some of the more clever sequences it can generate. */ + insn = emit_insn (gen_movsi_high (reg, cst)); + if (related) + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movsi_low (reg, reg, cst)); + } + if (related) + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate efficient code to add a value to a P register. + Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ + +static void +add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p) +{ + if (value == 0) + return; + + /* Choose whether to use a sequence using a temporary register, or + a sequence with multiple adds. We can add a signed 7-bit value + in one instruction. */ + if (value > 120 || value < -120) + { + rtx tmpreg; + rtx tmpreg2; + rtx insn; + + tmpreg2 = NULL_RTX; + + /* For prologue or normal epilogue, P1 can be safely used + as the temporary register. For sibcall epilogue, we try to find + a call used P register, which will be restored in epilogue. + If we cannot find such a P register, we have to use one I register + to help us. */ + + if (epilogue_p >= 0) + tmpreg = gen_rtx_REG (SImode, REG_P1); + else + { + int i; + for (i = REG_P0; i <= REG_P5; i++) + if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) + || (!TARGET_FDPIC + && i == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY + && ! current_function_is_leaf)))) + break; + if (i <= REG_P5) + tmpreg = gen_rtx_REG (SImode, i); + else + { + tmpreg = gen_rtx_REG (SImode, REG_P1); + tmpreg2 = gen_rtx_REG (SImode, REG_I0); + emit_move_insn (tmpreg2, tmpreg); + } + } + + if (frame) + frame_related_constant_load (tmpreg, value, TRUE); + else + insn = emit_move_insn (tmpreg, GEN_INT (value)); + + insn = emit_insn (gen_addsi3 (reg, reg, tmpreg)); + if (frame) + RTX_FRAME_RELATED_P (insn) = 1; + + if (tmpreg2 != NULL_RTX) + emit_move_insn (tmpreg, tmpreg2); + } + else + do + { + int size = value; + rtx insn; + + if (size > 60) + size = 60; + else if (size < -60) + /* We could use -62, but that would leave the stack unaligned, so + it's no good. */ + size = -60; + + insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); + if (frame) + RTX_FRAME_RELATED_P (insn) = 1; + value -= size; + } + while (value != 0); +} + +/* Generate a LINK insn for a frame sized FRAME_SIZE. If this constant + is too large, generate a sequence of insns that has the same effect. + SPREG contains (reg:SI REG_SP). */ + +static void +emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size) +{ + HOST_WIDE_INT link_size = frame_size; + rtx insn; + int i; + + if (link_size > 262140) + link_size = 262140; + + /* Use a LINK insn with as big a constant as possible, then subtract + any remaining size from the SP. 
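For adjustments outside the +/-120 range, add_to_reg above loads the value into a temporary P register; for smaller values it falls into the loop at the end of the function and emits adds of at most +/-60 bytes so the stack stays 4-byte aligned. The fragment below mirrors only that chunking loop, as a stand-alone sketch with an assumed adjustment of -100 bytes.

#include <stdio.h>

int main (void)
{
  long value = -100;                      /* assumed SP adjustment */

  do
    {
      /* Clamp each step to +/-60, exactly as the loop above does.  */
      int size = value > 60 ? 60 : value < -60 ? -60 : (int) value;
      printf ("SP += %d;\n", size);
      value -= size;
    }
  while (value != 0);
  return 0;
}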
*/ + insn = emit_insn (gen_link (GEN_INT (-8 - link_size))); + RTX_FRAME_RELATED_P (insn) = 1; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx set = XVECEXP (PATTERN (insn), 0, i); + gcc_assert (GET_CODE (set) == SET); + RTX_FRAME_RELATED_P (set) = 1; + } + + frame_size -= link_size; + + if (frame_size > 0) + { + /* Must use a call-clobbered PREG that isn't the static chain. */ + rtx tmpreg = gen_rtx_REG (Pmode, REG_P1); + + frame_related_constant_load (tmpreg, -frame_size, TRUE); + insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg)); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Return the number of bytes we must reserve for outgoing arguments + in the current function's stack frame. */ + +static HOST_WIDE_INT +arg_area_size (void) +{ + if (crtl->outgoing_args_size) + { + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + return crtl->outgoing_args_size; + else + return FIXED_STACK_AREA; + } + return 0; +} + +/* Save RETS and FP, and allocate a stack frame. ALL is true if the + function must save all its registers (true only for certain interrupt + handlers). */ + +static void +do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all) +{ + frame_size += arg_area_size (); + + if (all + || stack_frame_needed_p () + || (must_save_rets_p () && must_save_fp_p ())) + emit_link_insn (spreg, frame_size); + else + { + if (must_save_rets_p ()) + { + rtx pat = gen_movsi (gen_rtx_MEM (Pmode, + gen_rtx_PRE_DEC (Pmode, spreg)), + bfin_rets_rtx); + rtx insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (must_save_fp_p ()) + { + rtx pat = gen_movsi (gen_rtx_MEM (Pmode, + gen_rtx_PRE_DEC (Pmode, spreg)), + gen_rtx_REG (Pmode, REG_FP)); + rtx insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + add_to_reg (spreg, -frame_size, 1, 0); + } +} + +/* Like do_link, but used for epilogues to deallocate the stack frame. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ + +static void +do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p) +{ + frame_size += arg_area_size (); + + if (stack_frame_needed_p ()) + emit_insn (gen_unlink ()); + else + { + rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg)); + + add_to_reg (spreg, frame_size, 0, epilogue_p); + if (all || must_save_fp_p ()) + { + rtx fpreg = gen_rtx_REG (Pmode, REG_FP); + emit_move_insn (fpreg, postinc); + emit_use (fpreg); + } + if (all || must_save_rets_p ()) + { + emit_move_insn (bfin_rets_rtx, postinc); + emit_use (bfin_rets_rtx); + } + } +} + +/* Generate a prologue suitable for a function of kind FKIND. This is + called for interrupt and exception handler prologues. + SPREG contains (reg:SI REG_SP). */ + +static void +expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all) +{ + HOST_WIDE_INT frame_size = get_frame_size (); + rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); + rtx predec = gen_rtx_MEM (SImode, predec1); + rtx insn; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + tree kspisusp = lookup_attribute ("kspisusp", attrs); + + if (kspisusp) + { + insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* We need space on the stack in case we need to save the argument + registers. 
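emit_link_insn above caps the LINK amount at 262140 bytes (LINK also covers the 8 bytes used to save FP and RETS) and subtracts any remainder from SP through a scratch P register. The sketch below restates that split for an assumed 300000-byte frame; the pseudo-assembly strings are purely illustrative.

#include <stdio.h>

int main (void)
{
  long frame_size = 300000;                       /* assumed frame size */
  long link_size = frame_size > 262140 ? 262140 : frame_size;
  long leftover = frame_size - link_size;

  printf ("LINK %ld;      /* also saves FP and RETS (8 extra bytes) */\n",
          link_size);
  if (leftover > 0)
    printf ("SP += -%ld;   /* remainder, loaded through a P register */\n",
            leftover);
  return 0;
}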
*/ + if (fkind == EXCPT_HANDLER) + { + insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If we're calling other functions, they won't save their call-clobbered + registers, so we must save everything here. */ + if (!current_function_is_leaf) + all = true; + expand_prologue_reg_save (spreg, all, true); + + if (ENABLE_WA_05000283 || ENABLE_WA_05000315) + { + rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode)); + rtx p5reg = gen_rtx_REG (Pmode, REG_P5); + emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx)); + emit_insn (gen_movsi_high (p5reg, chipid)); + emit_insn (gen_movsi_low (p5reg, p5reg, chipid)); + emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx)); + } + + if (lookup_attribute ("nesting", attrs)) + { + rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]); + insn = emit_move_insn (predec, srcreg); + RTX_FRAME_RELATED_P (insn) = 1; + } + + do_link (spreg, frame_size, all); + + if (fkind == EXCPT_HANDLER) + { + rtx r0reg = gen_rtx_REG (SImode, REG_R0); + rtx r1reg = gen_rtx_REG (SImode, REG_R1); + rtx r2reg = gen_rtx_REG (SImode, REG_R2); + + emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT)); + emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26))); + emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26))); + emit_move_insn (r1reg, spreg); + emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP)); + emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8))); + } +} + +/* Generate an epilogue suitable for a function of kind FKIND. This is + called for interrupt and exception handler epilogues. + SPREG contains (reg:SI REG_SP). */ + +static void +expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all) +{ + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); + rtx postinc = gen_rtx_MEM (SImode, postinc1); + + /* A slightly crude technique to stop flow from trying to delete "dead" + insns. */ + MEM_VOLATILE_P (postinc) = 1; + + do_unlink (spreg, get_frame_size (), all, 1); + + if (lookup_attribute ("nesting", attrs)) + { + rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]); + emit_move_insn (srcreg, postinc); + } + + /* If we're calling other functions, they won't save their call-clobbered + registers, so we must save (and restore) everything here. */ + if (!current_function_is_leaf) + all = true; + + expand_epilogue_reg_restore (spreg, all, true); + + /* Deallocate any space we left on the stack in case we needed to save the + argument registers. */ + if (fkind == EXCPT_HANDLER) + emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12))); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind]))); +} + +/* Used while emitting the prologue to generate code to load the correct value + into the PIC register, which is passed in DEST. */ + +static rtx +bfin_load_pic_reg (rtx dest) +{ + struct cgraph_local_info *i = NULL; + rtx addr; + + i = cgraph_local_info (current_function_decl); + + /* Functions local to the translation unit don't need to reload the + pic reg, since the caller always passes a usable one. */ + if (i && i->local) + return pic_offset_table_rtx; + + if (bfin_lib_id_given) + addr = plus_constant (pic_offset_table_rtx, -4 - bfin_library_id * 4); + else + addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_LIBRARY_OFFSET)); + emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr))); + return dest; +} + +/* Generate RTL for the prologue of the current function. 
*/ + +void +bfin_expand_prologue (void) +{ + HOST_WIDE_INT frame_size = get_frame_size (); + rtx spreg = gen_rtx_REG (Pmode, REG_SP); + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + rtx pic_reg_loaded = NULL_RTX; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; + + if (fkind != SUBROUTINE) + { + expand_interrupt_handler_prologue (spreg, fkind, all); + return; + } + + if (crtl->limit_stack + || (TARGET_STACK_CHECK_L1 + && !DECL_NO_LIMIT_STACK (current_function_decl))) + { + HOST_WIDE_INT offset + = bfin_initial_elimination_offset (ARG_POINTER_REGNUM, + STACK_POINTER_REGNUM); + rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX; + rtx p2reg = gen_rtx_REG (Pmode, REG_P2); + + if (!lim) + { + emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode)); + emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg)); + lim = p2reg; + } + if (GET_CODE (lim) == SYMBOL_REF) + { + if (TARGET_ID_SHARED_LIBRARY) + { + rtx p1reg = gen_rtx_REG (Pmode, REG_P1); + rtx val; + pic_reg_loaded = bfin_load_pic_reg (p2reg); + val = legitimize_pic_address (stack_limit_rtx, p1reg, + pic_reg_loaded); + emit_move_insn (p1reg, val); + frame_related_constant_load (p2reg, offset, FALSE); + emit_insn (gen_addsi3 (p2reg, p2reg, p1reg)); + lim = p2reg; + } + else + { + rtx limit = plus_constant (lim, offset); + emit_move_insn (p2reg, limit); + lim = p2reg; + } + } + else + { + if (lim != p2reg) + emit_move_insn (p2reg, lim); + add_to_reg (p2reg, offset, 0, 0); + lim = p2reg; + } + emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim)); + emit_insn (gen_trapifcc ()); + } + expand_prologue_reg_save (spreg, all, false); + + do_link (spreg, frame_size, all); + + if (TARGET_ID_SHARED_LIBRARY + && !TARGET_SEP_DATA + && (crtl->uses_pic_offset_table + || !current_function_is_leaf)) + bfin_load_pic_reg (pic_offset_table_rtx); +} + +/* Generate RTL for the epilogue of the current function. NEED_RETURN is zero + if this is for a sibcall. EH_RETURN is nonzero if we're expanding an + eh_return pattern. SIBCALL_P is true if this is a sibcall epilogue, + false otherwise. */ + +void +bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p) +{ + rtx spreg = gen_rtx_REG (Pmode, REG_SP); + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + int e = sibcall_p ? -1 : 1; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; + + if (fkind != SUBROUTINE) + { + expand_interrupt_handler_epilogue (spreg, fkind, all); + return; + } + + do_unlink (spreg, get_frame_size (), all, e); + + expand_epilogue_reg_restore (spreg, all, false); + + /* Omit the return insn if this is for a sibcall. */ + if (! need_return) + return; + + if (eh_return) + emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2))); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS))); +} + +/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ + +int +bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, + unsigned int new_reg) +{ + /* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. */ + + if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE + && !df_regs_ever_live_p (new_reg)) + return 0; + + return 1; +} + +/* Return the value of the return address for the frame COUNT steps up + from the current frame, after the prologue. 
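When stack checking is enabled, bfin_expand_prologue above materializes the stack limit, adds the frame's AP-to-SP offset to it, and emits a compare-and-trap pair (compare_lt followed by trapifcc) so the function faults before running below the limit. The stand-alone sketch below shows only the comparison being made; the limit, offset and SP values are made up for the example.

#include <stdio.h>

int main (void)
{
  unsigned long limit  = 0x10000;   /* lowest legal stack address (example) */
  unsigned long offset = 72;        /* bytes this frame will need (example) */
  unsigned long sp     = 0x10040;   /* current stack pointer (example)      */

  if (sp < limit + offset)          /* corresponds to compare_lt + trapifcc */
    printf ("trap: stack would overflow\n");
  else
    printf ("ok: %lu bytes of headroom\n", sp - (limit + offset));
  return 0;
}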
+ We punt for everything but the current frame by returning const0_rtx. */ + +rtx +bfin_return_addr_rtx (int count) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RETS); +} + +static rtx +bfin_delegitimize_address (rtx orig_x) +{ + rtx x = orig_x; + + if (GET_CODE (x) != MEM) + return orig_x; + + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC + && GET_CODE (XEXP (x, 0)) == REG + && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) + return XVECEXP (XEXP (x, 1), 0, 0); + + return orig_x; +} + +/* This predicate is used to compute the length of a load/store insn. + OP is a MEM rtx, we return nonzero if its addressing mode requires a + 32-bit instruction. */ + +int +effective_address_32bit_p (rtx op, enum machine_mode mode) +{ + HOST_WIDE_INT offset; + + mode = GET_MODE (op); + op = XEXP (op, 0); + + if (GET_CODE (op) != PLUS) + { + gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC + || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC); + return 0; + } + + if (GET_CODE (XEXP (op, 1)) == UNSPEC) + return 1; + + offset = INTVAL (XEXP (op, 1)); + + /* All byte loads use a 16-bit offset. */ + if (GET_MODE_SIZE (mode) == 1) + return 1; + + if (GET_MODE_SIZE (mode) == 4) + { + /* Frame pointer relative loads can use a negative offset, all others + are restricted to a small positive one. */ + if (XEXP (op, 0) == frame_pointer_rtx) + return offset < -128 || offset > 60; + return offset < 0 || offset > 60; + } + + /* Must be HImode now. */ + return offset < 0 || offset > 30; +} + +/* Returns true if X is a memory reference using an I register. */ +bool +bfin_dsp_memref_p (rtx x) +{ + if (! MEM_P (x)) + return false; + x = XEXP (x, 0); + if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC) + x = XEXP (x, 0); + return IREG_P (x); +} + +/* Return cost of the memory address ADDR. + All addressing modes are equally cheap on the Blackfin. */ + +static int +bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) +{ + return 1; +} + +/* Subroutine of print_operand; used to print a memory reference X to FILE. 
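effective_address_32bit_p above encodes which reg+offset accesses still fit the 16-bit load/store forms: byte accesses always need the long encoding, 32-bit accesses allow offsets 0..60 (or -128..60 when frame-pointer relative), and 16-bit accesses allow 0..30. The stand-alone helper below restates those checks outside the RTL machinery; the function name and sample addresses are just for the sketch.

#include <stdbool.h>
#include <stdio.h>

/* Nonzero when a reg+offset access of SIZE bytes needs the 32-bit encoding.  */
static int needs_32bit (int size, long offset, bool fp_relative)
{
  if (size == 1)
    return 1;                              /* byte accesses: always long form */
  if (size == 4)
    {
      if (fp_relative)                     /* FP allows small negative offsets */
        return offset < -128 || offset > 60;
      return offset < 0 || offset > 60;
    }
  return offset < 0 || offset > 30;        /* 16-bit (HImode) accesses */
}

int main (void)
{
  printf ("[FP - 64], SImode : %d\n", needs_32bit (4, -64, true));
  printf ("[P0 + 64], SImode : %d\n", needs_32bit (4, 64, false));
  printf ("[P0 + 28], HImode : %d\n", needs_32bit (2, 28, false));
  return 0;
}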
*/ + +void +print_address_operand (FILE *file, rtx x) +{ + switch (GET_CODE (x)) + { + case PLUS: + output_address (XEXP (x, 0)); + fprintf (file, "+"); + output_address (XEXP (x, 1)); + break; + + case PRE_DEC: + fprintf (file, "--"); + output_address (XEXP (x, 0)); + break; + case POST_INC: + output_address (XEXP (x, 0)); + fprintf (file, "++"); + break; + case POST_DEC: + output_address (XEXP (x, 0)); + fprintf (file, "--"); + break; + + default: + gcc_assert (GET_CODE (x) != MEM); + print_operand (file, x, 0); + break; + } +} + +/* Adding intp DImode support by Tony + * -- Q: (low word) + * -- R: (high word) + */ + +void +print_operand (FILE *file, rtx x, char code) +{ + enum machine_mode mode; + + if (code == '!') + { + if (GET_MODE (current_output_insn) == SImode) + fprintf (file, " ||"); + else + fprintf (file, ";"); + return; + } + + mode = GET_MODE (x); + + switch (code) + { + case 'j': + switch (GET_CODE (x)) + { + case EQ: + fprintf (file, "e"); + break; + case NE: + fprintf (file, "ne"); + break; + case GT: + fprintf (file, "g"); + break; + case LT: + fprintf (file, "l"); + break; + case GE: + fprintf (file, "ge"); + break; + case LE: + fprintf (file, "le"); + break; + case GTU: + fprintf (file, "g"); + break; + case LTU: + fprintf (file, "l"); + break; + case GEU: + fprintf (file, "ge"); + break; + case LEU: + fprintf (file, "le"); + break; + default: + output_operand_lossage ("invalid %%j value"); + } + break; + + case 'J': /* reverse logic */ + switch (GET_CODE(x)) + { + case EQ: + fprintf (file, "ne"); + break; + case NE: + fprintf (file, "e"); + break; + case GT: + fprintf (file, "le"); + break; + case LT: + fprintf (file, "ge"); + break; + case GE: + fprintf (file, "l"); + break; + case LE: + fprintf (file, "g"); + break; + case GTU: + fprintf (file, "le"); + break; + case LTU: + fprintf (file, "ge"); + break; + case GEU: + fprintf (file, "l"); + break; + case LEU: + fprintf (file, "g"); + break; + default: + output_operand_lossage ("invalid %%J value"); + } + break; + + default: + switch (GET_CODE (x)) + { + case REG: + if (code == 'h') + { + if (REGNO (x) < 32) + fprintf (file, "%s", short_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'd') + { + if (REGNO (x) < 32) + fprintf (file, "%s", high_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'w') + { + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.w", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'x') + { + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.x", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'v') + { + if (REGNO (x) == REG_A0) + fprintf (file, "AV0"); + else if (REGNO (x) == REG_A1) + fprintf (file, "AV1"); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'D') + { + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", dregs_pair_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'H') + { + if ((mode == DImode || mode == DFmode) && REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x) + 1]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'T') + { + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", byte_reg_names[REGNO (x)]); + 
else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else + fprintf (file, "%s", reg_names[REGNO (x)]); + break; + + case MEM: + fputc ('[', file); + x = XEXP (x,0); + print_address_operand (file, x); + fputc (']', file); + break; + + case CONST_INT: + if (code == 'M') + { + switch (INTVAL (x)) + { + case MACFLAG_NONE: + break; + case MACFLAG_FU: + fputs ("(FU)", file); + break; + case MACFLAG_T: + fputs ("(T)", file); + break; + case MACFLAG_TFU: + fputs ("(TFU)", file); + break; + case MACFLAG_W32: + fputs ("(W32)", file); + break; + case MACFLAG_IS: + fputs ("(IS)", file); + break; + case MACFLAG_IU: + fputs ("(IU)", file); + break; + case MACFLAG_IH: + fputs ("(IH)", file); + break; + case MACFLAG_M: + fputs ("(M)", file); + break; + case MACFLAG_IS_M: + fputs ("(IS,M)", file); + break; + case MACFLAG_ISS2: + fputs ("(ISS2)", file); + break; + case MACFLAG_S2RND: + fputs ("(S2RND)", file); + break; + default: + gcc_unreachable (); + } + break; + } + else if (code == 'b') + { + if (INTVAL (x) == 0) + fputs ("+=", file); + else if (INTVAL (x) == 1) + fputs ("-=", file); + else + gcc_unreachable (); + break; + } + /* Moves to half registers with d or h modifiers always use unsigned + constants. */ + else if (code == 'd') + x = GEN_INT ((INTVAL (x) >> 16) & 0xffff); + else if (code == 'h') + x = GEN_INT (INTVAL (x) & 0xffff); + else if (code == 'N') + x = GEN_INT (-INTVAL (x)); + else if (code == 'X') + x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x))); + else if (code == 'Y') + x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x))); + else if (code == 'Z') + /* Used for LINK insns. */ + x = GEN_INT (-8 - INTVAL (x)); + + /* fall through */ + + case SYMBOL_REF: + output_addr_const (file, x); + break; + + case CONST_DOUBLE: + output_operand_lossage ("invalid const_double operand"); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_MOVE_PIC: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOT"); + break; + + case UNSPEC_MOVE_FDPIC: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOT17M4"); + break; + + case UNSPEC_FUNCDESC_GOT17M4: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@FUNCDESC_GOT17M4"); + break; + + case UNSPEC_LIBRARY_OFFSET: + fprintf (file, "_current_shared_library_p5_offset_"); + break; + + default: + gcc_unreachable (); + } + break; + + default: + output_addr_const (file, x); + } + } +} + +/* Argument support functions. */ + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + VDSP C Compiler manual, our ABI says that + first 3 words of arguments will use R0, R1 and R2. +*/ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, + rtx libname ATTRIBUTE_UNUSED) +{ + static CUMULATIVE_ARGS zero_cum; + + *cum = zero_cum; + + /* Set up the number of registers to use for passing arguments. */ + + cum->nregs = max_arg_registers; + cum->arg_regs = arg_regs; + + cum->call_cookie = CALL_NORMAL; + /* Check for a longcall attribute. */ + if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))) + cum->call_cookie |= CALL_SHORT; + else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))) + cum->call_cookie |= CALL_LONG; + + return; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) 
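Several of the print_operand modifiers above simply rewrite a CONST_INT before printing it: 'h' keeps the low 16 bits, 'd' keeps the high 16 bits, and 'N' negates the value. The three printf calls below reproduce those transforms on an assumed constant, nothing more.

#include <stdio.h>

int main (void)
{
  unsigned long x = 0x12345678;                   /* assumed constant */

  printf ("%%h -> 0x%lx\n", x & 0xffff);          /* low 16 bits      */
  printf ("%%d -> 0x%lx\n", (x >> 16) & 0xffff);  /* high 16 bits     */
  printf ("%%N -> %ld\n", -(long) x);             /* negated constant */
  return 0;
}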
*/ + +static void +bfin_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int count, bytes, words; + + bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + cum->words += words; + cum->nregs -= words; + + if (cum->nregs <= 0) + { + cum->nregs = 0; + cum->arg_regs = NULL; + } + else + { + for (count = 1; count <= words; count++) + cum->arg_regs++; + } + + return; +} + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ + +static rtx +bfin_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + int bytes + = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + + if (mode == VOIDmode) + /* Compute operand 2 of the call insn. */ + return GEN_INT (cum->call_cookie); + + if (bytes == -1) + return NULL_RTX; + + if (cum->nregs) + return gen_rtx_REG (mode, *(cum->arg_regs)); + + return NULL_RTX; +} + +/* For an arg passed partly in registers and partly in memory, + this is the number of bytes passed in registers. + For args passed entirely in registers or entirely in memory, zero. + + Refer VDSP C Compiler manual, our ABI. + First 3 words are in registers. So, if an argument is larger + than the registers available, it will span the register and + stack. */ + +static int +bfin_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, + tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + int bytes + = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + int bytes_left = cum->nregs * UNITS_PER_WORD; + + if (bytes == -1) + return 0; + + if (bytes_left == 0) + return 0; + if (bytes > bytes_left) + return bytes_left; + return 0; +} + +/* Variable sized types are passed by reference. */ + +static bool +bfin_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; +} + +/* Decide whether a type should be returned in memory (true) + or in a register (false). This is called by the macro + TARGET_RETURN_IN_MEMORY. */ + +static bool +bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + int size = int_size_in_bytes (type); + return size > 2 * UNITS_PER_WORD || size == -1; +} + +/* Register in which address to store a structure value + is passed to a function. */ +static rtx +bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, REG_P0); +} + +/* Return true when register may be used to pass function parameters. 
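Taken together, bfin_function_arg and bfin_arg_partial_bytes above implement the ABI rule quoted earlier: the first three argument words travel in R0..R2, and an argument that does not fit entirely in the remaining registers is split, with the overflow going to the stack. The sketch below replays that decision for one assumed argument; the constants are the ones implied by the comments (three argument registers, 4-byte words), not values read from the headers.

#include <stdio.h>

#define UNITS_PER_WORD 4
#define N_ARG_REGS 3            /* R0, R1, R2 per the ABI comment above */

int main (void)
{
  int words_used = 1;                           /* one word already in R0 */
  int bytes = 8;                                /* next argument: 8 bytes  */
  int bytes_left = (N_ARG_REGS - words_used) * UNITS_PER_WORD;

  if (bytes <= bytes_left)
    printf ("argument passed entirely in R%d..R%d\n",
            words_used, words_used + bytes / UNITS_PER_WORD - 1);
  else if (bytes_left > 0)
    printf ("%d bytes in registers, %d bytes on the stack\n",
            bytes_left, bytes - bytes_left);
  else
    printf ("argument passed entirely on the stack\n");
  return 0;
}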
*/ + +bool +function_arg_regno_p (int n) +{ + int i; + for (i = 0; arg_regs[i] != -1; i++) + if (n == arg_regs[i]) + return true; + return false; +} + +/* Returns 1 if OP contains a symbol reference */ + +int +symbolic_reference_mentioned_p (rtx op) +{ + register const char *fmt; + register int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return 1; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return 1; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return 1; + } + + return 0; +} + +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. */ + +static bool +bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, + tree exp ATTRIBUTE_UNUSED) +{ + struct cgraph_local_info *this_func, *called_func; + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + if (fkind != SUBROUTINE) + return false; + if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA) + return true; + + /* When compiling for ID shared libraries, can't sibcall a local function + from a non-local function, because the local function thinks it does + not need to reload P5 in the prologue, but the sibcall wil pop P5 in the + sibcall epilogue, and we end up with the wrong value in P5. */ + + if (!decl) + /* Not enough information. */ + return false; + + this_func = cgraph_local_info (current_function_decl); + called_func = cgraph_local_info (decl); + return !called_func->local || this_func->local; +} + +/* Write a template for a trampoline to F. */ + +static void +bfin_asm_trampoline_template (FILE *f) +{ + if (TARGET_FDPIC) + { + fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */ + fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */ + fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */ + fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */ + fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */ + fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */ + fprintf (f, "\t.dw\t0xac4b\n"); /* p3 = [p1 + 4] */ + fprintf (f, "\t.dw\t0x9149\n"); /* p1 = [p1] */ + fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/ + } + else + { + fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */ + fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */ + fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */ + fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */ + fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/ + } +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is the target function. CHAIN_VALUE is an RTX for + the static chain value for the function. 
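The trampoline template above reserves two zero words at the start in the FDPIC case; the initializer that follows (bfin_trampoline_init) then patches the target address and the static chain in as 16-bit halves, at byte offsets i+2/i+6 and i+10/i+14 respectively, where i is 8 for FDPIC and 0 otherwise. The small program below only prints that offset map as a cross-check.

#include <stdio.h>

static void show (const char *abi, int i)
{
  printf ("%s:\n", abi);
  printf ("  [tramp+%2d] = fn.l     [tramp+%2d] = fn.h\n", i + 2, i + 6);
  printf ("  [tramp+%2d] = chain.l  [tramp+%2d] = chain.h\n", i + 10, i + 14);
}

int main (void)
{
  show ("non-FDPIC", 0);
  show ("FDPIC", 8);
  return 0;
}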
*/ + +static void +bfin_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx t1 = copy_to_reg (XEXP (DECL_RTL (fndecl), 0)); + rtx t2 = copy_to_reg (chain_value); + rtx mem; + int i = 0; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + if (TARGET_FDPIC) + { + rtx a = force_reg (Pmode, plus_constant (XEXP (m_tramp, 0), 8)); + mem = adjust_address (m_tramp, Pmode, 0); + emit_move_insn (mem, a); + i = 8; + } + + mem = adjust_address (m_tramp, HImode, i + 2); + emit_move_insn (mem, gen_lowpart (HImode, t1)); + emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16))); + mem = adjust_address (m_tramp, HImode, i + 6); + emit_move_insn (mem, gen_lowpart (HImode, t1)); + + mem = adjust_address (m_tramp, HImode, i + 10); + emit_move_insn (mem, gen_lowpart (HImode, t2)); + emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16))); + mem = adjust_address (m_tramp, HImode, i + 14); + emit_move_insn (mem, gen_lowpart (HImode, t2)); +} + +/* Emit insns to move operands[1] into operands[0]. */ + +void +emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode); + + gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed)); + if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + else + operands[1] = legitimize_pic_address (operands[1], temp, + TARGET_FDPIC ? OUR_FDPIC_REG + : pic_offset_table_rtx); +} + +/* Expand a move operation in mode MODE. The operands are in OPERANDS. + Returns true if no further code must be generated, false if the caller + should generate an insn to move OPERANDS[1] to OPERANDS[0]. */ + +bool +expand_move (rtx *operands, enum machine_mode mode) +{ + rtx op = operands[1]; + if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC) + && SYMBOLIC_CONST (op)) + emit_pic_move (operands, mode); + else if (mode == SImode && GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + && !bfin_legitimate_constant_p (op)) + { + rtx dest = operands[0]; + rtx op0, op1; + gcc_assert (!reload_in_progress && !reload_completed); + op = XEXP (op, 0); + op0 = force_reg (mode, XEXP (op, 0)); + op1 = XEXP (op, 1); + if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode)) + op1 = force_reg (mode, op1); + if (GET_CODE (dest) == MEM) + dest = gen_reg_rtx (mode); + emit_insn (gen_addsi3 (dest, op0, op1)); + if (dest == operands[0]) + return true; + operands[1] = dest; + } + /* Don't generate memory->memory or constant->memory moves, go through a + register */ + else if ((reload_in_progress | reload_completed) == 0 + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) != REG) + operands[1] = force_reg (mode, operands[1]); + return false; +} + +/* Split one or more DImode RTL references into pairs of SImode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". */ + +void +split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) +{ + while (num--) + { + rtx op = operands[num]; + + /* simplify_subreg refuse to split volatile memory addresses, + but we still have to handle it. 
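split_di, whose body continues just below, decomposes each DImode operand into two SImode halves at byte offsets 0 and 4; since Blackfin is little-endian, offset 0 holds the low word. The fragment below shows the same split on a plain 64-bit integer, outside the RTL machinery.

#include <inttypes.h>
#include <stdio.h>

int main (void)
{
  uint64_t value = 0x1122334455667788ULL;        /* assumed DImode constant  */
  uint32_t lo = (uint32_t) value;                /* lo_half: byte offset 0   */
  uint32_t hi = (uint32_t) (value >> 32);        /* hi_half: byte offset 4   */

  printf ("lo = 0x%08" PRIx32 ", hi = 0x%08" PRIx32 "\n", lo, hi);
  return 0;
}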
*/ + if (GET_CODE (op) == MEM) + { + lo_half[num] = adjust_address (op, SImode, 0); + hi_half[num] = adjust_address (op, SImode, 4); + } + else + { + lo_half[num] = simplify_gen_subreg (SImode, op, + GET_MODE (op) == VOIDmode + ? DImode : GET_MODE (op), 0); + hi_half[num] = simplify_gen_subreg (SImode, op, + GET_MODE (op) == VOIDmode + ? DImode : GET_MODE (op), 4); + } + } +} + +bool +bfin_longcall_p (rtx op, int call_cookie) +{ + gcc_assert (GET_CODE (op) == SYMBOL_REF); + if (SYMBOL_REF_WEAK (op)) + return 1; + if (call_cookie & CALL_SHORT) + return 0; + if (call_cookie & CALL_LONG) + return 1; + if (TARGET_LONG_CALLS) + return 1; + return 0; +} + +/* Expand a call instruction. FNADDR is the call target, RETVAL the return value. + COOKIE is a CONST_INT holding the call_cookie prepared init_cumulative_args. + SIBCALL is nonzero if this is a sibling call. */ + +void +bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall) +{ + rtx use = NULL, call; + rtx callee = XEXP (fnaddr, 0); + int nelts = 3; + rtx pat; + rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO); + rtx retsreg = gen_rtx_REG (Pmode, REG_RETS); + int n; + + /* In an untyped call, we can get NULL for operand 2. */ + if (cookie == NULL_RTX) + cookie = const0_rtx; + + /* Static functions and indirect calls don't need the pic register. */ + if (!TARGET_FDPIC && flag_pic + && GET_CODE (callee) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (callee)) + use_reg (&use, pic_offset_table_rtx); + + if (TARGET_FDPIC) + { + int caller_in_sram, callee_in_sram; + + /* 0 is not in sram, 1 is in L1 sram, 2 is in L2 sram. */ + caller_in_sram = callee_in_sram = 0; + + if (lookup_attribute ("l1_text", + DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE) + caller_in_sram = 1; + else if (lookup_attribute ("l2", + DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE) + caller_in_sram = 2; + + if (GET_CODE (callee) == SYMBOL_REF + && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee))) + { + if (lookup_attribute + ("l1_text", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE) + callee_in_sram = 1; + else if (lookup_attribute + ("l2", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE) + callee_in_sram = 2; + } + + if (GET_CODE (callee) != SYMBOL_REF + || bfin_longcall_p (callee, INTVAL (cookie)) + || (GET_CODE (callee) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (callee) + && TARGET_INLINE_PLT) + || caller_in_sram != callee_in_sram + || (caller_in_sram && callee_in_sram + && (GET_CODE (callee) != SYMBOL_REF + || !SYMBOL_REF_LOCAL_P (callee)))) + { + rtx addr = callee; + if (! 
address_operand (addr, Pmode)) + addr = force_reg (Pmode, addr); + + fnaddr = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (fnaddr, addr)); + fnaddr = gen_rtx_MEM (Pmode, fnaddr); + + picreg = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (picreg, + plus_constant (addr, 4))); + } + + nelts++; + } + else if ((!register_no_elim_operand (callee, Pmode) + && GET_CODE (callee) != SYMBOL_REF) + || (GET_CODE (callee) == SYMBOL_REF + && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY) + || bfin_longcall_p (callee, INTVAL (cookie))))) + { + callee = copy_to_mode_reg (Pmode, callee); + fnaddr = gen_rtx_MEM (Pmode, callee); + } + call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + + if (retval) + call = gen_rtx_SET (VOIDmode, retval, call); + + pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts)); + n = 0; + XVECEXP (pat, 0, n++) = call; + if (TARGET_FDPIC) + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg); + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie); + if (sibcall) + XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode); + else + XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg); + call = emit_call_insn (pat); + if (use) + CALL_INSN_FUNCTION_USAGE (call) = use; +} + +/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ + +int +hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + /* Allow only dregs to store value of mode HI or QI */ + enum reg_class rclass = REGNO_REG_CLASS (regno); + + if (mode == CCmode) + return 0; + + if (mode == V2HImode) + return D_REGNO_P (regno); + if (rclass == CCREGS) + return mode == BImode; + if (mode == PDImode || mode == V2PDImode) + return regno == REG_A0 || regno == REG_A1; + + /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes + up with a bad register class (such as ALL_REGS) for DImode. */ + if (mode == DImode) + return regno < REG_M3; + + if (mode == SImode + && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno)) + return 1; + + return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno); +} + +/* Implements target hook vector_mode_supported_p. */ + +static bool +bfin_vector_mode_supported_p (enum machine_mode mode) +{ + return mode == V2HImode; +} + +/* Return the cost of moving data from a register in class CLASS1 to + one in class CLASS2. A cost of 2 is the default. */ + +int +bfin_register_move_cost (enum machine_mode mode, + enum reg_class class1, enum reg_class class2) +{ + /* These need secondary reloads, so they're more expensive. */ + if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS)) + || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS))) + return 4; + + /* If optimizing for size, always prefer reg-reg over reg-memory moves. */ + if (optimize_size) + return 2; + + if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* Discourage trying to use the accumulators. */ + if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1)) + return 20; + } + return 2; +} + +/* Return the cost of moving data of mode M between a + register and memory. A value of 2 is the default; this cost is + relative to those in `REGISTER_MOVE_COST'. + + ??? In theory L1 memory has single-cycle latency. We should add a switch + that tells the compiler whether we expect to use only L1 memory for the + program; it'll make the costs more accurate. 
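bfin_register_move_cost above keeps the default cost of 2 for ordinary moves, but charges 4 for anything that has to pass through CC (which can only be copied via a D register) and, when not optimizing for size, 20 for integer moves that would route values through the accumulators. The simplified stand-alone restatement below ignores the mode test and the exact class-subset checks of the real hook; the enum is a stand-in for the port's register classes.

#include <stdbool.h>
#include <stdio.h>

enum rclass { DREGS, PREGS, AREGS, CCREGS };

static int move_cost (enum rclass c1, enum rclass c2, bool size_opt)
{
  /* CC can only be copied through a D register: extra cost.  */
  if ((c1 == CCREGS && c2 != DREGS) || (c2 == CCREGS && c1 != DREGS))
    return 4;
  if (size_opt)
    return 2;
  /* Discourage routing integer values through the accumulators.  */
  if (c1 == AREGS || c2 == AREGS)
    return 20;
  return 2;
}

int main (void)
{
  printf ("CCREGS -> PREGS: %d\n", move_cost (CCREGS, PREGS, false));
  printf ("DREGS  -> AREGS: %d\n", move_cost (DREGS, AREGS, false));
  printf ("DREGS  -> PREGS: %d\n", move_cost (DREGS, PREGS, false));
  return 0;
}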
*/ + +int +bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + enum reg_class rclass, + int in ATTRIBUTE_UNUSED) +{ + /* Make memory accesses slightly more expensive than any register-register + move. Also, penalize non-DP registers, since they need secondary + reloads to load and store. */ + if (! reg_class_subset_p (rclass, DPREGS)) + return 10; + + return 8; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch register. Return the class needed for the + scratch register. */ + +static reg_class_t +bfin_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + /* If we have HImode or QImode, we can only use DREGS as secondary registers; + in most other cases we can also use PREGS. */ + enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS; + enum reg_class x_class = NO_REGS; + enum rtx_code code = GET_CODE (x); + enum reg_class rclass = (enum reg_class) rclass_i; + + if (code == SUBREG) + x = SUBREG_REG (x), code = GET_CODE (x); + if (REG_P (x)) + { + int regno = REGNO (x); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = reg_renumber[regno]; + + if (regno == -1) + code = MEM; + else + x_class = REGNO_REG_CLASS (regno); + } + + /* We can be asked to reload (plus (FP) (large_constant)) into a DREG. + This happens as a side effect of register elimination, and we need + a scratch register to do it. */ + if (fp_plus_const_operand (x, mode)) + { + rtx op2 = XEXP (x, 1); + int large_constant_p = ! satisfies_constraint_Ks7 (op2); + + if (rclass == PREGS || rclass == PREGS_CLOBBERED) + return NO_REGS; + /* If destination is a DREG, we can do this without a scratch register + if the constant is valid for an add instruction. */ + if ((rclass == DREGS || rclass == DPREGS) + && ! large_constant_p) + return NO_REGS; + /* Reloading to anything other than a DREG? Use a PREG scratch + register. */ + sri->icode = CODE_FOR_reload_insi; + return NO_REGS; + } + + /* Data can usually be moved freely between registers of most classes. + AREGS are an exception; they can only move to or from another register + in AREGS or one in DREGS. They can also be assigned the constant 0. */ + if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS) + return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS + || rclass == ODD_AREGS + ? NO_REGS : DREGS); + + if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS) + { + if (code == MEM) + { + sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi; + return NO_REGS; + } + + if (x != const0_rtx && x_class != DREGS) + { + return DREGS; + } + else + return NO_REGS; + } + + /* CCREGS can only be moved from/to DREGS. */ + if (rclass == CCREGS && x_class != DREGS) + return DREGS; + if (x_class == CCREGS && rclass != DREGS) + return DREGS; + + /* All registers other than AREGS can load arbitrary constants. The only + case that remains is MEM. */ + if (code == MEM) + if (! reg_class_subset_p (rclass, default_class)) + return default_class; + + return NO_REGS; +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ + +static bool +bfin_class_likely_spilled_p (reg_class_t rclass) +{ + switch (rclass) + { + case PREGS_CLOBBERED: + case PROLOGUE_REGS: + case P0REGS: + case D0REGS: + case D1REGS: + case D2REGS: + case CCREGS: + return true; + + default: + break; + } + + return false; +} + +/* Implement TARGET_HANDLE_OPTION. 
*/ + +static bool +bfin_handle_option (size_t code, const char *arg, int value) +{ + switch (code) + { + case OPT_mshared_library_id_: + if (value > MAX_LIBRARY_ID) + error ("-mshared-library-id=%s is not between 0 and %d", + arg, MAX_LIBRARY_ID); + bfin_lib_id_given = 1; + return true; + + case OPT_mcpu_: + { + const char *p, *q; + int i; + + i = 0; + while ((p = bfin_cpus[i].name) != NULL) + { + if (strncmp (arg, p, strlen (p)) == 0) + break; + i++; + } + + if (p == NULL) + { + error ("-mcpu=%s is not valid", arg); + return false; + } + + bfin_cpu_type = bfin_cpus[i].type; + + q = arg + strlen (p); + + if (*q == '\0') + { + bfin_si_revision = bfin_cpus[i].si_revision; + bfin_workarounds |= bfin_cpus[i].workarounds; + } + else if (strcmp (q, "-none") == 0) + bfin_si_revision = -1; + else if (strcmp (q, "-any") == 0) + { + bfin_si_revision = 0xffff; + while (bfin_cpus[i].type == bfin_cpu_type) + { + bfin_workarounds |= bfin_cpus[i].workarounds; + i++; + } + } + else + { + unsigned int si_major, si_minor; + int rev_len, n; + + rev_len = strlen (q); + + if (sscanf (q, "-%u.%u%n", &si_major, &si_minor, &n) != 2 + || n != rev_len + || si_major > 0xff || si_minor > 0xff) + { + invalid_silicon_revision: + error ("-mcpu=%s has invalid silicon revision", arg); + return false; + } + + bfin_si_revision = (si_major << 8) | si_minor; + + while (bfin_cpus[i].type == bfin_cpu_type + && bfin_cpus[i].si_revision != bfin_si_revision) + i++; + + if (bfin_cpus[i].type != bfin_cpu_type) + goto invalid_silicon_revision; + + bfin_workarounds |= bfin_cpus[i].workarounds; + } + + return true; + } + + default: + return true; + } +} + +static struct machine_function * +bfin_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Implement the TARGET_OPTION_OVERRIDE hook. */ + +static void +bfin_option_override (void) +{ + /* If processor type is not specified, enable all workarounds. */ + if (bfin_cpu_type == BFIN_CPU_UNKNOWN) + { + int i; + + for (i = 0; bfin_cpus[i].name != NULL; i++) + bfin_workarounds |= bfin_cpus[i].workarounds; + + bfin_si_revision = 0xffff; + } + + if (bfin_csync_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_SYNCS; + else if (bfin_csync_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_SYNCS; + + if (bfin_specld_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_LOADS; + else if (bfin_specld_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_LOADS; + + if (TARGET_OMIT_LEAF_FRAME_POINTER) + flag_omit_frame_pointer = 1; + + /* Library identification */ + if (bfin_lib_id_given && ! TARGET_ID_SHARED_LIBRARY) + error ("-mshared-library-id= specified without -mid-shared-library"); + + if (stack_limit_rtx && TARGET_STACK_CHECK_L1) + error ("can%'t use multiple stack checking methods together"); + + if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC) + error ("ID shared libraries and FD-PIC mode can%'t be used together"); + + /* Don't allow the user to specify -mid-shared-library and -msep-data + together, as it makes little sense from a user's point of view... */ + if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY) + error ("cannot specify both -msep-data and -mid-shared-library"); + /* ... internally, however, it's nearly the same. */ + if (TARGET_SEP_DATA) + target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY; + + if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0) + flag_pic = 1; + + /* There is no single unaligned SI op for PIC code. Sometimes we + need to use ".4byte" and sometimes we need to use ".picptr". + See bfin_assemble_integer for details. 
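bfin_handle_option above parses the optional silicon-revision suffix of -mcpu: "-none" selects no particular revision, "-any" selects 0xffff and ORs in the workarounds of every table entry for that CPU, and a "-major.minor" suffix is packed as (major << 8) | minor and must match a table entry. The helper below restates only the suffix parsing; its name and return conventions are invented for the sketch.

#include <stdio.h>
#include <string.h>

/* Returns the packed revision, 0xffff for "-any", -1 for "-none",
   or -2 for an invalid suffix.  Sketch only.  */
static int parse_si_revision (const char *q)
{
  unsigned int major, minor;
  int n;

  if (strcmp (q, "-none") == 0)
    return -1;
  if (strcmp (q, "-any") == 0)
    return 0xffff;
  if (sscanf (q, "-%u.%u%n", &major, &minor, &n) != 2
      || q[n] != '\0' || major > 0xff || minor > 0xff)
    return -2;
  return (int) ((major << 8) | minor);
}

int main (void)
{
  printf ("\"-0.2\" -> 0x%04x\n", (unsigned) parse_si_revision ("-0.2"));
  printf ("\"-any\" -> 0x%04x\n", (unsigned) parse_si_revision ("-any"));
  return 0;
}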
*/ + if (TARGET_FDPIC) + targetm.asm_out.unaligned_op.si = 0; + + /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries, + since we don't support it and it'll just break. */ + if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY) + flag_pic = 0; + + if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561) + error ("-mmulticore can only be used with BF561"); + + if (TARGET_COREA && !TARGET_MULTICORE) + error ("-mcorea should be used with -mmulticore"); + + if (TARGET_COREB && !TARGET_MULTICORE) + error ("-mcoreb should be used with -mmulticore"); + + if (TARGET_COREA && TARGET_COREB) + error ("-mcorea and -mcoreb can%'t be used together"); + + flag_schedule_insns = 0; + + init_machine_status = bfin_init_machine_status; +} + +/* Return the destination address of BRANCH. + We need to use this instead of get_attr_length, because the + cbranch_with_nops pattern conservatively sets its length to 6, and + we still prefer to use shorter sequences. */ + +static int +branch_dest (rtx branch) +{ + rtx dest; + int dest_uid; + rtx pat = PATTERN (branch); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + dest = SET_SRC (pat); + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, 1); + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + return INSN_ADDRESSES (dest_uid); +} + +/* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates + it's a branch that's predicted taken. */ + +static int +cbranch_predicted_taken_p (rtx insn) +{ + rtx x = find_reg_note (insn, REG_BR_PROB, 0); + + if (x) + { + int pred_val = INTVAL (XEXP (x, 0)); + + return pred_val >= REG_BR_PROB_BASE / 2; + } + + return 0; +} + +/* Templates for use by asm_conditional_branch. */ + +static const char *ccbranch_templates[][3] = { + { "if !cc jump %3;", "if cc jump 4 (bp); jump.s %3;", "if cc jump 6 (bp); jump.l %3;" }, + { "if cc jump %3;", "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" }, + { "if !cc jump %3 (bp);", "if cc jump 4; jump.s %3;", "if cc jump 6; jump.l %3;" }, + { "if cc jump %3 (bp);", "if !cc jump 4; jump.s %3;", "if !cc jump 6; jump.l %3;" }, +}; + +/* Output INSN, which is a conditional branch instruction with operands + OPERANDS. + + We deal with the various forms of conditional branches that can be generated + by bfin_reorg to prevent the hardware from doing speculative loads, by + - emitting a sufficient number of nops, if N_NOPS is nonzero, or + - always emitting the branch as predicted taken, if PREDICT_TAKEN is true. + Either of these is only necessary if the branch is short, otherwise the + template we use ends in an unconditional jump which flushes the pipeline + anyway. */ + +void +asm_conditional_branch (rtx insn, rtx *operands, int n_nops, int predict_taken) +{ + int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + /* Note : offset for instructions like if cc jmp; jump.[sl] offset + is to be taken from start of if cc rather than jump. + Range for jump.s is (-4094, 4096) instead of (-4096, 4094) + */ + int len = (offset >= -1024 && offset <= 1022 ? 0 + : offset >= -4094 && offset <= 4096 ? 1 + : 2); + int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn); + int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT); + output_asm_insn (ccbranch_templates[idx][len], operands); + gcc_assert (n_nops == 0 || !bp); + if (len == 0) + while (n_nops-- > 0) + output_asm_insn ("nop;", NULL); +} + +/* Emit rtl for a comparison operation CMP in mode MODE. 
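+   (As the switch below shows, only EQ, LT, LE, LTU and LEU can be tested
+   directly on the CC bit; any other code is reversed and the sense of the
+   returned test is flipped.  For example, a GE comparison is emitted as
+   CC = (op0 < op1) and the function returns an EQ test of CC against zero,
+   and GT likewise becomes CC = (op0 <= op1) with the inverted sense.)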
Operands have been + stored in bfin_compare_op0 and bfin_compare_op1 already. */ + +rtx +bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + enum rtx_code code1, code2; + rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1); + rtx tem = bfin_cc_rtx; + enum rtx_code code = GET_CODE (cmp); + + /* If we have a BImode input, then we already have a compare result, and + do not need to emit another comparison. */ + if (GET_MODE (op0) == BImode) + { + gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx); + tem = op0, code2 = code; + } + else + { + switch (code) { + /* bfin has these conditions */ + case EQ: + case LT: + case LE: + case LEU: + case LTU: + code1 = code; + code2 = NE; + break; + default: + code1 = reverse_condition (code); + code2 = EQ; + break; + } + emit_insn (gen_rtx_SET (VOIDmode, tem, + gen_rtx_fmt_ee (code1, BImode, op0, op1))); + } + + return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode)); +} + +/* Return nonzero iff C has exactly one bit set if it is interpreted + as a 32-bit constant. */ + +int +log2constp (unsigned HOST_WIDE_INT c) +{ + c &= 0xFFFFFFFF; + return c != 0 && (c & (c-1)) == 0; +} + +/* Returns the number of consecutive least significant zeros in the binary + representation of *V. + We modify *V to contain the original value arithmetically shifted right by + the number of zeroes. */ + +static int +shiftr_zero (HOST_WIDE_INT *v) +{ + unsigned HOST_WIDE_INT tmp = *v; + unsigned HOST_WIDE_INT sgn; + int n = 0; + + if (tmp == 0) + return 0; + + sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1)); + while ((tmp & 0x1) == 0 && n <= 32) + { + tmp = (tmp >> 1) | sgn; + n++; + } + *v = tmp; + return n; +} + +/* After reload, split the load of an immediate constant. OPERANDS are the + operands of the movsi_insn pattern which we are splitting. We return + nonzero if we emitted a sequence to load the constant, zero if we emitted + nothing because we want to use the splitter's default sequence. */ + +int +split_load_immediate (rtx operands[]) +{ + HOST_WIDE_INT val = INTVAL (operands[1]); + HOST_WIDE_INT tmp; + HOST_WIDE_INT shifted = val; + HOST_WIDE_INT shifted_compl = ~val; + int num_zero = shiftr_zero (&shifted); + int num_compl_zero = shiftr_zero (&shifted_compl); + unsigned int regno = REGNO (operands[0]); + + /* This case takes care of single-bit set/clear constants, which we could + also implement with BITSET/BITCLR. */ + if (num_zero + && shifted >= -32768 && shifted < 65536 + && (D_REGNO_P (regno) + || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2))) + { + emit_insn (gen_movsi (operands[0], GEN_INT (shifted))); + emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero))); + return 1; + } + + tmp = val & 0xFFFF; + tmp |= -(tmp & 0x8000); + + /* If high word has one bit set or clear, try to use a bit operation. 
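+
+   (The computation of TMP just above sign-extends the low halfword of VAL.
+   In isolation, with a hypothetical name:
+
+       static HOST_WIDE_INT
+       sext16 (HOST_WIDE_INT val)
+       {
+         HOST_WIDE_INT t = val & 0xFFFF;
+         return t | -(t & 0x8000);
+       }
+
+   so sext16 (0x12348000) == -32768 and sext16 (0x12347FFF) == 32767; the
+   "tmp >= -64 && tmp <= 63" test further down relies on this signed view.)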
*/ + if (D_REGNO_P (regno)) + { + if (log2constp (val & 0xFFFF0000)) + { + emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF))); + emit_insn (gen_iorsi3 (operands[0], operands[0], GEN_INT (val & 0xFFFF0000))); + return 1; + } + else if (log2constp (val | 0xFFFF) && (val & 0x8000) != 0) + { + emit_insn (gen_movsi (operands[0], GEN_INT (tmp))); + emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (val | 0xFFFF))); + } + } + + if (D_REGNO_P (regno)) + { + if (tmp >= -64 && tmp <= 63) + { + emit_insn (gen_movsi (operands[0], GEN_INT (tmp))); + emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536))); + return 1; + } + + if ((val & 0xFFFF0000) == 0) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + emit_insn (gen_movsi_low (operands[0], operands[0], operands[1])); + return 1; + } + + if ((val & 0xFFFF0000) == 0xFFFF0000) + { + emit_insn (gen_movsi (operands[0], constm1_rtx)); + emit_insn (gen_movsi_low (operands[0], operands[0], operands[1])); + return 1; + } + } + + /* Need DREGs for the remaining case. */ + if (regno > REG_R7) + return 0; + + if (optimize_size + && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63) + { + /* If optimizing for size, generate a sequence that has more instructions + but is shorter. */ + emit_insn (gen_movsi (operands[0], GEN_INT (shifted_compl))); + emit_insn (gen_ashlsi3 (operands[0], operands[0], + GEN_INT (num_compl_zero))); + emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); + return 1; + } + return 0; +} + +/* Return true if the legitimate memory address for a memory operand of mode + MODE. Return false if not. */ + +static bool +bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value) +{ + unsigned HOST_WIDE_INT v = value > 0 ? value : -value; + int sz = GET_MODE_SIZE (mode); + int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2; + /* The usual offsettable_memref machinery doesn't work so well for this + port, so we deal with the problem here. */ + if (value > 0 && sz == 8) + v += 4; + return (v & ~(0x7fff << shift)) == 0; +} + +static bool +bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode, + enum rtx_code outer_code) +{ + if (strict) + return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH); + else + return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH); +} + +/* Recognize an RTL expression that is a valid memory address for an + instruction. The MODE argument is the machine mode for the MEM expression + that wants to use this address. 
+ + Blackfin addressing modes are as follows: + + [preg] + [preg + imm16] + + B [ Preg + uimm15 ] + W [ Preg + uimm16m2 ] + [ Preg + uimm17m4 ] + + [preg++] + [preg--] + [--sp] +*/ + +static bool +bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + switch (GET_CODE (x)) { + case REG: + if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM)) + return true; + break; + case PLUS: + if (REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS) + && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode) + || (GET_CODE (XEXP (x, 1)) == CONST_INT + && bfin_valid_add (mode, INTVAL (XEXP (x, 1)))))) + return true; + break; + case POST_INC: + case POST_DEC: + if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) + && REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC)) + return true; + case PRE_DEC: + if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) + && XEXP (x, 0) == stack_pointer_rtx + && REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC)) + return true; + break; + default: + break; + } + return false; +} + +/* Decide whether we can force certain constants to memory. If we + decide we can't, the caller should be able to cope with it in + another way. */ + +static bool +bfin_cannot_force_const_mem (rtx x ATTRIBUTE_UNUSED) +{ + /* We have only one class of non-legitimate constants, and our movsi + expander knows how to handle them. Dropping these constants into the + data section would only shift the problem - we'd still get relocs + outside the object, in the data section rather than the text section. */ + return true; +} + +/* Ensure that for any constant of the form symbol + offset, the offset + remains within the object. Any other constants are ok. + This ensures that flat binaries never have to deal with relocations + crossing section boundaries. */ + +bool +bfin_legitimate_constant_p (rtx x) +{ + rtx sym; + HOST_WIDE_INT offset; + + if (GET_CODE (x) != CONST) + return true; + + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS); + + sym = XEXP (x, 0); + x = XEXP (x, 1); + if (GET_CODE (sym) != SYMBOL_REF + || GET_CODE (x) != CONST_INT) + return true; + offset = INTVAL (x); + + if (SYMBOL_REF_DECL (sym) == 0) + return true; + if (offset < 0 + || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym)))) + return false; + + return true; +} + +static bool +bfin_rtx_costs (rtx x, int code_i, int outer_code_i, int *total, bool speed) +{ + enum rtx_code code = (enum rtx_code) code_i; + enum rtx_code outer_code = (enum rtx_code) outer_code_i; + int cost2 = COSTS_N_INSNS (1); + rtx op0, op1; + + switch (code) + { + case CONST_INT: + if (outer_code == SET || outer_code == PLUS) + *total = satisfies_constraint_Ks7 (x) ? 0 : cost2; + else if (outer_code == AND) + *total = log2constp (~INTVAL (x)) ? 0 : cost2; + else if (outer_code == LE || outer_code == LT || outer_code == EQ) + *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2; + else if (outer_code == LEU || outer_code == LTU) + *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2; + else if (outer_code == MULT) + *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2; + else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2)) + *total = 0; + else if (outer_code == ASHIFT || outer_code == ASHIFTRT + || outer_code == LSHIFTRT) + *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2; + else if (outer_code == IOR || outer_code == XOR) + *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 
0 : cost2; + else + *total = cost2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + case CONST_DOUBLE: + *total = COSTS_N_INSNS (2); + return true; + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_MODE (x) == SImode) + { + if (GET_CODE (op0) == MULT + && GET_CODE (XEXP (op0, 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (op0, 1)); + if (val == 2 || val == 4) + { + *total = cost2; + *total += rtx_cost (XEXP (op0, 0), outer_code, speed); + *total += rtx_cost (op1, outer_code, speed); + return true; + } + } + *total = cost2; + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, SET, speed); +#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer + towards creating too many induction variables. */ + if (!reg_or_7bit_operand (op1, SImode)) + *total += rtx_cost (op1, SET, speed); +#endif + } + else if (GET_MODE (x) == DImode) + { + *total = 6 * cost2; + if (GET_CODE (op1) != CONST_INT + || !satisfies_constraint_Ks7 (op1)) + *total += rtx_cost (op1, PLUS, speed); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, PLUS, speed); + } + return true; + + case MINUS: + if (GET_MODE (x) == DImode) + *total = 6 * cost2; + else + *total = cost2; + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (GET_MODE (x) == DImode) + *total = 6 * cost2; + else + *total = cost2; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, speed); + + return true; + + case IOR: + case AND: + case XOR: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */ + if (code == IOR) + { + if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) + || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT)) + { + *total = cost2; + return true; + } + } + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, speed); + + if (GET_MODE (x) == DImode) + { + *total = 2 * cost2; + return true; + } + *total = cost2; + if (GET_MODE (x) != SImode) + return true; + + if (code == AND) + { + if (! rhs_andsi3_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, speed); + } + else + { + if (! 
regorlog2_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, speed); + } + + return true; + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + if (outer_code == SET + && XEXP (x, 1) == const1_rtx + && GET_CODE (XEXP (x, 2)) == CONST_INT) + { + *total = 2 * cost2; + return true; + } + /* fall through */ + + case SIGN_EXTEND: + case ZERO_EXTEND: + *total = cost2; + return true; + + case MULT: + { + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) == GET_CODE (op1) + && (GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (1); + op0 = XEXP (op0, 0); + op1 = XEXP (op1, 0); + } + else if (!speed) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (3); + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, MULT, speed); + if (GET_CODE (op1) != REG + && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG)) + *total += rtx_cost (op1, MULT, speed); + } + return true; + + case UDIV: + case UMOD: + *total = COSTS_N_INSNS (32); + return true; + + case VEC_CONCAT: + case VEC_SELECT: + if (outer_code == SET) + *total = cost2; + return true; + + default: + return false; + } +} + +/* Used for communication between {push,pop}_multiple_operation (which + we use not only as a predicate) and the corresponding output functions. */ +static int first_preg_to_save, first_dreg_to_save; +static int n_regs_to_save; + +int +push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + int lastdreg = 8, lastpreg = 6; + int i, group; + + first_preg_to_save = lastpreg; + first_dreg_to_save = lastdreg; + for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++) + { + rtx t = XVECEXP (op, 0, i); + rtx src, dest; + int regno; + + if (GET_CODE (t) != SET) + return 0; + + src = SET_SRC (t); + dest = SET_DEST (t); + if (GET_CODE (dest) != MEM || ! REG_P (src)) + return 0; + dest = XEXP (dest, 0); + if (GET_CODE (dest) != PLUS + || ! REG_P (XEXP (dest, 0)) + || REGNO (XEXP (dest, 0)) != REG_SP + || GET_CODE (XEXP (dest, 1)) != CONST_INT + || INTVAL (XEXP (dest, 1)) != -i * 4) + return 0; + + regno = REGNO (src); + if (group == 0) + { + if (D_REGNO_P (regno)) + { + group = 1; + first_dreg_to_save = lastdreg = regno - REG_R0; + } + else if (regno >= REG_P0 && regno <= REG_P7) + { + group = 2; + first_preg_to_save = lastpreg = regno - REG_P0; + } + else + return 0; + + continue; + } + + if (group == 1) + { + if (regno >= REG_P0 && regno <= REG_P7) + { + group = 2; + first_preg_to_save = lastpreg = regno - REG_P0; + } + else if (regno != REG_R0 + lastdreg + 1) + return 0; + else + lastdreg++; + } + else if (group == 2) + { + if (regno != REG_P0 + lastpreg + 1) + return 0; + lastpreg++; + } + } + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; + return 1; +} + +int +pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + int lastdreg = 8, lastpreg = 6; + int i, group; + + for (i = 1, group = 0; i < XVECLEN (op, 0); i++) + { + rtx t = XVECEXP (op, 0, i); + rtx src, dest; + int regno; + + if (GET_CODE (t) != SET) + return 0; + + src = SET_SRC (t); + dest = SET_DEST (t); + if (GET_CODE (src) != MEM || ! REG_P (dest)) + return 0; + src = XEXP (src, 0); + + if (i == 1) + { + if (! REG_P (src) || REGNO (src) != REG_SP) + return 0; + } + else if (GET_CODE (src) != PLUS + || ! 
REG_P (XEXP (src, 0)) + || REGNO (XEXP (src, 0)) != REG_SP + || GET_CODE (XEXP (src, 1)) != CONST_INT + || INTVAL (XEXP (src, 1)) != (i - 1) * 4) + return 0; + + regno = REGNO (dest); + if (group == 0) + { + if (regno == REG_R7) + { + group = 1; + lastdreg = 7; + } + else if (regno != REG_P0 + lastpreg - 1) + return 0; + else + lastpreg--; + } + else if (group == 1) + { + if (regno != REG_R0 + lastdreg - 1) + return 0; + else + lastdreg--; + } + } + first_dreg_to_save = lastdreg; + first_preg_to_save = lastpreg; + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; + return 1; +} + +/* Emit assembly code for one multi-register push described by INSN, with + operands in OPERANDS. */ + +void +output_push_multiple (rtx insn, rtx *operands) +{ + char buf[80]; + int ok; + + /* Validate the insn again, and compute first_[dp]reg_to_save. */ + ok = push_multiple_operation (PATTERN (insn), VOIDmode); + gcc_assert (ok); + + if (first_dreg_to_save == 8) + sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save); + else if (first_preg_to_save == 6) + sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save); + else + sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n", + first_dreg_to_save, first_preg_to_save); + + output_asm_insn (buf, operands); +} + +/* Emit assembly code for one multi-register pop described by INSN, with + operands in OPERANDS. */ + +void +output_pop_multiple (rtx insn, rtx *operands) +{ + char buf[80]; + int ok; + + /* Validate the insn again, and compute first_[dp]reg_to_save. */ + ok = pop_multiple_operation (PATTERN (insn), VOIDmode); + gcc_assert (ok); + + if (first_dreg_to_save == 8) + sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save); + else if (first_preg_to_save == 6) + sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save); + else + sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n", + first_dreg_to_save, first_preg_to_save); + + output_asm_insn (buf, operands); +} + +/* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */ + +static void +single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset) +{ + rtx scratch = gen_reg_rtx (mode); + rtx srcmem, dstmem; + + srcmem = adjust_address_nv (src, mode, offset); + dstmem = adjust_address_nv (dst, mode, offset); + emit_move_insn (scratch, srcmem); + emit_move_insn (dstmem, scratch); +} + +/* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with + alignment ALIGN_EXP. Return true if successful, false if we should fall + back on a different method. */ + +bool +bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) +{ + rtx srcreg, destreg, countreg; + HOST_WIDE_INT align = 0; + unsigned HOST_WIDE_INT count = 0; + + if (GET_CODE (align_exp) == CONST_INT) + align = INTVAL (align_exp); + if (GET_CODE (count_exp) == CONST_INT) + { + count = INTVAL (count_exp); +#if 0 + if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) + return false; +#endif + } + + /* If optimizing for size, only do single copies inline. 
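+
+   As a worked example of the expansion below: for count == 6 with align
+   == 4, (count & ~3) == 4, so a single SImode move is emitted at offset 0,
+   the (count & 2) test then adds one HImode move at offset 4, and no
+   trailing QImode move is needed.  Larger word counts go through the
+   rep_movsi hardware-loop pattern instead, with the loop count derived
+   from count >> 2, followed by the same half-word/byte fix-ups.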
*/ + if (optimize_size) + { + if (count == 2 && align < 2) + return false; + if (count == 4 && align < 4) + return false; + if (count != 1 && count != 2 && count != 4) + return false; + } + if (align < 2 && count != 1) + return false; + + destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + if (destreg != XEXP (dst, 0)) + dst = replace_equiv_address_nv (dst, destreg); + srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); + if (srcreg != XEXP (src, 0)) + src = replace_equiv_address_nv (src, srcreg); + + if (count != 0 && align >= 2) + { + unsigned HOST_WIDE_INT offset = 0; + + if (align >= 4) + { + if ((count & ~3) == 4) + { + single_move_for_movmem (dst, src, SImode, offset); + offset = 4; + } + else if (count & ~3) + { + HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1; + countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count)); + + emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg)); + cfun->machine->has_loopreg_clobber = true; + } + if (count & 2) + { + single_move_for_movmem (dst, src, HImode, offset); + offset += 2; + } + } + else + { + if ((count & ~1) == 2) + { + single_move_for_movmem (dst, src, HImode, offset); + offset = 2; + } + else if (count & ~1) + { + HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1; + countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count)); + + emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg)); + cfun->machine->has_loopreg_clobber = true; + } + } + if (count & 1) + { + single_move_for_movmem (dst, src, QImode, offset); + } + return true; + } + return false; +} + +/* Compute the alignment for a local variable. + TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. */ + +unsigned +bfin_local_alignment (tree type, unsigned align) +{ + /* Increasing alignment for (relatively) big types allows the builtin + memcpy can use 32 bit loads/stores. */ + if (TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8 + || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32) + return 32; + return align; +} + +/* Implement TARGET_SCHED_ISSUE_RATE. */ + +static int +bfin_issue_rate (void) +{ + return 3; +} + +static int +bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type dep_insn_type; + int dep_insn_code_number; + + /* Anti and output dependencies have zero cost. */ + if (REG_NOTE_KIND (link) != 0) + return 0; + + dep_insn_code_number = recog_memoized (dep_insn); + + /* If we can't recognize the insns, we can't really do anything. */ + if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD) + { + rtx pat = PATTERN (dep_insn); + rtx dest, src; + + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + dest = SET_DEST (pat); + src = SET_SRC (pat); + if (! ADDRESS_REGNO_P (REGNO (dest)) + || ! (MEM_P (src) || D_REGNO_P (REGNO (src)))) + return cost; + return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3); + } + + return cost; +} + +/* This function acts like NEXT_INSN, but is aware of three-insn bundles and + skips all subsequent parallel instructions if INSN is the start of such + a group. 
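+
+   (The bundle structure is encoded in the machine mode of the insns
+   themselves, as set by gen_one_bundle further down: bundle members other
+   than the last carry SImode, the final member carries QImode, and other
+   insns keep their default mode; bfin_gen_bundles below also treats a
+   scheduler-assigned TImode on the following insn as the start of a new
+   issue group.  This helper and find_prev_insn_start below simply walk
+   forward or backward until they leave such a group.)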
*/ +static rtx +find_next_insn_start (rtx insn) +{ + if (GET_MODE (insn) == SImode) + { + while (GET_MODE (insn) != QImode) + insn = NEXT_INSN (insn); + } + return NEXT_INSN (insn); +} + +/* This function acts like PREV_INSN, but is aware of three-insn bundles and + skips all subsequent parallel instructions if INSN is the start of such + a group. */ +static rtx +find_prev_insn_start (rtx insn) +{ + insn = PREV_INSN (insn); + gcc_assert (GET_MODE (insn) != SImode); + if (GET_MODE (insn) == QImode) + { + while (GET_MODE (PREV_INSN (insn)) == SImode) + insn = PREV_INSN (insn); + } + return insn; +} + +/* Increment the counter for the number of loop instructions in the + current function. */ + +void +bfin_hardware_loop (void) +{ + cfun->machine->has_hardware_loops++; +} + +/* Maximum loop nesting depth. */ +#define MAX_LOOP_DEPTH 2 + +/* Maximum size of a loop. */ +#define MAX_LOOP_LENGTH 2042 + +/* Maximum distance of the LSETUP instruction from the loop start. */ +#define MAX_LSETUP_DISTANCE 30 + +/* We need to keep a vector of loops */ +typedef struct loop_info_d *loop_info; +DEF_VEC_P (loop_info); +DEF_VEC_ALLOC_P (loop_info,heap); + +/* Information about a loop we have found (or are in the process of + finding). */ +struct GTY (()) loop_info_d +{ + /* loop number, for dumps */ + int loop_no; + + /* All edges that jump into and out of the loop. */ + VEC(edge,gc) *incoming; + + /* We can handle two cases: all incoming edges have the same destination + block, or all incoming edges have the same source block. These two + members are set to the common source or destination we found, or NULL + if different blocks were found. If both are NULL the loop can't be + optimized. */ + basic_block incoming_src; + basic_block incoming_dest; + + /* First block in the loop. This is the one branched to by the loop_end + insn. */ + basic_block head; + + /* Last block in the loop (the one with the loop_end insn). */ + basic_block tail; + + /* The successor block of the loop. This is the one the loop_end insn + falls into. */ + basic_block successor; + + /* The last instruction in the tail. */ + rtx last_insn; + + /* The loop_end insn. */ + rtx loop_end; + + /* The iteration register. */ + rtx iter_reg; + + /* The new label placed at the beginning of the loop. */ + rtx start_label; + + /* The new label placed at the end of the loop. */ + rtx end_label; + + /* The length of the loop. */ + int length; + + /* The nesting depth of the loop. */ + int depth; + + /* Nonzero if we can't optimize this loop. */ + int bad; + + /* True if we have visited this loop. */ + int visited; + + /* True if this loop body clobbers any of LC0, LT0, or LB0. */ + int clobber_loop0; + + /* True if this loop body clobbers any of LC1, LT1, or LB1. */ + int clobber_loop1; + + /* Next loop in the graph. */ + struct loop_info_d *next; + + /* Immediate outer loop of this loop. */ + struct loop_info_d *outer; + + /* Vector of blocks only within the loop, including those within + inner loops. */ + VEC (basic_block,heap) *blocks; + + /* Same information in a bitmap. 
*/ + bitmap block_bitmap; + + /* Vector of inner loops within this loop */ + VEC (loop_info,heap) *loops; +}; + +static void +bfin_dump_loops (loop_info loops) +{ + loop_info loop; + + for (loop = loops; loop; loop = loop->next) + { + loop_info i; + basic_block b; + unsigned ix; + + fprintf (dump_file, ";; loop %d: ", loop->loop_no); + if (loop->bad) + fprintf (dump_file, "(bad) "); + fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth); + + fprintf (dump_file, " blocks: [ "); + FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, b) + fprintf (dump_file, "%d ", b->index); + fprintf (dump_file, "] "); + + fprintf (dump_file, " inner loops: [ "); + FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, i) + fprintf (dump_file, "%d ", i->loop_no); + fprintf (dump_file, "]\n"); + } + fprintf (dump_file, "\n"); +} + +/* Scan the blocks of LOOP (and its inferiors) looking for basic block + BB. Return true, if we find it. */ + +static bool +bfin_bb_in_loop (loop_info loop, basic_block bb) +{ + return bitmap_bit_p (loop->block_bitmap, bb->index); +} + +/* Scan the blocks of LOOP (and its inferiors) looking for uses of + REG. Return true, if we find any. Don't count the loop's loop_end + insn if it matches LOOP_END. */ + +static bool +bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end) +{ + unsigned ix; + basic_block bb; + + FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb) + { + rtx insn; + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + if (insn == loop_end) + continue; + if (reg_mentioned_p (reg, PATTERN (insn))) + return true; + } + } + return false; +} + +/* Estimate the length of INSN conservatively. */ + +static int +length_for_loop (rtx insn) +{ + int length = 0; + if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 8; + else if (ENABLE_WA_SPECULATIVE_LOADS) + length = 6; + } + else if (LABEL_P (insn)) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 4; + } + + if (NONDEBUG_INSN_P (insn)) + length += get_attr_length (insn); + + return length; +} + +/* Optimize LOOP. */ + +static void +bfin_optimize_loop (loop_info loop) +{ + basic_block bb; + loop_info inner; + rtx insn, last_insn; + rtx loop_init, start_label, end_label; + rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1; + rtx iter_reg, scratchreg, scratch_init, scratch_init_insn; + rtx lc_reg, lt_reg, lb_reg; + rtx seq, seq_end; + int length; + unsigned ix; + int inner_depth = 0; + + if (loop->visited) + return; + + loop->visited = 1; + + if (loop->bad) + { + if (dump_file) + fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); + goto bad_loop; + } + + /* Every loop contains in its list of inner loops every loop nested inside + it, even if there are intermediate loops. This works because we're doing + a depth-first search here and never visit a loop more than once. */ + FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, inner) + { + bfin_optimize_loop (inner); + + if (!inner->bad && inner_depth < inner->depth) + { + inner_depth = inner->depth; + + loop->clobber_loop0 |= inner->clobber_loop0; + loop->clobber_loop1 |= inner->clobber_loop1; + } + } + + loop->depth = inner_depth + 1; + if (loop->depth > MAX_LOOP_DEPTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no); + goto bad_loop; + } + + /* Get the loop iteration register. 
*/ + iter_reg = loop->iter_reg; + + if (!REG_P (iter_reg)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d iteration count not in a register\n", + loop->loop_no); + goto bad_loop; + } + scratchreg = NULL_RTX; + scratch_init = iter_reg; + scratch_init_insn = NULL_RTX; + if (!PREG_P (iter_reg) && loop->incoming_src) + { + basic_block bb_in = loop->incoming_src; + int i; + for (i = REG_P0; i <= REG_P5; i++) + if ((df_regs_ever_live_p (i) + || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE + && call_used_regs[i])) + && !REGNO_REG_SET_P (df_get_live_out (bb_in), i)) + { + scratchreg = gen_rtx_REG (SImode, i); + break; + } + for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in); + insn = PREV_INSN (insn)) + { + rtx set; + if (NOTE_P (insn) || BARRIER_P (insn)) + continue; + set = single_set (insn); + if (set && rtx_equal_p (SET_DEST (set), iter_reg)) + { + if (CONSTANT_P (SET_SRC (set))) + { + scratch_init = SET_SRC (set); + scratch_init_insn = insn; + } + break; + } + else if (reg_mentioned_p (iter_reg, PATTERN (insn))) + break; + } + } + + if (loop->incoming_src) + { + /* Make sure the predecessor is before the loop start label, as required by + the LSETUP instruction. */ + length = 0; + insn = BB_END (loop->incoming_src); + /* If we have to insert the LSETUP before a jump, count that jump in the + length. */ + if (VEC_length (edge, loop->incoming) > 1 + || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (insn)); + insn = PREV_INSN (insn); + } + + for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup not before loop_start\n", + loop->loop_no); + goto bad_loop; + } + + /* Account for the pop of a scratch register where necessary. */ + if (!PREG_P (iter_reg) && scratchreg == NULL_RTX + && ENABLE_WA_LOAD_LCREGS) + length += 2; + + if (length > MAX_LSETUP_DISTANCE) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no); + goto bad_loop; + } + } + + /* Check if start_label appears before loop_end and calculate the + offset between them. We calculate the length of instructions + conservatively. */ + length = 0; + for (insn = loop->start_label; + insn && insn != loop->loop_end; + insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + goto bad_loop; + } + + loop->length = length; + if (loop->length > MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + goto bad_loop; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (bfin_scan_loop (loop, iter_reg, loop->loop_end)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no); + goto bad_loop; + } + + /* Scan all the insns to see if the loop body clobber + any hardware loop registers. 
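+
+   (The hardware provides two independent loop register sets, LC0/LT0/LB0
+   and LC1/LT1/LB1, which is why MAX_LOOP_DEPTH is 2: nested hardware loops
+   are only possible when each loop can be given a set that its body does
+   not clobber.  The scan below records which of the two sets this loop
+   clobbers, so the code further down can pick the free one or give up.)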
*/ + + reg_lc0 = gen_rtx_REG (SImode, REG_LC0); + reg_lc1 = gen_rtx_REG (SImode, REG_LC1); + reg_lt0 = gen_rtx_REG (SImode, REG_LT0); + reg_lt1 = gen_rtx_REG (SImode, REG_LT1); + reg_lb0 = gen_rtx_REG (SImode, REG_LB0); + reg_lb1 = gen_rtx_REG (SImode, REG_LB1); + + FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb) + { + rtx insn; + + for (insn = BB_HEAD (bb); + insn != NEXT_INSN (BB_END (bb)); + insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + + if (reg_set_p (reg_lc0, insn) + || reg_set_p (reg_lt0, insn) + || reg_set_p (reg_lb0, insn)) + loop->clobber_loop0 = 1; + + if (reg_set_p (reg_lc1, insn) + || reg_set_p (reg_lt1, insn) + || reg_set_p (reg_lb1, insn)) + loop->clobber_loop1 |= 1; + } + } + + if ((loop->clobber_loop0 && loop->clobber_loop1) + || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0)) + { + loop->depth = MAX_LOOP_DEPTH + 1; + if (dump_file) + fprintf (dump_file, ";; loop %d no loop reg available\n", + loop->loop_no); + goto bad_loop; + } + + /* There should be an instruction before the loop_end instruction + in the same basic block. And the instruction must not be + - JUMP + - CONDITIONAL BRANCH + - CALL + - CSYNC + - SSYNC + - Returns (RTS, RTN, etc.) */ + + bb = loop->tail; + last_insn = find_prev_insn_start (loop->loop_end); + + while (1) + { + for (; last_insn != BB_HEAD (bb); + last_insn = find_prev_insn_start (last_insn)) + if (NONDEBUG_INSN_P (last_insn)) + break; + + if (last_insn != BB_HEAD (bb)) + break; + + if (single_pred_p (bb) + && single_pred_edge (bb)->flags & EDGE_FALLTHRU + && single_pred (bb) != ENTRY_BLOCK_PTR) + { + bb = single_pred (bb); + last_insn = BB_END (bb); + continue; + } + else + { + last_insn = NULL_RTX; + break; + } + } + + if (!last_insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no last instruction\n", + loop->loop_no); + goto bad_loop; + } + + if (JUMP_P (last_insn) && !any_condjump_p (last_insn)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last instruction\n", + loop->loop_no); + goto bad_loop; + } + /* In all other cases, try to replace a bad last insn with a nop. */ + else if (JUMP_P (last_insn) + || CALL_P (last_insn) + || get_attr_type (last_insn) == TYPE_SYNC + || get_attr_type (last_insn) == TYPE_CALL + || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI + || recog_memoized (last_insn) == CODE_FOR_return_internal + || GET_CODE (PATTERN (last_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_insn)) >= 0) + { + if (loop->length + 2 > MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + goto bad_loop; + } + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n", + loop->loop_no); + + last_insn = emit_insn_after (gen_forced_nop (), last_insn); + } + + loop->last_insn = last_insn; + + /* The loop is good for replacement. */ + start_label = loop->start_label; + end_label = gen_label_rtx (); + iter_reg = loop->iter_reg; + + if (loop->depth == 1 && !loop->clobber_loop1) + { + lc_reg = reg_lc1; + lt_reg = reg_lt1; + lb_reg = reg_lb1; + loop->clobber_loop1 = 1; + } + else + { + lc_reg = reg_lc0; + lt_reg = reg_lt0; + lb_reg = reg_lb0; + loop->clobber_loop0 = 1; + } + + loop->end_label = end_label; + + /* Create a sequence containing the loop setup. */ + start_sequence (); + + /* LSETUP only accepts P registers. If we have one, we can use it, + otherwise there are several ways of working around the problem. 
+ If we're not affected by anomaly 312, we can load the LC register + from any iteration register, and use LSETUP without initialization. + If we've found a P scratch register that's not live here, we can + instead copy the iter_reg into that and use an initializing LSETUP. + If all else fails, push and pop P0 and use it as a scratch. */ + if (P_REGNO_P (REGNO (iter_reg))) + { + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, iter_reg); + seq_end = emit_insn (loop_init); + } + else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg)) + { + emit_insn (gen_movsi (lc_reg, iter_reg)); + loop_init = gen_lsetup_without_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg); + seq_end = emit_insn (loop_init); + } + else if (scratchreg != NULL_RTX) + { + emit_insn (gen_movsi (scratchreg, scratch_init)); + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, scratchreg); + seq_end = emit_insn (loop_init); + if (scratch_init_insn != NULL_RTX) + delete_insn (scratch_init_insn); + } + else + { + rtx p0reg = gen_rtx_REG (SImode, REG_P0); + rtx push = gen_frame_mem (SImode, + gen_rtx_PRE_DEC (SImode, stack_pointer_rtx)); + rtx pop = gen_frame_mem (SImode, + gen_rtx_POST_INC (SImode, stack_pointer_rtx)); + emit_insn (gen_movsi (push, p0reg)); + emit_insn (gen_movsi (p0reg, scratch_init)); + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, p0reg); + emit_insn (loop_init); + seq_end = emit_insn (gen_movsi (p0reg, pop)); + if (scratch_init_insn != NULL_RTX) + delete_insn (scratch_init_insn); + } + + if (dump_file) + { + fprintf (dump_file, ";; replacing loop %d initializer with\n", + loop->loop_no); + print_rtl_single (dump_file, loop_init); + fprintf (dump_file, ";; replacing loop %d terminator with\n", + loop->loop_no); + print_rtl_single (dump_file, loop->loop_end); + } + + /* If the loop isn't entered at the top, also create a jump to the entry + point. */ + if (!loop->incoming_src && loop->head != loop->incoming_dest) + { + rtx label = BB_HEAD (loop->incoming_dest); + /* If we're jumping to the final basic block in the loop, and there's + only one cheap instruction before the end (typically an increment of + an induction variable), we can just emit a copy here instead of a + jump. */ + if (loop->incoming_dest == loop->tail + && next_real_insn (label) == last_insn + && asm_noperands (last_insn) < 0 + && GET_CODE (PATTERN (last_insn)) == SET) + { + seq_end = emit_insn (copy_rtx (PATTERN (last_insn))); + } + else + { + emit_jump_insn (gen_jump (label)); + seq_end = emit_barrier (); + } + } + + seq = get_insns (); + end_sequence (); + + if (loop->incoming_src) + { + rtx prev = BB_END (loop->incoming_src); + if (VEC_length (edge, loop->incoming) > 1 + || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (prev)); + prev = PREV_INSN (prev); + } + emit_insn_after (seq, prev); + } + else + { + basic_block new_bb; + edge e; + edge_iterator ei; + +#ifdef ENABLE_CHECKING + if (loop->head != loop->incoming_dest) + { + /* We aren't entering the loop at the top. Since we've established + that the loop is entered only at one point, this means there + can't be fallthru edges into the head. Any such fallthru edges + would become invalid when we insert the new block, so verify + that this does not in fact happen. 
*/ + FOR_EACH_EDGE (e, ei, loop->head->preds) + gcc_assert (!(e->flags & EDGE_FALLTHRU)); + } +#endif + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU) + || e->dest != loop->head) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + e = make_edge (new_bb, loop->head, 0); + } + + delete_insn (loop->loop_end); + /* Insert the loop end label before the last instruction of the loop. */ + emit_label_before (loop->end_label, loop->last_insn); + + return; + + bad_loop: + + if (dump_file) + fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); + + loop->bad = 1; + + if (DPREG_P (loop->iter_reg)) + { + /* If loop->iter_reg is a DREG or PREG, we can split it here + without scratch register. */ + rtx insn, test; + + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, + loop->iter_reg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); + } +} + +/* Called from bfin_reorg_loops when a potential loop end is found. LOOP is + a newly set up structure describing the loop, it is this function's + responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the + loop_end insn and its enclosing basic block. */ + +static void +bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn) +{ + unsigned dwork = 0; + basic_block bb; + VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20); + + loop->tail = tail_bb; + loop->head = BRANCH_EDGE (tail_bb)->dest; + loop->successor = FALLTHRU_EDGE (tail_bb)->dest; + loop->loop_end = tail_insn; + loop->last_insn = NULL_RTX; + loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1)); + loop->depth = loop->length = 0; + loop->visited = 0; + loop->clobber_loop0 = loop->clobber_loop1 = 0; + loop->outer = NULL; + loop->loops = NULL; + loop->incoming = VEC_alloc (edge, gc, 2); + loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0); + loop->end_label = NULL_RTX; + loop->bad = 0; + + VEC_safe_push (basic_block, heap, works, loop->head); + + while (VEC_iterate (basic_block, works, dwork++, bb)) + { + edge e; + edge_iterator ei; + if (bb == EXIT_BLOCK_PTR) + { + /* We've reached the exit block. The loop must be bad. */ + if (dump_file) + fprintf (dump_file, + ";; Loop is bad - reached exit block while scanning\n"); + loop->bad = 1; + break; + } + + if (!bitmap_set_bit (loop->block_bitmap, bb->index)) + continue; + + /* We've not seen this block before. Add it to the loop's + list and then add each successor to the work list. */ + + VEC_safe_push (basic_block, heap, loop->blocks, bb); + + if (bb != tail_bb) + { + FOR_EACH_EDGE (e, ei, bb->succs) + { + basic_block succ = EDGE_SUCC (bb, ei.index)->dest; + if (!REGNO_REG_SET_P (df_get_live_in (succ), + REGNO (loop->iter_reg))) + continue; + if (!VEC_space (basic_block, works, 1)) + { + if (dwork) + { + VEC_block_remove (basic_block, works, 0, dwork); + dwork = 0; + } + else + VEC_reserve (basic_block, heap, works, 1); + } + VEC_quick_push (basic_block, works, succ); + } + } + } + + /* Find the predecessor, and make sure nothing else jumps into this loop. 
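+
+   Every edge from outside the loop into it is collected in loop->incoming.
+   The code below then checks whether all such edges share a single source
+   block (incoming_src) or a single destination block (incoming_dest);
+   either shape gives a place to put the LSETUP.  If neither holds, it
+   retries once after pulling forwarder-block predecessors into the loop,
+   and otherwise marks the loop bad.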
*/ + if (!loop->bad) + { + int pass, retry; + FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb) + { + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block pred = e->src; + + if (!bfin_bb_in_loop (loop, pred)) + { + if (dump_file) + fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", + loop->loop_no, pred->index, + e->dest->index); + VEC_safe_push (edge, gc, loop->incoming, e); + } + } + } + + for (pass = 0, retry = 1; retry && pass < 2; pass++) + { + edge e; + edge_iterator ei; + bool first = true; + retry = 0; + + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (first) + { + loop->incoming_src = e->src; + loop->incoming_dest = e->dest; + first = false; + } + else + { + if (e->dest != loop->incoming_dest) + loop->incoming_dest = NULL; + if (e->src != loop->incoming_src) + loop->incoming_src = NULL; + } + if (loop->incoming_src == NULL && loop->incoming_dest == NULL) + { + if (pass == 0) + { + if (dump_file) + fprintf (dump_file, + ";; retrying loop %d with forwarder blocks\n", + loop->loop_no); + retry = 1; + break; + } + loop->bad = 1; + if (dump_file) + fprintf (dump_file, + ";; can't find suitable entry for loop %d\n", + loop->loop_no); + goto out; + } + } + if (retry) + { + retry = 0; + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (forwarder_block_p (e->src)) + { + edge e2; + edge_iterator ei2; + + if (dump_file) + fprintf (dump_file, + ";; Adding forwarder block %d to loop %d and retrying\n", + e->src->index, loop->loop_no); + VEC_safe_push (basic_block, heap, loop->blocks, e->src); + bitmap_set_bit (loop->block_bitmap, e->src->index); + FOR_EACH_EDGE (e2, ei2, e->src->preds) + VEC_safe_push (edge, gc, loop->incoming, e2); + VEC_unordered_remove (edge, loop->incoming, ei.index); + retry = 1; + break; + } + } + if (!retry) + { + if (dump_file) + fprintf (dump_file, ";; No forwarder blocks found\n"); + loop->bad = 1; + } + } + } + } + + out: + VEC_free (basic_block, heap, works); +} + +/* Analyze the structure of the loops in the current function. Use STACK + for bitmap allocations. Returns all the valid candidates for hardware + loops found in this function. */ +static loop_info +bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file) +{ + loop_info loops = NULL; + loop_info loop; + basic_block bb; + bitmap tmp_bitmap; + int nloops = 0; + + /* Find all the possible loop tails. This means searching for every + loop_end instruction. For each one found, create a loop_info + structure and add the head block to the work list. */ + FOR_EACH_BB (bb) + { + rtx tail = BB_END (bb); + + while (GET_CODE (tail) == NOTE) + tail = PREV_INSN (tail); + + bb->aux = NULL; + + if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end) + { + rtx insn; + /* A possible loop end */ + + /* There's a degenerate case we can handle - an empty loop consisting + of only a back branch. Handle that by deleting the branch. 
*/ + insn = BB_HEAD (BRANCH_EDGE (bb)->dest); + if (next_real_insn (insn) == tail) + { + if (dump_file) + { + fprintf (dump_file, ";; degenerate loop ending at\n"); + print_rtl_single (dump_file, tail); + } + delete_insn_and_edges (tail); + continue; + } + + loop = XNEW (struct loop_info_d); + loop->next = loops; + loops = loop; + loop->loop_no = nloops++; + loop->blocks = VEC_alloc (basic_block, heap, 20); + loop->block_bitmap = BITMAP_ALLOC (stack); + bb->aux = loop; + + if (dump_file) + { + fprintf (dump_file, ";; potential loop %d ending at\n", + loop->loop_no); + print_rtl_single (dump_file, tail); + } + + bfin_discover_loop (loop, bb, tail); + } + } + + tmp_bitmap = BITMAP_ALLOC (stack); + /* Compute loop nestings. */ + for (loop = loops; loop; loop = loop->next) + { + loop_info other; + if (loop->bad) + continue; + + for (other = loop->next; other; other = other->next) + { + if (other->bad) + continue; + + bitmap_and (tmp_bitmap, other->block_bitmap, loop->block_bitmap); + if (bitmap_empty_p (tmp_bitmap)) + continue; + if (bitmap_equal_p (tmp_bitmap, other->block_bitmap)) + { + other->outer = loop; + VEC_safe_push (loop_info, heap, loop->loops, other); + } + else if (bitmap_equal_p (tmp_bitmap, loop->block_bitmap)) + { + loop->outer = other; + VEC_safe_push (loop_info, heap, other->loops, loop); + } + else + { + if (dump_file) + fprintf (dump_file, + ";; can't find suitable nesting for loops %d and %d\n", + loop->loop_no, other->loop_no); + loop->bad = other->bad = 1; + } + } + } + BITMAP_FREE (tmp_bitmap); + + return loops; +} + +/* Free up the loop structures in LOOPS. */ +static void +free_loops (loop_info loops) +{ + while (loops) + { + loop_info loop = loops; + loops = loop->next; + VEC_free (loop_info, heap, loop->loops); + VEC_free (basic_block, heap, loop->blocks); + BITMAP_FREE (loop->block_bitmap); + XDELETE (loop); + } +} + +#define BB_AUX_INDEX(BB) ((intptr_t)(BB)->aux) + +/* The taken-branch edge from the loop end can actually go forward. Since the + Blackfin's LSETUP instruction requires that the loop end be after the loop + start, try to reorder a loop's basic blocks when we find such a case. */ +static void +bfin_reorder_loops (loop_info loops, FILE *dump_file) +{ + basic_block bb; + loop_info loop; + + FOR_EACH_BB (bb) + bb->aux = NULL; + cfg_layout_initialize (0); + + for (loop = loops; loop; loop = loop->next) + { + intptr_t index; + basic_block bb; + edge e; + edge_iterator ei; + + if (loop->bad) + continue; + + /* Recreate an index for basic blocks that represents their order. 
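+
+   (The linear position of each block is stashed in its aux field and read
+   back through the BB_AUX_INDEX cast defined above; comparing the positions
+   of the loop head and tail is then cheap, and a head that does not already
+   precede the tail is moved so that it does, since LSETUP needs the loop
+   end to come after the loop start.)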
*/ + for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0; + bb != EXIT_BLOCK_PTR; + bb = bb->next_bb, index++) + bb->aux = (PTR) index; + + if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail)) + continue; + + FOR_EACH_EDGE (e, ei, loop->head->succs) + { + if (bitmap_bit_p (loop->block_bitmap, e->dest->index) + && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) + { + basic_block start_bb = e->dest; + basic_block start_prev_bb = start_bb->prev_bb; + + if (dump_file) + fprintf (dump_file, ";; Moving block %d before block %d\n", + loop->head->index, start_bb->index); + loop->head->prev_bb->next_bb = loop->head->next_bb; + loop->head->next_bb->prev_bb = loop->head->prev_bb; + + loop->head->prev_bb = start_prev_bb; + loop->head->next_bb = start_bb; + start_prev_bb->next_bb = start_bb->prev_bb = loop->head; + break; + } + } + loops = loops->next; + } + + FOR_EACH_BB (bb) + { + if (bb->next_bb != EXIT_BLOCK_PTR) + bb->aux = bb->next_bb; + else + bb->aux = NULL; + } + cfg_layout_finalize (); + df_analyze (); +} + +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns + and tries to rewrite the RTL of these loops so that proper Blackfin + hardware loops are generated. */ + +static void +bfin_reorg_loops (FILE *dump_file) +{ + loop_info loops = NULL; + loop_info loop; + basic_block bb; + bitmap_obstack stack; + + bitmap_obstack_initialize (&stack); + + if (dump_file) + fprintf (dump_file, ";; Find loops, first pass\n\n"); + + loops = bfin_discover_loops (&stack, dump_file); + + if (dump_file) + bfin_dump_loops (loops); + + bfin_reorder_loops (loops, dump_file); + free_loops (loops); + + if (dump_file) + fprintf (dump_file, ";; Find loops, second pass\n\n"); + + loops = bfin_discover_loops (&stack, dump_file); + if (dump_file) + { + fprintf (dump_file, ";; All loops found:\n\n"); + bfin_dump_loops (loops); + } + + /* Now apply the optimizations. */ + for (loop = loops; loop; loop = loop->next) + bfin_optimize_loop (loop); + + if (dump_file) + { + fprintf (dump_file, ";; After hardware loops optimization:\n\n"); + bfin_dump_loops (loops); + } + + free_loops (loops); + + if (dump_file) + print_rtl (dump_file, get_insns ()); + + FOR_EACH_BB (bb) + bb->aux = NULL; + + splitting_loops = 1; + FOR_EACH_BB (bb) + { + rtx insn = BB_END (bb); + if (!JUMP_P (insn)) + continue; + + try_split (PATTERN (insn), insn, 1); + } + splitting_loops = 0; +} + +/* Possibly generate a SEQUENCE out of three insns found in SLOT. + Returns true if we modified the insn chain, false otherwise. */ +static bool +gen_one_bundle (rtx slot[3]) +{ + gcc_assert (slot[1] != NULL_RTX); + + /* Don't add extra NOPs if optimizing for size. */ + if (optimize_size + && (slot[0] == NULL_RTX || slot[2] == NULL_RTX)) + return false; + + /* Verify that we really can do the multi-issue. */ + if (slot[0]) + { + rtx t = NEXT_INSN (slot[0]); + while (t != slot[1]) + { + if (GET_CODE (t) != NOTE + || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + if (slot[2]) + { + rtx t = NEXT_INSN (slot[1]); + while (t != slot[2]) + { + if (GET_CODE (t) != NOTE + || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + + if (slot[0] == NULL_RTX) + { + slot[0] = emit_insn_before (gen_mnop (), slot[1]); + df_insn_rescan (slot[0]); + } + if (slot[2] == NULL_RTX) + { + slot[2] = emit_insn_after (gen_forced_nop (), slot[1]); + df_insn_rescan (slot[2]); + } + + /* Avoid line number information being printed inside one bundle. 
*/ + if (INSN_LOCATOR (slot[1]) + && INSN_LOCATOR (slot[1]) != INSN_LOCATOR (slot[0])) + INSN_LOCATOR (slot[1]) = INSN_LOCATOR (slot[0]); + if (INSN_LOCATOR (slot[2]) + && INSN_LOCATOR (slot[2]) != INSN_LOCATOR (slot[0])) + INSN_LOCATOR (slot[2]) = INSN_LOCATOR (slot[0]); + + /* Terminate them with "|| " instead of ";" in the output. */ + PUT_MODE (slot[0], SImode); + PUT_MODE (slot[1], SImode); + /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */ + PUT_MODE (slot[2], QImode); + return true; +} + +/* Go through all insns, and use the information generated during scheduling + to generate SEQUENCEs to represent bundles of instructions issued + simultaneously. */ + +static void +bfin_gen_bundles (void) +{ + basic_block bb; + FOR_EACH_BB (bb) + { + rtx insn, next; + rtx slot[3]; + int n_filled = 0; + + slot[0] = slot[1] = slot[2] = NULL_RTX; + for (insn = BB_HEAD (bb);; insn = next) + { + int at_end; + rtx delete_this = NULL_RTX; + + if (NONDEBUG_INSN_P (insn)) + { + enum attr_type type = get_attr_type (insn); + + if (type == TYPE_STALL) + { + gcc_assert (n_filled == 0); + delete_this = insn; + } + else + { + if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM) + slot[0] = insn; + else if (slot[1] == NULL_RTX) + slot[1] = insn; + else + slot[2] = insn; + n_filled++; + } + } + + next = NEXT_INSN (insn); + while (next && insn != BB_END (bb) + && !(INSN_P (next) + && GET_CODE (PATTERN (next)) != USE + && GET_CODE (PATTERN (next)) != CLOBBER)) + { + insn = next; + next = NEXT_INSN (insn); + } + + /* BB_END can change due to emitting extra NOPs, so check here. */ + at_end = insn == BB_END (bb); + if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode)) + { + if ((n_filled < 2 + || !gen_one_bundle (slot)) + && slot[0] != NULL_RTX) + { + rtx pat = PATTERN (slot[0]); + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC + && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT) + { + SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0); + INSN_CODE (slot[0]) = -1; + df_insn_rescan (slot[0]); + } + } + n_filled = 0; + slot[0] = slot[1] = slot[2] = NULL_RTX; + } + if (delete_this != NULL_RTX) + delete_insn (delete_this); + if (at_end) + break; + } + } +} + +/* Ensure that no var tracking notes are emitted in the middle of a + three-instruction bundle. */ + +static void +reorder_var_tracking_notes (void) +{ + basic_block bb; + FOR_EACH_BB (bb) + { + rtx insn, next; + rtx queue = NULL_RTX; + bool in_bundle = false; + + for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next) + { + next = NEXT_INSN (insn); + + if (INSN_P (insn)) + { + /* Emit queued up notes at the last instruction of a bundle. */ + if (GET_MODE (insn) == QImode) + { + while (queue) + { + rtx next_queue = PREV_INSN (queue); + PREV_INSN (NEXT_INSN (insn)) = queue; + NEXT_INSN (queue) = NEXT_INSN (insn); + NEXT_INSN (insn) = queue; + PREV_INSN (queue) = insn; + queue = next_queue; + } + in_bundle = false; + } + else if (GET_MODE (insn) == SImode) + in_bundle = true; + } + else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION) + { + if (in_bundle) + { + rtx prev = PREV_INSN (insn); + PREV_INSN (next) = prev; + NEXT_INSN (prev) = next; + + PREV_INSN (insn) = queue; + queue = insn; + } + } + } + } +} + +/* On some silicon revisions, functions shorter than a certain number of cycles + can cause unpredictable behaviour. Work around this by adding NOPs as + needed. */ +static void +workaround_rts_anomaly (void) +{ + rtx insn, first_insn = NULL_RTX; + int cycles = 4; + + if (! 
ENABLE_WA_RETS) + return; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (BARRIER_P (insn)) + return; + + if (NOTE_P (insn) || LABEL_P (insn)) + continue; + + if (first_insn == NULL_RTX) + first_insn = insn; + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC + || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0) + continue; + + if (CALL_P (insn)) + return; + + if (JUMP_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_return_internal) + break; + + /* Nothing to worry about for direct jumps. */ + if (!any_condjump_p (insn)) + return; + if (cycles <= 1) + return; + cycles--; + } + else if (INSN_P (insn)) + { + rtx pat = PATTERN (insn); + int this_cycles = 1; + + if (GET_CODE (pat) == PARALLEL) + { + if (push_multiple_operation (pat, VOIDmode) + || pop_multiple_operation (pat, VOIDmode)) + this_cycles = n_regs_to_save; + } + else + { + int icode = recog_memoized (insn); + + if (icode == CODE_FOR_link) + this_cycles = 4; + else if (icode == CODE_FOR_unlink) + this_cycles = 3; + else if (icode == CODE_FOR_mulsi3) + this_cycles = 5; + } + if (this_cycles >= cycles) + return; + + cycles -= this_cycles; + } + } + while (cycles > 0) + { + emit_insn_before (gen_nop (), first_insn); + cycles--; + } +} + +/* Return an insn type for INSN that can be used by the caller for anomaly + workarounds. This differs from plain get_attr_type in that it handles + SEQUENCEs. */ + +static enum attr_type +type_for_anomaly (rtx insn) +{ + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == SEQUENCE) + { + enum attr_type t; + t = get_attr_type (XVECEXP (pat, 0, 1)); + if (t == TYPE_MCLD) + return t; + t = get_attr_type (XVECEXP (pat, 0, 2)); + if (t == TYPE_MCLD) + return t; + return TYPE_MCST; + } + else + return get_attr_type (insn); +} + +/* Return true iff the address found in MEM is based on the register + NP_REG and optionally has a positive offset. */ +static bool +harmless_null_pointer_p (rtx mem, int np_reg) +{ + mem = XEXP (mem, 0); + if (GET_CODE (mem) == POST_INC || GET_CODE (mem) == POST_DEC) + mem = XEXP (mem, 0); + if (REG_P (mem) && (int) REGNO (mem) == np_reg) + return true; + if (GET_CODE (mem) == PLUS + && REG_P (XEXP (mem, 0)) && (int) REGNO (XEXP (mem, 0)) == np_reg) + { + mem = XEXP (mem, 1); + if (GET_CODE (mem) == CONST_INT && INTVAL (mem) > 0) + return true; + } + return false; +} + +/* Return nonzero if INSN contains any loads that may trap. */ + +static bool +trapping_loads_p (rtx insn, int np_reg, bool after_np_branch) +{ + rtx mem = SET_SRC (single_set (insn)); + + if (!after_np_branch) + np_reg = -1; + return ((np_reg == -1 || !harmless_null_pointer_p (mem, np_reg)) + && may_trap_p (mem)); +} + +/* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of + a three-insn bundle, see if one of them is a load and return that if so. + Return NULL_RTX if the insn does not contain loads. */ +static rtx +find_load (rtx insn) +{ + if (!NONDEBUG_INSN_P (insn)) + return NULL_RTX; + if (get_attr_type (insn) == TYPE_MCLD) + return insn; + if (GET_MODE (insn) != SImode) + return NULL_RTX; + do { + insn = NEXT_INSN (insn); + if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode) + && get_attr_type (insn) == TYPE_MCLD) + return insn; + } while (GET_MODE (insn) != QImode); + return NULL_RTX; +} + +/* Determine whether PAT is an indirect call pattern. 
*/ +static bool +indirect_call_p (rtx pat) +{ + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + gcc_assert (GET_CODE (pat) == CALL); + pat = XEXP (pat, 0); + gcc_assert (GET_CODE (pat) == MEM); + pat = XEXP (pat, 0); + + return REG_P (pat); +} + +/* During workaround_speculation, track whether we're in the shadow of a + conditional branch that tests a P register for NULL. If so, we can omit + emitting NOPs if we see a load from that P register, since a speculative + access at address 0 isn't a problem, and the load is executed in all other + cases anyway. + Global for communication with note_np_check_stores through note_stores. + */ +int np_check_regno = -1; +bool np_after_branch = false; + +/* Subroutine of workaround_speculation, called through note_stores. */ +static void +note_np_check_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + if (REG_P (x) && (REGNO (x) == REG_CC || (int) REGNO (x) == np_check_regno)) + np_check_regno = -1; +} + +static void +workaround_speculation (void) +{ + rtx insn, next; + rtx last_condjump = NULL_RTX; + int cycles_since_jump = INT_MAX; + int delay_added = 0; + + if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS + && ! ENABLE_WA_INDIRECT_CALLS) + return; + + /* First pass: find predicted-false branches; if something after them + needs nops, insert them or change the branch to predict true. */ + for (insn = get_insns (); insn; insn = next) + { + rtx pat; + int delay_needed = 0; + + next = find_next_insn_start (insn); + + if (NOTE_P (insn) || BARRIER_P (insn)) + continue; + + if (LABEL_P (insn)) + { + np_check_regno = -1; + continue; + } + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC) + continue; + + if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0) + { + np_check_regno = -1; + continue; + } + + if (JUMP_P (insn)) + { + /* Is this a condjump based on a null pointer comparison we saw + earlier? */ + if (np_check_regno != -1 + && recog_memoized (insn) == CODE_FOR_cbranchbi4) + { + rtx op = XEXP (SET_SRC (PATTERN (insn)), 0); + gcc_assert (GET_CODE (op) == EQ || GET_CODE (op) == NE); + if (GET_CODE (op) == NE) + np_after_branch = true; + } + if (any_condjump_p (insn) + && ! cbranch_predicted_taken_p (insn)) + { + last_condjump = insn; + delay_added = 0; + cycles_since_jump = 0; + } + else + cycles_since_jump = INT_MAX; + } + else if (CALL_P (insn)) + { + np_check_regno = -1; + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS) + { + delay_needed = 3; + } + } + else if (NONDEBUG_INSN_P (insn)) + { + rtx load_insn = find_load (insn); + enum attr_type type = type_for_anomaly (insn); + + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + + /* Detect a comparison of a P register with zero. If we later + see a condjump based on it, we have found a null pointer + check. 
*/ + if (recog_memoized (insn) == CODE_FOR_compare_eq) + { + rtx src = SET_SRC (PATTERN (insn)); + if (REG_P (XEXP (src, 0)) + && P_REGNO_P (REGNO (XEXP (src, 0))) + && XEXP (src, 1) == const0_rtx) + { + np_check_regno = REGNO (XEXP (src, 0)); + np_after_branch = false; + } + else + np_check_regno = -1; + } + + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn, np_check_regno, + np_after_branch)) + delay_needed = 4; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) + delay_needed = 3; + + /* See if we need to forget about a null pointer comparison + we found earlier. */ + if (recog_memoized (insn) != CODE_FOR_compare_eq) + { + note_stores (PATTERN (insn), note_np_check_stores, NULL); + if (np_check_regno != -1) + { + if (find_regno_note (insn, REG_INC, np_check_regno)) + np_check_regno = -1; + } + } + + } + + if (delay_needed > cycles_since_jump + && (delay_needed - cycles_since_jump) > delay_added) + { + rtx pat1; + int num_clobbers; + rtx *op = recog_data.operand; + + delay_needed -= cycles_since_jump; + + extract_insn (last_condjump); + if (optimize_size) + { + pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2], + op[3]); + cycles_since_jump = INT_MAX; + } + else + { + /* Do not adjust cycles_since_jump in this case, so that + we'll increase the number of NOPs for a subsequent insn + if necessary. */ + pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3], + GEN_INT (delay_needed)); + delay_added = delay_needed; + } + PATTERN (last_condjump) = pat1; + INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers); + } + if (CALL_P (insn)) + { + cycles_since_jump = INT_MAX; + delay_added = 0; + } + } + + /* Second pass: for predicted-true branches, see if anything at the + branch destination needs extra nops. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + int cycles_since_jump; + if (JUMP_P (insn) + && any_condjump_p (insn) + && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken + || cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx label = target; + rtx next_tgt; + + cycles_since_jump = 0; + for (; target && cycles_since_jump < 3; target = next_tgt) + { + rtx pat; + + next_tgt = find_next_insn_start (target); + + if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target)) + continue; + + pat = PATTERN (target); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC + || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0) + continue; + + if (NONDEBUG_INSN_P (target)) + { + rtx load_insn = find_load (target); + enum attr_type type = type_for_anomaly (target); + int delay_needed = 0; + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn, -1, false)) + delay_needed = 2; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) + delay_needed = 2; + + if (delay_needed > cycles_since_jump) + { + rtx prev = prev_real_insn (label); + delay_needed -= cycles_since_jump; + if (dump_file) + fprintf (dump_file, "Adding %d nops after %d\n", + delay_needed, INSN_UID (label)); + if (JUMP_P (prev) + && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops) + { + rtx x; + HOST_WIDE_INT v; + + if (dump_file) + fprintf (dump_file, + "Reducing nops on insn %d.\n", + INSN_UID (prev)); + x = PATTERN (prev); + x = XVECEXP (x, 0, 1); + v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed; + XVECEXP (x, 0, 0) = GEN_INT (v); + } + while (delay_needed-- > 0) + emit_insn_after (gen_nop (), label); + break; + } + } + } + } + } +} + +/* Called just before the final scheduling pass. If we need to insert NOPs + later on to work around speculative loads, insert special placeholder + insns that cause loads to be delayed for as many cycles as necessary + (and possible). This reduces the number of NOPs we need to add. + The dummy insns we generate are later removed by bfin_gen_bundles. */ +static void +add_sched_insns_for_speculation (void) +{ + rtx insn; + + if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS + && ! ENABLE_WA_INDIRECT_CALLS) + return; + + /* First pass: find predicted-false branches; if something after them + needs nops, insert them or change the branch to predict true. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn)) + continue; + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC + || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0) + continue; + + if (JUMP_P (insn)) + { + if (any_condjump_p (insn) + && !cbranch_predicted_taken_p (insn)) + { + rtx n = next_real_insn (insn); + emit_insn_before (gen_stall (GEN_INT (3)), n); + } + } + } + + /* Second pass: for predicted-true branches, see if anything at the + branch destination needs extra nops. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) + && any_condjump_p (insn) + && (cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx next = next_real_insn (target); + + if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE + && get_attr_type (next) == TYPE_STALL) + continue; + emit_insn_before (gen_stall (GEN_INT (1)), next); + } + } +} + +/* We use the machine specific reorg pass for emitting CSYNC instructions + after conditional branches as needed. + + The Blackfin is unusual in that a code sequence like + if cc jump label + r0 = (p0) + may speculatively perform the load even if the condition isn't true. This + happens for a branch that is predicted not taken, because the pipeline + isn't flushed or stalled, so the early stages of the following instructions, + which perform the memory reference, are allowed to execute before the + jump condition is evaluated. + Therefore, we must insert additional instructions in all places where this + could lead to incorrect behavior. The manual recommends CSYNC, while + VDSP seems to use NOPs (even though its corresponding compiler option is + named CSYNC). + + When optimizing for speed, we emit NOPs, which seems faster than a CSYNC. + When optimizing for size, we turn the branch into a predicted taken one. + This may be slower due to mispredicts, but saves code size. */ + +static void +bfin_reorg (void) +{ + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + if (flag_schedule_insns_after_reload) + { + splitting_for_sched = 1; + split_all_insns (); + splitting_for_sched = 0; + + add_sched_insns_for_speculation (); + + timevar_push (TV_SCHED2); + if (flag_selective_scheduling2 + && !maybe_skip_selective_scheduling ()) + run_selective_scheduling (); + else + schedule_insns (); + timevar_pop (TV_SCHED2); + + /* Examine the schedule and insert nops as necessary for 64-bit parallel + instructions. */ + bfin_gen_bundles (); + } + + df_analyze (); + + /* Doloop optimization */ + if (cfun->machine->has_hardware_loops) + bfin_reorg_loops (dump_file); + + workaround_speculation (); + + if (flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + reorder_var_tracking_notes (); + timevar_pop (TV_VAR_TRACKING); + } + + df_finish_pass (false); + + workaround_rts_anomaly (); +} + +/* Handle interrupt_handler, exception_handler and nmi_handler function + attributes; arguments as in struct attribute_spec.handler. */ + +static tree +handle_int_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree x = *node; + if (TREE_CODE (x) == FUNCTION_DECL) + x = TREE_TYPE (x); + + if (TREE_CODE (x) != FUNCTION_TYPE) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + else if (funkind (x) != SUBROUTINE) + error ("multiple function type attributes specified"); + + return NULL_TREE; +} + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). 
*/ + +static int +bfin_comp_type_attributes (const_tree type1, const_tree type2) +{ + e_funkind kind1, kind2; + + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + kind1 = funkind (type1); + kind2 = funkind (type2); + + if (kind1 != kind2) + return 0; + + /* Check for mismatched modifiers */ + if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2))) + return 0; + + return 1; +} + +/* Handle a "longcall" or "shortcall" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +bfin_handle_longcall_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0 + && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node))) + || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0 + && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node)))) + { + warning (OPT_Wattributes, + "can%'t apply both longcall and shortcall attributes to the same function"); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle a "l1_text" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error ("%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + else if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l1.text") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l1.text"); + + return NULL_TREE; +} + +/* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute; + arguments as in struct attribute_spec.handler. 
*/ + +static tree +bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != VAR_DECL) + { + error ("%qE attribute only applies to variables", + name); + *no_add_attrs = true; + } + else if (current_function_decl != NULL_TREE + && !TREE_STATIC (decl)) + { + error ("%qE attribute cannot be specified for local variables", + name); + *no_add_attrs = true; + } + else + { + const char *section_name; + + if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0) + section_name = ".l1.data"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0) + section_name = ".l1.data.A"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0) + section_name = ".l1.data.B"; + else + gcc_unreachable (); + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + section_name) != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) + = build_string (strlen (section_name) + 1, section_name); + } + + return NULL_TREE; +} + +/* Handle a "l2" attribute; arguments as in struct attribute_spec.handler. */ + +static tree +bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name), + tree ARG_UNUSED (args), int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) == FUNCTION_DECL) + { + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l2.text") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l2.text"); + } + else if (TREE_CODE (decl) == VAR_DECL) + { + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l2.data") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l2.data"); + } + + return NULL_TREE; +} + +/* Table of valid machine attributes. */ +static const struct attribute_spec bfin_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { "interrupt_handler", 0, 0, false, true, true, handle_int_attribute }, + { "exception_handler", 0, 0, false, true, true, handle_int_attribute }, + { "nmi_handler", 0, 0, false, true, true, handle_int_attribute }, + { "nesting", 0, 0, false, true, true, NULL }, + { "kspisusp", 0, 0, false, true, true, NULL }, + { "saveall", 0, 0, false, true, true, NULL }, + { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute }, + { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute }, + { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute }, + { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute }, + { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute }, + { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute }, + { "l2", 0, 0, true, false, false, bfin_handle_l2_attribute }, + { NULL, 0, 0, false, false, false, NULL } +}; + +/* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to + tell the assembler to generate pointers to function descriptors in + some cases. 
*/ + +static bool +bfin_assemble_integer (rtx value, unsigned int size, int aligned_p) +{ + if (TARGET_FDPIC && size == UNITS_PER_WORD) + { + if (GET_CODE (value) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (value)) + { + fputs ("\t.picptr\tfuncdesc(", asm_out_file); + output_addr_const (asm_out_file, value); + fputs (")\n", asm_out_file); + return true; + } + if (!aligned_p) + { + /* We've set the unaligned SI op to NULL, so we always have to + handle the unaligned case here. */ + assemble_integer_with_op ("\t.4byte\t", value); + return true; + } + } + return default_assemble_integer (value, size, aligned_p); +} + +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + +static void +bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, + tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, tree function) +{ + rtx xops[3]; + /* The this parameter is passed as the first argument. */ + rtx this_rtx = gen_rtx_REG (Pmode, REG_R0); + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + xops[1] = this_rtx; + if (delta >= -64 && delta <= 63) + { + xops[0] = GEN_INT (delta); + output_asm_insn ("%1 += %0;", xops); + } + else if (delta >= -128 && delta < -64) + { + xops[0] = GEN_INT (delta + 64); + output_asm_insn ("%1 += -64; %1 += %0;", xops); + } + else if (delta > 63 && delta <= 126) + { + xops[0] = GEN_INT (delta - 63); + output_asm_insn ("%1 += 63; %1 += %0;", xops); + } + else + { + xops[0] = GEN_INT (delta); + output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops); + } + } + + /* Adjust the this parameter by a value stored in the vtable. */ + if (vcall_offset) + { + rtx p2tmp = gen_rtx_REG (Pmode, REG_P2); + rtx tmp = gen_rtx_REG (Pmode, REG_R3); + + xops[1] = tmp; + xops[2] = p2tmp; + output_asm_insn ("%2 = r0; %2 = [%2];", xops); + + /* Adjust the this parameter. */ + xops[0] = gen_rtx_MEM (Pmode, plus_constant (p2tmp, vcall_offset)); + if (!memory_operand (xops[0], Pmode)) + { + rtx tmp2 = gen_rtx_REG (Pmode, REG_P1); + xops[0] = GEN_INT (vcall_offset); + xops[1] = tmp2; + output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops); + xops[0] = gen_rtx_MEM (Pmode, p2tmp); + } + xops[2] = this_rtx; + output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops); + } + + xops[0] = XEXP (DECL_RTL (function), 0); + if (1 || !flag_pic || (*targetm.binds_local_p) (function)) + output_asm_insn ("jump.l\t%P0", xops); +} + +/* Codes for all the Blackfin builtins. 
*/ +enum bfin_builtins +{ + BFIN_BUILTIN_CSYNC, + BFIN_BUILTIN_SSYNC, + BFIN_BUILTIN_ONES, + BFIN_BUILTIN_COMPOSE_2X16, + BFIN_BUILTIN_EXTRACTLO, + BFIN_BUILTIN_EXTRACTHI, + + BFIN_BUILTIN_SSADD_2X16, + BFIN_BUILTIN_SSSUB_2X16, + BFIN_BUILTIN_SSADDSUB_2X16, + BFIN_BUILTIN_SSSUBADD_2X16, + BFIN_BUILTIN_MULT_2X16, + BFIN_BUILTIN_MULTR_2X16, + BFIN_BUILTIN_NEG_2X16, + BFIN_BUILTIN_ABS_2X16, + BFIN_BUILTIN_MIN_2X16, + BFIN_BUILTIN_MAX_2X16, + + BFIN_BUILTIN_SSADD_1X16, + BFIN_BUILTIN_SSSUB_1X16, + BFIN_BUILTIN_MULT_1X16, + BFIN_BUILTIN_MULTR_1X16, + BFIN_BUILTIN_NORM_1X16, + BFIN_BUILTIN_NEG_1X16, + BFIN_BUILTIN_ABS_1X16, + BFIN_BUILTIN_MIN_1X16, + BFIN_BUILTIN_MAX_1X16, + + BFIN_BUILTIN_SUM_2X16, + BFIN_BUILTIN_DIFFHL_2X16, + BFIN_BUILTIN_DIFFLH_2X16, + + BFIN_BUILTIN_SSADD_1X32, + BFIN_BUILTIN_SSSUB_1X32, + BFIN_BUILTIN_NORM_1X32, + BFIN_BUILTIN_ROUND_1X32, + BFIN_BUILTIN_NEG_1X32, + BFIN_BUILTIN_ABS_1X32, + BFIN_BUILTIN_MIN_1X32, + BFIN_BUILTIN_MAX_1X32, + BFIN_BUILTIN_MULT_1X32, + BFIN_BUILTIN_MULT_1X32X32, + BFIN_BUILTIN_MULT_1X32X32NS, + + BFIN_BUILTIN_MULHISILL, + BFIN_BUILTIN_MULHISILH, + BFIN_BUILTIN_MULHISIHL, + BFIN_BUILTIN_MULHISIHH, + + BFIN_BUILTIN_LSHIFT_1X16, + BFIN_BUILTIN_LSHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X16, + BFIN_BUILTIN_SSASHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X32, + + BFIN_BUILTIN_CPLX_MUL_16, + BFIN_BUILTIN_CPLX_MAC_16, + BFIN_BUILTIN_CPLX_MSU_16, + + BFIN_BUILTIN_CPLX_MUL_16_S40, + BFIN_BUILTIN_CPLX_MAC_16_S40, + BFIN_BUILTIN_CPLX_MSU_16_S40, + + BFIN_BUILTIN_CPLX_SQU, + + BFIN_BUILTIN_LOADBYTES, + + BFIN_BUILTIN_MAX +}; + +#define def_builtin(NAME, TYPE, CODE) \ +do { \ + add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ +} while (0) + +/* Set up all builtin functions for this target. */ +static void +bfin_init_builtins (void) +{ + tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode); + tree void_ftype_void + = build_function_type (void_type_node, void_list_node); + tree short_ftype_short + = build_function_type_list (short_integer_type_node, short_integer_type_node, + NULL_TREE); + tree short_ftype_int_int + = build_function_type_list (short_integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int_int + = build_function_type_list (integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int + = build_function_type_list (integer_type_node, integer_type_node, + NULL_TREE); + tree short_ftype_int + = build_function_type_list (short_integer_type_node, integer_type_node, + NULL_TREE); + tree int_ftype_v2hi_v2hi + = build_function_type_list (integer_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v2hi_ftype_int_int + = build_function_type_list (V2HI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_int + = build_function_type_list (V2HI_type_node, V2HI_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_short_short + = build_function_type_list (integer_type_node, short_integer_type_node, + short_integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE); + tree short_ftype_v2hi + = build_function_type_list (short_integer_type_node, V2HI_type_node, + NULL_TREE); + 
tree int_ftype_pint + = build_function_type_list (integer_type_node, + build_pointer_type (integer_type_node), + NULL_TREE); + + /* Add the remaining MMX insns with somewhat more complicated types. */ + def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC); + def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC); + + def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES); + + def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int, + BFIN_BUILTIN_COMPOSE_2X16); + def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTHI); + def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTLO); + + def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MIN_2X16); + def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MAX_2X16); + + def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADDSUB_2X16); + def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUBADD_2X16); + def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULT_2X16); + def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULTR_2X16); + def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_NEG_2X16); + def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_ABS_2X16); + + def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MIN_1X16); + def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MAX_1X16); + + def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSADD_1X16); + def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X16); + def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULT_1X16); + def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULTR_1X16); + def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short, + BFIN_BUILTIN_NEG_1X16); + def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short, + BFIN_BUILTIN_ABS_1X16); + def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int, + BFIN_BUILTIN_NORM_1X16); + + def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_SUM_2X16); + def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFHL_2X16); + def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFLH_2X16); + + def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILL); + def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHL); + def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILH); + def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHH); + + def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MIN_1X32); + def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MAX_1X32); + + def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSADD_1X32); + def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X32); + def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int, + 
BFIN_BUILTIN_NEG_1X32); + def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int, + BFIN_BUILTIN_ABS_1X32); + def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int, + BFIN_BUILTIN_NORM_1X32); + def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int, + BFIN_BUILTIN_ROUND_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short, + BFIN_BUILTIN_MULT_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32NS); + + /* Shifts. */ + def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X16); + def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_SSASHIFT_2X16); + def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_LSHIFT_1X16); + def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_LSHIFT_2X16); + def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X32); + + /* Complex numbers. */ + def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16); + def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16); + def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16); + def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16_S40); + def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16_S40); + def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16_S40); + def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi, + BFIN_BUILTIN_CPLX_SQU); + + /* "Unaligned" load. 
*/ + def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint, + BFIN_BUILTIN_LOADBYTES); + +} + + +struct builtin_description +{ + const enum insn_code icode; + const char *const name; + const enum bfin_builtins code; + int macflag; +}; + +static const struct builtin_description bdesc_2arg[] = +{ + { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 }, + + { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 }, + { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 }, + { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 }, + { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 }, + { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 }, + + { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 }, + { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 }, + { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 }, + { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 }, + + { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 }, + { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 }, + { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 }, + { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 }, + + { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 }, + { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 }, + { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 }, + { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 }, + { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 }, + { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 }, + + { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE }, + + { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 }, + { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 }, + { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 }, + { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 } + +}; + +static const struct builtin_description bdesc_1arg[] = +{ + { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 }, + + { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 }, + + { CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 }, + { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 }, + { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 }, + + { CODE_FOR_signbitssi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 }, + { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 }, + { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 }, + { 
CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 }, + + { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 }, + { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 }, + { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 }, + { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 } +}; + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (x, CONST0_RTX (SImode))); + return gen_lowpart (mode, x); +} + +/* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1 + if this is a normal binary op, or one of the MACFLAG_xxx constants. */ + +static rtx +bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, + int macflag) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode op1mode = GET_MODE (op1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode) + { + op1mode = HImode; + op1 = gen_lowpart (HImode, op1); + } + /* In case the insn wants input operands in modes different from + the result, abort. */ + gcc_assert ((op0mode == mode0 || op0mode == VOIDmode) + && (op1mode == mode1 || op1mode == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (macflag == -1) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag)); + if (! pat) + return 0; + + emit_insn (pat); + return target; +} + +/* Subroutine of bfin_expand_builtin to take care of unop insns. */ + +static rtx +bfin_expand_unop_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (op0mode == SImode && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + gcc_assert (op0mode == mode0 || op0mode == VOIDmode); + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + size_t i; + enum insn_code icode; + const struct builtin_description *d; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1, arg2; + rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg; + enum machine_mode tmode, mode0; + + switch (fcode) + { + case BFIN_BUILTIN_CSYNC: + emit_insn (gen_csync ()); + return 0; + case BFIN_BUILTIN_SSYNC: + emit_insn (gen_ssync ()); + return 0; + + case BFIN_BUILTIN_DIFFHL_2X16: + case BFIN_BUILTIN_DIFFLH_2X16: + case BFIN_BUILTIN_SUM_2X16: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3 + : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3 + : CODE_FOR_ssaddhilov2hi3); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case BFIN_BUILTIN_MULT_1X32X32: + case BFIN_BUILTIN_MULT_1X32X32NS: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + if (! target + || !register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + if (! register_operand (op0, SImode)) + op0 = copy_to_mode_reg (SImode, op0); + if (! register_operand (op1, SImode)) + op1 = copy_to_mode_reg (SImode, op1); + + a1reg = gen_rtx_REG (PDImode, REG_A1); + a0reg = gen_rtx_REG (PDImode, REG_A0); + tmp1 = gen_lowpart (V2HImode, op0); + tmp2 = gen_lowpart (V2HImode, op1); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, op0), + gen_lowpart (HImode, op1), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + + if (fcode == BFIN_BUILTIN_MULT_1X32X32) + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + else + { + /* For saturating multiplication, there's exactly one special case + to be handled: multiplying the smallest negative value with + itself. Due to shift correction in fractional multiplies, this + can overflow. Iff this happens, OP2 will contain 1, which, when + added in 32 bits to the smallest negative, wraps to the largest + positive, which is the result we want. 
*/ + op2 = gen_reg_rtx (V2HImode); + emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx)); + emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC), + gen_lowpart (SImode, op2))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + op2 = gen_reg_rtx (SImode); + emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC))); + } + emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1, + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15))); + emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg)); + if (fcode == BFIN_BUILTIN_MULT_1X32X32NS) + emit_insn (gen_addsi3 (target, target, op2)); + return target; + + case BFIN_BUILTIN_CPLX_MUL_16: + case BFIN_BUILTIN_CPLX_MUL_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_macv2hi_parts; + tmode = insn_data[icode].operand[0].mode; + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + + if (fcode == BFIN_BUILTIN_CPLX_MUL_16) + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_NONE))); + emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, const1_rtx, const0_rtx, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_MAC_16: + case BFIN_BUILTIN_CPLX_MSU_16: + case BFIN_BUILTIN_CPLX_MAC_16_S40: + case BFIN_BUILTIN_CPLX_MSU_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_macv2hi_parts; + tmode = insn_data[icode].operand[0].mode; + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + if (! 
register_operand (op2, GET_MODE (op2))) + op2 = copy_to_mode_reg (GET_MODE (op2), op2); + + tmp1 = gen_reg_rtx (SImode); + tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16))); + emit_move_insn (tmp2, gen_lowpart (SImode, op0)); + emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx)); + emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2)); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MSU_16) + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_NONE))); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40) + { + tmp1 = const1_rtx; + tmp2 = const0_rtx; + } + else + { + tmp1 = const0_rtx; + tmp2 = const1_rtx; + } + emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, tmp1, tmp2, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_SQU: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_mulv2hi; + tmp1 = gen_reg_rtx (V2HImode); + tmp2 = gen_reg_rtx (V2HImode); + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (V2HImode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + + emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0, + const0_rtx, const1_rtx, + GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx, + const0_rtx)); + emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1, + const0_rtx, const1_rtx)); + + return target; + + default: + break; + } + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return bfin_expand_binop_builtin (d->icode, exp, target, + d->macflag); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return bfin_expand_unop_builtin (d->icode, exp, target); + + gcc_unreachable (); +} + +static void +bfin_conditional_register_usage (void) +{ + /* initialize condition code flag register rtx */ + bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC); + bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS); + if (TARGET_FDPIC) + call_used_regs[FDPIC_REGNO] = 1; + if (!TARGET_FDPIC && flag_pic) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } +} + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS bfin_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN bfin_expand_builtin + +#undef TARGET_ASM_GLOBALIZE_LABEL +#define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START output_file_start + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE bfin_attribute_table + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS bfin_rtx_costs + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST 
bfin_address_cost + +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER bfin_assemble_integer + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST bfin_adjust_cost + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE bfin_issue_rate + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG bfin_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE bfin_function_arg_advance + +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p + +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION bfin_handle_option + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE bfin_option_override + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD bfin_secondary_reload + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P bfin_class_likely_spilled_p + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY bfin_return_in_memory + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE bfin_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE bfin_conditional_register_usage + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE bfin_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT bfin_trampoline_init + +/* Passes after sched2 can break the helpful TImode annotations that + haifa-sched puts on every insn. Just do scheduling in reorg. */ +#undef TARGET_DELAY_SCHED2 +#define TARGET_DELAY_SCHED2 true + +/* Variable tracking should be run after all optimizations which + change order of insns. It also needs a valid CFG. */ +#undef TARGET_DELAY_VARTRACK +#define TARGET_DELAY_VARTRACK true + +struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h new file mode 100644 index 000000000..c26b41cc5 --- /dev/null +++ b/gcc/config/bfin/bfin.h @@ -0,0 +1,1220 @@ +/* Definitions for the Blackfin port. + Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. 
+ Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef _BFIN_CONFIG +#define _BFIN_CONFIG + +#define OBJECT_FORMAT_ELF + +#define BRT 1 +#define BRF 0 + +/* CPU type. */ +typedef enum bfin_cpu_type +{ + BFIN_CPU_UNKNOWN, + BFIN_CPU_BF512, + BFIN_CPU_BF514, + BFIN_CPU_BF516, + BFIN_CPU_BF518, + BFIN_CPU_BF522, + BFIN_CPU_BF523, + BFIN_CPU_BF524, + BFIN_CPU_BF525, + BFIN_CPU_BF526, + BFIN_CPU_BF527, + BFIN_CPU_BF531, + BFIN_CPU_BF532, + BFIN_CPU_BF533, + BFIN_CPU_BF534, + BFIN_CPU_BF536, + BFIN_CPU_BF537, + BFIN_CPU_BF538, + BFIN_CPU_BF539, + BFIN_CPU_BF542, + BFIN_CPU_BF542M, + BFIN_CPU_BF544, + BFIN_CPU_BF544M, + BFIN_CPU_BF547, + BFIN_CPU_BF547M, + BFIN_CPU_BF548, + BFIN_CPU_BF548M, + BFIN_CPU_BF549, + BFIN_CPU_BF549M, + BFIN_CPU_BF561 +} bfin_cpu_t; + +/* Value of -mcpu= */ +extern bfin_cpu_t bfin_cpu_type; + +/* Value of -msi-revision= */ +extern int bfin_si_revision; + +extern unsigned int bfin_workarounds; + +/* Print subsidiary information on the compiler version in use. */ +#define TARGET_VERSION fprintf (stderr, " (BlackFin bfin)") + +/* Predefinition in the preprocessor for this target machine */ +#ifndef TARGET_CPU_CPP_BUILTINS +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("bfin"); \ + builtin_define_std ("BFIN"); \ + builtin_define ("__ADSPBLACKFIN__"); \ + builtin_define ("__ADSPLPBLACKFIN__"); \ + \ + switch (bfin_cpu_type) \ + { \ + case BFIN_CPU_BF512: \ + builtin_define ("__ADSPBF512__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF514: \ + builtin_define ("__ADSPBF514__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF516: \ + builtin_define ("__ADSPBF516__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF518: \ + builtin_define ("__ADSPBF518__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF522: \ + builtin_define ("__ADSPBF522__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF523: \ + builtin_define ("__ADSPBF523__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF524: \ + builtin_define ("__ADSPBF524__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF525: \ + builtin_define ("__ADSPBF525__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF526: \ + builtin_define ("__ADSPBF526__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF527: \ + builtin_define ("__ADSPBF527__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF531: \ + builtin_define ("__ADSPBF531__"); \ + break; \ + case BFIN_CPU_BF532: \ + builtin_define ("__ADSPBF532__"); \ + break; \ + case BFIN_CPU_BF533: \ + builtin_define ("__ADSPBF533__"); \ + break; \ + case BFIN_CPU_BF534: \ + builtin_define ("__ADSPBF534__"); \ + break; \ + case BFIN_CPU_BF536: \ + builtin_define ("__ADSPBF536__"); \ + break; \ + case BFIN_CPU_BF537: \ + builtin_define ("__ADSPBF537__"); \ 
+ break; \ + case BFIN_CPU_BF538: \ + builtin_define ("__ADSPBF538__"); \ + break; \ + case BFIN_CPU_BF539: \ + builtin_define ("__ADSPBF539__"); \ + break; \ + case BFIN_CPU_BF542M: \ + builtin_define ("__ADSPBF542M__"); \ + case BFIN_CPU_BF542: \ + builtin_define ("__ADSPBF542__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF544M: \ + builtin_define ("__ADSPBF544M__"); \ + case BFIN_CPU_BF544: \ + builtin_define ("__ADSPBF544__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF547M: \ + builtin_define ("__ADSPBF547M__"); \ + case BFIN_CPU_BF547: \ + builtin_define ("__ADSPBF547__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF548M: \ + builtin_define ("__ADSPBF548M__"); \ + case BFIN_CPU_BF548: \ + builtin_define ("__ADSPBF548__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF549M: \ + builtin_define ("__ADSPBF549M__"); \ + case BFIN_CPU_BF549: \ + builtin_define ("__ADSPBF549__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF561: \ + builtin_define ("__ADSPBF561__"); \ + break; \ + } \ + \ + if (bfin_si_revision != -1) \ + { \ + /* space of 0xnnnn and a NUL */ \ + char *buf = XALLOCAVEC (char, 7); \ + \ + sprintf (buf, "0x%04x", bfin_si_revision); \ + builtin_define_with_value ("__SILICON_REVISION__", buf, 0); \ + } \ + \ + if (bfin_workarounds) \ + builtin_define ("__WORKAROUNDS_ENABLED"); \ + if (ENABLE_WA_SPECULATIVE_LOADS) \ + builtin_define ("__WORKAROUND_SPECULATIVE_LOADS"); \ + if (ENABLE_WA_SPECULATIVE_SYNCS) \ + builtin_define ("__WORKAROUND_SPECULATIVE_SYNCS"); \ + if (ENABLE_WA_INDIRECT_CALLS) \ + builtin_define ("__WORKAROUND_INDIRECT_CALLS"); \ + if (ENABLE_WA_RETS) \ + builtin_define ("__WORKAROUND_RETS"); \ + \ + if (TARGET_FDPIC) \ + { \ + builtin_define ("__BFIN_FDPIC__"); \ + builtin_define ("__FDPIC__"); \ + } \ + if (TARGET_ID_SHARED_LIBRARY \ + && !TARGET_SEP_DATA) \ + builtin_define ("__ID_SHARED_LIB__"); \ + if (flag_no_builtin) \ + builtin_define ("__NO_BUILTIN"); \ + if (TARGET_MULTICORE) \ + builtin_define ("__BFIN_MULTICORE"); \ + if (TARGET_COREA) \ + builtin_define ("__BFIN_COREA"); \ + if (TARGET_COREB) \ + builtin_define ("__BFIN_COREB"); \ + if (TARGET_SDRAM) \ + builtin_define ("__BFIN_SDRAM"); \ + } \ + while (0) +#endif + +#define DRIVER_SELF_SPECS SUBTARGET_DRIVER_SELF_SPECS "\ + %{mleaf-id-shared-library:%{!mid-shared-library:-mid-shared-library}} \ + %{mfdpic:%{!fpic:%{!fpie:%{!fPIC:%{!fPIE:\ + %{!fno-pic:%{!fno-pie:%{!fno-PIC:%{!fno-PIE:-fpie}}}}}}}}} \ +" +#ifndef SUBTARGET_DRIVER_SELF_SPECS +# define SUBTARGET_DRIVER_SELF_SPECS +#endif + +#define LINK_GCC_C_SEQUENCE_SPEC "\ + %{mfast-fp:-lbffastfp} %G %L %{mfast-fp:-lbffastfp} %G \ +" + +#undef ASM_SPEC +#define ASM_SPEC "\ + %{mno-fdpic:-mnopic} %{mfdpic}" + +#define LINK_SPEC "\ +%{h*} %{v:-V} \ +%{mfdpic:-melf32bfinfd -z text} \ +%{static:-dn -Bstatic} \ +%{shared:-G -Bdynamic} \ +%{symbolic:-Bsymbolic} \ +-init __init -fini __fini " + +/* Generate DSP instructions, like DSP halfword loads */ +#define TARGET_DSP (1) + +#define TARGET_DEFAULT 0 + +/* Maximum number of library ids we permit */ +#define MAX_LIBRARY_ID 255 + +extern const char *bfin_library_id_string; + +#define FUNCTION_MODE SImode +#define Pmode SImode + +/* store-condition-codes instructions store 0 for false + This is the value stored for true. */ +#define STORE_FLAG_VALUE 1 + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. 
*/ +#define STACK_GROWS_DOWNWARD + +#define STACK_PUSH_CODE PRE_DEC + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* We define a dummy ARGP register; the parameters start at offset 0 from + it. */ +#define FIRST_PARM_OFFSET(DECL) 0 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM REG_P6 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM REG_P7 + +/* A dummy register that will be eliminated to either FP or SP. */ +#define ARG_POINTER_REGNUM REG_ARGP + +/* `PIC_OFFSET_TABLE_REGNUM' + The register number of the register used to address a table of + static data addresses in memory. In some cases this register is + defined by a processor's "application binary interface" (ABI). + When this macro is defined, RTL is generated for this register + once, as with the stack pointer and frame pointer registers. If + this macro is not defined, it is up to the machine-dependent files + to allocate such a register (if necessary). */ +#define PIC_OFFSET_TABLE_REGNUM (REG_P5) + +#define FDPIC_FPTR_REGNO REG_P1 +#define FDPIC_REGNO REG_P3 +#define OUR_FDPIC_REG get_hard_reg_initial_val (SImode, FDPIC_REGNO) + +/* A static chain register for nested functions. We need to use a + call-clobbered register for this. */ +#define STATIC_CHAIN_REGNUM REG_P2 + +/* Define this if functions should assume that stack space has been + allocated for arguments even when their values are passed in + registers. + + The value of this macro is the size, in bytes, of the area reserved for + arguments passed in registers. + + This space can either be allocated by the caller or be a part of the + machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' + says which. */ +#define FIXED_STACK_AREA 12 +#define REG_PARM_STACK_SPACE(FNDECL) FIXED_STACK_AREA + +/* Define this if the above stack space is to be considered part of the + * space allocated by the caller. */ +#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/*#define DATA_ALIGNMENT(TYPE, BASIC-ALIGN) for arrays.. */ + +/* If defined, a C expression to compute the alignment for a local + variable. TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. + + If this macro is not defined, then ALIGN is used. + + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. */ + +#define LOCAL_ALIGNMENT(TYPE, ALIGN) bfin_local_alignment ((TYPE), (ALIGN)) + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define TRAMPOLINE_SIZE (TARGET_FDPIC ? 
30 : 18) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + There are two registers that can always be eliminated on the i386. + The frame pointer and the arg pointer can be replaced by either the + hard frame pointer or to the stack pointer, depending upon the + circumstances. The hard frame pointer is not used before reload and + so it is not eligible for elimination. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} \ + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = bfin_initial_elimination_offset ((FROM), (TO))) + +/* This processor has + 8 data register for doing arithmetic + 8 pointer register for doing addressing, including + 1 stack pointer P6 + 1 frame pointer P7 + 4 sets of indexing registers (I0-3, B0-3, L0-3, M0-3) + 1 condition code flag register CC + 5 return address registers RETS/I/X/N/E + 1 arithmetic status register (ASTAT). */ + +#define FIRST_PSEUDO_REGISTER 50 + +#define D_REGNO_P(X) ((X) <= REG_R7) +#define P_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_P7) +#define I_REGNO_P(X) ((X) >= REG_I0 && (X) <= REG_I3) +#define DP_REGNO_P(X) (D_REGNO_P (X) || P_REGNO_P (X)) +#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3) +#define DREG_P(X) (REG_P (X) && D_REGNO_P (REGNO (X))) +#define PREG_P(X) (REG_P (X) && P_REGNO_P (REGNO (X))) +#define IREG_P(X) (REG_P (X) && I_REGNO_P (REGNO (X))) +#define DPREG_P(X) (REG_P (X) && DP_REGNO_P (REGNO (X))) + +#define REGISTER_NAMES { \ + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", \ + "P0", "P1", "P2", "P3", "P4", "P5", "SP", "FP", \ + "I0", "I1", "I2", "I3", "B0", "B1", "B2", "B3", \ + "L0", "L1", "L2", "L3", "M0", "M1", "M2", "M3", \ + "A0", "A1", \ + "CC", \ + "RETS", "RETI", "RETX", "RETN", "RETE", "ASTAT", "SEQSTAT", "USP", \ + "ARGP", \ + "LT0", "LT1", "LC0", "LC1", "LB0", "LB1" \ +} + +#define SHORT_REGISTER_NAMES { \ + "R0.L", "R1.L", "R2.L", "R3.L", "R4.L", "R5.L", "R6.L", "R7.L", \ + "P0.L", "P1.L", "P2.L", "P3.L", "P4.L", "P5.L", "SP.L", "FP.L", \ + "I0.L", "I1.L", "I2.L", "I3.L", "B0.L", "B1.L", "B2.L", "B3.L", \ + "L0.L", "L1.L", "L2.L", "L3.L", "M0.L", "M1.L", "M2.L", "M3.L", } + +#define HIGH_REGISTER_NAMES { \ + "R0.H", "R1.H", "R2.H", "R3.H", "R4.H", "R5.H", "R6.H", "R7.H", \ + "P0.H", "P1.H", "P2.H", "P3.H", "P4.H", "P5.H", "SP.H", "FP.H", \ + "I0.H", "I1.H", "I2.H", "I3.H", "B0.H", "B1.H", "B2.H", "B3.H", \ + "L0.H", "L1.H", "L2.H", "L3.H", "M0.H", "M1.H", "M2.H", "M3.H", } + +#define DREGS_PAIR_NAMES { \ + "R1:0.p", 0, "R3:2.p", 0, "R5:4.p", 0, "R7:6.p", 0, } + +#define BYTE_REGISTER_NAMES { \ + "R0.B", "R1.B", "R2.B", "R3.B", "R4.B", "R5.B", "R6.B", "R7.B", } + + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. 
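+
+   In the table below the fixed entries among the data, pointer and DAG
+   registers are the stack pointer (hard register 14, REG_P6, printed as
+   "SP") and the four length registers L0-L3; the remaining 1 entries are
+   the return, status, loop and argument-pointer registers in the last
+   rows.  A hypothetical stand-alone check of the first row, not part of
+   the port:
+
+     #include <assert.h>
+     static const int row0[16] = { 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,0 };
+     int main (void) { assert (row0[14] == 1); return 0; }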
*/ + +#define FIXED_REGISTERS \ +/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \ +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, \ +/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, \ +/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \ + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*lb0/1 */ \ + 1, 1 \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ +/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \ +{ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, \ +/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*lb0/1 */ \ + 1, 1 \ +} + +/* Order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. List frame pointer + late and fixed registers last. Note that, in general, we prefer + registers listed in CALL_USED_REGISTERS, keeping the others + available for storage of persistent values. */ + +#define REG_ALLOC_ORDER \ +{ REG_R0, REG_R1, REG_R2, REG_R3, REG_R7, REG_R6, REG_R5, REG_R4, \ + REG_P2, REG_P1, REG_P0, REG_P5, REG_P4, REG_P3, REG_P6, REG_P7, \ + REG_A0, REG_A1, \ + REG_I0, REG_I1, REG_I2, REG_I3, REG_B0, REG_B1, REG_B2, REG_B3, \ + REG_L0, REG_L1, REG_L2, REG_L3, REG_M0, REG_M1, REG_M2, REG_M3, \ + REG_RETS, REG_RETI, REG_RETX, REG_RETN, REG_RETE, \ + REG_ASTAT, REG_SEQSTAT, REG_USP, \ + REG_CC, REG_ARGP, \ + REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1 \ +} + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + + +enum reg_class +{ + NO_REGS, + IREGS, + BREGS, + LREGS, + MREGS, + CIRCREGS, /* Circular buffering registers, Ix, Bx, Lx together form. See Automatic Circular Buffering. */ + DAGREGS, + EVEN_AREGS, + ODD_AREGS, + AREGS, + CCREGS, + EVEN_DREGS, + ODD_DREGS, + D0REGS, + D1REGS, + D2REGS, + D3REGS, + D4REGS, + D5REGS, + D6REGS, + D7REGS, + DREGS, + P0REGS, + FDPIC_REGS, + FDPIC_FPTR_REGS, + PREGS_CLOBBERED, + PREGS, + IPREGS, + DPREGS, + MOST_REGS, + LT_REGS, + LC_REGS, + LB_REGS, + PROLOGUE_REGS, + NON_A_CC_REGS, + ALL_REGS, LIM_REG_CLASSES +}; + +#define N_REG_CLASSES ((int)LIM_REG_CLASSES) + +#define GENERAL_REGS DPREGS + +/* Give names of register classes as strings for dump file. 
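+
+   The strings must stay in one-to-one correspondence with enum reg_class
+   above.  A hypothetical compile-time check of that invariant, sketched
+   for a scratch translation unit that already sees the surrounding GCC
+   headers (nothing the port itself needs):
+
+     static const char *const class_names[] = REG_CLASS_NAMES;
+     typedef char names_match_classes
+       [(sizeof (class_names) / sizeof (class_names[0]) == N_REG_CLASSES)
+        ? 1 : -1];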
*/ + +#define REG_CLASS_NAMES \ +{ "NO_REGS", \ + "IREGS", \ + "BREGS", \ + "LREGS", \ + "MREGS", \ + "CIRCREGS", \ + "DAGREGS", \ + "EVEN_AREGS", \ + "ODD_AREGS", \ + "AREGS", \ + "CCREGS", \ + "EVEN_DREGS", \ + "ODD_DREGS", \ + "D0REGS", \ + "D1REGS", \ + "D2REGS", \ + "D3REGS", \ + "D4REGS", \ + "D5REGS", \ + "D6REGS", \ + "D7REGS", \ + "DREGS", \ + "P0REGS", \ + "FDPIC_REGS", \ + "FDPIC_FPTR_REGS", \ + "PREGS_CLOBBERED", \ + "PREGS", \ + "IPREGS", \ + "DPREGS", \ + "MOST_REGS", \ + "LT_REGS", \ + "LC_REGS", \ + "LB_REGS", \ + "PROLOGUE_REGS", \ + "NON_A_CC_REGS", \ + "ALL_REGS" } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. + + When the machine has more than 32 registers, an integer does not suffice. + Then the integers are replaced by sub-initializers, braced groupings + containing several integers. Each sub-initializer must be suitable as an + initializer for the type `HARD_REG_SET' which is defined in + `hard-reg-set.h'. */ + +/* NOTE: DSP registers, IREGS - AREGS, are not GENERAL_REGS. We use + MOST_REGS as the union of DPREGS and DAGREGS. */ + +#define REG_CLASS_CONTENTS \ + /* 31 - 0 63-32 */ \ +{ { 0x00000000, 0 }, /* NO_REGS */ \ + { 0x000f0000, 0 }, /* IREGS */ \ + { 0x00f00000, 0 }, /* BREGS */ \ + { 0x0f000000, 0 }, /* LREGS */ \ + { 0xf0000000, 0 }, /* MREGS */ \ + { 0x0fff0000, 0 }, /* CIRCREGS */ \ + { 0xffff0000, 0 }, /* DAGREGS */ \ + { 0x00000000, 0x1 }, /* EVEN_AREGS */ \ + { 0x00000000, 0x2 }, /* ODD_AREGS */ \ + { 0x00000000, 0x3 }, /* AREGS */ \ + { 0x00000000, 0x4 }, /* CCREGS */ \ + { 0x00000055, 0 }, /* EVEN_DREGS */ \ + { 0x000000aa, 0 }, /* ODD_DREGS */ \ + { 0x00000001, 0 }, /* D0REGS */ \ + { 0x00000002, 0 }, /* D1REGS */ \ + { 0x00000004, 0 }, /* D2REGS */ \ + { 0x00000008, 0 }, /* D3REGS */ \ + { 0x00000010, 0 }, /* D4REGS */ \ + { 0x00000020, 0 }, /* D5REGS */ \ + { 0x00000040, 0 }, /* D6REGS */ \ + { 0x00000080, 0 }, /* D7REGS */ \ + { 0x000000ff, 0 }, /* DREGS */ \ + { 0x00000100, 0x000 }, /* P0REGS */ \ + { 0x00000800, 0x000 }, /* FDPIC_REGS */ \ + { 0x00000200, 0x000 }, /* FDPIC_FPTR_REGS */ \ + { 0x00004700, 0x800 }, /* PREGS_CLOBBERED */ \ + { 0x0000ff00, 0x800 }, /* PREGS */ \ + { 0x000fff00, 0x800 }, /* IPREGS */ \ + { 0x0000ffff, 0x800 }, /* DPREGS */ \ + { 0xffffffff, 0x800 }, /* MOST_REGS */\ + { 0x00000000, 0x3000 }, /* LT_REGS */\ + { 0x00000000, 0xc000 }, /* LC_REGS */\ + { 0x00000000, 0x30000 }, /* LB_REGS */\ + { 0x00000000, 0x3f7f8 }, /* PROLOGUE_REGS */\ + { 0xffffffff, 0x3fff8 }, /* NON_A_CC_REGS */\ + { 0xffffffff, 0x3ffff }} /* ALL_REGS */ + +#define IREG_POSSIBLE_P(OUTER) \ + ((OUTER) == POST_INC || (OUTER) == PRE_INC \ + || (OUTER) == POST_DEC || (OUTER) == PRE_DEC \ + || (OUTER) == MEM || (OUTER) == ADDRESS) + +#define MODE_CODE_BASE_REG_CLASS(MODE, OUTER, INDEX) \ + ((MODE) == HImode && IREG_POSSIBLE_P (OUTER) ? 
IPREGS : PREGS) + +#define INDEX_REG_CLASS PREGS + +#define REGNO_OK_FOR_BASE_STRICT_P(X, MODE, OUTER, INDEX) \ + (P_REGNO_P (X) || (X) == REG_ARGP \ + || (IREG_POSSIBLE_P (OUTER) && (MODE) == HImode \ + && I_REGNO_P (X))) + +#define REGNO_OK_FOR_BASE_NONSTRICT_P(X, MODE, OUTER, INDEX) \ + ((X) >= FIRST_PSEUDO_REGISTER \ + || REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX)) + +#ifdef REG_OK_STRICT +#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, OUTER, INDEX) \ + REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX) +#else +#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, OUTER, INDEX) \ + REGNO_OK_FOR_BASE_NONSTRICT_P (X, MODE, OUTER, INDEX) +#endif + +#define REGNO_OK_FOR_INDEX_P(X) 0 + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ +((REGNO) == REG_R0 ? D0REGS \ + : (REGNO) == REG_R1 ? D1REGS \ + : (REGNO) == REG_R2 ? D2REGS \ + : (REGNO) == REG_R3 ? D3REGS \ + : (REGNO) == REG_R4 ? D4REGS \ + : (REGNO) == REG_R5 ? D5REGS \ + : (REGNO) == REG_R6 ? D6REGS \ + : (REGNO) == REG_R7 ? D7REGS \ + : (REGNO) == REG_P0 ? P0REGS \ + : (REGNO) < REG_I0 ? PREGS \ + : (REGNO) == REG_ARGP ? PREGS \ + : (REGNO) >= REG_I0 && (REGNO) <= REG_I3 ? IREGS \ + : (REGNO) >= REG_L0 && (REGNO) <= REG_L3 ? LREGS \ + : (REGNO) >= REG_B0 && (REGNO) <= REG_B3 ? BREGS \ + : (REGNO) >= REG_M0 && (REGNO) <= REG_M3 ? MREGS \ + : (REGNO) == REG_A0 || (REGNO) == REG_A1 ? AREGS \ + : (REGNO) == REG_LT0 || (REGNO) == REG_LT1 ? LT_REGS \ + : (REGNO) == REG_LC0 || (REGNO) == REG_LC1 ? LC_REGS \ + : (REGNO) == REG_LB0 || (REGNO) == REG_LB1 ? LB_REGS \ + : (REGNO) == REG_CC ? CCREGS \ + : (REGNO) >= REG_RETS ? PROLOGUE_REGS \ + : NO_REGS) + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + MOST_REGS, AREGS, CCREGS, LIM_REG_CLASSES \ +} + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. */ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +/* Do not allow to store a value in REG_CC for any mode */ +/* Do not allow to store value in pregs if mode is not SI*/ +#define HARD_REGNO_MODE_OK(REGNO, MODE) hard_regno_mode_ok((REGNO), (MODE)) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == V2PDImode && (CLASS) == AREGS ? 2 \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((MODE) == PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 1 \ + : (MODE) == V2PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 
2 \ + : CLASS_MAX_NREGS (GENERAL_REGS, MODE)) + +/* A C expression that is nonzero if hard register TO can be + considered for use as a rename register for FROM register */ +#define HARD_REGNO_RENAME_OK(FROM, TO) bfin_hard_regno_rename_ok (FROM, TO) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == (MODE2) \ + || ((GET_MODE_CLASS (MODE1) == MODE_INT \ + || GET_MODE_CLASS (MODE1) == MODE_FLOAT) \ + && (GET_MODE_CLASS (MODE2) == MODE_INT \ + || GET_MODE_CLASS (MODE2) == MODE_FLOAT) \ + && (MODE1) != BImode && (MODE2) != BImode \ + && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)) + +/* `PREFERRED_RELOAD_CLASS (X, CLASS)' + A C expression that places additional restrictions on the register + class to use when it is necessary to copy value X into a register + in class CLASS. The value is a register class; perhaps CLASS, or + perhaps another, smaller class. */ +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + (GET_CODE (X) == POST_INC \ + || GET_CODE (X) == POST_DEC \ + || GET_CODE (X) == PRE_DEC ? PREGS : (CLASS)) + +/* Function Calling Conventions. */ + +/* The type of the current function; normal functions are of type + SUBROUTINE. */ +typedef enum { + SUBROUTINE, INTERRUPT_HANDLER, EXCPT_HANDLER, NMI_HANDLER +} e_funkind; +#define FUNCTION_RETURN_REGISTERS { REG_RETS, REG_RETI, REG_RETX, REG_RETN } + +#define FUNCTION_ARG_REGISTERS { REG_R0, REG_R1, REG_R2, -1 } + +/* Flags for the call/call_value rtl operations set up by function_arg */ +#define CALL_NORMAL 0x00000000 /* no special processing */ +#define CALL_LONG 0x00000001 /* always call indirect */ +#define CALL_SHORT 0x00000002 /* always call by symbol */ + +typedef struct { + int words; /* # words passed so far */ + int nregs; /* # registers available for passing */ + int *arg_regs; /* array of register -1 terminated */ + int call_cookie; /* Do special things for this call */ +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_REGNO_P(REGNO) function_arg_regno_p (REGNO) + + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT, N_NAMED_ARGS) \ + (init_cumulative_args (&CUM, FNTYPE, LIBNAME)) + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. +*/ + +#define VALUE_REGNO(MODE) (REG_R0) + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + gen_rtx_REG (TYPE_MODE (VALTYPE), \ + VALUE_REGNO(TYPE_MODE(VALTYPE))) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, VALUE_REGNO(MODE)) + +#define FUNCTION_VALUE_REGNO_P(N) ((N) == REG_R0) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Before the prologue, the return address is in the RETS register. 
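+
+   So, for example, a use of the GCC builtin
+
+     void *where (void) { return __builtin_return_address (0); }
+
+   goes through the RETURN_ADDR_RTX hook just below (bfin_return_addr_rtx),
+   while INCOMING_RETURN_ADDR_RTX tells the rest of the compiler that on
+   entry the value lives in RETS.  The function name here is only an
+   illustration.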
*/ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, REG_RETS) + +#define RETURN_ADDR_RTX(COUNT, FRAME) bfin_return_addr_rtx (COUNT) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (REG_RETS) + +/* Call instructions don't modify the stack pointer on the Blackfin. */ +#define INCOMING_FRAME_SP_OFFSET 0 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, REG_P2) +#define EH_RETURN_HANDLER_RTX \ + gen_frame_mem (Pmode, plus_constant (frame_pointer_rtx, UNITS_PER_WORD)) + +/* Addressing Modes */ + +/* Nonzero if the constant value X is a legitimate general operand. + symbol_ref are not legitimate and will be put into constant pool. + See force_const_mem(). + If -mno-pool, all constants are legitimate. + */ +#define LEGITIMATE_CONSTANT_P(X) bfin_legitimate_constant_p (X) + +/* A number, the maximum number of registers that can appear in a + valid memory address. Note that it is up to you to specify a + value equal to the maximum number that `TARGET_LEGITIMATE_ADDRESS_P' + would ever accept. */ +#define MAX_REGS_PER_ADDRESS 1 + +#define LEGITIMATE_MODE_FOR_AUTOINC_P(MODE) \ + (GET_MODE_SIZE (MODE) <= 4 || (MODE) == PDImode) + +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_DECREMENT 1 + +/* `LEGITIMATE_PIC_OPERAND_P (X)' + A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent + code. You can assume that X satisfies `CONSTANT_P', so you need + not check this. You can also assume FLAG_PIC is true, so you need + not check it either. You need not define this macro if all + constants (including `SYMBOL_REF') can be immediate operands when + generating position independent code. */ +#define LEGITIMATE_PIC_OPERAND_P(X) ! SYMBOLIC_CONST (X) + +#define SYMBOLIC_CONST(X) \ +(GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + +#define NOTICE_UPDATE_CC(EXPR, INSN) 0 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX UNITS_PER_WORD + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. */ + +#define MOVE_RATIO(speed) 5 + +/* STORAGE LAYOUT: target machine storage layout + Define this macro as a C expression which is nonzero if accessing + less than a word of memory (i.e. a `char' or a `short') is no + faster than accessing a word of memory, i.e., if such access + require more than one instruction or if there is no difference in + cost between byte and (aligned) word loads. + + When this macro is not defined, the compiler will access a field by + finding the smallest containing object; when it is defined, a + fullword load will be used if alignment permits. Unless bytes + accesses are faster than word accesses, using word accesses is + preferable since it may eliminate subsequent memory access if + subsequent accesses occur to other fields in the same word of the + structure, but to different bytes. 
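+
+   As a rough illustration rather than a promise about generated code:
+   given
+
+     struct s { char a; char b; short c; } x;
+
+   a read of x.a with SLOW_BYTE_ACCESS left at 0, as defined just below,
+   lets the compiler use the narrow byte load instead of fetching the
+   whole 32-bit word that contains the field.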
*/ +#define SLOW_BYTE_ACCESS 0 +#define SLOW_SHORT_ACCESS 0 + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + We can't access bytes but if we could we would in the Big Endian order. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is numbered. */ +#define WORDS_BIG_ENDIAN 0 + +/* number of bits in an addressable storage unit */ +#define BITS_PER_UNIT 8 + +/* Width in bits of a "word", which is the contents of a machine register. + Note that this is not necessarily the width of data type `int'; + if using 16-bit ints on a 68000, this would still be 32. + But on a machine with 16-bit registers, this would be 16. */ +#define BITS_PER_WORD 32 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Width in bits of a pointer. + See also the macro `Pmode1' defined below. */ +#define POINTER_SIZE 32 + +/* Allocation boundary (in *bits*) for storing pointers in memory. */ +#define POINTER_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY BITS_PER_WORD + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 32 + +/* Define this if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* (shell-command "rm c-decl.o stor-layout.o") + * never define PCC_BITFIELD_TYPE_MATTERS + * really cause some alignment problem + */ + +#define UNITS_PER_FLOAT ((FLOAT_TYPE_SIZE + BITS_PER_UNIT - 1) / \ + BITS_PER_UNIT) + +#define UNITS_PER_DOUBLE ((DOUBLE_TYPE_SIZE + BITS_PER_UNIT - 1) / \ + BITS_PER_UNIT) + + +/* what is the 'type' of size_t */ +#define SIZE_TYPE "long unsigned int" + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 +#define FLOAT_TYPE_SIZE BITS_PER_WORD +#define SHORT_TYPE_SIZE 16 +#define CHAR_TYPE_SIZE 8 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 + +/* Note: Fix this to depend on target switch. -- lev */ + +/* Note: Try to implement double and force long double. -- tonyko + * #define __DOUBLES_ARE_FLOATS__ + * #define DOUBLE_TYPE_SIZE FLOAT_TYPE_SIZE + * #define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE + * #define DOUBLES_ARE_FLOATS 1 + */ + +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* `PROMOTE_MODE (M, UNSIGNEDP, TYPE)' + A macro to update M and UNSIGNEDP when an object whose type is + TYPE and which has the specified mode and signedness is to be + stored in a register. This macro is only called when TYPE is a + scalar type. + + On most RISC machines, which only have operations that operate on + a full register, define this macro to set M to `word_mode' if M is + an integer mode narrower than `BITS_PER_WORD'. In most cases, + only integer modes should be widened because wider-precision + floating-point operations are usually more expensive than their + narrower counterparts. + + For most machines, the macro definition does not change UNSIGNEDP. 
+ However, some machines, have instructions that preferentially + handle either signed or unsigned quantities of certain modes. For + example, on the DEC Alpha, 32-bit loads from memory and 32-bit add + instructions sign-extend the result to 64 bits. On such machines, + set UNSIGNEDP according to which kind of extension is more + efficient. + + Do not define this macro if it would never modify M.*/ + +#define BFIN_PROMOTE_MODE_P(MODE) \ + (!TARGET_DSP && GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (BFIN_PROMOTE_MODE_P(MODE)) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 0; \ + (MODE) = SImode; \ + } + +/* Describing Relative Costs of Operations */ + +/* Do not put function addr into constant pool */ +#define NO_FUNCTION_CSE 1 + +/* A C expression for the cost of moving data from a register in class FROM to + one in class TO. The classes are expressed using the enumeration values + such as `GENERAL_REGS'. A value of 2 is the default; other values are + interpreted relative to that. + + It is not required that the cost always equal 2 when FROM is the same as TO; + on some machines it is expensive to move between registers if they are not + general registers. */ + +#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \ + bfin_register_move_cost ((MODE), (CLASS1), (CLASS2)) + +/* A C expression for the cost of moving data of mode M between a + register and memory. A value of 2 is the default; this cost is + relative to those in `REGISTER_MOVE_COST'. + + If moving between registers and memory is more expensive than + between two registers, you should define this macro to express the + relative cost. */ + +#define MEMORY_MOVE_COST(MODE, CLASS, IN) \ + bfin_memory_move_cost ((MODE), (CLASS), (IN)) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +#define JUMP_TABLES_IN_TEXT_SECTION flag_pic + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. +#define WORD_REGISTER_OPERATIONS +*/ + +/* Evaluates to true if A and B are mac flags that can be used + together in a single multiply insn. That is the case if they are + both the same flag not involving M, or if one is a combination of + the other with M. */ +#define MACFLAGS_MATCH_P(A, B) \ + ((A) == (B) \ + || ((A) == MACFLAG_NONE && (B) == MACFLAG_M) \ + || ((A) == MACFLAG_M && (B) == MACFLAG_NONE) \ + || ((A) == MACFLAG_IS && (B) == MACFLAG_IS_M) \ + || ((A) == MACFLAG_IS_M && (B) == MACFLAG_IS)) + +/* Switch into a generic section. 
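+
+   For example, user code that places an object with
+
+     int dma_buf[256] __attribute__ ((section (".l1.data")));
+
+   (both the variable and the section name are only illustrative) is
+   routed through default_elf_asm_named_section, which emits the generic
+   ELF .section directive for it.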
*/ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#define PRINT_OPERAND(FILE, RTX, CODE) print_operand (FILE, RTX, CODE) +#define PRINT_OPERAND_ADDRESS(FILE, RTX) print_address_operand (FILE, RTX) + +typedef enum sections { + CODE_DIR, + DATA_DIR, + LAST_SECT_NM +} SECT_ENUM_T; + +typedef enum directives { + LONG_CONST_DIR, + SHORT_CONST_DIR, + BYTE_CONST_DIR, + SPACE_DIR, + INIT_DIR, + LAST_DIR_NM +} DIR_ENUM_T; + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) \ + ((C) == ';' \ + || ((C) == '|' && (STR)[1] == '|')) + +#define TEXT_SECTION_ASM_OP ".text;" +#define DATA_SECTION_ASM_OP ".data;" + +#define ASM_APP_ON "" +#define ASM_APP_OFF "" + +#define ASM_GLOBALIZE_LABEL1(FILE, NAME) \ + do { fputs (".global ", FILE); \ + assemble_name (FILE, NAME); \ + fputc (';',FILE); \ + fputc ('\n',FILE); \ + } while (0) + +#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \ + do { \ + fputs (".type ", FILE); \ + assemble_name (FILE, NAME); \ + fputs (", STT_FUNC", FILE); \ + fputc (';',FILE); \ + fputc ('\n',FILE); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + } while (0) + +#define ASM_OUTPUT_LABEL(FILE, NAME) \ + do { assemble_name (FILE, NAME); \ + fputs (":\n",FILE); \ + } while (0) + +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { fprintf (FILE, "_%s", NAME); \ + } while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { char __buf[256]; \ + fprintf (FILE, "\t.dd\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputc (';', FILE); \ + fputc ('\n', FILE); \ + } while (0) + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) + +#define MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) \ + do { \ + char __buf[256]; \ + fprintf (FILE, "\t.dd\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputs (" - ", FILE); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", REL); \ + assemble_name (FILE, __buf); \ + fputc (';', FILE); \ + fputc ('\n', FILE); \ + } while (0) + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \ + } while (0) + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + do { \ + asm_output_skip (FILE, SIZE); \ + } while (0) + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +do { \ + switch_to_section (data_section); \ + if ((SIZE) >= (unsigned int) 4 ) ASM_OUTPUT_ALIGN(FILE,2); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + fprintf (FILE, "%s %ld;\n", ASM_SPACE, \ + (ROUNDED) > (unsigned int) 1 ? (ROUNDED) : 1); \ +} while (0) + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ + do { \ + ASM_GLOBALIZE_LABEL1(FILE,NAME); \ + ASM_OUTPUT_LOCAL (FILE, NAME, SIZE, ROUNDED); } while(0) + +#define ASM_COMMENT_START "//" + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + do { \ + fprintf (FILE, "\tCALL __mcount;\n"); \ + } while(0) + +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) fprintf (FILE, "[SP--] = %s;\n", reg_names[REGNO]) +#define ASM_OUTPUT_REG_POP(FILE, REGNO) fprintf (FILE, "%s = [SP++];\n", reg_names[REGNO]) + +extern struct rtx_def *bfin_cc_rtx, *bfin_rets_rtx; + +/* This works for GAS and some other assemblers. 
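+
+   SET_ASM_OP is the directive the generic GCC code falls back on when it
+   needs an assembler-level equate, e.g. for an alias such as
+
+     int impl (void) { return 0; }
+     int api (void) __attribute__ ((alias ("impl")));
+
+   which should come out along the lines of ".set _api, _impl", with the
+   underscores added by ASM_OUTPUT_LABELREF above.  Treat the exact
+   spelling as illustrative rather than a guarantee.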
*/ +#define SET_ASM_OP ".set " + +/* DBX register number for a given compiler register number */ +#define DBX_REGISTER_NUMBER(REGNO) (REGNO) + +#define SIZE_ASM_OP "\t.size\t" + +extern int splitting_for_sched, splitting_loops; + +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) ((CHAR) == '!') + +#ifndef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 0 +#endif + +#endif /* _BFIN_CONFIG */ diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md new file mode 100644 index 000000000..3fac01ca5 --- /dev/null +++ b/gcc/config/bfin/bfin.md @@ -0,0 +1,4211 @@ +;;- Machine description for Blackfin for GNU compiler +;; Copyright 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +;; Contributed by Analog Devices. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; operand punctuation marks: +; +; X -- integer value printed as log2 +; Y -- integer value printed as log2(~value) - for bitclear +; h -- print half word register, low part +; d -- print half word register, high part +; D -- print operand as dregs pairs +; w -- print operand as accumulator register word (a0w, a1w) +; H -- high part of double mode operand +; T -- byte register representation Oct. 02 2001 + +; constant operand classes +; +; J 2**N 5bit imm scaled +; Ks7 -64 .. 63 signed 7bit imm +; Ku5 0..31 unsigned 5bit imm +; Ks4 -8 .. 7 signed 4bit imm +; Ks3 -4 .. 3 signed 3bit imm +; Ku3 0 .. 7 unsigned 3bit imm +; Pn 0, 1, 2 constants 0, 1 or 2, corresponding to n +; +; register operands +; d (r0..r7) +; a (p0..p5,fp,sp) +; e (a0, a1) +; b (i0..i3) +; f (m0..m3) +; v (b0..b3) +; c (i0..i3,m0..m3) CIRCREGS +; C (CC) CCREGS +; t (lt0,lt1) +; k (lc0,lc1) +; u (lb0,lb1) +; + +;; Define constants for hard registers. + +(define_constants + [(REG_R0 0) + (REG_R1 1) + (REG_R2 2) + (REG_R3 3) + (REG_R4 4) + (REG_R5 5) + (REG_R6 6) + (REG_R7 7) + + (REG_P0 8) + (REG_P1 9) + (REG_P2 10) + (REG_P3 11) + (REG_P4 12) + (REG_P5 13) + (REG_P6 14) + (REG_P7 15) + + (REG_SP 14) + (REG_FP 15) + + (REG_I0 16) + (REG_I1 17) + (REG_I2 18) + (REG_I3 19) + + (REG_B0 20) + (REG_B1 21) + (REG_B2 22) + (REG_B3 23) + + (REG_L0 24) + (REG_L1 25) + (REG_L2 26) + (REG_L3 27) + + (REG_M0 28) + (REG_M1 29) + (REG_M2 30) + (REG_M3 31) + + (REG_A0 32) + (REG_A1 33) + + (REG_CC 34) + (REG_RETS 35) + (REG_RETI 36) + (REG_RETX 37) + (REG_RETN 38) + (REG_RETE 39) + + (REG_ASTAT 40) + (REG_SEQSTAT 41) + (REG_USP 42) + + (REG_ARGP 43) + + (REG_LT0 44) + (REG_LT1 45) + (REG_LC0 46) + (REG_LC1 47) + (REG_LB0 48) + (REG_LB1 49)]) + +;; Constants used in UNSPECs and UNSPEC_VOLATILEs. 
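+;; These are opaque operation numbers rather than real RTL codes.  As a
+;; pointer to how they are consumed, UNSPEC_MOVE_PIC below is what the
+;; movsi_high_pic and movsi_low_pic patterns later in this file wrap
+;; around their symbolic operand, e.g.
+;;   (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_MOVE_PIC)
+;; which keeps the PIC address computation from being simplified away.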
+ +(define_constants + [(UNSPEC_CBRANCH_TAKEN 0) + (UNSPEC_CBRANCH_NOPS 1) + (UNSPEC_RETURN 2) + (UNSPEC_MOVE_PIC 3) + (UNSPEC_LIBRARY_OFFSET 4) + (UNSPEC_PUSH_MULTIPLE 5) + ;; Multiply or MAC with extra CONST_INT operand specifying the macflag + (UNSPEC_MUL_WITH_FLAG 6) + (UNSPEC_MAC_WITH_FLAG 7) + (UNSPEC_MOVE_FDPIC 8) + (UNSPEC_FUNCDESC_GOT17M4 9) + (UNSPEC_LSETUP_END 10) + ;; Distinguish a 32-bit version of an insn from a 16-bit version. + (UNSPEC_32BIT 11) + (UNSPEC_NOP 12) + (UNSPEC_ONES 13) + (UNSPEC_ATOMIC 14)]) + +(define_constants + [(UNSPEC_VOLATILE_CSYNC 1) + (UNSPEC_VOLATILE_SSYNC 2) + (UNSPEC_VOLATILE_LOAD_FUNCDESC 3) + (UNSPEC_VOLATILE_STORE_EH_HANDLER 4) + (UNSPEC_VOLATILE_DUMMY 5) + (UNSPEC_VOLATILE_STALL 6)]) + +(define_constants + [(MACFLAG_NONE 0) + (MACFLAG_T 1) + (MACFLAG_FU 2) + (MACFLAG_TFU 3) + (MACFLAG_IS 4) + (MACFLAG_IU 5) + (MACFLAG_W32 6) + (MACFLAG_M 7) + (MACFLAG_IS_M 8) + (MACFLAG_S2RND 9) + (MACFLAG_ISS2 10) + (MACFLAG_IH 11)]) + +(define_attr "type" + "move,movcc,mvi,mcld,mcst,dsp32,dsp32shiftimm,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall" + (const_string "misc")) + +(define_attr "addrtype" "32bit,preg,spreg,ireg" + (cond [(and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_p_address_operand" ""))) + (const_string "preg") + (and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_spfp_address_operand" ""))) + (const_string "spreg") + (and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_i_address_operand" ""))) + (const_string "ireg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_p_address_operand" ""))) + (const_string "preg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_spfp_address_operand" ""))) + (const_string "spreg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_i_address_operand" ""))) + (const_string "ireg")] + (const_string "32bit"))) + +(define_attr "storereg" "preg,other" + (cond [(and (eq_attr "type" "mcst") + (match_operand 1 "p_register_operand" "")) + (const_string "preg")] + (const_string "other"))) + +;; Scheduling definitions + +(define_automaton "bfin") + +(define_cpu_unit "slot0" "bfin") +(define_cpu_unit "slot1" "bfin") +(define_cpu_unit "slot2" "bfin") + +;; Three units used to enforce parallel issue restrictions: +;; only one of the 16-bit slots can use a P register in an address, +;; and only one them can be a store. +(define_cpu_unit "store" "bfin") +(define_cpu_unit "pregs" "bfin") + +;; A dummy unit used to delay scheduling of loads after a conditional +;; branch. +(define_cpu_unit "load" "bfin") + +;; A logical unit used to work around anomaly 05000074. 
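+;; The anomaly concerns multi-issue bundles that combine a 32-bit DSP
+;; shift-immediate with a P-register store.  Both kinds of reservation
+;; below also claim this unit, so the automaton can never place the two
+;; in the same issue cycle.  A sketch of the pairing being avoided, in
+;; Blackfin parallel-issue syntax with made-up operands:
+;;   R0 = R1 << 2 || [P0] = R2 || NOP;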
+(define_cpu_unit "anomaly_05000074" "bfin") + +(define_reservation "core" "slot0+slot1+slot2") + +(define_insn_reservation "alu" 1 + (eq_attr "type" "move,movcc,mvi,alu0,shft,brcc,br,call,misc,sync,compare") + "core") + +(define_insn_reservation "imul" 3 + (eq_attr "type" "mult") + "core*3") + +(define_insn_reservation "dsp32" 1 + (eq_attr "type" "dsp32") + "slot0") + +(define_insn_reservation "dsp32shiftimm" 1 + (and (eq_attr "type" "dsp32shiftimm") + (eq (symbol_ref "ENABLE_WA_05000074") + (const_int 0))) + "slot0") + +(define_insn_reservation "dsp32shiftimm_anomaly_05000074" 1 + (and (eq_attr "type" "dsp32shiftimm") + (ne (symbol_ref "ENABLE_WA_05000074") + (const_int 0))) + "slot0+anomaly_05000074") + +(define_insn_reservation "load32" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit"))) + "core+load") + +(define_insn_reservation "loadp" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg"))) + "slot1+pregs+load") + +(define_insn_reservation "loadsp" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg"))) + "slot1+pregs") + +(define_insn_reservation "loadi" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg"))) + "(slot1|slot2)+load") + +(define_insn_reservation "store32" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "32bit"))) + "core") + +(define_insn_reservation "storep" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") + (ior (eq_attr "addrtype" "preg") + (eq_attr "addrtype" "spreg")))) + (ior (eq (symbol_ref "ENABLE_WA_05000074") + (const_int 0)) + (eq_attr "storereg" "other"))) + "slot1+pregs+store") + +(define_insn_reservation "storep_anomaly_05000074" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") + (ior (eq_attr "addrtype" "preg") + (eq_attr "addrtype" "spreg")))) + (and (ne (symbol_ref "ENABLE_WA_05000074") + (const_int 0)) + (eq_attr "storereg" "preg"))) + "slot1+anomaly_05000074+pregs+store") + +(define_insn_reservation "storei" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg"))) + (ior (eq (symbol_ref "ENABLE_WA_05000074") + (const_int 0)) + (eq_attr "storereg" "other"))) + "(slot1|slot2)+store") + +(define_insn_reservation "storei_anomaly_05000074" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg"))) + (and (ne (symbol_ref "ENABLE_WA_05000074") + (const_int 0)) + (eq_attr "storereg" "preg"))) + "((slot1+anomaly_05000074)|slot2)+store") + +(define_insn_reservation "multi" 2 + (eq_attr "seq_insns" "multi") + "core") + +(define_insn_reservation "load_stall1" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const1_operand" "")) + "core+load*2") + +(define_insn_reservation "load_stall3" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const3_operand" "")) + "core+load*4") + +(absence_set "slot0" "slot1,slot2") +(absence_set "slot1" "slot2") + +;; Make sure genautomata knows about the maximum latency that can be produced +;; by the adjust_cost function. 
+(define_insn_reservation "dummy" 5 + (eq_attr "type" "dummy") + "core") + +;; Operand and operator predicates + +(include "predicates.md") +(include "constraints.md") + +;;; FRIO branches have been optimized for code density +;;; this comes at a slight cost of complexity when +;;; a compiler needs to generate branches in the general +;;; case. In order to generate the correct branching +;;; mechanisms the compiler needs keep track of instruction +;;; lengths. The follow table describes how to count instructions +;;; for the FRIO architecture. +;;; +;;; unconditional br are 12-bit imm pcrelative branches *2 +;;; conditional br are 10-bit imm pcrelative branches *2 +;;; brcc 10-bit: +;;; 1024 10-bit imm *2 is 2048 (-1024..1022) +;;; br 12-bit : +;;; 4096 12-bit imm *2 is 8192 (-4096..4094) +;;; NOTE : For brcc we generate instructions such as +;;; if cc jmp; jump.[sl] offset +;;; offset of jump.[sl] is from the jump instruction but +;;; gcc calculates length from the if cc jmp instruction +;;; furthermore gcc takes the end address of the branch instruction +;;; as (pc) for a forward branch +;;; hence our range is (-4094, 4092) instead of (-4096, 4094) for a br +;;; +;;; The way the (pc) rtx works in these calculations is somewhat odd; +;;; for backward branches it's the address of the current instruction, +;;; for forward branches it's the previously known address of the following +;;; instruction - we have to take this into account by reducing the range +;;; for a forward branch. + +;; Lengths for type "mvi" insns are always defined by the instructions +;; themselves. +(define_attr "length" "" + (cond [(eq_attr "type" "mcld") + (if_then_else (match_operand 1 "effective_address_32bit_p" "") + (const_int 4) (const_int 2)) + + (eq_attr "type" "mcst") + (if_then_else (match_operand 0 "effective_address_32bit_p" "") + (const_int 4) (const_int 2)) + + (eq_attr "type" "move") (const_int 2) + + (eq_attr "type" "dsp32") (const_int 4) + (eq_attr "type" "dsp32shiftimm") (const_int 4) + (eq_attr "type" "call") (const_int 4) + + (eq_attr "type" "br") + (if_then_else (and + (le (minus (match_dup 0) (pc)) (const_int 4092)) + (ge (minus (match_dup 0) (pc)) (const_int -4096))) + (const_int 2) + (const_int 4)) + + (eq_attr "type" "brcc") + (cond [(and + (le (minus (match_dup 3) (pc)) (const_int 1020)) + (ge (minus (match_dup 3) (pc)) (const_int -1024))) + (const_int 2) + (and + (le (minus (match_dup 3) (pc)) (const_int 4092)) + (ge (minus (match_dup 3) (pc)) (const_int -4094))) + (const_int 4)] + (const_int 6)) + ] + + (const_int 2))) + +;; Classify the insns into those that are one instruction and those that +;; are more than one in sequence. +(define_attr "seq_insns" "single,multi" + (const_string "single")) + +;; Describe a user's asm statement. 
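+;; For example, a user statement compiled for this target such as
+;;   asm volatile ("ssync;");
+;; falls under the attributes below: it is classified as type "misc",
+;; treated as a multi-insn sequence for scheduling purposes, and the
+;; length of 4 serves as a conservative per-instruction size estimate
+;; for the template, whose contents GCC cannot inspect.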
+(define_asm_attributes + [(set_attr "type" "misc") + (set_attr "seq_insns" "multi") + (set_attr "length" "4")]) + +;; Conditional moves + +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")))] + "" +{ + operands[1] = bfin_gen_compare (operands[1], SImode); +}) + +(define_insn "*movsicc_insn1" + [(set (match_operand:SI 0 "register_operand" "=da,da,da") + (if_then_else:SI + (eq:BI (match_operand:BI 3 "register_operand" "C,C,C") + (const_int 0)) + (match_operand:SI 1 "register_operand" "da,0,da") + (match_operand:SI 2 "register_operand" "0,da,da")))] + "" + "@ + if !cc %0 =%1; /* movsicc-1a */ + if cc %0 =%2; /* movsicc-1b */ + if !cc %0 =%1; if cc %0=%2; /* movsicc-1 */" + [(set_attr "length" "2,2,4") + (set_attr "type" "movcc") + (set_attr "seq_insns" "*,*,multi")]) + +(define_insn "*movsicc_insn2" + [(set (match_operand:SI 0 "register_operand" "=da,da,da") + (if_then_else:SI + (ne:BI (match_operand:BI 3 "register_operand" "C,C,C") + (const_int 0)) + (match_operand:SI 1 "register_operand" "0,da,da") + (match_operand:SI 2 "register_operand" "da,0,da")))] + "" + "@ + if !cc %0 =%2; /* movsicc-2b */ + if cc %0 =%1; /* movsicc-2a */ + if cc %0 =%1; if !cc %0=%2; /* movsicc-1 */" + [(set_attr "length" "2,2,4") + (set_attr "type" "movcc") + (set_attr "seq_insns" "*,*,multi")]) + +;; Insns to load HIGH and LO_SUM + +(define_insn "movsi_high" + [(set (match_operand:SI 0 "register_operand" "=x") + (high:SI (match_operand:SI 1 "immediate_operand" "i")))] + "reload_completed" + "%d0 = %d1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movstricthi_high" + [(set (match_operand:SI 0 "register_operand" "+x") + (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (match_operand:SI 1 "immediate_operand" "i")))] + "reload_completed" + "%d0 = %d1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_low" + [(set (match_operand:SI 0 "register_operand" "=x") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "reload_completed" + "%h0 = %h2;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_high_pic" + [(set (match_operand:SI 0 "register_operand" "=x") + (high:SI (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_MOVE_PIC)))] + "" + "%d0 = %1@GOT_LOW;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_low_pic" + [(set (match_operand:SI 0 "register_operand" "=x") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_MOVE_PIC)))] + "" + "%h0 = %h2@GOT_HIGH;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +;;; Move instructions + +(define_insn_and_split "movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_operand" "=x,mx,r") + (match_operand:DI 1 "general_operand" "iFx,r,mx"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + 
} +}) + +(define_insn "movbi" + [(set (match_operand:BI 0 "nonimmediate_operand" "=x,x,d,md,C,d,C,P1") + (match_operand:BI 1 "general_operand" "x,xKs3,md,d,d,C,P0,P1"))] + + "" + "@ + %0 = %1; + %0 = %1 (X); + %0 = B %1 (Z)%! + B %0 = %1; + CC = %1; + %0 = CC; + CC = R0 < R0; + CC = R0 == R0;" + [(set_attr "type" "move,mvi,mcld,mcst,compare,compare,compare,compare") + (set_attr "length" "2,2,*,*,2,2,2,2") + (set_attr "seq_insns" "*,*,*,*,*,*,*,*")]) + +(define_insn "movpdi" + [(set (match_operand:PDI 0 "nonimmediate_operand" "=e,<,e") + (match_operand:PDI 1 "general_operand" " e,e,>"))] + "" + "@ + %0 = %1; + %0 = %x1; %0 = %w1; + %w0 = %1; %x0 = %1;" + [(set_attr "type" "move,mcst,mcld") + (set_attr "seq_insns" "*,multi,multi")]) + +(define_insn "load_accumulator" + [(set (match_operand:PDI 0 "register_operand" "=e") + (sign_extend:PDI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = %1;" + [(set_attr "type" "move")]) + +(define_insn_and_split "load_accumulator_pair" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (sign_extend:V2PDI (vec_concat:V2SI + (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d"))))] + "" + "#" + "reload_completed" + [(set (match_dup 3) (sign_extend:PDI (match_dup 1))) + (set (match_dup 4) (sign_extend:PDI (match_dup 2)))] +{ + operands[3] = gen_rtx_REG (PDImode, REGNO (operands[0])); + operands[4] = gen_rtx_REG (PDImode, REGNO (operands[0]) + 1); +}) + +(define_insn "*pushsi_insn" + [(set (mem:SI (pre_dec:SI (reg:SI REG_SP))) + (match_operand:SI 0 "register_operand" "xy"))] + "" + "[--SP] = %0;" + [(set_attr "type" "mcst") + (set_attr "addrtype" "32bit") + (set_attr "length" "2")]) + +(define_insn "*popsi_insn" + [(set (match_operand:SI 0 "register_operand" "=d,xy") + (mem:SI (post_inc:SI (reg:SI REG_SP))))] + "" + "%0 = [SP++]%!" + [(set_attr "type" "mcld") + (set_attr "addrtype" "preg,32bit") + (set_attr "length" "2")]) + +;; The first alternative is used to make reload choose a limited register +;; class when faced with a movsi_insn that had its input operand replaced +;; with a PLUS. We generally require fewer secondary reloads this way. + +(define_insn "*movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x,da,y,da,x,x,x,da,mr") + (match_operand:SI 1 "general_operand" "da,x,y,da,xKs7,xKsh,xKuh,ix,mr,da"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + %0 = %1; + %0 = %1; + %0 = %1; + %0 = %1 (X); + %0 = %1 (X); + %0 = %1 (Z); + # + %0 = %1%! + %0 = %1%!" + [(set_attr "type" "move,move,move,move,mvi,mvi,mvi,*,mcld,mcst") + (set_attr "length" "2,2,2,2,2,4,4,*,*,*")]) + +(define_insn "*movsi_insn32" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "d,P0")] UNSPEC_32BIT))] + "" + "@ + %0 = ROT %1 BY 0%! + %0 = %0 -|- %0%!" 
+ [(set_attr "type" "dsp32shiftimm,dsp32")]) + +(define_split + [(set (match_operand:SI 0 "d_register_operand" "") + (const_int 0))] + "splitting_for_sched && !optimize_size" + [(set (match_dup 0) (unspec:SI [(const_int 0)] UNSPEC_32BIT))]) + +(define_split + [(set (match_operand:SI 0 "d_register_operand" "") + (match_operand:SI 1 "d_register_operand" ""))] + "splitting_for_sched && !optimize_size" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_32BIT))]) + +(define_insn_and_split "*movv2hi_insn" + [(set (match_operand:V2HI 0 "nonimmediate_operand" "=da,da,d,dm") + (match_operand:V2HI 1 "general_operand" "i,di,md,d"))] + + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + # + %0 = %1; + %0 = %1%! + %0 = %1%!" + "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR" + [(set (match_dup 0) (high:SI (match_dup 2))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 3)))] +{ + HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; + intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; + + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + operands[2] = operands[3] = GEN_INT (trunc_int_for_mode (intval, SImode)); +} + [(set_attr "type" "move,move,mcld,mcst") + (set_attr "length" "2,2,*,*")]) + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=x,da,x,d,mr") + (match_operand:HI 1 "general_operand" "x,xKs7,xKsh,mr,d"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" +{ + static const char *templates[] = { + "%0 = %1;", + "%0 = %1 (X);", + "%0 = %1 (X);", + "%0 = W %1 (X)%!", + "W %0 = %1%!", + "%h0 = W %1%!", + "W %0 = %h1%!" + }; + int alt = which_alternative; + rtx mem = (MEM_P (operands[0]) ? operands[0] + : MEM_P (operands[1]) ? operands[1] : NULL_RTX); + if (mem && bfin_dsp_memref_p (mem)) + alt += 2; + return templates[alt]; +} + [(set_attr "type" "move,mvi,mvi,mcld,mcst") + (set_attr "length" "2,2,4,*,*")]) + +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=x,da,x,d,mr") + (match_operand:QI 1 "general_operand" "x,xKs7,xKsh,mr,d"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + %0 = %1 (X); + %0 = %1 (X); + %0 = B %1 (X)%! + B %0 = %1%!" + [(set_attr "type" "move,mvi,mvi,mcld,mcst") + (set_attr "length" "2,2,4,*,*")]) + +(define_insn "*movsf_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,da,mr") + (match_operand:SF 1 "general_operand" "x,Fx,mr,da"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + # + %0 = %1%! + %0 = %1%!" + [(set_attr "type" "move,*,mcld,mcst")]) + +(define_insn_and_split "movdf_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=x,mx,r") + (match_operand:DF 1 "general_operand" "iFx,r,mx"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + } +}) + +;; Storing halfwords. 
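+;; A small C-level picture of the kind of store these patterns are meant
+;; to catch (the names are only illustrative):
+;;   unsigned int word;
+;;   void put_high (unsigned short h)
+;;   { word = (word & 0xffff) | ((unsigned int) h << 16); }
+;; i.e. an insertion into the upper 16 bits, which the insv expander and
+;; the *movsi_insv pattern below handle as a single half-register move.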
+(define_insn "*movsi_insv" + [(set (zero_extract:SI (match_operand 0 "register_operand" "+d,x") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "nonmemory_operand" "d,n"))] + "" + "@ + %d0 = %h1 << 0%! + %d0 = %1;" + [(set_attr "type" "dsp32shiftimm,mvi")]) + +(define_expand "insv" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "nonmemory_operand" ""))] + "" +{ + if (INTVAL (operands[1]) != 16 || INTVAL (operands[2]) != 16) + FAIL; + + /* From mips.md: insert_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! register_operand (operands[0], VOIDmode)) + FAIL; +}) + +;; This is the main "hook" for PIC code. When generating +;; PIC, movsi is responsible for determining when the source address +;; needs PIC relocation and appropriately calling legitimize_pic_address +;; to perform the actual relocation. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" +{ + if (expand_move (operands, SImode)) + DONE; +}) + +(define_expand "movv2hi" + [(set (match_operand:V2HI 0 "nonimmediate_operand" "") + (match_operand:V2HI 1 "general_operand" ""))] + "" + "expand_move (operands, V2HImode);") + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + "expand_move (operands, DImode);") + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "expand_move (operands, SFmode);") + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + "expand_move (operands, DFmode);") + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "expand_move (operands, HImode);") + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " expand_move (operands, QImode); ") + +;; Some define_splits to break up SI/SFmode loads of immediate constants. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "symbolic_or_const_operand" ""))] + "reload_completed + /* Always split symbolic operands; split integer constants that are + too large for a single instruction. */ + && (GET_CODE (operands[1]) != CONST_INT + || (INTVAL (operands[1]) < -32768 + || INTVAL (operands[1]) >= 65536 + || (INTVAL (operands[1]) >= 32768 && PREG_P (operands[0]))))" + [(set (match_dup 0) (high:SI (match_dup 1))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 1)))] +{ + if (GET_CODE (operands[1]) == CONST_INT + && split_load_immediate (operands)) + DONE; + /* ??? Do something about TARGET_LOW_64K. 
*/ +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "reload_completed" + [(set (match_dup 2) (high:SI (match_dup 3))) + (set (match_dup 2) (lo_sum:SI (match_dup 2) (match_dup 3)))] +{ + long values; + REAL_VALUE_TYPE value; + + gcc_assert (GET_CODE (operands[1]) == CONST_DOUBLE); + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (value, values); + + operands[2] = gen_rtx_REG (SImode, true_regnum (operands[0])); + operands[3] = GEN_INT (trunc_int_for_mode (values, SImode)); + if (values >= -32768 && values < 65536) + { + emit_move_insn (operands[2], operands[3]); + DONE; + } + if (split_load_immediate (operands + 2)) + DONE; +}) + +;; Sadly, this can't be a proper named movstrict pattern, since the compiler +;; expects to be able to use registers for operand 1. +;; Note that the asm instruction is defined by the manual to take an unsigned +;; constant, but it doesn't matter to the assembler, and the compiler only +;; deals with sign-extended constants. Hence "Ksh". +(define_insn "movstricthi_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+x")) + (match_operand:HI 1 "immediate_operand" "Ksh"))] + "" + "%h0 = %1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +;; Sign and zero extensions + +(define_insn_and_split "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))] + "" + "@ + %0 = %h1 (X); + %0 = W %h1 (X)%!" + "reload_completed && bfin_dsp_memref_p (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] +{ + operands[2] = gen_lowpart (HImode, operands[0]); +} + [(set_attr "type" "alu0,mcld")]) + +(define_insn_and_split "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))] + "" + "@ + %0 = %h1 (Z); + %0 = W %h1 (Z)%!" + "reload_completed && bfin_dsp_memref_p (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (zero_extend:SI (match_dup 2)))] +{ + operands[2] = gen_lowpart (HImode, operands[0]); +} + [(set_attr "type" "alu0,mcld")]) + +(define_insn "zero_extendbisi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extend:SI (match_operand:BI 1 "nonimmediate_operand" "C")))] + "" + "%0 = %1;" + [(set_attr "type" "compare")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=d, d") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (X)%! + %0 = %T1 (X);" + [(set_attr "type" "mcld,alu0")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (X)%! + %0 = %T1 (X);" + [(set_attr "type" "mcld,alu0")]) + + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=d, d") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (Z)%! + %0 = %T1 (Z);" + [(set_attr "type" "mcld,alu0")]) + + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (Z)%! 
+ %0 = %T1 (Z);" + [(set_attr "type" "mcld,alu0")]) + +;; DImode logical operations + +(define_code_iterator any_logical [and ior xor]) +(define_code_attr optab [(and "and") + (ior "ior") + (xor "xor")]) +(define_code_attr op [(and "&") + (ior "|") + (xor "^")]) +(define_code_attr high_result [(and "0") + (ior "%H1") + (xor "%H1")]) + +;; Keep this pattern around to avoid generating NO_CONFLICT blocks. +(define_expand "di3" + [(set (match_operand:DI 0 "register_operand" "=d") + (any_logical:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "general_operand" "d")))] + "" +{ + rtx hi_half[3], lo_half[3]; + enum insn_code icode = CODE_FOR_si3; + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + emit_clobber (operands[0]); + split_di (operands, 3, lo_half, hi_half); + if (!(*insn_data[icode].operand[2].predicate) (lo_half[2], SImode)) + lo_half[2] = force_reg (SImode, lo_half[2]); + emit_insn (GEN_FCN (icode) (lo_half[0], lo_half[1], lo_half[2])); + if (!(*insn_data[icode].operand[2].predicate) (hi_half[2], SImode)) + hi_half[2] = force_reg (SImode, hi_half[2]); + emit_insn (GEN_FCN (icode) (hi_half[0], hi_half[1], hi_half[2])); + DONE; +}) + +(define_insn "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (match_operand:QI 1 "register_operand" "d")))] + "" + "%0 = %T1 (Z);\\n\\t%H0 = 0;" + [(set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +(define_insn "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = %h1 (Z);\\n\\t%H0 = 0;" + [(set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +(define_insn_and_split "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=d") + (sign_extend:DI (match_operand:SI 1 "register_operand" "d")))] + "" + "#" + "reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))] +{ + split_di (operands, 1, operands + 2, operands + 3); + if (REGNO (operands[0]) != REGNO (operands[1])) + emit_move_insn (operands[2], operands[1]); +}) + +(define_insn_and_split "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=d") + (sign_extend:DI (match_operand:QI 1 "register_operand" "d")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (sign_extend:SI (match_dup 1))) + (set (match_dup 3) (sign_extend:SI (match_dup 1))) + (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))] +{ + split_di (operands, 1, operands + 2, operands + 3); +}) + +(define_insn_and_split "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=d") + (sign_extend:DI (match_operand:HI 1 "register_operand" "d")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (sign_extend:SI (match_dup 1))) + (set (match_dup 3) (sign_extend:SI (match_dup 1))) + (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))] +{ + split_di (operands, 1, operands + 2, operands + 3); +}) + +;; DImode arithmetic operations + +(define_insn "add_with_carry" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (plus:SI (match_operand:SI 1 "register_operand" "%0,d") + (match_operand:SI 2 "nonmemory_operand" "Ks7,d"))) + (set (match_operand:BI 3 "register_operand" "=C,C") + (ltu:BI (not:SI (match_dup 1)) (match_dup 2)))] + "" + "@ + %0 += %2; cc = ac0; + %0 = %1 + %2; cc = ac0;" + [(set_attr "type" "alu0") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +(define_insn 
"sub_with_carry" + [(set (match_operand:SI 0 "register_operand" "=d") + (minus:SI (match_operand:SI 1 "register_operand" "%d") + (match_operand:SI 2 "nonmemory_operand" "d"))) + (set (match_operand:BI 3 "register_operand" "=C") + (leu:BI (match_dup 2) (match_dup 1)))] + "" + "%0 = %1 - %2; cc = ac0;" + [(set_attr "type" "alu0") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC 34))] + "" +{ + rtx xops[8]; + xops[0] = gen_lowpart (SImode, operands[0]); + xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + xops[2] = gen_lowpart (SImode, operands[1]); + xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4); + xops[4] = gen_lowpart (SImode, operands[2]); + xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + xops[6] = gen_reg_rtx (SImode); + xops[7] = gen_rtx_REG (BImode, REG_CC); + if (!register_operand (xops[4], SImode) + && (GET_CODE (xops[4]) != CONST_INT + || !satisfies_constraint_Ks7 (xops[4]))) + xops[4] = force_reg (SImode, xops[4]); + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + emit_clobber (operands[0]); + emit_insn (gen_add_with_carry (xops[0], xops[2], xops[4], xops[7])); + emit_insn (gen_movbisi (xops[6], xops[7])); + if (!register_operand (xops[5], SImode) + && (GET_CODE (xops[5]) != CONST_INT + || !satisfies_constraint_Ks7 (xops[5]))) + xops[5] = force_reg (SImode, xops[5]); + if (xops[5] != const0_rtx) + emit_insn (gen_addsi3 (xops[1], xops[3], xops[5])); + else + emit_move_insn (xops[1], xops[3]); + emit_insn (gen_addsi3 (xops[1], xops[1], xops[6])); + DONE; +}) + +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (clobber (reg:CC 34))] + "" +{ + rtx xops[8]; + xops[0] = gen_lowpart (SImode, operands[0]); + xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + xops[2] = gen_lowpart (SImode, operands[1]); + xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4); + xops[4] = gen_lowpart (SImode, operands[2]); + xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + xops[6] = gen_reg_rtx (SImode); + xops[7] = gen_rtx_REG (BImode, REG_CC); + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + emit_clobber (operands[0]); + emit_insn (gen_sub_with_carry (xops[0], xops[2], xops[4], xops[7])); + emit_insn (gen_notbi (xops[7], xops[7])); + emit_insn (gen_movbisi (xops[6], xops[7])); + emit_insn (gen_subsi3 (xops[1], xops[3], xops[5])); + emit_insn (gen_subsi3 (xops[1], xops[1], xops[6])); + DONE; +}) + +;; Combined shift/add instructions + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a,d") + (ashift:SI (plus:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "register_operand" "a,d")) + (match_operand:SI 3 "pos_scale_operand" "P1P2,P1P2")))] + "" + "%0 = (%0 + %2) << %3;" /* "shadd %0,%2,%3;" */ + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (match_operand:SI 1 "register_operand" "a") + (mult:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "scale_by_operand" "i"))))] + "" + "%0 = %1 + (%2 << 
%X3);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (match_operand:SI 1 "register_operand" "a") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "pos_scale_operand" "i"))))] + "" + "%0 = %1 + (%2 << %3);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "scale_by_operand" "i")) + (match_operand:SI 3 "register_operand" "a")))] + "" + "%0 = %3 + (%1 << %X2);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "pos_scale_operand" "i")) + (match_operand:SI 3 "register_operand" "a")))] + "" + "%0 = %3 + (%1 << %2);" + [(set_attr "type" "alu0")]) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%d")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "d"))))] + "" + "%0 = %h1 * %h2 (IS)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%d")) + (zero_extend:SI (match_operand:HI 2 "register_operand" "d"))))] + "" + "%0 = %h1 * %h2 (FU)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi3" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "W")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "W"))))] + "" + "%0 = %h2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; The processor also supports ireg += mreg or ireg -= mreg, but these +;; are unusable if we don't ensure that the corresponding lreg is zero. +;; The same applies to the add/subtract constant versions involving +;; iregs + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=ad,a,d") + (plus:SI (match_operand:SI 1 "register_operand" "%0, a,d") + (match_operand:SI 2 "reg_or_7bit_operand" "Ks7, a,d")))] + "" + "@ + %0 += %2; + %0 = %1 + %2; + %0 = %1 + %2;" + [(set_attr "type" "alu0") + (set_attr "length" "2,2,2")]) + +(define_insn "ssaddsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_plus:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = %1 + %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=da,d,a") + (minus:SI (match_operand:SI 1 "register_operand" "0,d,0") + (match_operand:SI 2 "reg_or_neg7bit_operand" "KN7,d,a")))] + "" +{ + static const char *const strings_subsi3[] = { + "%0 += -%2;", + "%0 = %1 - %2;", + "%0 -= %2;", + }; + + if (CONSTANT_P (operands[2]) && INTVAL (operands[2]) < 0) { + rtx tmp_op = operands[2]; + operands[2] = GEN_INT (-INTVAL (operands[2])); + output_asm_insn ("%0 += %2;", operands); + operands[2] = tmp_op; + return ""; + } + + return strings_subsi3[which_alternative]; +} + [(set_attr "type" "alu0")]) + +(define_insn "sssubsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_minus:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = %1 - %2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +;; Accumulator addition + +(define_insn "addpdi3" + [(set (match_operand:PDI 0 "register_operand" "=A") + (ss_plus:PDI (match_operand:PDI 1 "register_operand" "%0") + (match_operand:PDI 2 "nonmemory_operand" "B")))] + "" + "A0 += A1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sum_of_accumulators" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_truncate:SI + (ss_plus:PDI (match_operand:PDI 2 "register_operand" "1") + (match_operand:PDI 3 "register_operand" "B")))) + (set (match_operand:PDI 1 "register_operand" "=A") + (ss_plus:PDI (match_dup 2) (match_dup 3)))] + "" + "%0 = (A0 += A1)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "us_truncpdisi2" + [(set (match_operand:SI 0 "register_operand" "=D,W") + (us_truncate:SI (match_operand:PDI 1 "register_operand" "A,B")))] + "" + "%0 = %1 (FU)%!" + [(set_attr "type" "dsp32")]) + +;; Bit test instructions + +(define_insn "*not_bittst" + [(set (match_operand:BI 0 "register_operand" "=C") + (eq:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5")) + (const_int 0)))] + "" + "cc = !BITTST (%1,%2);" + [(set_attr "type" "alu0")]) + +(define_insn "*bittst" + [(set (match_operand:BI 0 "register_operand" "=C") + (ne:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5")) + (const_int 0)))] + "" + "cc = BITTST (%1,%2);" + [(set_attr "type" "alu0")]) + +(define_insn_and_split "*bit_extract" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5"))) + (clobber (reg:BI REG_CC))] + "" + "#" + "" + [(set (reg:BI REG_CC) + (ne:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (ne:SI (reg:BI REG_CC) (const_int 0)))]) + +(define_insn_and_split "*not_bit_extract" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extract:SI (not:SI (match_operand:SI 1 "register_operand" "d")) + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5"))) + (clobber (reg:BI REG_CC))] + "" + "#" + "" + [(set (reg:BI REG_CC) + (eq:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (ne:SI (reg:BI REG_CC) (const_int 0)))]) + +(define_insn "*andsi_insn" + [(set (match_operand:SI 0 "register_operand" "=d,d,d,d") + (and:SI (match_operand:SI 1 "register_operand" "%0,d,d,d") + (match_operand:SI 2 "rhs_andsi3_operand" "L,M1,M2,d")))] + "" + "@ + BITCLR (%0,%Y2); + %0 = %T1 (Z); + %0 = %h1 (Z); + %0 = %1 & %2;" + [(set_attr "type" "alu0")]) + +(define_expand "andsi3" + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (highbits_operand (operands[2], SImode)) + { + operands[2] = GEN_INT (exact_log2 (-INTVAL (operands[2]))); + emit_insn (gen_ashrsi3 (operands[0], operands[1], operands[2])); + emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2])); + DONE; + } + if (! 
rhs_andsi3_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (ior:SI (match_operand:SI 1 "register_operand" "%0,d") + (match_operand:SI 2 "regorlog2_operand" "J,d")))] + "" + "@ + BITSET (%0, %X2); + %0 = %1 | %2;" + [(set_attr "type" "alu0")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (xor:SI (match_operand:SI 1 "register_operand" "%0,d") + (match_operand:SI 2 "regorlog2_operand" "J,d")))] + "" + "@ + BITTGL (%0, %X2); + %0 = %1 ^ %2;" + [(set_attr "type" "alu0")]) + +(define_insn "ones" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(match_operand:SI 1 "register_operand" "d")] + UNSPEC_ONES))] + "" + "%h0 = ONES %1;" + [(set_attr "type" "alu0")]) + +(define_insn "smaxsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smax:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = max(%1,%2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smin:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = min(%1,%2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (abs:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = abs %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssabssi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_abs:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = abs %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (neg:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = -%1;" + [(set_attr "type" "alu0")]) + +(define_insn "ssnegsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_neg:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = -%1 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (not:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = ~%1;" + [(set_attr "type" "alu0")]) + +(define_insn "signbitssi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (if_then_else:HI + (lt (match_operand:SI 1 "register_operand" "d") (const_int 0)) + (clz:HI (not:SI (match_dup 1))) + (clz:HI (match_dup 1))))] + "" + "%h0 = signbits %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssroundsi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (truncate:HI + (lshiftrt:SI (ss_plus:SI (match_operand:SI 1 "register_operand" "d") + (const_int 32768)) + (const_int 16))))] + "" + "%h0 = %1 (RND)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "smaxhi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (smax:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%0 = max(%1,%2) (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminhi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (smin:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%0 = min(%1,%2) (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "abshi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (abs:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = abs %1 (V)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "neghi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (neg:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = -%1;" + [(set_attr "type" "alu0")]) + +(define_insn "ssneghi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_neg:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = -%1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "signbitshi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (if_then_else:HI + (lt (match_operand:HI 1 "register_operand" "d") (const_int 0)) + (clz:HI (not:HI (match_dup 1))) + (clz:HI (match_dup 1))))] + "" + "%h0 = signbits %h1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 *= %2;" + [(set_attr "type" "mult")]) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_FU), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_addpdi3 (a0reg, a0reg, a1reg)); + emit_insn (gen_us_truncpdisi2 (operands[0], a0reg)); + } + else + { + rtx umulsi3_highpart_libfunc + = init_one_libfunc ("__umulsi3_highpart"); + + emit_library_call_value (umulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } + DONE; +}) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_IS), + GEN_INT (MACFLAG_IS_M))); + emit_insn (gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + 
const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg)); + } + else + { + rtx smulsi3_highpart_libfunc + = init_one_libfunc ("__smulsi3_highpart"); + + emit_library_call_value (smulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } + DONE; +}) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" +{ + if (GET_CODE (operands[2]) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } +}) + +(define_insn_and_split "*ashlsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=d,d,a,a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "0,d,a,a,a") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1,P2,?P3P4")))] + "" + "@ + %0 <<= %2; + %0 = %1 << %2%! + %0 = %1 + %1; + %0 = %1 << %2; + #" + "PREG_P (operands[0]) && INTVAL (operands[2]) > 2" + [(set (match_dup 0) (ashift:SI (match_dup 1) (const_int 2))) + (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3)))] + "operands[3] = GEN_INT (INTVAL (operands[2]) - 2);" + [(set_attr "type" "shft,dsp32shiftimm,shft,shft,*")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,d") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5")))] + "" + "@ + %0 >>>= %2; + %0 = %1 >>> %2%!" + [(set_attr "type" "shft,dsp32shiftimm")]) + +(define_insn "rotl16" + [(set (match_operand:SI 0 "register_operand" "=d") + (rotate:SI (match_operand:SI 1 "register_operand" "d") + (const_int 16)))] + "" + "%0 = PACK (%h1, %d1)%!" + [(set_attr "type" "dsp32")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (rotate:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")))] + "" +{ + if (INTVAL (operands[2]) != 16) + FAIL; +}) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "") + (rotatert:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")))] + "" +{ + if (INTVAL (operands[2]) != 16) + FAIL; + emit_insn (gen_rotl16 (operands[0], operands[1])); + DONE; +}) + + +(define_insn "ror_one" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "d") (const_int 1)) + (ashift:SI (zero_extend:SI (reg:BI REG_CC)) (const_int 31)))) + (set (reg:BI REG_CC) + (zero_extract:BI (match_dup 1) (const_int 1) (const_int 0)))] + "" + "%0 = ROT %1 BY -1%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "rol_one" + [(set (match_operand:SI 0 "register_operand" "+d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") (const_int 1)) + (zero_extend:SI (reg:BI REG_CC)))) + (set (reg:BI REG_CC) + (zero_extract:BI (match_dup 1) (const_int 31) (const_int 0)))] + "" + "%0 = ROT %1 BY 1%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! 
rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_move_insn (bfin_cc_rtx, const0_rtx); + emit_insn (gen_ror_one (hi_half[0], hi_half[0])); + emit_insn (gen_ror_one (lo_half[0], lo_half[0])); + DONE; +}) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_insn (gen_compare_lt (gen_rtx_REG (BImode, REG_CC), + hi_half[1], const0_rtx)); + emit_insn (gen_ror_one (hi_half[0], hi_half[0])); + emit_insn (gen_ror_one (lo_half[0], lo_half[0])); + DONE; +}) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_move_insn (bfin_cc_rtx, const0_rtx); + emit_insn (gen_rol_one (lo_half[0], lo_half[0])); + emit_insn (gen_rol_one (hi_half[0], hi_half[0])); + DONE; +}) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,d,a") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1P2")))] + "" + "@ + %0 >>= %2; + %0 = %1 >> %2%! + %0 = %1 >> %2;" + [(set_attr "type" "shft,dsp32shiftimm,shft")]) + +(define_insn "lshrpdi3" + [(set (match_operand:PDI 0 "register_operand" "=e") + (lshiftrt:PDI (match_operand:PDI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Ku5")))] + "" + "%0 = %1 >> %2%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "ashrpdi3" + [(set (match_operand:PDI 0 "register_operand" "=e") + (ashiftrt:PDI (match_operand:PDI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Ku5")))] + "" + "%0 = %1 >>> %2%!" 
+ [(set_attr "type" "dsp32shiftimm")]) + +;; A pattern to reload the equivalent of +;; (set (Dreg) (plus (FP) (large_constant))) +;; or +;; (set (dagreg) (plus (FP) (arbitrary_constant))) +;; using a scratch register +(define_expand "reload_insi" + [(parallel [(set (match_operand:SI 0 "register_operand" "=w") + (match_operand:SI 1 "fp_plus_const_operand" "")) + (clobber (match_operand:SI 2 "register_operand" "=&a"))])] + "" +{ + rtx fp_op = XEXP (operands[1], 0); + rtx const_op = XEXP (operands[1], 1); + rtx primary = operands[0]; + rtx scratch = operands[2]; + + emit_move_insn (scratch, const_op); + emit_insn (gen_addsi3 (scratch, scratch, fp_op)); + emit_move_insn (primary, scratch); + DONE; +}) + +(define_mode_iterator AREG [PDI V2PDI]) + +(define_insn "reload_in" + [(set (match_operand:AREG 0 "register_operand" "=e") + (match_operand:AREG 1 "memory_operand" "m")) + (clobber (match_operand:SI 2 "register_operand" "=d"))] + "" +{ + rtx xops[4]; + xops[0] = operands[0]; + xops[1] = operands[2]; + split_di (operands + 1, 1, xops + 2, xops + 3); + output_asm_insn ("%1 = %2;", xops); + output_asm_insn ("%w0 = %1;", xops); + output_asm_insn ("%1 = %3;", xops); + output_asm_insn ("%x0 = %1;", xops); + return ""; +} + [(set_attr "seq_insns" "multi") + (set_attr "type" "mcld") + (set_attr "length" "12")]) + +(define_insn "reload_out" + [(set (match_operand:AREG 0 "memory_operand" "=m") + (match_operand:AREG 1 "register_operand" "e")) + (clobber (match_operand:SI 2 "register_operand" "=d"))] + "" +{ + rtx xops[4]; + xops[0] = operands[1]; + xops[1] = operands[2]; + split_di (operands, 1, xops + 2, xops + 3); + output_asm_insn ("%1 = %w0;", xops); + output_asm_insn ("%2 = %1;", xops); + output_asm_insn ("%1 = %x0;", xops); + output_asm_insn ("%3 = %1;", xops); + return ""; +} + [(set_attr "seq_insns" "multi") + (set_attr "type" "mcld") + (set_attr "length" "12")]) + +;; Jump instructions + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" +{ + if (get_attr_length (insn) == 2) + return "jump.s %0;"; + else + return "jump.l %0;"; +} + [(set_attr "type" "br")]) + +(define_insn "indirect_jump" + [(set (pc) + (match_operand:SI 0 "register_operand" "a"))] + "" + "jump (%0);" + [(set_attr "type" "misc")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "a")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + /* In PIC mode, the table entries are stored PC relative. + Convert the relative address to an absolute address. 
*/ + if (flag_pic) + { + rtx op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + + operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], + op1, NULL_RTX, 0, OPTAB_DIRECT); + } +}) + +(define_insn "*tablejump_internal" + [(set (pc) (match_operand:SI 0 "register_operand" "a")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jump (%0);" + [(set_attr "type" "misc")]) + +;; Hardware loop + +; operand 0 is the loop count pseudo register +; operand 1 is the number of loop iterations or 0 if it is unknown +; operand 2 is the maximum number of loop iterations +; operand 3 is the number of levels of enclosed loops +; operand 4 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else + (ne (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 5 ""))])] + "" +{ + /* The loop optimizer doesn't check the predicates... */ + if (GET_MODE (operands[0]) != SImode) + FAIL; + /* Due to limitations in the hardware (an initial loop count of 0 + does not loop 2^32 times) we must avoid to generate a hardware + loops when we cannot rule out this case. */ + if (!flag_unsafe_loop_optimizations + && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF) + FAIL; + bfin_hardware_loop (); +}) + +(define_insn "loop_end" + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "+a*d,*b*v*f,m") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 2 "=X,&r,&r"))] + "" + "@ + /* loop end %0 %l1 */ + # + #" + [(set_attr "length" "6,10,14")]) + +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "nondp_reg_or_memory_operand" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 2 "=&r"))] + "splitting_loops" + [(set (match_dup 2) (match_dup 0)) + (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1))) + (set (match_dup 0) (match_dup 2)) + (set (reg:BI REG_CC) (eq:BI (match_dup 2) (const_int 0))) + (set (pc) + (if_then_else (eq (reg:BI REG_CC) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") + +(define_insn "lsetup_with_autoinit" + [(set (match_operand:SI 0 "lt_register_operand" "=t") + (label_ref (match_operand 1 "" ""))) + (set (match_operand:SI 2 "lb_register_operand" "=u") + (label_ref (match_operand 3 "" ""))) + (set (match_operand:SI 4 "lc_register_operand" "=k") + (match_operand:SI 5 "register_operand" "a"))] + "" + "LSETUP (%1, %3) %4 = %5;" + [(set_attr "length" "4")]) + +(define_insn "lsetup_without_autoinit" + [(set (match_operand:SI 0 "lt_register_operand" "=t") + (label_ref (match_operand 1 "" ""))) + (set (match_operand:SI 2 "lb_register_operand" "=u") + (label_ref (match_operand 3 "" ""))) + (use (match_operand:SI 4 "lc_register_operand" "k"))] + "" + "LSETUP (%1, %3) %4;" + [(set_attr "length" "4")]) + +;; Call instructions.. + +;; The explicit MEM inside the UNSPEC prevents the compiler from moving +;; the load before a branch after a NULL test, or before a store that +;; initializes a function descriptor. 
+ +(define_insn_and_split "load_funcdescsi" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI [(mem:SI (match_operand:SI 1 "address_operand" "p"))] + UNSPEC_VOLATILE_LOAD_FUNCDESC))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (mem:SI (match_dup 1)))]) + +(define_expand "call" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))])] + "" +{ + bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 0); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (return)])] + "" +{ + bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 1); + DONE; +}) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" ""))) + (use (match_operand 3 "" ""))])] + "" +{ + bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 0); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (return)])] + "" +{ + bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 1); + DONE; +}) + +(define_insn "*call_symbol_fdpic" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[3]))" + "call %0;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_symbol_fdpic" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[3]))" + "jump.l %0;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_value_symbol_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[4]))" + "call %1;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_symbol_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[4]))" + "jump.l %1;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_insn_fdpic" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! 
SIBLING_CALL_P (insn)" + "call (%0);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_insn_fdpic" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%0);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_value_insn_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn)" + "call (%1);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_value_insn_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%1);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_symbol" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[2]))" + "call %0;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_symbol" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[2]))" + "jump.l %0;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[3]))" + "call %1;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_symbol" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[3]))" + "jump.l %1;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_insn" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "a")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (clobber (reg:SI REG_RETS))] + "! 
SIBLING_CALL_P (insn)" + "call (%0);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "z")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%0);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_value_insn" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "a")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn)" + "call (%1);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "z")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%1);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +;; Block move patterns + +;; We cheat. This copies one more word than operand 2 indicates. + +(define_insn "rep_movsi" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (const_int 2))) + (const_int 4))) + (set (match_operand:SI 1 "register_operand" "=&b") + (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1") + (ashift:SI (match_dup 2) (const_int 2))) + (const_int 4))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 2)) + (clobber (match_scratch:HI 5 "=&d")) + (clobber (reg:SI REG_LT1)) + (clobber (reg:SI REG_LC1)) + (clobber (reg:SI REG_LB1))] + "" + "%5 = [%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || [%3++] = %5 || %5 = [%4++]; [%3++] = %5;" + [(set_attr "type" "misc") + (set_attr "length" "16") + (set_attr "seq_insns" "multi")]) + +(define_insn "rep_movhi" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (const_int 1))) + (const_int 2))) + (set (match_operand:SI 1 "register_operand" "=&b") + (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1") + (ashift:SI (match_dup 2) (const_int 1))) + (const_int 2))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 2)) + (clobber (match_scratch:HI 5 "=&d")) + (clobber (reg:SI REG_LT1)) + (clobber (reg:SI REG_LC1)) + (clobber (reg:SI REG_LB1))] + "" + "%h5 = W[%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || W [%3++] = %5 || %h5 = W [%4++]; W [%3++] = %5;" + [(set_attr "type" "misc") + (set_attr "length" "16") + (set_attr "seq_insns" "multi")]) + +(define_expand "movmemsi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" +{ + if (bfin_expand_movmem (operands[0], operands[1], operands[2], operands[3])) + DONE; + FAIL; +}) + +;; Conditional branch patterns +;; The Blackfin has only few condition codes: eq, lt, lte, ltu, leu + +(define_insn "compare_eq" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (eq:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1==%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_ne" + [(set (match_operand:BI 0 
"register_operand" "=C,C") + (ne:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "0" + "cc =%1!=%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_lt" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (lt:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1<%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_le" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (le:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1<=%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_leu" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (leu:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))] + "" + "cc =%1<=%2 (iu);" + [(set_attr "type" "compare")]) + +(define_insn "compare_ltu" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (ltu:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))] + "" + "cc =%1<%2 (iu);" + [(set_attr "type" "compare")]) + +;; Same as above, but and CC with the overflow bit generated by the first +;; multiplication. +(define_insn "flag_mul_macv2hi_parts_acconly_andcc0" + [(set (match_operand:PDI 0 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d,d,d") + (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand 10 "const_int_operand" "PB,PA,PA")] + UNSPEC_MUL_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_dup 2) + (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_dup 3) + (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand:PDI 8 "register_operand" "1,1,1") + (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 11 "const_int_operand" "PA,PB,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (reg:BI REG_CC) + (and:BI (reg:BI REG_CC) + (unspec:BI [(vec_select:HI (match_dup 2) (parallel [(match_dup 4)])) + (vec_select:HI (match_dup 3) (parallel [(match_dup 6)])) + (match_dup 10)] + UNSPEC_MUL_WITH_FLAG)))] + "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))" +{ + rtx xops[6]; + const char *templates[] = { + "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;" }; + int alt = (INTVAL 
(operands[4]) + (INTVAL (operands[5]) << 1) + + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3)); + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = operands[3]; + xops[4] = operands[9]; + xops[5] = which_alternative == 0 ? operands[10] : operands[11]; + output_asm_insn (templates[alt], xops); + return ""; +} + [(set_attr "type" "misc") + (set_attr "length" "6") + (set_attr "seq_insns" "multi")]) + +(define_expand "cbranchsi4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "reg_or_const_int_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx bi_compare = bfin_gen_compare (operands[0], SImode); + emit_jump_insn (gen_cbranchbi4 (bi_compare, bfin_cc_rtx, CONST0_RTX (BImode), + operands[3])); + DONE; +}) + +(define_insn "cbranchbi4" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + asm_conditional_branch (insn, operands, 0, 0); + return ""; +} + [(set_attr "type" "brcc")]) + +;; Special cbranch patterns to deal with the speculative load problem - see +;; bfin_reorg for details. + +(define_insn "cbranch_predicted_taken" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (unspec [(const_int 0)] UNSPEC_CBRANCH_TAKEN)] + "" +{ + asm_conditional_branch (insn, operands, 0, 1); + return ""; +} + [(set_attr "type" "brcc")]) + +(define_insn "cbranch_with_nops" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (unspec [(match_operand 4 "immediate_operand" "")] UNSPEC_CBRANCH_NOPS)] + "reload_completed" +{ + asm_conditional_branch (insn, operands, INTVAL (operands[4]), 0); + return ""; +} + [(set_attr "type" "brcc") + (set_attr "length" "8")]) + +;; setcc insns. + +(define_expand "cstorebi4" + [(set (match_dup 4) + (match_operator:BI 1 "bfin_bimode_comparison_operator" + [(match_operand:BI 2 "register_operand" "") + (match_operand:BI 3 "reg_or_const_int_operand" "")])) + (set (match_operand:SI 0 "register_operand" "") + (ne:SI (match_dup 4) (const_int 0)))] + "" +{ + /* It could be expanded as a movbisi instruction, but the portable + alternative produces better code. 
*/ + if (GET_CODE (operands[1]) == NE) + FAIL; + + operands[4] = bfin_cc_rtx; +}) + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "reg_or_const_int_operand" "")]))] + "" +{ + rtx bi_compare, test; + + if (!bfin_direct_comparison_operator (operands[1], SImode)) + { + if (!register_operand (operands[3], SImode) + || GET_CODE (operands[1]) == NE) + FAIL; + test = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])), + SImode, operands[3], operands[2]); + } + else + test = operands[1]; + + bi_compare = bfin_gen_compare (test, SImode); + gcc_assert (GET_CODE (bi_compare) == NE); + emit_insn (gen_movbisi (operands[0], bfin_cc_rtx)); + DONE; +}) + +(define_insn "nop" + [(const_int 0)] + "" + "nop;") + +;; A nop which stays there when emitted. +(define_insn "forced_nop" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "" + "nop;") + +(define_insn "mnop" + [(unspec [(const_int 0)] UNSPEC_32BIT)] + "" + "mnop%!" + [(set_attr "type" "dsp32")]) + +;;;;;;;;;;;;;;;;;;;; CC2dreg ;;;;;;;;;;;;;;;;;;;;;;;;; +(define_insn "movsibi" + [(set (match_operand:BI 0 "register_operand" "=C") + (ne:BI (match_operand:SI 1 "register_operand" "d") + (const_int 0)))] + "" + "CC = %1;" + [(set_attr "length" "2")]) + +(define_insn_and_split "movbisi" + [(set (match_operand:SI 0 "register_operand" "=d") + (ne:SI (match_operand:BI 1 "register_operand" "C") + (const_int 0)))] + "" + "#" + "" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:BI 1 "register_operand" "")))] + "") + +(define_insn "notbi" + [(set (match_operand:BI 0 "register_operand" "=C") + (eq:BI (match_operand:BI 1 "register_operand" " 0") + (const_int 0)))] + "" + "%0 = ! %0;" /* NOT CC;" */ + [(set_attr "type" "compare")]) + +;; Vector and DSP insns + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 24)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 8))))] + "" + "%0 = ALIGN8(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 16)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 16))))] + "" + "%0 = ALIGN16(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 8)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 24))))] + "" + "%0 = ALIGN24(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +;; Prologue and epilogue. + +(define_expand "prologue" + [(const_int 1)] + "" + "bfin_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(const_int 1)] + "" + "bfin_expand_epilogue (1, 0, 0); DONE;") + +(define_expand "sibcall_epilogue" + [(const_int 1)] + "" + "bfin_expand_epilogue (0, 0, 1); DONE;") + +(define_expand "eh_return" + [(use (match_operand:SI 0 "register_operand" ""))] + "" +{ + emit_insn (gen_eh_store_handler (EH_RETURN_HANDLER_RTX, operands[0])); + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +(define_insn "eh_store_handler" + [(unspec_volatile [(match_operand:SI 1 "register_operand" "da")] + UNSPEC_VOLATILE_STORE_EH_HANDLER) + (clobber (match_operand:SI 0 "memory_operand" "=m"))] + "" + "%0 = %1%!" 
+ [(set_attr "type" "mcst")]) + +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 1)] + "bfin_expand_epilogue (1, 1, 0); DONE;") + +(define_insn "link" + [(set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -4))) (reg:SI REG_RETS)) + (set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -8))) (reg:SI REG_FP)) + (set (reg:SI REG_FP) + (plus:SI (reg:SI REG_SP) (const_int -8))) + (set (reg:SI REG_SP) + (plus:SI (reg:SI REG_SP) (match_operand:SI 0 "immediate_operand" "i")))] + "" + "LINK %Z0;" + [(set_attr "length" "4")]) + +(define_insn "unlink" + [(set (reg:SI REG_FP) (mem:SI (reg:SI REG_FP))) + (set (reg:SI REG_RETS) (mem:SI (plus:SI (reg:SI REG_FP) (const_int 4)))) + (set (reg:SI REG_SP) (plus:SI (reg:SI REG_FP) (const_int 8)))] + "" + "UNLINK;" + [(set_attr "length" "4")]) + +;; This pattern is slightly clumsy. The stack adjust must be the final SET in +;; the pattern, otherwise dwarf2out becomes very confused about which reg goes +;; where on the stack, since it goes through all elements of the parallel in +;; sequence. +(define_insn "push_multiple" + [(match_parallel 0 "push_multiple_operation" + [(unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_PUSH_MULTIPLE)])] + "" +{ + output_push_multiple (insn, operands); + return ""; +}) + +(define_insn "pop_multiple" + [(match_parallel 0 "pop_multiple_operation" + [(set (reg:SI REG_SP) + (plus:SI (reg:SI REG_SP) (match_operand:SI 1 "immediate_operand" "i")))])] + "" +{ + output_pop_multiple (insn, operands); + return ""; +}) + +(define_insn "return_internal" + [(return) + (use (match_operand 0 "register_operand" ""))] + "reload_completed" +{ + switch (REGNO (operands[0])) + { + case REG_RETX: + return "rtx;"; + case REG_RETN: + return "rtn;"; + case REG_RETI: + return "rti;"; + case REG_RETS: + return "rts;"; + } + gcc_unreachable (); +}) + +;; When used at a location where CC contains 1, causes a speculative load +;; that is later cancelled. This is used for certain workarounds in +;; interrupt handler prologues. +(define_insn "dummy_load" + [(unspec_volatile [(match_operand 0 "register_operand" "a") + (match_operand 1 "register_operand" "C")] + UNSPEC_VOLATILE_DUMMY)] + "" + "if cc jump 4;\n\tr7 = [%0];" + [(set_attr "type" "misc") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +;; A placeholder insn inserted before the final scheduling pass. It is used +;; to improve scheduling of loads when workarounds for speculative loads are +;; needed, by not placing them in the first few cycles after a conditional +;; branch. 
+(define_insn "stall" + [(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")] + UNSPEC_VOLATILE_STALL)] + "" + "" + [(set_attr "type" "stall")]) + +(define_insn "csync" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)] + "" + "csync;" + [(set_attr "type" "sync")]) + +(define_insn "ssync" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_SSYNC)] + "" + "ssync;" + [(set_attr "type" "sync")]) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 3))] + "" + "excpt 3;" + [(set_attr "type" "misc") + (set_attr "length" "2")]) + +(define_insn "trapifcc" + [(trap_if (reg:BI REG_CC) (const_int 3))] + "" + "if !cc jump 4 (bp); excpt 3;" + [(set_attr "type" "misc") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +;;; Vector instructions + +;; First, all sorts of move variants + +(define_insn "movhiv2hi_low" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (match_operand:HI 2 "register_operand" "d") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h2 << 0%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "movhiv2hi_high" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (match_operand:HI 2 "register_operand" "d")))] + "" + "%d0 = %h2 << 0%!" + [(set_attr "type" "dsp32shiftimm")]) + +;; No earlyclobber on alternative two since our sequence ought to be safe. +;; The order of operands is intentional to match the VDSP builtin (high word +;; is passed first). +(define_insn_and_split "composev2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d,d") + (vec_concat:V2HI (match_operand:HI 2 "register_operand" "0,d") + (match_operand:HI 1 "register_operand" "d,d")))] + "" + "@ + %d0 = %h1 << 0%! + #" + "reload_completed" + [(set (match_dup 0) + (vec_concat:V2HI + (vec_select:HI (match_dup 0) (parallel [(const_int 0)])) + (match_dup 1))) + (set (match_dup 0) + (vec_concat:V2HI + (match_dup 2) + (vec_select:HI (match_dup 0) (parallel [(const_int 1)]))))] + "" + [(set_attr "type" "dsp32shiftimm")]) + +; Like composev2hi, but operating on elements of V2HI vectors. +; Useful on its own, and as a combiner bridge for the multiply and +; mac patterns. +(define_insn "packv2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d,d,d,d,d,d") + (vec_concat:V2HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "0,0,d,d,d,d,d,d") + (parallel [(match_operand 3 "const01_operand" "P0,P0,P0,P1,P0,P1,P0,P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,0,0,d,d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0,P1,P1,P1,P0,P0,P1,P1")]))))] + "" + "@ + %d0 = %h2 << 0%! + %d0 = %d2 << 0%! + %h0 = %h1 << 0%! + %h0 = %d1 << 0%! + %0 = PACK (%h2,%h1)%! + %0 = PACK (%h2,%d1)%! + %0 = PACK (%d2,%h1)%! + %0 = PACK (%d2,%d1)%!" + [(set_attr "type" "dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32,dsp32,dsp32,dsp32")]) + +(define_insn "movv2hi_hi" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,d,d") + (parallel [(match_operand 2 "const01_operand" "P0,P0,P1")])))] + "" + "@ + /* optimized out */ + %h0 = %h1 << 0%! + %h0 = %d1 << 0%!" 
+ [(set_attr "type" "dsp32shiftimm")]) + +(define_expand "movv2hi_hi_low" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "") + (parallel [(const_int 0)])))] + "" + "") + +(define_expand "movv2hi_hi_high" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "") + (parallel [(const_int 1)])))] + "" + "") + +;; Unusual arithmetic operations on 16-bit registers. + +(define_code_iterator sp_or_sm [ss_plus ss_minus]) +(define_code_attr spm_string [(ss_plus "+") (ss_minus "-")]) +(define_code_attr spm_name [(ss_plus "add") (ss_minus "sub")]) + +(define_insn "sshi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (sp_or_sm:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%h0 = %h1 %h2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_parts" + [(set (match_operand:HI 0 "register_operand" "=d") + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")]))))] + "" +{ + const char *templates[] = { + "%h0 = %h1 %h2 (S)%!", + "%h0 = %d1 %h2 (S)%!", + "%h0 = %h1 %d2 (S)%!", + "%h0 = %d1 %d2 (S)%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_low_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])))))] + "" +{ + const char *templates[] = { + "%h0 = %h2 %h3 (S)%!", + "%h0 = %d2 %h3 (S)%!", + "%h0 = %h2 %d3 (S)%!", + "%h0 = %d2 %d3 (S)%!" }; + int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_high_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")]))) + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 1)]))))] + "" +{ + const char *templates[] = { + "%d0 = %h2 %h3 (S)%!", + "%d0 = %d2 %h3 (S)%!", + "%d0 = %h2 %d3 (S)%!", + "%d0 = %d2 %d3 (S)%!" }; + int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; V2HI vector insns + +(define_insn "addv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (plus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 +|+ %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_plus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 +|+ %2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "subv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (minus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 -|- %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_minus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 -|- %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 +|- %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 -|+ %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 +|- %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssubaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 -|+ %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sublohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 - %h2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 - %d2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssublohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 - %h2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "sssubhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 - %d2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addlohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 + %h2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 + %d2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddlohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 + %h2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 + %d2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (smin:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = MIN (%1, %2) (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "smaxv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (smax:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = MAX (%1, %2) (V)%!" + [(set_attr "type" "dsp32")]) + +;; Multiplications. + +;; The Blackfin allows a lot of different options, and we need many patterns to +;; cover most of the hardware's abilities. +;; There are a few simple patterns using MULT rtx codes, but most of them use +;; an unspec with a const_int operand that determines which flag to use in the +;; instruction. +;; There are variants for single and parallel multiplications. +;; There are variants which just use 16-bit lowparts as inputs, and variants +;; which allow the user to choose just which halves to use as input values. +;; There are variants which set D registers, variants which set accumulators, +;; variants which set both, some of them optionally using the accumulators as +;; inputs for multiply-accumulate operations. + +(define_insn "flag_mulhi" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%h0 = %h1 * %h2 %M3%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhi_parts" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (match_operand 5 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = %h1 * %h2 %M5%!", + "%h0 = %d1 * %h2 %M5%!", + "%h0 = %h1 * %d2 %M5%!", + "%h0 = %d1 * %d2 %M5%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhisi" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%0 = %h1 * %h2 %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhisi_parts" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (match_operand 5 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%0 = %h1 * %h2 %M5%!", + "%0 = %d1 * %h2 %M5%!", + "%0 = %h1 * %d2 %M5%!", + "%0 = %d1 * %d2 %M5%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; Three alternatives here to cover all possible allocations: +;; 0. mac flag is usable only for accumulator 1 - use A1 and odd DREG +;; 1. mac flag is usable for accumulator 0 - use A0 and even DREG +;; 2. mac flag is usable in any accumulator - use A1 and odd DREG +;; Other patterns which don't have a DREG destination can collapse cases +;; 1 and 2 into one. +(define_insn "flag_machi" + [(set (match_operand:HI 0 "register_operand" "=W,D,W") + (unspec:HI [(match_operand:HI 2 "register_operand" "d,d,d") + (match_operand:HI 3 "register_operand" "d,d,d") + (match_operand 4 "register_operand" "1,1,1") + (match_operand 5 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 6 "const_int_operand" "PB,PA,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,A,B") + (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3) + (match_dup 4) (match_dup 5)] + UNSPEC_MAC_WITH_FLAG))] + "" + "%h0 = (%1 %b5 %h2 * %h3) %M6%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_machi_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d") + (match_operand:HI 2 "register_operand" "d,d") + (match_operand 3 "register_operand" "0,0") + (match_operand 4 "const01_operand" "P0P1,P0P1") + (match_operand 5 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" + "%0 %b4 %h1 * %h2 %M5%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_machi_parts_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d,d") + (parallel [(match_operand 3 "const01_operand" "P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1")])) + (match_operand:PDI 5 "register_operand" "0,0") + (match_operand 6 "const01_operand" "P0P1,P0P1") + (match_operand 7 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%0 %b6 %h1 * %h2 %M7%!", + "%0 %b6 %d1 * %h2 %M7%!", + "%0 %b6 %h1 * %d2 %M7%!", + "%0 %b6 %d1 * %d2 %M7%!" + }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinithi" + [(set (match_operand:HI 0 "register_operand" "=W,D,W") + (unspec:HI [(match_operand:HI 1 "register_operand" "d,d,d") + (match_operand:HI 2 "register_operand" "d,d,d") + (match_operand 3 "const_int_operand" "PB,PA,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:PDI 4 "register_operand" "=B,A,B") + (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3)] + UNSPEC_MAC_WITH_FLAG))] + "" + "%h0 = (%4 = %h1 * %h2) %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinit1hi" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d") + (match_operand:HI 2 "register_operand" "d,d") + (match_operand 3 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" + "%0 = %h1 * %h2 %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "mulv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (mult:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 (IS)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulv2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M3%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = %h1 * %h2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %d1 * %d2 %M7%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; A slightly complicated pattern. +;; Operand 0 is the halfword output; operand 11 is the accumulator output +;; Halfword inputs are operands 1 and 2; operands 3, 4, 5 and 6 specify which +;; parts of these 2x16 bit registers to use. +;; Operand 7 is the accumulator input. +;; Operands 8/9 specify whether low/high parts are mac (0) or msu (1) +;; Operand 10 is the macflag to be used. 
+(define_insn "flag_macv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand:V2PDI 7 "register_operand" "e") + (match_operand 8 "const01_operand" "P0P1") + (match_operand 9 "const01_operand" "P0P1") + (match_operand 10 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:V2PDI 11 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI (match_dup 1) (parallel [(match_dup 3)])) + (vec_select:HI (match_dup 1) (parallel [(match_dup 4)]))) + (vec_concat:V2HI + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)])) + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))) + (match_dup 7) (match_dup 8) (match_dup 9) (match_dup 10)] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!" 
}; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macv2hi_parts_acconly" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand:V2PDI 7 "register_operand" "e") + (match_operand 8 "const01_operand" "P0P1") + (match_operand 9 "const01_operand" "P0P1") + (match_operand 10 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %d2 %M10%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; Same as above, but initializing the accumulators and therefore a couple fewer +;; necessary operands. 
+(define_insn "flag_macinitv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:V2PDI 8 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI (match_dup 1) (parallel [(match_dup 3)])) + (vec_select:HI (match_dup 1) (parallel [(match_dup 4)]))) + (vec_concat:V2HI + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)])) + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))) + (match_dup 7)] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinit1v2hi_parts" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "A0 = %h1 * %h2, A1 = %h1 * %h2 %M7%!", + "A0 = %d1 * %h2, A1 = %h1 * %h2 %M7%!", + "A0 = %h1 * %h2, A1 = %d1 * %h2 %M7%!", + "A0 = %d1 * %h2, A1 = %d1 * %h2 %M7%!", + "A0 = %h1 * %d2, A1 = %h1 * %h2 %M7%!", + "A0 = %d1 * %d2, A1 = %h1 * %h2 %M7%!", + "A0 = %h1 * %d2, A1 = %d1 * %h2 %M7%!", + "A0 = %d1 * %d2, A1 = %d1 * %h2 %M7%!", + "A0 = %h1 * %h2, A1 = %h1 * %d2 %M7%!", + "A0 = %d1 * %h2, A1 = %h1 * %d2 %M7%!", + "A0 = %h1 * %h2, A1 = %d1 * %d2 %M7%!", + "A0 = %d1 * %h2, A1 = %d1 * %d2 %M7%!", + "A0 = %h1 * %d2, A1 = %h1 * %d2 %M7%!", + "A0 = %d1 * %d2, A1 = %h1 * %d2 %M7%!", + "A0 = %h1 * %d2, A1 = %d1 * %d2 %M7%!", + "A0 = %d1 * %d2, A1 = %d1 * %d2 %M7%!" 
}; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; A mixture of multiply and multiply-accumulate for when we only want to +;; initialize one part. +(define_insn "flag_mul_macv2hi_parts_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d,d,d") + (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand 10 "const_int_operand" "PB,PA,PA")] + UNSPEC_MUL_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_dup 2) + (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_dup 3) + (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand:PDI 8 "register_operand" "1,1,1") + (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 11 "const_int_operand" "PA,PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))" +{ + rtx xops[6]; + const char *templates[] = { + "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5%!" }; + int alt = (INTVAL (operands[4]) + (INTVAL (operands[5]) << 1) + + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3)); + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = operands[3]; + xops[4] = operands[9]; + xops[5] = which_alternative == 0 ? operands[10] : operands[11]; + output_asm_insn (templates[alt], xops); + return ""; +} + [(set_attr "type" "dsp32")]) + + +(define_code_iterator s_or_u [sign_extend zero_extend]) +(define_code_attr su_optab [(sign_extend "mul") + (zero_extend "umul")]) +(define_code_attr su_modifier [(sign_extend "IS") + (zero_extend "FU")]) + +(define_insn "hisi_ll" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2 ()%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hl" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hh" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +;; Additional variants for signed * unsigned multiply. + +(define_insn "usmulhisi_ull" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ulh" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_uhl" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h2 * %d1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_uhh" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d2 * %d1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; Parallel versions of these operations. First, normal signed or unsigned +;; multiplies. + +(define_insn "hisi_ll_lh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %d2 ()%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "hisi_ll_hl" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_ll_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh_hl" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hl_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +;; Special signed * unsigned variants. 
+ +(define_insn "usmulhisi_ll_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %h1 * %h2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %h1 * %d2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %d2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %d2, %3 = %d1 * %h2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; Vector neg/abs. + +(define_insn "ssnegv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_neg:V2HI (match_operand:V2HI 1 "register_operand" "d")))] + "" + "%0 = - %1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssabsv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_abs:V2HI (match_operand:V2HI 1 "register_operand" "d")))] + "" + "%0 = ABS %1 (V)%!" + [(set_attr "type" "dsp32")]) + +;; Shifts. + +(define_insn "ssashiftv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") + (if_then_else:V2HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:V2HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (V, S)%! + %0 = %1 << %2 (V,S)%! + %0 = %1 >>> %N2 (V,S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "ssashifthi3" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (if_then_else:HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (ashiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (V, S)%! + %0 = %1 << %2 (V,S)%! + %0 = %1 >>> %N2 (V,S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "ssashiftsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d,d") + (if_then_else:SI + (lt (match_operand:HI 2 "reg_or_const_int_operand" "d,Ku5,Ks5") (const_int 0)) + (ashiftrt:SI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:SI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (S)%! + %0 = %1 << %2 (S)%! + %0 = %1 >>> %N2 (S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "lshiftv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") + (if_then_else:V2HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ashift:V2HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = LSHIFT %1 BY %h2 (V)%! + %0 = %1 << %2 (V)%! + %0 = %1 >> %N2 (V)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "lshifthi3" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (if_then_else:HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lshiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ashift:HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = LSHIFT %1 BY %h2 (V)%! + %0 = %1 << %2 (V)%! + %0 = %1 >> %N2 (V)%!" 
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +;; Load without alignment exception (masking off low bits) + +(define_insn "loadbytes" + [(set (match_operand:SI 0 "register_operand" "=d") + (mem:SI (and:SI (match_operand:SI 1 "register_operand" "b") + (const_int -4))))] + "" + "DISALGNEXCPT || %0 = [%1];" + [(set_attr "type" "mcld") + (set_attr "length" "8")]) + +(include "sync.md") diff --git a/gcc/config/bfin/bfin.opt b/gcc/config/bfin/bfin.opt new file mode 100644 index 000000000..c7a905602 --- /dev/null +++ b/gcc/config/bfin/bfin.opt @@ -0,0 +1,101 @@ +; Options for the Blackfin port of the compiler +; +; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +msim +Target RejectNegative +Use simulator runtime + +mcpu= +Target RejectNegative Joined +Specify the name of the target CPU + +momit-leaf-frame-pointer +Target Report Mask(OMIT_LEAF_FRAME_POINTER) +Omit frame pointer for leaf functions + +mlow64k +Target Report Mask(LOW_64K) +Program is entirely located in low 64k of memory + +mcsync-anomaly +Target Report Var(bfin_csync_anomaly) Init(-1) +Work around a hardware anomaly by adding a number of NOPs before a +CSYNC or SSYNC instruction. + +mspecld-anomaly +Target Report Var(bfin_specld_anomaly) Init(-1) +Avoid speculative loads to work around a hardware anomaly. + +mid-shared-library +Target Report Mask(ID_SHARED_LIBRARY) +Enabled ID based shared library + +mleaf-id-shared-library +Target Report Mask(LEAF_ID_SHARED_LIBRARY) +Generate code that won't be linked against any other ID shared libraries, +but may be used as a shared library. + +mshared-library-id= +Target RejectNegative Joined UInteger Var(bfin_library_id) +ID of shared library to build + +msep-data +Target Report Mask(SEP_DATA) +Enable separate data segment + +mlong-calls +Target Report Mask(LONG_CALLS) +Avoid generating pc-relative calls; use indirection + +mfast-fp +Target Report Mask(FAST_FP) +Link with the fast floating-point library + +mfdpic +Target Report Mask(FDPIC) +Enable Function Descriptor PIC mode + +minline-plt +Target Report Mask(INLINE_PLT) +Enable inlining of PLT in function calls + +mstack-check-l1 +Target Report Mask(STACK_CHECK_L1) +Do stack checking using bounds in L1 scratch memory + +mmulticore +Target Report Mask(MULTICORE) +Enable multicore support + +mcorea +Target Report Mask(COREA) +Build for Core A + +mcoreb +Target Report Mask(COREB) +Build for Core B + +msdram +Target Report Mask(SDRAM) +Build for SDRAM + +micplb +Target Report Mask(ICPLB) +Assume ICPLBs are enabled at runtime. diff --git a/gcc/config/bfin/constraints.md b/gcc/config/bfin/constraints.md new file mode 100644 index 000000000..fa9dcf143 --- /dev/null +++ b/gcc/config/bfin/constraints.md @@ -0,0 +1,225 @@ +;; Constraint definitions for Blackfin +;; Copyright (C) 2008 Free Software Foundation, Inc. +;; Contributed by Analog Devices + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_register_constraint "a" "PREGS" + "A Pn register.") + +(define_register_constraint "d" "DREGS" + "A Rn register.") + +(define_register_constraint "z" "PREGS_CLOBBERED" + "A call clobbered Pn register.") + +(define_register_constraint "D" "EVEN_DREGS" + "An even-numbered Rn register.") + +(define_register_constraint "W" "ODD_DREGS" + "An odd-numbered Rn register.") + +(define_register_constraint "e" "AREGS" + "An accumulator register.") + +(define_register_constraint "A" "EVEN_AREGS" + "An even-numbered accumulator; A0.") + +(define_register_constraint "B" "ODD_AREGS" + "An odd-numbered accumulator; A1.") + +(define_register_constraint "b" "IREGS" + "An I register.") + +(define_register_constraint "v" "BREGS" + "A B register.") + +(define_register_constraint "f" "MREGS" + "An M register.") + +(define_register_constraint "c" "CIRCREGS" + "A register used for circular buffering, i.e. I, B, or L registers.") + +(define_register_constraint "C" "CCREGS" + "The CC register.") + +(define_register_constraint "t" "LT_REGS" + "LT0 or LT1.") + +(define_register_constraint "u" "LB_REGS" + "LB0 or LB1.") + +(define_register_constraint "k" "LC_REGS" + "LC0 or LC1.") + +(define_register_constraint "x" "MOST_REGS" + "Any R, P, B, M, I or L register.") + +(define_register_constraint "y" "PROLOGUE_REGS" + "Additional registers typically used only in prologues and epilogues: + RETS, RETN, RETI, RETX, RETE, ASTAT, SEQSTAT and USP.") + +(define_register_constraint "w" "NON_A_CC_REGS" + "Any register except accumulators or CC.") + +(define_register_constraint "Z" "FDPIC_REGS" + "@internal The FD-PIC GOT pointer; P3.") + +(define_register_constraint "Y" "FDPIC_FPTR_REGS" + "@internal The FD-PIC function pointer register; P1.") + +(define_register_constraint "q0" "D0REGS" + "The register R0.") + +(define_register_constraint "q1" "D1REGS" + "The register R1.") + +(define_register_constraint "q2" "D2REGS" + "The register R2.") + +(define_register_constraint "q3" "D3REGS" + "The register R3.") + +(define_register_constraint "q4" "D4REGS" + "The register R4.") + +(define_register_constraint "q5" "D5REGS" + "The register R5.") + +(define_register_constraint "q6" "D6REGS" + "The register R6.") + +(define_register_constraint "q7" "D7REGS" + "The register R7.") + +(define_register_constraint "qA" "P0REGS" + "The register P0.") + +;; Constant constraints. + +(define_constraint "J" + "A constant value of the form 2**N, where N 5-bit wide." + (and (match_code "const_int") + (match_test "log2constp (ival)"))) + +(define_constraint "Ks3" + "A signed 3 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -4 && ival <= 3"))) + +(define_constraint "Ku3" + "An unsigned 3 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 7"))) + +(define_constraint "Ks4" + "A signed 4 bit immediate." 
+ (and (match_code "const_int") + (match_test "ival >= -8 && ival <= 7"))) + +(define_constraint "Ku4" + "An unsigned 4 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 15"))) + +(define_constraint "Ks5" + "A signed 5 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -16 && ival <= 15"))) + +(define_constraint "Ku5" + "An unsigned 5 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 31"))) + +(define_constraint "Ks7" + "A signed 7 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -64 && ival <= 63"))) + +(define_constraint "KN7" + "A constant that when negated is a signed 7 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -63 && ival <= 64"))) + +(define_constraint "Ksh" + "A signed 16 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -32768 && ival <= 32767"))) + +(define_constraint "Kuh" + "An unsigned 16 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 65535"))) + +(define_constraint "L" + "A constant value of the form ~(2**N)." + (and (match_code "const_int") + (match_test "log2constp (~ival)"))) + +(define_constraint "M1" + "An integer with the value 255." + (and (match_code "const_int") + (match_test "ival == 255"))) + +(define_constraint "M2" + "An integer with the value 65535." + (and (match_code "const_int") + (match_test "ival == 65535"))) + +(define_constraint "P0" + "An integer with the value 0." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "P1" + "An integer with the value 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "P2" + "An integer with the value 2." + (and (match_code "const_int") + (match_test "ival == 2"))) + +(define_constraint "P3" + "An integer with the value 3." + (and (match_code "const_int") + (match_test "ival == 3"))) + +(define_constraint "P4" + "An integer with the value 4." + (and (match_code "const_int") + (match_test "ival == 4"))) + +(define_constraint "PA" + "An integer constant describing any macflag except variants involving M." + (and (match_code "const_int") + (match_test "ival != MACFLAG_M && ival != MACFLAG_IS_M"))) + +(define_constraint "PB" + "An integer constant describing any macflag involving M." + (and (match_code "const_int") + (match_test "ival == MACFLAG_M || ival == MACFLAG_IS_M"))) + + +;; Extra constraints + +(define_constraint "Q" + "A SYMBOL_REF." + (match_code "symbol_ref")) + diff --git a/gcc/config/bfin/crti.s b/gcc/config/bfin/crti.s new file mode 100644 index 000000000..b6f20fc9e --- /dev/null +++ b/gcc/config/bfin/crti.s @@ -0,0 +1,59 @@ +/* Specialized code needed to support construction and destruction of + file-scope objects in C++ and Java code, and to support exception handling. + Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc. + Contributed by Analog Devices. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* + * This file just supplies function prologues for the .init and .fini + * sections. It is linked in before crtbegin.o. + */ + + .ident "GNU C crti.o" + + .section .init + .globl __init + .type __init,@function +__init: +#if defined __ID_SHARED_LIB__ + [--SP] = P5; +#elif defined __BFIN_FDPIC__ + [--SP] = P3; +#endif + LINK 12; +#if defined __ID_SHARED_LIB__ + P5 = [P5 + _current_shared_library_p5_offset_] +#endif + .section .fini + .globl __fini + .type __fini,@function +__fini: +#if defined __ID_SHARED_LIB__ + [--SP] = P5; +#elif defined __BFIN_FDPIC__ + [--SP] = P3; +#endif + LINK 12; +#if defined __ID_SHARED_LIB__ + P5 = [P5 + _current_shared_library_p5_offset_] +#endif diff --git a/gcc/config/bfin/crtlibid.s b/gcc/config/bfin/crtlibid.s new file mode 100644 index 000000000..beab80938 --- /dev/null +++ b/gcc/config/bfin/crtlibid.s @@ -0,0 +1,29 @@ +/* Provide a weak definition of the library ID, for the benefit of certain + configure scripts. + Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + .ident "GNU C crtlibid.o" + +.weak _current_shared_library_p5_offset_ +.set _current_shared_library_p5_offset_, 0 diff --git a/gcc/config/bfin/crtn.s b/gcc/config/bfin/crtn.s new file mode 100644 index 000000000..7fcd27bfa --- /dev/null +++ b/gcc/config/bfin/crtn.s @@ -0,0 +1,50 @@ +/* Specialized code needed to support construction and destruction of + file-scope objects in C++ and Java code, and to support exception handling. + Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc. + Contributed by Analog Devices. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* + * This file supplies function epilogues for the .init and .fini sections. + * It is linked in after all other files. + */ + + .ident "GNU C crtn.o" + + .section .init + unlink; +#if defined __ID_SHARED_LIB__ + P5 = [SP++]; +#elif defined __BFIN_FDPIC__ + P3 = [SP++]; +#endif + rts; + + .section .fini + unlink; +#if defined __ID_SHARED_LIB__ + P5 = [SP++]; +#elif defined __BFIN_FDPIC__ + P3 = [SP++]; +#endif + rts; diff --git a/gcc/config/bfin/elf.h b/gcc/config/bfin/elf.h new file mode 100644 index 000000000..975212faa --- /dev/null +++ b/gcc/config/bfin/elf.h @@ -0,0 +1,73 @@ +/* Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "\ +%{msim:%{!shared:crt0%O%s}} \ +%{!msim:%{!mcpu=bf561*:%{!msdram:basiccrt%O%s} %{msdram:basiccrts%O%s};: \ + %{!msdram:basiccrt561%O%s} %{msdram:basiccrt561s%O%s}} \ + %{mcpu=bf561*:%{mmulticore:%{!mcorea:%{!mcoreb:basiccrt561b%O%s}}}}} \ +crti%O%s crtbegin%O%s crtlibid%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef LIB_SPEC +#define LIB_SPEC "--start-group -lc %{msim:-lsim}%{!msim:-lnosys} --end-group \ +%{!T*:%{!msim:%{!msdram: \ + %{mcpu=bf512*:-T bf512.ld%s}%{mcpu=bf514*:-T bf514.ld%s} \ + %{mcpu=bf516*:-T bf516.ld%s}%{mcpu=bf518*:-T bf518.ld%s} \ + %{mcpu=bf522*:-T bf522.ld%s}%{mcpu=bf523*:-T bf523.ld%s} \ + %{mcpu=bf524*:-T bf524.ld%s}%{mcpu=bf525*:-T bf525.ld%s} \ + %{mcpu=bf526*:-T bf526.ld%s}%{mcpu=bf527*:-T bf527.ld%s} \ + %{mcpu=bf531*:-T bf531.ld%s}%{mcpu=bf532*:-T bf532.ld%s} \ + %{mcpu=bf533*:-T bf533.ld%s}%{mcpu=bf534*:-T bf534.ld%s} \ + %{mcpu=bf536*:-T bf536.ld%s}%{mcpu=bf537*:-T bf537.ld%s} \ + %{mcpu=bf538*:-T bf538.ld%s}%{mcpu=bf539*:-T bf539.ld%s} \ + %{mcpu=bf542*:-T bf542.ld%s}%{mcpu=bf544*:-T bf544.ld%s} \ + %{mcpu=bf547*:-T bf547.ld%s}%{mcpu=bf548*:-T bf548.ld%s} \ + %{mcpu=bf549*:-T bf549.ld%s} \ + %{mcpu=bf561*:%{!mmulticore:-T bf561.ld%s} \ + %{mmulticore:%{mcorea:-T bf561a.ld%s}} \ + %{mmulticore:%{mcoreb:-T bf561b.ld%s}} \ + %{mmulticore:%{!mcorea:%{!mcoreb:-T bf561m.ld%s}}}} \ + %{!mcpu=*:%eno processor type specified for linking} \ + %{!mcpu=bf561*:-T bfin-common-sc.ld%s} \ + %{mcpu=bf561*:%{!mmulticore:-T bfin-common-sc.ld%s} \ + %{mmulticore:-T bfin-common-mc.ld%s}}}}}" + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#ifdef __BFIN_FDPIC__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ +asm (SECTION_OP); \ +asm ("P3 = 
[SP + 20];\n\tcall " USER_LABEL_PREFIX #FUNC ";"); \ +asm (TEXT_SECTION_ASM_OP); +#endif + +#undef SUBTARGET_DRIVER_SELF_SPECS +#define SUBTARGET_DRIVER_SELF_SPECS \ + "%{mfdpic:-msim} %{mid-shared-library:-msim}" + +#define NO_IMPLICIT_EXTERN_C diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm new file mode 100644 index 000000000..4e15ad230 --- /dev/null +++ b/gcc/config/bfin/lib1funcs.asm @@ -0,0 +1,146 @@ +/* libgcc functions for Blackfin. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. + Contributed by Analog Devices. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifdef L_divsi3 +.text +.align 2 +.global ___divsi3; +.type ___divsi3, STT_FUNC; + +___divsi3: + [--SP]= RETS; + [--SP] = R7; + + R2 = -R0; + CC = R0 < 0; + IF CC R0 = R2; + R7 = CC; + + R2 = -R1; + CC = R1 < 0; + IF CC R1 = R2; + R2 = CC; + R7 = R7 ^ R2; + + CALL ___udivsi3; + + CC = R7; + R1 = -R0; + IF CC R0 = R1; + + R7 = [SP++]; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_modsi3 +.align 2 +.global ___modsi3; +.type ___modsi3, STT_FUNC; + +___modsi3: + [--SP] = RETS; + [--SP] = R0; + [--SP] = R1; + CALL ___divsi3; + R2 = [SP++]; + R1 = [SP++]; + R2 *= R0; + R0 = R1 - R2; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_udivsi3 +.align 2 +.global ___udivsi3; +.type ___udivsi3, STT_FUNC; + +___udivsi3: + P0 = 32; + LSETUP (0f, 1f) LC0 = P0; + /* upper half of dividend */ + R3 = 0; +0: + /* The first time round in the loop we shift in garbage, but since we + perform 33 shifts, it doesn't matter. */ + R0 = ROT R0 BY 1; + R3 = ROT R3 BY 1; + R2 = R3 - R1; + CC = R3 < R1 (IU); +1: + /* Last instruction of the loop. */ + IF ! CC R3 = R2; + + /* Shift in the last bit. */ + R0 = ROT R0 BY 1; + /* R0 is the result, R3 contains the remainder. 
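+ Because CC is set exactly when the subtract is skipped (R3 < R1), the bits + rotated into R0 are the complement of the quotient bits; the complement + below recovers the true quotient.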
*/ + R0 = ~ R0; + RTS; +#endif + +#ifdef L_umodsi3 +.align 2 +.global ___umodsi3; +.type ___umodsi3, STT_FUNC; + +___umodsi3: + [--SP] = RETS; + CALL ___udivsi3; + R0 = R3; + RETS = [SP++]; + RTS; +#endif + +#ifdef L_umulsi3_highpart +.align 2 +.global ___umulsi3_highpart; +.type ___umulsi3_highpart, STT_FUNC; + +___umulsi3_highpart: + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU); + A1 += R0.L * R1.H (FU); + A1 = A1 >> 16; + A0 += A1; + R0 = A0 (FU); + RTS; +#endif + +#ifdef L_smulsi3_highpart +.align 2 +.global ___smulsi3_highpart; +.type ___smulsi3_highpart, STT_FUNC; + +___smulsi3_highpart: + A1 = R1.L * R0.L (FU); + A1 = A1 >> 16; + A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M); + A1 += R1.H * R0.L (IS,M); + A1 = A1 >>> 16; + R0 = (A0 += A1); + RTS; +#endif diff --git a/gcc/config/bfin/libgcc-bfin.ver b/gcc/config/bfin/libgcc-bfin.ver new file mode 100644 index 000000000..516d91f65 --- /dev/null +++ b/gcc/config/bfin/libgcc-bfin.ver @@ -0,0 +1,1914 @@ +# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, +# 2008, 2009, 2010 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +GCC_3.0 { + # libgcc1 integer symbols + ___absvsi2 + ___addvsi3 + ___ashlsi3 + ___ashrsi3 + ___divsi3 + ___lshrsi3 + ___modsi3 + ___mulsi3 + ___mulvsi3 + ___negvsi2 + ___subvsi3 + ___udivsi3 + ___umodsi3 + + # libgcc1 floating point symbols + ___addsf3 + ___adddf3 + ___addxf3 + ___addtf3 + ___divsf3 + ___divdf3 + ___divxf3 + ___divtf3 + ___eqsf2 + ___eqdf2 + ___eqxf2 + ___eqtf2 + ___extenddfxf2 + ___extenddftf2 + ___extendsfdf2 + ___extendsfxf2 + ___extendsftf2 + ___fixsfsi + ___fixdfsi + ___fixxfsi + ___fixtfsi + ___floatsisf + ___floatsidf + ___floatsixf + ___floatsitf + ___gesf2 + ___gedf2 + ___gexf2 + ___getf2 + ___gtsf2 + ___gtdf2 + ___gtxf2 + ___gttf2 + ___lesf2 + ___ledf2 + ___lexf2 + ___letf2 + ___ltsf2 + ___ltdf2 + ___ltxf2 + ___lttf2 + ___mulsf3 + ___muldf3 + ___mulxf3 + ___multf3 + ___negsf2 + ___negdf2 + ___negxf2 + ___negtf2 + ___nesf2 + ___nedf2 + ___nexf2 + ___netf2 + ___subsf3 + ___subdf3 + ___subxf3 + ___subtf3 + ___truncdfsf2 + ___truncxfsf2 + ___trunctfsf2 + ___truncxfdf2 + ___trunctfdf2 + + # libgcc2 DImode arithmetic (for 32-bit targets). + ___absvdi2 + ___addvdi3 + ___ashldi3 + ___ashrdi3 + ___cmpdi2 + ___divdi3 + ___ffsdi2 + ___fixdfdi + ___fixsfdi + ___fixtfdi + ___fixxfdi + ___fixunsdfdi + ___fixunsdfsi + ___fixunssfsi + ___fixunssfdi + ___fixunstfdi + ___fixunstfsi + ___fixunsxfdi + ___fixunsxfsi + ___floatdidf + ___floatdisf + ___floatdixf + ___floatditf + ___lshrdi3 + ___moddi3 + ___muldi3 + ___mulvdi3 + ___negdi2 + ___negvdi2 + ___subvdi3 + ___ucmpdi2 + ___udivdi3 + ___udivmoddi4 + ___umoddi3 + + # libgcc2 TImode arithmetic (for 64-bit targets). 
+ ___ashlti3 + ___ashrti3 + ___cmpti2 + ___divti3 + ___ffsti2 + ___fixdfti + ___fixsfti + ___fixtfti + ___fixxfti + ___lshrti3 + ___modti3 + ___multi3 + ___negti2 + ___ucmpti2 + ___udivmodti4 + ___udivti3 + ___umodti3 + ___fixunsdfti + ___fixunssfti + ___fixunstfti + ___fixunsxfti + ___floattidf + ___floattisf + ___floattixf + ___floattitf + + # Used to deal with trampoline initialization on some platforms + ___clear_cache + + # EH symbols + __Unwind_DeleteException + __Unwind_Find_FDE + __Unwind_ForcedUnwind + __Unwind_GetGR + __Unwind_GetIP + __Unwind_GetLanguageSpecificData + __Unwind_GetRegionStart + __Unwind_GetTextRelBase + __Unwind_GetDataRelBase + __Unwind_RaiseException + __Unwind_Resume + __Unwind_SetGR + __Unwind_SetIP + ___deregister_frame + ___deregister_frame_info + ___deregister_frame_info_bases + ___register_frame + ___register_frame_info + ___register_frame_info_bases + ___register_frame_info_table + ___register_frame_info_table_bases + ___register_frame_table + + # SjLj EH symbols + __Unwind_SjLj_Register + __Unwind_SjLj_Unregister + __Unwind_SjLj_RaiseException + __Unwind_SjLj_ForcedUnwind + __Unwind_SjLj_Resume +} + +%inherit GCC_3.3 GCC_3.0 +GCC_3.3 { + __Unwind_FindEnclosingFunction + __Unwind_GetCFA + __Unwind_Backtrace + __Unwind_Resume_or_Rethrow + __Unwind_SjLj_Resume_or_Rethrow +} + +%inherit GCC_3.3.1 GCC_3.3 +GCC_3.3.1 { + ___gcc_personality_sj0 + ___gcc_personality_v0 +} + +%inherit GCC_3.3.2 GCC_3.3.1 +GCC_3.3.2 { +} +%inherit GCC_3.3.4 GCC_3.3.2 +GCC_3.3.4 { + ___unorddf2 + ___unordsf2 +} + +%inherit GCC_3.4 GCC_3.3.4 +GCC_3.4 { + # bit scanning and counting built-ins + ___clzsi2 + ___clzdi2 + ___clzti2 + ___ctzsi2 + ___ctzdi2 + ___ctzti2 + ___popcountsi2 + ___popcountdi2 + ___popcountti2 + ___paritysi2 + ___paritydi2 + ___parityti2 +} + +%inherit GCC_3.4.2 GCC_3.4 +GCC_3.4.2 { + # Used to deal with trampoline initialization on some platforms + ___enable_execute_stack + ___trampoline_setup +} + +%inherit GCC_3.4.4 GCC_3.4.2 +GCC_3.4.4 { + # libgcc2 TImode arithmetic (for 64-bit targets). + ___absvti2 + ___addvti3 + ___mulvti3 + ___negvti2 + ___subvti3 +} + +%inherit GCC_4.0.0 GCC_3.4.4 +GCC_4.0.0 { + # libgcc2 __builtin_powi helpers. 
+ ___powisf2 + ___powidf2 + ___powixf2 + ___powitf2 + + # c99 compliant complex arithmetic + ___divsc3 + ___divdc3 + ___divxc3 + ___divtc3 + ___mulsc3 + ___muldc3 + ___mulxc3 + ___multc3 +} + +%inherit GCC_4.1.0 GCC_4.0.0 +GCC_4.1.0 { + ___smulsi3_highpart + ___umulsi3_highpart +} + +%inherit GCC_4.2.0 GCC_4.1.0 +GCC_4.2.0 { + # unsigned-to-floating conversions + ___floatunsisf + ___floatunsidf + ___floatunsixf + ___floatunsitf + ___floatundidf + ___floatundisf + ___floatundixf + ___floatunditf + ___floatuntidf + ___floatuntisf + ___floatuntixf + ___floatuntitf + __Unwind_GetIPInfo +} + +%inherit GCC_4.3.0 GCC_4.2.0 +GCC_4.3.0 { + # byte swapping routines + ___bswapsi2 + ___bswapdi2 + ___emutls_get_address + ___emutls_register_common + ___ffssi2 + ___extendxftf2 + ___trunctfxf2 + + # fixed-point routines + ___addqq3 + ___addhq3 + ___addsq3 + ___adddq3 + ___addtq3 + ___adduqq3 + ___adduhq3 + ___addusq3 + ___addudq3 + ___addutq3 + ___addha3 + ___addsa3 + ___addda3 + ___addta3 + ___adduha3 + ___addusa3 + ___adduda3 + ___adduta3 + ___ssaddqq3 + ___ssaddhq3 + ___ssaddsq3 + ___ssadddq3 + ___ssaddtq3 + ___ssaddha3 + ___ssaddsa3 + ___ssaddda3 + ___ssaddta3 + ___usadduqq3 + ___usadduhq3 + ___usaddusq3 + ___usaddudq3 + ___usaddutq3 + ___usadduha3 + ___usaddusa3 + ___usadduda3 + ___usadduta3 + ___subqq3 + ___subhq3 + ___subsq3 + ___subdq3 + ___subtq3 + ___subuqq3 + ___subuhq3 + ___subusq3 + ___subudq3 + ___subutq3 + ___subha3 + ___subsa3 + ___subda3 + ___subta3 + ___subuha3 + ___subusa3 + ___subuda3 + ___subuta3 + ___sssubqq3 + ___sssubhq3 + ___sssubsq3 + ___sssubdq3 + ___sssubtq3 + ___sssubha3 + ___sssubsa3 + ___sssubda3 + ___sssubta3 + ___ussubuqq3 + ___ussubuhq3 + ___ussubusq3 + ___ussubudq3 + ___ussubutq3 + ___ussubuha3 + ___ussubusa3 + ___ussubuda3 + ___ussubuta3 + ___mulqq3 + ___mulhq3 + ___mulsq3 + ___muldq3 + ___multq3 + ___muluqq3 + ___muluhq3 + ___mulusq3 + ___muludq3 + ___mulutq3 + ___mulha3 + ___mulsa3 + ___mulda3 + ___multa3 + ___muluha3 + ___mulusa3 + ___muluda3 + ___muluta3 + ___ssmulqq3 + ___ssmulhq3 + ___ssmulsq3 + ___ssmuldq3 + ___ssmultq3 + ___ssmulha3 + ___ssmulsa3 + ___ssmulda3 + ___ssmulta3 + ___usmuluqq3 + ___usmuluhq3 + ___usmulusq3 + ___usmuludq3 + ___usmulutq3 + ___usmuluha3 + ___usmulusa3 + ___usmuluda3 + ___usmuluta3 + ___divqq3 + ___divhq3 + ___divsq3 + ___divdq3 + ___divtq3 + ___divha3 + ___divsa3 + ___divda3 + ___divta3 + ___udivuqq3 + ___udivuhq3 + ___udivusq3 + ___udivudq3 + ___udivutq3 + ___udivuha3 + ___udivusa3 + ___udivuda3 + ___udivuta3 + ___ssdivqq3 + ___ssdivhq3 + ___ssdivsq3 + ___ssdivdq3 + ___ssdivtq3 + ___ssdivha3 + ___ssdivsa3 + ___ssdivda3 + ___ssdivta3 + ___usdivuqq3 + ___usdivuhq3 + ___usdivusq3 + ___usdivudq3 + ___usdivutq3 + ___usdivuha3 + ___usdivusa3 + ___usdivuda3 + ___usdivuta3 + ___negqq2 + ___neghq2 + ___negsq2 + ___negdq2 + ___negtq2 + ___neguqq2 + ___neguhq2 + ___negusq2 + ___negudq2 + ___negutq2 + ___negha2 + ___negsa2 + ___negda2 + ___negta2 + ___neguha2 + ___negusa2 + ___neguda2 + ___neguta2 + ___ssnegqq2 + ___ssneghq2 + ___ssnegsq2 + ___ssnegdq2 + ___ssnegtq2 + ___ssnegha2 + ___ssnegsa2 + ___ssnegda2 + ___ssnegta2 + ___usneguqq2 + ___usneguhq2 + ___usnegusq2 + ___usnegudq2 + ___usnegutq2 + ___usneguha2 + ___usnegusa2 + ___usneguda2 + ___usneguta2 + ___ashlqq3 + ___ashlhq3 + ___ashlsq3 + ___ashldq3 + ___ashltq3 + ___ashluqq3 + ___ashluhq3 + ___ashlusq3 + ___ashludq3 + ___ashlutq3 + ___ashlha3 + ___ashlsa3 + ___ashlda3 + ___ashlta3 + ___ashluha3 + ___ashlusa3 + ___ashluda3 + ___ashluta3 + ___ashrqq3 + ___ashrhq3 + ___ashrsq3 + ___ashrdq3 + 
___ashrtq3 + ___ashrha3 + ___ashrsa3 + ___ashrda3 + ___ashrta3 + ___lshruqq3 + ___lshruhq3 + ___lshrusq3 + ___lshrudq3 + ___lshrutq3 + ___lshruha3 + ___lshrusa3 + ___lshruda3 + ___lshruta3 + ___ssashlqq3 + ___ssashlhq3 + ___ssashlsq3 + ___ssashldq3 + ___ssashltq3 + ___ssashlha3 + ___ssashlsa3 + ___ssashlda3 + ___ssashlta3 + ___usashluqq3 + ___usashluhq3 + ___usashlusq3 + ___usashludq3 + ___usashlutq3 + ___usashluha3 + ___usashlusa3 + ___usashluda3 + ___usashluta3 + ___cmpqq2 + ___cmphq2 + ___cmpsq2 + ___cmpdq2 + ___cmptq2 + ___cmpuqq2 + ___cmpuhq2 + ___cmpusq2 + ___cmpudq2 + ___cmputq2 + ___cmpha2 + ___cmpsa2 + ___cmpda2 + ___cmpta2 + ___cmpuha2 + ___cmpusa2 + ___cmpuda2 + ___cmputa2 + ___fractqqhq2 + ___fractqqsq2 + ___fractqqdq2 + ___fractqqtq2 + ___fractqqha + ___fractqqsa + ___fractqqda + ___fractqqta + ___fractqquqq + ___fractqquhq + ___fractqqusq + ___fractqqudq + ___fractqqutq + ___fractqquha + ___fractqqusa + ___fractqquda + ___fractqquta + ___fractqqqi + ___fractqqhi + ___fractqqsi + ___fractqqdi + ___fractqqti + ___fractqqsf + ___fractqqdf + ___fracthqqq2 + ___fracthqsq2 + ___fracthqdq2 + ___fracthqtq2 + ___fracthqha + ___fracthqsa + ___fracthqda + ___fracthqta + ___fracthquqq + ___fracthquhq + ___fracthqusq + ___fracthqudq + ___fracthqutq + ___fracthquha + ___fracthqusa + ___fracthquda + ___fracthquta + ___fracthqqi + ___fracthqhi + ___fracthqsi + ___fracthqdi + ___fracthqti + ___fracthqsf + ___fracthqdf + ___fractsqqq2 + ___fractsqhq2 + ___fractsqdq2 + ___fractsqtq2 + ___fractsqha + ___fractsqsa + ___fractsqda + ___fractsqta + ___fractsquqq + ___fractsquhq + ___fractsqusq + ___fractsqudq + ___fractsqutq + ___fractsquha + ___fractsqusa + ___fractsquda + ___fractsquta + ___fractsqqi + ___fractsqhi + ___fractsqsi + ___fractsqdi + ___fractsqti + ___fractsqsf + ___fractsqdf + ___fractdqqq2 + ___fractdqhq2 + ___fractdqsq2 + ___fractdqtq2 + ___fractdqha + ___fractdqsa + ___fractdqda + ___fractdqta + ___fractdquqq + ___fractdquhq + ___fractdqusq + ___fractdqudq + ___fractdqutq + ___fractdquha + ___fractdqusa + ___fractdquda + ___fractdquta + ___fractdqqi + ___fractdqhi + ___fractdqsi + ___fractdqdi + ___fractdqti + ___fractdqsf + ___fractdqdf + ___fracttqqq2 + ___fracttqhq2 + ___fracttqsq2 + ___fracttqdq2 + ___fracttqha + ___fracttqsa + ___fracttqda + ___fracttqta + ___fracttquqq + ___fracttquhq + ___fracttqusq + ___fracttqudq + ___fracttqutq + ___fracttquha + ___fracttqusa + ___fracttquda + ___fracttquta + ___fracttqqi + ___fracttqhi + ___fracttqsi + ___fracttqdi + ___fracttqti + ___fracttqsf + ___fracttqdf + ___fracthaqq + ___fracthahq + ___fracthasq + ___fracthadq + ___fracthatq + ___fracthasa2 + ___fracthada2 + ___fracthata2 + ___fracthauqq + ___fracthauhq + ___fracthausq + ___fracthaudq + ___fracthautq + ___fracthauha + ___fracthausa + ___fracthauda + ___fracthauta + ___fracthaqi + ___fracthahi + ___fracthasi + ___fracthadi + ___fracthati + ___fracthasf + ___fracthadf + ___fractsaqq + ___fractsahq + ___fractsasq + ___fractsadq + ___fractsatq + ___fractsaha2 + ___fractsada2 + ___fractsata2 + ___fractsauqq + ___fractsauhq + ___fractsausq + ___fractsaudq + ___fractsautq + ___fractsauha + ___fractsausa + ___fractsauda + ___fractsauta + ___fractsaqi + ___fractsahi + ___fractsasi + ___fractsadi + ___fractsati + ___fractsasf + ___fractsadf + ___fractdaqq + ___fractdahq + ___fractdasq + ___fractdadq + ___fractdatq + ___fractdaha2 + ___fractdasa2 + ___fractdata2 + ___fractdauqq + ___fractdauhq + ___fractdausq + ___fractdaudq + ___fractdautq + ___fractdauha + ___fractdausa + ___fractdauda + 
___fractdauta + ___fractdaqi + ___fractdahi + ___fractdasi + ___fractdadi + ___fractdati + ___fractdasf + ___fractdadf + ___fracttaqq + ___fracttahq + ___fracttasq + ___fracttadq + ___fracttatq + ___fracttaha2 + ___fracttasa2 + ___fracttada2 + ___fracttauqq + ___fracttauhq + ___fracttausq + ___fracttaudq + ___fracttautq + ___fracttauha + ___fracttausa + ___fracttauda + ___fracttauta + ___fracttaqi + ___fracttahi + ___fracttasi + ___fracttadi + ___fracttati + ___fracttasf + ___fracttadf + ___fractuqqqq + ___fractuqqhq + ___fractuqqsq + ___fractuqqdq + ___fractuqqtq + ___fractuqqha + ___fractuqqsa + ___fractuqqda + ___fractuqqta + ___fractuqquhq2 + ___fractuqqusq2 + ___fractuqqudq2 + ___fractuqqutq2 + ___fractuqquha + ___fractuqqusa + ___fractuqquda + ___fractuqquta + ___fractuqqqi + ___fractuqqhi + ___fractuqqsi + ___fractuqqdi + ___fractuqqti + ___fractuqqsf + ___fractuqqdf + ___fractuhqqq + ___fractuhqhq + ___fractuhqsq + ___fractuhqdq + ___fractuhqtq + ___fractuhqha + ___fractuhqsa + ___fractuhqda + ___fractuhqta + ___fractuhquqq2 + ___fractuhqusq2 + ___fractuhqudq2 + ___fractuhqutq2 + ___fractuhquha + ___fractuhqusa + ___fractuhquda + ___fractuhquta + ___fractuhqqi + ___fractuhqhi + ___fractuhqsi + ___fractuhqdi + ___fractuhqti + ___fractuhqsf + ___fractuhqdf + ___fractusqqq + ___fractusqhq + ___fractusqsq + ___fractusqdq + ___fractusqtq + ___fractusqha + ___fractusqsa + ___fractusqda + ___fractusqta + ___fractusquqq2 + ___fractusquhq2 + ___fractusqudq2 + ___fractusqutq2 + ___fractusquha + ___fractusqusa + ___fractusquda + ___fractusquta + ___fractusqqi + ___fractusqhi + ___fractusqsi + ___fractusqdi + ___fractusqti + ___fractusqsf + ___fractusqdf + ___fractudqqq + ___fractudqhq + ___fractudqsq + ___fractudqdq + ___fractudqtq + ___fractudqha + ___fractudqsa + ___fractudqda + ___fractudqta + ___fractudquqq2 + ___fractudquhq2 + ___fractudqusq2 + ___fractudqutq2 + ___fractudquha + ___fractudqusa + ___fractudquda + ___fractudquta + ___fractudqqi + ___fractudqhi + ___fractudqsi + ___fractudqdi + ___fractudqti + ___fractudqsf + ___fractudqdf + ___fractutqqq + ___fractutqhq + ___fractutqsq + ___fractutqdq + ___fractutqtq + ___fractutqha + ___fractutqsa + ___fractutqda + ___fractutqta + ___fractutquqq2 + ___fractutquhq2 + ___fractutqusq2 + ___fractutqudq2 + ___fractutquha + ___fractutqusa + ___fractutquda + ___fractutquta + ___fractutqqi + ___fractutqhi + ___fractutqsi + ___fractutqdi + ___fractutqti + ___fractutqsf + ___fractutqdf + ___fractuhaqq + ___fractuhahq + ___fractuhasq + ___fractuhadq + ___fractuhatq + ___fractuhaha + ___fractuhasa + ___fractuhada + ___fractuhata + ___fractuhauqq + ___fractuhauhq + ___fractuhausq + ___fractuhaudq + ___fractuhautq + ___fractuhausa2 + ___fractuhauda2 + ___fractuhauta2 + ___fractuhaqi + ___fractuhahi + ___fractuhasi + ___fractuhadi + ___fractuhati + ___fractuhasf + ___fractuhadf + ___fractusaqq + ___fractusahq + ___fractusasq + ___fractusadq + ___fractusatq + ___fractusaha + ___fractusasa + ___fractusada + ___fractusata + ___fractusauqq + ___fractusauhq + ___fractusausq + ___fractusaudq + ___fractusautq + ___fractusauha2 + ___fractusauda2 + ___fractusauta2 + ___fractusaqi + ___fractusahi + ___fractusasi + ___fractusadi + ___fractusati + ___fractusasf + ___fractusadf + ___fractudaqq + ___fractudahq + ___fractudasq + ___fractudadq + ___fractudatq + ___fractudaha + ___fractudasa + ___fractudada + ___fractudata + ___fractudauqq + ___fractudauhq + ___fractudausq + ___fractudaudq + ___fractudautq + ___fractudauha2 + ___fractudausa2 + ___fractudauta2 + 
___fractudaqi + ___fractudahi + ___fractudasi + ___fractudadi + ___fractudati + ___fractudasf + ___fractudadf + ___fractutaqq + ___fractutahq + ___fractutasq + ___fractutadq + ___fractutatq + ___fractutaha + ___fractutasa + ___fractutada + ___fractutata + ___fractutauqq + ___fractutauhq + ___fractutausq + ___fractutaudq + ___fractutautq + ___fractutauha2 + ___fractutausa2 + ___fractutauda2 + ___fractutaqi + ___fractutahi + ___fractutasi + ___fractutadi + ___fractutati + ___fractutasf + ___fractutadf + ___fractqiqq + ___fractqihq + ___fractqisq + ___fractqidq + ___fractqitq + ___fractqiha + ___fractqisa + ___fractqida + ___fractqita + ___fractqiuqq + ___fractqiuhq + ___fractqiusq + ___fractqiudq + ___fractqiutq + ___fractqiuha + ___fractqiusa + ___fractqiuda + ___fractqiuta + ___fracthiqq + ___fracthihq + ___fracthisq + ___fracthidq + ___fracthitq + ___fracthiha + ___fracthisa + ___fracthida + ___fracthita + ___fracthiuqq + ___fracthiuhq + ___fracthiusq + ___fracthiudq + ___fracthiutq + ___fracthiuha + ___fracthiusa + ___fracthiuda + ___fracthiuta + ___fractsiqq + ___fractsihq + ___fractsisq + ___fractsidq + ___fractsitq + ___fractsiha + ___fractsisa + ___fractsida + ___fractsita + ___fractsiuqq + ___fractsiuhq + ___fractsiusq + ___fractsiudq + ___fractsiutq + ___fractsiuha + ___fractsiusa + ___fractsiuda + ___fractsiuta + ___fractdiqq + ___fractdihq + ___fractdisq + ___fractdidq + ___fractditq + ___fractdiha + ___fractdisa + ___fractdida + ___fractdita + ___fractdiuqq + ___fractdiuhq + ___fractdiusq + ___fractdiudq + ___fractdiutq + ___fractdiuha + ___fractdiusa + ___fractdiuda + ___fractdiuta + ___fracttiqq + ___fracttihq + ___fracttisq + ___fracttidq + ___fracttitq + ___fracttiha + ___fracttisa + ___fracttida + ___fracttita + ___fracttiuqq + ___fracttiuhq + ___fracttiusq + ___fracttiudq + ___fracttiutq + ___fracttiuha + ___fracttiusa + ___fracttiuda + ___fracttiuta + ___fractsfqq + ___fractsfhq + ___fractsfsq + ___fractsfdq + ___fractsftq + ___fractsfha + ___fractsfsa + ___fractsfda + ___fractsfta + ___fractsfuqq + ___fractsfuhq + ___fractsfusq + ___fractsfudq + ___fractsfutq + ___fractsfuha + ___fractsfusa + ___fractsfuda + ___fractsfuta + ___fractdfqq + ___fractdfhq + ___fractdfsq + ___fractdfdq + ___fractdftq + ___fractdfha + ___fractdfsa + ___fractdfda + ___fractdfta + ___fractdfuqq + ___fractdfuhq + ___fractdfusq + ___fractdfudq + ___fractdfutq + ___fractdfuha + ___fractdfusa + ___fractdfuda + ___fractdfuta + ___satfractqqhq2 + ___satfractqqsq2 + ___satfractqqdq2 + ___satfractqqtq2 + ___satfractqqha + ___satfractqqsa + ___satfractqqda + ___satfractqqta + ___satfractqquqq + ___satfractqquhq + ___satfractqqusq + ___satfractqqudq + ___satfractqqutq + ___satfractqquha + ___satfractqqusa + ___satfractqquda + ___satfractqquta + ___satfracthqqq2 + ___satfracthqsq2 + ___satfracthqdq2 + ___satfracthqtq2 + ___satfracthqha + ___satfracthqsa + ___satfracthqda + ___satfracthqta + ___satfracthquqq + ___satfracthquhq + ___satfracthqusq + ___satfracthqudq + ___satfracthqutq + ___satfracthquha + ___satfracthqusa + ___satfracthquda + ___satfracthquta + ___satfractsqqq2 + ___satfractsqhq2 + ___satfractsqdq2 + ___satfractsqtq2 + ___satfractsqha + ___satfractsqsa + ___satfractsqda + ___satfractsqta + ___satfractsquqq + ___satfractsquhq + ___satfractsqusq + ___satfractsqudq + ___satfractsqutq + ___satfractsquha + ___satfractsqusa + ___satfractsquda + ___satfractsquta + ___satfractdqqq2 + ___satfractdqhq2 + ___satfractdqsq2 + ___satfractdqtq2 + ___satfractdqha + ___satfractdqsa + ___satfractdqda + 
___satfractdqta + ___satfractdquqq + ___satfractdquhq + ___satfractdqusq + ___satfractdqudq + ___satfractdqutq + ___satfractdquha + ___satfractdqusa + ___satfractdquda + ___satfractdquta + ___satfracttqqq2 + ___satfracttqhq2 + ___satfracttqsq2 + ___satfracttqdq2 + ___satfracttqha + ___satfracttqsa + ___satfracttqda + ___satfracttqta + ___satfracttquqq + ___satfracttquhq + ___satfracttqusq + ___satfracttqudq + ___satfracttqutq + ___satfracttquha + ___satfracttqusa + ___satfracttquda + ___satfracttquta + ___satfracthaqq + ___satfracthahq + ___satfracthasq + ___satfracthadq + ___satfracthatq + ___satfracthasa2 + ___satfracthada2 + ___satfracthata2 + ___satfracthauqq + ___satfracthauhq + ___satfracthausq + ___satfracthaudq + ___satfracthautq + ___satfracthauha + ___satfracthausa + ___satfracthauda + ___satfracthauta + ___satfractsaqq + ___satfractsahq + ___satfractsasq + ___satfractsadq + ___satfractsatq + ___satfractsaha2 + ___satfractsada2 + ___satfractsata2 + ___satfractsauqq + ___satfractsauhq + ___satfractsausq + ___satfractsaudq + ___satfractsautq + ___satfractsauha + ___satfractsausa + ___satfractsauda + ___satfractsauta + ___satfractdaqq + ___satfractdahq + ___satfractdasq + ___satfractdadq + ___satfractdatq + ___satfractdaha2 + ___satfractdasa2 + ___satfractdata2 + ___satfractdauqq + ___satfractdauhq + ___satfractdausq + ___satfractdaudq + ___satfractdautq + ___satfractdauha + ___satfractdausa + ___satfractdauda + ___satfractdauta + ___satfracttaqq + ___satfracttahq + ___satfracttasq + ___satfracttadq + ___satfracttatq + ___satfracttaha2 + ___satfracttasa2 + ___satfracttada2 + ___satfracttauqq + ___satfracttauhq + ___satfracttausq + ___satfracttaudq + ___satfracttautq + ___satfracttauha + ___satfracttausa + ___satfracttauda + ___satfracttauta + ___satfractuqqqq + ___satfractuqqhq + ___satfractuqqsq + ___satfractuqqdq + ___satfractuqqtq + ___satfractuqqha + ___satfractuqqsa + ___satfractuqqda + ___satfractuqqta + ___satfractuqquhq2 + ___satfractuqqusq2 + ___satfractuqqudq2 + ___satfractuqqutq2 + ___satfractuqquha + ___satfractuqqusa + ___satfractuqquda + ___satfractuqquta + ___satfractuhqqq + ___satfractuhqhq + ___satfractuhqsq + ___satfractuhqdq + ___satfractuhqtq + ___satfractuhqha + ___satfractuhqsa + ___satfractuhqda + ___satfractuhqta + ___satfractuhquqq2 + ___satfractuhqusq2 + ___satfractuhqudq2 + ___satfractuhqutq2 + ___satfractuhquha + ___satfractuhqusa + ___satfractuhquda + ___satfractuhquta + ___satfractusqqq + ___satfractusqhq + ___satfractusqsq + ___satfractusqdq + ___satfractusqtq + ___satfractusqha + ___satfractusqsa + ___satfractusqda + ___satfractusqta + ___satfractusquqq2 + ___satfractusquhq2 + ___satfractusqudq2 + ___satfractusqutq2 + ___satfractusquha + ___satfractusqusa + ___satfractusquda + ___satfractusquta + ___satfractudqqq + ___satfractudqhq + ___satfractudqsq + ___satfractudqdq + ___satfractudqtq + ___satfractudqha + ___satfractudqsa + ___satfractudqda + ___satfractudqta + ___satfractudquqq2 + ___satfractudquhq2 + ___satfractudqusq2 + ___satfractudqutq2 + ___satfractudquha + ___satfractudqusa + ___satfractudquda + ___satfractudquta + ___satfractutqqq + ___satfractutqhq + ___satfractutqsq + ___satfractutqdq + ___satfractutqtq + ___satfractutqha + ___satfractutqsa + ___satfractutqda + ___satfractutqta + ___satfractutquqq2 + ___satfractutquhq2 + ___satfractutqusq2 + ___satfractutqudq2 + ___satfractutquha + ___satfractutqusa + ___satfractutquda + ___satfractutquta + ___satfractuhaqq + ___satfractuhahq + ___satfractuhasq + ___satfractuhadq + ___satfractuhatq + 
___satfractuhaha + ___satfractuhasa + ___satfractuhada + ___satfractuhata + ___satfractuhauqq + ___satfractuhauhq + ___satfractuhausq + ___satfractuhaudq + ___satfractuhautq + ___satfractuhausa2 + ___satfractuhauda2 + ___satfractuhauta2 + ___satfractusaqq + ___satfractusahq + ___satfractusasq + ___satfractusadq + ___satfractusatq + ___satfractusaha + ___satfractusasa + ___satfractusada + ___satfractusata + ___satfractusauqq + ___satfractusauhq + ___satfractusausq + ___satfractusaudq + ___satfractusautq + ___satfractusauha2 + ___satfractusauda2 + ___satfractusauta2 + ___satfractudaqq + ___satfractudahq + ___satfractudasq + ___satfractudadq + ___satfractudatq + ___satfractudaha + ___satfractudasa + ___satfractudada + ___satfractudata + ___satfractudauqq + ___satfractudauhq + ___satfractudausq + ___satfractudaudq + ___satfractudautq + ___satfractudauha2 + ___satfractudausa2 + ___satfractudauta2 + ___satfractutaqq + ___satfractutahq + ___satfractutasq + ___satfractutadq + ___satfractutatq + ___satfractutaha + ___satfractutasa + ___satfractutada + ___satfractutata + ___satfractutauqq + ___satfractutauhq + ___satfractutausq + ___satfractutaudq + ___satfractutautq + ___satfractutauha2 + ___satfractutausa2 + ___satfractutauda2 + ___satfractqiqq + ___satfractqihq + ___satfractqisq + ___satfractqidq + ___satfractqitq + ___satfractqiha + ___satfractqisa + ___satfractqida + ___satfractqita + ___satfractqiuqq + ___satfractqiuhq + ___satfractqiusq + ___satfractqiudq + ___satfractqiutq + ___satfractqiuha + ___satfractqiusa + ___satfractqiuda + ___satfractqiuta + ___satfracthiqq + ___satfracthihq + ___satfracthisq + ___satfracthidq + ___satfracthitq + ___satfracthiha + ___satfracthisa + ___satfracthida + ___satfracthita + ___satfracthiuqq + ___satfracthiuhq + ___satfracthiusq + ___satfracthiudq + ___satfracthiutq + ___satfracthiuha + ___satfracthiusa + ___satfracthiuda + ___satfracthiuta + ___satfractsiqq + ___satfractsihq + ___satfractsisq + ___satfractsidq + ___satfractsitq + ___satfractsiha + ___satfractsisa + ___satfractsida + ___satfractsita + ___satfractsiuqq + ___satfractsiuhq + ___satfractsiusq + ___satfractsiudq + ___satfractsiutq + ___satfractsiuha + ___satfractsiusa + ___satfractsiuda + ___satfractsiuta + ___satfractdiqq + ___satfractdihq + ___satfractdisq + ___satfractdidq + ___satfractditq + ___satfractdiha + ___satfractdisa + ___satfractdida + ___satfractdita + ___satfractdiuqq + ___satfractdiuhq + ___satfractdiusq + ___satfractdiudq + ___satfractdiutq + ___satfractdiuha + ___satfractdiusa + ___satfractdiuda + ___satfractdiuta + ___satfracttiqq + ___satfracttihq + ___satfracttisq + ___satfracttidq + ___satfracttitq + ___satfracttiha + ___satfracttisa + ___satfracttida + ___satfracttita + ___satfracttiuqq + ___satfracttiuhq + ___satfracttiusq + ___satfracttiudq + ___satfracttiutq + ___satfracttiuha + ___satfracttiusa + ___satfracttiuda + ___satfracttiuta + ___satfractsfqq + ___satfractsfhq + ___satfractsfsq + ___satfractsfdq + ___satfractsftq + ___satfractsfha + ___satfractsfsa + ___satfractsfda + ___satfractsfta + ___satfractsfuqq + ___satfractsfuhq + ___satfractsfusq + ___satfractsfudq + ___satfractsfutq + ___satfractsfuha + ___satfractsfusa + ___satfractsfuda + ___satfractsfuta + ___satfractdfqq + ___satfractdfhq + ___satfractdfsq + ___satfractdfdq + ___satfractdftq + ___satfractdfha + ___satfractdfsa + ___satfractdfda + ___satfractdfta + ___satfractdfuqq + ___satfractdfuhq + ___satfractdfusq + ___satfractdfudq + ___satfractdfutq + ___satfractdfuha + ___satfractdfusa + ___satfractdfuda + 
___satfractdfuta + ___fractunsqqqi + ___fractunsqqhi + ___fractunsqqsi + ___fractunsqqdi + ___fractunsqqti + ___fractunshqqi + ___fractunshqhi + ___fractunshqsi + ___fractunshqdi + ___fractunshqti + ___fractunssqqi + ___fractunssqhi + ___fractunssqsi + ___fractunssqdi + ___fractunssqti + ___fractunsdqqi + ___fractunsdqhi + ___fractunsdqsi + ___fractunsdqdi + ___fractunsdqti + ___fractunstqqi + ___fractunstqhi + ___fractunstqsi + ___fractunstqdi + ___fractunstqti + ___fractunshaqi + ___fractunshahi + ___fractunshasi + ___fractunshadi + ___fractunshati + ___fractunssaqi + ___fractunssahi + ___fractunssasi + ___fractunssadi + ___fractunssati + ___fractunsdaqi + ___fractunsdahi + ___fractunsdasi + ___fractunsdadi + ___fractunsdati + ___fractunstaqi + ___fractunstahi + ___fractunstasi + ___fractunstadi + ___fractunstati + ___fractunsuqqqi + ___fractunsuqqhi + ___fractunsuqqsi + ___fractunsuqqdi + ___fractunsuqqti + ___fractunsuhqqi + ___fractunsuhqhi + ___fractunsuhqsi + ___fractunsuhqdi + ___fractunsuhqti + ___fractunsusqqi + ___fractunsusqhi + ___fractunsusqsi + ___fractunsusqdi + ___fractunsusqti + ___fractunsudqqi + ___fractunsudqhi + ___fractunsudqsi + ___fractunsudqdi + ___fractunsudqti + ___fractunsutqqi + ___fractunsutqhi + ___fractunsutqsi + ___fractunsutqdi + ___fractunsutqti + ___fractunsuhaqi + ___fractunsuhahi + ___fractunsuhasi + ___fractunsuhadi + ___fractunsuhati + ___fractunsusaqi + ___fractunsusahi + ___fractunsusasi + ___fractunsusadi + ___fractunsusati + ___fractunsudaqi + ___fractunsudahi + ___fractunsudasi + ___fractunsudadi + ___fractunsudati + ___fractunsutaqi + ___fractunsutahi + ___fractunsutasi + ___fractunsutadi + ___fractunsutati + ___fractunsqiqq + ___fractunsqihq + ___fractunsqisq + ___fractunsqidq + ___fractunsqitq + ___fractunsqiha + ___fractunsqisa + ___fractunsqida + ___fractunsqita + ___fractunsqiuqq + ___fractunsqiuhq + ___fractunsqiusq + ___fractunsqiudq + ___fractunsqiutq + ___fractunsqiuha + ___fractunsqiusa + ___fractunsqiuda + ___fractunsqiuta + ___fractunshiqq + ___fractunshihq + ___fractunshisq + ___fractunshidq + ___fractunshitq + ___fractunshiha + ___fractunshisa + ___fractunshida + ___fractunshita + ___fractunshiuqq + ___fractunshiuhq + ___fractunshiusq + ___fractunshiudq + ___fractunshiutq + ___fractunshiuha + ___fractunshiusa + ___fractunshiuda + ___fractunshiuta + ___fractunssiqq + ___fractunssihq + ___fractunssisq + ___fractunssidq + ___fractunssitq + ___fractunssiha + ___fractunssisa + ___fractunssida + ___fractunssita + ___fractunssiuqq + ___fractunssiuhq + ___fractunssiusq + ___fractunssiudq + ___fractunssiutq + ___fractunssiuha + ___fractunssiusa + ___fractunssiuda + ___fractunssiuta + ___fractunsdiqq + ___fractunsdihq + ___fractunsdisq + ___fractunsdidq + ___fractunsditq + ___fractunsdiha + ___fractunsdisa + ___fractunsdida + ___fractunsdita + ___fractunsdiuqq + ___fractunsdiuhq + ___fractunsdiusq + ___fractunsdiudq + ___fractunsdiutq + ___fractunsdiuha + ___fractunsdiusa + ___fractunsdiuda + ___fractunsdiuta + ___fractunstiqq + ___fractunstihq + ___fractunstisq + ___fractunstidq + ___fractunstitq + ___fractunstiha + ___fractunstisa + ___fractunstida + ___fractunstita + ___fractunstiuqq + ___fractunstiuhq + ___fractunstiusq + ___fractunstiudq + ___fractunstiutq + ___fractunstiuha + ___fractunstiusa + ___fractunstiuda + ___fractunstiuta + ___satfractunsqiqq + ___satfractunsqihq + ___satfractunsqisq + ___satfractunsqidq + ___satfractunsqitq + ___satfractunsqiha + ___satfractunsqisa + ___satfractunsqida + ___satfractunsqita + 
___satfractunsqiuqq + ___satfractunsqiuhq + ___satfractunsqiusq + ___satfractunsqiudq + ___satfractunsqiutq + ___satfractunsqiuha + ___satfractunsqiusa + ___satfractunsqiuda + ___satfractunsqiuta + ___satfractunshiqq + ___satfractunshihq + ___satfractunshisq + ___satfractunshidq + ___satfractunshitq + ___satfractunshiha + ___satfractunshisa + ___satfractunshida + ___satfractunshita + ___satfractunshiuqq + ___satfractunshiuhq + ___satfractunshiusq + ___satfractunshiudq + ___satfractunshiutq + ___satfractunshiuha + ___satfractunshiusa + ___satfractunshiuda + ___satfractunshiuta + ___satfractunssiqq + ___satfractunssihq + ___satfractunssisq + ___satfractunssidq + ___satfractunssitq + ___satfractunssiha + ___satfractunssisa + ___satfractunssida + ___satfractunssita + ___satfractunssiuqq + ___satfractunssiuhq + ___satfractunssiusq + ___satfractunssiudq + ___satfractunssiutq + ___satfractunssiuha + ___satfractunssiusa + ___satfractunssiuda + ___satfractunssiuta + ___satfractunsdiqq + ___satfractunsdihq + ___satfractunsdisq + ___satfractunsdidq + ___satfractunsditq + ___satfractunsdiha + ___satfractunsdisa + ___satfractunsdida + ___satfractunsdita + ___satfractunsdiuqq + ___satfractunsdiuhq + ___satfractunsdiusq + ___satfractunsdiudq + ___satfractunsdiutq + ___satfractunsdiuha + ___satfractunsdiusa + ___satfractunsdiuda + ___satfractunsdiuta + ___satfractunstiqq + ___satfractunstihq + ___satfractunstisq + ___satfractunstidq + ___satfractunstitq + ___satfractunstiha + ___satfractunstisa + ___satfractunstida + ___satfractunstita + ___satfractunstiuqq + ___satfractunstiuhq + ___satfractunstiusq + ___satfractunstiudq + ___satfractunstiutq + ___satfractunstiuha + ___satfractunstiusa + ___satfractunstiuda + ___satfractunstiuta +} + +%inherit GCC_4.4.0 GCC_4.3.0 +GCC_4.4.0 { + ___sync_fetch_and_add_1 + ___sync_fetch_and_sub_1 + ___sync_fetch_and_or_1 + ___sync_fetch_and_and_1 + ___sync_fetch_and_xor_1 + ___sync_fetch_and_nand_1 + ___sync_add_and_fetch_1 + ___sync_sub_and_fetch_1 + ___sync_or_and_fetch_1 + ___sync_and_and_fetch_1 + ___sync_xor_and_fetch_1 + ___sync_nand_and_fetch_1 + ___sync_bool_compare_and_swap_1 + ___sync_val_compare_and_swap_1 + ___sync_lock_test_and_set_1 + + ___sync_fetch_and_add_2 + ___sync_fetch_and_sub_2 + ___sync_fetch_and_or_2 + ___sync_fetch_and_and_2 + ___sync_fetch_and_xor_2 + ___sync_fetch_and_nand_2 + ___sync_add_and_fetch_2 + ___sync_sub_and_fetch_2 + ___sync_or_and_fetch_2 + ___sync_and_and_fetch_2 + ___sync_xor_and_fetch_2 + ___sync_nand_and_fetch_2 + ___sync_bool_compare_and_swap_2 + ___sync_val_compare_and_swap_2 + ___sync_lock_test_and_set_2 + + ___sync_fetch_and_add_4 + ___sync_fetch_and_sub_4 + ___sync_fetch_and_or_4 + ___sync_fetch_and_and_4 + ___sync_fetch_and_xor_4 + ___sync_fetch_and_nand_4 + ___sync_add_and_fetch_4 + ___sync_sub_and_fetch_4 + ___sync_or_and_fetch_4 + ___sync_and_and_fetch_4 + ___sync_xor_and_fetch_4 + ___sync_nand_and_fetch_4 + ___sync_bool_compare_and_swap_4 + ___sync_val_compare_and_swap_4 + ___sync_lock_test_and_set_4 + + ___sync_fetch_and_add_8 + ___sync_fetch_and_sub_8 + ___sync_fetch_and_or_8 + ___sync_fetch_and_and_8 + ___sync_fetch_and_xor_8 + ___sync_fetch_and_nand_8 + ___sync_add_and_fetch_8 + ___sync_sub_and_fetch_8 + ___sync_or_and_fetch_8 + ___sync_and_and_fetch_8 + ___sync_xor_and_fetch_8 + ___sync_nand_and_fetch_8 + ___sync_bool_compare_and_swap_8 + ___sync_val_compare_and_swap_8 + ___sync_lock_test_and_set_8 + + ___sync_fetch_and_add_16 + ___sync_fetch_and_sub_16 + ___sync_fetch_and_or_16 + ___sync_fetch_and_and_16 + 
___sync_fetch_and_xor_16 + ___sync_fetch_and_nand_16 + ___sync_add_and_fetch_16 + ___sync_sub_and_fetch_16 + ___sync_or_and_fetch_16 + ___sync_and_and_fetch_16 + ___sync_xor_and_fetch_16 + ___sync_nand_and_fetch_16 + ___sync_bool_compare_and_swap_16 + ___sync_val_compare_and_swap_16 + ___sync_lock_test_and_set_16 + + ___sync_synchronize +} + +%inherit GCC_4.5.0 GCC_4.4.0 +GCC_4.5.0 { + ___unordxf2 + ___unordtf2 +} diff --git a/gcc/config/bfin/linux-unwind.h b/gcc/config/bfin/linux-unwind.h new file mode 100644 index 000000000..15bb2f12b --- /dev/null +++ b/gcc/config/bfin/linux-unwind.h @@ -0,0 +1,164 @@ +/* DWARF2 EH unwinding support for Blackfin. + Copyright (C) 2007, 2009, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. + Don't use this at all if inhibit_libc is used. */ + +#ifndef inhibit_libc + +#include +#include + +#define MD_FALLBACK_FRAME_STATE_FOR bfin_fallback_frame_state + +static _Unwind_Reason_Code +bfin_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned char *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + + /* P0=__NR_rt_sigreturn (X); EXCPT 0x0; */ + if (*(unsigned short *)pc == 0xe128 + && *(unsigned short *)(pc + 2) == 0x00ad + && *(unsigned short *)(pc + 4) == 0x00a0) + { + struct rt_sigframe { + int sig; + siginfo_t *pinfo; + void *puc; + char retcode[8]; + siginfo_t info; + struct ucontext uc; + } *rt_ = context->cfa; + + /* The void * cast is necessary to avoid an aliasing warning. + The aliasing warning is correct, but should not be a problem + because it does not alias anything. 
*/ + sc = (struct sigcontext *)(void *)&rt_->uc.uc_mcontext.gregs; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_usp; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 14; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + + fs->regs.reg[0].how = REG_SAVED_OFFSET; + fs->regs.reg[0].loc.offset = (long)&sc->sc_r0 - new_cfa; + fs->regs.reg[1].how = REG_SAVED_OFFSET; + fs->regs.reg[1].loc.offset = (long)&sc->sc_r1 - new_cfa; + fs->regs.reg[2].how = REG_SAVED_OFFSET; + fs->regs.reg[2].loc.offset = (long)&sc->sc_r2 - new_cfa; + fs->regs.reg[3].how = REG_SAVED_OFFSET; + fs->regs.reg[3].loc.offset = (long)&sc->sc_r3 - new_cfa; + fs->regs.reg[4].how = REG_SAVED_OFFSET; + fs->regs.reg[4].loc.offset = (long)&sc->sc_r4 - new_cfa; + fs->regs.reg[5].how = REG_SAVED_OFFSET; + fs->regs.reg[5].loc.offset = (long)&sc->sc_r5 - new_cfa; + fs->regs.reg[6].how = REG_SAVED_OFFSET; + fs->regs.reg[6].loc.offset = (long)&sc->sc_r6 - new_cfa; + fs->regs.reg[7].how = REG_SAVED_OFFSET; + fs->regs.reg[7].loc.offset = (long)&sc->sc_r7 - new_cfa; + fs->regs.reg[8].how = REG_SAVED_OFFSET; + fs->regs.reg[8].loc.offset = (long)&sc->sc_p0 - new_cfa; + fs->regs.reg[9].how = REG_SAVED_OFFSET; + fs->regs.reg[9].loc.offset = (long)&sc->sc_p1 - new_cfa; + fs->regs.reg[10].how = REG_SAVED_OFFSET; + fs->regs.reg[10].loc.offset = (long)&sc->sc_p2 - new_cfa; + fs->regs.reg[11].how = REG_SAVED_OFFSET; + fs->regs.reg[11].loc.offset = (long)&sc->sc_p3 - new_cfa; + fs->regs.reg[12].how = REG_SAVED_OFFSET; + fs->regs.reg[12].loc.offset = (long)&sc->sc_p4 - new_cfa; + fs->regs.reg[13].how = REG_SAVED_OFFSET; + fs->regs.reg[13].loc.offset = (long)&sc->sc_p5 - new_cfa; + + fs->regs.reg[15].how = REG_SAVED_OFFSET; + fs->regs.reg[15].loc.offset = (long)&sc->sc_fp - new_cfa; + fs->regs.reg[16].how = REG_SAVED_OFFSET; + fs->regs.reg[16].loc.offset = (long)&sc->sc_i0 - new_cfa; + fs->regs.reg[17].how = REG_SAVED_OFFSET; + fs->regs.reg[17].loc.offset = (long)&sc->sc_i1 - new_cfa; + fs->regs.reg[18].how = REG_SAVED_OFFSET; + fs->regs.reg[18].loc.offset = (long)&sc->sc_i2 - new_cfa; + fs->regs.reg[19].how = REG_SAVED_OFFSET; + fs->regs.reg[19].loc.offset = (long)&sc->sc_i3 - new_cfa; + fs->regs.reg[20].how = REG_SAVED_OFFSET; + fs->regs.reg[20].loc.offset = (long)&sc->sc_b0 - new_cfa; + fs->regs.reg[21].how = REG_SAVED_OFFSET; + fs->regs.reg[21].loc.offset = (long)&sc->sc_b1 - new_cfa; + fs->regs.reg[22].how = REG_SAVED_OFFSET; + fs->regs.reg[22].loc.offset = (long)&sc->sc_b2 - new_cfa; + fs->regs.reg[23].how = REG_SAVED_OFFSET; + fs->regs.reg[23].loc.offset = (long)&sc->sc_b3 - new_cfa; + fs->regs.reg[24].how = REG_SAVED_OFFSET; + fs->regs.reg[24].loc.offset = (long)&sc->sc_l0 - new_cfa; + fs->regs.reg[25].how = REG_SAVED_OFFSET; + fs->regs.reg[25].loc.offset = (long)&sc->sc_l1 - new_cfa; + fs->regs.reg[26].how = REG_SAVED_OFFSET; + fs->regs.reg[26].loc.offset = (long)&sc->sc_l2 - new_cfa; + fs->regs.reg[27].how = REG_SAVED_OFFSET; + fs->regs.reg[27].loc.offset = (long)&sc->sc_l3 - new_cfa; + fs->regs.reg[28].how = REG_SAVED_OFFSET; + fs->regs.reg[28].loc.offset = (long)&sc->sc_m0 - new_cfa; + fs->regs.reg[29].how = REG_SAVED_OFFSET; + fs->regs.reg[29].loc.offset = (long)&sc->sc_m1 - new_cfa; + fs->regs.reg[30].how = REG_SAVED_OFFSET; + fs->regs.reg[30].loc.offset = (long)&sc->sc_m2 - new_cfa; + fs->regs.reg[31].how = REG_SAVED_OFFSET; + fs->regs.reg[31].loc.offset = (long)&sc->sc_m3 - new_cfa; + /* FIXME: Handle A0, A1, CC. 
*/ + fs->regs.reg[35].how = REG_SAVED_OFFSET; + fs->regs.reg[35].loc.offset = (long)&sc->sc_rets - new_cfa; + fs->regs.reg[36].how = REG_SAVED_OFFSET; + fs->regs.reg[36].loc.offset = (long)&sc->sc_pc - new_cfa; + fs->regs.reg[37].how = REG_SAVED_OFFSET; + fs->regs.reg[37].loc.offset = (long)&sc->sc_retx - new_cfa; + + fs->regs.reg[40].how = REG_SAVED_OFFSET; + fs->regs.reg[40].loc.offset = (long)&sc->sc_astat - new_cfa; + fs->regs.reg[41].how = REG_SAVED_OFFSET; + fs->regs.reg[41].loc.offset = (long)&sc->sc_seqstat - new_cfa; + + fs->regs.reg[44].how = REG_SAVED_OFFSET; + fs->regs.reg[44].loc.offset = (long)&sc->sc_lt0 - new_cfa; + fs->regs.reg[45].how = REG_SAVED_OFFSET; + fs->regs.reg[45].loc.offset = (long)&sc->sc_lt1 - new_cfa; + fs->regs.reg[46].how = REG_SAVED_OFFSET; + fs->regs.reg[46].loc.offset = (long)&sc->sc_lc0 - new_cfa; + fs->regs.reg[47].how = REG_SAVED_OFFSET; + fs->regs.reg[47].loc.offset = (long)&sc->sc_lc1 - new_cfa; + fs->regs.reg[48].how = REG_SAVED_OFFSET; + fs->regs.reg[48].loc.offset = (long)&sc->sc_lb0 - new_cfa; + fs->regs.reg[49].how = REG_SAVED_OFFSET; + fs->regs.reg[49].loc.offset = (long)&sc->sc_lb1 - new_cfa; + fs->retaddr_column = 35; + + return _URC_NO_REASON; +} + +#endif /* ifdef inhibit_libc */ diff --git a/gcc/config/bfin/linux.h b/gcc/config/bfin/linux.h new file mode 100644 index 000000000..a75074592 --- /dev/null +++ b/gcc/config/bfin/linux.h @@ -0,0 +1,54 @@ +/* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#undef SUBTARGET_DRIVER_SELF_SPECS +#define SUBTARGET_DRIVER_SELF_SPECS \ + "%{!mno-fdpic:-mfdpic} -micplb", + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS() + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} crtreloc.o%s \ + crti.o%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %{mfast-fp:-lbffastfp} %G %L %{static:--end-group} \ + %{!static:%{mfast-fp:-lbffastfp} %G}" + +#undef LINK_SPEC +#define LINK_SPEC "\ + %{mfdpic: -m elf32bfinfd -z text} %{shared} %{pie} \ + %{static:-dn -Bstatic} \ + %{shared:-G -Bdynamic} \ + %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker /lib/ld-uClibc.so.0} \ + %{static}} -init __init -fini __fini" + +#define MD_UNWIND_SUPPORT "config/bfin/linux-unwind.h" + +#undef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 1 diff --git a/gcc/config/bfin/predicates.md b/gcc/config/bfin/predicates.md new file mode 100644 index 000000000..84bf59195 --- /dev/null +++ b/gcc/config/bfin/predicates.md @@ -0,0 +1,241 @@ +;; Predicate definitions for the Blackfin. +;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +;; Contributed by Analog Devices. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return nonzero iff OP is one of the integer constants 1 or 2. +(define_predicate "pos_scale_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) + +;; Return nonzero iff OP is one of the integer constants 2 or 4. +(define_predicate "scale_by_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4"))) + +;; Return nonzero if OP is a constant that consists of two parts; lower +;; bits all zero and upper bits all ones. In this case, we can perform +;; an AND operation with a sequence of two shifts. Don't return nonzero +;; if the constant would be cheap to load. +(define_predicate "highbits_operand" + (and (match_code "const_int") + (match_test "log2constp (-INTVAL (op)) && !satisfies_constraint_Ks7 (op)"))) + +;; Return nonzero if OP is suitable as a right-hand side operand for an +;; andsi3 operation. +(define_predicate "rhs_andsi3_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "log2constp (~INTVAL (op)) || INTVAL (op) == 255 || INTVAL (op) == 65535")))) + +;; Return nonzero if OP is a register or a constant with exactly one bit +;; set. +(define_predicate "regorlog2_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "log2constp (INTVAL (op))")))) + +;; Return nonzero if OP is a register or an integer constant. 
+(define_predicate "reg_or_const_int_operand" + (ior (match_operand 0 "register_operand") + (match_code "const_int"))) + +(define_predicate "const01_operand" + (and (match_code "const_int") + (match_test "op == const0_rtx || op == const1_rtx"))) + +(define_predicate "const1_operand" + (and (match_code "const_int") + (match_test "op == const1_rtx"))) + +(define_predicate "const3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 3"))) + +(define_predicate "vec_shift_operand" + (ior (and (match_code "const_int") + (match_test "INTVAL (op) >= -16 && INTVAL (op) < 15")) + (match_operand 0 "register_operand"))) + +;; Like register_operand, but make sure that hard regs have a valid mode. +(define_predicate "valid_reg_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (REGNO (op) < FIRST_PSEUDO_REGISTER) + return HARD_REGNO_MODE_OK (REGNO (op), mode); + return 1; +}) + +;; Return nonzero if OP is a D register. +(define_predicate "d_register_operand" + (and (match_code "reg") + (match_test "D_REGNO_P (REGNO (op))"))) + +(define_predicate "p_register_operand" + (and (match_code "reg") + (match_test "P_REGNO_P (REGNO (op))"))) + +(define_predicate "dp_register_operand" + (and (match_code "reg") + (match_test "D_REGNO_P (REGNO (op)) || P_REGNO_P (REGNO (op))"))) + +;; Return nonzero if OP is a LC register. +(define_predicate "lc_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LC0 || REGNO (op) == REG_LC1"))) + +;; Return nonzero if OP is a LT register. +(define_predicate "lt_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LT0 || REGNO (op) == REG_LT1"))) + +;; Return nonzero if OP is a LB register. +(define_predicate "lb_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LB0 || REGNO (op) == REG_LB1"))) + +;; Return nonzero if OP is a register or a 7-bit signed constant. +(define_predicate "reg_or_7bit_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_Ks7 (op)")))) + +;; Return nonzero if OP is a register other than DREG and PREG. +(define_predicate "nondp_register_operand" + (match_operand 0 "register_operand") +{ + unsigned int regno; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + regno = REGNO (op); + return (regno >= FIRST_PSEUDO_REGISTER || !DP_REGNO_P (regno)); +}) + +;; Return nonzero if OP is a register other than DREG and PREG, or MEM. +(define_predicate "nondp_reg_or_memory_operand" + (ior (match_operand 0 "nondp_register_operand") + (match_operand 0 "memory_operand"))) + +;; Return nonzero if OP is a register or, when negated, a 7-bit signed +;; constant. +(define_predicate "reg_or_neg7bit_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_KN7 (op)")))) + +;; Used for secondary reloads, this function returns 1 if OP is of the +;; form (plus (fp) (const_int)). +(define_predicate "fp_plus_const_operand" + (match_code "plus") +{ + rtx op1, op2; + + op1 = XEXP (op, 0); + op2 = XEXP (op, 1); + return (REG_P (op1) + && (REGNO (op1) == FRAME_POINTER_REGNUM + || REGNO (op1) == STACK_POINTER_REGNUM) + && GET_CODE (op2) == CONST_INT); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. 
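+;; For instance, (symbol_ref "foo"), a label_ref, and a sum wrapped in a
+;; CONST such as (const (plus (symbol_ref "foo") (const_int 4))) all match,
+;; while a bare (plus (symbol_ref "foo") (const_int 4)) does not.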
+(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op, 0), 0)) == LABEL_REF) + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT")))) + +;; Returns 1 if OP is a plain constant or matched by symbolic_operand. +(define_predicate "symbolic_or_const_operand" + (ior (match_code "const_int,const_double") + (match_operand 0 "symbolic_operand"))) + +;; Returns 1 if OP is a SYMBOL_REF. +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +;; True for any non-virtual or eliminable register. Used in places where +;; instantiation of such a register may cause the pattern to not be recognized. +(define_predicate "register_no_elim_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return !(op == arg_pointer_rtx + || op == frame_pointer_rtx + || (REGNO (op) >= FIRST_PSEUDO_REGISTER + && REGNO (op) <= LAST_VIRTUAL_REGISTER)); +}) + +;; Test for an operator valid in a BImode conditional branch +(define_predicate "bfin_bimode_comparison_operator" + (match_code "eq,ne")) + +;; Test for an operator whose result is accessible with movbisi. +(define_predicate "bfin_direct_comparison_operator" + (match_code "eq,lt,le,leu,ltu")) + +;; The following three are used to compute the addrtype attribute. They return +;; true if passed a memory address usable for a 16-bit load or store using a +;; P or I register, respectively. If neither matches, we know we have a +;; 32-bit instruction. +;; We subdivide the P case into normal P registers, and SP/FP. We can assume +;; that speculative loads through SP and FP are no problem, so this has +;; an effect on the anomaly workaround code. + +(define_predicate "mem_p_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx; +}) + +(define_predicate "mem_spfp_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return op == stack_pointer_rtx || op == frame_pointer_rtx; +}) + +(define_predicate "mem_i_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return IREG_P (op); +}) diff --git a/gcc/config/bfin/print-sysroot-suffix.sh b/gcc/config/bfin/print-sysroot-suffix.sh new file mode 100644 index 000000000..c33ff47c3 --- /dev/null +++ b/gcc/config/bfin/print-sysroot-suffix.sh @@ -0,0 +1,81 @@ +#!/bin/sh +# Copyright (C) 2007 Free Software Foundation, Inc. +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# This script takes the following arguments: +# +# - the target sysroot +# - the value of $(MULTILIB_MATCHES) +# - the value of $(MULTILIB_OPTIONS) +# +# It uses these arguments to construct a definition of SYSROOT_SUFFIX_SPEC, +# which it prints to the standard output. For each multilib directory FOO, +# the script checks whether $sysroot has a subdirectory FOO, and if so will +# use /FOO for all compatible command-line options. It will not add a +# suffix for /FOO's options otherwise. These suffixes are concatenated, +# with one subspec for each space-separated entry in $(MULTILIB_OPTIONS). +set -e +sysroot=$1 +matches=$2 +options=$3 + +# For each multilib option OPT, add to $substs a sed command of the +# form "-e 's/OPT/OPT/'". +substs="" +for option in `echo "$options" | tr '/' ' '` +do + substs="$substs -e 's/$option/$option/g'" +done + +# For each ALIAS=CANONICAL entry in $MULTILIB_MATCHES, look for sed +# arguments in $substs of the form "-e 's/CANONICAL/.../'". Replace +# such entries with "-e 's/CANONICAL/ALIAS|.../'". Both the ALIAS and +# CANONICAL parts of $MULTILIB_MATCHES use '?' to stand for '='. +# +# After this loop, a command of the form "echo FOO | eval sed $substs" +# will replace a canonical option FOO with a %{...}-style spec pattern. +for match in $matches +do + canonical=`echo "$match" | sed -e 's/=.*//' -e 's/?/=/g'` + alias=`echo "$match" | sed -e 's/.*=//' -e 's/?/=/g'` + substs=`echo "$substs" | sed -e "s,s/$canonical/,&$alias|,"` +done + +# Build up the final SYSROOT_SUFFIX_SPEC in $spec. +spec= +for combo in $options +do + # See which option alternatives in $combo have their own sysroot + # directory. Create a subspec of the form "%{PAT1:/DIR1;...;PATn:DIRn}" + # from each such option OPTi, where DIRi is the directory associated + # with OPTi and PATi is the result of passing OPTi through $substs. + subspec= + for option in `echo "$combo" | tr '/' ' '` + do + dir=`echo "$option" | sed 's/mcpu=//'` + if test -d "$sysroot/$dir"; then + test -z "$subspec" || subspec="$subspec;" + subspec="$subspec"`echo "$option" | eval sed $substs`":/$dir" + fi + done + # Concatenate all the subspecs. + test -z "$subspec" || spec="$spec%{$subspec}" +done +if test -n "$spec"; then + echo "#undef SYSROOT_SUFFIX_SPEC" + echo "#define SYSROOT_SUFFIX_SPEC \"$spec\"" +fi diff --git a/gcc/config/bfin/rtems.h b/gcc/config/bfin/rtems.h new file mode 100644 index 000000000..6fa6ef10e --- /dev/null +++ b/gcc/config/bfin/rtems.h @@ -0,0 +1,28 @@ +/* Definitions for rtems targeting a bfin + Copyright (C) 2006, 2007 Free Software Foundation, Inc. + Contributed by Ralf Corsépius (ralf.corsepius@rtems.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target OS preprocessor built-ins. 
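+   (These let target code test for RTEMS with, for example,
+   #ifdef __rtems__.)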
*/ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__rtems__"); \ + builtin_assert ("system=rtems"); \ + } \ + while (0) diff --git a/gcc/config/bfin/sync.md b/gcc/config/bfin/sync.md new file mode 100644 index 000000000..7025af497 --- /dev/null +++ b/gcc/config/bfin/sync.md @@ -0,0 +1,178 @@ +;; GCC machine description for Blackfin synchronization instructions. +;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. +;; Contributed by Analog Devices. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_code_iterator FETCHOP [plus minus ior and xor]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "ior") (and "and") (xor "xor")]) +(define_code_attr fetchop_addr + [(plus "1072") (minus "1088") (ior "1104") (and "1120") (xor "1136")]) + +(define_insn "sync_si_internal" + [(set (mem:SI (match_operand:SI 0 "register_operand" "qA")) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 0)) + (match_operand:SI 1 "register_operand" "q0")) + (match_operand:SI 2 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=q0")) + (clobber (match_scratch:SI 4 "=q1")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%2);" + [(set_attr "type" "call")]) + +(define_expand "sync_si" + [(parallel + [(set (match_operand:SI 0 "memory_operand" "+m") + (unspec:SI + [(FETCHOP:SI (match_dup 0) + (match_operand:SI 1 "register_operand" "q0")) + (match_dup 2)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[0], 0))) + { + operands[0] = shallow_copy_rtx (operands[0]); + XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0)); + } + operands[2] = force_reg (Pmode, GEN_INT ()); +}) + +(define_insn "sync_old_si_internal" + [(set (match_operand:SI 0 "register_operand" "=q1") + (mem:SI (match_operand:SI 1 "register_operand" "qA"))) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 1)) + (match_operand:SI 2 "register_operand" "q0")) + (match_operand:SI 3 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "=q0")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%3);" + [(set_attr "type" "call")]) + +(define_expand "sync_old_si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_operand:SI 2 "register_operand" "")) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + } + operands[3] = force_reg (Pmode, GEN_INT ()); +}) + +(define_insn 
"sync_new_si_internal" + [(set (match_operand:SI 0 "register_operand" "=q0") + (unspec:SI + [(FETCHOP:SI + (mem:SI (match_operand:SI 1 "register_operand" "qA")) + (match_operand:SI 2 "register_operand" "q0")) + (match_operand:SI 3 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2)) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "=q1")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%3);" + [(set_attr "type" "call")]) + +(define_expand "sync_new_si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(FETCHOP:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "register_operand" "")) + (match_dup 3)] + UNSPEC_ATOMIC)) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) (match_dup 2)) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + } + operands[3] = force_reg (Pmode, GEN_INT ()); +}) + +(define_insn "sync_compare_and_swapsi_internal" + [(set (match_operand:SI 0 "register_operand" "=q0") + (mem:SI (match_operand:SI 1 "register_operand" "qA"))) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(mem:SI (match_dup 1)) + (match_operand:SI 2 "register_operand" "q1") + (match_operand:SI 3 "register_operand" "q2") + (match_operand:SI 4 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%4);" + [(set_attr "type" "call")]) + +(define_expand "sync_compare_and_swapsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(match_dup 1) + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "") + (match_dup 4)] + UNSPEC_ATOMIC)) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + } + operands[4] = force_reg (Pmode, GEN_INT (0x420)); +}) diff --git a/gcc/config/bfin/t-bfin b/gcc/config/bfin/t-bfin new file mode 100644 index 000000000..37b6871c1 --- /dev/null +++ b/gcc/config/bfin/t-bfin @@ -0,0 +1,43 @@ +# Copyright (C) 2005, 2007 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +## Target part of the Makefile + +LIB1ASMSRC = bfin/lib1funcs.asm +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 + +EXTRA_PARTS = crtbegin.o crtend.o crti.o crtn.o + +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +# Assemble startup files. +$(T)crti.o: $(srcdir)/config/bfin/crti.s $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/bfin/crti.s + +$(T)crtn.o: $(srcdir)/config/bfin/crtn.s $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/bfin/crtn.s diff --git a/gcc/config/bfin/t-bfin-elf b/gcc/config/bfin/t-bfin-elf new file mode 100644 index 000000000..39209f628 --- /dev/null +++ b/gcc/config/bfin/t-bfin-elf @@ -0,0 +1,81 @@ +# Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +## Target part of the Makefile + +LIB1ASMSRC = bfin/lib1funcs.asm +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _umulsi3_highpart +LIB1ASMFUNCS += _smulsi3_highpart + +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +CRTSTUFF_T_CFLAGS = -fpic +TARGET_LIBGCC2_CFLAGS = -fpic + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_OPTIONS+=mid-shared-library/msep-data/mfdpic mleaf-id-shared-library +MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mfdpic mleaf-id-shared-library + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none 
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none + +MULTILIB_EXCEPTIONS=mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*mfdpic/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library* + +# Assemble startup files. +$(T)crti.o: $(srcdir)/config/bfin/crti.s $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/bfin/crti.s + +$(T)crtn.o: $(srcdir)/config/bfin/crtn.s $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/bfin/crtn.s + +$(T)crtlibid.o: $(srcdir)/config/bfin/crtlibid.s $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtlibid.o -x assembler-with-cpp \ + $(srcdir)/config/bfin/crtlibid.s + +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crti.o crtn.o crtlibid.o diff --git a/gcc/config/bfin/t-bfin-linux b/gcc/config/bfin/t-bfin-linux new file mode 100644 index 000000000..f7ba95501 --- /dev/null +++ b/gcc/config/bfin/t-bfin-linux @@ -0,0 +1,72 @@ +# Copyright (C) 2007, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +## Target part of the Makefile + +LIB1ASMSRC = bfin/lib1funcs.asm +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _umulsi3_highpart +LIB1ASMFUNCS += _smulsi3_highpart + +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +CRTSTUFF_T_CFLAGS = -fpic +TARGET_LIBGCC2_CFLAGS = -fpic + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_DIRNAMES=bf532-none + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none + +SHLIB_MAPFILES=$(srcdir)/config/bfin/libgcc-bfin.ver + +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o + +# This rule uses MULTILIB_MATCHES to generate a definition of +# SYSROOT_SUFFIX_SPEC. +linux-sysroot-suffix.h: $(srcdir)/config/bfin/print-sysroot-suffix.sh + $(SHELL) $(srcdir)/config/bfin/print-sysroot-suffix.sh \ + "$(SYSTEM_HEADER_DIR)/../.." "$(MULTILIB_MATCHES)" \ + "$(MULTILIB_OPTIONS)" > $@ + +generated_files += linux-sysroot-suffix.h diff --git a/gcc/config/bfin/t-bfin-uclinux b/gcc/config/bfin/t-bfin-uclinux new file mode 100644 index 000000000..eb6d2253e --- /dev/null +++ b/gcc/config/bfin/t-bfin-uclinux @@ -0,0 +1,72 @@ +# Copyright (C) 2007, 2008 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +## Target part of the Makefile + +LIB1ASMSRC = bfin/lib1funcs.asm +LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _umulsi3_highpart +LIB1ASMFUNCS += _smulsi3_highpart + +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +CRTSTUFF_T_CFLAGS = -fpic +TARGET_LIBGCC2_CFLAGS = -fpic + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_OPTIONS+=mid-shared-library/msep-data mleaf-id-shared-library +MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mleaf-id-shared-library + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none + +MULTILIB_EXCEPTIONS=mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library* + +# Assemble startup files. +$(T)crtlibid.o: $(srcdir)/config/bfin/crtlibid.s $(GCC_PASSES) + $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \ + -c -o $(T)crtlibid.o -x assembler-with-cpp \ + $(srcdir)/config/bfin/crtlibid.s + +EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtlibid.o diff --git a/gcc/config/bfin/t-rtems b/gcc/config/bfin/t-rtems new file mode 100644 index 000000000..728ab1c4f --- /dev/null +++ b/gcc/config/bfin/t-rtems @@ -0,0 +1,6 @@ +# Multilibs for fbin RTEMS targets. + +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_EXTRA_OPTS = +MULTILIB_EXCEPTIONS = diff --git a/gcc/config/bfin/uclinux.h b/gcc/config/bfin/uclinux.h new file mode 100644 index 000000000..6001b2364 --- /dev/null +++ b/gcc/config/bfin/uclinux.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: crt1%O%s} crti%O%s crtbegin%O%s crtlibid%O%s" + +#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS() + +#define MD_UNWIND_SUPPORT "config/bfin/linux-unwind.h" + +/* Like the definition in gcc.c, but for purposes of uClinux, every link is + static. */ +#define MFWRAP_SPEC " %{fmudflap|fmudflapth: \ + --wrap=malloc --wrap=free --wrap=calloc --wrap=realloc\ + --wrap=mmap --wrap=munmap --wrap=alloca\ + %{fmudflapth: --wrap=pthread_create\ +}} %{fmudflap|fmudflapth: --wrap=main}" + +#undef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 1 diff --git a/gcc/config/cris/arit.c b/gcc/config/cris/arit.c new file mode 100644 index 000000000..32255f99d --- /dev/null +++ b/gcc/config/cris/arit.c @@ -0,0 +1,304 @@ +/* Signed and unsigned multiplication and division and modulus for CRIS. + Contributed by Axis Communications. + Written by Hans-Peter Nilsson , c:a 1992. + + Copyright (C) 1998, 1999, 2000, 2001, 2002, + 2005, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + + +/* Note that we provide prototypes for all "const" functions, to attach + the const attribute. This is necessary in 2.7.2 - adding the + attribute to the function *definition* is a syntax error. + This did not work with e.g. 2.1; back then, the return type had to + be "const". */ + +#include "config.h" + +#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 3 +#define LZ(v) __builtin_clz (v) +#endif + + +#if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \ + || defined (L_modsi3) +/* Result type of divmod worker function. */ +struct quot_rem + { + long quot; + long rem; + }; + +/* This is the worker function for div and mod. It is inlined into the + respective library function. Parameter A must have bit 31 == 0. */ + +static __inline__ struct quot_rem +do_31div (unsigned long a, unsigned long b) + __attribute__ ((__const__, __always_inline__)); + +static __inline__ struct quot_rem +do_31div (unsigned long a, unsigned long b) +{ + /* Adjust operands and result if a is 31 bits. 
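+     (For instance, with a == 100 and b == 7 no adjustment is needed:
+     b is shifted up to 112, quot_digits becomes 4, and the dstep
+     sequence below leaves the quotient 14 in the low four bits of a
+     and the remainder 2 above them.)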
*/ + long extra = 0; + int quot_digits = 0; + + if (b == 0) + { + struct quot_rem ret; + ret.quot = 0xffffffff; + ret.rem = 0xffffffff; + return ret; + } + + if (a < b) + return (struct quot_rem) { 0, a }; + +#ifdef LZ + if (b <= a) + { + quot_digits = LZ (b) - LZ (a); + quot_digits += (a >= (b << quot_digits)); + b <<= quot_digits; + } +#else + while (b <= a) + { + b <<= 1; + quot_digits++; + } +#endif + + /* Is a 31 bits? Note that bit 31 is handled by the caller. */ + if (a & 0x40000000) + { + /* Then make b:s highest bit max 0x40000000, because it must have + been 0x80000000 to be 1 bit higher than a. */ + b >>= 1; + + /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero. */ + if (a >= b) + { + a -= b; + extra = 1 << (quot_digits - 1); + } + else + { + a -= b >> 1; + + /* Remember that we adjusted a by subtracting b * 2 ** Something. */ + extra = 1 << quot_digits; + } + + /* The number of quotient digits will be one less, because + we just adjusted b. */ + quot_digits--; + } + + /* Now do the division part. */ + + /* Subtract b and add ones to the right when a >= b + i.e. "a - (b - 1) == (a - b) + 1". */ + b--; + +#define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b)) + + switch (quot_digits) + { + case 32: DS; case 31: DS; case 30: DS; case 29: DS; + case 28: DS; case 27: DS; case 26: DS; case 25: DS; + case 24: DS; case 23: DS; case 22: DS; case 21: DS; + case 20: DS; case 19: DS; case 18: DS; case 17: DS; + case 16: DS; case 15: DS; case 14: DS; case 13: DS; + case 12: DS; case 11: DS; case 10: DS; case 9: DS; + case 8: DS; case 7: DS; case 6: DS; case 5: DS; + case 4: DS; case 3: DS; case 2: DS; case 1: DS; + case 0:; + } + + { + struct quot_rem ret; + ret.quot = (a & ((1 << quot_digits) - 1)) + extra; + ret.rem = a >> quot_digits; + return ret; + } +} + +#ifdef L_udivsi3 +unsigned long +__Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__)); + +unsigned long +__Udiv (unsigned long a, unsigned long b) +{ + long extra = 0; + + /* Adjust operands and result, if a and/or b is 32 bits. */ + /* Effectively: b & 0x80000000. */ + if ((long) b < 0) + return a >= b; + + /* Effectively: a & 0x80000000. */ + if ((long) a < 0) + { + int tmp = 0; + + if (b == 0) + return 0xffffffff; +#ifdef LZ + tmp = LZ (b); +#else + for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--) + ; + + tmp = 31 - tmp; +#endif + + if ((b << tmp) > a) + { + extra = 1 << (tmp-1); + a -= b << (tmp - 1); + } + else + { + extra = 1 << tmp; + a -= b << tmp; + } + } + + return do_31div (a, b).quot+extra; +} +#endif /* L_udivsi3 */ + +#ifdef L_divsi3 +long +__Div (long a, long b) __attribute__ ((__const__)); + +long +__Div (long a, long b) +{ + long extra = 0; + long sign = (b < 0) ? -1 : 1; + + /* We need to handle a == -2147483648 as expected and must while + doing that avoid producing a sequence like "abs (a) < 0" as GCC + may optimize out the test. That sequence may not be obvious as + we call inline functions. Testing for a being negative and + handling (presumably much rarer than positive) enables us to get + a bit of optimization for an (accumulated) reduction of the + penalty of the 0x80000000 special-case. */ + if (a < 0) + { + sign = -sign; + + if ((a & 0x7fffffff) == 0) + { + /* We're at 0x80000000. Tread carefully. */ + a -= b * sign; + extra = sign; + } + a = -a; + } + + /* We knowingly penalize pre-v10 models by multiplication with the + sign. 
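+     (For instance, __Div (-2147483648, 3) reaches this point with
+     sign == -1, extra == -1 and a == 2147483645, so the line below
+     computes -1 * 715827881 - 1 == -715827882, matching truncating
+     division.)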
*/ + return sign * do_31div (a, __builtin_labs (b)).quot + extra; +} +#endif /* L_divsi3 */ + + +#ifdef L_umodsi3 +unsigned long +__Umod (unsigned long a, unsigned long b) __attribute__ ((__const__)); + +unsigned long +__Umod (unsigned long a, unsigned long b) +{ + /* Adjust operands and result if a and/or b is 32 bits. */ + if ((long) b < 0) + return a >= b ? a - b : a; + + if ((long) a < 0) + { + int tmp = 0; + + if (b == 0) + return a; +#ifdef LZ + tmp = LZ (b); +#else + for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--) + ; + tmp = 31 - tmp; +#endif + + if ((b << tmp) > a) + { + a -= b << (tmp - 1); + } + else + { + a -= b << tmp; + } + } + + return do_31div (a, b).rem; +} +#endif /* L_umodsi3 */ + +#ifdef L_modsi3 +long +__Mod (long a, long b) __attribute__ ((__const__)); + +long +__Mod (long a, long b) +{ + long sign = 1; + + /* We need to handle a == -2147483648 as expected and must while + doing that avoid producing a sequence like "abs (a) < 0" as GCC + may optimize out the test. That sequence may not be obvious as + we call inline functions. Testing for a being negative and + handling (presumably much rarer than positive) enables us to get + a bit of optimization for an (accumulated) reduction of the + penalty of the 0x80000000 special-case. */ + if (a < 0) + { + sign = -1; + if ((a & 0x7fffffff) == 0) + /* We're at 0x80000000. Tread carefully. */ + a += __builtin_labs (b); + a = -a; + } + + return sign * do_31div (a, __builtin_labs (b)).rem; +} +#endif /* L_modsi3 */ +#endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */ + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h new file mode 100644 index 000000000..9718cf9a9 --- /dev/null +++ b/gcc/config/cris/cris-protos.h @@ -0,0 +1,68 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998, 1999, 2000, 2001, 2004, 2005, 2006, 2007, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Axis Communications. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Prototypes for the CRIS port. 
*/ + +#if defined(FILE) || defined(stdin) || defined(stdout) || defined(getc) || defined(putc) +#define STDIO_INCLUDED +#endif + +extern bool cris_simple_epilogue (void); +#ifdef RTX_CODE +extern const char *cris_op_str (rtx); +extern void cris_notice_update_cc (rtx, rtx); +extern bool cris_reload_address_legitimized (rtx, enum machine_mode, int, int, int); +extern int cris_side_effect_mode_ok (enum rtx_code, rtx *, int, int, + int, int, int); +extern bool cris_cc0_user_requires_cmp (rtx); +extern rtx cris_return_addr_rtx (int, rtx); +extern rtx cris_split_movdx (rtx *); +extern int cris_legitimate_pic_operand (rtx); +extern enum cris_pic_symbol_type cris_pic_symbol_type_of (rtx); +extern bool cris_valid_pic_const (rtx, bool); +extern bool cris_store_multiple_op_p (rtx); +extern bool cris_movem_load_rest_p (rtx, int); +extern void cris_asm_output_symbol_ref (FILE *, rtx); +extern bool cris_output_addr_const_extra (FILE *, rtx); +extern int cris_cfun_uses_pic_table (void); +extern void cris_asm_output_case_end (FILE *, int, rtx); +extern rtx cris_gen_movem_load (rtx, rtx, int); +extern rtx cris_emit_movem_store (rtx, rtx, int, bool); +extern void cris_expand_pic_call_address (rtx *); +extern void cris_order_for_addsi3 (rtx *, int); +#endif /* RTX_CODE */ +extern void cris_asm_output_label_ref (FILE *, char *); +extern void cris_target_asm_named_section (const char *, unsigned int, tree); +extern void cris_expand_prologue (void); +extern void cris_expand_epilogue (void); +extern void cris_expand_return (bool); +extern bool cris_return_address_on_stack_for_return (void); +extern bool cris_return_address_on_stack (void); +extern void cris_pragma_expand_mul (struct cpp_reader *); + +/* Need one that returns an int; usable in expressions. */ +extern int cris_fatal (char *); + +extern int cris_initial_elimination_offset (int, int); + +extern void cris_init_expanders (void); + +extern bool cris_function_value_regno_p (const unsigned int); diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c new file mode 100644 index 000000000..aec7cba63 --- /dev/null +++ b/gcc/config/cris/cris.c @@ -0,0 +1,4132 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, + 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "expr.h" +#include "except.h" +#include "function.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "reload.h" +#include "tm_p.h" +#include "debug.h" +#include "output.h" +#include "target.h" +#include "target-def.h" +#include "ggc.h" +#include "optabs.h" +#include "df.h" + +/* Usable when we have an amount to add or subtract, and want the + optimal size of the insn. */ +#define ADDITIVE_SIZE_MODIFIER(size) \ + ((size) <= 63 ? "q" : (size) <= 255 ? "u.b" : (size) <= 65535 ? "u.w" : ".d") + +#define LOSE_AND_RETURN(msgid, x) \ + do \ + { \ + cris_operand_lossage (msgid, x); \ + return; \ + } while (0) + +enum cris_retinsn_type + { CRIS_RETINSN_UNKNOWN = 0, CRIS_RETINSN_RET, CRIS_RETINSN_JUMP }; + +/* Per-function machine data. */ +struct GTY(()) machine_function + { + int needs_return_address_on_stack; + + /* This is the number of registers we save in the prologue due to + stdarg. */ + int stdarg_regs; + + enum cris_retinsn_type return_type; + }; + +/* This little fix suppresses the 'u' or 's' when '%e' in assembly + pattern. */ +static char cris_output_insn_is_bound = 0; + +/* In code for output macros, this is how we know whether e.g. constant + goes in code or in a static initializer. */ +static int in_code = 0; + +/* Fix for reg_overlap_mentioned_p. */ +static int cris_reg_overlap_mentioned_p (rtx, rtx); + +static enum machine_mode cris_promote_function_mode (const_tree, enum machine_mode, + int *, const_tree, int); + +static void cris_print_base (rtx, FILE *); + +static void cris_print_index (rtx, FILE *); + +static void cris_output_addr_const (FILE *, rtx); + +static struct machine_function * cris_init_machine_status (void); + +static rtx cris_struct_value_rtx (tree, int); + +static void cris_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, + tree type, int *, int); + +static int cris_initial_frame_pointer_offset (void); + +static void cris_operand_lossage (const char *, rtx); + +static int cris_reg_saved_in_regsave_area (unsigned int, bool); + +static void cris_print_operand (FILE *, rtx, int); + +static void cris_print_operand_address (FILE *, rtx); + +static bool cris_print_operand_punct_valid_p (unsigned char code); + +static void cris_conditional_register_usage (void); + +static void cris_asm_output_mi_thunk + (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); + +static void cris_file_start (void); +static void cris_init_libfuncs (void); + +static int cris_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +static int cris_memory_move_cost (enum machine_mode, reg_class_t, bool); +static bool cris_rtx_costs (rtx, int, int, int *, bool); +static int cris_address_cost (rtx, bool); +static bool cris_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static int cris_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); +static rtx cris_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static rtx cris_function_incoming_arg (CUMULATIVE_ARGS *, + enum machine_mode, const_tree, bool); +static void cris_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static tree cris_md_asm_clobbers (tree, tree, tree); + +static bool cris_handle_option (size_t, const char *, int); +static 
void cris_option_override (void); + +static bool cris_frame_pointer_required (void); + +static void cris_asm_trampoline_template (FILE *); +static void cris_trampoline_init (rtx, tree, rtx); + +static rtx cris_function_value(const_tree, const_tree, bool); +static rtx cris_libcall_value (enum machine_mode, const_rtx); + +/* This is the parsed result of the "-max-stack-stackframe=" option. If + it (still) is zero, then there was no such option given. */ +int cris_max_stackframe = 0; + +/* This is the parsed result of the "-march=" option, if given. */ +int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ + +static const struct default_options cris_option_optimization_table[] = + { + { OPT_LEVELS_2_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.dword\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* We need to define these, since the 2byte, 4byte, 8byte op:s are only + available in ELF. These "normal" pseudos do not have any alignment + constraints or side-effects. */ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP + +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP + +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND cris_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS cris_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P cris_print_operand_punct_valid_p + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE cris_conditional_register_usage + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK cris_asm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START cris_file_start + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS cris_init_libfuncs + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST cris_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST cris_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS cris_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST cris_address_cost + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE cris_promote_function_mode + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX cris_struct_value_rtx +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS cris_setup_incoming_varargs +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE cris_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES cris_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG cris_function_arg +#undef TARGET_FUNCTION_INCOMING_ARG +#define TARGET_FUNCTION_INCOMING_ARG cris_function_incoming_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE cris_function_arg_advance +#undef TARGET_MD_ASM_CLOBBERS +#define TARGET_MD_ASM_CLOBBERS cris_md_asm_clobbers +#undef TARGET_DEFAULT_TARGET_FLAGS +#define 
TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | CRIS_SUBTARGET_DEFAULT) +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION cris_handle_option +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED cris_frame_pointer_required + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE cris_option_override +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE cris_option_optimization_table + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE cris_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT cris_trampoline_init + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE cris_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE cris_libcall_value + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Helper for cris_load_multiple_op and cris_ret_movem_op. */ + +bool +cris_movem_load_rest_p (rtx op, int offs) +{ + unsigned int reg_count = XVECLEN (op, 0) - offs; + rtx src_addr; + int i; + rtx elt; + int setno; + int regno_dir = 1; + unsigned int regno = 0; + + /* Perform a quick check so we don't blow up below. FIXME: Adjust for + other than (MEM reg). */ + if (reg_count <= 1 + || GET_CODE (XVECEXP (op, 0, offs)) != SET + || !REG_P (SET_DEST (XVECEXP (op, 0, offs))) + || !MEM_P (SET_SRC (XVECEXP (op, 0, offs)))) + return false; + + /* Check a possible post-inc indicator. */ + if (GET_CODE (SET_SRC (XVECEXP (op, 0, offs + 1))) == PLUS) + { + rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 0); + rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 1); + + reg_count--; + + if (reg_count == 1 + || !REG_P (reg) + || !REG_P (SET_DEST (XVECEXP (op, 0, offs + 1))) + || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, offs + 1))) + || !CONST_INT_P (inc) + || INTVAL (inc) != (HOST_WIDE_INT) reg_count * 4) + return false; + i = offs + 2; + } + else + i = offs + 1; + + if (!TARGET_V32) + { + regno_dir = -1; + regno = reg_count - 1; + } + + elt = XVECEXP (op, 0, offs); + src_addr = XEXP (SET_SRC (elt), 0); + + if (GET_CODE (elt) != SET + || !REG_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != regno + || !MEM_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || !memory_address_p (SImode, src_addr)) + return false; + + for (setno = 1; i < XVECLEN (op, 0); setno++, i++) + { + rtx elt = XVECEXP (op, 0, i); + regno += regno_dir; + + if (GET_CODE (elt) != SET + || !REG_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != regno + || !MEM_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr) + || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1)) + || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != setno * 4) + return false; + } + + return true; +} + +/* Worker function for predicate for the parallel contents in a movem + to-memory. */ + +bool +cris_store_multiple_op_p (rtx op) +{ + int reg_count = XVECLEN (op, 0); + rtx dest; + rtx dest_addr; + rtx dest_base; + int i; + rtx elt; + int setno; + int regno_dir = 1; + int regno = 0; + int offset = 0; + + /* Perform a quick check so we don't blow up below. FIXME: Adjust for + other than (MEM reg) and (MEM (PLUS reg const)). 
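+     (The accepted shape is a (parallel [...]) whose first element is a
+     (set (mem ...) (reg ...)) store, optionally followed by a
+     (set (reg) (plus (reg) (const_int))) address update, with the
+     remaining elements storing consecutive registers 4 bytes apart.)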
*/ + if (reg_count <= 1) + return false; + + elt = XVECEXP (op, 0, 0); + + if (GET_CODE (elt) != SET) + return false; + + dest = SET_DEST (elt); + + if (!REG_P (SET_SRC (elt)) || !MEM_P (dest)) + return false; + + dest_addr = XEXP (dest, 0); + + /* Check a possible post-inc indicator. */ + if (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS) + { + rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 0); + rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1); + + reg_count--; + + if (reg_count == 1 + || !REG_P (reg) + || !REG_P (SET_DEST (XVECEXP (op, 0, 1))) + || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, 1))) + || !CONST_INT_P (inc) + /* Support increment by number of registers, and by the offset + of the destination, if it has the form (MEM (PLUS reg + offset)). */ + || !((REG_P (dest_addr) + && REGNO (dest_addr) == REGNO (reg) + && INTVAL (inc) == (HOST_WIDE_INT) reg_count * 4) + || (GET_CODE (dest_addr) == PLUS + && REG_P (XEXP (dest_addr, 0)) + && REGNO (XEXP (dest_addr, 0)) == REGNO (reg) + && CONST_INT_P (XEXP (dest_addr, 1)) + && INTVAL (XEXP (dest_addr, 1)) == INTVAL (inc)))) + return false; + + i = 2; + } + else + i = 1; + + if (!TARGET_V32) + { + regno_dir = -1; + regno = reg_count - 1; + } + + if (GET_CODE (elt) != SET + || !REG_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != (unsigned int) regno + || !MEM_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode) + return false; + + if (REG_P (dest_addr)) + { + dest_base = dest_addr; + offset = 0; + } + else if (GET_CODE (dest_addr) == PLUS + && REG_P (XEXP (dest_addr, 0)) + && CONST_INT_P (XEXP (dest_addr, 1))) + { + dest_base = XEXP (dest_addr, 0); + offset = INTVAL (XEXP (dest_addr, 1)); + } + else + return false; + + for (setno = 1; i < XVECLEN (op, 0); setno++, i++) + { + rtx elt = XVECEXP (op, 0, i); + regno += regno_dir; + + if (GET_CODE (elt) != SET + || !REG_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != (unsigned int) regno + || !MEM_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_base) + || !CONST_INT_P (XEXP (XEXP (SET_DEST (elt), 0), 1)) + || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != setno * 4 + offset) + return false; + } + + return true; +} + +/* The TARGET_CONDITIONAL_REGISTER_USAGE worker. */ + +static void +cris_conditional_register_usage (void) +{ + /* FIXME: This isn't nice. We should be able to use that register for + something else if the PIC table isn't needed. */ + if (flag_pic) + fixed_regs[PIC_OFFSET_TABLE_REGNUM] + = call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + + /* Allow use of ACR (PC in pre-V32) and tweak order. */ + if (TARGET_V32) + { + static const int reg_alloc_order_v32[] = REG_ALLOC_ORDER_V32; + unsigned int i; + + fixed_regs[CRIS_ACR_REGNUM] = 0; + + for (i = 0; + i < sizeof (reg_alloc_order_v32)/sizeof (reg_alloc_order_v32[0]); + i++) + reg_alloc_order[i] = reg_alloc_order_v32[i]; + } + + if (TARGET_HAS_MUL_INSNS) + fixed_regs[CRIS_MOF_REGNUM] = 0; + + /* On early versions, we must use the 16-bit condition-code register, + which has another name. */ + if (cris_cpu_version < 8) + reg_names[CRIS_CC0_REGNUM] = "ccr"; +} + +/* Return crtl->uses_pic_offset_table. For use in cris.md, + since some generated files do not include function.h. 
*/ + +int +cris_cfun_uses_pic_table (void) +{ + return crtl->uses_pic_offset_table; +} + +/* Given an rtx, return the text string corresponding to the CODE of X. + Intended for use in the assembly language output section of a + define_insn. */ + +const char * +cris_op_str (rtx x) +{ + cris_output_insn_is_bound = 0; + switch (GET_CODE (x)) + { + case PLUS: + return "add"; + break; + + case MINUS: + return "sub"; + break; + + case MULT: + /* This function is for retrieving a part of an instruction name for + an operator, for immediate output. If that ever happens for + MULT, we need to apply TARGET_MUL_BUG in the caller. Make sure + we notice. */ + internal_error ("MULT case in cris_op_str"); + break; + + case DIV: + return "div"; + break; + + case AND: + return "and"; + break; + + case IOR: + return "or"; + break; + + case XOR: + return "xor"; + break; + + case NOT: + return "not"; + break; + + case ASHIFT: + return "lsl"; + break; + + case LSHIFTRT: + return "lsr"; + break; + + case ASHIFTRT: + return "asr"; + break; + + case UMIN: + /* Used to control the sign/zero-extend character for the 'E' modifier. + BOUND has none. */ + cris_output_insn_is_bound = 1; + return "bound"; + break; + + default: + return "Unknown operator"; + break; + } +} + +/* Emit an error message when we're in an asm, and a fatal error for + "normal" insns. Formatted output isn't easily implemented, since we + use output_operand_lossage to output the actual message and handle the + categorization of the error. */ + +static void +cris_operand_lossage (const char *msgid, rtx op) +{ + debug_rtx (op); + output_operand_lossage ("%s", msgid); +} + +/* Print an index part of an address to file. */ + +static void +cris_print_index (rtx index, FILE *file) +{ + /* Make the index "additive" unless we'll output a negative number, in + which case the sign character is free (as in free beer). */ + if (!CONST_INT_P (index) || INTVAL (index) >= 0) + putc ('+', file); + + if (REG_P (index)) + fprintf (file, "$%s.b", reg_names[REGNO (index)]); + else if (CONSTANT_P (index)) + cris_output_addr_const (file, index); + else if (GET_CODE (index) == MULT) + { + fprintf (file, "$%s.", + reg_names[REGNO (XEXP (index, 0))]); + + putc (INTVAL (XEXP (index, 1)) == 2 ? 'w' : 'd', file); + } + else if (GET_CODE (index) == SIGN_EXTEND && MEM_P (XEXP (index, 0))) + { + rtx inner = XEXP (index, 0); + rtx inner_inner = XEXP (inner, 0); + + if (GET_CODE (inner_inner) == POST_INC) + { + fprintf (file, "[$%s+].", + reg_names[REGNO (XEXP (inner_inner, 0))]); + putc (GET_MODE (inner) == HImode ? 'w' : 'b', file); + } + else + { + fprintf (file, "[$%s].", reg_names[REGNO (inner_inner)]); + + putc (GET_MODE (inner) == HImode ? 'w' : 'b', file); + } + } + else if (MEM_P (index)) + { + rtx inner = XEXP (index, 0); + if (GET_CODE (inner) == POST_INC) + fprintf (file, "[$%s+].d", reg_names[REGNO (XEXP (inner, 0))]); + else + fprintf (file, "[$%s].d", reg_names[REGNO (inner)]); + } + else + cris_operand_lossage ("unexpected index-type in cris_print_index", + index); +} + +/* Print a base rtx of an address to file. */ + +static void +cris_print_base (rtx base, FILE *file) +{ + if (REG_P (base)) + fprintf (file, "$%s", reg_names[REGNO (base)]); + else if (GET_CODE (base) == POST_INC) + { + gcc_assert (REGNO (XEXP (base, 0)) != CRIS_ACR_REGNUM); + fprintf (file, "$%s+", reg_names[REGNO (XEXP (base, 0))]); + } + else + cris_operand_lossage ("unexpected base-type in cris_print_base", + base); +} + +/* Usable as a guard in expressions. 
*/ + +int +cris_fatal (char *arg) +{ + internal_error (arg); + + /* We'll never get here; this is just to appease compilers. */ + return 0; +} + +/* Return nonzero if REGNO is an ordinary register that *needs* to be + saved together with other registers, possibly by a MOVEM instruction, + or is saved for target-independent reasons. There may be + target-dependent reasons to save the register anyway; this is just a + wrapper for a complicated conditional. */ + +static int +cris_reg_saved_in_regsave_area (unsigned int regno, bool got_really_used) +{ + return + (((df_regs_ever_live_p (regno) + && !call_used_regs[regno]) + || (regno == PIC_OFFSET_TABLE_REGNUM + && (got_really_used + /* It is saved anyway, if there would be a gap. */ + || (flag_pic + && df_regs_ever_live_p (regno + 1) + && !call_used_regs[regno + 1])))) + && (regno != FRAME_POINTER_REGNUM || !frame_pointer_needed) + && regno != CRIS_SRP_REGNUM) + || (crtl->calls_eh_return + && (regno == EH_RETURN_DATA_REGNO (0) + || regno == EH_RETURN_DATA_REGNO (1) + || regno == EH_RETURN_DATA_REGNO (2) + || regno == EH_RETURN_DATA_REGNO (3))); +} + +/* The PRINT_OPERAND worker. */ + +static void +cris_print_operand (FILE *file, rtx x, int code) +{ + rtx operand = x; + + /* Size-strings corresponding to MULT expressions. */ + static const char *const mults[] = { "BAD:0", ".b", ".w", "BAD:3", ".d" }; + + /* New code entries should just be added to the switch below. If + handling is finished, just return. If handling was just a + modification of the operand, the modified operand should be put in + "operand", and then do a break to let default handling + (zero-modifier) output the operand. */ + + switch (code) + { + case 'b': + /* Print the unsigned supplied integer as if it were signed + and < 0, i.e print 255 or 65535 as -1, 254, 65534 as -2, etc. */ + if (!CONST_INT_P (x) + || !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (x), 'O')) + LOSE_AND_RETURN ("invalid operand for 'b' modifier", x); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + INTVAL (x)| (INTVAL (x) <= 255 ? ~255 : ~65535)); + return; + + case 'x': + /* Print assembler code for operator. */ + fprintf (file, "%s", cris_op_str (operand)); + return; + + case 'o': + { + /* A movem modifier working on a parallel; output the register + name. */ + int regno; + + if (GET_CODE (x) != PARALLEL) + LOSE_AND_RETURN ("invalid operand for 'o' modifier", x); + + /* The second item can be (set reg (plus reg const)) to denote a + postincrement. */ + regno + = (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS + ? XVECLEN (x, 0) - 2 + : XVECLEN (x, 0) - 1); + + fprintf (file, "$%s", reg_names [regno]); + } + return; + + case 'O': + { + /* A similar movem modifier; output the memory operand. */ + rtx addr; + + if (GET_CODE (x) != PARALLEL) + LOSE_AND_RETURN ("invalid operand for 'O' modifier", x); + + /* The lowest mem operand is in the first item, but perhaps it + needs to be output as postincremented. */ + addr = MEM_P (SET_SRC (XVECEXP (x, 0, 0))) + ? XEXP (SET_SRC (XVECEXP (x, 0, 0)), 0) + : XEXP (SET_DEST (XVECEXP (x, 0, 0)), 0); + + /* The second item can be a (set reg (plus reg const)) to denote + a modification. */ + if (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS) + { + /* It's a post-increment, if the address is a naked (reg). */ + if (REG_P (addr)) + addr = gen_rtx_POST_INC (SImode, addr); + else + { + /* Otherwise, it's a side-effect; RN=RN+M. */ + fprintf (file, "[$%s=$%s%s%d]", + reg_names [REGNO (SET_DEST (XVECEXP (x, 0, 1)))], + reg_names [REGNO (XEXP (addr, 0))], + INTVAL (XEXP (addr, 1)) < 0 ? 
"" : "+", + (int) INTVAL (XEXP (addr, 1))); + return; + } + } + output_address (addr); + } + return; + + case 'p': + /* Adjust a power of two to its log2. */ + if (!CONST_INT_P (x) || exact_log2 (INTVAL (x)) < 0 ) + LOSE_AND_RETURN ("invalid operand for 'p' modifier", x); + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + + case 's': + /* For an integer, print 'b' or 'w' if <= 255 or <= 65535 + respectively. This modifier also terminates the inhibiting + effects of the 'x' modifier. */ + cris_output_insn_is_bound = 0; + if (GET_MODE (x) == VOIDmode && CONST_INT_P (x)) + { + if (INTVAL (x) >= 0) + { + if (INTVAL (x) <= 255) + putc ('b', file); + else if (INTVAL (x) <= 65535) + putc ('w', file); + else + putc ('d', file); + } + else + putc ('d', file); + return; + } + + /* For a non-integer, print the size of the operand. */ + putc ((GET_MODE (x) == SImode || GET_MODE (x) == SFmode) + ? 'd' : GET_MODE (x) == HImode ? 'w' + : GET_MODE (x) == QImode ? 'b' + /* If none of the above, emit an erroneous size letter. */ + : 'X', + file); + return; + + case 'z': + /* Const_int: print b for -127 <= x <= 255, + w for -32768 <= x <= 65535, else die. */ + if (!CONST_INT_P (x) + || INTVAL (x) < -32768 || INTVAL (x) > 65535) + LOSE_AND_RETURN ("invalid operand for 'z' modifier", x); + putc (INTVAL (x) >= -128 && INTVAL (x) <= 255 ? 'b' : 'w', file); + return; + + case 'Z': + /* If this is a GOT-symbol, print the size-letter corresponding to + -fpic/-fPIC. For everything else, print "d". */ + putc ((flag_pic == 1 + && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == UNSPEC + && XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREAD) + ? 'w' : 'd', file); + return; + + case '#': + /* Output a 'nop' if there's nothing for the delay slot. + This method stolen from the sparc files. */ + if (dbr_sequence_length () == 0) + fputs ("\n\tnop", file); + return; + + case '!': + /* Output directive for alignment padded with "nop" insns. + Optimizing for size, it's plain 4-byte alignment, otherwise we + align the section to a cache-line (32 bytes) and skip at max 2 + bytes, i.e. we skip if it's the last insn on a cache-line. The + latter is faster by a small amount (for two test-programs 99.6% + and 99.9%) and larger by a small amount (ditto 100.1% and + 100.2%). This is supposed to be the simplest yet performance- + wise least intrusive way to make sure the immediately following + (supposed) muls/mulu insn isn't located at the end of a + cache-line. */ + if (TARGET_MUL_BUG) + fputs (optimize_size + ? ".p2alignw 2,0x050f\n\t" + : ".p2alignw 5,0x050f,2\n\t", file); + return; + + case ':': + /* The PIC register. */ + if (! flag_pic) + internal_error ("invalid use of ':' modifier"); + fprintf (file, "$%s", reg_names [PIC_OFFSET_TABLE_REGNUM]); + return; + + case 'H': + /* Print high (most significant) part of something. */ + switch (GET_CODE (operand)) + { + case CONST_INT: + /* If we're having 64-bit HOST_WIDE_INTs, the whole (DImode) + value is kept here, and so may be other than 0 or -1. */ + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + INTVAL (operand_subword (operand, 1, 0, DImode))); + return; + + case CONST_DOUBLE: + /* High part of a long long constant. */ + if (GET_MODE (operand) == VOIDmode) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_HIGH (x)); + return; + } + else + LOSE_AND_RETURN ("invalid operand for 'H' modifier", x); + + case REG: + /* Print reg + 1. 
Check that there's not an attempt to print + high-parts of registers like stack-pointer or higher, except + for SRP (where the "high part" is MOF). */ + if (REGNO (operand) > STACK_POINTER_REGNUM - 2 + && (REGNO (operand) != CRIS_SRP_REGNUM + || CRIS_SRP_REGNUM + 1 != CRIS_MOF_REGNUM + || fixed_regs[CRIS_MOF_REGNUM] != 0)) + LOSE_AND_RETURN ("bad register", operand); + fprintf (file, "$%s", reg_names[REGNO (operand) + 1]); + return; + + case MEM: + /* Adjust memory address to high part. */ + { + rtx adj_mem = operand; + int size + = GET_MODE_BITSIZE (GET_MODE (operand)) / BITS_PER_UNIT; + + /* Adjust so we can use two SImode in DImode. + Calling adj_offsettable_operand will make sure it is an + offsettable address. Don't do this for a postincrement + though; it should remain as it was. */ + if (GET_CODE (XEXP (adj_mem, 0)) != POST_INC) + adj_mem + = adjust_address (adj_mem, GET_MODE (adj_mem), size / 2); + + output_address (XEXP (adj_mem, 0)); + return; + } + + default: + LOSE_AND_RETURN ("invalid operand for 'H' modifier", x); + } + + case 'L': + /* Strip the MEM expression. */ + operand = XEXP (operand, 0); + break; + + case 'e': + /* Like 'E', but ignore state set by 'x'. FIXME: Use code + iterators and attributes in cris.md to avoid the need for %x + and %E (and %e) and state passed between those modifiers. */ + cris_output_insn_is_bound = 0; + /* FALL THROUGH. */ + case 'E': + /* Print 's' if operand is SIGN_EXTEND or 'u' if ZERO_EXTEND unless + cris_output_insn_is_bound is nonzero. */ + if (GET_CODE (operand) != SIGN_EXTEND + && GET_CODE (operand) != ZERO_EXTEND + && !CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'e' modifier", x); + + if (cris_output_insn_is_bound) + { + cris_output_insn_is_bound = 0; + return; + } + + putc (GET_CODE (operand) == SIGN_EXTEND + || (CONST_INT_P (operand) && INTVAL (operand) < 0) + ? 's' : 'u', file); + return; + + case 'm': + /* Print the size letter of the inner element. We can do it by + calling ourselves with the 's' modifier. */ + if (GET_CODE (operand) != SIGN_EXTEND && GET_CODE (operand) != ZERO_EXTEND) + LOSE_AND_RETURN ("invalid operand for 'm' modifier", x); + cris_print_operand (file, XEXP (operand, 0), 's'); + return; + + case 'M': + /* Print the least significant part of operand. */ + if (GET_CODE (operand) == CONST_DOUBLE) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x)); + return; + } + else if (HOST_BITS_PER_WIDE_INT > 32 && CONST_INT_P (operand)) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, + INTVAL (x) & ((unsigned int) 0x7fffffff * 2 + 1)); + return; + } + /* Otherwise the least significant part equals the normal part, + so handle it normally. */ + break; + + case 'A': + /* When emitting an add for the high part of a DImode constant, we + want to use addq for 0 and adds.w for -1. */ + if (!CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'A' modifier", x); + fprintf (file, INTVAL (operand) < 0 ? "adds.w" : "addq"); + return; + + case 'd': + /* If this is a GOT symbol, force it to be emitted as :GOT and + :GOTPLT regardless of -fpic (i.e. not as :GOT16, :GOTPLT16). + Avoid making this too much of a special case. */ + if (flag_pic == 1 && CONSTANT_P (operand)) + { + int flag_pic_save = flag_pic; + + flag_pic = 2; + cris_output_addr_const (file, operand); + flag_pic = flag_pic_save; + return; + } + break; + + case 'D': + /* When emitting an sub for the high part of a DImode constant, we + want to use subq for 0 and subs.w for -1. 
*/ + if (!CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'D' modifier", x); + fprintf (file, INTVAL (operand) < 0 ? "subs.w" : "subq"); + return; + + case 'S': + /* Print the operand as the index-part of an address. + Easiest way out is to use cris_print_index. */ + cris_print_index (operand, file); + return; + + case 'T': + /* Print the size letter for an operand to a MULT, which must be a + const_int with a suitable value. */ + if (!CONST_INT_P (operand) || INTVAL (operand) > 4) + LOSE_AND_RETURN ("invalid operand for 'T' modifier", x); + fprintf (file, "%s", mults[INTVAL (operand)]); + return; + + case 'u': + /* Print "u.w" if a GOT symbol and flag_pic == 1, else ".d". */ + if (flag_pic == 1 + && GET_CODE (operand) == CONST + && GET_CODE (XEXP (operand, 0)) == UNSPEC + && XINT (XEXP (operand, 0), 1) == CRIS_UNSPEC_GOTREAD) + fprintf (file, "u.w"); + else + fprintf (file, ".d"); + return; + + case 0: + /* No code, print as usual. */ + break; + + default: + LOSE_AND_RETURN ("invalid operand modifier letter", x); + } + + /* Print an operand as without a modifier letter. */ + switch (GET_CODE (operand)) + { + case REG: + if (REGNO (operand) > 15 + && REGNO (operand) != CRIS_MOF_REGNUM + && REGNO (operand) != CRIS_SRP_REGNUM + && REGNO (operand) != CRIS_CC0_REGNUM) + internal_error ("internal error: bad register: %d", REGNO (operand)); + fprintf (file, "$%s", reg_names[REGNO (operand)]); + return; + + case MEM: + output_address (XEXP (operand, 0)); + return; + + case CONST_DOUBLE: + if (GET_MODE (operand) == VOIDmode) + /* A long long constant. */ + output_addr_const (file, operand); + else + { + /* Only single precision is allowed as plain operands the + moment. FIXME: REAL_VALUE_FROM_CONST_DOUBLE isn't + documented. */ + REAL_VALUE_TYPE r; + long l; + + /* FIXME: Perhaps check overflow of the "single". */ + REAL_VALUE_FROM_CONST_DOUBLE (r, operand); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + fprintf (file, "0x%lx", l); + } + return; + + case UNSPEC: + /* Fall through. */ + case CONST: + cris_output_addr_const (file, operand); + return; + + case MULT: + case ASHIFT: + { + /* For a (MULT (reg X) const_int) we output "rX.S". */ + int i = CONST_INT_P (XEXP (operand, 1)) + ? INTVAL (XEXP (operand, 1)) : INTVAL (XEXP (operand, 0)); + rtx reg = CONST_INT_P (XEXP (operand, 1)) + ? XEXP (operand, 0) : XEXP (operand, 1); + + if (!REG_P (reg) + || (!CONST_INT_P (XEXP (operand, 0)) + && !CONST_INT_P (XEXP (operand, 1)))) + LOSE_AND_RETURN ("unexpected multiplicative operand", x); + + cris_print_base (reg, file); + fprintf (file, ".%c", + i == 0 || (i == 1 && GET_CODE (operand) == MULT) ? 'b' + : i == 4 ? 'd' + : (i == 2 && GET_CODE (operand) == MULT) || i == 1 ? 'w' + : 'd'); + return; + } + + default: + /* No need to handle all strange variants, let output_addr_const + do it for us. */ + if (CONSTANT_P (operand)) + { + cris_output_addr_const (file, operand); + return; + } + + LOSE_AND_RETURN ("unexpected operand", x); + } +} + +static bool +cris_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '#' || code == '!' || code == ':'); +} + +/* The PRINT_OPERAND_ADDRESS worker. */ + +static void +cris_print_operand_address (FILE *file, rtx x) +{ + /* All these were inside MEM:s so output indirection characters. 
*/ + putc ('[', file); + + if (CONSTANT_ADDRESS_P (x)) + cris_output_addr_const (file, x); + else if (BASE_OR_AUTOINCR_P (x)) + cris_print_base (x, file); + else if (GET_CODE (x) == PLUS) + { + rtx x1, x2; + + x1 = XEXP (x, 0); + x2 = XEXP (x, 1); + if (BASE_P (x1)) + { + cris_print_base (x1, file); + cris_print_index (x2, file); + } + else if (BASE_P (x2)) + { + cris_print_base (x2, file); + cris_print_index (x1, file); + } + else + LOSE_AND_RETURN ("unrecognized address", x); + } + else if (MEM_P (x)) + { + /* A DIP. Output more indirection characters. */ + putc ('[', file); + cris_print_base (XEXP (x, 0), file); + putc (']', file); + } + else + LOSE_AND_RETURN ("unrecognized address", x); + + putc (']', file); +} + +/* The RETURN_ADDR_RTX worker. + We mark that the return address is used, either by EH or + __builtin_return_address, for use by the function prologue and + epilogue. FIXME: This isn't optimal; we just use the mark in the + prologue and epilogue to say that the return address is to be stored + in the stack frame. We could return SRP for leaf-functions and use the + initial-value machinery. */ + +rtx +cris_return_addr_rtx (int count, rtx frameaddr ATTRIBUTE_UNUSED) +{ + cfun->machine->needs_return_address_on_stack = 1; + + /* The return-address is stored just above the saved frame-pointer (if + present). Apparently we can't eliminate from the frame-pointer in + that direction, so use the incoming args (maybe pretended) pointer. */ + return count == 0 + ? gen_rtx_MEM (Pmode, plus_constant (virtual_incoming_args_rtx, -4)) + : NULL_RTX; +} + +/* Accessor used in cris.md:return because cfun->machine isn't available + there. */ + +bool +cris_return_address_on_stack (void) +{ + return df_regs_ever_live_p (CRIS_SRP_REGNUM) + || cfun->machine->needs_return_address_on_stack; +} + +/* Accessor used in cris.md:return because cfun->machine isn't available + there. */ + +bool +cris_return_address_on_stack_for_return (void) +{ + return cfun->machine->return_type == CRIS_RETINSN_RET ? false + : cris_return_address_on_stack (); +} + +/* This used to be the INITIAL_FRAME_POINTER_OFFSET worker; now only + handles FP -> SP elimination offset. */ + +static int +cris_initial_frame_pointer_offset (void) +{ + int regno; + + /* Initial offset is 0 if we don't have a frame pointer. */ + int offs = 0; + bool got_really_used = false; + + if (crtl->uses_pic_offset_table) + { + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), + NULL_RTX); + pop_topmost_sequence (); + } + + /* And 4 for each register pushed. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + offs += 4; + + /* And then, last, we add the locals allocated. */ + offs += get_frame_size (); + + /* And more; the accumulated args size. */ + offs += crtl->outgoing_args_size; + + /* Then round it off, in case we use aligned stack. */ + if (TARGET_STACK_ALIGN) + offs = TARGET_ALIGN_BY_32 ? (offs + 3) & ~3 : (offs + 1) & ~1; + + return offs; +} + +/* The INITIAL_ELIMINATION_OFFSET worker. + Calculate the difference between imaginary registers such as frame + pointer and the stack pointer. Used to eliminate the frame pointer + and imaginary arg pointer. */ + +int +cris_initial_elimination_offset (int fromreg, int toreg) +{ + int fp_sp_offset + = cris_initial_frame_pointer_offset (); + + /* We should be able to use regs_ever_live and related prologue + information here, or alpha should not as well. 
*/ + bool return_address_on_stack = cris_return_address_on_stack (); + + /* Here we act as if the frame-pointer were needed. */ + int ap_fp_offset = 4 + (return_address_on_stack ? 4 : 0); + + if (fromreg == ARG_POINTER_REGNUM + && toreg == FRAME_POINTER_REGNUM) + return ap_fp_offset; + + /* Between the frame pointer and the stack are only "normal" stack + variables and saved registers. */ + if (fromreg == FRAME_POINTER_REGNUM + && toreg == STACK_POINTER_REGNUM) + return fp_sp_offset; + + /* We need to balance out the frame pointer here. */ + if (fromreg == ARG_POINTER_REGNUM + && toreg == STACK_POINTER_REGNUM) + return ap_fp_offset + fp_sp_offset - 4; + + gcc_unreachable (); +} + +/* Worker function for LEGITIMIZE_RELOAD_ADDRESS. */ + +bool +cris_reload_address_legitimized (rtx x, + enum machine_mode mode ATTRIBUTE_UNUSED, + int opnum ATTRIBUTE_UNUSED, + int itype, + int ind_levels ATTRIBUTE_UNUSED) +{ + enum reload_type type = (enum reload_type) itype; + rtx op0, op1; + rtx *op1p; + + if (GET_CODE (x) != PLUS) + return false; + + if (TARGET_V32) + return false; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + op1p = &XEXP (x, 1); + + if (!REG_P (op1)) + return false; + + if (GET_CODE (op0) == SIGN_EXTEND && MEM_P (XEXP (op0, 0))) + { + rtx op00 = XEXP (op0, 0); + rtx op000 = XEXP (op00, 0); + rtx *op000p = &XEXP (op00, 0); + + if ((GET_MODE (op00) == HImode || GET_MODE (op00) == QImode) + && (REG_P (op000) + || (GET_CODE (op000) == POST_INC && REG_P (XEXP (op000, 0))))) + { + bool something_reloaded = false; + + if (GET_CODE (op000) == POST_INC + && REG_P (XEXP (op000, 0)) + && REGNO (XEXP (op000, 0)) > CRIS_LAST_GENERAL_REGISTER) + /* No, this gets too complicated and is too rare to care + about trying to improve on the general code Here. + As the return-value is an all-or-nothing indicator, we + punt on the other register too. */ + return false; + + if ((REG_P (op000) + && REGNO (op000) > CRIS_LAST_GENERAL_REGISTER)) + { + /* The address of the inner mem is a pseudo or wrong + reg: reload that. */ + push_reload (op000, NULL_RTX, op000p, NULL, GENERAL_REGS, + GET_MODE (x), VOIDmode, 0, 0, opnum, type); + something_reloaded = true; + } + + if (REGNO (op1) > CRIS_LAST_GENERAL_REGISTER) + { + /* Base register is a pseudo or wrong reg: reload it. */ + push_reload (op1, NULL_RTX, op1p, NULL, GENERAL_REGS, + GET_MODE (x), VOIDmode, 0, 0, + opnum, type); + something_reloaded = true; + } + + gcc_assert (something_reloaded); + + return true; + } + } + + return false; +} + +/* Worker function for TARGET_REGISTER_MOVE_COST. */ + +static int +cris_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + if (!TARGET_V32) + { + /* Pretend that classes that we don't support are ALL_REGS, so + we give them the highest cost. */ + if (from != SPECIAL_REGS && from != MOF_REGS + && from != GENERAL_REGS && from != GENNONACR_REGS) + from = ALL_REGS; + + if (to != SPECIAL_REGS && to != MOF_REGS + && to != GENERAL_REGS && to != GENNONACR_REGS) + to = ALL_REGS; + } + + /* Can't move to and from a SPECIAL_REGS register, so we have to say + their move cost within that class is higher. How about 7? That's 3 + for a move to a GENERAL_REGS register, 3 for the move from the + GENERAL_REGS register, and 1 for the increased register pressure. + Also, it's higher than the memory move cost, which is in order. + We also do this for ALL_REGS, since we don't want that class to be + preferred (even to memory) at all where GENERAL_REGS doesn't fit. 
+ Whenever it's about to be used, it's for SPECIAL_REGS. If we don't + present a higher cost for ALL_REGS than memory, a SPECIAL_REGS may be + used when a GENERAL_REGS should be used, even if there are call-saved + GENERAL_REGS left to allocate. This is because the fall-back when + the most preferred register class isn't available, isn't the next + (or next good) wider register class, but the *most widest* register + class. */ + + if ((reg_classes_intersect_p (from, SPECIAL_REGS) + && reg_classes_intersect_p (to, SPECIAL_REGS)) + || from == ALL_REGS || to == ALL_REGS) + return 7; + + if (reg_classes_intersect_p (from, SPECIAL_REGS) + || reg_classes_intersect_p (to, SPECIAL_REGS)) + return 3; + + return 2; +} + +/* Worker function for TARGET_MEMORY_MOVE_COST. + + This isn't strictly correct for v0..3 in buswidth-8bit mode, but should + suffice. */ + +static int +cris_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + if (mode == QImode + || mode == HImode) + return 4; + else + return 6; +} + +/* Worker for cris_notice_update_cc; handles the "normal" cases. + FIXME: this code is historical; its functionality should be + refactored to look at insn attributes and moved to + cris_notice_update_cc. Except, we better lose cc0 entirely. */ + +static void +cris_normal_notice_update_cc (rtx exp, rtx insn) +{ + /* "Normal" means, for: + (set (cc0) (...)): + CC is (...). + + (set (reg) (...)): + CC is (reg) and (...) - unless (...) is 0 or reg is a special + register or (v32 and (...) is -32..-1), then CC does not change. + CC_NO_OVERFLOW unless (...) is reg or mem. + + (set (mem) (...)): + CC does not change. + + (set (pc) (...)): + CC does not change. + + (parallel + (set (reg1) (mem (bdap/biap))) + (set (reg2) (bdap/biap))): + CC is (reg1) and (mem (reg2)) + + (parallel + (set (mem (bdap/biap)) (reg1)) [or 0] + (set (reg2) (bdap/biap))): + CC does not change. + + (where reg and mem includes strict_low_parts variants thereof) + + For all others, assume CC is clobbered. + Note that we do not have to care about setting CC_NO_OVERFLOW, + since the overflow flag is set to 0 (i.e. right) for + instructions where it does not have any sane sense, but where + other flags have meanings. (This includes shifts; the carry is + not set by them). + + Note that there are other parallel constructs we could match, + but we don't do that yet. */ + + if (GET_CODE (exp) == SET) + { + /* FIXME: Check when this happens. It looks like we should + actually do a CC_STATUS_INIT here to be safe. */ + if (SET_DEST (exp) == pc_rtx) + return; + + /* Record CC0 changes, so we do not have to output multiple + test insns. */ + if (SET_DEST (exp) == cc0_rtx) + { + CC_STATUS_INIT; + + if (GET_CODE (SET_SRC (exp)) == COMPARE + && XEXP (SET_SRC (exp), 1) == const0_rtx) + cc_status.value1 = XEXP (SET_SRC (exp), 0); + else + cc_status.value1 = SET_SRC (exp); + + /* Handle flags for the special btstq on one bit. */ + if (GET_CODE (cc_status.value1) == ZERO_EXTRACT + && XEXP (cc_status.value1, 1) == const1_rtx) + { + if (CONST_INT_P (XEXP (cc_status.value1, 0))) + /* Using cmpq. */ + cc_status.flags = CC_INVERTED; + else + /* A one-bit btstq. */ + cc_status.flags = CC_Z_IN_NOT_N; + } + + else if (GET_CODE (SET_SRC (exp)) == COMPARE) + { + if (!REG_P (XEXP (SET_SRC (exp), 0)) + && XEXP (SET_SRC (exp), 1) != const0_rtx) + /* For some reason gcc will not canonicalize compare + operations, reversing the sign by itself if + operands are in wrong order. 
*/ + /* (But NOT inverted; eq is still eq.) */ + cc_status.flags = CC_REVERSED; + + /* This seems to be overlooked by gcc. FIXME: Check again. + FIXME: Is it really safe? */ + cc_status.value2 + = gen_rtx_MINUS (GET_MODE (SET_SRC (exp)), + XEXP (SET_SRC (exp), 0), + XEXP (SET_SRC (exp), 1)); + } + return; + } + else if (REG_P (SET_DEST (exp)) + || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART + && REG_P (XEXP (SET_DEST (exp), 0)))) + { + /* A register is set; normally CC is set to show that no + test insn is needed. Catch the exceptions. */ + + /* If not to cc0, then no "set"s in non-natural mode give + ok cc0... */ + if (GET_MODE_SIZE (GET_MODE (SET_DEST (exp))) > UNITS_PER_WORD + || GET_MODE_CLASS (GET_MODE (SET_DEST (exp))) == MODE_FLOAT) + { + /* ... except add:s and sub:s in DImode. */ + if (GET_MODE (SET_DEST (exp)) == DImode + && (GET_CODE (SET_SRC (exp)) == PLUS + || GET_CODE (SET_SRC (exp)) == MINUS)) + { + CC_STATUS_INIT; + cc_status.value1 = SET_DEST (exp); + cc_status.value2 = SET_SRC (exp); + + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + cc_status.value2 = 0; + + /* Add and sub may set V, which gets us + unoptimizable results in "gt" and "le" condition + codes. */ + cc_status.flags |= CC_NO_OVERFLOW; + + return; + } + } + else if (SET_SRC (exp) == const0_rtx + || (REG_P (SET_SRC (exp)) + && (REGNO (SET_SRC (exp)) + > CRIS_LAST_GENERAL_REGISTER)) + || (TARGET_V32 + && GET_CODE (SET_SRC (exp)) == CONST_INT + && CRIS_CONST_OK_FOR_LETTER_P (INTVAL (SET_SRC (exp)), + 'I'))) + { + /* There's no CC0 change for this case. Just check + for overlap. */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + else + { + CC_STATUS_INIT; + cc_status.value1 = SET_DEST (exp); + cc_status.value2 = SET_SRC (exp); + + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + cc_status.value2 = 0; + + /* Some operations may set V, which gets us + unoptimizable results in "gt" and "le" condition + codes. */ + if (GET_CODE (SET_SRC (exp)) == PLUS + || GET_CODE (SET_SRC (exp)) == MINUS + || GET_CODE (SET_SRC (exp)) == NEG) + cc_status.flags |= CC_NO_OVERFLOW; + + /* For V32, nothing with a register destination sets + C and V usefully. */ + if (TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + + return; + } + } + else if (MEM_P (SET_DEST (exp)) + || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART + && MEM_P (XEXP (SET_DEST (exp), 0)))) + { + /* When SET to MEM, then CC is not changed (except for + overlap). */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + } + else if (GET_CODE (exp) == PARALLEL) + { + if (GET_CODE (XVECEXP (exp, 0, 0)) == SET + && GET_CODE (XVECEXP (exp, 0, 1)) == SET + && REG_P (XEXP (XVECEXP (exp, 0, 1), 0))) + { + if (REG_P (XEXP (XVECEXP (exp, 0, 0), 0)) + && MEM_P (XEXP (XVECEXP (exp, 0, 0), 1))) + { + CC_STATUS_INIT; + + /* For "move.S [rx=ry+o],rz", say CC reflects + value1=rz and value2=[rx] */ + cc_status.value1 = XEXP (XVECEXP (exp, 0, 0), 0); + cc_status.value2 + = replace_equiv_address (XEXP (XVECEXP (exp, 0, 0), 1), + XEXP (XVECEXP (exp, 0, 1), 0)); + + /* Huh? A side-effect cannot change the destination + register. 
*/ + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + internal_error ("internal error: sideeffect-insn affecting main effect"); + + /* For V32, moves to registers don't set C and V. */ + if (TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + return; + } + else if ((REG_P (XEXP (XVECEXP (exp, 0, 0), 1)) + || XEXP (XVECEXP (exp, 0, 0), 1) == const0_rtx) + && MEM_P (XEXP (XVECEXP (exp, 0, 0), 0))) + { + /* For "move.S rz,[rx=ry+o]" and "clear.S [rx=ry+o]", + say flags are not changed, except for overlap. */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + } + } + + /* If we got here, the case wasn't covered by the code above. */ + CC_STATUS_INIT; +} + +/* This function looks into the pattern to see how this insn affects + condition codes. + + Used when to eliminate test insns before a condition-code user, + such as a "scc" insn or a conditional branch. This includes + checking if the entities that cc was updated by, are changed by the + operation. + + Currently a jumble of the old peek-inside-the-insn and the newer + check-cc-attribute methods. */ + +void +cris_notice_update_cc (rtx exp, rtx insn) +{ + enum attr_cc attrval = get_attr_cc (insn); + + /* Check if user specified "-mcc-init" as a bug-workaround. Remember + to still set CC_REVERSED as below, since that's required by some + compare insn alternatives. (FIXME: GCC should do this virtual + operand swap by itself.) A test-case that may otherwise fail is + gcc.c-torture/execute/20000217-1.c -O0 and -O1. */ + if (TARGET_CCINIT) + { + CC_STATUS_INIT; + + if (attrval == CC_REV) + cc_status.flags = CC_REVERSED; + return; + } + + /* Slowly, we're converting to using attributes to control the setting + of condition-code status. */ + switch (attrval) + { + case CC_NONE: + /* Even if it is "none", a setting may clobber a previous + cc-value, so check. */ + if (GET_CODE (exp) == SET) + { + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + } + return; + + case CC_CLOBBER: + CC_STATUS_INIT; + return; + + case CC_REV: + case CC_NOOV32: + case CC_NORMAL: + cris_normal_notice_update_cc (exp, insn); + + /* The "test" insn doesn't clear (carry and) overflow on V32. We + can change bge => bpl and blt => bmi by passing on to the cc0 + user that V should not be considered; bgt and ble are taken + care of by other methods (see {tst,cmp}{si,hi,qi}). */ + if (attrval == CC_NOOV32 && TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + return; + + default: + internal_error ("unknown cc_attr value"); + } + + CC_STATUS_INIT; +} + +/* Return != 0 if the return sequence for the current function is short, + like "ret" or "jump [sp+]". Prior to reloading, we can't tell if + registers must be saved, so return 0 then. */ + +bool +cris_simple_epilogue (void) +{ + unsigned int regno; + unsigned int reglimit = STACK_POINTER_REGNUM; + bool got_really_used = false; + + if (! reload_completed + || frame_pointer_needed + || get_frame_size () != 0 + || crtl->args.pretend_args_size + || crtl->args.size + || crtl->outgoing_args_size + || crtl->calls_eh_return + + /* If we're not supposed to emit prologue and epilogue, we must + not emit return-type instructions. */ + || !TARGET_PROLOGUE_EPILOGUE) + return false; + + /* Can't return from stacked return address with v32. 
*/ + if (TARGET_V32 && cris_return_address_on_stack ()) + return false; + + if (crtl->uses_pic_offset_table) + { + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* No simple epilogue if there are saved registers. */ + for (regno = 0; regno < reglimit; regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + return false; + + return true; +} + +/* Expand a return insn (just one insn) marked as using SRP or stack + slot depending on parameter ON_STACK. */ + +void +cris_expand_return (bool on_stack) +{ + /* FIXME: emit a parallel with a USE for SRP or the stack-slot, to + tell "ret" from "jump [sp+]". Some, but not all, other parts of + GCC expect just (return) to do the right thing when optimizing, so + we do that until they're fixed. Currently, all return insns in a + function must be the same (not really a limiting factor) so we need + to check that it doesn't change half-way through. */ + emit_jump_insn (gen_rtx_RETURN (VOIDmode)); + + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_RET || !on_stack); + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_JUMP || on_stack); + + cfun->machine->return_type + = on_stack ? CRIS_RETINSN_JUMP : CRIS_RETINSN_RET; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +cris_rtx_costs (rtx x, int code, int outer_code, int *total, + bool speed) +{ + switch (code) + { + case CONST_INT: + { + HOST_WIDE_INT val = INTVAL (x); + if (val == 0) + *total = 0; + else if (val < 32 && val >= -32) + *total = 1; + /* Eight or 16 bits are a word and cycle more expensive. */ + else if (val <= 32767 && val >= -32768) + *total = 2; + /* A 32-bit constant (or very seldom, unsigned 16 bits) costs + another word. FIXME: This isn't linear to 16 bits. */ + else + *total = 4; + return true; + } + + case LABEL_REF: + *total = 6; + return true; + + case CONST: + case SYMBOL_REF: + *total = 6; + return true; + + case CONST_DOUBLE: + if (x != CONST0_RTX (GET_MODE (x) == VOIDmode ? DImode : GET_MODE (x))) + *total = 12; + else + /* Make 0.0 cheap, else test-insns will not be used. */ + *total = 0; + return true; + + case MULT: + /* If we have one arm of an ADDI, make sure it gets the cost of + one insn, i.e. zero cost for this operand, and just the cost + of the PLUS, as the insn is created by combine from a PLUS + and an ASHIFT, and the MULT cost below would make the + combined value be larger than the separate insns. The insn + validity is checked elsewhere by combine. + + FIXME: this case is a stop-gap for 4.3 and 4.4, this whole + function should be rewritten. */ + if (outer_code == PLUS && BIAP_INDEX_P (x)) + { + *total = 0; + return true; + } + + /* Identify values that are no powers of two. Powers of 2 are + taken care of already and those values should not be changed. */ + if (!CONST_INT_P (XEXP (x, 1)) + || exact_log2 (INTVAL (XEXP (x, 1)) < 0)) + { + /* If we have a multiply insn, then the cost is between + 1 and 2 "fast" instructions. */ + if (TARGET_HAS_MUL_INSNS) + { + *total = COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2; + return true; + } + + /* Estimate as 4 + 4 * #ofbits. 
*/ + *total = COSTS_N_INSNS (132); + return true; + } + return false; + + case UDIV: + case MOD: + case UMOD: + case DIV: + if (!CONST_INT_P (XEXP (x, 1)) + || exact_log2 (INTVAL (XEXP (x, 1)) < 0)) + { + /* Estimate this as 4 + 8 * #of bits. */ + *total = COSTS_N_INSNS (260); + return true; + } + return false; + + case AND: + if (CONST_INT_P (XEXP (x, 1)) + /* Two constants may actually happen before optimization. */ + && !CONST_INT_P (XEXP (x, 0)) + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (XEXP (x, 1)), 'I')) + { + *total + = (rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, speed) + 2 + + 2 * GET_MODE_NUNITS (GET_MODE (XEXP (x, 0)))); + return true; + } + return false; + + case ZERO_EXTRACT: + if (outer_code != COMPARE) + return false; + /* fall through */ + + case ZERO_EXTEND: case SIGN_EXTEND: + *total = rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, speed); + return true; + + default: + return false; + } +} + +/* The ADDRESS_COST worker. */ + +static int +cris_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) +{ + /* The metric to use for the cost-macros is unclear. + The metric used here is (the number of cycles needed) / 2, + where we consider equal a cycle for a word of code and a cycle to + read memory. FIXME: Adding "+ 1" to all values would avoid + returning 0, as tree-ssa-loop-ivopts.c as of r128272 "normalizes" + 0 to 1, thereby giving equal costs to [rN + rM] and [rN]. + Unfortunately(?) such a hack would expose other pessimizations, + at least with g++.dg/tree-ssa/ivopts-1.C, adding insns to the + loop there, without apparent reason. */ + + /* The cheapest addressing modes get 0, since nothing extra is needed. */ + if (BASE_OR_AUTOINCR_P (x)) + return 0; + + /* An indirect mem must be a DIP. This means two bytes extra for code, + and 4 bytes extra for memory read, i.e. (2 + 4) / 2. */ + if (MEM_P (x)) + return (2 + 4) / 2; + + /* Assume (2 + 4) / 2 for a single constant; a dword, since it needs + an extra DIP prefix and 4 bytes of constant in most cases. */ + if (CONSTANT_P (x)) + return (2 + 4) / 2; + + /* Handle BIAP and BDAP prefixes. */ + if (GET_CODE (x) == PLUS) + { + rtx tem1 = XEXP (x, 0); + rtx tem2 = XEXP (x, 1); + + /* Local extended canonicalization rule: the first operand must + be REG, unless it's an operation (MULT). */ + if (!REG_P (tem1) && GET_CODE (tem1) != MULT) + tem1 = tem2, tem2 = XEXP (x, 0); + + /* We'll "assume" we have canonical RTX now. */ + gcc_assert (REG_P (tem1) || GET_CODE (tem1) == MULT); + + /* A BIAP is 2 extra bytes for the prefix insn, nothing more. We + recognize the typical MULT which is always in tem1 because of + insn canonicalization. */ + if ((GET_CODE (tem1) == MULT && BIAP_INDEX_P (tem1)) + || REG_P (tem2)) + return 2 / 2; + + /* A BDAP (quick) is 2 extra bytes. Any constant operand to the + PLUS is always found in tem2. */ + if (CONST_INT_P (tem2) && INTVAL (tem2) < 128 && INTVAL (tem2) >= -128) + return 2 / 2; + + /* A BDAP -32768 .. 32767 is like BDAP quick, but with 2 extra + bytes. */ + if (CONST_INT_P (tem2) + && CRIS_CONST_OK_FOR_LETTER_P (INTVAL (tem2), 'L')) + return (2 + 2) / 2; + + /* A BDAP with some other constant is 2 bytes extra. */ + if (CONSTANT_P (tem2)) + return (2 + 2 + 2) / 2; + + /* BDAP with something indirect should have a higher cost than + BIAP with register. FIXME: Should it cost like a MEM or more? */ + return (2 + 2 + 2) / 2; + } + + /* What else? Return a high cost. It matters only for valid + addressing modes. */ + return 10; +} + +/* Check various objections to the side-effect. 
Used in the test-part + of an anonymous insn describing an insn with a possible side-effect. + Returns nonzero if the implied side-effect is ok. + + code : PLUS or MULT + ops : An array of rtx:es. lreg, rreg, rval, + The variables multop and other_op are indexes into this, + or -1 if they are not applicable. + lreg : The register that gets assigned in the side-effect. + rreg : One register in the side-effect expression + rval : The other register, or an int. + multop : An integer to multiply rval with. + other_op : One of the entities of the main effect, + whose mode we must consider. */ + +int +cris_side_effect_mode_ok (enum rtx_code code, rtx *ops, + int lreg, int rreg, int rval, + int multop, int other_op) +{ + /* Find what value to multiply with, for rx =ry + rz * n. */ + int mult = multop < 0 ? 1 : INTVAL (ops[multop]); + + rtx reg_rtx = ops[rreg]; + rtx val_rtx = ops[rval]; + + /* The operands may be swapped. Canonicalize them in reg_rtx and + val_rtx, where reg_rtx always is a reg (for this constraint to + match). */ + if (! BASE_P (reg_rtx)) + reg_rtx = val_rtx, val_rtx = ops[rreg]; + + /* Don't forget to check that reg_rtx really is a reg. If it isn't, + we have no business. */ + if (! BASE_P (reg_rtx)) + return 0; + + /* Don't do this when -mno-split. */ + if (!TARGET_SIDE_EFFECT_PREFIXES) + return 0; + + /* The mult expression may be hidden in lreg. FIXME: Add more + commentary about that. */ + if (GET_CODE (val_rtx) == MULT) + { + mult = INTVAL (XEXP (val_rtx, 1)); + val_rtx = XEXP (val_rtx, 0); + code = MULT; + } + + /* First check the "other operand". */ + if (other_op >= 0) + { + if (GET_MODE_SIZE (GET_MODE (ops[other_op])) > UNITS_PER_WORD) + return 0; + + /* Check if the lvalue register is the same as the "other + operand". If so, the result is undefined and we shouldn't do + this. FIXME: Check again. */ + if ((BASE_P (ops[lreg]) + && BASE_P (ops[other_op]) + && REGNO (ops[lreg]) == REGNO (ops[other_op])) + || rtx_equal_p (ops[other_op], ops[lreg])) + return 0; + } + + /* Do not accept frame_pointer_rtx as any operand. */ + if (ops[lreg] == frame_pointer_rtx || ops[rreg] == frame_pointer_rtx + || ops[rval] == frame_pointer_rtx + || (other_op >= 0 && ops[other_op] == frame_pointer_rtx)) + return 0; + + if (code == PLUS + && ! BASE_P (val_rtx)) + { + + /* Do not allow rx = rx + n if a normal add or sub with same size + would do. */ + if (rtx_equal_p (ops[lreg], reg_rtx) + && CONST_INT_P (val_rtx) + && (INTVAL (val_rtx) <= 63 && INTVAL (val_rtx) >= -63)) + return 0; + + /* Check allowed cases, like [r(+)?].[bwd] and const. */ + if (CONSTANT_P (val_rtx)) + return 1; + + if (MEM_P (val_rtx) && BASE_OR_AUTOINCR_P (XEXP (val_rtx, 0))) + return 1; + + if (GET_CODE (val_rtx) == SIGN_EXTEND + && MEM_P (XEXP (val_rtx, 0)) + && BASE_OR_AUTOINCR_P (XEXP (XEXP (val_rtx, 0), 0))) + return 1; + + /* If we got here, it's not a valid addressing mode. */ + return 0; + } + else if (code == MULT + || (code == PLUS && BASE_P (val_rtx))) + { + /* Do not allow rx = rx + ry.S, since it doesn't give better code. */ + if (rtx_equal_p (ops[lreg], reg_rtx) + || (mult == 1 && rtx_equal_p (ops[lreg], val_rtx))) + return 0; + + /* Do not allow bad multiply-values. */ + if (mult != 1 && mult != 2 && mult != 4) + return 0; + + /* Only allow r + ... */ + if (! BASE_P (reg_rtx)) + return 0; + + /* If we got here, all seems ok. + (All checks need to be done above). */ + return 1; + } + + /* If we get here, the caller got its initial tests wrong. 
*/ + internal_error ("internal error: cris_side_effect_mode_ok with bad operands"); +} + +/* Whether next_cc0_user of insn is LE or GT or requires a real compare + insn for other reasons. */ + +bool +cris_cc0_user_requires_cmp (rtx insn) +{ + rtx cc0_user = NULL; + rtx body; + rtx set; + + gcc_assert (insn != NULL); + + if (!TARGET_V32) + return false; + + cc0_user = next_cc0_user (insn); + if (cc0_user == NULL) + return false; + + body = PATTERN (cc0_user); + set = single_set (cc0_user); + + /* Users can be sCC and bCC. */ + if (JUMP_P (cc0_user) + && GET_CODE (body) == SET + && SET_DEST (body) == pc_rtx + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE + && XEXP (XEXP (SET_SRC (body), 0), 0) == cc0_rtx) + { + return + GET_CODE (XEXP (SET_SRC (body), 0)) == GT + || GET_CODE (XEXP (SET_SRC (body), 0)) == LE; + } + else if (set) + { + return + GET_CODE (SET_SRC (body)) == GT + || GET_CODE (SET_SRC (body)) == LE; + } + + gcc_unreachable (); +} + +/* The function reg_overlap_mentioned_p in CVS (still as of 2001-05-16) + does not handle the case where the IN operand is strict_low_part; it + does handle it for X. Test-case in Axis-20010516. This function takes + care of that for THIS port. FIXME: strict_low_part is going away + anyway. */ + +static int +cris_reg_overlap_mentioned_p (rtx x, rtx in) +{ + /* The function reg_overlap_mentioned now handles when X is + strict_low_part, but not when IN is a STRICT_LOW_PART. */ + if (GET_CODE (in) == STRICT_LOW_PART) + in = XEXP (in, 0); + + return reg_overlap_mentioned_p (x, in); +} + +/* The TARGET_ASM_NAMED_SECTION worker. + We just dispatch to the functions for ELF and a.out. */ + +void +cris_target_asm_named_section (const char *name, unsigned int flags, + tree decl) +{ + if (! TARGET_ELF) + default_no_named_section (name, flags, decl); + else + default_elf_asm_named_section (name, flags, decl); +} + +/* Return TRUE iff X is a CONST valid for e.g. indexing. + ANY_OPERAND is 0 if X is in a CALL_P insn or movsi, 1 + elsewhere. */ + +bool +cris_valid_pic_const (rtx x, bool any_operand) +{ + gcc_assert (flag_pic); + + switch (GET_CODE (x)) + { + case CONST_INT: + case CONST_DOUBLE: + return true; + default: + ; + } + + if (GET_CODE (x) != CONST) + return false; + + x = XEXP (x, 0); + + /* Handle (const (plus (unspec .. UNSPEC_GOTREL) (const_int ...))). */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == UNSPEC + && (XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREL + || XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_PCREL) + && CONST_INT_P (XEXP (x, 1))) + x = XEXP (x, 0); + + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + /* A PCREL operand is only valid for call and movsi. */ + case CRIS_UNSPEC_PLT_PCREL: + case CRIS_UNSPEC_PCREL: + return !any_operand; + + case CRIS_UNSPEC_PLT_GOTREL: + case CRIS_UNSPEC_PLTGOTREAD: + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_GOTREL: + return true; + default: + gcc_unreachable (); + } + + return cris_pic_symbol_type_of (x) == cris_no_symbol; +} + +/* Helper function to find the right PIC-type symbol to generate, + given the original (non-PIC) representation. */ + +enum cris_pic_symbol_type +cris_pic_symbol_type_of (rtx x) +{ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + return SYMBOL_REF_LOCAL_P (x) + ? 
cris_rel_symbol : cris_got_symbol; + + case LABEL_REF: + return cris_rel_symbol; + + case CONST: + return cris_pic_symbol_type_of (XEXP (x, 0)); + + case PLUS: + case MINUS: + { + enum cris_pic_symbol_type t1 = cris_pic_symbol_type_of (XEXP (x, 0)); + enum cris_pic_symbol_type t2 = cris_pic_symbol_type_of (XEXP (x, 1)); + + gcc_assert (t1 == cris_no_symbol || t2 == cris_no_symbol); + + if (t1 == cris_got_symbol || t1 == cris_got_symbol) + return cris_got_symbol_needing_fixup; + + return t1 != cris_no_symbol ? t1 : t2; + } + + case CONST_INT: + case CONST_DOUBLE: + return cris_no_symbol; + + case UNSPEC: + /* Likely an offsettability-test attempting to add a constant to + a GOTREAD symbol, which can't be handled. */ + return cris_invalid_pic_symbol; + + default: + fatal_insn ("unrecognized supposed constant", x); + } + + gcc_unreachable (); +} + +/* The LEGITIMATE_PIC_OPERAND_P worker. */ + +int +cris_legitimate_pic_operand (rtx x) +{ + /* Symbols are not valid PIC operands as-is; just constants. */ + return cris_valid_pic_const (x, true); +} + +/* The ASM_OUTPUT_CASE_END worker. */ + +void +cris_asm_output_case_end (FILE *stream, int num, rtx table) +{ + if (TARGET_V32) + { + rtx whole_jump_insn = PATTERN (PREV_INSN (PREV_INSN (table))); + + /* This can be a SEQUENCE, meaning the delay-slot of the jump is + filled. */ + rtx parallel_jump + = (GET_CODE (whole_jump_insn) == SEQUENCE + ? PATTERN (XVECEXP (whole_jump_insn, 0, 0)) : whole_jump_insn); + + asm_fprintf (stream, + "\t.word %LL%d-.%s\n", + CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (XVECEXP + (parallel_jump, 0, 0), + 1), 2), 0)), + (TARGET_PDEBUG ? "; default" : "")); + return; + } + + asm_fprintf (stream, + "\t.word %LL%d-%LL%d%s\n", + CODE_LABEL_NUMBER (XEXP + (XEXP + (XEXP + (XVECEXP + (PATTERN + (PREV_INSN + (PREV_INSN (table))), 0, 0), 1), + 2), 0)), + num, + (TARGET_PDEBUG ? "; default" : "")); +} + +/* TARGET_HANDLE_OPTION worker. We just store the values into local + variables here. Checks for correct semantics are in + cris_option_override. */ + +static bool +cris_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, + int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + case OPT_metrax100: + target_flags + |= (MASK_SVINTO + + MASK_ETRAX4_ADD + + MASK_ALIGN_BY_32); + break; + + case OPT_mno_etrax100: + target_flags + &= ~(MASK_SVINTO + + MASK_ETRAX4_ADD + + MASK_ALIGN_BY_32); + break; + + case OPT_m32_bit: + case OPT_m32bit: + target_flags + |= (MASK_STACK_ALIGN + + MASK_CONST_ALIGN + + MASK_DATA_ALIGN + + MASK_ALIGN_BY_32); + break; + + case OPT_m16_bit: + case OPT_m16bit: + target_flags + |= (MASK_STACK_ALIGN + + MASK_CONST_ALIGN + + MASK_DATA_ALIGN); + break; + + case OPT_m8_bit: + case OPT_m8bit: + target_flags + &= ~(MASK_STACK_ALIGN + + MASK_CONST_ALIGN + + MASK_DATA_ALIGN); + break; + + default: + break; + } + + CRIS_SUBTARGET_HANDLE_OPTION(code, arg, value); + + return true; +} + +/* The TARGET_OPTION_OVERRIDE worker. + As is the norm, this also parses -mfoo=bar type parameters. */ + +static void +cris_option_override (void) +{ + if (cris_max_stackframe_str) + { + cris_max_stackframe = atoi (cris_max_stackframe_str); + + /* Do some sanity checking. */ + if (cris_max_stackframe < 0 || cris_max_stackframe > 0x20000000) + internal_error ("-max-stackframe=%d is not usable, not between 0 and %d", + cris_max_stackframe, 0x20000000); + } + + /* Let "-metrax4" and "-metrax100" change the cpu version. 
*/ + if (TARGET_SVINTO && cris_cpu_version < CRIS_CPU_SVINTO) + cris_cpu_version = CRIS_CPU_SVINTO; + else if (TARGET_ETRAX4_ADD && cris_cpu_version < CRIS_CPU_ETRAX4) + cris_cpu_version = CRIS_CPU_ETRAX4; + + /* Parse -march=... and its synonym, the deprecated -mcpu=... */ + if (cris_cpu_str) + { + cris_cpu_version + = (*cris_cpu_str == 'v' ? atoi (cris_cpu_str + 1) : -1); + + if (strcmp ("etrax4", cris_cpu_str) == 0) + cris_cpu_version = 3; + + if (strcmp ("svinto", cris_cpu_str) == 0 + || strcmp ("etrax100", cris_cpu_str) == 0) + cris_cpu_version = 8; + + if (strcmp ("ng", cris_cpu_str) == 0 + || strcmp ("etrax100lx", cris_cpu_str) == 0) + cris_cpu_version = 10; + + if (cris_cpu_version < 0 || cris_cpu_version > 32) + error ("unknown CRIS version specification in -march= or -mcpu= : %s", + cris_cpu_str); + + /* Set the target flags. */ + if (cris_cpu_version >= CRIS_CPU_ETRAX4) + target_flags |= MASK_ETRAX4_ADD; + + /* If this is Svinto or higher, align for 32 bit accesses. */ + if (cris_cpu_version >= CRIS_CPU_SVINTO) + target_flags + |= (MASK_SVINTO | MASK_ALIGN_BY_32 + | MASK_STACK_ALIGN | MASK_CONST_ALIGN + | MASK_DATA_ALIGN); + + /* Note that we do not add new flags when it can be completely + described with a macro that uses -mcpu=X. So + TARGET_HAS_MUL_INSNS is (cris_cpu_version >= CRIS_CPU_NG). */ + } + + if (cris_tune_str) + { + int cris_tune + = (*cris_tune_str == 'v' ? atoi (cris_tune_str + 1) : -1); + + if (strcmp ("etrax4", cris_tune_str) == 0) + cris_tune = 3; + + if (strcmp ("svinto", cris_tune_str) == 0 + || strcmp ("etrax100", cris_tune_str) == 0) + cris_tune = 8; + + if (strcmp ("ng", cris_tune_str) == 0 + || strcmp ("etrax100lx", cris_tune_str) == 0) + cris_tune = 10; + + if (cris_tune < 0 || cris_tune > 32) + error ("unknown CRIS cpu version specification in -mtune= : %s", + cris_tune_str); + + if (cris_tune >= CRIS_CPU_SVINTO) + /* We have currently nothing more to tune than alignment for + memory accesses. */ + target_flags + |= (MASK_STACK_ALIGN | MASK_CONST_ALIGN + | MASK_DATA_ALIGN | MASK_ALIGN_BY_32); + } + + if (cris_cpu_version >= CRIS_CPU_V32) + target_flags &= ~(MASK_SIDE_EFFECT_PREFIXES|MASK_MUL_BUG); + + if (flag_pic) + { + /* Use error rather than warning, so invalid use is easily + detectable. Still change to the values we expect, to avoid + further errors. */ + if (! TARGET_LINUX) + { + error ("-fPIC and -fpic are not supported in this configuration"); + flag_pic = 0; + } + + /* Turn off function CSE. We need to have the addresses reach the + call expanders to get PLT-marked, as they could otherwise be + compared against zero directly or indirectly. After visiting the + call expanders they will then be cse:ed, as the call expanders + force_reg the addresses, effectively forcing flag_no_function_cse + to 0. */ + flag_no_function_cse = 1; + } + + if (write_symbols == DWARF2_DEBUG && ! TARGET_ELF) + { + warning (0, "that particular -g option is invalid with -maout and -melinux"); + write_symbols = DBX_DEBUG; + } + + /* Set the per-function-data initializer. */ + init_machine_status = cris_init_machine_status; +} + +/* The TARGET_ASM_OUTPUT_MI_THUNK worker. 
*/ + +static void +cris_asm_output_mi_thunk (FILE *stream, + tree thunkdecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree funcdecl) +{ + if (delta > 0) + fprintf (stream, "\tadd%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n", + ADDITIVE_SIZE_MODIFIER (delta), delta, + reg_names[CRIS_FIRST_ARG_REG]); + else if (delta < 0) + fprintf (stream, "\tsub%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n", + ADDITIVE_SIZE_MODIFIER (-delta), -delta, + reg_names[CRIS_FIRST_ARG_REG]); + + if (flag_pic) + { + const char *name = XSTR (XEXP (DECL_RTL (funcdecl), 0), 0); + + name = (* targetm.strip_name_encoding) (name); + + if (TARGET_V32) + { + fprintf (stream, "\tba "); + assemble_name (stream, name); + fprintf (stream, "%s\n", CRIS_PLT_PCOFFSET_SUFFIX); + } + else + { + fprintf (stream, "add.d "); + assemble_name (stream, name); + fprintf (stream, "%s,$pc\n", CRIS_PLT_PCOFFSET_SUFFIX); + } + } + else + { + fprintf (stream, "jump "); + assemble_name (stream, XSTR (XEXP (DECL_RTL (funcdecl), 0), 0)); + fprintf (stream, "\n"); + + if (TARGET_V32) + fprintf (stream, "\tnop\n"); + } +} + +/* Boilerplate emitted at start of file. + + NO_APP *only at file start* means faster assembly. It also means + comments are not allowed. In some cases comments will be output + for debugging purposes. Make sure they are allowed then. + + We want a .file directive only if TARGET_ELF. */ +static void +cris_file_start (void) +{ + /* These expressions can vary at run time, so we cannot put + them into TARGET_INITIALIZER. */ + targetm.asm_file_start_app_off = !(TARGET_PDEBUG || flag_print_asm_name); + targetm.asm_file_start_file_directive = TARGET_ELF; + + default_file_start (); +} + +/* Rename the function calls for integer multiply and divide. */ +static void +cris_init_libfuncs (void) +{ + set_optab_libfunc (smul_optab, SImode, "__Mul"); + set_optab_libfunc (sdiv_optab, SImode, "__Div"); + set_optab_libfunc (udiv_optab, SImode, "__Udiv"); + set_optab_libfunc (smod_optab, SImode, "__Mod"); + set_optab_libfunc (umod_optab, SImode, "__Umod"); +} + +/* The INIT_EXPANDERS worker sets the per-function-data initializer and + mark functions. */ + +void +cris_init_expanders (void) +{ + /* Nothing here at the moment. */ +} + +/* Zero initialization is OK for all current fields. */ + +static struct machine_function * +cris_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Split a 2 word move (DI or presumably DF) into component parts. + Originally a copy of gen_split_move_double in m32r.c. */ + +rtx +cris_split_movdx (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + rtx dest = operands[0]; + rtx src = operands[1]; + rtx val; + + /* We used to have to handle (SUBREG (MEM)) here, but that should no + longer happen; after reload there are no SUBREGs any more, and we're + only called after reload. */ + CRIS_ASSERT (GET_CODE (dest) != SUBREG && GET_CODE (src) != SUBREG); + + start_sequence (); + if (REG_P (dest)) + { + int dregno = REGNO (dest); + + /* Reg-to-reg copy. */ + if (REG_P (src)) + { + int sregno = REGNO (src); + + int reverse = (dregno == sregno + 1); + + /* We normally copy the low-numbered register first. However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + operand_subword (src, reverse, TRUE, mode))); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, !reverse, TRUE, mode), + operand_subword (src, !reverse, TRUE, mode))); + } + /* Constant-to-reg copy. */ + else if (CONST_INT_P (src) || GET_CODE (src) == CONST_DOUBLE) + { + rtx words[2]; + split_double (src, &words[0], &words[1]); + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), + words[0])); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), + words[1])); + } + /* Mem-to-reg copy. */ + else if (MEM_P (src)) + { + /* If the high-address word is used in the address, we must load it + last. Otherwise, load it first. */ + rtx addr = XEXP (src, 0); + int reverse + = (refers_to_regno_p (dregno, dregno + 1, addr, NULL) != 0); + + /* The original code implies that we can't do + move.x [rN+],rM move.x [rN],rM+1 + when rN is dead, because of REG_NOTES damage. That is + consistent with what I've seen, so don't try it. + + We have two different cases here; if the addr is POST_INC, + just pass it through, otherwise add constants. */ + + if (GET_CODE (addr) == POST_INC) + { + rtx mem; + rtx insn; + + /* Whenever we emit insns with post-incremented + addresses ourselves, we must add a post-inc note + manually. */ + mem = change_address (src, SImode, addr); + insn + = gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), mem); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + + mem = copy_rtx (mem); + insn + = gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), mem); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + } + else + { + /* Make sure we don't get any other addresses with + embedded postincrements. They should be stopped in + GO_IF_LEGITIMATE_ADDRESS, but we're here for your + safety. */ + if (side_effects_p (addr)) + fatal_insn ("unexpected side-effects in address", addr); + + emit_insn (gen_rtx_SET + (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + change_address + (src, SImode, + plus_constant (addr, + reverse * UNITS_PER_WORD)))); + emit_insn (gen_rtx_SET + (VOIDmode, + operand_subword (dest, ! reverse, TRUE, mode), + change_address + (src, SImode, + plus_constant (addr, + (! reverse) * + UNITS_PER_WORD)))); + } + } + else + internal_error ("unknown src"); + } + /* Reg-to-mem copy or clear mem. */ + else if (MEM_P (dest) + && (REG_P (src) + || src == const0_rtx + || src == CONST0_RTX (DFmode))) + { + rtx addr = XEXP (dest, 0); + + if (GET_CODE (addr) == POST_INC) + { + rtx mem; + rtx insn; + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. 
*/ + mem = change_address (dest, SImode, addr); + insn + = gen_rtx_SET (VOIDmode, + mem, operand_subword (src, 0, TRUE, mode)); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + + mem = copy_rtx (mem); + insn + = gen_rtx_SET (VOIDmode, + mem, + operand_subword (src, 1, TRUE, mode)); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + } + else + { + /* Make sure we don't get any other addresses with embedded + postincrements. They should be stopped in + GO_IF_LEGITIMATE_ADDRESS, but we're here for your safety. */ + if (side_effects_p (addr)) + fatal_insn ("unexpected side-effects in address", addr); + + emit_insn (gen_rtx_SET + (VOIDmode, + change_address (dest, SImode, addr), + operand_subword (src, 0, TRUE, mode))); + + emit_insn (gen_rtx_SET + (VOIDmode, + change_address (dest, SImode, + plus_constant (addr, + UNITS_PER_WORD)), + operand_subword (src, 1, TRUE, mode))); + } + } + + else + internal_error ("unknown dest"); + + val = get_insns (); + end_sequence (); + return val; +} + +/* The expander for the prologue pattern name. */ + +void +cris_expand_prologue (void) +{ + int regno; + int size = get_frame_size (); + /* Shorten the used name for readability. */ + int cfoa_size = crtl->outgoing_args_size; + int last_movem_reg = -1; + int framesize = 0; + rtx mem, insn; + int return_address_on_stack = cris_return_address_on_stack (); + int got_really_used = false; + int n_movem_regs = 0; + int pretend = crtl->args.pretend_args_size; + + /* Don't do anything if no prologues or epilogues are wanted. */ + if (!TARGET_PROLOGUE_EPILOGUE) + return; + + CRIS_ASSERT (size >= 0); + + if (crtl->uses_pic_offset_table) + { + /* A reference may have been optimized out (like the abort () in + fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that + it's still used. */ + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* Align the size to what's best for the CPU model. */ + if (TARGET_STACK_ALIGN) + size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1; + + if (pretend) + { + /* See also cris_setup_incoming_varargs where + cfun->machine->stdarg_regs is set. There are other setters of + crtl->args.pretend_args_size than stdarg handling, like + for an argument passed with parts in R13 and stack. We must + not store R13 into the pretend-area for that case, as GCC does + that itself. "Our" store would be marked as redundant and GCC + will attempt to remove it, which will then be flagged as an + internal error; trying to remove a frame-related insn. */ + int stdarg_regs = cfun->machine->stdarg_regs; + + framesize += pretend; + + for (regno = CRIS_FIRST_ARG_REG + CRIS_MAX_ARGS_IN_REGS - 1; + stdarg_regs > 0; + regno--, pretend -= 4, stdarg_regs--) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -4))); + /* FIXME: When dwarf2 frame output and unless asynchronous + exceptions, make dwarf2 bundle together all stack + adjustments like it does for registers between stack + adjustments. 
*/ + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_varargs_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno)); + + /* Note the absence of RTX_FRAME_RELATED_P on the above insn: + the value isn't restored, so we don't want to tell dwarf2 + that it's been stored to stack, else EH handling info would + get confused. */ + } + + /* For other setters of crtl->args.pretend_args_size, we + just adjust the stack by leaving the remaining size in + "pretend", handled below. */ + } + + /* Save SRP if not a leaf function. */ + if (return_address_on_stack) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -4 - pretend))); + pretend = 0; + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM)); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += 4; + } + + /* Set up the frame pointer, if needed. */ + if (frame_pointer_needed) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -4 - pretend))); + pretend = 0; + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + framesize += 4; + } + + /* Between frame-pointer and saved registers lie the area for local + variables. If we get here with "pretended" size remaining, count + it into the general stack size. */ + size += pretend; + + /* Get a contiguous sequence of registers, starting with R0, that need + to be saved. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + n_movem_regs++; + + /* Check if movem may be used for registers so far. */ + if (regno == last_movem_reg + 1) + /* Yes, update next expected register. */ + last_movem_reg = regno; + else + { + /* We cannot use movem for all registers. We have to flush + any movem:ed registers we got so far. */ + if (last_movem_reg != -1) + { + int n_saved + = (n_movem_regs == 1) ? 1 : last_movem_reg + 1; + + /* It is a win to use a side-effect assignment for + 64 <= size <= 128. But side-effect on movem was + not usable for CRIS v0..3. Also only do it if + side-effects insns are allowed. 
*/ + if ((last_movem_reg + 1) * 4 + size >= 64 + && (last_movem_reg + 1) * 4 + size <= 128 + && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1) + && TARGET_SIDE_EFFECT_PREFIXES) + { + mem + = gen_rtx_MEM (SImode, + plus_constant (stack_pointer_rtx, + -(n_saved * 4 + size))); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn + = cris_emit_movem_store (mem, GEN_INT (n_saved), + -(n_saved * 4 + size), + true); + } + else + { + insn + = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -(n_saved * 4 + size))); + insn = emit_insn (insn); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), + 0, true); + } + + framesize += n_saved * 4 + size; + last_movem_reg = -1; + size = 0; + } + + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -4 - size))); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno)); + RTX_FRAME_RELATED_P (insn) = 1; + + framesize += 4 + size; + size = 0; + } + } + } + + /* Check after, if we could movem all registers. This is the normal case. */ + if (last_movem_reg != -1) + { + int n_saved + = (n_movem_regs == 1) ? 1 : last_movem_reg + 1; + + /* Side-effect on movem was not usable for CRIS v0..3. Also only + do it if side-effects insns are allowed. */ + if ((last_movem_reg + 1) * 4 + size >= 64 + && (last_movem_reg + 1) * 4 + size <= 128 + && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1) + && TARGET_SIDE_EFFECT_PREFIXES) + { + mem + = gen_rtx_MEM (SImode, + plus_constant (stack_pointer_rtx, + -(n_saved * 4 + size))); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), + -(n_saved * 4 + size), true); + } + else + { + insn + = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -(n_saved * 4 + size))); + insn = emit_insn (insn); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), 0, true); + } + + framesize += n_saved * 4 + size; + /* We have to put outgoing argument space after regs. */ + if (cfoa_size) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -cfoa_size))); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += cfoa_size; + } + } + else if ((size + cfoa_size) > 0) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + -(cfoa_size + size)))); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += size + cfoa_size; + } + + /* Set up the PIC register, if it is used. */ + if (got_really_used) + { + rtx got + = gen_rtx_UNSPEC (SImode, gen_rtvec (1, const0_rtx), CRIS_UNSPEC_GOT); + emit_move_insn (pic_offset_table_rtx, got); + + /* FIXME: This is a cover-up for flow2 messing up; it doesn't + follow exceptional paths and tries to delete the GOT load as + unused, if it isn't used on the non-exceptional paths. Other + ports have similar or other cover-ups, or plain bugs marking + the GOT register load as maybe-dead. To see this, remove the + line below and try libsupc++/vec.cc or a trivial + "static void y (); void x () {try {y ();} catch (...) {}}". 
*/ + emit_use (pic_offset_table_rtx); + } + + if (cris_max_stackframe && framesize > cris_max_stackframe) + warning (0, "stackframe too big: %d bytes", framesize); +} + +/* The expander for the epilogue pattern. */ + +void +cris_expand_epilogue (void) +{ + int regno; + int size = get_frame_size (); + int last_movem_reg = -1; + int argspace_offset = crtl->outgoing_args_size; + int pretend = crtl->args.pretend_args_size; + rtx mem; + bool return_address_on_stack = cris_return_address_on_stack (); + /* A reference may have been optimized out + (like the abort () in fde_split in unwind-dw2-fde.c, at least 3.2.1) + so check that it's still used. */ + int got_really_used = false; + int n_movem_regs = 0; + + if (!TARGET_PROLOGUE_EPILOGUE) + return; + + if (crtl->uses_pic_offset_table) + { + /* A reference may have been optimized out (like the abort () in + fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that + it's still used. */ + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* Align byte count of stack frame. */ + if (TARGET_STACK_ALIGN) + size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1; + + /* Check how many saved regs we can movem. They start at r0 and must + be contiguous. */ + for (regno = 0; + regno < FIRST_PSEUDO_REGISTER; + regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + n_movem_regs++; + + if (regno == last_movem_reg + 1) + last_movem_reg = regno; + else + break; + } + + /* If there was only one register that really needed to be saved + through movem, don't use movem. */ + if (n_movem_regs == 1) + last_movem_reg = -1; + + /* Now emit "normal" move insns for all regs higher than the movem + regs. */ + for (regno = FIRST_PSEUDO_REGISTER - 1; + regno > last_movem_reg; + regno--) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + rtx insn; + + if (argspace_offset) + { + /* There is an area for outgoing parameters located before + the saved registers. We have to adjust for that. */ + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + argspace_offset))); + /* Make sure we only do this once. */ + argspace_offset = 0; + } + + mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (gen_rtx_raw_REG (SImode, regno), mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + /* If we have any movem-restore, do it now. */ + if (last_movem_reg != -1) + { + rtx insn; + + if (argspace_offset) + { + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + argspace_offset))); + argspace_offset = 0; + } + + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn + = emit_insn (cris_gen_movem_load (mem, + GEN_INT (last_movem_reg + 1), 0)); + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. 
*/ + if (side_effects_p (PATTERN (insn))) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + /* If we don't clobber all of the allocated stack area (we've already + deallocated saved registers), GCC might want to schedule loads from + the stack to *after* the stack-pointer restore, which introduces an + interrupt race condition. This happened for the initial-value + SRP-restore for g++.dg/eh/registers1.C (noticed by inspection of + other failure for that test). It also happened for the stack slot + for the return value in (one version of) + linux/fs/dcache.c:__d_lookup, at least with "-O2 + -fno-omit-frame-pointer". */ + + /* Restore frame pointer if necessary. */ + if (frame_pointer_needed) + { + rtx insn; + + emit_insn (gen_cris_frame_deallocated_barrier ()); + + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (frame_pointer_rtx, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + else if ((size + argspace_offset) != 0) + { + emit_insn (gen_cris_frame_deallocated_barrier ()); + + /* If there was no frame-pointer to restore sp from, we must + explicitly deallocate local variables. */ + + /* Handle space for outgoing parameters that hasn't been handled + yet. */ + size += argspace_offset; + + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, size))); + } + + /* If this function has no pushed register parameters + (stdargs/varargs), and if it is not a leaf function, then we have + the return address on the stack. */ + if (return_address_on_stack && pretend == 0) + { + if (TARGET_V32 || crtl->calls_eh_return) + { + rtx mem; + rtx insn; + rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM); + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (srpreg, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_raw_REG (SImode, + CRIS_STACKADJ_REG))); + cris_expand_return (false); + } + else + cris_expand_return (true); + + return; + } + + /* If we pushed some register parameters, then adjust the stack for + them. */ + if (pretend != 0) + { + /* If SRP is stored on the way, we need to restore it first. */ + if (return_address_on_stack) + { + rtx mem; + rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM); + rtx insn; + + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (srpreg, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, pretend))); + } + + /* Perform the "physical" unwinding that the EH machinery calculated. 
*/ + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_raw_REG (SImode, + CRIS_STACKADJ_REG))); + cris_expand_return (false); +} + +/* Worker function for generating movem from mem for load_multiple. */ + +rtx +cris_gen_movem_load (rtx src, rtx nregs_rtx, int nprefix) +{ + int nregs = INTVAL (nregs_rtx); + rtvec vec; + int eltno = 1; + int i; + rtx srcreg = XEXP (src, 0); + unsigned int regno = nregs - 1; + int regno_inc = -1; + + if (TARGET_V32) + { + regno = 0; + regno_inc = 1; + } + + if (GET_CODE (srcreg) == POST_INC) + srcreg = XEXP (srcreg, 0); + + CRIS_ASSERT (REG_P (srcreg)); + + /* Don't use movem for just one insn. The insns are equivalent except + for the pipeline hazard (on v32); movem does not forward the loaded + registers so there's a three cycles penalty for their use. */ + if (nregs == 1) + return gen_movsi (gen_rtx_REG (SImode, 0), src); + + vec = rtvec_alloc (nprefix + nregs + + (GET_CODE (XEXP (src, 0)) == POST_INC)); + + if (GET_CODE (XEXP (src, 0)) == POST_INC) + { + RTVEC_ELT (vec, nprefix + 1) + = gen_rtx_SET (VOIDmode, srcreg, plus_constant (srcreg, nregs * 4)); + eltno++; + } + + src = replace_equiv_address (src, srcreg); + RTVEC_ELT (vec, nprefix) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno), src); + regno += regno_inc; + + for (i = 1; i < nregs; i++, eltno++) + { + RTVEC_ELT (vec, nprefix + eltno) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno), + adjust_address_nv (src, SImode, i * 4)); + regno += regno_inc; + } + + return gen_rtx_PARALLEL (VOIDmode, vec); +} + +/* Worker function for generating movem to mem. If FRAME_RELATED, notes + are added that the dwarf2 machinery understands. */ + +rtx +cris_emit_movem_store (rtx dest, rtx nregs_rtx, int increment, + bool frame_related) +{ + int nregs = INTVAL (nregs_rtx); + rtvec vec; + int eltno = 1; + int i; + rtx insn; + rtx destreg = XEXP (dest, 0); + unsigned int regno = nregs - 1; + int regno_inc = -1; + + if (TARGET_V32) + { + regno = 0; + regno_inc = 1; + } + + if (GET_CODE (destreg) == POST_INC) + increment += nregs * 4; + + if (GET_CODE (destreg) == POST_INC || GET_CODE (destreg) == PLUS) + destreg = XEXP (destreg, 0); + + CRIS_ASSERT (REG_P (destreg)); + + /* Don't use movem for just one insn. The insns are equivalent except + for the pipeline hazard (on v32); movem does not forward the loaded + registers so there's a three cycles penalty for use. */ + if (nregs == 1) + { + rtx mov = gen_rtx_SET (VOIDmode, dest, gen_rtx_REG (SImode, 0)); + + if (increment == 0) + { + insn = emit_insn (mov); + if (frame_related) + RTX_FRAME_RELATED_P (insn) = 1; + return insn; + } + + /* If there was a request for a side-effect, create the ordinary + parallel. */ + vec = rtvec_alloc (2); + + RTVEC_ELT (vec, 0) = mov; + RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, destreg, + plus_constant (destreg, increment)); + if (frame_related) + { + RTX_FRAME_RELATED_P (mov) = 1; + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1; + } + } + else + { + vec = rtvec_alloc (nregs + (increment != 0 ? 1 : 0)); + RTVEC_ELT (vec, 0) + = gen_rtx_SET (VOIDmode, + replace_equiv_address (dest, + plus_constant (destreg, + increment)), + gen_rtx_REG (SImode, regno)); + regno += regno_inc; + + /* The dwarf2 info wants this mark on each component in a parallel + that's part of the prologue (though it's optional on the first + component). 
*/ + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 0)) = 1; + + if (increment != 0) + { + RTVEC_ELT (vec, 1) + = gen_rtx_SET (VOIDmode, destreg, + plus_constant (destreg, + increment != 0 + ? increment : nregs * 4)); + eltno++; + + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1; + + /* Don't call adjust_address_nv on a post-incremented address if + we can help it. */ + if (GET_CODE (XEXP (dest, 0)) == POST_INC) + dest = replace_equiv_address (dest, destreg); + } + + for (i = 1; i < nregs; i++, eltno++) + { + RTVEC_ELT (vec, eltno) + = gen_rtx_SET (VOIDmode, adjust_address_nv (dest, SImode, i * 4), + gen_rtx_REG (SImode, regno)); + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, eltno)) = 1; + regno += regno_inc; + } + } + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, vec)); + + /* Because dwarf2out.c handles the insns in a parallel as a sequence, + we need to keep the stack adjustment separate, after the + MEM-setters. Else the stack-adjustment in the second component of + the parallel would be mishandled; the offsets for the SETs that + follow it would be wrong. We prepare for this by adding a + REG_FRAME_RELATED_EXPR with the MEM-setting parts in a SEQUENCE + followed by the increment. Note that we have FRAME_RELATED_P on + all the SETs, including the original stack adjustment SET in the + parallel. */ + if (frame_related) + { + if (increment != 0) + { + rtx seq = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (nregs + 1)); + XVECEXP (seq, 0, 0) = copy_rtx (XVECEXP (PATTERN (insn), 0, 0)); + for (i = 1; i < nregs; i++) + XVECEXP (seq, 0, i) + = copy_rtx (XVECEXP (PATTERN (insn), 0, i + 1)); + XVECEXP (seq, 0, nregs) = copy_rtx (XVECEXP (PATTERN (insn), 0, 1)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, seq); + } + + RTX_FRAME_RELATED_P (insn) = 1; + } + + return insn; +} + +/* Worker function for expanding the address for PIC function calls. */ + +void +cris_expand_pic_call_address (rtx *opp) +{ + rtx op = *opp; + + gcc_assert (MEM_P (op)); + op = XEXP (op, 0); + + /* It might be that code can be generated that jumps to 0 (or to a + specific address). Don't die on that. (There is a + testcase.) */ + if (CONSTANT_ADDRESS_P (op) && !CONST_INT_P (op)) + { + enum cris_pic_symbol_type t = cris_pic_symbol_type_of (op); + + CRIS_ASSERT (can_create_pseudo_p ()); + + /* For local symbols (non-PLT), just get the plain symbol + reference into a register. For symbols that can be PLT, make + them PLT. */ + if (t == cris_rel_symbol) + { + /* For v32, we're fine as-is; just PICify the symbol. Forcing + into a register caused performance regression for 3.2.1, + observable in __floatdidf and elsewhere in libgcc. */ + if (TARGET_V32) + { + rtx sym = GET_CODE (op) != CONST ? op : get_related_value (op); + HOST_WIDE_INT offs = get_integer_term (op); + + /* We can't get calls to sym+N, N integer, can we? */ + gcc_assert (offs == 0); + + op = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_PCREL)); + } + else + op = force_reg (Pmode, op); + } + else if (t == cris_got_symbol) + { + if (TARGET_AVOID_GOTPLT) + { + /* Change a "jsr sym" into (allocate register rM, rO) + "move.d (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_GOTREL)),rM" + "add.d rPIC,rM,rO", "jsr rO" for pre-v32 and + "jsr (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_PCREL))" + for v32. */ + rtx tem, rm, ro; + gcc_assert (can_create_pseudo_p ()); + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), + TARGET_V32 + ? 
CRIS_UNSPEC_PLT_PCREL + : CRIS_UNSPEC_PLT_GOTREL); + tem = gen_rtx_CONST (Pmode, tem); + if (TARGET_V32) + op = tem; + else + { + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, tem); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, + pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + op = ro; + } + } + else + { + /* Change a "jsr sym" into (allocate register rM, rO) + "move.d (const (unspec [sym] CRIS_UNSPEC_PLTGOTREAD)),rM" + "add.d rPIC,rM,rO" "jsr [rO]" with the memory access + marked as not trapping and not aliasing. No "move.d + [rO],rP" as that would invite to re-use of a value + that should not be reused. FIXME: Need a peephole2 + for cases when this is cse:d from the call, to change + back to just get the PLT entry address, so we don't + resolve the same symbol over and over (the memory + access of the PLTGOT isn't constant). */ + rtx tem, mem, rm, ro; + + gcc_assert (can_create_pseudo_p ()); + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), + CRIS_UNSPEC_PLTGOTREAD); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, + pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + mem = gen_rtx_MEM (Pmode, ro); + + /* This MEM doesn't alias anything. Whether it aliases + other same symbols is unimportant. */ + set_mem_alias_set (mem, new_alias_set ()); + MEM_NOTRAP_P (mem) = 1; + op = mem; + } + } + else + /* Can't possibly get a GOT-needing-fixup for a function-call, + right? */ + fatal_insn ("unidentifiable call op", op); + + *opp = replace_equiv_address (*opp, op); + } +} + +/* Make sure operands are in the right order for an addsi3 insn as + generated by a define_split. Nothing but REG_P as the first + operand is recognized by addsi3 after reload. OPERANDS contains + the operands, with the first at OPERANDS[N] and the second at + OPERANDS[N+1]. */ + +void +cris_order_for_addsi3 (rtx *operands, int n) +{ + if (!REG_P (operands[n])) + { + rtx tem = operands[n]; + operands[n] = operands[n + 1]; + operands[n + 1] = tem; + } +} + +/* Use from within code, from e.g. PRINT_OPERAND and + PRINT_OPERAND_ADDRESS. Macros used in output_addr_const need to emit + different things depending on whether code operand or constant is + emitted. */ + +static void +cris_output_addr_const (FILE *file, rtx x) +{ + in_code++; + output_addr_const (file, x); + in_code--; +} + +/* Worker function for ASM_OUTPUT_SYMBOL_REF. */ + +void +cris_asm_output_symbol_ref (FILE *file, rtx x) +{ + gcc_assert (GET_CODE (x) == SYMBOL_REF); + + if (flag_pic && in_code > 0) + { + const char *origstr = XSTR (x, 0); + const char *str; + str = (* targetm.strip_name_encoding) (origstr); + assemble_name (file, str); + + /* Sanity check. */ + if (!TARGET_V32 && !crtl->uses_pic_offset_table) + output_operand_lossage ("PIC register isn't set up"); + } + else + assemble_name (file, XSTR (x, 0)); +} + +/* Worker function for ASM_OUTPUT_LABEL_REF. */ + +void +cris_asm_output_label_ref (FILE *file, char *buf) +{ + if (flag_pic && in_code > 0) + { + assemble_name (file, buf); + + /* Sanity check. */ + if (!TARGET_V32 && !crtl->uses_pic_offset_table) + internal_error ("emitting PIC operand, but PIC register " + "isn%'t set up"); + } + else + assemble_name (file, buf); +} + +/* Worker function for OUTPUT_ADDR_CONST_EXTRA. 
*/ + +bool +cris_output_addr_const_extra (FILE *file, rtx xconst) +{ + switch (GET_CODE (xconst)) + { + rtx x; + + case UNSPEC: + x = XVECEXP (xconst, 0, 0); + CRIS_ASSERT (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF + || GET_CODE (x) == CONST); + output_addr_const (file, x); + switch (XINT (xconst, 1)) + { + case CRIS_UNSPEC_PCREL: + /* We only get this with -fpic/PIC to tell it apart from an + invalid symbol. We can't tell here, but it should only + be the operand of a call or movsi. */ + gcc_assert (TARGET_V32 && flag_pic); + break; + + case CRIS_UNSPEC_PLT_PCREL: + gcc_assert (TARGET_V32); + fprintf (file, ":PLT"); + break; + + case CRIS_UNSPEC_PLT_GOTREL: + gcc_assert (!TARGET_V32); + fprintf (file, ":PLTG"); + break; + + case CRIS_UNSPEC_GOTREL: + gcc_assert (!TARGET_V32); + fprintf (file, ":GOTOFF"); + break; + + case CRIS_UNSPEC_GOTREAD: + if (flag_pic == 1) + fprintf (file, ":GOT16"); + else + fprintf (file, ":GOT"); + break; + + case CRIS_UNSPEC_PLTGOTREAD: + if (flag_pic == 1) + fprintf (file, CRIS_GOTPLT_SUFFIX "16"); + else + fprintf (file, CRIS_GOTPLT_SUFFIX); + break; + + default: + gcc_unreachable (); + } + return true; + + default: + return false; + } +} + +/* Worker function for TARGET_STRUCT_VALUE_RTX. */ + +static rtx +cris_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, CRIS_STRUCT_VALUE_REGNUM); +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ + +static void +cris_setup_incoming_varargs (CUMULATIVE_ARGS *ca, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, + int *pretend_arg_size, + int second_time) +{ + if (ca->regs < CRIS_MAX_ARGS_IN_REGS) + { + int stdarg_regs = CRIS_MAX_ARGS_IN_REGS - ca->regs; + cfun->machine->stdarg_regs = stdarg_regs; + *pretend_arg_size = stdarg_regs * 4; + } + + if (TARGET_PDEBUG) + fprintf (asm_out_file, + "\n; VA:: ANSI: %d args before, anon @ #%d, %dtime\n", + ca->regs, *pretend_arg_size, second_time); +} + +/* Return true if TYPE must be passed by invisible reference. + For cris, we pass <= 8 bytes by value, others by reference. */ + +static bool +cris_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + return (targetm.calls.must_pass_in_stack (mode, type) + || CRIS_FUNCTION_ARG_SIZE (mode, type) > 8); +} + +/* A combination of defining TARGET_PROMOTE_FUNCTION_MODE, promoting arguments + and *not* defining TARGET_PROMOTE_PROTOTYPES or PROMOTE_MODE gives the + best code size and speed for gcc, ipps and products in gcc-2.7.2. */ + +enum machine_mode +cris_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return) +{ + /* Defining PROMOTE_FUNCTION_RETURN in gcc-2.7.2 uncovered bug 981110 (even + when modifying TARGET_FUNCTION_VALUE to return the promoted mode). + Maybe pointless as of now, but let's keep the old behavior. */ + if (for_return == 1) + return mode; + return CRIS_PROMOTED_MODE (mode, *punsignedp, type); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. */ + +static rtx +cris_function_value(const_tree type, + const_tree func ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (type), CRIS_FIRST_ARG_REG); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. 
*/ + +static rtx +cris_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. */ + +bool +cris_function_value_regno_p (const unsigned int regno) +{ + return (regno == CRIS_FIRST_ARG_REG); +} + +static int +cris_arg_partial_bytes (CUMULATIVE_ARGS *ca, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + if (ca->regs == CRIS_MAX_ARGS_IN_REGS - 1 + && !targetm.calls.must_pass_in_stack (mode, type) + && CRIS_FUNCTION_ARG_SIZE (mode, type) > 4 + && CRIS_FUNCTION_ARG_SIZE (mode, type) <= 8) + return UNITS_PER_WORD; + else + return 0; +} + +static rtx +cris_function_arg_1 (const CUMULATIVE_ARGS *ca, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + bool named, bool incoming) +{ + if ((!incoming || named) && ca->regs < CRIS_MAX_ARGS_IN_REGS) + return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG + ca->regs); + else + return NULL_RTX; +} + +/* Worker function for TARGET_FUNCTION_ARG. + The void_type_node is sent as a "closing" call. */ + +static rtx +cris_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode, + const_tree type, bool named) +{ + return cris_function_arg_1 (ca, mode, type, named, false); +} + +/* Worker function for TARGET_FUNCTION_INCOMING_ARG. + + The differences between this and the previous, is that this one checks + that an argument is named, since incoming stdarg/varargs arguments are + pushed onto the stack, and we don't have to check against the "closing" + void_type_node TYPE parameter. */ + +static rtx +cris_function_incoming_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode, + const_tree type, bool named) +{ + return cris_function_arg_1 (ca, mode, type, named, true); +} + +/* Worker function for TARGET_FUNCTION_ARG_ADVANCE. */ + +static void +cris_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + ca->regs += (3 + CRIS_FUNCTION_ARG_SIZE (mode, type)) / 4; +} + +/* Worker function for TARGET_MD_ASM_CLOBBERS. */ + +static tree +cris_md_asm_clobbers (tree outputs, tree inputs, tree in_clobbers) +{ + HARD_REG_SET mof_set; + tree clobbers; + tree t; + + CLEAR_HARD_REG_SET (mof_set); + SET_HARD_REG_BIT (mof_set, CRIS_MOF_REGNUM); + + /* For the time being, all asms clobber condition codes. Revisit when + there's a reasonable use for inputs/outputs that mention condition + codes. */ + clobbers + = tree_cons (NULL_TREE, + build_string (strlen (reg_names[CRIS_CC0_REGNUM]), + reg_names[CRIS_CC0_REGNUM]), + in_clobbers); + + for (t = outputs; t != NULL; t = TREE_CHAIN (t)) + { + tree val = TREE_VALUE (t); + + /* The constraint letter for the singleton register class of MOF + is 'h'. If it's mentioned in the constraints, the asm is + MOF-aware and adding it to the clobbers would cause it to have + impossible constraints. */ + if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))), + 'h') != NULL + || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE) + return clobbers; + } + + for (t = inputs; t != NULL; t = TREE_CHAIN (t)) + { + tree val = TREE_VALUE (t); + + if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))), + 'h') != NULL + || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE) + return clobbers; + } + + return tree_cons (NULL_TREE, + build_string (strlen (reg_names[CRIS_MOF_REGNUM]), + reg_names[CRIS_MOF_REGNUM]), + clobbers); +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. 
+ + Really only needed if the stack frame has variable length (alloca + or variable sized local arguments (GNU C extension). See PR39499 and + PR38609 for the reason this isn't just 0. */ + +bool +cris_frame_pointer_required (void) +{ + return !current_function_sp_is_unchanging; +} + +/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. + + This looks too complicated, and it is. I assigned r7 to be the + static chain register, but it is call-saved, so we have to save it, + and come back to restore it after the call, so we have to save srp... + Anyway, trampolines are rare enough that we can cope with this + somewhat lack of elegance. + (Do not be tempted to "straighten up" whitespace in the asms; the + assembler #NO_APP state mandates strict spacing). */ +/* ??? See the i386 regparm=3 implementation that pushes the static + chain value to the stack in the trampoline, and uses a call-saved + register when called directly. */ + +static void +cris_asm_trampoline_template (FILE *f) +{ + if (TARGET_V32) + { + /* This normally-unused nop insn acts as an instruction to + the simulator to flush its instruction cache. None of + the other instructions in the trampoline template suits + as a trigger for V32. The pc-relative addressing mode + works nicely as a trigger for V10. + FIXME: Have specific V32 template (possibly avoiding the + use of a special instruction). */ + fprintf (f, "\tclearf x\n"); + /* We have to use a register as an intermediate, choosing + semi-randomly R1 (which has to not be the STATIC_CHAIN_REGNUM), + so we can use it for address indirection and jsr target. */ + fprintf (f, "\tmove $r1,$mof\n"); + /* +4 */ + fprintf (f, "\tmove.d 0,$r1\n"); + fprintf (f, "\tmove.d $%s,[$r1]\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\taddq 6,$r1\n"); + fprintf (f, "\tmove $mof,[$r1]\n"); + fprintf (f, "\taddq 6,$r1\n"); + fprintf (f, "\tmove $srp,[$r1]\n"); + /* +20 */ + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + /* +26 */ + fprintf (f, "\tmove.d 0,$r1\n"); + fprintf (f, "\tjsr $r1\n"); + fprintf (f, "\tsetf\n"); + /* +36 */ + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + /* +42 */ + fprintf (f, "\tmove.d 0,$r1\n"); + /* +48 */ + fprintf (f, "\tmove.d 0,$r9\n"); + fprintf (f, "\tjump $r9\n"); + fprintf (f, "\tsetf\n"); + } + else + { + fprintf (f, "\tmove.d $%s,[$pc+20]\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tmove $srp,[$pc+22]\n"); + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tjsr 0\n"); + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tjump 0\n"); + } +} + +/* Implement TARGET_TRAMPOLINE_INIT. */ + +static void +cris_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx tramp = XEXP (m_tramp, 0); + rtx mem; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + if (TARGET_V32) + { + mem = adjust_address (m_tramp, SImode, 6); + emit_move_insn (mem, plus_constant (tramp, 38)); + mem = adjust_address (m_tramp, SImode, 22); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 28); + emit_move_insn (mem, fnaddr); + } + else + { + mem = adjust_address (m_tramp, SImode, 10); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 16); + emit_move_insn (mem, fnaddr); + } + + /* Note that there is no need to do anything with the cache for + sake of a trampoline. 
*/ +} + + +#if 0 +/* Various small functions to replace macros. Only called from a + debugger. They might collide with gcc functions or system functions, + so only emit them when '#if 1' above. */ + +enum rtx_code Get_code (rtx); + +enum rtx_code +Get_code (rtx x) +{ + return GET_CODE (x); +} + +const char *Get_mode (rtx); + +const char * +Get_mode (rtx x) +{ + return GET_MODE_NAME (GET_MODE (x)); +} + +rtx Xexp (rtx, int); + +rtx +Xexp (rtx x, int n) +{ + return XEXP (x, n); +} + +rtx Xvecexp (rtx, int, int); + +rtx +Xvecexp (rtx x, int n, int m) +{ + return XVECEXP (x, n, m); +} + +int Get_rtx_len (rtx); + +int +Get_rtx_len (rtx x) +{ + return GET_RTX_LENGTH (GET_CODE (x)); +} + +/* Use upper-case to distinguish from local variables that are sometimes + called next_insn and prev_insn. */ + +rtx Next_insn (rtx); + +rtx +Next_insn (rtx insn) +{ + return NEXT_INSN (insn); +} + +rtx Prev_insn (rtx); + +rtx +Prev_insn (rtx insn) +{ + return PREV_INSN (insn); +} +#endif + +#include "gt-cris.h" + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h new file mode 100644 index 000000000..0e69e6948 --- /dev/null +++ b/gcc/config/cris/cris.h @@ -0,0 +1,1335 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, + 2009, 2010, 2011 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* After the first "Node:" comment comes all preprocessor directives and + attached declarations described in the info files, the "Using and + Porting GCC" manual (uapgcc), in the same order as found in the "Target + macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not + really, but needs an update anyway. + + There is no generic copy-of-uapgcc comment, you'll have to see uapgcc + for that. If applicable, there is a CRIS-specific comment. The order + of macro definitions follow the order in the manual. Every section in + the manual (node in the info pages) has an introductory `Node: + ' comment. If no macros are defined for a section, only + the section-comment is present. */ + +/* Note that other header files (e.g. config/elfos.h, config/linux.h, + config/cris/linux.h and config/cris/aout.h) are responsible for lots of + settings not repeated below. This file contains general CRIS + definitions and definitions for the cris-*-elf subtarget. */ + +/* We don't want to use gcc_assert for everything, as that can be + compiled out. */ +#define CRIS_ASSERT(x) \ + do { if (!(x)) internal_error ("CRIS-port assertion failed: " #x); } while (0) + +/* Replacement for REG_P since it does not match SUBREGs. Happens for + testcase Axis-20000320 with gcc-2.9x. 
*/ +#define REG_S_P(x) \ + (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (XEXP (x, 0)))) + +/* Last register in main register bank r0..r15. */ +#define CRIS_LAST_GENERAL_REGISTER 15 + +/* Descriptions of registers used for arguments. */ +#define CRIS_FIRST_ARG_REG 10 +#define CRIS_MAX_ARGS_IN_REGS 4 + +/* See also *_REGNUM constants in cris.md. */ + +/* Most of the time, we need the index into the register-names array. + When passing debug-info, we need the real hardware register number. */ +#define CRIS_CANONICAL_SRP_REGNUM (16 + 11) +#define CRIS_CANONICAL_MOF_REGNUM (16 + 7) +/* We have CCR in all models including v10, but that's 16 bits, so let's + prefer the DCCR number, which is a DMA pointer in pre-v8, so we'll + never clash with it for GCC purposes. */ +#define CRIS_CANONICAL_CC0_REGNUM (16 + 13) + +/* When generating PIC, these suffixes are added to the names of non-local + functions when being output. Contrary to other ports, we have offsets + relative to the GOT, not the PC. We might implement PC-relative PLT + semantics later for the general case; they are used in some cases right + now, such as MI thunks. */ +#define CRIS_GOTPLT_SUFFIX ":GOTPLT" +#define CRIS_PLT_GOTOFFSET_SUFFIX ":PLTG" +#define CRIS_PLT_PCOFFSET_SUFFIX ":PLT" + +#define CRIS_FUNCTION_ARG_SIZE(MODE, TYPE) \ + ((MODE) != BLKmode ? GET_MODE_SIZE (MODE) \ + : (unsigned) int_size_in_bytes (TYPE)) + +/* Which CPU version this is. The parsed and adjusted cris_cpu_str. */ +extern int cris_cpu_version; + +/* Changing the order used to be necessary to put the fourth __make_dp + argument (a DImode parameter) in registers, to fit with the libfunc + parameter passing scheme used for intrinsic functions. FIXME: Check + performance and maybe remove definition from TARGET_LIBGCC2_CFLAGS now + that it isn't strictly necessary. We used to do this through + TARGET_LIBGCC2_CFLAGS, but that became increasingly difficult as the + parenthesis (that needed quoting) travels through several layers of + make and shell invocations. */ +#ifdef IN_LIBGCC2 +#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c) +#endif + + +/* Node: Driver */ + +/* Also provide canonical vN definitions when user specifies an alias. + Note that -melf overrides -maout. */ + +#define CPP_SPEC \ + "%{mtune=*:-D__tune_%* %{mtune=v*:-D__CRIS_arch_tune=%*}\ + %{mtune=etrax4:-D__tune_v3 -D__CRIS_arch_tune=3}\ + %{mtune=etrax100:-D__tune_v8 -D__CRIS_arch_tune=8}\ + %{mtune=svinto:-D__tune_v8 -D__CRIS_arch_tune=8}\ + %{mtune=etrax100lx:-D__tune_v10 -D__CRIS_arch_tune=10}\ + %{mtune=ng:-D__tune_v10 -D__CRIS_arch_tune=10}}\ + %{mcpu=*:-D__arch_%* %{mcpu=v*:-D__CRIS_arch_version=%*}\ + %{mcpu=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\ + %{mcpu=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{mcpu=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{mcpu=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\ + %{mcpu=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\ + %{march=*:-D__arch_%* %{march=v*:-D__CRIS_arch_version=%*}\ + %{march=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\ + %{march=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{march=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{march=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\ + %{march=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\ + %{metrax100:-D__arch__v8 -D__CRIS_arch_version=8}\ + %{metrax4:-D__arch__v3 -D__CRIS_arch_version=3}\ + %(cpp_subtarget)" + +/* For the cris-*-elf subtarget. 
*/ + +#define CRIS_DEFAULT_TUNE "10" +#define CRIS_ARCH_CPP_DEFAULT +#define CRIS_DEFAULT_ASM_ARCH_OPTION "" + +#ifdef TARGET_CPU_DEFAULT +#if TARGET_CPU_DEFAULT != 32 && TARGET_CPU_DEFAULT != 10 + #error "Due to '()'; e.g. '#define TARGET_CPU_DEFAULT (10)', stringize TARGET_CPU_DEFAULT isn't useful: update manually." +#endif + +#if TARGET_CPU_DEFAULT == 32 +#undef CRIS_DEFAULT_TUNE +#define CRIS_DEFAULT_TUNE "32" +/* To enable use of "generic" cris-axis-elf binutils, always pass the + architecture option to GAS. (We don't do this for non-v32.) */ +#undef CRIS_DEFAULT_ASM_ARCH_OPTION +#define CRIS_DEFAULT_ASM_ARCH_OPTION "--march=v32" +#endif + +#undef CRIS_ARCH_CPP_DEFAULT +#define CRIS_ARCH_CPP_DEFAULT \ + "%{!march=*:\ + %{!metrax*:\ + %{!mcpu=*:\ + %{!mtune=*:-D__tune_v" CRIS_DEFAULT_TUNE "}\ + -D__arch_v"CRIS_DEFAULT_TUNE\ + " -D__CRIS_arch_version=" CRIS_DEFAULT_TUNE "}}}" +#endif + +#define CRIS_CPP_SUBTARGET_SPEC \ + "%{mbest-lib-options:\ + %{!moverride-best-lib-options:\ + %{!march=*:%{!metrax*:%{!mcpu=*:\ + -D__tune_v" CRIS_DEFAULT_TUNE \ + " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}}}}"\ + CRIS_ARCH_CPP_DEFAULT + +/* Override previous definitions (linux.h). */ +#undef CC1_SPEC +#define CC1_SPEC \ + "%{metrax4:-march=v3}\ + %{metrax100:-march=v8}\ + %(cc1_subtarget)" + +/* For the cris-*-elf subtarget. */ +#define CRIS_CC1_SUBTARGET_SPEC \ + "-melf\ + %{mbest-lib-options:\ + %{!moverride-best-lib-options:\ + %{!march=*:%{!mcpu=*:-mtune=v" CRIS_DEFAULT_TUNE\ + " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}\ + %{!finhibit-size-directive:\ + %{!fno-function-sections: -ffunction-sections}\ + %{!fno-data-sections: -fdata-sections}}}}" + +/* This adds to CC1_SPEC. */ +#define CC1PLUS_SPEC "" + +#ifdef HAVE_AS_NO_MUL_BUG_ABORT_OPTION +#define MAYBE_AS_NO_MUL_BUG_ABORT \ + "%{mno-mul-bug-workaround:-no-mul-bug-abort} " +#else +#define MAYBE_AS_NO_MUL_BUG_ABORT +#endif + +/* Override previous definitions (linux.h). */ +#undef ASM_SPEC +#define ASM_SPEC \ + MAYBE_AS_NO_MUL_BUG_ABORT \ + "%(asm_subtarget)\ + %{march=*:%{mcpu=*:%edo not specify both -march=... and -mcpu=...}}\ + %{march=v32:--march=v32} %{mcpu=v32:--march=v32}" + +/* For the cris-*-elf subtarget. */ +#define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}" + +/* FIXME: We should propagate the -melf option to make the criself + "emulation" unless a linker script is provided (-T*), but I don't know + how to do that if either of -Ttext, -Tdata or -Tbss is given but no + linker script, as is usually the case. Leave it to the user for the + time being. + + Note that -melf overrides -maout except that a.out-compiled libraries + are linked in (multilibbing). We'd need some %s-variant that + checked for existence of some specific file. */ +#undef LINK_SPEC +#define LINK_SPEC \ + "%{v:--verbose}\ + %(link_subtarget)" + +/* For the cris-*-elf subtarget. */ +#define CRIS_LINK_SUBTARGET_SPEC \ + "-mcriself\ + %{sim2:%{!T*:-Tdata 0x4000000 -Tbss 0x8000000}}\ + %{!r:%{O2|O3: --gc-sections}}" + +/* Which library to get. The simulator uses a different library for + the low-level syscalls (implementing the Linux syscall ABI instead + of direct-iron accesses). Default everything with the stub "nosys" + library. */ +/* Override previous definitions (linux.h). */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{sim*:--start-group -lc -lsyslinux --end-group}\ + %{!sim*:%{g*:-lg}\ + %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p} -lbsp}\ + -lnosys" + +/* Linker startfile options; crt0 flavors. 
+ We need to remove any previous definition (elfos.h). */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{sim*:crt1.o%s}%{!sim*:crt0.o%s}\ + crti.o%s crtbegin.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#define EXTRA_SPECS \ + {"cpp_subtarget", CRIS_CPP_SUBTARGET_SPEC}, \ + {"cc1_subtarget", CRIS_CC1_SUBTARGET_SPEC}, \ + {"asm_subtarget", CRIS_ASM_SUBTARGET_SPEC}, \ + {"link_subtarget", CRIS_LINK_SUBTARGET_SPEC}, \ + CRIS_SUBTARGET_EXTRA_SPECS + +#define CRIS_SUBTARGET_EXTRA_SPECS + + +/* Node: Run-time Target */ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("cris"); \ + builtin_define_std ("CRIS"); \ + builtin_define_std ("GNU_CRIS"); \ + builtin_define ("__CRIS_ABI_version=2"); \ + builtin_assert ("cpu=cris"); \ + builtin_assert ("machine=cris"); \ + } \ + while (0) + +/* Previously controlled by target_flags. */ +#define TARGET_ELF 1 + +/* Previously controlled by target_flags. Note that this is *not* set + for -melinux. */ +#define TARGET_LINUX 0 + +/* For the cris-*-elf subtarget. */ +#define CRIS_SUBTARGET_DEFAULT 0 + +#define CRIS_CPU_BASE 0 +#define CRIS_CPU_ETRAX4 3 /* Just lz added. */ +#define CRIS_CPU_SVINTO 8 /* Added swap, jsrc & Co., 32-bit accesses. */ +#define CRIS_CPU_NG 10 /* Added mul[su]. */ +#define CRIS_CPU_V32 32 /* Major changes. */ + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT CRIS_CPU_BASE +#endif + +/* Default target_flags if no switches specified. */ +#ifndef TARGET_DEFAULT +# if TARGET_CPU_DEFAULT == 32 +# define TARGET_DEFAULT \ + (MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_PROLOGUE_EPILOGUE) +# else /* 10 */ +# define TARGET_DEFAULT \ + (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG) +# endif +#endif + +/* Local, providing a default for cris_cpu_version. */ +#define CRIS_DEFAULT_CPU_VERSION TARGET_CPU_DEFAULT + +#define TARGET_HAS_MUL_INSNS (cris_cpu_version >= CRIS_CPU_NG) +#define TARGET_HAS_LZ (cris_cpu_version >= CRIS_CPU_ETRAX4) +#define TARGET_HAS_SWAP (cris_cpu_version >= CRIS_CPU_SVINTO) +#define TARGET_V32 (cris_cpu_version >= CRIS_CPU_V32) + +#define CRIS_SUBTARGET_HANDLE_OPTION(x, y, z) + +/* Node: Storage Layout */ + +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN 0 + +/* WORDS_BIG_ENDIAN is not defined in the hardware, but for consistency, + we use little-endianness, and we may also be able to use + post-increment on DImode indirect. */ +#define WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 4 + +#define CRIS_PROMOTED_MODE(MODE, UNSIGNEDP, TYPE) \ + (GET_MODE_CLASS (MODE) == MODE_INT && GET_MODE_SIZE (MODE) < 4) \ + ? SImode : MODE + +/* We will be using prototype promotion, so they will be 32 bit. */ +#define PARM_BOUNDARY 32 + +/* Stack boundary is guided by -mstack-align, -mno-stack-align, + -malign. + Old comment: (2.1: still valid in 2.7.2?) + Note that to make this macro affect the alignment of stack + locals, a fix was required, and special precautions when handling + the stack pointer in various other macros (TARGET_ASM_FUNCTION_PROLOGUE + et al) were required. See file "function.c". If you would just define + this macro, it would only affect the builtin alloca and variable + local data (non-ANSI, non-K&R, Gnu C extension). */ +#define STACK_BOUNDARY \ + (TARGET_STACK_ALIGN ? (TARGET_ALIGN_BY_32 ? 
32 : 16) : 8) + +#define FUNCTION_BOUNDARY 16 + +/* Do not change BIGGEST_ALIGNMENT (when optimizing), as it will affect + strange places, at least in 2.1. */ +#define BIGGEST_ALIGNMENT 8 + +/* If -m16bit, -m16-bit, -malign or -mdata-align, + align everything to 16 bit. */ +#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \ + (TARGET_DATA_ALIGN \ + ? (TARGET_ALIGN_BY_32 \ + ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \ + : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN) + +/* Note that CONSTANT_ALIGNMENT has the effect of making gcc believe that + ALL references to constant stuff (in code segment, like strings) has + this alignment. That is a rather rushed assumption. Luckily we do not + care about the "alignment" operand to builtin memcpy (only place where + it counts), so it doesn't affect any bad spots. */ +#define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \ + (TARGET_CONST_ALIGN \ + ? (TARGET_ALIGN_BY_32 \ + ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \ + : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN) + +/* FIXME: Define LOCAL_ALIGNMENT for word and dword or arrays and + structures (if -mstack-align=), and check that it is good. */ + +#define EMPTY_FIELD_BOUNDARY 8 + +#define STRUCTURE_SIZE_BOUNDARY 8 + +#define STRICT_ALIGNMENT 0 + +/* Remove any previous definition (elfos.h). + ??? If it wasn't for all the other stuff that affects layout of + structures and bit-fields, this could presumably cause incompatibility + with other GNU/Linux ports (i.e. elfos.h users). */ +#undef PCC_BITFIELD_TYPE_MATTERS + +/* This is only used for non-scalars. Strange stuff happens to structs + (FIXME: What?) if we use anything larger than largest actually used + datum size, so lets make it 32. The type "long long" will still work + as usual. We can still have DImode insns, but they will only be used + for scalar data (i.e. long long). */ +#define MAX_FIXED_MODE_SIZE 32 + + +/* Node: Type Layout */ + +/* Note that DOUBLE_TYPE_SIZE is not defined anymore, since the default + value gives a 64-bit double, which is what we now use. */ + +/* For compatibility and historical reasons, a char should be signed. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Note that WCHAR_TYPE_SIZE is used in cexp.y, + where TARGET_SHORT is not available. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +/* Node: Register Basics */ + +/* We count all 16 non-special registers, SRP, a faked argument + pointer register, MOF and CCR/DCCR. */ +#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1) + +/* For CRIS, these are r15 (pc) and r14 (sp). Register r8 is used as a + frame-pointer, but is not fixed. SRP is not included in general + registers and will not be used automatically. All other special + registers are fixed at the moment. The faked argument pointer register + is fixed too. */ +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0} + +/* Register r9 is used for structure-address, r10-r13 for parameters, + r10- for return values. */ +#define CALL_USED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1} + +/* Node: Allocation Order */ + +/* We need this on CRIS, because call-used regs should be used first, + (so we don't need to push). Else start using registers from r0 and up. + This preference is mainly because if we put call-used-regs from r0 + and up, then we can't use movem to push the rest, (which have to be + saved if we use them, and movem has to start with r0). 
+ Change here if you change which registers to use as call registers. + + The actual need to explicitly prefer call-used registers improved the + situation a lot for 2.1, but might not actually be needed anymore. + Still, this order reflects what GCC should find out by itself, so it + probably does not hurt. + + Order of preference: Call-used-regs first, then r0 and up, last fp & + sp & pc as fillers. + Call-used regs in opposite order, so they will cause less conflict if + a function has few args (<= 3) and it wants a scratch reg. + Use struct-return address first, since very few functions use + structure return values so it is likely to be available. */ +#define REG_ALLOC_ORDER \ + {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19} + +/* Use MOF and ACR. Prefer ACR before any other register. Prefer MOF + then SRP after saved registers. The *after* is because they're only + useful for storage, not for things being computed, which is + apparently more common. */ +#define REG_ALLOC_ORDER_V32 \ + {15, 9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 17, 16, 14, 18, 19} + + +/* Node: Values in Registers */ + +/* The VOIDmode test is so we can omit mode on anonymous insns. FIXME: + Still needed in 2.9x, at least for Axis-20000319. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (MODE == VOIDmode \ + ? 1 : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* CRIS permits all registers to hold all modes. Well, except for the + condition-code register. And we can't hold larger-than-register size + modes in the last special register that can hold a full 32 bits. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (((MODE) == CCmode \ + || (REGNO) != CRIS_CC0_REGNUM) \ + && (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \ + || ((REGNO) != CRIS_MOF_REGNUM && (REGNO) != CRIS_ACR_REGNUM))) + +/* Because CCmode isn't covered by the "narrower mode" statement in + tm.texi, we can still say all modes are tieable despite not having an + always 1 HARD_REGNO_MODE_OK. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + + +/* Node: Leaf Functions */ +/* (no definitions) */ + +/* Node: Stack Registers */ +/* (no definitions) */ + + +/* Node: Register Classes */ + +/* FIXME: A separate class for the return register would make sense. + + We need a separate register class to handle register allocation for + ACR, since it can't be used for post-increment. + + It's not obvious, but having subunions of all movable-between + register classes does really help register allocation. */ +enum reg_class + { + NO_REGS, + ACR_REGS, MOF_REGS, CC0_REGS, SPECIAL_REGS, + SPEC_ACR_REGS, GENNONACR_REGS, + SPEC_GENNONACR_REGS, GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES + }; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ + {"NO_REGS", \ + "ACR_REGS", "MOF_REGS", "CC0_REGS", "SPECIAL_REGS", \ + "SPEC_ACR_REGS", "GENNONACR_REGS", "SPEC_GENNONACR_REGS", \ + "GENERAL_REGS", "ALL_REGS"} + +#define CRIS_SPECIAL_REGS_CONTENTS \ + ((1 << CRIS_SRP_REGNUM) | (1 << CRIS_MOF_REGNUM) | (1 << CRIS_CC0_REGNUM)) + +/* Count in the faked argument register in GENERAL_REGS. Keep out SRP. 
*/ +#define REG_CLASS_CONTENTS \ + { \ + {0}, \ + {1 << CRIS_ACR_REGNUM}, \ + {1 << CRIS_MOF_REGNUM}, \ + {1 << CRIS_CC0_REGNUM}, \ + {CRIS_SPECIAL_REGS_CONTENTS}, \ + {CRIS_SPECIAL_REGS_CONTENTS \ + | (1 << CRIS_ACR_REGNUM)}, \ + {(0xffff | (1 << CRIS_AP_REGNUM)) \ + & ~(1 << CRIS_ACR_REGNUM)}, \ + {(0xffff | (1 << CRIS_AP_REGNUM) \ + | CRIS_SPECIAL_REGS_CONTENTS) \ + & ~(1 << CRIS_ACR_REGNUM)}, \ + {0xffff | (1 << CRIS_AP_REGNUM)}, \ + {0xffff | (1 << CRIS_AP_REGNUM) \ + | CRIS_SPECIAL_REGS_CONTENTS} \ + } + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == CRIS_ACR_REGNUM ? ACR_REGS : \ + (REGNO) == CRIS_MOF_REGNUM ? MOF_REGS : \ + (REGNO) == CRIS_CC0_REGNUM ? CC0_REGS : \ + (REGNO) == CRIS_SRP_REGNUM ? SPECIAL_REGS : \ + GENERAL_REGS) + +#define BASE_REG_CLASS GENERAL_REGS + +#define MODE_CODE_BASE_REG_CLASS(MODE, OCODE, ICODE) \ + ((OCODE) != POST_INC ? BASE_REG_CLASS : GENNONACR_REGS) + +#define INDEX_REG_CLASS GENERAL_REGS + +#define IRA_COVER_CLASSES { GENERAL_REGS, SPECIAL_REGS, LIM_REG_CLASSES } + +#define REG_CLASS_FROM_LETTER(C) \ + ( \ + (C) == 'a' ? ACR_REGS : \ + (C) == 'b' ? GENNONACR_REGS : \ + (C) == 'h' ? MOF_REGS : \ + (C) == 'x' ? SPECIAL_REGS : \ + (C) == 'c' ? CC0_REGS : \ + NO_REGS \ + ) + +/* Since it uses reg_renumber, it is safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) <= CRIS_LAST_GENERAL_REGISTER \ + || (REGNO) == ARG_POINTER_REGNUM \ + || (unsigned) reg_renumber[REGNO] <= CRIS_LAST_GENERAL_REGISTER \ + || (unsigned) reg_renumber[REGNO] == ARG_POINTER_REGNUM) + +/* REGNO_OK_FOR_BASE_P seems to be obsolete wrt. this one, but not yet + documented as such. */ +#define REGNO_MODE_CODE_OK_FOR_BASE_P(REGNO, MODE, OCODE, ICODE) \ + (REGNO_OK_FOR_BASE_P (REGNO) \ + && ((OCODE) != POST_INC \ + || !((REGNO) == CRIS_ACR_REGNUM \ + || (unsigned) reg_renumber[REGNO] == CRIS_ACR_REGNUM))) + +/* See REGNO_OK_FOR_BASE_P. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +/* It seems like gcc (2.7.2 and 2.9x of 2000-03-22) may send "NO_REGS" as + the class for a constant (testcase: __Mul in arit.c). To avoid forcing + out a constant into the constant pool, we will trap this case and + return something a bit more sane. FIXME: Check if this is a bug. + Beware that we must not "override" classes that can be specified as + constraint letters, or else asm operands using them will fail when + they need to be reloaded. FIXME: Investigate whether that constitutes + a bug. */ +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + ((CLASS) != ACR_REGS \ + && (CLASS) != MOF_REGS \ + && (CLASS) != CC0_REGS \ + && (CLASS) != SPECIAL_REGS \ + ? GENERAL_REGS : (CLASS)) + +/* We can't move special registers to and from memory in smaller than + word_mode. We also can't move between special registers. Luckily, + -1, as returned by true_regnum for non-sub/registers, is valid as a + parameter to our REGNO_REG_CLASS, returning GENERAL_REGS, so we get + the effect that any X that isn't a special-register is treated as + a non-empty intersection with GENERAL_REGS. */ +#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \ + ((((CLASS) == SPECIAL_REGS || (CLASS) == MOF_REGS) \ + && ((GET_MODE_SIZE (MODE) < 4 && MEM_P (X)) \ + || !reg_classes_intersect_p (REGNO_REG_CLASS (true_regnum (X)), \ + GENERAL_REGS))) \ + ? GENERAL_REGS : NO_REGS) + +/* FIXME: Fix regrename.c; it should check validity of replacements, + not just with a silly pass-specific macro. 
We may miss some + opportunities, but we must stop regrename from creating acr++. */ +#define HARD_REGNO_RENAME_OK(FROM, TO) ((TO) != CRIS_ACR_REGNUM) + +/* For CRIS, this is always the size of MODE in words, + since all registers are the same size. To use omitted modes in + patterns with reload constraints, you must say the widest size + which is allowed for VOIDmode. + FIXME: Does that still apply for gcc-2.9x? Keep poisoned until such + patterns are added back. News: 2001-03-16: Happens as early as the + underscore-test. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == VOIDmode \ + ? 1 /* + cris_fatal ("CLASS_MAX_NREGS with VOIDmode") */ \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* We are now out of letters; we could use ten more. This forces us to + use C-code in the 'md' file. FIXME: Use some EXTRA_CONSTRAINTS. */ +#define CRIS_CONST_OK_FOR_LETTER_P(VALUE, C) \ + ( \ + /* MOVEQ, CMPQ, ANDQ, ORQ. */ \ + (C) == 'I' ? (VALUE) >= -32 && (VALUE) <= 31 : \ + /* ADDQ, SUBQ. */ \ + (C) == 'J' ? (VALUE) >= 0 && (VALUE) <= 63 : \ + /* ASRQ, BTSTQ, LSRQ, LSLQ. */ \ + (C) == 'K' ? (VALUE) >= 0 && (VALUE) <= 31 : \ + /* A 16-bit signed number. */ \ + (C) == 'L' ? (VALUE) >= -32768 && (VALUE) <= 32767 : \ + /* The constant 0 for CLEAR. */ \ + (C) == 'M' ? (VALUE) == 0 : \ + /* A negative ADDQ or SUBQ. */ \ + (C) == 'N' ? (VALUE) >= -63 && (VALUE) < 0 : \ + /* Quickened ints, QI and HI. */ \ + (C) == 'O' ? (VALUE) >= 0 && (VALUE) <= 65535 \ + && ((VALUE) >= (65535-31) \ + || ((VALUE) >= (255-31) \ + && (VALUE) <= 255 )) : \ + /* A 16-bit number signed *or* unsigned. */ \ + (C) == 'P' ? (VALUE) >= -32768 && (VALUE) <= 65535 : \ + 0) + +#define CONST_OK_FOR_CONSTRAINT_P(VALUE, C, S) \ + ( \ + ((C) != 'K' || (S)[1] == 'c') \ + ? CRIS_CONST_OK_FOR_LETTER_P (VALUE, C) : \ + ((C) == 'K' && (S)[1] == 'p') \ + ? exact_log2 (VALUE) >= 0 : \ + 0) + +#define CONSTRAINT_LEN(C, S) ((C) == 'K' ? 2 : DEFAULT_CONSTRAINT_LEN (C, S)) + +/* It is really simple to make up a 0.0; it is the same as int-0 in + IEEE754. */ +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ + ((C) == 'G' && ((VALUE) == CONST0_RTX (DFmode) \ + || (VALUE) == CONST0_RTX (SFmode))) + +/* We need this on cris to distinguish delay-slottable addressing modes. */ +#define EXTRA_CONSTRAINT(X, C) \ + ( \ + /* Slottable address mode? */ \ + (C) == 'Q' ? EXTRA_CONSTRAINT_Q (X) : \ + /* Operand to BDAP or BIAP? */ \ + (C) == 'R' ? EXTRA_CONSTRAINT_R (X) : \ + /* A local PIC symbol? */ \ + (C) == 'S' ? EXTRA_CONSTRAINT_S (X) : \ + /* A three-address addressing-mode? */ \ + (C) == 'T' ? EXTRA_CONSTRAINT_T (X) : \ + /* A PLT symbol? */ \ + (C) == 'U' ? EXTRA_CONSTRAINT_U (X) : \ + 0) + +#define EXTRA_MEMORY_CONSTRAINT(X, STR) ((X) == 'Q') + +#define EXTRA_CONSTRAINT_Q(X) \ + ( \ + /* Just an indirect register (happens to also be \ + "all" slottable memory addressing modes not \ + covered by other constraints, i.e. '>'). */ \ + MEM_P (X) && BASE_P (XEXP (X, 0)) \ + ) + +#define EXTRA_CONSTRAINT_R(X) \ + ( \ + /* An operand to BDAP or BIAP: \ + A BIAP; r.S? */ \ + BIAP_INDEX_P (X) \ + /* A [reg] or (int) [reg], maybe with post-increment. */ \ + || BDAP_INDEX_P (X) \ + || CONSTANT_INDEX_P (X) \ + ) + +#define EXTRA_CONSTRAINT_T(X) \ + ( \ + /* Memory three-address operand. All are indirect-memory: */ \ + MEM_P (X) \ + && ((MEM_P (XEXP (X, 0)) \ + /* Double indirect: [[reg]] or [[reg+]]? */ \ + && (BASE_OR_AUTOINCR_P (XEXP (XEXP (X, 0), 0)))) \ + /* Just an explicit indirect reference: [const]? 
*/ \ + || CONSTANT_P (XEXP (X, 0)) \ + /* Something that is indexed; [...+...]? */ \ + || (GET_CODE (XEXP (X, 0)) == PLUS \ + /* A BDAP constant: [reg+(8|16|32)bit offset]? */ \ + && ((BASE_P (XEXP (XEXP (X, 0), 0)) \ + && CONSTANT_INDEX_P (XEXP (XEXP (X, 0), 1))) \ + /* A BDAP register: [reg+[reg(+)].S]? */ \ + || (BASE_P (XEXP (XEXP (X, 0), 0)) \ + && BDAP_INDEX_P(XEXP(XEXP(X, 0), 1))) \ + /* Same, but with swapped arguments (no canonical \ + ordering between e.g. REG and MEM as of LAST_UPDATED \ + "Thu May 12 03:59:11 UTC 2005"). */ \ + || (BASE_P (XEXP (XEXP (X, 0), 1)) \ + && BDAP_INDEX_P (XEXP (XEXP (X, 0), 0))) \ + /* A BIAP: [reg+reg.S] (MULT comes first). */ \ + || (BASE_P (XEXP (XEXP (X, 0), 1)) \ + && BIAP_INDEX_P (XEXP (XEXP (X, 0), 0)))))) \ + ) + +/* PIC-constructs for symbols. */ +#define EXTRA_CONSTRAINT_S(X) \ + (flag_pic && GET_CODE (X) == CONST && cris_valid_pic_const (X, false)) + +#define EXTRA_CONSTRAINT_U(X) \ + (flag_pic \ + && CONSTANT_P (X) \ + && cris_nonmemory_operand_or_callable_symbol (X, VOIDmode)) + + +/* Node: Frame Layout */ + +#define STACK_GROWS_DOWNWARD +#define FRAME_GROWS_DOWNWARD 1 + +/* It seems to be indicated in the code (at least 2.1) that this is + better a constant, and best 0. */ +#define STARTING_FRAME_OFFSET 0 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \ + cris_return_addr_rtx (COUNT, FRAMEADDR) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, CRIS_SRP_REGNUM) + +/* FIXME: Any __builtin_eh_return callers must not return anything and + there must not be collisions with incoming parameters. Luckily the + number of __builtin_eh_return callers is limited. For now return + parameter registers in reverse order and hope for the best. */ +#define EH_RETURN_DATA_REGNO(N) \ + (IN_RANGE ((N), 0, 3) ? (CRIS_FIRST_ARG_REG + 3 - (N)) : INVALID_REGNUM) + +/* Store the stack adjustment in the structure-return-address register. */ +#define CRIS_STACKADJ_REG CRIS_STRUCT_VALUE_REGNUM +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, CRIS_STACKADJ_REG) + +#define EH_RETURN_HANDLER_RTX \ + cris_return_addr_rtx (0, NULL) + +#define INIT_EXPANDERS cris_init_expanders () + +/* FIXME: Move this to right node (it's not documented properly yet). */ +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (CRIS_SRP_REGNUM) + +/* FIXME: Move this to right node (it's not documented properly yet). + FIXME: Check what alignment we can assume regarding + TARGET_STACK_ALIGN and TARGET_ALIGN_BY_32. */ +#define DWARF_CIE_DATA_ALIGNMENT -1 + +/* If we would ever need an exact mapping between canonical register + number and dwarf frame register, we would either need to include all + registers in the gcc description (with some marked fixed of course), or + an inverse mapping from dwarf register to gcc register. There is one + need in dwarf2out.c:expand_builtin_init_dwarf_reg_sizes. Right now, I + don't see that we need exact correspondence between DWARF *frame* + registers and DBX_REGISTER_NUMBER, so map them onto GCC registers. */ +#define DWARF_FRAME_REGNUM(REG) (REG) + +/* Node: Stack Checking */ +/* (no definitions) FIXME: Check. */ + +/* Node: Frame Registers */ + +#define STACK_POINTER_REGNUM CRIS_SP_REGNUM + +/* Register used for frame pointer. This is also the last of the saved + registers, when a frame pointer is not used. */ +#define FRAME_POINTER_REGNUM CRIS_FP_REGNUM + +/* Faked register, is always eliminated. We need it to eliminate + allocating stack slots for the return address and the frame pointer. 
*/ +#define ARG_POINTER_REGNUM CRIS_AP_REGNUM + +#define STATIC_CHAIN_REGNUM CRIS_STATIC_CHAIN_REGNUM + + +/* Node: Elimination */ + +#define ELIMINABLE_REGS \ + {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = cris_initial_elimination_offset (FROM, TO) + + +/* Node: Stack Arguments */ + +/* Since many parameters take up one register each in any case, + defining TARGET_PROMOTE_PROTOTYPES that always returns true would + seem like a good idea, but measurements indicate that a combination + using PROMOTE_MODE is better. */ + +#define ACCUMULATE_OUTGOING_ARGS 1 + + +/* Node: Register Arguments */ + +/* Contrary to what you'd believe, defining FUNCTION_ARG_CALLEE_COPIES + seems like a (small total) loss, at least for gcc-2.7.2 compiling and + running gcc-2.1 (small win in size, small loss running -- 100.1%), + and similarly for size for products (.1 .. .3% bloat, sometimes win). + Due to the empirical likeliness of making slower code, it is not + defined. */ + +/* This no longer *needs* to be a structure; but keeping it as such should + not hurt (and hacking the ABI is simpler). */ +#define CUMULATIVE_ARGS struct cum_args +struct cum_args {int regs;}; + +/* The regs member is an integer, the number of arguments got into + registers so far. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + ((CUM).regs = 0) + +#define FUNCTION_ARG_REGNO_P(REGNO) \ + ((REGNO) >= CRIS_FIRST_ARG_REG \ + && (REGNO) < CRIS_FIRST_ARG_REG + (CRIS_MAX_ARGS_IN_REGS)) + + +/* Node: Scalar Return */ + +#define FUNCTION_VALUE_REGNO_P(N) cris_function_value_regno_p (N) + + + +/* Node: Aggregate Return */ + +#define CRIS_STRUCT_VALUE_REGNUM ((CRIS_FIRST_ARG_REG) - 1) + + +/* Node: Caller Saves */ +/* (no definitions) */ + +/* Node: Function entry */ + +/* See cris.c for TARGET_ASM_FUNCTION_PROLOGUE and + TARGET_ASM_FUNCTION_EPILOGUE. */ + +/* Node: Profiling */ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + error ("no FUNCTION_PROFILER for CRIS") + +/* FIXME: Some of the undefined macros might be mandatory. If so, fix + documentation. */ + + +/* Node: Trampolines */ + +#define TRAMPOLINE_SIZE (TARGET_V32 ? 58 : 32) + +/* CRIS wants instructions on word-boundary. */ +#define TRAMPOLINE_ALIGNMENT 16 + +/* Node: Library Calls */ + +/* If you change this, you have to check whatever libraries and systems + that use it. */ +#define TARGET_EDOM 33 + + +/* Node: Addressing Modes */ + +#define HAVE_POST_INCREMENT 1 + +/* Must be a compile-time constant, so we go with the highest value + among all CRIS variants. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* There are helper macros defined here which are used only in + GO_IF_LEGITIMATE_ADDRESS. + + Note that you *have to* reject invalid addressing modes for mode + MODE, even if it is legal for normal addressing modes. You cannot + rely on the constraints to do this work. They can only be used to + doublecheck your intentions. One example is that you HAVE TO reject + (mem:DI (plus:SI (reg:SI x) (reg:SI y))) because for some reason + this cannot be reloaded. (Which of course you can argue that gcc + should have done.) FIXME: Strange. Check. */ + +/* No symbol can be used as an index (or more correct, as a base) together + with a register with PIC; the PIC register must be there. 
*/ +#define CONSTANT_INDEX_P(X) \ + (CONSTANT_P (X) && (!flag_pic || cris_valid_pic_const (X, true))) + +/* True if X is a valid base register. */ +#define BASE_P(X) \ + (REG_P (X) && REG_OK_FOR_BASE_P (X)) + +/* True if X is a valid base register with or without autoincrement. */ +#define BASE_OR_AUTOINCR_P(X) \ + (BASE_P (X) \ + || (GET_CODE (X) == POST_INC \ + && BASE_P (XEXP (X, 0)) \ + && REGNO (XEXP (X, 0)) != CRIS_ACR_REGNUM)) + +/* True if X is a valid (register) index for BDAP, i.e. [Rs].S or [Rs+].S. */ +#define BDAP_INDEX_P(X) \ + ((MEM_P (X) && GET_MODE (X) == SImode \ + && BASE_OR_AUTOINCR_P (XEXP (X, 0))) \ + || (GET_CODE (X) == SIGN_EXTEND \ + && MEM_P (XEXP (X, 0)) \ + && (GET_MODE (XEXP (X, 0)) == HImode \ + || GET_MODE (XEXP (X, 0)) == QImode) \ + && BASE_OR_AUTOINCR_P (XEXP (XEXP (X, 0), 0)))) + +/* True if X is a valid (register) index for BIAP, i.e. Rd.m. */ +#define BIAP_INDEX_P(X) \ + ((BASE_P (X) && REG_OK_FOR_INDEX_P (X)) \ + || (GET_CODE (X) == MULT \ + && BASE_P (XEXP (X, 0)) \ + && REG_OK_FOR_INDEX_P (XEXP (X, 0)) \ + && CONST_INT_P (XEXP (X, 1)) \ + && (INTVAL (XEXP (X, 1)) == 2 \ + || INTVAL (XEXP (X, 1)) == 4))) + +/* A PIC operand looks like a normal symbol here. At output we dress it + in "[rPIC+symbol:GOT]" (global symbol) or "rPIC+symbol:GOTOFF" (local + symbol) so we exclude all addressing modes where we can't replace a + plain "symbol" with that. A global PIC symbol does not fit anywhere + here (but is thankfully a general_operand in itself). A local PIC + symbol is valid for the plain "symbol + offset" case. */ +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \ + { \ + rtx x1, x2; \ + if (BASE_OR_AUTOINCR_P (X)) \ + goto ADDR; \ + else if (TARGET_V32) \ + /* Nothing else is valid then. */ \ + ; \ + else if (CONSTANT_INDEX_P (X)) \ + goto ADDR; \ + /* Indexed? */ \ + else if (GET_CODE (X) == PLUS) \ + { \ + x1 = XEXP (X, 0); \ + x2 = XEXP (X, 1); \ + /* BDAP o, Rd. */ \ + if ((BASE_P (x1) && CONSTANT_INDEX_P (x2)) \ + || (BASE_P (x2) && CONSTANT_INDEX_P (x1)) \ + /* BDAP Rs[+], Rd. */ \ + || (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \ + && ((BASE_P (x1) && BDAP_INDEX_P (x2)) \ + || (BASE_P (x2) && BDAP_INDEX_P (x1)) \ + /* BIAP.m Rs, Rd */ \ + || (BASE_P (x1) && BIAP_INDEX_P (x2)) \ + || (BASE_P (x2) && BIAP_INDEX_P (x1))))) \ + goto ADDR; \ + } \ + else if (MEM_P (X)) \ + { \ + /* DIP (Rs). Reject [[reg+]] and [[reg]] for \ + DImode (long long). */ \ + if (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \ + && (BASE_P (XEXP (X, 0)) \ + || BASE_OR_AUTOINCR_P (XEXP (X, 0)))) \ + goto ADDR; \ + } \ + } + +#ifndef REG_OK_STRICT + /* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +# define REG_OK_FOR_BASE_P(X) \ + (REGNO (X) <= CRIS_LAST_GENERAL_REGISTER \ + || REGNO (X) == ARG_POINTER_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER) +#else + /* Nonzero if X is a hard reg that can be used as a base reg. */ +# define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) +#endif + +#ifndef REG_OK_STRICT + /* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +# define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X) +#else + /* Nonzero if X is a hard reg that can be used as an index. */ +# define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +#endif + +/* Fix reloads known to cause suboptimal spilling. 
*/ +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, INDL, WIN) \ + do \ + { \ + if (cris_reload_address_legitimized (X, MODE, OPNUM, TYPE, INDL)) \ + goto WIN; \ + } \ + while (0) + +#define LEGITIMATE_CONSTANT_P(X) 1 + + +/* Node: Condition Code */ + +#define NOTICE_UPDATE_CC(EXP, INSN) cris_notice_update_cc (EXP, INSN) + +/* FIXME: Maybe define CANONICALIZE_COMPARISON later, when playing with + optimizations. It is needed; currently we do this with instruction + patterns and NOTICE_UPDATE_CC. */ + + +/* Node: Costs */ + +/* Regardless of the presence of delay slots, the default value of 1 for + BRANCH_COST is the best in the range (1, 2, 3), tested with gcc-2.7.2 + with testcases ipps and gcc, giving smallest and fastest code. */ + +#define SLOW_BYTE_ACCESS 0 + +/* This is the threshold *below* which inline move sequences of + word-length sizes will be emitted. The "9" will translate to + (9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions + (8 instruction sequences) or less. */ +#define MOVE_RATIO(speed) 9 + + +/* Node: Sections */ + +#define TEXT_SECTION_ASM_OP "\t.text" + +#define DATA_SECTION_ASM_OP "\t.data" + +#define FORCE_EH_FRAME_INFO_IN_DATA_SECTION (! TARGET_ELF) + +/* The jump table is immediately connected to the preceding insn. */ +#define JUMP_TABLES_IN_TEXT_SECTION 1 + + +/* Node: PIC */ + +/* Helper type. */ + +enum cris_pic_symbol_type + { + cris_no_symbol = 0, + cris_got_symbol = 1, + cris_rel_symbol = 2, + cris_got_symbol_needing_fixup = 3, + cris_invalid_pic_symbol = 4 + }; + +#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? CRIS_GOT_REGNUM : INVALID_REGNUM) + +#define LEGITIMATE_PIC_OPERAND_P(X) cris_legitimate_pic_operand (X) + + +/* Node: File Framework */ + +/* We don't want an .ident for gcc. To avoid that but still support + #ident, we override ASM_OUTPUT_IDENT and, since the gcc .ident is its + only use besides ASM_OUTPUT_IDENT, undef IDENT_ASM_OP from elfos.h. */ +#undef IDENT_ASM_OP +#undef ASM_OUTPUT_IDENT +#define ASM_OUTPUT_IDENT(FILE, NAME) \ + fprintf (FILE, "%s\"%s\"\n", "\t.ident\t", NAME); + +#define ASM_APP_ON "#APP\n" + +#define ASM_APP_OFF "#NO_APP\n" + + +/* Node: Data Output */ + +#define OUTPUT_ADDR_CONST_EXTRA(STREAM, X, FAIL) \ + do { if (!cris_output_addr_const_extra (STREAM, X)) goto FAIL; } while (0) + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) (C) == '@' + +/* Node: Uninitialized Data */ + +/* Remember to round off odd values if we want data alignment, + since we cannot do that with an .align directive. + + Using .comm causes the space not to be reserved in .bss, but by + tricks with the symbol type. Not good if other tools than binutils + are used on the object files. Since ".global ... .lcomm ..." works, we + use that. Use .._ALIGNED_COMMON, since gcc whines when we only have + ..._COMMON, and we prefer to whine ourselves; BIGGEST_ALIGNMENT is not + the one to check. This done for a.out only. */ +/* FIXME: I suspect a bug in gcc with alignment. Do not warn until + investigated; it mucks up the testsuite results. */ +#define CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, LOCAL) \ + do \ + { \ + int align_ = (ALIGN) / BITS_PER_UNIT; \ + if (TARGET_DATA_ALIGN && TARGET_ALIGN_BY_32 && align_ < 4) \ + align_ = 4; \ + else if (TARGET_DATA_ALIGN && align_ < 2) \ + align_ = 2; \ + /* FIXME: Do we need this? 
*/ \ + else if (align_ < 1) \ + align_ = 1; \ + \ + if (TARGET_ELF) \ + { \ + if (LOCAL) \ + { \ + fprintf ((FILE), "%s", LOCAL_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "\n"); \ + } \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ",%u,%u\n", (int)(SIZE), align_); \ + } \ + else \ + { \ + /* We can't tell a one-only or weak COMM from a "global \ + COMM" so just make all non-locals weak. */ \ + if (! (LOCAL)) \ + ASM_WEAKEN_LABEL (FILE, NAME); \ + fputs ("\t.lcomm ", (FILE)); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ",%u\n", \ + ((int)(SIZE) + (align_ - 1)) & ~(align_ - 1)); \ + } \ + } \ + while (0) + +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 0) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 1) + +/* Node: Label Output */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global " + +#define SUPPORTS_WEAK 1 + +#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYM) \ + cris_asm_output_symbol_ref (STREAM, SYM) + +#define ASM_OUTPUT_LABEL_REF(STREAM, BUF) \ + cris_asm_output_label_ref (STREAM, BUF) + +/* Remove any previous definition (elfos.h). */ +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + sprintf (LABEL, "*%s%s%ld", LOCAL_LABEL_PREFIX, PREFIX, (long) NUM) + +/* Node: Initialization */ +/* (no definitions) */ + +/* Node: Macros for Initialization */ +/* (no definitions) */ + +/* Node: Instruction Output */ + +#define REGISTER_NAMES \ + {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", \ + "r9", "r10", "r11", "r12", "r13", "sp", "acr", "srp", "mof", "faked_ap", "dccr"} + +#define ADDITIONAL_REGISTER_NAMES \ + {{"r14", 14}, {"r15", 15}, {"pc", 15}} + +/* Output an empty line to illustrate the presence of the delay slot. */ +#define DBR_OUTPUT_SEQEND(FILE) \ + fprintf (FILE, "\n") + +#define LOCAL_LABEL_PREFIX (TARGET_ELF ? "." : "") + +/* cppinit.c initializes a const array from this, so it must be constant, + can't have it different based on options. Luckily, the prefix is + always allowed, so let's have it on all GCC-generated code. Note that + we have this verbatim everywhere in the back-end, not using %R or %s or + such. */ +#define REGISTER_PREFIX "$" + +/* Remove any previous definition (elfos.h). */ +/* We use -fno-leading-underscore to remove it, when necessary. */ +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \ + fprintf (FILE, \ + TARGET_V32 \ + ? "\tsubq 4,$sp\n\tmove $%s,[$sp]\n" : "\tpush $%s\n", \ + reg_names[REGNO]) + +#define ASM_OUTPUT_REG_POP(FILE, REGNO) \ + fprintf (FILE, "\tmove [$sp+],$%s\n", reg_names[REGNO]) + + +/* Node: Dispatch Tables */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + do \ + { \ + if (TARGET_V32) \ + asm_fprintf (FILE, "\t.word %LL%d-.\n", VALUE); \ + else \ + asm_fprintf (FILE, "\t.word %LL%d-%LL%d\n", VALUE, REL); \ + } \ + while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + asm_fprintf (FILE, "\t.dword %LL%d\n", VALUE) + +/* Defined to also emit an .align in elfos.h. We don't want that. 
*/ +#undef ASM_OUTPUT_CASE_LABEL + +/* Since the "bound" insn loads the comparison value if the compared< + value (register) is out of bounds (0..comparison value-1), we need + to output another case to catch it. + The way to find it is to look for the label_ref at the else-arm inside + the expanded casesi core-insn. + FIXME: Check this construct when changing to new version of gcc. */ +#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \ + cris_asm_output_case_end (STREAM, NUM, TABLE) + + +/* Node: Exception Region Output */ +/* (no definitions) */ +/* FIXME: Fill in with our own optimized layout. */ + +/* Node: Alignment Output */ + +#define ASM_OUTPUT_ALIGN(FILE, LOG) \ + fprintf (FILE, "\t.align %d\n", (LOG)) + + +/* Node: All Debuggers */ + +#define DBX_REGISTER_NUMBER(REGNO) \ + ((REGNO) == CRIS_SRP_REGNUM ? CRIS_CANONICAL_SRP_REGNUM : \ + (REGNO) == CRIS_MOF_REGNUM ? CRIS_CANONICAL_MOF_REGNUM : \ + (REGNO) == CRIS_CC0_REGNUM ? CRIS_CANONICAL_CC0_REGNUM : \ + (REGNO)) + +/* FIXME: Investigate DEBUGGER_AUTO_OFFSET, DEBUGGER_ARG_OFFSET. */ + + +/* Node: DBX Options */ + +/* Is this correct? Check later. */ +#define DBX_NO_XREFS + +#define DBX_CONTIN_LENGTH 0 + +/* FIXME: Is this needed when we have 0 DBX_CONTIN_LENGTH? */ +#define DBX_CONTIN_CHAR '?' + + +/* Node: DBX Hooks */ +/* (no definitions) */ + +/* Node: File names and DBX */ +/* (no definitions) */ + + +/* Node: SDB and DWARF */ +/* (no definitions) */ + +/* Node: Misc */ + +/* A combination of the bound (umin) insn together with a + sign-extended add via the table to PC seems optimal. + If the table overflows, the assembler will take care of it. + Theoretically, in extreme cases (uncertain if they occur), an error + will be emitted, so FIXME: Check how large case-tables are emitted, + possible add an option to emit SImode case-tables. */ +#define CASE_VECTOR_MODE HImode + +#define CASE_VECTOR_PC_RELATIVE 1 + +/* FIXME: Investigate CASE_VECTOR_SHORTEN_MODE to make sure HImode is not + used when broken-.word could possibly fail (plus testcase). */ + +#define FIXUNS_TRUNC_LIKE_FIX_TRUNC + +/* This is the number of bytes that can be moved in one + reasonably fast instruction sequence. For CRIS, this is two + instructions: mem => reg, reg => mem. */ +#define MOVE_MAX 4 + +/* Maybe SHIFT_COUNT_TRUNCATED is safe to define? FIXME: Check later. */ + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +#define NO_IMPLICIT_EXTERN_C + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md new file mode 100644 index 000000000..dd5d4940c --- /dev/null +++ b/gcc/config/cris/cris.md @@ -0,0 +1,5110 @@ +;; GCC machine description for CRIS cpu cores. +;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, +;; 2008, 2009, 2010 Free Software Foundation, Inc. +;; Contributed by Axis Communications. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The original PO technology requires these to be ordered by speed, +;; so that assigner will pick the fastest. + +;; See files "md.texi" and "rtl.def" for documentation on define_insn, +;; match_*, et. al. +;; +;; The function cris_notice_update_cc in cris.c handles condition code +;; updates for most instructions, helped by the "cc" attribute. + +;; There are several instructions that are orthogonal in size, and seems +;; they could be matched by a single pattern without a specified size +;; for the operand that is orthogonal. However, this did not work on +;; gcc-2.7.2 (and probably not on gcc-2.8.1), relating to that when a +;; constant is substituted into an operand, the actual mode must be +;; deduced from the pattern. There is reasonable hope that that has been +;; fixed, so FIXME: try again. + +;; You will notice that three-operand alternatives ("=r", "r", "!To") +;; are marked with a "!" constraint modifier to avoid being reloaded +;; into. This is because gcc would otherwise prefer to use the constant +;; pool and its offsettable address instead of reloading to an +;; ("=r", "0", "i") alternative. Also, the constant-pool support was not +;; only suboptimal but also buggy in 2.7.2, ??? maybe only in 2.6.3. + +;; All insns that look like (set (...) (plus (...) (reg:SI 8))) +;; get problems when reloading r8 (frame pointer) to r14 + offs (stack +;; pointer). Thus the instructions that get into trouble have specific +;; checks against matching frame_pointer_rtx. +;; ??? But it should be re-checked for gcc > 2.7.2 +;; FIXME: This changed some time ago (from 2000-03-16) for gcc-2.9x. + +;; FIXME: When PIC, all [rX=rY+S] could be enabled to match +;; [rX=gotless_symbol]. +;; The movsi for a gotless symbol could be split (post reload). + + +(define_constants + [ + ;; PLT reference from call expansion: operand 0 is the address, + ;; the mode is VOIDmode. Always wrapped in CONST. + ;; The value is relative to the GOT. + (CRIS_UNSPEC_PLT_GOTREL 0) + + ;; PLT reference from call expansion: operand 0 is the address, + ;; the mode is VOIDmode. Always wrapped in CONST. + ;; The value is relative to the PC. It's arch-dependent whether + ;; the offset counts from the start or the end of the current item. + (CRIS_UNSPEC_PLT_PCREL 1) + + ;; The address of the global offset table as a source operand. + (CRIS_UNSPEC_GOT 2) + + ;; The offset from the global offset table to the operand. + (CRIS_UNSPEC_GOTREL 3) + + ;; The PC-relative offset to the operand. It's arch-dependent whether + ;; the offset counts from the start or the end of the current item. + (CRIS_UNSPEC_PCREL 4) + + ;; The index into the global offset table of a symbol, while + ;; also generating a GOT entry for the symbol. + (CRIS_UNSPEC_GOTREAD 5) + + ;; Similar to CRIS_UNSPEC_GOTREAD, but also generating a PLT entry. + (CRIS_UNSPEC_PLTGOTREAD 6) + + ;; Condition for v32 casesi jump, since it needs to have if_then_else + ;; form with register as one branch and default label as other. + ;; Operand 0 is const_int 0. + (CRIS_UNSPEC_CASESI 7) + + ;; Stack frame deallocation barrier. 
+ (CRIS_UNSPEC_FRAME_DEALLOC 8) + + ;; Swap all 32 bits of the operand; 31 <=> 0, 30 <=> 1... + (CRIS_UNSPEC_SWAP_BITS 9) + ]) + +;; Register numbers. +(define_constants + [(CRIS_GOT_REGNUM 0) + (CRIS_STATIC_CHAIN_REGNUM 7) + (CRIS_FP_REGNUM 8) + (CRIS_SP_REGNUM 14) + (CRIS_ACR_REGNUM 15) + (CRIS_SRP_REGNUM 16) + (CRIS_MOF_REGNUM 17) + (CRIS_AP_REGNUM 18) + (CRIS_CC0_REGNUM 19)] +) + +;; We need an attribute to define whether an instruction can be put in +;; a branch-delay slot or not, and whether it has a delay slot. +;; +;; Branches and return instructions have a delay slot, and cannot +;; themselves be put in a delay slot. This has changed *for short +;; branches only* between architecture variants, but the possible win +;; is presumed negligible compared to the added complexity of the machine +;; description: one would have to add always-correct infrastructure to +;; distinguish short branches. +;; +;; Whether an instruction can be put in a delay slot depends on the +;; instruction (all short instructions except jumps and branches) +;; and the addressing mode (must not be prefixed or referring to pc). +;; In short, any "slottable" instruction must be 16 bit and not refer +;; to pc, or alter it. +;; +;; The possible values are "yes", "no", "has_slot", "has_return_slot" +;; and "has_call_slot". +;; Yes/no tells whether the insn is slottable or not. Has_call_slot means +;; that the insn is a call insn, which for CRIS v32 has a delay-slot. +;; Of special concern is that no RTX_FRAME_RELATED insn must go in that +;; call delay slot, as it's located in the address *after* the call insn, +;; and the unwind machinery doesn't know about delay slots. +;; Has_slot means that the insn is a branch insn (which are +;; not considered slottable since that is generally true). Having the +;; seemingly illogical value "has_slot" means we do not have to add +;; another attribute just to say that an insn has a delay-slot, since it +;; also infers that it is not slottable. Better names for the attribute +;; were found to be longer and not add readability to the machine +;; description. +;; Has_return_slot is similar, for the return insn. +;; +;; The default that is defined here for this attribute is "no", not +;; slottable, not having a delay-slot, so there's no need to worry about +;; it being wrong for non-branch and return instructions. +;; The default could depend on the kind of insn and the addressing +;; mode, but that would need more attributes and hairier, more error +;; prone code. +;; +;; There is an extra memory constraint, 'Q', which recognizes an indirect +;; register. The constraints 'Q' and '>' together match all possible +;; memory operands that are slottable. +;; For other operands, you need to check if it has a valid "slottable" +;; quick-immediate operand, where the particular signedness-variation +;; may match the constraints 'I' or 'J'.), and include it in the +;; constraint pattern for the slottable pattern. An alternative using +;; only "r" constraints is most often slottable. + +(define_attr "slottable" "no,yes,has_slot,has_return_slot,has_call_slot" + (const_string "no")) + +;; We also need attributes to sanely determine the condition code +;; state. See cris_notice_update_cc for how this is used. + +(define_attr "cc" "none,clobber,normal,noov32,rev" (const_string "normal")) + +;; At the moment, this attribute is just used to help bb-reorder do its +;; work; the default 0 doesn't help it. Many insns have other lengths, +;; though none are shorter. 
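+;; (Illustrative note, not part of the imported sources: a pattern with
+;; a longer encoding would simply override the 2-byte default set just
+;; below in its own attribute clause, e.g. [(set_attr "length" "4")],
+;; in the same way the "slottable" and "cc" attributes above are
+;; overridden per insn or per alternative further down.)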
+(define_attr "length" "" (const_int 2)) + +;; A branch has one delay-slot. The instruction in the +;; delay-slot is always executed, independent of whether the branch is +;; taken or not. Note that besides setting "slottable" to "has_slot", +;; there also has to be a "%#" at the end of a "delayed" instruction +;; output pattern (for "jump" this means "ba %l0%#"), so print_operand can +;; catch it and print a "nop" if necessary. This method was stolen from +;; sparc.md. + +(define_delay (eq_attr "slottable" "has_slot") + [(eq_attr "slottable" "yes") (nil) (nil)]) + +;; We can't put prologue insns in call-insn delay-slots when +;; DWARF2 unwind info is emitted, because the unwinder matches the +;; address after the insn. It must see the return address of a call at +;; a position at least *one byte after* the insn, or it'll think that +;; the insn hasn't been executed. If the insn is in a delay-slot of a +;; call, it's just *exactly* after the insn. + +(define_delay (eq_attr "slottable" "has_call_slot") + [(and (eq_attr "slottable" "yes") + (ior (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)") + (const_int 0)) + (eq (symbol_ref "flag_exceptions") + (const_int 0)))) + (nil) (nil)]) + +;; The insn in the return insn slot must not be the +;; return-address-register restore. FIXME: Use has_slot and express +;; as a parallel with a use of the return-address-register (currently +;; only SRP). However, this requires an amount of fixing tests for +;; naked RETURN in middle-end. +(define_delay (eq_attr "slottable" "has_return_slot") + [(and (eq_attr "slottable" "yes") + (eq (symbol_ref "dead_or_set_regno_p (insn, CRIS_SRP_REGNUM)") + (const_int 0))) + (nil) (nil)]) + + +;; Iterator definitions. + +;; For the "usual" pattern size alternatives. +(define_mode_iterator BWD [SI HI QI]) +(define_mode_iterator WD [SI HI]) +(define_mode_iterator BW [HI QI]) +(define_mode_attr S [(SI "HI") (HI "QI")]) +(define_mode_attr s [(SI "hi") (HI "qi")]) +(define_mode_attr m [(SI ".d") (HI ".w") (QI ".b")]) +(define_mode_attr mm [(SI ".w") (HI ".b")]) +(define_mode_attr nbitsm1 [(SI "31") (HI "15") (QI "7")]) + +;; For the sign_extend+zero_extend variants. +(define_code_iterator szext [sign_extend zero_extend]) +(define_code_attr u [(sign_extend "") (zero_extend "u")]) +(define_code_attr su [(sign_extend "s") (zero_extend "u")]) + +;; For the shift variants. +(define_code_iterator shift [ashiftrt lshiftrt ashift]) +(define_code_iterator shiftrt [ashiftrt lshiftrt]) +(define_code_attr shlr [(ashiftrt "ashr") (lshiftrt "lshr") (ashift "ashl")]) +(define_code_attr slr [(ashiftrt "asr") (lshiftrt "lsr") (ashift "lsl")]) + +(define_code_iterator ncond [eq ne gtu ltu geu leu]) +(define_code_iterator ocond [gt le]) +(define_code_iterator rcond [lt ge]) +(define_code_attr CC [(eq "eq") (ne "ne") (gt "gt") (gtu "hi") (lt "lt") + (ltu "lo") (ge "ge") (geu "hs") (le "le") (leu "ls")]) +(define_code_attr rCC [(eq "ne") (ne "eq") (gt "le") (gtu "ls") (lt "ge") + (ltu "hs") (ge "lt") (geu "lo") (le "gt") (leu "hi")]) +(define_code_attr oCC [(lt "mi") (ge "pl")]) +(define_code_attr roCC [(lt "pl") (ge "mi")]) + +;; Operand and operator predicates. + +(include "predicates.md") + +;; Test insns. + +;; No test insns with side-effect on the mem addressing. +;; +;; See note on cmp-insns with side-effects (or lack of them) + +;; Normal named test patterns from SI on. 
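+;; (A hedged sketch for orientation, not a pattern of its own: a "test"
+;; in this file is simply a cc0-setting compare against zero, i.e. RTL
+;; of the rough shape
+;;   (set (cc0) (compare (match_operand:SI 0 "nonimmediate_operand" "")
+;;                       (const_int 0)))
+;; which the per-mode patterns below implement.)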
+ +(define_insn "*tstsi" + [(set (cc0) + (compare (match_operand:SI 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "" +{ + if (which_alternative == 0 && TARGET_V32) + return "cmpq 0,%0"; + return "test.d %0"; +} + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*tst_cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "cris_cc0_user_requires_cmp (insn)" + "@ + cmp 0,%0 + test %0 + test %0" + [(set_attr "slottable" "no,yes,no")]) + +(define_insn "*tst_non_cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "!cris_cc0_user_requires_cmp (insn)" + "@ + move %0,%0 + test %0 + test %0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "noov32,*,*")]) + +;; It seems that the position of the sign-bit and the fact that 0.0 is +;; all 0-bits would make "tstsf" a straight-forward implementation; +;; either "test.d" it for positive/negative or "btstq 30,r" it for +;; zeroness. +;; +;; FIXME: Do that some time; check next_cc0_user to determine if +;; zero or negative is tested for. + +;; Compare insns. + +;; We could optimize the sizes of the immediate operands for various +;; cases, but that is not worth it because of the very little usage of +;; DImode for anything else but a structure/block-mode. Just do the +;; obvious stuff for the straight-forward constraint letters. + +(define_insn "*cmpdi_non_v32" + [(set (cc0) + (compare (match_operand:DI 0 "nonimmediate_operand" "rm,r,r,r,r,r,r,o") + (match_operand:DI 1 "general_operand" "M,Kc,I,P,n,r,o,r")))] + "!TARGET_V32" + "@ + test.d %M0\;ax\;test.d %H0 + cmpq %1,%M0\;ax\;cmpq 0,%H0 + cmpq %1,%M0\;ax\;cmpq -1,%H0 + cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M0,%M1\;ax\;cmp.d %H0,%H1") + +(define_insn "*cmpdi_v32" + [(set (cc0) + (compare (match_operand:DI 0 "register_operand" "r,r,r,r,r") + (match_operand:DI 1 "nonmemory_operand" "Kc,I,P,n,r")))] + "TARGET_V32" + "@ + cmpq %1,%M0\;ax\;cmpq 0,%H0 + cmpq %1,%M0\;ax\;cmpq -1,%H0 + cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0") + +;; Note that compare insns with side effect addressing mode (e.g.): +;; +;; cmp.S [rx=ry+i],rz; +;; cmp.S [%3=%1+%2],%0 +;; +;; are *not* usable for gcc since the reloader *does not accept* +;; cc0-changing insns with side-effects other than setting the condition +;; codes. The reason is that the reload stage *may* cause another insn to +;; be output after the main instruction, in turn invalidating cc0 for the +;; insn using the test. (This does not apply to the CRIS case, since a +;; reload for output -- move to memory -- does not change the condition +;; code. Unfortunately we have no way to describe that at the moment. I +;; think code would improve being in the order of one percent faster. + +;; We have cmps and cmpu (compare reg w. sign/zero extended mem). +;; These are mostly useful for compares in SImode, using 8 or 16-bit +;; constants, but sometimes gcc will find its way to use it for other +;; (memory) operands. Avoid side-effect patterns, though (see above). 
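+;; (Sketch only, register numbers arbitrary: a compare of an SImode
+;; register against a sign-extended HImode memory operand reaches the
+;; pattern below as
+;;   (set (cc0) (compare (reg:SI 10)
+;;                       (sign_extend:SI (mem:HI (reg:SI 11)))))
+;; and is emitted as a single cmps/cmpu instruction rather than an
+;; explicit extension followed by a cmp.d.)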
+ +(define_insn "*cmp_ext" + [(set (cc0) + (compare + (match_operand:SI 0 "register_operand" "r,r") + (match_operator:SI 2 "cris_extend_operator" + [(match_operand:BW 1 "memory_operand" "Q>,m")])))] + "" + "cmp%e2 %1,%0" + [(set_attr "slottable" "yes,no")]) + +;; Swap operands; it seems the canonical look (if any) is not enforced. +;; +;; FIXME: Investigate that. + +(define_insn "*cmp_swapext" + [(set (cc0) + (compare + (match_operator:SI 2 "cris_extend_operator" + [(match_operand:BW 0 "memory_operand" "Q>,m")]) + (match_operand:SI 1 "register_operand" "r,r")))] + "" + "cmp%e2 %0,%1" + [(set_attr "slottable" "yes,no") + (set_attr "cc" "rev")]) + +;; The "normal" compare patterns, from SI on. Special-cases with zero +;; are covered above. + +(define_insn "*cmpsi" + [(set (cc0) + (compare + (match_operand:SI 0 "nonimmediate_operand" "r,r,r, Q>,r,r,m") + (match_operand:SI 1 "general_operand" "I,r,Q>,r, P,g,r")))] + "" + "@ + cmpq %1,%0 + cmp.d %1,%0 + cmp.d %1,%0 + cmp.d %0,%1 + cmp%e1.%z1 %1,%0 + cmp.d %1,%0 + cmp.d %0,%1" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no") + (set_attr "cc" "normal,normal,normal,rev,normal,normal,rev")]) + +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,r, Q>,r,m") + (match_operand:BW 1 "general_operand" "r,Q>,r, g,r")))] + "" + "@ + cmp %1,%0 + cmp %1,%0 + cmp %0,%1 + cmp %1,%0 + cmp %0,%1" + [(set_attr "slottable" "yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,rev,normal,rev")]) + +;; Pattern matching the BTST insn. +;; It is useful for "if (i & val)" constructs, where val is an exact +;; power of 2, or if val + 1 is a power of two, where we check for a bunch +;; of zeros starting at bit 0). + +;; SImode. This mode is the only one needed, since gcc automatically +;; extends subregs for lower-size modes. FIXME: Add testcase. +(define_insn "*btst" + [(set (cc0) + (compare + (zero_extract:SI + (match_operand:SI 0 "nonmemory_operand" "r, r,r, r,r, r,Kp") + (match_operand:SI 1 "const_int_operand" "Kc,n,Kc,n,Kc,n,n") + (match_operand:SI 2 "nonmemory_operand" "M, M,Kc,n,r, r,r")) + (const_int 0)))] + ;; Either it is a single bit, or consecutive ones starting at 0. + ;; The btst ones depend on stuff in NOTICE_UPDATE_CC. + "CONST_INT_P (operands[1]) + && (operands[1] == const1_rtx || operands[2] == const0_rtx) + && (REG_S_P (operands[0]) + || (operands[1] == const1_rtx + && REG_S_P (operands[2]) + && CONST_INT_P (operands[0]) + && exact_log2 (INTVAL (operands[0])) >= 0)) + && !TARGET_CCINIT" + +;; The next-to-last "&&" condition above should be caught by some kind of +;; canonicalization in gcc, but we can easily help with it here. +;; It results from expressions of the type +;; "power_of_2_value & (1 << y)". +;; +;; Since there may be codes with tests in on bits (in constant position) +;; beyond the size of a word, handle that by assuming those bits are 0. +;; GCC should handle that, but it's a matter of easily-added belts while +;; having suspenders. + + "@ + btstq (%1-1),%0 + cmpq 0,%0 + btstq %2,%0 + clearf nz + btst %2,%0 + clearf nz + cmpq %p0,%2" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Move insns. + +;; The whole mandatory movdi family is here; expander, "anonymous" +;; recognizer and splitter. We're forced to have a movdi pattern, +;; although GCC should be able to split it up itself. Normally it can, +;; but if other insns have DI operands (as is the case here), reload +;; must be able to generate or match a movdi. many testcases fail at +;; -O3 or -fssa if we don't have this. 
FIXME: Fix GCC... See +;; . +;; However, a patch from Richard Kenner (similar to the cause of +;; discussion at the URL above), indicates otherwise. See +;; . +;; The truth has IMO is not been decided yet, so check from time to +;; time by disabling the movdi patterns. + +;; To appease testcase gcc.c-torture/execute/920501-2.c (and others) at +;; -O0, we need a movdi as a temporary measure. Here's how things fail: +;; A cmpdi RTX needs reloading (global): +;; (insn 185 326 186 (set (cc0) +;; (compare (mem/f:DI (reg/v:SI 22) 0) +;; (const_int 1 [0x1]))) 4 {cmpdi} (nil) +;; (nil)) +;; Now, reg 22 is reloaded for input address, and the mem is also moved +;; out of the instruction (into a register), since one of the operands +;; must be a register. Reg 22 is reloaded (into reg 10), and the mem is +;; moved out and synthesized in SImode parts (reg 9, reg 10 - should be ok +;; wrt. overlap). The bad things happen with the synthesis in +;; emit_move_insn_1; the location where to substitute reg 10 is lost into +;; two new RTX:es, both still having reg 22. Later on, the left-over reg +;; 22 is recognized to have an equivalent in memory which is substituted +;; straight in, and we end up with an unrecognizable insn: +;; (insn 325 324 326 (set (reg:SI 9 r9) +;; (mem/f:SI (mem:SI (plus:SI (reg:SI 8 r8) +;; (const_int -84 [0xffffffac])) 0) 0)) -1 (nil) +;; (nil)) +;; which is the first part of the reloaded synthesized "movdi". +;; The right thing would be to add equivalent replacement locations for +;; insn with pseudos that need more reloading. The question is where. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + if (MEM_P (operands[0]) + && operands[1] != const0_rtx + && (!TARGET_V32 || (!REG_P (operands[1]) && can_create_pseudo_p ()))) + operands[1] = copy_to_mode_reg (DImode, operands[1]); + + /* Some other ports (as of 2001-09-10 for example mcore and romp) also + prefer to split up constants early, like this. The testcase in + gcc.c-torture/execute/961213-1.c shows that CSE2 gets confused by the + resulting subreg sets when using the construct from mcore (as of FSF + CVS, version -r 1.5), and it believes that the high part (the last one + emitted) is the final value. */ + if ((CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_DOUBLE) + && ! reload_completed + && ! reload_in_progress) + { + rtx insns; + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + start_sequence (); + emit_move_insn (operand_subword (op0, 0, 1, DImode), + operand_subword (op1, 0, 1, DImode)); + emit_move_insn (operand_subword (op0, 1, 1, DImode), + operand_subword (op1, 1, 1, DImode)); + insns = get_insns (); + end_sequence (); + + emit_insn (insns); + DONE; + } +}) + +(define_insn_and_split "*movdi_insn_non_v32" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rx,m") + (match_operand:DI 1 "general_operand" "rx,g,rxM"))] + "(register_operand (operands[0], DImode) + || register_operand (operands[1], DImode) + || operands[1] == const0_rtx) + && !TARGET_V32" + "#" + "&& reload_completed" + [(match_dup 2)] + "operands[2] = cris_split_movdx (operands);") + +;; Overlapping (but non-identical) source memory address and destination +;; register would be a compiler bug, so we don't have to specify that. 
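+;; (A worked example, registers arbitrary and only meant as an
+;; illustration: for the v32 alternative that loads a DImode value from
+;; memory through an address register that must be left unchanged, the
+;; output below expands to
+;;   move.d [rY],rXlo
+;;   addq 4,rY
+;;   move.d [rY],rXhi
+;;   subq 4,rY
+;; which is also why those alternatives are marked as clobbering the
+;; condition codes in the "cc" attribute.)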
+(define_insn "*movdi_v32" + [(set + (match_operand:DI 0 "nonimmediate_operand" "=r,rx,&r,>, m,r,x,m") + (match_operand:DI 1 "general_operand" "rxi,r>,m, rx,r,m,m,x"))] + "TARGET_V32" +{ + switch (which_alternative) + { + /* FIXME: 1) Use autoincrement where possible. 2) Have peephole2, + particularly for cases where the address register is dead. */ + case 5: + if (REGNO (operands[0]) == REGNO (XEXP (operands[1], 0))) + return "addq 4,%L1\;move.d %1,%H0\;subq 4,%L1\;move.d %1,%M0"; + gcc_assert (REGNO (operands[0]) + 1 == REGNO (XEXP (operands[1], 0))); + return "move.d [%L1+],%M0\;move.d [%L1],%H0"; + case 2: + /* We could do away with the addq if we knew the address-register + isn't ACR. If we knew the address-register is dead, we could do + away with the subq too. */ + return "move.d [%L1],%M0\;addq 4,%L1\;move.d [%L1],%H0\;subq 4,%L1"; + case 4: + return "move.d %M1,[%L0]\;addq 4,%L0\;move.d %H1,[%L0]\;subq 4,%L0"; + case 6: + return "move [%L1],%M0\;addq 4,%L1\;move [%L1],%H0\;subq 4,%L1"; + case 7: + return "move %M1,[%L0]\;addq 4,%L0\;move %H1,[%L0]\;subq 4,%L0"; + + default: + return "#"; + } +} + ;; The non-split cases clobber cc0 because of their adds and subs. + ;; Beware that NOTICE_UPDATE_CC is called before the forced split happens. + [(set_attr "cc" "*,*,clobber,*,clobber,clobber,*,*")]) + +;; Much like "*movdi_insn_non_v32". Overlapping registers and constants +;; is handled so much better in cris_split_movdx. +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "TARGET_V32 + && reload_completed + && (!MEM_P (operands[0]) || !REG_P (XEXP (operands[0], 0))) + && (!MEM_P (operands[1]) || !REG_P (XEXP (operands[1], 0)))" + [(match_dup 2)] + "operands[2] = cris_split_movdx (operands);") + +;; Side-effect patterns for move.S1 [rx=ry+rx.S2],rw +;; and move.S1 [rx=ry+i],rz +;; Then movs.S1 and movu.S1 for both modes. +;; +;; move.S1 [rx=ry+rz.S],rw avoiding when rx is ry, or rw is rx +;; FIXME: These could have anonymous mode for operand 0. +;; FIXME: Special registers' alternatives too. + +(define_insn "*mov_side_biap" + [(set (match_operand:BW 0 "register_operand" "=r,r") + (mem:BW (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + move [%4=%3+%1%T2],%0") + +(define_insn "*mov_sidesisf_biap" + [(set (match_operand 0 "register_operand" "=r,r,x,x") + (mem (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r,r,r") + (match_operand:SI 2 "const_int_operand" "n,n,n,n")) + (match_operand:SI 3 "register_operand" "r,r,r,r")))) + (set (match_operand:SI 4 "register_operand" "=*3,r,*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + move.%s0 [%4=%3+%1%T2],%0 + # + move [%4=%3+%1%T2],%0") + +;; move.S1 [rx=ry+i],rz +;; avoiding move.S1 [ry=ry+i],rz +;; and move.S1 [rz=ry+i],rz +;; Note that "i" is allowed to be a register. 
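+;; (Sketch of the RTL shape involved, not from the imported sources and
+;; with arbitrary registers: such a side-effect load is an implicit
+;; parallel of the load itself and the address update,
+;;   [(set (reg:QI 0) (mem:QI (plus:SI (reg:SI 11) (const_int 8))))
+;;    (set (reg:SI 12) (plus:SI (reg:SI 11) (const_int 8)))]
+;; i.e. roughly "move.b [r12=r11+8],r0", with the avoided cases above
+;; being those where the updated register is also the base or the
+;; destination.)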
+ +(define_insn "*mov_side" + [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r") + (mem:BW + (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J'))) + return "#"; + if (which_alternative == 4) + return "move [%3=%2%S1],%0"; + return "move [%3=%1%S2],%0"; +}) + +(define_insn "*mov_sidesisf" + [(set (match_operand 0 "register_operand" "=r,r,r,x,x,x,r,r,x,x") + (mem + (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,r,r,r,R,R,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r>Rn,r,>Rn,r,r,r,r")))) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*1,r,r,*2,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 + || which_alternative == 3 + || which_alternative == 6 + || which_alternative == 8) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J'))) + return "#"; + if (which_alternative < 3) + return "move.%s0 [%3=%1%S2],%0"; + if (which_alternative == 7) + return "move.%s0 [%3=%2%S1],%0"; + if (which_alternative == 9) + return "move [%3=%2%S1],%0"; + return "move [%3=%1%S2],%0"; +}) + +;; Other way around; move to memory. + +;; Note that the condition (which for side-effect patterns is usually a +;; call to cris_side_effect_mode_ok), isn't consulted for register +;; allocation preferences -- constraints is the method for that. The +;; drawback is that we can't exclude register allocation to cause +;; "move.s rw,[rx=ry+rz.S]" when rw==rx without also excluding rx==ry or +;; rx==rz if we use an earlyclobber modifier for the constraint for rx. +;; Instead of that, we recognize and split the cases where dangerous +;; register combinations are spotted: where a register is set in the +;; side-effect, and used in the main insn. We don't handle the case where +;; the set in the main insn overlaps the set in the side-effect; that case +;; must be handled in gcc. We handle just the case where the set in the +;; side-effect overlaps the input operand of the main insn (i.e. just +;; moves to memory). 
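+;; (A hedged example of that overlap, registers arbitrary: if register
+;; allocation gives the stored value and the side-effect destination
+;; the same register, as in "move.d r3,[r3=r1+r2.d]", the combined insn
+;; cannot be output.  The splitters below then emit, roughly,
+;;   (set (mem ...) (reg 3))                  ; plain store, no update
+;;   (set (reg 3) (reg 1))                    ; copy the base register
+;;   (set (reg 3) (plus (mult ...) (reg 3)))  ; form the new address
+;; matching the replacement sequence in the corresponding define_split.)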
+ +;; +;; move.s rz,[ry=rx+rw.S] + +(define_insn "*mov_side_biap_mem" + [(set (mem:BW (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r,r") + (match_operand:SI 1 "const_int_operand" "n,n,n")) + (match_operand:SI 2 "register_operand" "r,r,r"))) + (match_operand:BW 3 "register_operand" "r,r,r")) + (set (match_operand:SI 4 "register_operand" "=*2,!3,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)" + "@ + # + # + move %3,[%4=%2+%0%T1]") + +(define_insn "*mov_sidesisf_biap_mem" + [(set (mem (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r,r,r,r,r") + (match_operand:SI 1 "const_int_operand" "n,n,n,n,n,n")) + (match_operand:SI 2 "register_operand" "r,r,r,r,r,r"))) + (match_operand 3 "register_operand" "r,r,r,x,x,x")) + (set (match_operand:SI 4 "register_operand" "=*2,!3,r,*2,!3,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "GET_MODE_SIZE (GET_MODE (operands[3])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)" + "@ + # + # + move.%s3 %3,[%4=%2+%0%T1] + # + # + move %3,[%4=%2+%0%T1]") + +;; Split for the case above where we're out of luck with register +;; allocation (again, the condition isn't checked for that), and we end up +;; with the set in the side-effect getting the same register as the input +;; register. + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))]) + (match_operand 3 "register_operand" "")) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "reload_completed && reg_overlap_mentioned_p (operands[4], operands[3])" + [(set (match_dup 5) (match_dup 3)) + (set (match_dup 4) (match_dup 2)) + (set (match_dup 4) + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 4)))] + "operands[5] + = replace_equiv_address (operands[6], + gen_rtx_PLUS (SImode, + gen_rtx_MULT (SImode, + operands[0], + operands[1]), + operands[2]));") + +;; move.s rx,[ry=rz+i] +;; FIXME: These could have anonymous mode for operand 2. 
+ +;; QImode + +(define_insn "*mov_side_mem" + [(set (mem:BW + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,r,R,R,R") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r>Rn,r,>Rn,r,r,r"))) + (match_operand:BW 2 "register_operand" "r,r,r,r,r,r,r")) + (set (match_operand:SI 3 "register_operand" "=*0,!*2,r,r,*1,!*2,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)" +{ + if ((which_alternative == 0 || which_alternative == 4) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'J'))) + return "#"; + if (which_alternative == 1 || which_alternative == 5) + return "#"; + if (which_alternative == 6) + return "move.%s2 %2,[%3=%1%S0]"; + return "move %2,[%3=%0%S1]"; +}) + +;; SImode + +(define_insn "*mov_sidesisf_mem" + [(set (mem + (plus:SI + (match_operand:SI + 0 "cris_bdap_operand" + "%r, r, r,r, r, r,r, R,R, R,R, R") + (match_operand:SI + 1 "cris_bdap_operand" + "r>Rn,r>Rn,r,>Rn,r>Rn,r,>Rn,r,r, r,r, r"))) + (match_operand 2 "register_operand" + "r, r, r,r, x, x,x, r,r, r,x, x")) + (set (match_operand:SI 3 "register_operand" + "=*0,!2, r,r, *0, r,r, *1,!*2,r,*1,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "GET_MODE_SIZE (GET_MODE (operands[2])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)" +{ + if ((which_alternative == 0 || which_alternative == 4) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'J'))) + return "#"; + if (which_alternative == 1 + || which_alternative == 7 + || which_alternative == 8 + || which_alternative == 10) + return "#"; + if (which_alternative < 4) + return "move.%s2 %2,[%3=%0%S1]"; + if (which_alternative == 9) + return "move.%s2 %2,[%3=%1%S0]"; + if (which_alternative == 11) + return "move %2,[%3=%1%S0]"; + return "move %2,[%3=%0%S1]"; +}) + +;; Like the biap case, a split where the set in the side-effect gets the +;; same register as the input register to the main insn, since the +;; condition isn't checked at register allocation. + +(define_split + [(parallel + [(set (match_operator + 4 "cris_mem_op" + [(plus:SI + (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))]) + (match_operand 2 "register_operand" "")) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 0) (match_dup 1)))])] + "reload_completed && reg_overlap_mentioned_p (operands[3], operands[2])" + [(set (match_dup 4) (match_dup 2)) + (set (match_dup 3) (match_dup 0)) + (set (match_dup 3) (plus:SI (match_dup 3) (match_dup 1)))] + "") + +;; Clear memory side-effect patterns. It is hard to get to the mode if +;; the MEM was anonymous, so there will be one for each mode. 
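+;; (For instance -- sketch only, registers arbitrary -- clearing a
+;; 16-bit word while keeping the stepped address pairs a store of
+;; (const_int 0) with the address update, written in the notation used
+;; below as something like "clear.w [r2=r1+8]"; the BWD iterator
+;; supplies the .b/.w/.d variants, one pattern per mode.)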
+ +;; clear.[bwd] [ry=rx+rw.s2] + +(define_insn "*clear_side_biap" + [(set (mem:BWD (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "const_int_operand" "n,n")) + (match_operand:SI 2 "register_operand" "r,r"))) + (const_int 0)) + (set (match_operand:SI 3 "register_operand" "=*2,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "cris_side_effect_mode_ok (MULT, operands, 3, 2, 0, 1, -1)" + "@ + # + clear [%3=%2+%0%T1]") + +;; clear.[bwd] [ry=rz+i] + +(define_insn "*clear_side" + [(set (mem:BWD + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn,r,r"))) + (const_int 0)) + (set (match_operand:SI 2 "register_operand" "=*0,r,r,*1,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "cris_side_effect_mode_ok (PLUS, operands, 2, 0, 1, -1, -1)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'J'))) + return "#"; + if (which_alternative == 4) + return "clear [%2=%1%S0]"; + return "clear [%2=%0%S1]"; +}) + +;; Normal move patterns from SI on. + +(define_expand "movsi" + [(set + (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "cris_general_operand_or_symbol" ""))] + "" +{ + /* If the output goes to a MEM, make sure we have zero or a register as + input. */ + if (MEM_P (operands[0]) + && ! REG_S_P (operands[1]) + && operands[1] != const0_rtx + && can_create_pseudo_p ()) + operands[1] = force_reg (SImode, operands[1]); + + /* If we're generating PIC and have an incoming symbol, validize it to a + general operand or something that will match a special pattern. + + FIXME: Do we *have* to recognize anything that would normally be a + valid symbol? Can we exclude global PIC addresses with an added + offset? */ + if (flag_pic + && CONSTANT_ADDRESS_P (operands[1]) + && !cris_valid_pic_const (operands[1], false)) + { + enum cris_pic_symbol_type t = cris_pic_symbol_type_of (operands[1]); + + gcc_assert (t != cris_no_symbol); + + if (! REG_S_P (operands[0])) + { + /* We must have a register as destination for what we're about to + do, and for the patterns we generate. */ + CRIS_ASSERT (can_create_pseudo_p ()); + operands[1] = force_reg (SImode, operands[1]); + } + else + { + /* FIXME: add a REG_EQUAL (or is it REG_EQUIV) note to the + destination register for the symbol. It might not be + worth it. Measure. */ + crtl->uses_pic_offset_table = 1; + if (t == cris_rel_symbol) + { + /* Change a "move.d sym(+offs),rN" into (allocate register rM) + for pre-v32: + "move.d (const (plus (unspec [sym] + CRIS_UNSPEC_GOTREL) offs)),rM" "add.d rPIC,rM,rN" + and for v32: + "move.d (const (plus (unspec [sym] + CRIS_UNSPEC_PCREL) offs)),rN". */ + rtx tem, rm, rn = operands[0]; + rtx sym = GET_CODE (operands[1]) != CONST + ? operands[1] : get_related_value (operands[1]); + HOST_WIDE_INT offs = get_integer_term (operands[1]); + + gcc_assert (can_create_pseudo_p ()); + + if (TARGET_V32) + { + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_PCREL); + if (offs != 0) + tem = plus_constant (tem, offs); + rm = rn; + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + } + else + { + /* We still uses GOT-relative addressing for + pre-v32. 
*/ + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_GOTREL); + if (offs != 0) + tem = plus_constant (tem, offs); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx, + rn, 0, OPTAB_LIB_WIDEN) != rn) + internal_error ("expand_binop failed in movsi gotrel"); + } + DONE; + } + else if (t == cris_got_symbol) + { + /* Change a "move.d sym,rN" into (allocate register rM, rO) + "move.d (const (unspec [sym] CRIS_UNSPEC_GOTREAD)),rM" + "add.d rPIC,rM,rO", "move.d [rO],rN" with + the memory access marked as read-only. */ + rtx tem, mem, rm, ro, rn = operands[0]; + gcc_assert (can_create_pseudo_p ()); + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]), + CRIS_UNSPEC_GOTREAD); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + mem = gen_rtx_MEM (Pmode, ro); + + /* This MEM doesn't alias anything. Whether it + aliases other same symbols is unimportant. */ + set_mem_alias_set (mem, new_alias_set ()); + MEM_NOTRAP_P (mem) = 1; + + /* We can set the GOT memory read of a non-called symbol + to readonly, but not that of a call symbol, as those + are subject to lazy evaluation and usually have the value + changed from the first call to the second (but + constant thereafter). */ + MEM_READONLY_P (mem) = 1; + emit_move_insn (rn, mem); + DONE; + } + else + { + /* We get here when we have to change something that would + be recognizable if it wasn't PIC. A ``sym'' is ok for + PIC symbols both with and without a GOT entry. And ``sym + + offset'' is ok for local symbols, so the only thing it + could be, is a global symbol with an offset. Check and + abort if not. */ + rtx reg = gen_reg_rtx (Pmode); + rtx sym = get_related_value (operands[1]); + HOST_WIDE_INT offs = get_integer_term (operands[1]); + + gcc_assert (can_create_pseudo_p () + && t == cris_got_symbol_needing_fixup + && sym != NULL_RTX && offs != 0); + + emit_move_insn (reg, sym); + if (expand_binop (SImode, add_optab, reg, + GEN_INT (offs), operands[0], 0, + OPTAB_LIB_WIDEN) != operands[0]) + internal_error ("expand_binop failed in movsi got+offs"); + DONE; + } + } + } +}) + +(define_insn "*movsi_got_load" + [(set (reg:SI CRIS_GOT_REGNUM) (unspec:SI [(const_int 0)] CRIS_UNSPEC_GOT))] + "flag_pic" +{ + return TARGET_V32 + ? "lapc _GLOBAL_OFFSET_TABLE_,%:" + : "move.d $pc,%:\;sub.d .:GOTOFF,%:"; +} + [(set_attr "cc" "clobber")]) + +(define_insn "*movsi_internal" + [(set + (match_operand:SI 0 "nonimmediate_operand" + "=r,r, r,Q>,r,Q>,g,r,r, r,g,rQ>,x, m,x") + (match_operand:SI 1 "cris_general_operand_or_pic_source" + "r,Q>,M,M, I,r, M,n,!S,g,r,x, rQ>,x,gi"))] + ;; Note that we prefer not to use the S alternative (if for some reason + ;; it competes with others) above, but g matches S. + "" +{ + /* Better to have c-switch here; it is worth it to optimize the size of + move insns. The alternative would be to try to find more constraint + letters. FIXME: Check again. It seems this could shrink a bit. 
*/ + switch (which_alternative) + { + case 9: + if (TARGET_V32) + { + if (!flag_pic + && (GET_CODE (operands[1]) == SYMBOL_REF + || GET_CODE (operands[1]) == LABEL_REF + || GET_CODE (operands[1]) == CONST)) + { + /* FIXME: Express this through (set_attr cc none) instead, + since we can't express the ``none'' at this point. FIXME: + Use lapc for everything except const_int and when next cc0 + user would want the flag setting. */ + CC_STATUS_INIT; + return "lapc %1,%0"; + } + if (flag_pic == 1 + && GET_CODE (operands[1]) == CONST + && GET_CODE (XEXP (operands[1], 0)) == UNSPEC + && XINT (XEXP (operands[1], 0), 1) == CRIS_UNSPEC_GOTREAD) + return "movu.w %1,%0"; + } + /* FALLTHROUGH */ + case 0: + case 1: + case 5: + case 10: + return "move.d %1,%0"; + + case 11: + case 12: + case 13: + case 14: + return "move %d1,%0"; + + case 2: + case 3: + case 6: + return "clear.d %0"; + + /* Constants -32..31 except 0. */ + case 4: + return "moveq %1,%0"; + + /* We can win a little on constants -32768..-33, 32..65535. */ + case 7: + if (INTVAL (operands[1]) > 0 && INTVAL (operands[1]) < 65536) + { + if (INTVAL (operands[1]) < 256) + return "movu.b %1,%0"; + return "movu.w %1,%0"; + } + else if (INTVAL (operands[1]) >= -32768 && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= -128 && INTVAL (operands[1]) < 128) + return "movs.b %1,%0"; + return "movs.w %1,%0"; + } + return "move.d %1,%0"; + + case 8: + { + rtx tem = operands[1]; + gcc_assert (GET_CODE (tem) == CONST); + tem = XEXP (tem, 0); + if (GET_CODE (tem) == PLUS + && GET_CODE (XEXP (tem, 0)) == UNSPEC + && (XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL + || XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_PCREL) + && CONST_INT_P (XEXP (tem, 1))) + tem = XEXP (tem, 0); + gcc_assert (GET_CODE (tem) == UNSPEC); + switch (XINT (tem, 1)) + { + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_PLTGOTREAD: + /* Using sign-extend mostly to be consistent with the + indexed addressing mode. */ + if (flag_pic == 1) + return "movs.w %1,%0"; + return "move.d %1,%0"; + + case CRIS_UNSPEC_GOTREL: + case CRIS_UNSPEC_PLT_GOTREL: + gcc_assert (!TARGET_V32); + return "move.d %1,%0"; + + case CRIS_UNSPEC_PCREL: + case CRIS_UNSPEC_PLT_PCREL: + gcc_assert (TARGET_V32); + return "lapc %1,%0"; + + default: + gcc_unreachable (); + } + } + default: + return "BOGUS: %1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,no,yes,yes,no,no") + (set_attr "cc" "*,*,*,*,*,*,*,*,*,*,*,none,none,none,none")]) + +;; Extend operations with side-effect from mem to register, using +;; MOVS/MOVU. These are from mem to register only. +;; +;; [rx=ry+rz.S] +;; +;; QImode to HImode +;; +;; FIXME: Can we omit extend to HImode, since GCC should truncate for +;; HImode by itself? Perhaps use only anonymous modes? 
+ +(define_insn "*ext_sideqihi_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 5 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + mov%e5.%m5 [%4=%3+%1%T2],%0") + +(define_insn "*ext_sidesi_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 5 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + mov%e5 [%4=%3+%1%T2],%0") + +;; Same but [rx=ry+i] + +;; QImode to HImode + +(define_insn "*ext_sideqihi" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (match_operator:HI + 4 "cris_extend_operator" + [(mem:QI (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J'))) + return "#"; + if (which_alternative == 4) + return "mov%e4.%m4 [%3=%2%S1],%0"; + return "mov%e4.%m4 [%3=%1%S2],%0"; +}) + +(define_insn "*ext_sidesi" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 4 "cris_extend_operator" + [(mem:BW (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J'))) + return "#"; + if (which_alternative == 4) + return "mov%e4 [%3=%2%S1],%0"; + return "mov%e4 [%3=%1%S2],%0"; +}) + +;; FIXME: See movsi. 
+ +(define_insn "movhi" + [(set + (match_operand:HI 0 "nonimmediate_operand" "=r,r, r,Q>,r,Q>,r,r,r,g,g,r,r,x") + (match_operand:HI 1 "general_operand" "r,Q>,M,M, I,r, L,O,n,M,r,g,x,r"))] + "" +{ + switch (which_alternative) + { + case 0: + case 1: + case 5: + case 10: + case 11: + return "move.w %1,%0"; + case 12: + case 13: + return "move %1,%0"; + case 2: + case 3: + case 9: + return "clear.w %0"; + case 4: + return "moveq %1,%0"; + case 6: + case 8: + if (INTVAL (operands[1]) < 256 && INTVAL (operands[1]) >= -128) + { + if (INTVAL (operands[1]) > 0) + return "movu.b %1,%0"; + return "movs.b %1,%0"; + } + return "move.w %1,%0"; + case 7: + return "movEq %b1,%0"; + default: + return "BOGUS: %1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,yes,no,no,no,no,yes,yes") + (set_attr "cc" "*,*,none,none,*,none,*,clobber,*,none,none,*,none,none")]) + +(define_insn "movstricthi" + [(set + (strict_low_part + (match_operand:HI 0 "nonimmediate_operand" "+r,r, r,Q>,Q>,g,r,g")) + (match_operand:HI 1 "general_operand" "r,Q>,M,M, r, M,g,r"))] + "" + "@ + move.w %1,%0 + move.w %1,%0 + clear.w %0 + clear.w %0 + move.w %1,%0 + clear.w %0 + move.w %1,%0 + move.w %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")]) + +(define_expand "reload_in" + [(set (match_operand:BW 2 "register_operand" "=r") + (match_operand:BW 1 "memory_operand" "m")) + (set (match_operand:BW 0 "register_operand" "=x") + (match_dup 2))] + "" + "") + +(define_expand "reload_out" + [(set (match_operand:BW 2 "register_operand" "=&r") + (match_operand:BW 1 "register_operand" "x")) + (set (match_operand:BW 0 "memory_operand" "=m") + (match_dup 2))] + "" + "") + +(define_insn "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,r,g,g,r,r,r,x") + (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, I,M,r,O,g,x,r"))] + "" + "@ + move.b %1,%0 + move.b %1,%0 + move.b %1,%0 + clear.b %0 + clear.b %0 + moveq %1,%0 + clear.b %0 + move.b %1,%0 + moveq %b1,%0 + move.b %1,%0 + move %1,%0 + move %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,yes,no,yes,yes") + (set_attr "cc" "*,*,*,*,*,*,*,*,clobber,*,none,none")]) + +(define_insn "movstrictqi" + [(set (strict_low_part + (match_operand:QI 0 "nonimmediate_operand" "+r,Q>,r, r,Q>,g,g,r")) + (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, M,r,g"))] + "" + "@ + move.b %1,%0 + move.b %1,%0 + move.b %1,%0 + clear.b %0 + clear.b %0 + clear.b %0 + move.b %1,%0 + move.b %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")]) + +;; The valid "quick" bit-patterns are, except for 0.0, denormalized +;; values REALLY close to 0, and some NaN:s (I think; their exponent is +;; all ones); the worthwhile one is "0.0". +;; It will use clear, so we know ALL types of immediate 0 never change cc. + +(define_insn "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,g,g,r,r,x,Q>,m,x, x") + (match_operand:SF 1 "general_operand" "r,r, Q>,G,G, G,r,g,x,r,x, x,Q>,g"))] + "" + "@ + move.d %1,%0 + move.d %1,%0 + move.d %1,%0 + clear.d %0 + clear.d %0 + clear.d %0 + move.d %1,%0 + move.d %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no,yes,yes,yes,no,yes,no")]) + +;; Movem patterns. Primarily for use in function prologue and epilogue. +;; The V32 variants have an ordering matching the expectations of the +;; standard names "load_multiple" and "store_multiple"; pre-v32 movem +;; store R0 in the highest memory location. 
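A rough C-level illustration of where the movem patterns below are expected to matter (a sketch only: the function and names here are invented, and whether the prologue/epilogue actually use movem depends on the register allocator and the selected -march level):

/* Several values live across a call usually force callee-saved
   registers, which the CRIS prologue/epilogue typically save and
   restore with a single movem each.  */
extern int use4 (int, int, int, int);

int
movem_candidate (int a, int b, int c, int d)
{
  int x = use4 (a, b, c, d);
  /* a..d survive the call, so they are expected to sit in
     callee-saved registers stored by the prologue's movem.  */
  return x + a + b + c + d;
}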
+
+(define_expand "load_multiple"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "memory_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_V32"
+{
+  rtx indreg;
+
+  /* Apparently the predicate isn't checked, so we need to do so
+     manually. Once happened for libstdc++-v3 locale_facets.tcc. */
+  if (!MEM_P (operands[1]))
+    FAIL;
+
+  indreg = XEXP (operands[1], 0);
+
+  if (GET_CODE (indreg) == POST_INC)
+    indreg = XEXP (indreg, 0);
+  if (!REG_P (indreg)
+      || GET_CODE (operands[2]) != CONST_INT
+      || !REG_P (operands[0])
+      || REGNO (operands[0]) != 0
+      || INTVAL (operands[2]) > CRIS_SP_REGNUM
+      || (int) REGNO (indreg) < INTVAL (operands[2]))
+    FAIL;
+  emit_insn (cris_gen_movem_load (operands[1], operands[2], 0));
+  DONE;
+})
+
+(define_expand "store_multiple"
+  [(match_operand:SI 0 "memory_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "const_int_operand" "")]
+  "TARGET_V32"
+{
+  rtx indreg;
+
+  /* See load_multiple. */
+  if (!MEM_P (operands[0]))
+    FAIL;
+
+  indreg = XEXP (operands[0], 0);
+
+  if (GET_CODE (indreg) == POST_INC)
+    indreg = XEXP (indreg, 0);
+  if (!REG_P (indreg)
+      || GET_CODE (operands[2]) != CONST_INT
+      || !REG_P (operands[1])
+      || REGNO (operands[1]) != 0
+      || INTVAL (operands[2]) > CRIS_SP_REGNUM
+      || (int) REGNO (indreg) < INTVAL (operands[2]))
+    FAIL;
+  cris_emit_movem_store (operands[0], operands[2], 0, false);
+  DONE;
+})
+
+(define_insn "*cris_load_multiple"
+  [(match_parallel 0 "cris_load_multiple_op"
+                   [(set (match_operand:SI 1 "register_operand" "=r,r")
+                         (match_operand:SI 2 "memory_operand" "Q,m"))])]
+  ""
+  "movem %O0,%o0"
+  [(set_attr "cc" "none")
+   (set_attr "slottable" "yes,no")
+   ;; Not true, but setting the length to 0 causes return sequences (ret
+   ;; movem) to have the cost they had when (return) included the movem
+   ;; and reduces the performance penalty taken for needing to emit an
+   ;; epilogue (in turn copied by bb-reorder) instead of return patterns.
+   ;; FIXME: temporary change until all insn lengths are correctly
+   ;; described. FIXME: have better target control over bb-reorder.
+   (set_attr "length" "0")])
+
+(define_insn "*cris_store_multiple"
+  [(match_parallel 0 "cris_store_multiple_op"
+                   [(set (match_operand:SI 2 "memory_operand" "=Q,m")
+                         (match_operand:SI 1 "register_operand" "r,r"))])]
+  ""
+  "movem %o0,%O0"
+  [(set_attr "cc" "none")
+   (set_attr "slottable" "yes,no")])
+
+
+;; Sign- and zero-extend insns with standard names.
+;; Those for integer source operand are ordered with the widest source
+;; type first.
+
+;; Sign-extend.
+
+(define_insn "extendsidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (sign_extend:DI (match_operand:SI 1 "general_operand" "g")))]
+  ""
+  "move.d %1,%M0\;smi %H0\;neg.d %H0,%H0")
+
+(define_insn "extenddi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (sign_extend:DI (match_operand:BW 1 "general_operand" "g")))]
+  ""
+  "movs %1,%M0\;smi %H0\;neg.d %H0,%H0")
+
+(define_insn "extendsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+        (sign_extend:SI (match_operand:BW 1 "general_operand" "r,Q>,g")))]
+  ""
+  "movs %1,%0"
+  [(set_attr "slottable" "yes,yes,no")])
+
+;; To do a byte->word extension, extend to dword, except that the top half
+;; of the register will be clobbered. FIXME: Perhaps this is not needed.
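A minimal C sketch of inputs that should be matched by these extend patterns (the movs.b/movu.b mnemonics are taken from the movs/movu output templates above and below; the function names are illustrative):

/* Expected to load with a sign-extending "movs.b".  */
int
load_schar (const signed char *p)
{
  return *p;
}

/* Expected to load with a zero-extending "movu.b".  */
unsigned int
load_uchar (const unsigned char *p)
{
  return *p;
}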
+ +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (sign_extend:HI (match_operand:QI 1 "general_operand" "r,Q>,g")))] + "" + "movs.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + + +;; Zero-extend. The DImode ones are synthesized by gcc, so we don't +;; specify them here. + +(define_insn "zero_extendsi2" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (zero_extend:SI + (match_operand:BW 1 "nonimmediate_operand" "r,Q>,m")))] + "" + "movu %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; Same comment as sign-extend QImode to HImode above applies. + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (zero_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "r,Q>,m")))] + "" + "movu.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; All kinds of arithmetic and logical instructions. +;; +;; First, anonymous patterns to match addressing modes with +;; side-effects. +;; +;; op.S [rx=ry+I],rz; (add, sub, or, and, bound). +;; +;; [rx=ry+rz.S] + +(define_insn "*op_side_biap" + [(set (match_operand:BWD 0 "register_operand" "=r,r") + (match_operator:BWD + 6 "cris_orthogonal_operator" + [(match_operand:BWD 1 "register_operand" "0,0") + (mem:BWD (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] ([%4=%2+%3]) + +(define_insn "*op_side" + [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r") + (match_operator:BWD + 5 "cris_orthogonal_operator" + [(match_operand:BWD 1 "register_operand" "0,0,0,0,0") + (mem:BWD (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J'))) + return "#"; + if (which_alternative == 4) + return "%x5.%s0 [%4=%3%S2],%0"; + return "%x5 [%4=%2%S3],%0"; +}) + +;; To match all cases for commutative operations we may have to have the +;; following pattern for add, or & and. I do not know really, but it does +;; not break anything. +;; +;; FIXME: This really ought to be checked. +;; +;; op.S [rx=ry+I],rz; +;; +;; [rx=ry+rz.S] + +(define_insn "*op_swap_side_biap" + [(set (match_operand:BWD 0 "register_operand" "=r,r") + (match_operator:BWD + 6 "cris_commutative_orth_op" + [(mem:BWD (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r"))) + (match_operand:BWD 1 "register_operand" "0,0")])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] ([%4=%2+%3]) +;; FIXME: These could have anonymous mode for operand 0. 
+ +;; QImode + +(define_insn "*op_swap_side" + [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r") + (match_operator:BWD + 5 "cris_commutative_orth_op" + [(mem:BWD + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r"))) + (match_operand:BWD 1 "register_operand" "0,0,0,0,0")])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J'))) + return "#"; + if (which_alternative == 4) + return "%x5 [%4=%3%S2],%0"; + return "%x5 [%4=%2%S3],%0"; +}) + +;; Add operations, standard names. + +;; Note that for the 'P' constraint, the high part can be -1 or 0. We +;; output the insn through the 'A' output modifier as "adds.w" and "addq", +;; respectively. +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "general_operand")))] + "" +{ + if (MEM_P (operands[2]) && TARGET_V32) + operands[2] = force_reg (DImode, operands[2]); +}) + +(define_insn "*adddi3_non_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r") + (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,r") + (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))] + "!TARGET_V32" + "@ + addq %2,%M0\;ax\;addq 0,%H0 + subq %n2,%M0\;ax\;subq 0,%H0 + add%e2.%z2 %2,%M0\;ax\;%A2 %H2,%H0 + add.d %M2,%M0\;ax\;add.d %H2,%H0 + add.d %M2,%M1,%M0\;ax\;add.d %H2,%H1,%H0") + +; It seems no use allowing a memory operand for this one, because we'd +; need a scratch register for incrementing the address. +(define_insn "*adddi3_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,0") + (match_operand:DI 2 "nonmemory_operand" "J,N,P,r,n")))] + "TARGET_V32" + "@ + addq %2,%M0\;addc 0,%H0 + subq %n2,%M0\;ax\;subq 0,%H0 + add%e2.%z2 %2,%M0\;addc %H2,%H0 + add.d %M2,%M0\;addc %H2,%H0 + add.d %M2,%M0\;addc %H2,%H0") + +(define_expand "add3" + [(set (match_operand:BWD 0 "register_operand") + (plus:BWD + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*addsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r, r,r, r") + (plus:SI + (match_operand:SI 1 "register_operand" "%0,0, 0,0,0,0, 0,r, r") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,n,!S,g,!To,0")))] + +;; The last constraint is due to that after reload, the '%' is not +;; honored, and canonicalization doesn't care about keeping the same +;; register as in destination. This will happen after insn splitting. +;; gcc <= 2.7.2. FIXME: Check for gcc-2.9x + + "!TARGET_V32" +{ + switch (which_alternative) + { + case 0: + case 1: + return "add.d %2,%0"; + case 2: + return "addq %2,%0"; + case 3: + return "subq %n2,%0"; + case 4: + /* 'Known value', but not in -63..63. + Check if addu/subu may be used. 
*/ + if (INTVAL (operands[2]) > 0) + { + if (INTVAL (operands[2]) < 256) + return "addu.b %2,%0"; + if (INTVAL (operands[2]) < 65536) + return "addu.w %2,%0"; + } + else + { + if (INTVAL (operands[2]) >= -255) + return "subu.b %n2,%0"; + if (INTVAL (operands[2]) >= -65535) + return "subu.w %n2,%0"; + } + return "add.d %2,%0"; + case 5: + { + rtx tem = operands[2]; + gcc_assert (GET_CODE (tem) == CONST); + tem = XEXP (tem, 0); + if (GET_CODE (tem) == PLUS + && GET_CODE (XEXP (tem, 0)) == UNSPEC + /* We don't allow CRIS_UNSPEC_PCREL here; we can't have a + pc-relative operand in an add insn. */ + && XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL + && CONST_INT_P (XEXP (tem, 1))) + tem = XEXP (tem, 0); + gcc_assert (GET_CODE (tem) == UNSPEC); + switch (XINT (tem, 1)) + { + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_PLTGOTREAD: + /* Using sign-extend mostly to be consistent with the + indexed addressing mode. */ + if (flag_pic == 1) + return "adds.w %2,%0"; + return "add.d %2,%0"; + + case CRIS_UNSPEC_PLT_GOTREL: + case CRIS_UNSPEC_GOTREL: + return "add.d %2,%0"; + default: + gcc_unreachable (); + } + } + case 6: + return "add%u2 %2,%0"; + case 7: + return "add.d %2,%1,%0"; + case 8: + return "add.d %1,%0"; + default: + return "BOGUS addsi %2+%1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no,yes")]) + +; FIXME: Check what's best: having the three-operand ACR alternative +; before or after the corresponding-operand2 alternative. Check for +; *all* insns. FIXME: constant constraint letter for -128..127. +(define_insn "*addsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,!a,r,r,r,!a") + (plus:SI + (match_operand:SI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r, 0,0,0,r") + (match_operand:SI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,L,L, P,n,g,g")))] + "TARGET_V32" + "@ + add.d %2,%0 + addi %2.b,%1,%0 + add.d %2,%0 + addo.d %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + adds.w %2,%0 + addo %2,%1,%0 + addu.w %2,%0 + add.d %2,%0 + add%u2 %2,%0 + addo.%Z2 %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no,no,no,no,no") + (set_attr "cc" "*,none,*,none,*,*,none,*,none,*,*,*,none")]) + +(define_insn "*addhi3_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r, r,r,r,r") + (plus:HI (match_operand:HI 1 "register_operand" "%0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "r,Q>,J,N,g,!To")))] + "!TARGET_V32" + "@ + add.w %2,%0 + add.w %2,%0 + addq %2,%0 + subq %n2,%0 + add.w %2,%0 + add.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")]) + +(define_insn "*addhi3_v32" + [(set (match_operand:HI 0 "register_operand" "=r, !a,r,!a, r,r,!a,r,!a") + (plus:HI + (match_operand:HI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r") + (match_operand:HI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,g,g")))] + "TARGET_V32" + "@ + add.w %2,%0 + addi %2.b,%1,%0 + add.w %2,%0 + addo.w %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + add.w %2,%0 + addo.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "*,none,*,none,clobber,clobber,none,*,none")]) + +(define_insn "*addqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r, r,r,r,r,r") + (plus:QI (match_operand:QI 1 "register_operand" "%0,0, 0,0,0,0,r") + (match_operand:QI 2 "general_operand" "r,Q>,J,N,O,g,!To")))] + "!TARGET_V32" + "@ + add.b %2,%0 + add.b %2,%0 + addq %2,%0 + subq %n2,%0 + subQ -%b2,%0 + add.b %2,%0 + add.b %2,%1,%0" + [(set_attr "slottable" 
"yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,clobber,normal,normal")]) + +(define_insn "*addqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,r,!a") + (plus:QI + (match_operand:QI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,0,r") + (match_operand:QI 2 "general_operand" "r,r, Q>,Q>,J,N,NJ,O,g,g")))] + "TARGET_V32" + "@ + add.b %2,%0 + addi %2.b,%1,%0 + add.b %2,%0 + addo.b %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + subQ -%b2,%0 + add.b %2,%0 + addo.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "*,none,*,none,clobber,clobber,none,clobber,*,none")]) + +;; Subtract. +;; +;; Note that because of insn canonicalization these will *seldom* but +;; rarely be used with a known constant as an operand. + +;; Note that for the 'P' constraint, the high part can be -1 or 0. We +;; output the insn through the 'D' output modifier as "subs.w" and "subq", +;; respectively. +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand") + (minus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "general_operand")))] + "" +{ + if (TARGET_V32 && MEM_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); +}) + +(define_insn "*subdi3_non_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r") + (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0,r") + (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))] + "!TARGET_V32" + "@ + subq %2,%M0\;ax\;subq 0,%H0 + addq %n2,%M0\;ax\;addq 0,%H0 + sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0 + sub.d %M2,%M0\;ax\;sub.d %H2,%H0 + sub.d %M2,%M1,%M0\;ax\;sub.d %H2,%H1,%H0") + +(define_insn "*subdi3_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r") + (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0") + (match_operand:DI 2 "nonmemory_operand" "J,N,P,r")))] + "TARGET_V32" + "@ + subq %2,%M0\;ax\;subq 0,%H0 + addq %n2,%M0\;ax\;addq 0,%H0 + sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0 + sub.d %M2,%M0\;ax\;sub.d %H2,%H0") + +(define_expand "sub3" + [(set (match_operand:BWD 0 "register_operand") + (minus:BWD + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*subsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r,r,r") + (minus:SI + (match_operand:SI 1 "register_operand" "0,0, 0,0,0,0,0,r") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g,!To")))] + "!TARGET_V32" + +;; This does not do the optimal: "addu.w 65535,r0" when %2 is negative. +;; But then again, %2 should not be negative. 
+ + "@ + sub.d %2,%0 + sub.d %2,%0 + subq %2,%0 + addq %n2,%0 + sub%e2.%z2 %2,%0 + sub.d %2,%0 + sub.d %2,%0 + sub.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no")]) + +(define_insn "*subsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r") + (minus:SI + (match_operand:SI 1 "register_operand" "0,0,0,0,0,0,0") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g")))] + "TARGET_V32" + "@ + sub.d %2,%0 + sub.d %2,%0 + subq %2,%0 + addq %n2,%0 + sub%e2.%z2 %2,%0 + sub.d %2,%0 + sub.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no")]) + +(define_insn "*sub3_nonv32" + [(set (match_operand:BW 0 "register_operand" "=r,r, r,r,r,r") + (minus:BW (match_operand:BW 1 "register_operand" "0,0, 0,0,0,r") + (match_operand:BW 2 "general_operand" "r,Q>,J,N,g,!To")))] + "!TARGET_V32" + "@ + sub %2,%0 + sub %2,%0 + subq %2,%0 + addq %n2,%0 + sub %2,%0 + sub %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")]) + +(define_insn "*sub3_v32" + [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r") + (minus:BW (match_operand:BW 1 "register_operand" "0,0,0,0,0") + (match_operand:BW 2 "general_operand" "r,Q>,J,N,g")))] + "TARGET_V32" + "@ + sub %2,%0 + sub %2,%0 + subq %2,%0 + addq %n2,%0 + sub %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal")]) + +;; CRIS has some add/sub-with-sign/zero-extend instructions. +;; Although these perform sign/zero-extension to SImode, they are +;; equally applicable for the HImode case. +;; FIXME: Check; GCC should handle the widening. +;; Note that these must be located after the normal add/sub patterns, +;; so not to get constants into any less specific operands. +;; +;; Extend with add/sub and side-effect. +;; +;; ADDS/SUBS/ADDU/SUBU and BOUND, which needs a check for zero_extend +;; +;; adds/subs/addu/subu bound [rx=ry+rz.S] + +;; QImode to HImode +;; FIXME: GCC should widen. 
+ +(define_insn "*extopqihi_side_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 6 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0") + (match_operator:HI + 7 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6%e7.%m7 [%5=%4+%2%T3],%0") + +(define_insn "*extopsi_side_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 6 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operator:SI + 7 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6%e7 [%5=%4+%2%T3],%0") + + +;; [rx=ry+i] + +;; QImode to HImode + +(define_insn "*extopqihi_side" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (match_operator:HI + 5 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0,0,0,0") + (match_operator:HI + 6 "cris_extend_operator" + [(mem:QI + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r") + ))])])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J'))) + return "#"; + if (which_alternative == 4) + return "%x5%E6.%m6 [%4=%3%S2],%0"; + return "%x5%E6.%m6 [%4=%2%S3],%0"; +}) + +(define_insn "*extopsi_side" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 5 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0,0,0,0") + (match_operator:SI + 6 "cris_extend_operator" + [(mem:BW + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r") + ))])])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N') + || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J'))) + return "#"; + if (which_alternative == 4) + return "%x5%E6 [%4=%3%S2],%0"; + return "%x5%E6 [%4=%2%S3],%0"; +}) + + +;; As with op.S we may have to add special pattern to match commuted +;; operands to adds/addu 
and bound
+;;
+;; adds/addu/bound [rx=ry+rz.S]
+
+;; QImode to HImode
+;; FIXME: GCC should widen.
+
+(define_insn "*extopqihi_swap_side_biap"
+  [(set (match_operand:HI 0 "register_operand" "=r,r")
+        (plus:HI
+         (match_operator:HI
+          6 "cris_extend_operator"
+          [(mem:QI (plus:SI
+                    (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+                             (match_operand:SI 3 "const_int_operand" "n,n"))
+                    (match_operand:SI 4 "register_operand" "r,r")))])
+         (match_operand:HI 1 "register_operand" "0,0")))
+   (set (match_operand:SI 5 "register_operand" "=*4,r")
+        (plus:SI (mult:SI (match_dup 2)
+                          (match_dup 3))
+                 (match_dup 4)))]
+  "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+  "@
+   #
+   add%e6.b [%5=%4+%2%T3],%0")
+
+(define_insn "*extopsi_swap_side_biap"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+        (match_operator:SI
+         7 "cris_plus_or_bound_operator"
+         [(match_operator:SI
+           6 "cris_extend_operator"
+           [(mem:BW (plus:SI
+                     (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+                              (match_operand:SI 3 "const_int_operand" "n,n"))
+                     (match_operand:SI 4 "register_operand" "r,r")))])
+          (match_operand:SI 1 "register_operand" "0,0")]))
+   (set (match_operand:SI 5 "register_operand" "=*4,r")
+        (plus:SI (mult:SI (match_dup 2)
+                          (match_dup 3))
+                 (match_dup 4)))]
+  "(GET_CODE (operands[7]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
+   && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+  "@
+   #
+   %x7%E6 [%5=%4+%2%T3],%0")
+
+;; [rx=ry+i]
+;; FIXME: GCC should widen.
+
+;; QImode to HImode
+
+(define_insn "*extopqihi_swap_side"
+  [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+        (plus:HI
+         (match_operator:HI
+          5 "cris_extend_operator"
+          [(mem:QI (plus:SI
+                    (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+                    (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])
+         (match_operand:HI 1 "register_operand" "0,0,0,0,0")))
+   (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+        (plus:SI (match_dup 2)
+                 (match_dup 3)))]
+  "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+  if ((which_alternative == 0 || which_alternative == 3)
+      && (!CONST_INT_P (operands[3])
+          || INTVAL (operands[3]) > 127
+          || INTVAL (operands[3]) < -128
+          || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+          || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+    return "#";
+  if (which_alternative == 4)
+    return "add%e5.b [%4=%3%S2],%0";
+  return "add%e5.b [%4=%2%S3],%0";
+})
+
+(define_insn "*extopsi_swap_side"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+        (match_operator:SI
+         6 "cris_plus_or_bound_operator"
+         [(match_operator:SI
+           5 "cris_extend_operator"
+           [(mem:BW (plus:SI
+                     (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+                     (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])
+          (match_operand:SI 1 "register_operand" "0,0,0,0,0")]))
+   (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+        (plus:SI (match_dup 2)
+                 (match_dup 3)))]
+  "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[5]) == ZERO_EXTEND)
+   && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+  if ((which_alternative == 0 || which_alternative == 3)
+      && (!CONST_INT_P (operands[3])
+          || INTVAL (operands[3]) > 127
+          || INTVAL (operands[3]) < -128
+          || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+          || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+    return "#";
+  if (which_alternative == 4)
+    return "%x6%E5 [%4=%3%S2],%0";
+  return "%x6%E5 [%4=%2%S3],%0";
+})
+
+;; Extend versions (zero/sign) of normal add/sub (no side-effects).
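For orientation, a small C sketch of source that should combine into the extend-plus-add patterns below (the adds.b/addu.b mnemonics come from the output templates; the functions themselves are made up):

/* Expected to fold the load and sign-extension into one "adds.b".  */
int
acc_schar (int acc, const signed char *p)
{
  return acc + *p;
}

/* Expected to fold the load and zero-extension into one "addu.b".  */
int
acc_uchar (int acc, const unsigned char *p)
{
  return acc + *p;
}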
+ +;; QImode to HImode +;; FIXME: GCC should widen. + +(define_insn "*extopqihi_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") + (match_operator:HI + 3 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0,0,r") + (match_operator:HI + 4 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])]))] + "!TARGET_V32 && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)" + "@ + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +(define_insn "*extopqihi_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 3 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0") + (match_operator:HI + 4 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,m")])]))] + "TARGET_V32" + "%x3%e4.%m4 %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; QImode to SImode + +(define_insn "*extopsi_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (match_operator:SI + 3 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0,0,r") + (match_operator:SI + 4 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")])]))] + "!TARGET_V32 + && (GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)" + "@ + %x3%E4 %2,%0 + %x3%E4 %2,%0 + %x3%E4 %2,%0 + %x3%E4 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*extopsi_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 3 "cris_additive_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operator:SI + 4 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,m")])]))] + "TARGET_V32" + "%x3%e4.%m4 %2,%0" + [(set_attr "slottable" "yes")]) + +;; As with the side-effect patterns, may have to have swapped operands for add. +;; For commutative operands, these are the canonical forms. + +;; QImode to HImode + +(define_insn "*addxqihi_swap_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") + (plus:HI + (match_operator:HI + 3 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")]) + (match_operand:HI 1 "register_operand" "0,0,0,r")))] + "!TARGET_V32 && operands[1] != frame_pointer_rtx" + "@ + add%e3.b %2,%0 + add%e3.b %2,%0 + add%e3.b %2,%0 + add%e3.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +;; A case for v32, to catch the "addo" insn in addition to "adds". We +;; only care to match the canonical form; there should be no other. 
+ +(define_insn "*addsbw_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,!a") + (plus:HI + (sign_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "r,m,m")) + (match_operand:HI 1 "register_operand" "0,0,r")))] + "TARGET_V32" + "@ + adds.b %2,%0 + adds.b %2,%0 + addo.b %2,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber,clobber,none")]) + +(define_insn "*addubw_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (plus:HI + (zero_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "r,m")) + (match_operand:HI 1 "register_operand" "0,0")))] + "TARGET_V32" + "addu.b %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +(define_insn "*extopsi_swap_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (match_operator:SI + 4 "cris_plus_or_bound_operator" + [(match_operator:SI + 3 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")]) + (match_operand:SI 1 "register_operand" "0,0,0,r")]))] + "!TARGET_V32 + && (GET_CODE (operands[4]) != UMIN || GET_CODE (operands[3]) == ZERO_EXTEND) + && operands[1] != frame_pointer_rtx" + "@ + %x4%E3 %2,%0 + %x4%E3 %2,%0 + %x4%E3 %2,%0 + %x4%E3 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*adds_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,!a") + (plus:SI + (sign_extend:SI + (match_operand:BW 2 "nonimmediate_operand" "r,m,m")) + (match_operand:SI 1 "register_operand" "0,0,r")))] + "TARGET_V32" + "@ + adds %2,%0 + adds %2,%0 + addo %2,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "*,*,none")]) + +(define_insn "*addu_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (zero_extend:SI + (match_operand:BW 2 "nonimmediate_operand" "r,m")) + (match_operand:SI 1 "register_operand" "0,0")))] + "TARGET_V32 && operands[1] != frame_pointer_rtx" + "addu %2,%0" + [(set_attr "slottable" "yes")]) + +(define_insn "*bound_v32" + [(set (match_operand:SI 0 "register_operand" "=r") + (umin:SI + (zero_extend:SI + (match_operand:BW 2 "register_operand" "r")) + (match_operand:SI 1 "register_operand" "0")))] + "TARGET_V32 && operands[1] != frame_pointer_rtx" + "bound %2,%0" + [(set_attr "slottable" "yes")]) + +;; This is the special case when we use what corresponds to the +;; instruction above in "casesi". Do *not* change it to use the generic +;; pattern and "REG 15" as pc; I did that and it led to madness and +;; maintenance problems: Instead of (as imagined) recognizing and removing +;; or replacing this pattern with something simpler, other variant +;; patterns were recognized or combined, including some prefix variants +;; where the value in pc is not that of the next instruction (which means +;; this instruction actually *is* special and *should* be marked as such). +;; When switching from the "generic pattern match" approach to this simpler +;; approach, there were insignificant differences in gcc, ipps and +;; product code, somehow due to scratching reload behind the ear or +;; something. Testcase "gcc" looked .01% slower and 4 bytes bigger; +;; product code became .001% smaller but "looked better". The testcase +;; "ipps" was just different at register allocation). +;; +;; Assumptions in the jump optimizer forces us to use IF_THEN_ELSE in this +;; pattern with the default-label as the else, with the "if" being +;; index-is-less-than the max number of cases plus one. The default-label +;; is attached to the end of the case-table at time of output. 
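For context, a dense switch like the sketch below is the kind of source that normally expands through "casesi" and ends up as the dispatch insn that follows (whether a table is used at all is a middle-end heuristic; the case values here are arbitrary):

/* A dense switch is expected to become a bounds check plus the
   "adds.w [$pc+...],$pc" table dispatch on pre-v32, or the plain
   "jump" form on v32.  */
int
dispatch (int i)
{
  switch (i)
    {
    case 0: return 10;
    case 1: return 20;
    case 2: return 30;
    case 3: return 40;
    case 4: return 50;
    case 5: return 60;
    default: return -1;
    }
}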
+ +(define_insn "*casesi_adds_w" + [(set (pc) + (if_then_else + (ltu (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "n")) + (plus:SI (sign_extend:SI + (mem:HI + (plus:SI (mult:SI (match_dup 0) (const_int 2)) + (pc)))) + (pc)) + (label_ref (match_operand 2 "" "")))) + (use (label_ref (match_operand 3 "" "")))] + "!TARGET_V32 && operands[0] != frame_pointer_rtx" + "adds.w [$pc+%0.w],$pc" + [(set_attr "cc" "clobber")]) + +;; For V32, we just have a jump, but we need to mark the table as used, +;; and the jump insn must have the if_then_else form expected by core +;; GCC. Since we don't want to prolong the lifetime of the original +;; index value, we compare against "unspec 0". It's a pity we have to +;; jump through to get the default label in place and to keep the jump +;; table around. FIXME: Look into it some time. + +(define_insn "*casesi_jump_v32" + [(set (pc) + (if_then_else + (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI) + (match_operand:SI 0 "const_int_operand" "n")) + (match_operand:SI 1 "register_operand" "r") + (label_ref (match_operand 2 "" "")))) + (use (label_ref (match_operand 3 "" "")))] + "TARGET_V32" + "jump %1%#" + [(set_attr "cc" "clobber") + (set_attr "slottable" "has_slot")]) + +;; Multiply instructions. + +;; Sometimes powers of 2 (which are normally canonicalized to a +;; left-shift) appear here, as a result of address reloading. +;; As a special, for values 3 and 5, we can match with an addi, so add those. +;; +;; FIXME: This may be unnecessary now. +;; Explicitly named for convenience of having a gen_... function. + +(define_insn "addi_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI + (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "const_int_operand" "n")))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 4 || INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5)" +{ + if (INTVAL (operands[2]) == 2) + return "lslq 1,%0"; + else if (INTVAL (operands[2]) == 4) + return "lslq 2,%0"; + else if (INTVAL (operands[2]) == 3) + return "addi %0.w,%0"; + else if (INTVAL (operands[2]) == 5) + return "addi %0.d,%0"; + return "BAD: adr_mulsi: %0=%1*%2"; +} +[(set_attr "slottable" "yes") + ;; No flags are changed if this insn is "addi", but it does not seem + ;; worth the trouble to distinguish that to the lslq cases. + (set_attr "cc" "clobber")]) + +;; The addi insn as it is normally used. + +;; Make the the ACR alternative taste bad enough to not choose it as a +;; preference to avoid spilling problems (unwind-dw2-fde.c at build). +;; FIXME: Revisit for new register allocator. + +(define_insn "*addi" + [(set (match_operand:SI 0 "register_operand" "=r,!a") + (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 1 "register_operand" "0,r")))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && CONST_INT_P (operands[3]) + && (INTVAL (operands[3]) == 1 + || INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)" + "@ + addi %2%T3,%0 + addi %2%T3,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +;; The mstep instruction. Probably not useful by itself; it's to +;; non-linear wrt. the other insns. We used to expand to it, so at least +;; it's correct. 
+ +(define_insn "mstep_shift" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (cc0) (const_int 0)) + (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (const_int 1)) + (match_operand:SI 2 "register_operand" "r")) + (ashift:SI (match_operand:SI 3 "register_operand" "0") + (const_int 1))))] + "!TARGET_V32" + "mstep %2,%0" + [(set_attr "slottable" "yes")]) + +;; When illegitimate addresses are legitimized, sometimes gcc forgets +;; to canonicalize the multiplications. +;; +;; FIXME: Check gcc > 2.7.2, remove and possibly fix in gcc. + +(define_insn "mstep_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (cc0) (const_int 0)) + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "0") + (const_int 2)) + (match_operand:SI 2 "register_operand" "r")) + (mult:SI (match_operand:SI 3 "register_operand" "0") + (const_int 2))))] + "!TARGET_V32 + && operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[2] != frame_pointer_rtx + && operands[3] != frame_pointer_rtx" + "mstep %2,%0" + [(set_attr "slottable" "yes")]) + +(define_insn "mul3" + [(set (match_operand:WD 0 "register_operand" "=r") + (mult:WD + (szext:WD (match_operand: 1 "register_operand" "%0")) + (szext:WD (match_operand: 2 "register_operand" "r")))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!mul %2,%0" + [(set (attr "slottable") + (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0)) + (const_string "no") + (const_string "yes"))) + ;; For umuls.[bwd] it's just N unusable here, but let's be safe. + ;; For muls.b, this really extends to SImode, so cc should be + ;; considered clobbered. + ;; For muls.w, it's just N unusable here, but let's be safe. + (set_attr "cc" "clobber")]) + +;; Note that gcc does not make use of such a thing as umulqisi3. It gets +;; confused and will erroneously use it instead of umulhisi3, failing (at +;; least) gcc.c-torture/execute/arith-rand.c at all optimization levels. +;; Inspection of optab code shows that there must be only one widening +;; multiplication per mode widened to. + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r"))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!muls.d %2,%0" + [(set (attr "slottable") + (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0)) + (const_string "no") + (const_string "yes"))) + ;; Just N unusable here, but let's be safe. + (set_attr "cc" "clobber")]) + +;; A few multiply variations. + +;; When needed, we can get the high 32 bits from the overflow +;; register. We don't care to split and optimize these. +;; +;; Note that cc0 is still valid after the move-from-overflow-register +;; insn; no special precaution need to be taken in cris_notice_update_cc. + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI + (szext:DI (match_operand:SI 1 "register_operand" "%0")) + (szext:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!mul.d %2,%M0\;move $mof,%H0") + +;; These two patterns may be expressible by other means, perhaps by making +;; [u]?mulsidi3 a define_expand. 
+ +;; Due to register allocation braindamage, the clobber 1,2 alternatives +;; cause a move into the clobbered register *before* the insn, then +;; after the insn, mof is moved too, rather than the clobber assigned +;; the last mof target. This became apparent when making MOF and SRP +;; visible registers, with the necessary tweak to smulsi3_highpart. +;; Because these patterns are used in division by constants, that damage +;; is visible (ipps regression tests). Therefore the last two +;; alternatives, "helping" reload to avoid an unnecessary move, but +;; punished by force of one "?". Check code from "int d (int a) {return +;; a / 1000;}" and unsigned. FIXME: Comment above was for 3.2, revisit. + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=h,h,?r,?r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (szext:DI (match_operand:SI 1 "register_operand" "r,r,0,r")) + (szext:DI (match_operand:SI 2 "register_operand" "r,r,r,0"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1,2,h,h"))] + "TARGET_HAS_MUL_INSNS" + "@ + %!mul.d %2,%1 + %!mul.d %1,%2 + %!mul.d %2,%1\;move $mof,%0 + %!mul.d %1,%2\;move $mof,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +;; Divide and modulus instructions. CRIS only has a step instruction. + +(define_insn "dstep_shift" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (geu:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (const_int 1)) + (match_operand:SI 2 "register_operand" "r")) + (minus:SI (ashift:SI (match_operand:SI 3 "register_operand" "0") + (const_int 1)) + (match_operand:SI 4 "register_operand" "2")) + (ashift:SI (match_operand:SI 5 "register_operand" "0") + (const_int 1))))] + "" + "dstep %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Here's a variant with mult instead of ashift. +;; +;; FIXME: This should be investigated. Which one matches through combination? + +(define_insn "dstep_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (geu:SI (mult:SI (match_operand:SI 1 "register_operand" "0") + (const_int 2)) + (match_operand:SI 2 "register_operand" "r")) + (minus:SI (mult:SI (match_operand:SI 3 "register_operand" "0") + (const_int 2)) + (match_operand:SI 4 "register_operand" "2")) + (mult:SI (match_operand:SI 5 "register_operand" "0") + (const_int 2))))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[2] != frame_pointer_rtx + && operands[3] != frame_pointer_rtx" + "dstep %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Logical operators. + +;; Bitwise "and". + +;; There is no use in defining "anddi3", because gcc can expand this by +;; itself, and make reasonable code without interference. + +;; If the first operand is memory or a register and is the same as the +;; second operand, and the third operand is -256 or -65536, we can use +;; CLEAR instead. Or, if the first operand is a register, and the third +;; operand is 255 or 65535, we can zero_extend. +;; GCC isn't smart enough to recognize these cases (yet), and they seem +;; to be common enough to be worthwhile. +;; FIXME: This should be made obsolete. + +(define_expand "andsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (! 
(CONST_INT_P (operands[2]) + && (((INTVAL (operands[2]) == -256 + || INTVAL (operands[2]) == -65536) + && rtx_equal_p (operands[1], operands[0])) + || ((INTVAL (operands[2]) == 255 + || INTVAL (operands[2]) == 65535) + && REG_P (operands[0]))))) + { + /* Make intermediate steps if operand0 is not a register or + operand1 is not a register, and hope that the reload pass will + make something useful out of it. Note that the operands are + *not* canonicalized. For the moment, I chicken out on this, + because all or most ports do not describe 'and' with + canonicalized operands, and I seem to remember magic in reload, + checking that operand1 has constraint '%0', in which case + operand0 and operand1 must have similar predicates. + FIXME: Investigate. */ + rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + rtx reg1 = operands[1]; + + if (! REG_P (reg1)) + { + emit_move_insn (reg0, reg1); + reg1 = reg0; + } + + emit_insn (gen_rtx_SET (SImode, reg0, + gen_rtx_AND (SImode, reg1, operands[2]))); + + /* Make sure we get the right *final* destination. */ + if (! REG_P (operands[0])) + emit_move_insn (operands[0], reg0); + + DONE; + } +}) + +;; Some special cases of andsi3. + +(define_insn "*andsi_movu" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%r,Q,To") + (match_operand:SI 2 "const_int_operand" "n,n,n")))] + "(INTVAL (operands[2]) == 255 || INTVAL (operands[2]) == 65535) + && !side_effects_p (operands[1])" + "movu.%z2 %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andsi_clear" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,Q,Q,To,To") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,0,0,0,0") + (match_operand:SI 2 "const_int_operand" "P,n,P,n,P,n")))] + "(INTVAL (operands[2]) == -65536 || INTVAL (operands[2]) == -256) + && !side_effects_p (operands[0])" + "@ + cLear.b %0 + cLear.w %0 + cLear.b %0 + cLear.w %0 + cLear.b %0 + cLear.w %0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "none")]) + +;; This is a catch-all pattern, taking care of everything that was not +;; matched in the insns above. +;; +;; Sidenote: the tightening from "nonimmediate_operand" to +;; "register_operand" for operand 1 actually increased the register +;; pressure (worse code). That will hopefully change with an +;; improved reload pass. + +(define_insn "*expanded_andsi_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,r") + (match_operand:SI 2 "general_operand" "I,r,Q>,g,!To")))] + "!TARGET_V32" + "@ + andq %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,no")]) + +(define_insn "*expanded_andsi_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0") + (match_operand:SI 2 "general_operand" "I,r,Q>,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,no") + (set_attr "cc" "noov32")]) + +;; For both QI and HI we may use the quick patterns. This results in +;; useless condition codes, but that is used rarely enough for it to +;; normally be a win (could check ahead for use of cc0, but seems to be +;; more pain than win). 
+ +;; FIXME: See note for andsi3 + +(define_expand "andhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "" +{ + if (! (CONST_INT_P (operands[2]) + && (((INTVAL (operands[2]) == -256 + || INTVAL (operands[2]) == 65280) + && rtx_equal_p (operands[1], operands[0])) + || (INTVAL (operands[2]) == 255 + && REG_P (operands[0]))))) + { + /* See comment for andsi3. */ + rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (HImode); + rtx reg1 = operands[1]; + + if (! REG_P (reg1)) + { + emit_move_insn (reg0, reg1); + reg1 = reg0; + } + + emit_insn (gen_rtx_SET (HImode, reg0, + gen_rtx_AND (HImode, reg1, operands[2]))); + + /* Make sure we get the right destination. */ + if (! REG_P (operands[0])) + emit_move_insn (operands[0], reg0); + + DONE; + } +}) + +;; Some fast andhi3 special cases. + +(define_insn "*andhi_movu" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "r,Q,To") + (const_int 255)))] + "!side_effects_p (operands[1])" + "mOvu.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andhi_clear" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,Q,To") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "0,0,0") + (const_int -256)))] + "!side_effects_p (operands[0])" + "cLear.b %0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "none")]) + +;; Catch-all andhi3 pattern. + +(define_insn "*expanded_andhi_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))] + +;; Sidenote: the tightening from "general_operand" to +;; "register_operand" for operand 1 actually increased the register +;; pressure (worse code). That will hopefully change with an +;; improved reload pass. + + "!TARGET_V32" + "@ + andq %2,%0 + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0 + and.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no,no") + (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*expanded_andhi_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no") + (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")]) + +;; A strict_low_part pattern. 
+ +(define_insn "*andhi_lowpart_non_v32" + [(set (strict_low_part + (match_operand:HI 0 "register_operand" "+r,r, r,r,r,r")) + (and:HI (match_operand:HI 1 "register_operand" "%0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "r,Q>,L,O,g,!To")))] + "!TARGET_V32" + "@ + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0 + and.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,yes,no,no") + (set_attr "cc" "normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*andhi_lowpart_v32" + [(set (strict_low_part + (match_operand:HI 0 "register_operand" "+r,r,r,r,r")) + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0") + (match_operand:HI 2 "general_operand" "r,Q>,L,O,g")))] + "TARGET_V32" + "@ + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0" + [(set_attr "slottable" "yes,yes,no,yes,no") + (set_attr "cc" "noov32,noov32,noov32,clobber,noov32")]) + +(define_expand "andqi3" + [(set (match_operand:QI 0 "register_operand") + (and:QI (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "general_operand")))] + "" + "") + +(define_insn "*andqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r") + (and:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))] + "!TARGET_V32" + "@ + andq %2,%0 + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0 + and.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")]) + +(define_insn "*andqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r") + (and:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")]) + +(define_insn "*andqi_lowpart_non_v32" + [(set (strict_low_part + (match_operand:QI 0 "register_operand" "+r,r, r,r,r")) + (and:QI (match_operand:QI 1 "register_operand" "%0,0, 0,0,r") + (match_operand:QI 2 "general_operand" "r,Q>,O,g,!To")))] + "!TARGET_V32" + "@ + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0 + and.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,normal,normal")]) + +(define_insn "*andqi_lowpart_v32" + [(set (strict_low_part + (match_operand:QI 0 "register_operand" "+r,r,r,r")) + (and:QI (match_operand:QI 1 "register_operand" "%0,0,0,0") + (match_operand:QI 2 "general_operand" "r,Q>,O,g")))] + "TARGET_V32" + "@ + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0" + [(set_attr "slottable" "yes,yes,yes,no") + (set_attr "cc" "noov32,noov32,clobber,noov32")]) + +;; Bitwise or. + +;; Same comment as anddi3 applies here - no need for such a pattern. + +;; It seems there's no need to jump through hoops to get good code such as +;; with andsi3. 
+ +(define_expand "ior3" + [(set (match_operand:BWD 0 "register_operand") + (ior:BWD (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*iorsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:SI 2 "general_operand" "I, r,Q>,n,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.d %2,%0 + or.d %2,%0 + oR.%s2 %2,%0 + or.d %2,%0 + or.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,no,no") + (set_attr "cc" "normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0") + (match_operand:SI 2 "general_operand" "I,r,Q>,n,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.d %2,%0 + or.d %2,%0 + oR.%s2 %2,%0 + or.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,no") + (set_attr "cc" "noov32,noov32,noov32,clobber,noov32")]) + +(define_insn "*iorhi3_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.w %2,%0 + or.w %2,%0 + or.w %2,%0 + oRq %b2,%0 + or.w %2,%0 + or.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no,no") + (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorhi3_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.w %2,%0 + or.w %2,%0 + or.w %2,%0 + oRq %b2,%0 + or.w %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no") + (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")]) + +(define_insn "*iorqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.b %2,%0 + or.b %2,%0 + orQ %b2,%0 + or.b %2,%0 + or.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.b %2,%0 + or.b %2,%0 + orQ %b2,%0 + or.b %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")]) + +;; Exclusive-or + +;; See comment about "anddi3" for xordi3 - no need for such a pattern. +;; FIXME: Do we really need the shorter variants? + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r")))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "xor3" + [(set (match_operand:BW 0 "register_operand" "=r") + (xor:BW (match_operand:BW 1 "register_operand" "%0") + (match_operand:BW 2 "register_operand" "r")))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; Negation insns. + +;; Questionable use, here mostly as a (slightly usable) define_expand +;; example. 
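+
+;; What the negsf2 expansion below amounts to, sketched in C
+;; (illustrative only; the helper name is made up):
+;;
+;;   float negf (float x)
+;;   {
+;;     union { float f; unsigned int i; } u = { x };
+;;     u.i ^= 0x80000000u;  /* Flip the sign bit: xor with 1 << 31.  */
+;;     return u.f;
+;;   }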
+ +(define_expand "negsf2" + [(set (match_dup 2) + (match_dup 3)) + (parallel [(set (match_operand:SF 0 "register_operand" "=r") + (neg:SF (match_operand:SF 1 + "register_operand" "0"))) + (use (match_dup 2))])] + "" +{ + operands[2] = gen_reg_rtx (SImode); + operands[3] = GEN_INT (1 << 31); +}) + +(define_insn "*expanded_negsf2" + [(set (match_operand:SF 0 "register_operand" "=r") + (neg:SF (match_operand:SF 1 "register_operand" "0"))) + (use (match_operand:SI 2 "register_operand" "r"))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes")]) + +;; No "negdi2" although we could make one up that may be faster than +;; the one in libgcc. + +(define_insn "neg2" + [(set (match_operand:BWD 0 "register_operand" "=r") + (neg:BWD (match_operand:BWD 1 "register_operand" "r")))] + "" + "neg %1,%0" + [(set_attr "slottable" "yes")]) + +;; One-complements. + +;; See comment on anddi3 - no need for a DImode pattern. +;; See also xor comment. + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "0")))] + "" + "not %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "one_cmpl2" + [(set (match_operand:BW 0 "register_operand" "=r") + (not:BW (match_operand:BW 1 "register_operand" "0")))] + "" + "not %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; Arithmetic/Logical shift right (and SI left). + +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=r") + (shift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Kcr")))] + "" +{ + if (REG_S_P (operands[2])) + return ".d %2,%0"; + + return "q %2,%0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Since gcc gets lost, and forgets to zero-extend the source (or mask +;; the destination) when it changes shifts of lower modes into SImode, +;; it is better to make these expands an anonymous patterns instead of +;; the more correct define_insns. This occurs when gcc thinks that is +;; is better to widen to SImode and use immediate shift count. + +;; FIXME: Is this legacy or still true for gcc >= 2.7.2? + +;; FIXME: Can't parametrize sign_extend and zero_extend (before +;; mentioning "shiftrt"), so we need two patterns. 
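+
+;; C code along these lines can give rise to the narrow-mode right
+;; shifts handled below (a sketch; names made up, code generation
+;; depends on options):
+;;
+;;   short ashr16 (short x, int n) { return x >> n; }
+;;   unsigned char lshr8 (unsigned char x, int n) { return x >> n; }
+;;
+;; When such shifts are expanded in HImode or QImode, both operands are
+;; first widened to SImode (sign- or zero-extended as the expanders
+;; below spell out), the shift is done there, and the low part of the
+;; SImode result is taken as the narrow result.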
+(define_expand "ashr3" + [(set (match_dup 3) + (sign_extend:SI (match_operand:BW 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" ""))) + (set (match_dup 5) (ashiftrt:SI (match_dup 3) (match_dup 4))) + (set (match_operand:BW 0 "general_operand" "") + (subreg:BW (match_dup 5) 0))] + "" +{ + int i; + + for (i = 3; i < 6; i++) + operands[i] = gen_reg_rtx (SImode); +}) + +(define_expand "lshr3" + [(set (match_dup 3) + (zero_extend:SI (match_operand:BW 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" ""))) + (set (match_dup 5) (lshiftrt:SI (match_dup 3) (match_dup 4))) + (set (match_operand:BW 0 "general_operand" "") + (subreg:BW (match_dup 5) 0))] + "" +{ + int i; + + for (i = 3; i < 6; i++) + operands[i] = gen_reg_rtx (SImode); +}) + +(define_insn "*expanded_" + [(set (match_operand:BW 0 "register_operand" "=r") + (shiftrt:BW (match_operand:BW 1 "register_operand" "0") + (match_operand:BW 2 "register_operand" "r")))] + "" + " %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "*_lowpart" + [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r")) + (shiftrt:BW (match_dup 0) + (match_operand:BW 1 "register_operand" "r")))] + "" + " %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Arithmetic/logical shift left. + +;; For narrower modes than SI, we can use lslq although it makes cc +;; unusable. The win is that we do not have to reload the shift-count +;; into a register. + +(define_insn "ashl3" + [(set (match_operand:BW 0 "register_operand" "=r,r") + (ashift:BW (match_operand:BW 1 "register_operand" "0,0") + (match_operand:BW 2 "nonmemory_operand" "r,Kc")))] + "" +{ + return + (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > ) + ? "moveq 0,%0" + : (CONSTANT_P (operands[2]) + ? "lslq %2,%0" : "lsl %2,%0"); +} + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32,clobber")]) + +;; A strict_low_part matcher. + +(define_insn "*ashl_lowpart" + [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r")) + (ashift:BW (match_dup 0) + (match_operand:HI 1 "register_operand" "r")))] + "" + "lsl %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Various strange insns that gcc likes. + +;; Fortunately, it is simple to construct an abssf (although it may not +;; be very much used in practice). + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=r") + (abs:SF (match_operand:SF 1 "register_operand" "0")))] + "" + "lslq 1,%0\;lsrq 1,%0") + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "abs %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; FIXME: GCC should be able to do these expansions itself. 
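+
+;; Sketch of the corresponding C (function names made up; illustrative
+;; only):
+;;
+;;   float absf (float x) { return __builtin_fabsf (x); }
+;;   int   absi (int x)   { return __builtin_abs (x); }
+;;
+;; The SFmode case simply clears the sign bit (lslq 1 followed by
+;; lsrq 1, as in "abssf2" above); absi uses "abssi2".  The narrow
+;; HImode/QImode variants, when they occur, are sign-extended to SImode
+;; and handled by "abssi2", as the expansion below shows.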
+ +(define_expand "abs2" + [(set (match_dup 2) + (sign_extend:SI (match_operand:BW 1 "general_operand" ""))) + (set (match_dup 3) (abs:SI (match_dup 2))) + (set (match_operand:BW 0 "register_operand" "") + (subreg:BW (match_dup 3) 0))] + "" + "operands[2] = gen_reg_rtx (SImode); operands[3] = gen_reg_rtx (SImode);") + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_HAS_LZ" + "lz %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "0")))] + "TARGET_HAS_SWAP" + "swapwb %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; This instruction swaps all bits in a register. +;; That means that the most significant bit is put in the place +;; of the least significant bit, and so on. + +(define_insn "cris_swap_bits" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0")] + CRIS_UNSPEC_SWAP_BITS))] + "TARGET_HAS_SWAP" + "swapwbr %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Implement ctz using two instructions, one for bit swap and one for clz. +;; Defines a scratch register to avoid clobbering input. + +(define_expand "ctzsi2" + [(set (match_dup 2) + (match_operand:SI 1 "register_operand")) + (set (match_dup 2) + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_SWAP_BITS)) + (set (match_operand:SI 0 "register_operand") + (clz:SI (match_dup 2)))] + "TARGET_HAS_LZ && TARGET_HAS_SWAP" + "operands[2] = gen_reg_rtx (SImode);") + +;; Bound-insn. Defined to be the same as an unsigned minimum, which is an +;; operation supported by gcc. Used in casesi, but used now and then in +;; normal code too. + +(define_expand "uminsi3" + [(set (match_operand:SI 0 "register_operand" "") + (umin:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (MEM_P (operands[2]) && TARGET_V32) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "*uminsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r") + (umin:SI (match_operand:SI 1 "register_operand" "%0,0, 0,r") + (match_operand:SI 2 "general_operand" "r,Q>,g,!To")))] + "!TARGET_V32" +{ + if (CONST_INT_P (operands[2])) + { + /* Constant operands are zero-extended, so only 32-bit operands + may be negative. */ + if (INTVAL (operands[2]) >= 0) + { + if (INTVAL (operands[2]) < 256) + return "bound.b %2,%0"; + + if (INTVAL (operands[2]) < 65536) + return "bound.w %2,%0"; + } + } + else if (which_alternative == 3) + return "bound.d %2,%1,%0"; + + return "bound.d %2,%0"; +} + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*uminsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (umin:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "r,i")))] + "TARGET_V32" +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + /* Constant operands are zero-extended, so only 32-bit operands + may be negative. */ + if (INTVAL (operands[2]) >= 0) + { + if (INTVAL (operands[2]) < 256) + return "bound.b %2,%0"; + + if (INTVAL (operands[2]) < 65536) + return "bound.w %2,%0"; + } + } + + return "bound.d %2,%0"; +} + [(set_attr "slottable" "yes,no")]) + +;; Jump and branch insns. 
+ +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "ba %l0%#" + [(set_attr "slottable" "has_slot")]) + +;; Testcase gcc.c-torture/compile/991213-3.c fails if we allow a constant +;; here, since the insn is not recognized as an indirect jump by +;; jmp_uses_reg_or_mem used by computed_jump_p. Perhaps it is a kludge to +;; change from general_operand to nonimmediate_operand (at least the docs +;; should be changed), but then again the pattern is called indirect_jump. +(define_expand "indirect_jump" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand"))] + "" +{ + if (TARGET_V32 && MEM_P (operands[0])) + operands[0] = force_reg (SImode, operands[0]); +}) + +(define_insn "*indirect_jump_non_v32" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))] + "!TARGET_V32" + "jump %0") + +(define_insn "*indirect_jump_v32" + [(set (pc) (match_operand:SI 0 "register_operand" "r"))] + "TARGET_V32" + "jump %0%#" + [(set_attr "slottable" "has_slot")]) + +;; Return insn. Used whenever the epilogue is very simple; if it is only +;; a single ret or jump [sp+]. No allocated stack space or saved +;; registers are allowed. +;; Note that for this pattern, although named, it is ok to check the +;; context of the insn in the test, not only compiler switches. + +(define_expand "return" + [(return)] + "cris_simple_epilogue ()" + "cris_expand_return (cris_return_address_on_stack ()); DONE;") + +(define_insn "*return_expanded" + [(return)] + "" +{ + return cris_return_address_on_stack_for_return () + ? "jump [$sp+]" : "ret%#"; +} + [(set (attr "slottable") + (if_then_else + (ne (symbol_ref + "(cris_return_address_on_stack_for_return ())") + (const_int 0)) + (const_string "no") + (const_string "has_return_slot")))]) + +(define_expand "prologue" + [(const_int 0)] + "TARGET_PROLOGUE_EPILOGUE" + "cris_expand_prologue (); DONE;") + +;; Note that the (return) from the expander itself is always the last +;; insn in the epilogue. +(define_expand "epilogue" + [(const_int 0)] + "TARGET_PROLOGUE_EPILOGUE" + "cris_expand_epilogue (); DONE;") + +;; Conditional branches. + +(define_expand "cbranch4" + [(set (cc0) (compare + (match_operand:BWD 1 "nonimmediate_operand") + (match_operand:BWD 2 "general_operand"))) + (set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(cc0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +(define_expand "cbranchdi4" + [(set (cc0) + (compare (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(cc0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + if (TARGET_V32 && !REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (TARGET_V32 && MEM_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); +}) + + +;; We suffer from the same overflow-bit-gets-in-the-way problem as +;; e.g. m68k, so we have to check if overflow bit is set on all "signed" +;; conditions. + +(define_insn "b" + [(set (pc) + (if_then_else (ncond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "b %l0%#" + [(set_attr "slottable" "has_slot")]) + +(define_insn "b" + [(set (pc) + (if_then_else (ocond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 
0 : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +(define_insn "b" + [(set (pc) + (if_then_else (rcond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "b %l0%#" : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +;; Reversed anonymous patterns to the ones above, as mandated. + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (ncond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "b %l0%#" + [(set_attr "slottable" "has_slot")]) + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (ocond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 0 : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (rcond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "b %l0%#" : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +;; Set on condition: sCC. + +(define_expand "cstoredi4" + [(set (cc0) (compare + (match_operand:DI 2 "nonimmediate_operand") + (match_operand:DI 3 "general_operand"))) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" +{ + if (TARGET_V32 && !REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + if (TARGET_V32 && MEM_P (operands[3])) + operands[3] = force_reg (DImode, operands[3]); +}) + +(define_expand "cstore4" + [(set (cc0) (compare + (match_operand:BWD 2 "nonimmediate_operand") + (match_operand:BWD 3 "general_operand"))) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" + "") + +;; Like bCC, we have to check the overflow bit for +;; signed conditions. + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (ncond:SI (cc0) (const_int 0)))] + "" + "s %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (rcond:SI (cc0) (const_int 0)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "s %0" : "s %0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (ocond:SI (cc0) (const_int 0)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 0 : "s %0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +;; Call insns. + +;; We need to make these patterns "expand", since the real operand is +;; hidden in a (mem:QI ) inside operand[0] (call_value: operand[1]), +;; and cannot be checked if it were a "normal" pattern. +;; Note that "call" and "call_value" are *always* called with a +;; mem-operand for operand 0 and 1 respective. What happens for combined +;; instructions is a different issue. + +(define_expand "call" + [(parallel [(call (match_operand:QI 0 "cris_mem_call_operand" "") + (match_operand 1 "general_operand" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + if (flag_pic) + cris_expand_pic_call_address (&operands[0]); +}) + +;; Accept *anything* as operand 1. Accept operands for operand 0 in +;; order of preference. 
+ +(define_insn "*expanded_call_non_v32" + [(call (mem:QI (match_operand:SI 0 "general_operand" "r,Q>,g")) + (match_operand 1 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "jsr %0") + +(define_insn "*expanded_call_v32" + [(call + (mem:QI + (match_operand:SI 0 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) + (match_operand 1 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "TARGET_V32" + "@ + jsr %0%# + jsr %0%# + bsr %0%# + bsr %0%#" + [(set_attr "slottable" "has_call_slot")]) + +;; Parallel when calculating and reusing address of indirect pointer +;; with simple offset. (Makes most sense with PIC.) It looks a bit +;; wrong not to have the clobber last, but that's the way combine +;; generates it (except it doesn' look into the *inner* mem, so this +;; just matches a peephole2). FIXME: investigate that. +(define_insn "*expanded_call_side" + [(call (mem:QI + (mem:SI + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn")))) + (match_operand 2 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 3 "register_operand" "=*0,r,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "!TARGET_AVOID_GOTPLT && !TARGET_V32" + "jsr [%3=%0%S1]") + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "cris_mem_call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + if (flag_pic) + cris_expand_pic_call_address (&operands[1]); +}) + +;; Accept *anything* as operand 2. The validity other than "general" of +;; operand 0 will be checked elsewhere. Accept operands for operand 1 in +;; order of preference (Q includes r, but r is shorter, faster). +;; We also accept a PLT symbol. We output it as [rPIC+sym:GOTPLT] rather +;; than requiring getting rPIC + sym:PLT into a register. + +(define_insn "*expanded_call_value_non_v32" + [(set (match_operand 0 "nonimmediate_operand" "=g,g,g") + (call (mem:QI (match_operand:SI 1 "general_operand" "r,Q>,g")) + (match_operand 2 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "Jsr %1" + [(set_attr "cc" "clobber")]) + +;; See similar call special-case. +(define_insn "*expanded_call_value_side" + [(set (match_operand 0 "nonimmediate_operand" "=g,g,g") + (call + (mem:QI + (mem:SI + (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn")))) + (match_operand 3 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 4 "register_operand" "=*1,r,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "!TARGET_AVOID_GOTPLT && !TARGET_V32" + "Jsr [%4=%1%S2]" + [(set_attr "cc" "clobber")]) + +(define_insn "*expanded_call_value_v32" + [(set + (match_operand 0 "nonimmediate_operand" "=g,g,g,g") + (call + (mem:QI + (match_operand:SI 1 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) + (match_operand 2 "" ""))) + (clobber (reg:SI 16))] + "TARGET_V32" + "@ + Jsr %1%# + Jsr %1%# + Bsr %1%# + Bsr %1%#" + [(set_attr "cc" "clobber") + (set_attr "slottable" "has_call_slot")]) + +;; Used in debugging. No use for the direct pattern; unfilled +;; delayed-branches are taken care of by other means. + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "cc" "none")]) + +;; We need to stop accesses to the stack after the memory is +;; deallocated. Unfortunately, reorg doesn't look at naked clobbers, +;; e.g. (insn ... 
(clobber (mem:BLK (stack_pointer_rtx)))) and we don't +;; want to use a naked (unspec_volatile) as that would stop any +;; scheduling in the epilogue. Hence we model it as a "real" insn that +;; sets the memory in an unspecified manner. FIXME: Unfortunately it +;; still has the effect of an unspec_volatile. +(define_insn "cris_frame_deallocated_barrier" + [(set (mem:BLK (reg:SI CRIS_SP_REGNUM)) + (unspec:BLK [(const_int 0)] CRIS_UNSPEC_FRAME_DEALLOC))] + "" + "" + [(set_attr "length" "0")]) + +;; We expand on casesi so we can use "bound" and "add offset fetched from +;; a table to pc" (adds.w [pc+%0.w],pc). + +;; Note: if you change the "parallel" (or add anything after it) in +;; this expansion, you must change the macro ASM_OUTPUT_CASE_END +;; accordingly, to add the default case at the end of the jump-table. + +(define_expand "cris_casesi_non_v32" + [(set (match_dup 5) (match_operand:SI 0 "general_operand" "")) + (set (match_dup 6) + (minus:SI (match_dup 5) + (match_operand:SI 1 "const_int_operand" "n"))) + (set (match_dup 7) + (umin:SI (match_dup 6) + (match_operand:SI 2 "const_int_operand" "n"))) + (parallel + [(set (pc) + (if_then_else + (ltu (match_dup 7) (match_dup 2)) + (plus:SI (sign_extend:SI + (mem:HI + (plus:SI (mult:SI (match_dup 7) (const_int 2)) + (pc)))) + (pc)) + (label_ref (match_operand 4 "" "")))) + (use (label_ref (match_operand 3 "" "")))])] + "" +{ + operands[2] = plus_constant (operands[2], 1); + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = gen_reg_rtx (SImode); +}) + +;; FIXME: Check effect of not JUMP_TABLES_IN_TEXT_SECTION. +(define_expand "cris_casesi_v32" + [(set (match_dup 5) (match_operand:SI 0 "general_operand")) + (set (match_dup 6) + (minus:SI (match_dup 5) + (match_operand:SI 1 "const_int_operand"))) + (set (match_dup 7) + (umin:SI (match_dup 6) + (match_operand:SI 2 "const_int_operand"))) + (set (match_dup 8) (match_dup 11)) + (set (match_dup 9) + (plus:SI (mult:SI (match_dup 7) (const_int 2)) + (match_dup 8))) + (set (match_dup 10) + (plus:SI (sign_extend:SI (mem:HI (match_dup 9))) + (match_dup 9))) + (parallel + [(set (pc) + (if_then_else + (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI) (match_dup 2)) + (match_dup 10) + (label_ref (match_operand 4 "" "")))) + (use (label_ref (match_dup 3)))])] + "TARGET_V32" +{ + int i; + rtx xlabel = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + for (i = 5; i <= 10; i++) + operands[i] = gen_reg_rtx (SImode); + operands[2] = plus_constant (operands[2], 1); + + /* Don't forget to decorate labels too, for PIC. */ + operands[11] = flag_pic + ? gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xlabel), + CRIS_UNSPEC_PCREL)) + : xlabel; +}) + +(define_expand "casesi" + [(match_operand:SI 0 "general_operand") + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand") + (match_operand 3 "" "") + (match_operand 4 "" "")] + "" +{ + if (TARGET_V32) + emit_insn (gen_cris_casesi_v32 (operands[0], operands[1], operands[2], + operands[3], operands[4])); + else + emit_insn (gen_cris_casesi_non_v32 (operands[0], operands[1], operands[2], + operands[3], operands[4])); + DONE; +}) + +;; Split-patterns. Some of them have modes unspecified. This +;; should always be ok; if for no other reason sparc.md has it as +;; well. +;; +;; When register_operand is specified for an operand, we can get a +;; subreg as well (Axis-990331), so don't just assume that REG_P is true +;; for a register_operand and that REGNO can be used as is. 
It is best to +;; guard with REG_P, unless it is worth it to adjust for the subreg case. + +;; op [rx + 0],ry,rz +;; The index to rx is optimized into zero, and gone. + +;; First, recognize bound [rx],ry,rz; where [rx] is zero-extended, +;; and add/sub [rx],ry,rz, with zero or sign-extend on [rx]. +;; Split this into: +;; move ry,rz +;; op [rx],rz +;; Lose if rz=ry or rx=rz. +;; Call this op-extend-split. +;; Do not match for V32; the addo and addi shouldn't be split +;; up. + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")])]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_op_dup 3 [(match_dup 2)])]))] + "") + +;; As op-extend-split, but recognize and split op [rz],ry,rz into +;; ext [rz],rz +;; op ry,rz +;; Do this for plus or bound only, being commutative operations, since we +;; have swapped the operands. +;; Call this op-extend-split-rx=rz + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")])]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2)])) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As the op-extend-split, but swapped operands, and only for +;; plus or bound, being the commutative extend-operators. FIXME: Why is +;; this needed? Is it? +;; Call this op-extend-split-swapped + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")]) + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_op_dup 3 [(match_dup 2)])]))] + "") + +;; As op-extend-split-rx=rz, but swapped operands, only for plus or +;; bound. Call this op-extend-split-swapped-rx=rz. 
+ +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")]) + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2)])) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As op-extend-split, but the mem operand is not extended. +;; +;; op [rx],ry,rz changed into +;; move ry,rz +;; op [rx],rz +;; lose if ry=rz or rx=rz +;; Call this op-extend. + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 3 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "memory_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 2)]))] + "") + +;; As op-extend-split-rx=rz, non-extended. +;; Call this op-split-rx=rz + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 3 "cris_commutative_orth_op" + [(match_operand 2 "memory_operand" "") + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 2)]))] + "") + +;; As op-extend-split-swapped, nonextended. +;; Call this op-split-swapped. + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 3 "cris_commutative_orth_op" + [(match_operand 1 "register_operand" "") + (match_operand 2 "memory_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As op-extend-split-swapped-rx=rz, non-extended. +;; Call this op-split-swapped-rx=rz. 
+ +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 3 "cris_orthogonal_operator" + [(match_operand 2 "memory_operand" "") + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 1)]))] + "") + +;; Splits for all cases in side-effect insns where (possibly after reload +;; and register allocation) rx and ry in [rx=ry+i] are equal. + +;; move.S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "register_operand" ""))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))])] + "REG_P (operands[3]) && REG_P (operands[4]) + && REGNO (operands[3]) == REGNO (operands[4])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (set (match_dup 0) (match_dup 5))] + "operands[5] = replace_equiv_address (operands[6], operands[3]);") + +;; move.S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_mem_op" + [(plus:SI (match_operand:SI 1 "cris_bdap_operand" "") + (match_operand:SI 2 "cris_bdap_operand" ""))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 1) + (match_dup 2)))])] + "(rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2]))" + [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (match_dup 4))] +{ + operands[4] = replace_equiv_address (operands[5], operands[3]); + cris_order_for_addsi3 (operands, 1); +}) + +;; move.S1 ry,[rx=rx+rz.S2] + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))]) + (match_operand 3 "register_operand" "")) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "REG_P (operands[2]) && REG_P (operands[4]) + && REGNO (operands[4]) == REGNO (operands[2])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (match_dup 5) (match_dup 3))] + "operands[5] = replace_equiv_address (operands[6], operands[4]);") + +;; move.S1 ry,[rx=rx+i] + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))]) + (match_operand 2 "register_operand" "")) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 0) + (match_dup 1)))])] + "(rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]))" + [(set (match_dup 3) (plus:SI (match_dup 0) (match_dup 1))) + (set (match_dup 5) (match_dup 2))] +{ + operands[5] = replace_equiv_address (operands[6], operands[3]); + cris_order_for_addsi3 (operands, 0); +}) + +;; clear.[bwd] [rx=rx+rz.S2] + +(define_split + [(parallel + [(set (mem:BWD (plus:SI + (mult:SI 
(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))) + (const_int 0)) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "REG_P (operands[2]) && REG_P (operands[3]) + && REGNO (operands[3]) == REGNO (operands[2])" + [(set (match_dup 3) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (mem:BWD (match_dup 3)) (const_int 0))] + "") + +;; clear.[bwd] [rx=rx+i] + +(define_split + [(parallel + [(set (mem:BWD + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))) + (const_int 0)) + (set (match_operand:SI 2 "register_operand" "") + (plus:SI (match_dup 0) + (match_dup 1)))])] + "(rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[2], operands[1]))" + [(set (match_dup 2) (plus:SI (match_dup 0) (match_dup 1))) + (set (mem:BWD (match_dup 2)) (const_int 0))] + "cris_order_for_addsi3 (operands, 0);") + +;; mov(s|u).S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "register_operand" "")))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))])] + "REG_P (operands[3]) + && REG_P (operands[4]) + && REGNO (operands[3]) == REGNO (operands[4])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 6)]))] + "operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]);") + +;; mov(s|u).S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_extend_operator" + [(mem (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "") + (match_operand:SI 2 "cris_bdap_operand" "")))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 1) + (match_dup 2)))])] + "(rtx_equal_p (operands[1], operands[3]) + || rtx_equal_p (operands[2], operands[3]))" + [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (match_op_dup 4 [(match_dup 5)]))] +{ + operands[5] = replace_equiv_address (XEXP (operands[4], 0), operands[3]); + cris_order_for_addsi3 (operands, 1); +}) + +;; op.S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (mem (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "")))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 6)]))] +{ + operands[6] = replace_equiv_address (XEXP (operands[5], 1), operands[4]); + cris_order_for_addsi3 (operands, 2); +}) + +;; op.S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 
"const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 7)]))] + "operands[7] = replace_equiv_address (XEXP (operands[6], 1), operands[5]);") + +;; op.S1 [rx=rx+rz.S2],ry (swapped) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_commutative_orth_op" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" ""))) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))] + "operands[7] = replace_equiv_address (XEXP (operands[6], 0), operands[5]);") + +;; op.S1 [rx=rx+i],ry (swapped) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_commutative_orth_op" + [(mem + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" ""))) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 6) (match_dup 1)]))] +{ + operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]); + cris_order_for_addsi3 (operands, 2); +}) + +;; op(s|u).S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 7 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))])])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 8)]))] + "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[7]), GET_MODE (operands[7]), + replace_equiv_address (XEXP (operands[7], 0), + operands[5]));") + +;; op(s|u).S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 6 "cris_extend_operator" + [(mem + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "") + ))])])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p 
(operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 7)]))] +{ + operands[7] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]), + replace_equiv_address (XEXP (operands[6], 0), + operands[4])); + cris_order_for_addsi3 (operands, 2); +}) + +;; op(s|u).S1 [rx=rx+rz.S2],ry (swapped, plus or bound) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 7 "cris_plus_or_bound_operator" + [(match_operator + 6 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))]) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 8) (match_dup 1)]))] + "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]), + replace_equiv_address (XEXP (operands[6], 0), + operands[5]));") + +;; op(s|u).S1 [rx=rx+i],ry (swapped, plus or bound) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_plus_or_bound_operator" + [(match_operator + 5 "cris_extend_operator" + [(mem (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "")))]) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))] +{ + operands[7] = gen_rtx_fmt_e (GET_CODE (operands[5]), GET_MODE (operands[5]), + replace_equiv_address (XEXP (operands[5], 0), + operands[4])); + cris_order_for_addsi3 (operands, 2); +}) + +;; Splits for addressing prefixes that have no side-effects, so we can +;; fill a delay slot. Never split if we lose something, though. + +;; If we have a +;; move [indirect_ref],rx +;; where indirect ref = {const, [r+], [r]}, it costs as much as +;; move indirect_ref,rx +;; move [rx],rx +;; Take care not to allow indirect_ref = register. + +;; We're not allowed to generate copies of registers with different mode +;; until after reload; copying pseudos upsets reload. CVS as of +;; 2001-08-24, unwind-dw2-fde.c, _Unwind_Find_FDE ICE in +;; cselib_invalidate_regno. + +(define_split ; indir_to_reg_split + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "indirect_operand" ""))] + "reload_completed + && REG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (MEM_P (XEXP (operands[1], 0)) || CONSTANT_P (XEXP (operands[1], 0))) + && REGNO (operands[0]) < CRIS_LAST_GENERAL_REGISTER" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 0) (match_dup 3))] + "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0])); + operands[3] = replace_equiv_address (operands[1], operands[2]); + operands[4] = XEXP (operands[1], 0);") + +;; As the above, but MOVS and MOVU. 
+ +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_extend_operator" + [(match_operand 1 "indirect_operand" "")]))] + "reload_completed + && REG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (MEM_P (XEXP (operands[1], 0)) + || CONSTANT_P (XEXP (operands[1], 0)))" + [(set (match_dup 2) (match_dup 5)) + (set (match_dup 0) (match_op_dup 4 [(match_dup 3)]))] + "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0])); + operands[3] = replace_equiv_address (XEXP (operands[4], 0), operands[2]); + operands[5] = XEXP (operands[1], 0);") + +;; Various peephole optimizations. +;; +;; Watch out: when you exchange one set of instructions for another, the +;; condition codes setting must be the same, or you have to CC_INIT or +;; whatever is appropriate, in the pattern before you emit the +;; assembly text. This is best done here, not in cris_notice_update_cc, +;; to keep changes local to their cause. +;; +;; Do not add patterns that you do not know will be matched. +;; Please also add a self-contained testcase. + +;; We have trouble with and:s and shifts. Maybe something is broken in +;; gcc? Or it could just be that bit-field insn expansion is a bit +;; suboptimal when not having extzv insns. +;; Testcase for the following four peepholes: gcc.dg/cris-peep2-xsrand.c + +(define_peephole2 ; asrandb (peephole casesi+31) + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) + (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 255 + && INTVAL (operands[1]) > 23 + /* Check that the and-operation enables us to use logical-shift. */ + && (INTVAL (operands[2]) + & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (QImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); +}) + +(define_peephole2 ; asrandw (peephole casesi+32) + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 65535 + && INTVAL (operands[2]) != 255 + && INTVAL (operands[1]) > 15 + /* Check that the and-operation enables us to use logical-shift. */ + && (INTVAL (operands[2]) + & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. 
+{ + operands[3] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode)); +}) + +(define_peephole2 ; lsrandb (peephole casesi+33) + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 255 + && INTVAL (operands[1]) > 23" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (QImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); +}) + +(define_peephole2 ; lsrandw (peephole casesi+34) + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 && INTVAL (operands[2]) < 65535 + && INTVAL (operands[2]) != 255 + && INTVAL (operands[1]) > 15" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode)); +}) + + +;; Change +;; add.d n,rx +;; move [rx],ry +;; into +;; move [rx=rx+n],ry +;; when -128 <= n <= 127. +;; This will reduce the size of the assembler code for n = [-128..127], +;; and speed up accordingly. Don't match if the previous insn is +;; (set rx rz) because that combination is matched by another peephole. +;; No stable test-case. + +(define_peephole2 ; moversideqi (peephole casesi+35) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand 3 "register_operand" "") + (match_operator 4 "cris_mem_op" [(match_dup 0)]))] + "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD + && REGNO (operands[3]) != REGNO (operands[0]) + && (BASE_P (operands[1]) || BASE_P (operands[2])) + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J') + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N') + && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128) + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + ;; Checking the previous insn is a bit too awkward for the condition. 
+{ + rtx prev = prev_nonnote_insn (curr_insn); + if (prev != NULL_RTX) + { + rtx set = single_set (prev); + if (set != NULL_RTX + && REG_S_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) == REGNO (operands[0]) + && REG_S_P (SET_SRC (set))) + FAIL; + } + operands[5] + = replace_equiv_address (operands[4], + gen_rtx_PLUS (SImode, + operands[1], operands[2])); +}) + +;; Vice versa: move ry,[rx=rx+n] + +(define_peephole2 ; movemsideqi (peephole casesi+36) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operator 3 "cris_mem_op" [(match_dup 0)]) + (match_operand 4 "register_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD + && REGNO (operands[4]) != REGNO (operands[0]) + && (BASE_P (operands[1]) || BASE_P (operands[2])) + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J') + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N') + && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128) + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 5) (match_dup 4)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + "operands[5] + = replace_equiv_address (operands[3], + gen_rtx_PLUS (SImode, + operands[1], operands[2]));") + +;; As above, change: +;; add.d n,rx +;; op.d [rx],ry +;; into: +;; op.d [rx=rx+n],ry +;; Saves when n = [-128..127]. +;; +;; Splitting and joining combinations for side-effect modes are slightly +;; out of hand. They probably will not save the time they take typing in, +;; not to mention the bugs that creep in. FIXME: Get rid of as many of +;; the splits and peepholes as possible. +;; No stable test-case. + +(define_peephole2 ; mover2side (peephole casesi+37) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand 3 "register_operand" "") + (match_operator 4 "cris_orthogonal_operator" + [(match_dup 3) + (match_operator + 5 "cris_mem_op" [(match_dup 0)])]))] + ;; FIXME: What about DFmode? + ;; Change to GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD? + "GET_MODE (operands[3]) != DImode + && REGNO (operands[0]) != REGNO (operands[3]) + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J') + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N') + && INTVAL (operands[2]) >= -128 + && INTVAL (operands[2]) <= 127 + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 3) (match_op_dup 4 [(match_dup 3) (match_dup 6)])) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + "operands[6] + = replace_equiv_address (operands[5], + gen_rtx_PLUS (SImode, + operands[1], operands[2]));") + +;; Sometimes, for some reason the pattern +;; move x,rx +;; add y,rx +;; move [rx],rz +;; will occur. Solve this, and likewise for to-memory. +;; No stable test-case. + +(define_peephole2 ; moverside (peephole casesi+38) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "cris_bdap_biap_operand" "")) + (set (match_dup 0) + (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "") + (match_operand:SI 3 "cris_bdap_biap_operand" ""))) + (set (match_operand 4 "register_operand" "") + (match_operator 5 "cris_mem_op" [(match_dup 0)]))] + "(rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[3], operands[0])) + && cris_side_effect_mode_ok (PLUS, operands, 0, + (REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 
3 : 2)), + (! REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + -1, 4)" + [(parallel + [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])] +{ + rtx otherop + = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2]; + + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. */ + if (CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; + } + else + { + operands[7] = otherop; + operands[8] = operands[1]; + } + operands[6] + = replace_equiv_address (operands[5], + gen_rtx_PLUS (SImode, + operands[7], operands[8])); +}) + +;; As above but to memory. +;; FIXME: Split movemside and moverside into variants and prune +;; the ones that don't trig. +;; No stable test-case. + +(define_peephole2 ; movemside (peephole casesi+39) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "cris_bdap_biap_operand" "")) + (set (match_dup 0) + (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "") + (match_operand:SI 3 "cris_bdap_biap_operand" ""))) + (set (match_operator 4 "cris_mem_op" [(match_dup 0)]) + (match_operand 5 "register_operand" ""))] + "(rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[3], operands[0])) + && cris_side_effect_mode_ok (PLUS, operands, 0, + (REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + (! REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + -1, 5)" + [(parallel + [(set (match_dup 6) (match_dup 5)) + (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])] +{ + rtx otherop + = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2]; + + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. */ + if (CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; + } + else + { + operands[7] = otherop; + operands[8] = operands[1]; + } + operands[6] + = replace_equiv_address (operands[4], + gen_rtx_PLUS (SImode, + operands[7], operands[8])); +}) + +;; Another spotted bad code: +;; move rx,ry +;; move [ry],ry +;; No stable test-case. + +(define_peephole2 ; movei (peephole casesi+42) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (match_operand 2 "register_operand" "") + (match_operator 3 "cris_mem_op" [(match_dup 0)]))] + "REGNO (operands[0]) == REGNO (operands[2]) + && (REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[1]))) + && GET_MODE_SIZE (GET_MODE (operands[2])) <= UNITS_PER_WORD" + [(set (match_dup 2) (match_dup 4))] + "operands[4] = replace_equiv_address (operands[3], operands[1]);") + +;; move.d [r10+16],r9 +;; and.d r12,r9 +;; change to +;; and.d [r10+16],r12,r9 +;; With generalization of the operation, the size and the addressing mode. +;; This seems to be the result of a quirk in register allocation +;; missing the three-operand cases when having different predicates. +;; Maybe that it matters that it is a commutative operation. +;; This pattern helps that situation, but there's still the increased +;; register pressure. +;; Note that adding the noncommutative variant did not show any matches +;; in ipps and cc1, so it's not here. +;; No stable test-case. 
+ +(define_peephole2 ; op3 (peephole casesi+44) + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_mem_op" + [(plus:SI + (match_operand:SI 1 "cris_bdap_biap_operand" "") + (match_operand:SI 2 "cris_bdap_biap_operand" ""))])) + (set (match_dup 0) + (match_operator + 5 "cris_commutative_orth_op" + [(match_operand 3 "register_operand" "") + (match_operand 4 "register_operand" "")]))] + "(rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[4], operands[0])) + && ! rtx_equal_p (operands[3], operands[4]) + && (REG_S_P (operands[1]) || REG_S_P (operands[2])) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD" + [(set (match_dup 0) (match_op_dup 5 [(match_dup 7) (match_dup 6)]))] + "operands[7] + = rtx_equal_p (operands[3], operands[0]) ? operands[4] : operands[3];") + +;; I cannot tell GCC (2.1, 2.7.2) how to correctly reload an instruction +;; that looks like +;; and.b some_byte,const,reg_32 +;; where reg_32 is the destination of the "three-address" code optimally. +;; It should be: +;; movu.b some_byte,reg_32 +;; and.b const,reg_32 +;; but it turns into: +;; move.b some_byte,reg_32 +;; and.d const,reg_32 +;; Fix it here. +;; Testcases: gcc.dg/cris-peep2-andu1.c gcc.dg/cris-peep2-andu2.c + +(define_peephole2 ; andu (casesi+45) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "nonimmediate_operand" "")) + (set (match_operand:SI 2 "register_operand" "") + (and:SI (match_dup 0) + (match_operand:SI 3 "const_int_operand" "")))] + ;; Since the size of the memory access could be made different here, + ;; don't do this for a mem-volatile access. + "REGNO (operands[2]) == REGNO (operands[0]) + && INTVAL (operands[3]) <= 65535 && INTVAL (operands[3]) >= 0 + && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'I') + && !side_effects_p (operands[1]) + && (!REG_P (operands[1]) + || REGNO (operands[1]) <= CRIS_LAST_GENERAL_REGISTER)" + ;; FIXME: CC0 valid except for M (i.e. CC_NOT_NEGATIVE). + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] +{ + enum machine_mode zmode = INTVAL (operands[3]) <= 255 ? QImode : HImode; + enum machine_mode amode + = CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'O') ? SImode : zmode; + rtx op1 + = (REG_S_P (operands[1]) + ? gen_rtx_REG (zmode, REGNO (operands[1])) + : adjust_address (operands[1], zmode, 0)); + operands[4] + = gen_rtx_ZERO_EXTEND (SImode, op1); + operands[5] = gen_rtx_REG (amode, REGNO (operands[0])); + operands[6] + = gen_rtx_AND (amode, gen_rtx_REG (amode, REGNO (operands[0])), + GEN_INT (trunc_int_for_mode (INTVAL (operands[3]), + amode == SImode + ? QImode : amode))); +}) + +;; Try and avoid GOTPLT reads escaping a call: transform them into +;; PLT. Curiously (but thankfully), peepholes for instructions +;; *without side-effects* that just feed a call (or call_value) are +;; not matched neither in a build or test-suite, so those patterns are +;; omitted. + +;; A "normal" move where we don't check the consumer. 
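+;; Schematically (the notation is illustrative, not exact CRIS assembly),
+;; the peepholes below replace a load through the GOTPLT slot,
+;;   rx := [GOT + gotplt-offset(sym)]
+;; with a computation of the address of sym's PLT entry,
+;;   rx := pltgotrel-offset(sym)
+;;   rx := rx + GOT
+;; so a following call can go via the PLT entry instead of reading the
+;; GOTPLT slot.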
+ +(define_peephole2 ; gotplt-to-plt + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))]))] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)" + [(set (match_dup 0) (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))] + "") + +;; And one set with a side-effect getting the PLTGOT offset. +;; First call and call_value variants. + +(define_peephole2 ; gotplt-to-plt-side-call + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))]) + (parallel [(call (mem:QI (match_dup 0)) + (match_operand 4 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && peep2_reg_dead_p (2, operands[0])" + [(parallel [(call (mem:QI (match_dup 1)) + (match_dup 4)) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_dup 3) + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] + CRIS_UNSPEC_PLTGOTREAD))))])] + "") + +(define_peephole2 ; gotplt-to-plt-side-call-value + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))]) + (parallel [(set (match_operand 5 "" "") + (call (mem:QI (match_dup 0)) + (match_operand 4 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (match_dup 5) + (call (mem:QI (match_dup 1)) + (match_dup 4))) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_dup 3) + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] + CRIS_UNSPEC_PLTGOTREAD))))])] + "") + +(define_peephole2 ; gotplt-to-plt-side + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)" + [(set (match_dup 3) + (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))) + (set (match_dup 3) (plus:SI (match_dup 3) (reg:SI CRIS_GOT_REGNUM))) + (set (match_dup 0) + (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))] + "") + +;; Local variables: 
+;; mode:emacs-lisp +;; comment-start: ";; " +;; eval: (set-syntax-table (copy-sequence (syntax-table))) +;; eval: (modify-syntax-entry ?[ "(]") +;; eval: (modify-syntax-entry ?] ")[") +;; eval: (modify-syntax-entry ?{ "(}") +;; eval: (modify-syntax-entry ?} "){") +;; eval: (setq indent-tabs-mode t) +;; End: diff --git a/gcc/config/cris/cris.opt b/gcc/config/cris/cris.opt new file mode 100644 index 000000000..9caa48924 --- /dev/null +++ b/gcc/config/cris/cris.opt @@ -0,0 +1,190 @@ +; Options for the CRIS port of the compiler. + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; TARGET_MUL_BUG: Whether or not to work around multiplication +; instruction hardware bug when generating code for models where +; it may be present. From the trouble report for Etrax 100 LX: +; "A multiply operation may cause incorrect cache behaviour +; under some specific circumstances. The problem can occur if +; the instruction following the multiply instruction causes a +; cache miss, and multiply operand 1 (source operand) bits +; [31:27] matches the logical mapping of the mode register +; address (0xb0....), and bits [9:2] of operand 1 matches the +; TLB register address (0x258-0x25f). There is such a mapping +; in kernel mode or when the MMU is off. Normally there is no +; such mapping in user mode, and the problem will therefore +; probably not occur in Linux user mode programs." +; +; We have no sure-fire way to know from within GCC that we're +; compiling a user program. For example, -fpic/PIC is used in +; libgcc which is linked into the kernel. However, the +; workaround option -mno-mul-bug can be safely used per-package +; when compiling programs. The same goes for general user-only +; libraries such as glibc, since there's no user-space +; driver-like program that gets a mapping of I/O registers (all +; on the same page, including the TLB registers). +mmul-bug-workaround +Target Report Mask(MUL_BUG) +Work around bug in multiplication instruction + +; TARGET_ETRAX4_ADD: Instruction-set additions from Etrax 4 and up. +; (Just "lz".) +metrax4 +Target Report Mask(ETRAX4_ADD) +Compile for ETRAX 4 (CRIS v3) + +; See cris_handle_option. +metrax100 +Target Report RejectNegative +Compile for ETRAX 100 (CRIS v8) + +; See cris_handle_option. +mno-etrax100 +Target Report RejectNegative Undocumented + +mpdebug +Target Report Mask(PDEBUG) +Emit verbose debug information in assembly code + +; TARGET_CCINIT: Whether to use condition-codes generated by +; insns other than the immediately preceding compare/test insn. +; Used to check for errors in notice_update_cc. +mcc-init +Target Report Mask(CCINIT) +Do not use condition codes from normal instructions + +; TARGET_SIDE_EFFECT_PREFIXES: Whether to use side-effect +; patterns. Used to debug the [rx=ry+i] type patterns. 
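+; For example, a single insn of the form  move.d [r2=r1+4],r3  (notation
+; as in the cris.md comments; the registers are illustrative) loads from
+; r1+4 and also leaves that address in r2.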
+mside-effects +Target Report RejectNegative Mask(SIDE_EFFECT_PREFIXES) Undocumented + +mno-side-effects +Target Report RejectNegative InverseMask(SIDE_EFFECT_PREFIXES) +Do not emit addressing modes with side-effect assignment + +; TARGET_STACK_ALIGN: Whether to *keep* (not force) alignment of +; stack at 16 (or 32, depending on TARGET_ALIGN_BY_32) bits. +mstack-align +Target Report RejectNegative Mask(STACK_ALIGN) Undocumented + +mno-stack-align +Target Report RejectNegative InverseMask(STACK_ALIGN) +Do not tune stack alignment + +; TARGET_DATA_ALIGN: Whether to do alignment on individual +; modifiable objects. +mdata-align +Target Report RejectNegative Mask(DATA_ALIGN) Undocumented + +mno-data-align +Target Report RejectNegative InverseMask(DATA_ALIGN) +Do not tune writable data alignment + +; TARGET_CONST_ALIGN: Whether to do alignment on individual +; non-modifiable objects. +mconst-align +Target Report RejectNegative Mask(CONST_ALIGN) Undocumented + +mno-const-align +Target Report RejectNegative InverseMask(CONST_ALIGN) +Do not tune code and read-only data alignment + +; See cris_handle_option. +m32-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m32bit +Target Report RejectNegative +Align code and data to 32 bits + +; See cris_handle_option. +m16-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m16bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m8-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m8bit +Target Report RejectNegative +Don't align items in code or data + +; TARGET_PROLOGUE_EPILOGUE: Whether or not to omit function +; prologue and epilogue. +mprologue-epilogue +Target Report RejectNegative Mask(PROLOGUE_EPILOGUE) Undocumented + +mno-prologue-epilogue +Target Report RejectNegative InverseMask(PROLOGUE_EPILOGUE) +Do not emit function prologue or epilogue + +; We have to handle this m-option here since we can't wash it +; off in both CC1_SPEC and CC1PLUS_SPEC. + +mbest-lib-options +Target Report RejectNegative +Use the most feature-enabling options allowed by other options + +; FIXME: The following comment relates to gcc before cris.opt. +; Check it it's still valid: +; We must call it "override-" since calling it "no-" will cause +; gcc.c to forget it, if there's a "later" -mbest-lib-options. +; Kludgy, but needed for some multilibbed files. +moverride-best-lib-options +Target Report RejectNegative +Override -mbest-lib-options + +mcpu= +Target Report RejectNegative Joined Undocumented Var(cris_cpu_str) + +march= +Target Report RejectNegative Joined Var(cris_cpu_str) +-march=ARCH Generate code for the specified chip or CPU version + +mtune= +Target Report RejectNegative Joined Var(cris_tune_str) +-mtune=ARCH Tune alignment for the specified chip or CPU version + +mmax-stackframe= +Target Report RejectNegative Joined Var(cris_max_stackframe_str) +-mmax-stackframe=SIZE Warn when a stackframe is larger than the specified size + +max-stackframe= +Target Report RejectNegative Joined Undocumented Var(cris_max_stackframe_str) + +; TARGET_SVINTO: Currently this just affects alignment. FIXME: +; Redundant with TARGET_ALIGN_BY_32, or put machine stuff here? +; This and the others below could just as well be variables and +; TARGET_* defines in cris.h. +Mask(SVINTO) + +; TARGET_ALIGN_BY_32: Say that all alignment specifications say +; to prefer 32 rather than 16 bits. 
+Mask(ALIGN_BY_32) + +; TARGET_AVOID_GOTPLT is referred to in the .c and the .md so we +; need to allocate the flag and macros here. +Mask(AVOID_GOTPLT) diff --git a/gcc/config/cris/cris_abi_symbol.c b/gcc/config/cris/cris_abi_symbol.c new file mode 100644 index 000000000..db9db2cfe --- /dev/null +++ b/gcc/config/cris/cris_abi_symbol.c @@ -0,0 +1,45 @@ +/* Define symbol to recognize CRIS ABI version 2, for a.out use. + Contributed by Axis Communications. + Written by Hans-Peter Nilsson , c:a 1992. + + Copyright (C) 2000, 2001, 2003, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "tconfig.h" +#include "tm.h" + +#ifdef __AOUT__ + +/* ELF support was not released before the ABI was changed, so we + restrict this awkwardness to a.out. This symbol is for gdb to + recognize, so it can debug both old and new programs successfully. */ +__asm__ (".global " CRIS_ABI_VERSION_SYMBOL_STRING); +__asm__ (".set " CRIS_ABI_VERSION_SYMBOL_STRING ",0"); + +#else /* not __AOUT__ */ + +/* The file must not be empty (declaration/definition-wise) according to + ISO, IIRC. */ +extern int _Dummy; + +#endif /* not __AOUT__ */ diff --git a/gcc/config/cris/elf.opt b/gcc/config/cris/elf.opt new file mode 100644 index 000000000..00ced56b0 --- /dev/null +++ b/gcc/config/cris/elf.opt @@ -0,0 +1,25 @@ +; ELF-specific options for the CRIS port of the compiler. + +; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +melf +Target Report RejectNegative Undocumented + +sim +Driver JoinedOrMissing diff --git a/gcc/config/cris/libgcc.ver b/gcc/config/cris/libgcc.ver new file mode 100644 index 000000000..e35de8310 --- /dev/null +++ b/gcc/config/cris/libgcc.ver @@ -0,0 +1,7 @@ +GCC_4.3 { + __Mul + __Div + __Udiv + __Mod + __Umod +} diff --git a/gcc/config/cris/linux.h b/gcc/config/cris/linux.h new file mode 100644 index 000000000..bf2f5f9c9 --- /dev/null +++ b/gcc/config/cris/linux.h @@ -0,0 +1,151 @@ +/* Definitions for GCC. Part of the machine description for CRIS. 
+ Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* After the first "Node:" comment comes all preprocessor directives and + attached declarations described in the info files, the "Using and + Porting GCC" manual (uapgcc), in the same order as found in the "Target + macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not + really, but needs an update anyway. + + There is no generic copy-of-uapgcc comment, you'll have to see uapgcc + for that. If applicable, there is a CRIS-specific comment. The order + of macro definitions follow the order in the manual. Every section in + the manual (node in the info pages) has an introductory `Node: + ' comment. If no macros are defined for a section, only + the section-comment is present. */ + +/* This file defines the macros for cris-axis-linux-gnu that are not + covered by cris.h, elfos.h and (config/)linux.h. */ + +/* Make sure we have a valid TARGET_CPU_DEFAULT, so we can assume it + and take shortcuts below. */ +#ifndef TARGET_CPU_DEFAULT +#error "TARGET_CPU_DEFAULT not defined" +#elif (TARGET_CPU_DEFAULT+0) != 10 && (TARGET_CPU_DEFAULT+0) != 32 +#error "TARGET_CPU_DEFAULT must be 10 or 32, or this file be updated" +#endif + +/* Node: Instruction Output */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* Node: Driver */ +/* These macros are CRIS-specific, but used in target driver macros. */ + +#undef CRIS_CPP_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_CPP_SUBTARGET_SPEC \ + "%{pthread:-D_REENTRANT}\ + %{!march=*:%{!mcpu=*:-D__arch_v32 -D__CRIS_arch_version=32}}" +#else +# define CRIS_CPP_SUBTARGET_SPEC \ + "%{pthread:-D_REENTRANT}\ + %{!march=*:%{!mcpu=*:-D__arch_v10 -D__CRIS_arch_version=10}}" +#endif + +#undef CRIS_CC1_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_CC1_SUBTARGET_SPEC \ + "%{!march=*:%{!mcpu=*:-march=v32}}" +#define CRIS_SUBTARGET_DEFAULT_ARCH MASK_AVOID_GOTPLT +#else +# define CRIS_CC1_SUBTARGET_SPEC \ + "%{!march=*:%{!mcpu=*:-march=v10}}" +#define CRIS_SUBTARGET_DEFAULT_ARCH 0 +#endif + +#undef CRIS_ASM_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself \ + %{!march=*:%{!mcpu=*:--march=v32}} \ + %{!fleading-underscore:--no-underscore}\ + %{fPIC|fpic|fPIE|fpie: --pic}" +#else +# define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself \ + %{!march=*:%{!mcpu=*:--march=v10}} \ + %{!fleading-underscore:--no-underscore}\ + %{fPIC|fpic|fPIE|fpie: --pic}" +#endif + +/* Previously controlled by target_flags. 
*/ +#undef TARGET_LINUX +#define TARGET_LINUX 1 + +#undef CRIS_SUBTARGET_DEFAULT +#define CRIS_SUBTARGET_DEFAULT \ + (MASK_SVINTO \ + + MASK_ETRAX4_ADD \ + + MASK_ALIGN_BY_32 \ + + CRIS_SUBTARGET_DEFAULT_ARCH) + +#undef CRIS_DEFAULT_CPU_VERSION +#define CRIS_DEFAULT_CPU_VERSION CRIS_CPU_NG + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +#undef CRIS_LINK_SUBTARGET_SPEC +#define CRIS_LINK_SUBTARGET_SPEC \ + "-mcrislinux\ + %{shared} %{static}\ + %{symbolic:-Bdynamic} %{static:-Bstatic}\ + %{!shared:%{!static:\ + %{rdynamic:-export-dynamic}\ + -dynamic-linker " LINUX_DYNAMIC_LINKER "}}\ + %{!r:%{O2|O3: --gc-sections}}" + + +/* Node: Run-time Target */ + +/* For the cris-*-linux* subtarget. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + LINUX_TARGET_OS_CPP_BUILTINS(); \ + if (flag_leading_underscore <= 0) \ + builtin_define ("__NO_UNDERSCORES__"); \ + } \ + while (0) + +/* Node: Type Layout */ + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Node: Sections */ + +/* GNU/Linux has crti and crtn and does not need the + CRT_CALL_STATIC_FUNCTION trick in cris.h. */ +#undef CRT_CALL_STATIC_FUNCTION + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc/config/cris/linux.opt b/gcc/config/cris/linux.opt new file mode 100644 index 000000000..a57c48d7c --- /dev/null +++ b/gcc/config/cris/linux.opt @@ -0,0 +1,33 @@ +; GNU/Linux-specific options for the CRIS port of the compiler. + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; Provide a legacy -mlinux option. +mlinux +Target Report RejectNegative Undocumented + +mno-gotplt +Target Report RejectNegative Mask(AVOID_GOTPLT) MaskExists +Together with -fpic and -fPIC, do not use GOTPLT references + +; There's a small added setup cost with using GOTPLT references +; for the first (resolving) call, but should in total be a win +; both in code-size and execution-time. +mgotplt +Target Report RejectNegative InverseMask(AVOID_GOTPLT) Undocumented diff --git a/gcc/config/cris/mulsi3.asm b/gcc/config/cris/mulsi3.asm new file mode 100644 index 000000000..76dfb6346 --- /dev/null +++ b/gcc/config/cris/mulsi3.asm @@ -0,0 +1,255 @@ +;; Copyright (C) 2001, 2004 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. 
+;; +;; Under Section 7 of GPL version 3, you are granted additional +;; permissions described in the GCC Runtime Library Exception, version +;; 3.1, as published by the Free Software Foundation. +;; +;; You should have received a copy of the GNU General Public License and +;; a copy of the GCC Runtime Library Exception along with this program; +;; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +;; . +;; +;; This code used to be expanded through interesting expansions in +;; the machine description, compiled from this code: +;; +;; #ifdef L_mulsi3 +;; long __Mul (unsigned long a, unsigned long b) __attribute__ ((__const__)); +;; +;; /* This must be compiled with the -mexpand-mul flag, to synthesize the +;; multiplication from the mstep instructions. The check for +;; smaller-size multiplication pays off in the order of .5-10%; +;; estimated median 1%, depending on application. +;; FIXME: It can be further optimized if we go to assembler code, as +;; gcc 2.7.2 adds a few unnecessary instructions and does not put the +;; basic blocks in optimal order. */ +;; long +;; __Mul (unsigned long a, unsigned long b) +;; { +;; #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10 +;; /* In case other code is compiled without -march=v10, they will +;; contain calls to __Mul, regardless of flags at link-time. The +;; "else"-code below will work, but is unnecessarily slow. This +;; sometimes cuts a few minutes off from simulation time by just +;; returning a "mulu.d". */ +;; return a * b; +;; #else +;; unsigned long min; +;; +;; /* Get minimum via the bound insn. */ +;; min = a < b ? a : b; +;; +;; /* Can we omit computation of the high part? */ +;; if (min > 65535) +;; /* No. Perform full multiplication. */ +;; return a * b; +;; else +;; { +;; /* Check if both operands are within 16 bits. */ +;; unsigned long max; +;; +;; /* Get maximum, by knowing the minimum. +;; This will partition a and b into max and min. +;; This is not currently something GCC understands, +;; so do this trick by asm. */ +;; __asm__ ("xor %1,%0\n\txor %2,%0" +;; : "=r" (max) +;; : "r" (b), "r" (a), "0" (min)); +;; +;; if (max > 65535) +;; /* Make GCC understand that only the low part of "min" will be +;; used. */ +;; return max * (unsigned short) min; +;; else +;; /* Only the low parts of both operands are necessary. */ +;; return ((unsigned short) max) * (unsigned short) min; +;; } +;; #endif /* not __CRIS_arch_version >= 10 */ +;; } +;; #endif /* L_mulsi3 */ +;; +;; That approach was abandoned since the caveats outweighted the +;; benefits. The expand-multiplication machinery is also removed, so you +;; can't do this anymore. +;; +;; For doubters of there being any benefits, some where: insensitivity to: +;; - ABI changes (mostly for experimentation) +;; - assembler syntax differences (mostly debug format). +;; - insn scheduling issues. +;; Most ABI experiments will presumably happen with arches with mul insns, +;; so that argument doesn't really hold anymore, and it's unlikely there +;; being new arch variants needing insn scheduling and not having mul +;; insns. + +;; ELF and a.out have different syntax for local labels: the "wrong" +;; one may not be omitted from the object. +#undef L +#ifdef __AOUT__ +# define L(x) x +#else +# define L(x) .x +#endif + + .global ___Mul + .type ___Mul,@function +___Mul: +#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10 +;; Can't have the mulu.d last on a cache-line (in the delay-slot of the +;; "ret"), due to hardware bug. 
See documentation for -mmul-bug-workaround. +;; Not worthwhile to conditionalize here. + .p2alignw 2,0x050f + mulu.d $r11,$r10 + ret + nop +#else + move.d $r10,$r12 + move.d $r11,$r9 + bound.d $r12,$r9 + cmpu.w 65535,$r9 + bls L(L3) + move.d $r12,$r13 + + movu.w $r11,$r9 + lslq 16,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + mstep $r9,$r13 + clear.w $r10 + test.d $r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + movu.w $r12,$r12 + move.d $r11,$r9 + clear.w $r9 + test.d $r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + mstep $r12,$r9 + add.w $r9,$r10 + lslq 16,$r10 + ret + add.d $r13,$r10 + +L(L3): + move.d $r9,$r10 + xor $r11,$r10 + xor $r12,$r10 + cmpu.w 65535,$r10 + bls L(L5) + movu.w $r9,$r13 + + movu.w $r13,$r13 + move.d $r10,$r9 + lslq 16,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + mstep $r13,$r9 + clear.w $r10 + test.d $r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + mstep $r13,$r10 + lslq 16,$r10 + ret + add.d $r9,$r10 + +L(L5): + movu.w $r9,$r9 + lslq 16,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + mstep $r9,$r10 + ret + mstep $r9,$r10 +#endif +L(Lfe1): + .size ___Mul,L(Lfe1)-___Mul diff --git a/gcc/config/cris/predicates.md b/gcc/config/cris/predicates.md new file mode 100644 index 000000000..edd16bbdd --- /dev/null +++ b/gcc/config/cris/predicates.md @@ -0,0 +1,174 @@ +;; Operand and operator predicates for the GCC CRIS port. +;; Copyright (C) 2005, 2007 Free Software Foundation, Inc. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; Operator predicates. 
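+;; A note on the constructs used below: match_code restricts the RTX codes
+;; an operand may have, match_test is a C expression evaluated with `op'
+;; (and `mode') in scope, and define_special_predicate differs from
+;; define_predicate in that it skips the automatic mode check.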
+ +(define_predicate "cris_orthogonal_operator" + (match_code "plus, minus, ior, and, umin")) + +(define_predicate "cris_commutative_orth_op" + (match_code "plus, ior, and, umin")) + +;; By the name, you might think we should include MULT. We don't because +;; it doesn't accept the same addressing modes as the others (only +;; registers) and there's also the problem of handling TARGET_MUL_BUG. + +(define_predicate "cris_operand_extend_operator" + (match_code "plus, minus, umin")) + +(define_predicate "cris_additive_operand_extend_operator" + (match_code "plus, minus")) + +(define_predicate "cris_extend_operator" + (match_code "zero_extend, sign_extend")) + +(define_predicate "cris_plus_or_bound_operator" + (match_code "plus, umin")) + +;; Used as an operator to get a handle on a already-known-valid MEM rtx:es +;; (no need to validate the address), where some address expression parts +;; have their own match_operand. + +(define_predicate "cris_mem_op" + (match_code "mem")) + +(define_predicate "cris_load_multiple_op" + (and (match_code "parallel") + (match_test "cris_movem_load_rest_p (op, 0)"))) + +(define_predicate "cris_store_multiple_op" + (and (match_code "parallel") + (match_test "cris_store_multiple_op_p (op)"))) + + +;; Operand helper predicates. + +(define_predicate "cris_bdap_const_operand" + (and (match_code "label_ref, symbol_ref, const_int, const_double, const") + (ior (not (match_test "flag_pic")) + (match_test "cris_valid_pic_const (op, true)")))) + +(define_predicate "cris_simple_address_operand" + (ior (match_operand:SI 0 "register_operand") + (and (match_code "post_inc") + (match_test "register_operand (XEXP (op, 0), Pmode)")))) + +(define_predicate "cris_simple_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "mem") + (match_test "cris_simple_address_operand (XEXP (op, 0), + Pmode)")))) + +;; The caller needs to use :SI. +(define_predicate "cris_bdap_sign_extend_operand" +; Disabled until +; or is committed. + (match_test "0")) +; (and (match_code "sign_extend") +; (and (match_test "MEM_P (XEXP (op, 0))") +; (match_test "cris_simple_address_operand (XEXP (XEXP (op, 0), 0), +; Pmode)")))) + +;; FIXME: Should not have to test for 1. +(define_predicate "cris_scale_int_operand" + (and (match_code "const_int") + (ior (ior (match_test "op == GEN_INT (4)") + (match_test "op == const2_rtx")) + (match_test "op == const1_rtx")))) + +;; FIXME: Should be able to assume (reg int). +(define_predicate "cris_biap_mult_operand" + (and (match_code "mult") + (ior (and (match_test "register_operand (XEXP (op, 0), Pmode)") + (match_test "cris_scale_int_operand (XEXP (op, 1), Pmode)")) + (and (match_test "cris_scale_int_operand (XEXP (op, 0), Pmode)") + (match_test "register_operand (XEXP (op, 1), Pmode)"))))) + + +;; Operand predicates. + +;; This checks a part of an address, the one that is not a plain register +;; for an addressing mode using BDAP. +;; Allowed operands are either: +;; a) a register +;; b) a CONST operand (but not a symbol when generating PIC) +;; c) a [r] or [r+] in SImode, or sign-extend from HI or QI. + +(define_predicate "cris_bdap_operand" + (ior (match_operand 0 "cris_bdap_const_operand") + (ior (match_operand:SI 0 "cris_simple_operand") + (match_operand:SI 0 "cris_bdap_sign_extend_operand")))) + +;; This is similar to cris_bdap_operand: +;; It checks a part of an address, the one that is not a plain register +;; for an addressing mode using BDAP or BIAP. 
+;; Allowed operands are either: +;; a) a register +;; b) a CONST operand (but not a symbol when generating PIC) +;; c) a mult of (1, 2 or 4) and a register +;; d) a [r] or [r+] in SImode, or sign-extend from HI or QI. */ + +(define_predicate "cris_bdap_biap_operand" + (ior (match_operand 0 "cris_bdap_operand") + (match_operand 0 "cris_biap_mult_operand"))) + +;; Since with -fPIC, not all symbols are valid PIC symbols or indeed +;; general_operands, we have to have a predicate that matches it for the +;; "movsi" expander. +;; FIXME: Can s/special_// when PR 20413 is fixed. + +(define_special_predicate "cris_general_operand_or_symbol" + (ior (match_operand 0 "general_operand") + (and (match_code "const, symbol_ref, label_ref") + ; The following test is actually just an assertion. + (match_test "cris_pic_symbol_type_of (op) != cris_no_symbol")))) + +;; A predicate for the anon movsi expansion, one that fits a PCREL +;; operand as well as general_operand. + +(define_special_predicate "cris_general_operand_or_pic_source" + (ior (match_operand 0 "general_operand") + (and (match_test "flag_pic") + (match_test "cris_valid_pic_const (op, false)")))) + +;; Since a PLT symbol is not a general_operand, we have to have a +;; predicate that matches it when we need it. We use this in the expanded +;; "call" and "call_value" anonymous patterns. + +(define_predicate "cris_nonmemory_operand_or_callable_symbol" + (ior (match_operand 0 "nonmemory_operand") + (and (match_code "const") + (and + (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") + (ior + (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PLT_PCREL") + (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PCREL")))))) + +;; This matches a (MEM (general_operand)) or +;; (MEM (cris_general_operand_or_symbol)). The second one isn't a valid +;; memory_operand, so we need this predicate to recognize call +;; destinations before we change them to a PLT operand (by wrapping in +;; UNSPEC CRIS_UNSPEC_PLT). + +(define_predicate "cris_mem_call_operand" + (and (match_code "mem") + (ior (match_operand 0 "memory_operand") + (match_test "cris_general_operand_or_symbol (XEXP (op, 0), + Pmode)")))) diff --git a/gcc/config/cris/t-cris b/gcc/config/cris/t-cris new file mode 100644 index 000000000..1630acbcc --- /dev/null +++ b/gcc/config/cris/t-cris @@ -0,0 +1,58 @@ +# +# t-cris +# +# The Makefile fragment to include when compiling gcc et al for CRIS. +# +# Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . +# +# The makefile macros etc. 
are included in the order found in the +# section "Target Fragment" in the gcc info-files (or the paper copy) of +# "Using and Porting GCC" + +LIB2FUNCS_EXTRA = _udivsi3.c _divsi3.c _umodsi3.c _modsi3.c +CRIS_LIB1CSRC = $(srcdir)/config/cris/arit.c + +FPBIT = tmplibgcc_fp_bit.c +DPBIT = dp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' > dp-bit.c + cat $(srcdir)/config/fp-bit.c >> dp-bit.c + +# Use another name to avoid confusing SUN make, if support for +# it is reinstated elsewhere. Prefixed with "tmplibgcc" means +# "make clean" will wipe it. We define a few L_ thingies +# because we can't select them individually through FPBIT_FUNCS; +# see above. +tmplibgcc_fp_bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT_BIT_ORDER_MISMATCH' > $@ + echo '#define FLOAT' >> $@ + cat $(srcdir)/config/fp-bit.c >> $@ + +# The fixed-point arithmetic code is in one file, arit.c, +# similar to libgcc2.c (or the old libgcc1.c). We need to +# "split it up" with one file per define. +$(LIB2FUNCS_EXTRA): $(CRIS_LIB1CSRC) + name=`echo $@ | sed -e 's,.*/,,' | sed -e 's,.c$$,,'`; \ + echo "#define L$$name" > tmp-$@ \ + && echo '#include "$<"' >> tmp-$@ \ + && mv -f tmp-$@ $@ + +$(out_object_file): gt-cris.h +gt-cris.h : s-gtype ; @true diff --git a/gcc/config/cris/t-elfmulti b/gcc/config/cris/t-elfmulti new file mode 100644 index 000000000..8d4dfea4e --- /dev/null +++ b/gcc/config/cris/t-elfmulti @@ -0,0 +1,34 @@ +# Copyright (C) 2001, 2007 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/cris/mulsi3.asm +MULTILIB_OPTIONS = march=v10/march=v32 +MULTILIB_DIRNAMES = v10 v32 +MULTILIB_MATCHES = \ + march?v10=mcpu?etrax100lx \ + march?v10=mcpu?ng \ + march?v10=march?etrax100lx \ + march?v10=march?ng \ + march?v10=march?v11 \ + march?v10=mcpu?v11 \ + march?v10=mcpu?v10 \ + march?v32=mcpu?v32 +MULTILIB_EXTRA_OPTS = mbest-lib-options +INSTALL_LIBGCC = install-multilib +LIBGCC = stmp-multilib +CRTSTUFF_T_CFLAGS = $(LIBGCC2_CFLAGS) -moverride-best-lib-options diff --git a/gcc/config/cris/t-linux b/gcc/config/cris/t-linux new file mode 100644 index 000000000..96e861a42 --- /dev/null +++ b/gcc/config/cris/t-linux @@ -0,0 +1,9 @@ +TARGET_LIBGCC2_CFLAGS += -fPIC +CRTSTUFF_T_CFLAGS_S = $(TARGET_LIBGCC2_CFLAGS) +SHLIB_MAPFILES += $(srcdir)/config/cris/libgcc.ver + +# We *know* we have a limits.h in the glibc library, with extra +# definitions needed for e.g. libgfortran. +ifneq ($(inhibit_libc),true) +LIMITS_H_TEST = : +endif diff --git a/gcc/config/crx/crx-protos.h b/gcc/config/crx/crx-protos.h new file mode 100644 index 000000000..aeb4bdd59 --- /dev/null +++ b/gcc/config/crx/crx-protos.h @@ -0,0 +1,79 @@ +/* Prototypes for exported functions defined in crx.c + Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2007, 2010 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_CRX_PROTOS_H +#define GCC_CRX_PROTOS_H + + +/* Register usage. */ +extern enum reg_class crx_regno_reg_class (int); +extern int crx_hard_regno_mode_ok (int regno, enum machine_mode); +#ifdef RTX_CODE +extern enum reg_class crx_secondary_reload_class (enum reg_class, enum machine_mode, rtx); +#endif /* RTX_CODE */ + +/* Passing function arguments. */ +extern int crx_function_arg_regno_p (int); +#ifdef TREE_CODE +#ifdef RTX_CODE +extern void crx_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx); +#endif /* RTX_CODE */ +#endif /* TREE_CODE */ + +#ifdef RTX_CODE +/* Addressing Modes. */ +struct crx_address +{ + rtx base, index, disp, side_effect; + int scale; +}; + +enum crx_addrtype +{ + CRX_INVALID, CRX_REG_REL, CRX_POST_INC, CRX_SCALED_INDX, CRX_ABSOLUTE +}; + +extern enum crx_addrtype crx_decompose_address (rtx addr, struct crx_address *out); + +extern int crx_const_double_ok (rtx op); + +/* Instruction output. */ +extern void crx_print_operand (FILE *, rtx, int); +extern void crx_print_operand_address (FILE *, rtx); + +/* Misc functions called from crx.md. */ +extern void crx_expand_movmem_single (rtx, rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT *); +extern int crx_expand_movmem (rtx, rtx, rtx, rtx); +#endif /* RTX_CODE */ + +/* Routines to compute costs. */ +extern int crx_memory_move_cost (enum machine_mode, enum reg_class, int); + +/* Prologue/Epilogue functions. */ +extern int crx_initial_elimination_offset (int, int); +extern char *crx_prepare_push_pop_string (int); +extern void crx_expand_prologue (void); +extern void crx_expand_epilogue (void); + + +/* Handling the "interrupt" attribute */ +extern int crx_interrupt_function_p (void); + +#endif /* GCC_CRX_PROTOS_H */ diff --git a/gcc/config/crx/crx.c b/gcc/config/crx/crx.c new file mode 100644 index 000000000..79d341c47 --- /dev/null +++ b/gcc/config/crx/crx.c @@ -0,0 +1,1466 @@ +/* Output routines for GCC for CRX. + Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +/*****************************************************************************/ +/* HEADER INCLUDES */ +/*****************************************************************************/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-codes.h" +#include "insn-attr.h" +#include "flags.h" +#include "except.h" +#include "function.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "diagnostic-core.h" +#include "basic-block.h" +#include "df.h" +#include "target.h" +#include "target-def.h" + +/*****************************************************************************/ +/* DEFINITIONS */ +/*****************************************************************************/ + +/* Maximum number of register used for passing parameters. */ +#define MAX_REG_FOR_PASSING_ARGS 6 + +/* Minimum number register used for passing parameters. */ +#define MIN_REG_FOR_PASSING_ARGS 2 + +/* The maximum count of words supported in the assembly of the architecture in + * a push/pop instruction. */ +#define MAX_COUNT 8 + +/* Predicate is true if the current function is a 'noreturn' function, i.e. it + * is qualified as volatile. */ +#define FUNC_IS_NORETURN_P(decl) (TREE_THIS_VOLATILE (decl)) + +/* The following macros are used in crx_decompose_address () */ + +/* Returns the factor of a scaled index address or -1 if invalid. */ +#define SCALE_FOR_INDEX_P(X) \ + (GET_CODE (X) == CONST_INT ? \ + (INTVAL (X) == 1 ? 1 : \ + INTVAL (X) == 2 ? 2 : \ + INTVAL (X) == 4 ? 4 : \ + INTVAL (X) == 8 ? 8 : \ + -1) : \ + -1) + +/* Nonzero if the rtx X is a signed const int of n bits */ +#define RTX_SIGNED_INT_FITS_N_BITS(X,n) \ + ((GET_CODE (X) == CONST_INT \ + && SIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0) + +/* Nonzero if the rtx X is an unsigned const int of n bits. */ +#define RTX_UNSIGNED_INT_FITS_N_BITS(X, n) \ + ((GET_CODE (X) == CONST_INT \ + && UNSIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0) + +/*****************************************************************************/ +/* STATIC VARIABLES */ +/*****************************************************************************/ + +/* Nonzero if the last param processed is passed in a register. */ +static int last_parm_in_reg; + +/* Will hold the number of the last register the prologue saves, -1 if no + * register is saved. */ +static int last_reg_to_save; + +/* Each object in the array is a register number. Mark 1 for registers that + * need to be saved. */ +static int save_regs[FIRST_PSEUDO_REGISTER]; + +/* Number of bytes saved on the stack for non-scratch registers */ +static int sum_regs = 0; + +/* Number of bytes saved on the stack for local variables. */ +static int local_vars_size; + +/* The sum of 2 sizes: locals vars and padding byte for saving the registers. + * Used in expand_prologue () and expand_epilogue (). */ +static int size_for_adjusting_sp; + +/* In case of a POST_INC or POST_DEC memory reference, we must report the mode + * of the memory reference from PRINT_OPERAND to PRINT_OPERAND_ADDRESS. 
*/ +static enum machine_mode output_memory_reference_mode; + +/*****************************************************************************/ +/* TARGETM FUNCTION PROTOTYPES */ +/*****************************************************************************/ + +static bool crx_fixed_condition_code_regs (unsigned int *, unsigned int *); +static rtx crx_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED); +static bool crx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED); +static int crx_address_cost (rtx, bool); +static bool crx_legitimate_address_p (enum machine_mode, rtx, bool); +static bool crx_can_eliminate (const int, const int); +static rtx crx_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static void crx_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); + +/*****************************************************************************/ +/* RTL VALIDITY */ +/*****************************************************************************/ + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P crx_legitimate_address_p + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE crx_can_eliminate + +/*****************************************************************************/ +/* STACK LAYOUT AND CALLING CONVENTIONS */ +/*****************************************************************************/ + +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS crx_fixed_condition_code_regs + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX crx_struct_value_rtx + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY crx_return_in_memory + +/*****************************************************************************/ +/* PASSING FUNCTION ARGUMENTS */ +/*****************************************************************************/ + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG crx_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE crx_function_arg_advance + +/*****************************************************************************/ +/* RELATIVE COSTS OF OPERATIONS */ +/*****************************************************************************/ + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST crx_address_cost + +/*****************************************************************************/ +/* TARGET-SPECIFIC USES OF `__attribute__' */ +/*****************************************************************************/ + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE crx_attribute_table + +static const struct attribute_spec crx_attribute_table[] = { + /* ISRs have special prologue and epilogue requirements. */ + {"interrupt", 0, 0, false, true, true, NULL}, + {NULL, 0, 0, false, false, false, NULL} +}; + +/* Option handling. */ + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE crx_option_optimization_table + +static const struct default_options crx_option_optimization_table[] = + { + /* Put each function in its own section so that PAGE-instruction + relaxation can do its best. */ + { OPT_LEVELS_1_PLUS, OPT_ffunction_sections, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Initialize 'targetm' variable which contains pointers to functions and data + * relating to the target machine. 
*/ + +struct gcc_target targetm = TARGET_INITIALIZER; + + +/*****************************************************************************/ +/* TARGET HOOK IMPLEMENTATIONS */ +/*****************************************************************************/ + +/* Return the fixed registers used for condition codes. */ + +static bool +crx_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) +{ + *p1 = CC_REGNUM; + *p2 = INVALID_REGNUM; + return true; +} + +/* Implements hook TARGET_STRUCT_VALUE_RTX. */ + +static rtx +crx_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, CRX_STRUCT_VALUE_REGNUM); +} + +/* Implements hook TARGET_RETURN_IN_MEMORY. */ + +static bool +crx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (TYPE_MODE (type) == BLKmode) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + return (size == -1 || size > 8); + } + else + return false; +} + + +/*****************************************************************************/ +/* MACRO IMPLEMENTATIONS */ +/*****************************************************************************/ + +/* STACK LAYOUT AND CALLING CONVENTIONS ROUTINES */ +/* --------------------------------------------- */ + +/* Return nonzero if the current function being compiled is an interrupt + * function as specified by the "interrupt" attribute. */ + +int +crx_interrupt_function_p (void) +{ + tree attributes; + + attributes = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + return lookup_attribute ("interrupt", attributes) != NULL_TREE; +} + +/* Compute values for the array save_regs and the variable sum_regs. The index + * of save_regs is numbers of register, each will get 1 if we need to save it + * in the current function, 0 if not. sum_regs is the total sum of the + * registers being saved. */ + +static void +crx_compute_save_regs (void) +{ + unsigned int regno; + + /* initialize here so in case the function is no-return it will be -1. */ + last_reg_to_save = -1; + + /* No need to save any registers if the function never returns. */ + if (FUNC_IS_NORETURN_P (current_function_decl)) + return; + + /* Initialize the number of bytes to be saved. */ + sum_regs = 0; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + if (fixed_regs[regno]) + { + save_regs[regno] = 0; + continue; + } + + /* If this reg is used and not call-used (except RA), save it. */ + if (crx_interrupt_function_p ()) + { + if (!current_function_is_leaf && call_used_regs[regno]) + /* this is a volatile reg in a non-leaf interrupt routine - save it + * for the sake of its sons. */ + save_regs[regno] = 1; + + else if (df_regs_ever_live_p (regno)) + /* This reg is used - save it. */ + save_regs[regno] = 1; + else + /* This reg is not used, and is not a volatile - don't save. */ + save_regs[regno] = 0; + } + else + { + /* If this reg is used and not call-used (except RA), save it. */ + if (df_regs_ever_live_p (regno) + && (!call_used_regs[regno] || regno == RETURN_ADDRESS_REGNUM)) + save_regs[regno] = 1; + else + save_regs[regno] = 0; + } + } + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (save_regs[regno] == 1) + { + last_reg_to_save = regno; + sum_regs += UNITS_PER_WORD; + } +} + +/* Compute the size of the local area and the size to be adjusted by the + * prologue and epilogue. */ + +static void +crx_compute_frame (void) +{ + /* For aligning the local variables. 
*/ + int stack_alignment = STACK_BOUNDARY / BITS_PER_UNIT; + int padding_locals; + + /* Padding needed for each element of the frame. */ + local_vars_size = get_frame_size (); + + /* Align to the stack alignment. */ + padding_locals = local_vars_size % stack_alignment; + if (padding_locals) + padding_locals = stack_alignment - padding_locals; + + local_vars_size += padding_locals; + + size_for_adjusting_sp = local_vars_size + (ACCUMULATE_OUTGOING_ARGS ? + crtl->outgoing_args_size : 0); +} + +/* Worker function for TARGET_CAN_ELIMINATE. */ + +bool +crx_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true); +} + +/* Implements the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */ + +int +crx_initial_elimination_offset (int from, int to) +{ + /* Compute this since we need to use sum_regs. */ + crx_compute_save_regs (); + + /* Compute this since we need to use local_vars_size. */ + crx_compute_frame (); + + if ((from) == FRAME_POINTER_REGNUM && (to) == STACK_POINTER_REGNUM) + return (ACCUMULATE_OUTGOING_ARGS ? + crtl->outgoing_args_size : 0); + else if ((from) == ARG_POINTER_REGNUM && (to) == FRAME_POINTER_REGNUM) + return (sum_regs + local_vars_size); + else if ((from) == ARG_POINTER_REGNUM && (to) == STACK_POINTER_REGNUM) + return (sum_regs + local_vars_size + + (ACCUMULATE_OUTGOING_ARGS ? + crtl->outgoing_args_size : 0)); + else + abort (); +} + +/* REGISTER USAGE */ +/* -------------- */ + +/* Return the class number of the smallest class containing reg number REGNO. + * This could be a conditional expression or could index an array. */ + +enum reg_class +crx_regno_reg_class (int regno) +{ + if (regno >= 0 && regno < SP_REGNUM) + return NOSP_REGS; + + if (regno == SP_REGNUM) + return GENERAL_REGS; + + if (regno == LO_REGNUM) + return LO_REGS; + if (regno == HI_REGNUM) + return HI_REGS; + + return NO_REGS; +} + +/* Transfer between HILO_REGS and memory via secondary reloading. */ + +enum reg_class +crx_secondary_reload_class (enum reg_class rclass, + enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ + if (reg_classes_intersect_p (rclass, HILO_REGS) + && true_regnum (x) == -1) + return GENERAL_REGS; + + return NO_REGS; +} + +/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ + +int +crx_hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + /* CC can only hold CCmode values. */ + if (regno == CC_REGNUM) + return GET_MODE_CLASS (mode) == MODE_CC; + if (GET_MODE_CLASS (mode) == MODE_CC) + return 0; + /* HILO registers can only hold SImode and DImode */ + if (HILO_REGNO_P (regno)) + return mode == SImode || mode == DImode; + return 1; +} + +/* PASSING FUNCTION ARGUMENTS */ +/* -------------------------- */ + +/* If enough param regs are available for passing the param of type TYPE return + * the number of registers needed else 0. */ + +static int +enough_regs_for_param (CUMULATIVE_ARGS * cum, const_tree type, + enum machine_mode mode) +{ + int type_size; + int remaining_size; + + if (mode != BLKmode) + type_size = GET_MODE_BITSIZE (mode); + else + type_size = int_size_in_bytes (type) * BITS_PER_UNIT; + + remaining_size = + BITS_PER_WORD * (MAX_REG_FOR_PASSING_ARGS - + (MIN_REG_FOR_PASSING_ARGS + cum->ints) + 1); + + /* Any variable which is too big to pass in two registers, will pass on + * stack. 
*/ + if ((remaining_size >= type_size) && (type_size <= 2 * BITS_PER_WORD)) + return (type_size + BITS_PER_WORD - 1) / BITS_PER_WORD; + + return 0; +} + +/* Implements TARGET_FUNCTION_ARG. */ + +static rtx +crx_function_arg (CUMULATIVE_ARGS * cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + last_parm_in_reg = 0; + + /* Function_arg () is called with this type just after all the args have had + * their registers assigned. The rtx that function_arg returns from this type + * is supposed to pass to 'gen_call' but currently it is not implemented (see + * macro GEN_CALL). */ + if (type == void_type_node) + return NULL_RTX; + + if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0)) + return NULL_RTX; + + if (mode == BLKmode) + { + /* Enable structures that need padding bytes at the end to pass to a + * function in registers. */ + if (enough_regs_for_param (cum, type, mode) != 0) + { + last_parm_in_reg = 1; + return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints); + } + } + + if (MIN_REG_FOR_PASSING_ARGS + cum->ints > MAX_REG_FOR_PASSING_ARGS) + return NULL_RTX; + else + { + if (enough_regs_for_param (cum, type, mode) != 0) + { + last_parm_in_reg = 1; + return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints); + } + } + + return NULL_RTX; +} + +/* Implements the macro INIT_CUMULATIVE_ARGS defined in crx.h. */ + +void +crx_init_cumulative_args (CUMULATIVE_ARGS * cum, tree fntype, + rtx libfunc ATTRIBUTE_UNUSED) +{ + tree param, next_param; + + cum->ints = 0; + + /* Determine if this function has variable arguments. This is indicated by + * the last argument being 'void_type_mode' if there are no variable + * arguments. Change here for a different vararg. */ + for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; + param != (tree) 0; param = next_param) + { + next_param = TREE_CHAIN (param); + if (next_param == (tree) 0 && TREE_VALUE (param) != void_type_node) + { + cum->ints = -1; + return; + } + } +} + +/* Implements TARGET_FUNCTION_ARG_ADVANCE. */ + +static void +crx_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + /* l holds the number of registers required */ + int l = GET_MODE_BITSIZE (mode) / BITS_PER_WORD; + + /* If the parameter isn't passed on a register don't advance cum. */ + if (!last_parm_in_reg) + return; + + if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0)) + return; + + if (mode == SImode || mode == HImode || mode == QImode || mode == DImode) + { + if (l <= 1) + cum->ints += 1; + else + cum->ints += l; + } + else if (mode == SFmode || mode == DFmode) + cum->ints += l; + else if ((mode) == BLKmode) + { + if ((l = enough_regs_for_param (cum, type, mode)) != 0) + cum->ints += l; + } + +} + +/* Implements the macro FUNCTION_ARG_REGNO_P defined in crx.h. Return nonzero + * if N is a register used for passing parameters. */ + +int +crx_function_arg_regno_p (int n) +{ + return (n <= MAX_REG_FOR_PASSING_ARGS && n >= MIN_REG_FOR_PASSING_ARGS); +} + +/* ADDRESSING MODES */ +/* ---------------- */ + +/* Implements the hook for TARGET_LEGITIMATE_ADDRESS_P defined in crx.h. + * The following addressing modes are supported on CRX: + * + * Relocations --> const | symbol_ref | label_ref + * Absolute address --> 32-bit absolute + * Post increment --> reg + 12-bit disp. + * Post modify --> reg + 12-bit disp. + * Register relative --> reg | 32-bit disp. + reg | 4 bit + reg + * Scaled index --> reg + reg | 22-bit disp. + reg + reg | + * 22-disp. 
+ reg + reg + (2 | 4 | 8) */ + +static rtx +crx_addr_reg (rtx addr_reg) +{ + if (GET_MODE (addr_reg) != Pmode) + return NULL_RTX; + + if (REG_P (addr_reg)) + return addr_reg; + else if (GET_CODE (addr_reg) == SUBREG + && REG_P (SUBREG_REG (addr_reg)) + && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (addr_reg))) + <= UNITS_PER_WORD)) + return SUBREG_REG (addr_reg); + else + return NULL_RTX; +} + +enum crx_addrtype +crx_decompose_address (rtx addr, struct crx_address *out) +{ + rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; + rtx scale_rtx = NULL_RTX, side_effect = NULL_RTX; + int scale = -1; + + enum crx_addrtype retval = CRX_INVALID; + + switch (GET_CODE (addr)) + { + case CONST_INT: + /* Absolute address (known at compile time) */ + retval = CRX_ABSOLUTE; + disp = addr; + if (!UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), GET_MODE_BITSIZE (Pmode))) + return CRX_INVALID; + break; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + /* Absolute address (known at link time) */ + retval = CRX_ABSOLUTE; + disp = addr; + break; + + case REG: + case SUBREG: + /* Register relative address */ + retval = CRX_REG_REL; + base = addr; + break; + + case PLUS: + switch (GET_CODE (XEXP (addr, 0))) + { + case REG: + case SUBREG: + if (REG_P (XEXP (addr, 1))) + { + /* Scaled index with scale = 1 and disp. = 0 */ + retval = CRX_SCALED_INDX; + base = XEXP (addr, 1); + index = XEXP (addr, 0); + scale = 1; + } + else if (RTX_SIGNED_INT_FITS_N_BITS (XEXP (addr, 1), 28)) + { + /* Register relative address and <= 28-bit disp. */ + retval = CRX_REG_REL; + base = XEXP (addr, 0); + disp = XEXP (addr, 1); + } + else + return CRX_INVALID; + break; + + case PLUS: + /* Scaled index and <= 22-bit disp. */ + retval = CRX_SCALED_INDX; + base = XEXP (XEXP (addr, 0), 1); + disp = XEXP (addr, 1); + if (!RTX_SIGNED_INT_FITS_N_BITS (disp, 22)) + return CRX_INVALID; + switch (GET_CODE (XEXP (XEXP (addr, 0), 0))) + { + case REG: + /* Scaled index with scale = 0 and <= 22-bit disp. */ + index = XEXP (XEXP (addr, 0), 0); + scale = 1; + break; + + case MULT: + /* Scaled index with scale >= 0 and <= 22-bit disp. */ + index = XEXP (XEXP (XEXP (addr, 0), 0), 0); + scale_rtx = XEXP (XEXP (XEXP (addr, 0), 0), 1); + if ((scale = SCALE_FOR_INDEX_P (scale_rtx)) == -1) + return CRX_INVALID; + break; + + default: + return CRX_INVALID; + } + break; + + case MULT: + /* Scaled index with scale >= 0 */ + retval = CRX_SCALED_INDX; + base = XEXP (addr, 1); + index = XEXP (XEXP (addr, 0), 0); + scale_rtx = XEXP (XEXP (addr, 0), 1); + /* Scaled index with scale >= 0 and <= 22-bit disp. */ + if ((scale = SCALE_FOR_INDEX_P (scale_rtx)) == -1) + return CRX_INVALID; + break; + + default: + return CRX_INVALID; + } + break; + + case POST_INC: + case POST_DEC: + /* Simple post-increment */ + retval = CRX_POST_INC; + base = XEXP (addr, 0); + side_effect = addr; + break; + + case POST_MODIFY: + /* Generic post-increment with <= 12-bit disp. 
*/ + retval = CRX_POST_INC; + base = XEXP (addr, 0); + side_effect = XEXP (addr, 1); + if (base != XEXP (side_effect, 0)) + return CRX_INVALID; + switch (GET_CODE (side_effect)) + { + case PLUS: + case MINUS: + disp = XEXP (side_effect, 1); + if (!RTX_SIGNED_INT_FITS_N_BITS (disp, 12)) + return CRX_INVALID; + break; + + default: + /* CRX only supports PLUS and MINUS */ + return CRX_INVALID; + } + break; + + default: + return CRX_INVALID; + } + + if (base) + { + base = crx_addr_reg (base); + if (!base) + return CRX_INVALID; + } + if (index) + { + index = crx_addr_reg (index); + if (!index) + return CRX_INVALID; + } + + out->base = base; + out->index = index; + out->disp = disp; + out->scale = scale; + out->side_effect = side_effect; + + return retval; +} + +bool +crx_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx addr, bool strict) +{ + enum crx_addrtype addrtype; + struct crx_address address; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", + GET_MODE_NAME (mode), strict); + debug_rtx (addr); + } + + addrtype = crx_decompose_address (addr, &address); + + if (addrtype == CRX_POST_INC && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return FALSE; + + if (TARGET_DEBUG_ADDR) + { + const char *typestr; + switch (addrtype) + { + case CRX_INVALID: + typestr = "Invalid"; + break; + case CRX_REG_REL: + typestr = "Register relative"; + break; + case CRX_POST_INC: + typestr = "Post-increment"; + break; + case CRX_SCALED_INDX: + typestr = "Scaled index"; + break; + case CRX_ABSOLUTE: + typestr = "Absolute"; + break; + default: + abort (); + } + fprintf (stderr, "CRX Address type: %s\n", typestr); + } + + if (addrtype == CRX_INVALID) + return FALSE; + + if (strict) + { + if (address.base && !REGNO_OK_FOR_BASE_P (REGNO (address.base))) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "Base register not strict\n"); + return FALSE; + } + if (address.index && !REGNO_OK_FOR_INDEX_P (REGNO (address.index))) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "Index register not strict\n"); + return FALSE; + } + } + + return TRUE; +} + +/* ROUTINES TO COMPUTE COSTS */ +/* ------------------------- */ + +/* Return cost of the memory address x. */ + +static int +crx_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED) +{ + enum crx_addrtype addrtype; + struct crx_address address; + + int cost = 2; + + addrtype = crx_decompose_address (addr, &address); + + gcc_assert (addrtype != CRX_INVALID); + + /* An absolute address causes a 3-word instruction */ + if (addrtype == CRX_ABSOLUTE) + cost+=2; + + /* Post-modifying addresses are more powerful. */ + if (addrtype == CRX_POST_INC) + cost-=2; + + /* Attempt to minimize number of registers in the address. */ + if (address.base) + cost++; + + if (address.index && address.scale == 1) + cost+=5; + + if (address.disp && !INT_CST4 (INTVAL (address.disp))) + cost+=2; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n======\nTARGET_ADDRESS_COST = %d\n", cost); + debug_rtx (addr); + } + + return cost; +} + +/* Return the cost of moving data of mode MODE between a register of class + * RCLASS and memory; IN is zero if the value is to be written to memory, + * nonzero if it is to be read in. This cost is relative to those in + * REGISTER_MOVE_COST. 
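crx_address_cost above starts from a base cost of 2 and then adjusts it per addressing feature. The following is a host-side sketch of that heuristic; the enum and struct are simplified stand-ins for crx_addrtype and struct crx_address, used here only for illustration:

/* Illustrative sketch of the crx_address_cost heuristic above; the
   types are simplified stand-ins, not the real rtx-based ones.  */
#include <stdio.h>

enum addrtype { ABSOLUTE, REG_REL, POST_INC, SCALED_INDX };

struct addr
{
  int has_base, has_index, scale;
  int has_disp, disp_is_cst4;   /* cst4: the small-constant set INT_CST4 accepts */
};

static int address_cost (enum addrtype t, const struct addr *a)
{
  int cost = 2;                        /* every address starts at 2 */
  if (t == ABSOLUTE)
    cost += 2;                         /* absolute -> 3-word instruction */
  if (t == POST_INC)
    cost -= 2;                         /* post-modify is cheapest */
  if (a->has_base)
    cost++;                            /* prefer fewer address registers */
  if (a->has_index && a->scale == 1)
    cost += 5;
  if (a->has_disp && !a->disp_is_cst4)
    cost += 2;                         /* large displacement costs extra */
  return cost;
}

int main (void)
{
  struct addr post = { 1, 0, 0, 0, 0 };
  struct addr abs  = { 0, 0, 0, 1, 0 };
  printf ("post-increment cost: %d\n", address_cost (POST_INC, &post)); /* 1 */
  printf ("absolute cost:       %d\n", address_cost (ABSOLUTE, &abs));  /* 6 */
  return 0;
}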
*/ + +int +crx_memory_move_cost (enum machine_mode mode, + enum reg_class rclass ATTRIBUTE_UNUSED, + int in ATTRIBUTE_UNUSED) +{ + /* One LD or ST takes twice the time of a simple reg-reg move */ + if (reg_classes_intersect_p (rclass, GENERAL_REGS)) + { + /* printf ("GENERAL_REGS LD/ST = %d\n", 4 * HARD_REGNO_NREGS (0, mode));*/ + return 4 * HARD_REGNO_NREGS (0, mode); + } + else if (reg_classes_intersect_p (rclass, HILO_REGS)) + { + /* HILO to memory and vice versa */ + /* printf ("HILO_REGS %s = %d\n", in ? "LD" : "ST", + (REGISTER_MOVE_COST (mode, + in ? GENERAL_REGS : HILO_REGS, + in ? HILO_REGS : GENERAL_REGS) + 4) + * HARD_REGNO_NREGS (0, mode)); */ + return (REGISTER_MOVE_COST (mode, + in ? GENERAL_REGS : HILO_REGS, + in ? HILO_REGS : GENERAL_REGS) + 4) + * HARD_REGNO_NREGS (0, mode); + } + else /* default (like in i386) */ + { + /* printf ("ANYREGS = 100\n"); */ + return 100; + } +} + +/* INSTRUCTION OUTPUT */ +/* ------------------ */ + +/* Check if a const_double is ok for crx store-immediate instructions */ + +int +crx_const_double_ok (rtx op) +{ + if (GET_MODE (op) == DFmode) + { + REAL_VALUE_TYPE r; + long l[2]; + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + REAL_VALUE_TO_TARGET_DOUBLE (r, l); + return (UNSIGNED_INT_FITS_N_BITS (l[0], 4) && + UNSIGNED_INT_FITS_N_BITS (l[1], 4)) ? 1 : 0; + } + + if (GET_MODE (op) == SFmode) + { + REAL_VALUE_TYPE r; + long l; + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + return UNSIGNED_INT_FITS_N_BITS (l, 4) ? 1 : 0; + } + + return (UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_LOW (op), 4) && + UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_HIGH (op), 4)) ? 1 : 0; +} + +/* Implements the macro PRINT_OPERAND defined in crx.h. */ + +void +crx_print_operand (FILE * file, rtx x, int code) +{ + switch (code) + { + case 'p' : + if (GET_CODE (x) == REG) { + if (GET_MODE (x) == DImode || GET_MODE (x) == DFmode) + { + int regno = REGNO (x); + if (regno + 1 >= SP_REGNUM) abort (); + fprintf (file, "{%s, %s}", reg_names[regno], reg_names[regno + 1]); + return; + } + else + { + if (REGNO (x) >= SP_REGNUM) abort (); + fprintf (file, "%s", reg_names[REGNO (x)]); + return; + } + } + + case 'd' : + { + const char *crx_cmp_str; + switch (GET_CODE (x)) + { /* MD: compare (reg, reg or imm) but CRX: cmp (reg or imm, reg) + * -> swap all non symmetric ops */ + case EQ : crx_cmp_str = "eq"; break; + case NE : crx_cmp_str = "ne"; break; + case GT : crx_cmp_str = "lt"; break; + case GTU : crx_cmp_str = "lo"; break; + case LT : crx_cmp_str = "gt"; break; + case LTU : crx_cmp_str = "hi"; break; + case GE : crx_cmp_str = "le"; break; + case GEU : crx_cmp_str = "ls"; break; + case LE : crx_cmp_str = "ge"; break; + case LEU : crx_cmp_str = "hs"; break; + default : abort (); + } + fprintf (file, "%s", crx_cmp_str); + return; + } + + case 'H': + /* Print high part of a double precision value. */ + switch (GET_CODE (x)) + { + case CONST_DOUBLE: + if (GET_MODE (x) == SFmode) abort (); + if (GET_MODE (x) == DFmode) + { + /* High part of a DF const. 
*/ + REAL_VALUE_TYPE r; + long l[2]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + REAL_VALUE_TO_TARGET_DOUBLE (r, l); + + fprintf (file, "$0x%lx", l[1]); + return; + } + + /* -- Fallthrough to handle DI consts -- */ + + case CONST_INT: + { + rtx high, low; + split_double (x, &low, &high); + putc ('$', file); + output_addr_const (file, high); + return; + } + + case REG: + if (REGNO (x) + 1 >= FIRST_PSEUDO_REGISTER) abort (); + fprintf (file, "%s", reg_names[REGNO (x) + 1]); + return; + + case MEM: + /* Adjust memory address to high part. */ + { + rtx adj_mem = x; + adj_mem = adjust_address (adj_mem, GET_MODE (adj_mem), 4); + + output_memory_reference_mode = GET_MODE (adj_mem); + output_address (XEXP (adj_mem, 0)); + return; + } + + default: + abort (); + } + + case 'L': + /* Print low part of a double precision value. */ + switch (GET_CODE (x)) + { + case CONST_DOUBLE: + if (GET_MODE (x) == SFmode) abort (); + if (GET_MODE (x) == DFmode) + { + /* High part of a DF const. */ + REAL_VALUE_TYPE r; + long l[2]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + REAL_VALUE_TO_TARGET_DOUBLE (r, l); + + fprintf (file, "$0x%lx", l[0]); + return; + } + + /* -- Fallthrough to handle DI consts -- */ + + case CONST_INT: + { + rtx high, low; + split_double (x, &low, &high); + putc ('$', file); + output_addr_const (file, low); + return; + } + + case REG: + fprintf (file, "%s", reg_names[REGNO (x)]); + return; + + case MEM: + output_memory_reference_mode = GET_MODE (x); + output_address (XEXP (x, 0)); + return; + + default: + abort (); + } + + case 0 : /* default */ + switch (GET_CODE (x)) + { + case REG: + fprintf (file, "%s", reg_names[REGNO (x)]); + return; + + case MEM: + output_memory_reference_mode = GET_MODE (x); + output_address (XEXP (x, 0)); + return; + + case CONST_DOUBLE: + { + REAL_VALUE_TYPE r; + long l; + + /* Always use H and L for double precision - see above */ + gcc_assert (GET_MODE (x) == SFmode); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + fprintf (file, "$0x%lx", l); + return; + } + + default: + putc ('$', file); + output_addr_const (file, x); + return; + } + + default: + output_operand_lossage ("invalid %%xn code"); + } + + abort (); +} + +/* Implements the macro PRINT_OPERAND_ADDRESS defined in crx.h. 
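The 'H' and 'L' operand codes above print the high and low 32-bit words of a 64-bit operand (via split_double for integer constants). A minimal host-side sketch of that split, assuming the port's little-endian word order (WORDS_BIG_ENDIAN is 0), follows; it is illustrative only:

/* Illustrative sketch: how a 64-bit constant splits into the words the
   'L' (low) and 'H' (high) operand codes print.  */
#include <stdio.h>
#include <stdint.h>

int main (void)
{
  uint64_t v = 0x123456789abcdef0ULL;
  uint32_t low  = (uint32_t) (v & 0xffffffffu);
  uint32_t high = (uint32_t) (v >> 32);

  printf ("$0x%lx  /* %%L */\n", (unsigned long) low);   /* $0x9abcdef0 */
  printf ("$0x%lx  /* %%H */\n", (unsigned long) high);  /* $0x12345678 */
  return 0;
}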
*/ + +void +crx_print_operand_address (FILE * file, rtx addr) +{ + enum crx_addrtype addrtype; + struct crx_address address; + + int offset; + + addrtype = crx_decompose_address (addr, &address); + + if (address.disp) + offset = INTVAL (address.disp); + else + offset = 0; + + switch (addrtype) + { + case CRX_REG_REL: + fprintf (file, "%d(%s)", offset, reg_names[REGNO (address.base)]); + return; + + case CRX_POST_INC: + switch (GET_CODE (address.side_effect)) + { + case PLUS: + break; + case MINUS: + offset = -offset; + break; + case POST_INC: + offset = GET_MODE_SIZE (output_memory_reference_mode); + break; + case POST_DEC: + offset = -GET_MODE_SIZE (output_memory_reference_mode); + break; + default: + abort (); + } + fprintf (file, "%d(%s)+", offset, reg_names[REGNO (address.base)]); + return; + + case CRX_SCALED_INDX: + fprintf (file, "%d(%s, %s, %d)", offset, reg_names[REGNO (address.base)], + reg_names[REGNO (address.index)], address.scale); + return; + + case CRX_ABSOLUTE: + output_addr_const (file, address.disp); + return; + + default: + abort (); + } +} + + +/*****************************************************************************/ +/* MACHINE DESCRIPTION HELPER-FUNCTIONS */ +/*****************************************************************************/ + +void crx_expand_movmem_single (rtx src, rtx srcbase, rtx dst, rtx dstbase, + rtx tmp_reg, unsigned HOST_WIDE_INT *offset_p) +{ + rtx addr, mem; + unsigned HOST_WIDE_INT offset = *offset_p; + + /* Load */ + addr = plus_constant (src, offset); + mem = adjust_automodify_address (srcbase, SImode, addr, offset); + emit_move_insn (tmp_reg, mem); + + /* Store */ + addr = plus_constant (dst, offset); + mem = adjust_automodify_address (dstbase, SImode, addr, offset); + emit_move_insn (mem, tmp_reg); + + *offset_p = offset + 4; +} + +int +crx_expand_movmem (rtx dstbase, rtx srcbase, rtx count_exp, rtx align_exp) +{ + unsigned HOST_WIDE_INT count = 0, offset, si_moves, i; + HOST_WIDE_INT align = 0; + + rtx src, dst; + rtx tmp_reg; + + if (GET_CODE (align_exp) == CONST_INT) + { /* Only if aligned */ + align = INTVAL (align_exp); + if (align & 3) + return 0; + } + + if (GET_CODE (count_exp) == CONST_INT) + { /* No more than 16 SImode moves */ + count = INTVAL (count_exp); + if (count > 64) + return 0; + } + + tmp_reg = gen_reg_rtx (SImode); + + /* Create psrs for the src and dest pointers */ + dst = copy_to_mode_reg (Pmode, XEXP (dstbase, 0)); + if (dst != XEXP (dstbase, 0)) + dstbase = replace_equiv_address_nv (dstbase, dst); + src = copy_to_mode_reg (Pmode, XEXP (srcbase, 0)); + if (src != XEXP (srcbase, 0)) + srcbase = replace_equiv_address_nv (srcbase, src); + + offset = 0; + + /* Emit SImode moves */ + si_moves = count >> 2; + for (i = 0; i < si_moves; i++) + crx_expand_movmem_single (src, srcbase, dst, dstbase, tmp_reg, &offset); + + /* Special cases */ + if (count & 3) + { + offset = count - 4; + crx_expand_movmem_single (src, srcbase, dst, dstbase, tmp_reg, &offset); + } + + gcc_assert (offset == count); + + return 1; +} + +static void +mpushpop_str (char *stringbuffer, const char *mnemonic, char *mask) +{ + if (strlen (mask) > 2 || crx_interrupt_function_p ()) /* needs 2-word instr. */ + sprintf (stringbuffer, "\n\t%s\tsp, {%s}", mnemonic, mask); + else /* single word instruction */ + sprintf (stringbuffer, "\n\t%s\t%s", mnemonic, mask); +} + +/* Called from crx.md. The return value depends on the parameter push_or_pop: + * When push_or_pop is zero -> string for push instructions of prologue. 
+ * When push_or_pop is nonzero -> string for pop/popret/retx in epilogue. + * Relies on the assumptions: + * 1. RA is the last register to be saved. + * 2. The maximal value of the counter is MAX_COUNT. */ + +char * +crx_prepare_push_pop_string (int push_or_pop) +{ + /* j is the number of registers being saved, takes care that there won't be + * more than 8 in one push/pop instruction */ + + /* For the register mask string */ + static char mask_str[50]; + + /* i is the index of save_regs[], going from 0 until last_reg_to_save */ + int i = 0; + + int ra_in_bitmask = 0; + + char *return_str; + + /* For reversing on the push instructions if there are more than one. */ + char *temp_str; + + return_str = (char *) xmalloc (120); + temp_str = (char *) xmalloc (120); + + /* Initialize */ + memset (return_str, 0, 3); + + while (i <= last_reg_to_save) + { + /* Prepare mask for one instruction. */ + mask_str[0] = 0; + + if (i <= SP_REGNUM) + { /* Add regs unit full or SP register reached */ + int j = 0; + while (j < MAX_COUNT && i <= SP_REGNUM) + { + if (save_regs[i]) + { + /* TODO to use ra_in_bitmask for detecting last pop is not + * smart it prevents things like: popret r5 */ + if (i == RETURN_ADDRESS_REGNUM) ra_in_bitmask = 1; + if (j > 0) strcat (mask_str, ", "); + strcat (mask_str, reg_names[i]); + ++j; + } + ++i; + } + } + else + { + /* Handle hi/lo savings */ + while (i <= last_reg_to_save) + { + if (save_regs[i]) + { + strcat (mask_str, "lo, hi"); + i = last_reg_to_save + 1; + break; + } + ++i; + } + } + + if (strlen (mask_str) == 0) continue; + + if (push_or_pop == 1) + { + if (crx_interrupt_function_p ()) + mpushpop_str (temp_str, "popx", mask_str); + else + { + if (ra_in_bitmask) + { + mpushpop_str (temp_str, "popret", mask_str); + ra_in_bitmask = 0; + } + else mpushpop_str (temp_str, "pop", mask_str); + } + + strcat (return_str, temp_str); + } + else + { + /* push - We need to reverse the order of the instructions if there + * are more than one. (since the pop will not be reversed in the + * epilogue */ + if (crx_interrupt_function_p ()) + mpushpop_str (temp_str, "pushx", mask_str); + else + mpushpop_str (temp_str, "push", mask_str); + strcat (temp_str, return_str); + strcpy (strcat (return_str, "\t"), temp_str); + } + + } + + if (push_or_pop == 1) + { + /* pop */ + if (crx_interrupt_function_p ()) + strcat (return_str, "\n\tretx\n"); + + else if (!FUNC_IS_NORETURN_P (current_function_decl) + && !save_regs[RETURN_ADDRESS_REGNUM]) + strcat (return_str, "\n\tjump\tra\n"); + } + + /* Skip the newline and the tab in the start of return_str. */ + return_str += 2; + return return_str; +} + +/* CompactRISC CRX Architecture stack layout: + + 0 +--------------------- + | + . + . + | + +==================== Sp(x)=Ap(x+1) + A | Args for functions + | | called by X and Dynamically + | | Dynamic allocations allocated and + | | (alloca, variable deallocated + Stack | length arrays). + grows +-------------------- Fp(x) + down| | Local variables of X + ward| +-------------------- + | | Regs saved for X-1 + | +==================== Sp(x-1)=Ap(x) + | Args for func X + | pushed by X-1 + +-------------------- Fp(x-1) + | + | + V + +*/ + +void +crx_expand_prologue (void) +{ + crx_compute_frame (); + crx_compute_save_regs (); + + /* If there is no need in push and adjustment to sp, return. */ + if (size_for_adjusting_sp + sum_regs == 0) + return; + + if (last_reg_to_save != -1) + /* If there are registers to push. 
*/ + emit_insn (gen_push_for_prologue (GEN_INT (sum_regs))); + + if (size_for_adjusting_sp > 0) + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-size_for_adjusting_sp))); + + if (frame_pointer_needed) + /* Initialize the frame pointer with the value of the stack pointer + * pointing now to the locals. */ + emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); +} + +/* Generate insn that updates the stack for local variables and padding for + * registers we save. - Generate the appropriate return insn. */ + +void +crx_expand_epilogue (void) +{ + /* Nonzero if we need to return and pop only RA. This will generate a + * different insn. This differentiate is for the peepholes for call as last + * statement in function. */ + int only_popret_RA = (save_regs[RETURN_ADDRESS_REGNUM] + && (sum_regs == UNITS_PER_WORD)); + + if (frame_pointer_needed) + /* Restore the stack pointer with the frame pointers value */ + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + + if (size_for_adjusting_sp > 0) + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (size_for_adjusting_sp))); + + if (crx_interrupt_function_p ()) + emit_jump_insn (gen_interrupt_return ()); + else if (last_reg_to_save == -1) + /* Nothing to pop */ + /* Don't output jump for interrupt routine, only retx. */ + emit_jump_insn (gen_indirect_jump_return ()); + else if (only_popret_RA) + emit_jump_insn (gen_popret_RA_return ()); + else + emit_jump_insn (gen_pop_and_popret_return (GEN_INT (sum_regs))); +} diff --git a/gcc/config/crx/crx.h b/gcc/config/crx/crx.h new file mode 100644 index 000000000..da6e263ca --- /dev/null +++ b/gcc/config/crx/crx.h @@ -0,0 +1,478 @@ +/* Definitions of target machine for GNU compiler, for CRX. + Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef GCC_CRX_H +#define GCC_CRX_H + +/*****************************************************************************/ +/* CONTROLLING THE DRIVER */ +/*****************************************************************************/ + +#define CC1PLUS_SPEC "%{!frtti:-fno-rtti} \ + %{!fenforce-eh-specs:-fno-enforce-eh-specs} \ + %{!fexceptions:-fno-exceptions} \ + %{!fthreadsafe-statics:-fno-threadsafe-statics}" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crti.o%s crtbegin.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#undef MATH_LIBRARY +#define MATH_LIBRARY "" + +/*****************************************************************************/ +/* RUN-TIME TARGET SPECIFICATION */ +/*****************************************************************************/ + +#ifndef TARGET_CPU_CPP_BUILTINS +#define TARGET_CPU_CPP_BUILTINS() \ +do { \ + builtin_define("__CRX__"); \ + builtin_define("__CR__"); \ +} while (0) +#endif + +#define TARGET_VERSION fputs (" (CRX/ELF)", stderr); + +/*****************************************************************************/ +/* STORAGE LAYOUT */ +/*****************************************************************************/ + +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN 0 + +#define WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 4 + +#define POINTER_SIZE 32 + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY 32 + +#define FUNCTION_BOUNDARY 32 + +#define STRUCTURE_SIZE_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 32 + +/* In CRX arrays of chars are word-aligned, so strcpy() will be faster. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD \ + ? (BITS_PER_WORD) : (ALIGN)) + +/* In CRX strings are word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(CONSTANT, ALIGN) \ + (TREE_CODE (CONSTANT) == STRING_CST && (ALIGN) < BITS_PER_WORD \ + ? (BITS_PER_WORD) : (ALIGN)) + +#define STRICT_ALIGNMENT 0 + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/*****************************************************************************/ +/* LAYOUT OF SOURCE LANGUAGE DATA TYPES */ +/*****************************************************************************/ + +#define INT_TYPE_SIZE 32 + +#define SHORT_TYPE_SIZE 16 + +#define LONG_TYPE_SIZE 32 + +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE "unsigned int" + +#define PTRDIFF_TYPE "int" + +/*****************************************************************************/ +/* REGISTER USAGE. */ +/*****************************************************************************/ + +#define FIRST_PSEUDO_REGISTER 19 + +/* On the CRX, only the stack pointer (r15) is such. */ +#define FIXED_REGISTERS \ + { \ + /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r11 r12 r13 ra sp r16 r17 cc */ \ + 0, 0, 0, 0, 1, 0, 0, 1 \ + } + +/* On the CRX, calls clobbers r0-r6 (scratch registers), ra (the return address) + * and sp - (the stack pointer which is fixed). */ +#define CALL_USED_REGISTERS \ + { \ + /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 */ \ + 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, \ + /* r11 r12 r13 ra sp r16 r17 cc */ \ + 0, 0, 0, 1, 1, 1, 1, 1 \ + } + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* On the CRX architecture, HILO regs can only hold SI mode. 
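With UNITS_PER_WORD set to 4 above, HARD_REGNO_NREGS is pure arithmetic: a value occupies one register per started 4-byte word, which is why DImode and DFmode operands pair two consecutive registers (cf. the 'p', 'H' and 'L' handling in crx_print_operand). A small sketch, outside the patch:

/* Illustrative sketch of the HARD_REGNO_NREGS computation above
   (register count per mode size, UNITS_PER_WORD == 4).  */
#include <stdio.h>

#define UNITS_PER_WORD 4
#define NREGS(size) (((size) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)

int main (void)
{
  printf ("QImode (1 byte)  -> %d reg(s)\n", NREGS (1)); /* 1 */
  printf ("HImode (2 bytes) -> %d reg(s)\n", NREGS (2)); /* 1 */
  printf ("SImode (4 bytes) -> %d reg(s)\n", NREGS (4)); /* 1 */
  printf ("DImode (8 bytes) -> %d reg(s)\n", NREGS (8)); /* 2 */
  return 0;
}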
*/ +#define HARD_REGNO_MODE_OK(REGNO, MODE) crx_hard_regno_mode_ok(REGNO, MODE) + +/* So far no patterns for moving CCMODE data are available */ +#define AVOID_CCMODE_COPIES + +/* Interrupt functions can only use registers that have already been saved by + * the prologue, even if they would normally be call-clobbered. */ +#define HARD_REGNO_RENAME_OK(SRC, DEST) \ + (!crx_interrupt_function_p () || df_regs_ever_live_p (DEST)) + +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +enum reg_class +{ + NO_REGS, + LO_REGS, + HI_REGS, + HILO_REGS, + NOSP_REGS, + GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* The following macro defines cover classes for Integrated Register + Allocator. Cover classes is a set of non-intersected register + classes covering all hard registers used for register allocation + purpose. Any move between two registers of a cover class should be + cheaper than load or store of the registers. The macro value is + array of register classes with LIM_REG_CLASSES used as the end + marker. */ + +#define IRA_COVER_CLASSES \ +{ \ + GENERAL_REGS, LIM_REG_CLASSES \ +} + +#define REG_CLASS_NAMES \ + { \ + "NO_REGS", \ + "LO_REGS", \ + "HI_REGS", \ + "HILO_REGS", \ + "NOSP_REGS", \ + "GENERAL_REGS", \ + "ALL_REGS" \ + } + +#define REG_CLASS_CONTENTS \ + { \ + {0x00000000}, /* NO_REGS */ \ + {0x00010000}, /* LO_REGS : 16 */ \ + {0x00020000}, /* HI_REGS : 17 */ \ + {0x00030000}, /* HILO_REGS : 16, 17 */ \ + {0x00007fff}, /* NOSP_REGS : 0 - 14 */ \ + {0x0000ffff}, /* GENERAL_REGS : 0 - 15 */ \ + {0x0007ffff} /* ALL_REGS : 0 - 18 */ \ + } + +#define REGNO_REG_CLASS(REGNO) crx_regno_reg_class(REGNO) + +#define BASE_REG_CLASS GENERAL_REGS + +#define INDEX_REG_CLASS GENERAL_REGS + +#define REG_CLASS_FROM_LETTER(C) \ + ((C) == 'b' ? NOSP_REGS : \ + (C) == 'l' ? LO_REGS : \ + (C) == 'h' ? HI_REGS : \ + (C) == 'k' ? HILO_REGS : \ + NO_REGS) + +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) < 16 \ + || (reg_renumber && (unsigned)reg_renumber[REGNO] < 16)) + +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \ + crx_secondary_reload_class (CLASS, MODE, X) + +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD + +#define SIGNED_INT_FITS_N_BITS(imm, N) \ + ((((imm) < ((long long)1<<((N)-1))) && ((imm) >= -((long long)1<<((N)-1)))) ? 1 : 0) + +#define UNSIGNED_INT_FITS_N_BITS(imm, N) \ + (((imm) < ((long long)1<<(N)) && (imm) >= (long long)0) ? 1 : 0) + +#define HILO_REGNO_P(regno) \ + (reg_classes_intersect_p(REGNO_REG_CLASS(regno), HILO_REGS)) + +#define INT_CST4(VALUE) \ + (((VALUE) >= -1 && (VALUE) <= 4) || (VALUE) == -4 \ + || (VALUE) == 7 || (VALUE) == 8 || (VALUE) == 16 || (VALUE) == 32 \ + || (VALUE) == 20 || (VALUE) == 12 || (VALUE) == 48) + +#define CONST_OK_FOR_LETTER_P(VALUE, C) \ + /* Legal const for store immediate instructions */ \ + ((C) == 'I' ? UNSIGNED_INT_FITS_N_BITS(VALUE, 3) : \ + (C) == 'J' ? UNSIGNED_INT_FITS_N_BITS(VALUE, 4) : \ + (C) == 'K' ? UNSIGNED_INT_FITS_N_BITS(VALUE, 5) : \ + (C) == 'L' ? INT_CST4(VALUE) : \ + 0) + +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ + ((C) == 'G' ? crx_const_double_ok (VALUE) : \ + 0) + +/*****************************************************************************/ +/* STACK LAYOUT AND CALLING CONVENTIONS. 
*/ +/*****************************************************************************/ + +#define STACK_GROWS_DOWNWARD + +#define STARTING_FRAME_OFFSET 0 + +#define STACK_POINTER_REGNUM 15 + +#define FRAME_POINTER_REGNUM 13 + +#define ARG_POINTER_REGNUM 12 + +#define STATIC_CHAIN_REGNUM 1 + +#define RETURN_ADDRESS_REGNUM 14 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +#define ELIMINABLE_REGS \ + { \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ + } + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + do { \ + (OFFSET) = crx_initial_elimination_offset ((FROM), (TO)); \ + } while (0) + +/*****************************************************************************/ +/* PASSING FUNCTION ARGUMENTS */ +/*****************************************************************************/ + +#define ACCUMULATE_OUTGOING_ARGS (TARGET_NO_PUSH_ARGS) + +#define PUSH_ARGS (!TARGET_NO_PUSH_ARGS) + +#define PUSH_ROUNDING(BYTES) (((BYTES) + 3) & ~3) + +#ifndef CUMULATIVE_ARGS +struct cumulative_args +{ + int ints; +}; + +#define CUMULATIVE_ARGS struct cumulative_args +#endif + +/* On the CRX architecture, Varargs routines should receive their parameters on + * the stack. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + crx_init_cumulative_args(&(CUM), (FNTYPE), (LIBNAME)) + +#define FUNCTION_ARG_REGNO_P(REGNO) crx_function_arg_regno_p(REGNO) + +/*****************************************************************************/ +/* RETURNING FUNCTION VALUE */ +/*****************************************************************************/ + +/* On the CRX, the return value is in R0 */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + gen_rtx_REG(TYPE_MODE (VALTYPE), 0) + +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0) + +#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0) + +#define CRX_STRUCT_VALUE_REGNUM 0 + +/*****************************************************************************/ +/* GENERATING CODE FOR PROFILING - NOT IMPLEMENTED */ +/*****************************************************************************/ + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + sorry ("profiler support for CRX"); \ +} + +/*****************************************************************************/ +/* TRAMPOLINES FOR NESTED FUNCTIONS - NOT SUPPORTED */ +/*****************************************************************************/ + +#define TRAMPOLINE_SIZE 32 + +/*****************************************************************************/ +/* ADDRESSING MODES */ +/*****************************************************************************/ + +#define CONSTANT_ADDRESS_P(X) \ + (GET_CODE (X) == LABEL_REF \ + || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST \ + || GET_CODE (X) == CONST_INT) + +#define MAX_REGS_PER_ADDRESS 2 + +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_REG 0 + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) +#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +#else +#define REG_OK_FOR_BASE_P(X) 1 +#define REG_OK_FOR_INDEX_P(X) 1 +#endif /* REG_OK_STRICT */ + +#define LEGITIMATE_CONSTANT_P(X) 1 + +/*****************************************************************************/ +/* CONDITION CODE STATUS */ +/*****************************************************************************/ + 
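The constraint letters defined above reduce to integer range tests: 'I', 'J' and 'K' accept unsigned 3-, 4- and 5-bit immediates, and 'L' accepts the small INT_CST4 set. A standalone sketch of those tests (a hypothetical host-side harness, not part of the imported sources):

/* Illustrative sketch of CONST_OK_FOR_LETTER_P from crx.h: 'I'/'J'/'K'
   are unsigned 3/4/5-bit windows, 'L' is the INT_CST4 constant set.  */
#include <stdio.h>

static int u_fits (long long v, int n)
{
  return v >= 0 && v < (1LL << n);
}

static int int_cst4 (long long v)
{
  return (v >= -1 && v <= 4) || v == -4 || v == 7 || v == 8
         || v == 16 || v == 32 || v == 20 || v == 12 || v == 48;
}

static int const_ok_for_letter (long long v, char c)
{
  switch (c)
    {
    case 'I': return u_fits (v, 3);
    case 'J': return u_fits (v, 4);
    case 'K': return u_fits (v, 5);
    case 'L': return int_cst4 (v);
    default:  return 0;
    }
}

int main (void)
{
  printf ("15 matches 'J'? %d\n", const_ok_for_letter (15, 'J')); /* 1 */
  printf ("16 matches 'J'? %d\n", const_ok_for_letter (16, 'J')); /* 0 */
  printf ("48 matches 'L'? %d\n", const_ok_for_letter (48, 'L')); /* 1 */
  printf ("-4 matches 'L'? %d\n", const_ok_for_letter (-4, 'L')); /* 1 */
  return 0;
}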
+/*****************************************************************************/ +/* RELATIVE COSTS OF OPERATIONS */ +/*****************************************************************************/ + +#define MEMORY_MOVE_COST(MODE, CLASS, IN) crx_memory_move_cost(MODE, CLASS, IN) +/* Moving to processor register flushes pipeline - thus asymmetric */ +#define REGISTER_MOVE_COST(MODE, FROM, TO) ((TO != GENERAL_REGS) ? 8 : 2) +/* Assume best case (branch predicted) */ +#define BRANCH_COST(speed_p, predictable_p) 2 + +#define SLOW_BYTE_ACCESS 1 + +/*****************************************************************************/ +/* DIVIDING THE OUTPUT INTO SECTIONS */ +/*****************************************************************************/ + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" + +#define DATA_SECTION_ASM_OP "\t.section\t.data" + +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + +/*****************************************************************************/ +/* POSITION INDEPENDENT CODE */ +/*****************************************************************************/ + +#define PIC_OFFSET_TABLE_REGNUM 12 + +#define LEGITIMATE_PIC_OPERAND_P(X) 1 + +/*****************************************************************************/ +/* ASSEMBLER FORMAT */ +/*****************************************************************************/ + +#define GLOBAL_ASM_OP "\t.globl\t" + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + asm_fprintf (STREAM, "%U%s", (*targetm.strip_name_encoding) (NAME)); + +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +/*****************************************************************************/ +/* INSTRUCTION OUTPUT */ +/*****************************************************************************/ + +#define REGISTER_NAMES \ + { \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "ra", "sp", \ + "lo", "hi", "cc" \ + } + +#define PRINT_OPERAND(STREAM, X, CODE) \ + crx_print_operand(STREAM, X, CODE) + +#define PRINT_OPERAND_ADDRESS(STREAM, ADDR) \ + crx_print_operand_address(STREAM, ADDR) + +/*****************************************************************************/ +/* OUTPUT OF DISPATCH TABLES */ +/*****************************************************************************/ + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + asm_fprintf ((STREAM), "\t.long\t.L%d\n", (VALUE)) + +/*****************************************************************************/ +/* ALIGNMENT IN ASSEMBLER FILE */ +/*****************************************************************************/ + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + asm_fprintf ((STREAM), "\t.align\t%d\n", 1 << (POWER)) + +/*****************************************************************************/ +/* MISCELLANEOUS PARAMETERS */ +/*****************************************************************************/ + +#define CASE_VECTOR_MODE Pmode + +#define MOVE_MAX 4 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define STORE_FLAG_VALUE 1 + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +#endif /* ! GCC_CRX_H */ diff --git a/gcc/config/crx/crx.md b/gcc/config/crx/crx.md new file mode 100644 index 000000000..229e345d3 --- /dev/null +++ b/gcc/config/crx/crx.md @@ -0,0 +1,899 @@ +;; GCC machine description for CRX. 
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003, 2004, 2007 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; Register numbers + +(define_constants + [(SP_REGNUM 15) ; Stack pointer + (RA_REGNUM 14) ; Return address + (LO_REGNUM 16) ; LO register + (HI_REGNUM 17) ; HI register + (CC_REGNUM 18) ; Condition code register + ] +) + +(define_attr "length" "" ( const_int 6 )) + +(define_asm_attributes + [(set_attr "length" "6")] +) + +;; Predicates + +(define_predicate "u4bits_operand" + (match_code "const_int,const_double") + { + if (GET_CODE (op) == CONST_DOUBLE) + return crx_const_double_ok (op); + return (UNSIGNED_INT_FITS_N_BITS(INTVAL(op), 4)) ? 1 : 0; + } +) + +(define_predicate "cst4_operand" + (and (match_code "const_int") + (match_test "INT_CST4(INTVAL(op))"))) + +(define_predicate "reg_or_u4bits_operand" + (ior (match_operand 0 "u4bits_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "reg_or_cst4_operand" + (ior (match_operand 0 "cst4_operand") + (match_operand 0 "register_operand"))) + +(define_predicate "reg_or_sym_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "register_operand"))) + +(define_predicate "cc_reg_operand" + (and (match_code "reg") + (match_test "REGNO (op) == CC_REGNUM"))) + +(define_predicate "nosp_reg_operand" + (and (match_operand 0 "register_operand") + (match_test "REGNO (op) != SP_REGNUM"))) + +(define_predicate "store_operand" + (and (match_operand 0 "memory_operand") + (not (match_operand 0 "push_operand")))) + +;; Mode Macro Definitions + +(define_mode_iterator ALLMT [QI HI SI SF DI DF]) +(define_mode_iterator CRXMM [QI HI SI SF]) +(define_mode_iterator CRXIM [QI HI SI]) +(define_mode_iterator DIDFM [DI DF]) +(define_mode_iterator SISFM [SI SF]) +(define_mode_iterator SHORT [QI HI]) + +(define_mode_attr tIsa [(QI "b") (HI "w") (SI "d") (SF "d")]) +(define_mode_attr lImmArith [(QI "4") (HI "4") (SI "6")]) +(define_mode_attr lImmRotl [(QI "2") (HI "2") (SI "4")]) +(define_mode_attr IJK [(QI "I") (HI "J") (SI "K")]) +(define_mode_attr iF [(QI "i") (HI "i") (SI "i") (DI "i") (SF "F") (DF "F")]) +(define_mode_attr JG [(QI "J") (HI "J") (SI "J") (DI "J") (SF "G") (DF "G")]) +; In HI or QI mode we push 4 bytes. 
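The comment above notes that a QImode or HImode push still occupies 4 bytes; that follows from PUSH_ROUNDING in crx.h, which rounds every push up to a word multiple, and is what the pushCnstr/tpush attributes that follow rely on. A quick illustrative sketch:

/* Illustrative sketch of PUSH_ROUNDING from crx.h: every push is
   rounded up to a multiple of 4 bytes.  */
#include <stdio.h>

#define PUSH_ROUNDING(bytes) (((bytes) + 3) & ~3)

int main (void)
{
  int sizes[] = { 1, 2, 3, 4, 5, 8 };
  for (int i = 0; i < 6; i++)
    printf ("push of %d byte(s) occupies %d bytes\n",
            sizes[i], PUSH_ROUNDING (sizes[i]));
  return 0;
}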
+(define_mode_attr pushCnstr [(QI "X") (HI "X") (SI "<") (SF "<") (DI "<") (DF "<")]) +(define_mode_attr tpush [(QI "") (HI "") (SI "") (SF "") (DI "sp, ") (DF "sp, ")]) +(define_mode_attr lpush [(QI "2") (HI "2") (SI "2") (SF "2") (DI "4") (DF "4")]) + + +;; Code Macro Definitions + +(define_code_iterator sz_xtnd [sign_extend zero_extend]) +(define_code_attr sIsa [(sign_extend "") (zero_extend "u")]) +(define_code_attr sPat [(sign_extend "s") (zero_extend "u")]) +(define_code_attr szPat [(sign_extend "") (zero_extend "zero_")]) +(define_code_attr szIsa [(sign_extend "s") (zero_extend "z")]) + +(define_code_iterator sh_oprnd [ashift ashiftrt lshiftrt]) +(define_code_attr shIsa [(ashift "ll") (ashiftrt "ra") (lshiftrt "rl")]) +(define_code_attr shPat [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr")]) + +(define_code_iterator mima_oprnd [smax umax smin umin]) +(define_code_attr mimaIsa [(smax "maxs") (umax "maxu") (smin "mins") (umin "minu")]) + +;; Addition Instructions + +(define_insn "adddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%0,0") + (match_operand:DI 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "addd\t%L2, %L1\;addcd\t%H2, %H1" + [(set_attr "length" "4,12")] +) + +(define_insn "add3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (plus:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0") + (match_operand:CRXIM 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "add\t%2, %0" + [(set_attr "length" "2,")] +) + +;; Subtract Instructions + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (minus:DI (match_operand:DI 1 "register_operand" "0,0") + (match_operand:DI 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "subd\t%L2, %L1\;subcd\t%H2, %H1" + [(set_attr "length" "4,12")] +) + +(define_insn "sub3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (minus:CRXIM (match_operand:CRXIM 1 "register_operand" "0,0") + (match_operand:CRXIM 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "sub\t%2, %0" + [(set_attr "length" "2,")] +) + +;; Multiply Instructions + +(define_insn "mul3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (mult:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0") + (match_operand:CRXIM 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "mul\t%2, %0" + [(set_attr "length" "2,")] +) + +;; Widening-multiplication Instructions + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=k") + (mult:DI (sz_xtnd:DI (match_operand:SI 1 "register_operand" "%r")) + (sz_xtnd:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "" + "mulld\t%2, %1" + [(set_attr "length" "4")] +) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (sz_xtnd:SI (match_operand:HI 1 "register_operand" "%0")) + (sz_xtnd:SI (match_operand:HI 2 "register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "" + "mulwd\t%2, %0" + [(set_attr "length" "4")] +) + +(define_insn "mulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sz_xtnd:HI (match_operand:QI 1 "register_operand" "%0")) + (sz_xtnd:HI (match_operand:QI 2 "register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "" + "mulbw\t%2, %0" + [(set_attr "length" "4")] +) + +;; Logical Instructions - and + +(define_insn "and3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (and:CRXIM 
(match_operand:CRXIM 1 "register_operand" "%0,0") + (match_operand:CRXIM 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "and\t%2, %0" + [(set_attr "length" "2,")] +) + +;; Logical Instructions - or + +(define_insn "ior3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (ior:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0") + (match_operand:CRXIM 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "or\t%2, %0" + [(set_attr "length" "2,")] +) + +;; Logical Instructions - xor + +(define_insn "xor3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (xor:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0") + (match_operand:CRXIM 2 "nonmemory_operand" "r,i"))) + (clobber (reg:CC CC_REGNUM))] + "" + "xor\t%2, %0" + [(set_attr "length" "2,")] +) + +;; Sign and Zero Extend Instructions + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sz_xtnd:SI (match_operand:HI 1 "register_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "extwd\t%1, %0" + [(set_attr "length" "4")] +) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sz_xtnd:SI (match_operand:QI 1 "register_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "extbd\t%1, %0" + [(set_attr "length" "4")] +) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sz_xtnd:HI (match_operand:QI 1 "register_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "extbw\t%1, %0" + [(set_attr "length" "4")] +) + +;; Negation Instructions + +(define_insn "neg2" + [(set (match_operand:CRXIM 0 "register_operand" "=r") + (neg:CRXIM (match_operand:CRXIM 1 "register_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "neg\t%1, %0" + [(set_attr "length" "4")] +) + +;; Absolute Instructions + +(define_insn "abs2" + [(set (match_operand:CRXIM 0 "register_operand" "=r") + (abs:CRXIM (match_operand:CRXIM 1 "register_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "abs\t%1, %0" + [(set_attr "length" "4")] +) + +;; Max and Min Instructions + +(define_insn "3" + [(set (match_operand:CRXIM 0 "register_operand" "=r") + (mima_oprnd:CRXIM (match_operand:CRXIM 1 "register_operand" "%0") + (match_operand:CRXIM 2 "register_operand" "r")))] + "" + "\t%2, %0" + [(set_attr "length" "4")] +) + +;; One's Complement + +(define_insn "one_cmpl2" + [(set (match_operand:CRXIM 0 "register_operand" "=r") + (not:CRXIM (match_operand:CRXIM 1 "register_operand" "0"))) + (clobber (reg:CC CC_REGNUM))] + "" + "xor\t$-1, %0" + [(set_attr "length" "2")] +) + +;; Rotate Instructions + +(define_insn "rotl3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (rotate:CRXIM (match_operand:CRXIM 1 "register_operand" "0,0") + (match_operand:CRXIM 2 "nonmemory_operand" "r,"))) + (clobber (reg:CC CC_REGNUM))] + "" + "@ + rotl\t%2, %0 + rot\t%2, %0" + [(set_attr "length" "4,")] +) + +(define_insn "rotr3" + [(set (match_operand:CRXIM 0 "register_operand" "=r") + (rotatert:CRXIM (match_operand:CRXIM 1 "register_operand" "0") + (match_operand:CRXIM 2 "register_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "rotr\t%2, %0" + [(set_attr "length" "4")] +) + +;; Arithmetic Left and Right Shift Instructions + +(define_insn "3" + [(set (match_operand:CRXIM 0 "register_operand" "=r,r") + (sh_oprnd:CRXIM (match_operand:CRXIM 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "r,"))) + (clobber (reg:CC CC_REGNUM))] + "" + "s\t%2, %0" + [(set_attr "length" "2,2")] +) + +;; Bit Set Instructions + 
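The extv and extzv patterns that follow synthesize a ram/srad pair to pull a size-bit field at LSB-based bit position pos out of a register. The sketch below models the extraction semantics in plain C (matching RTL sign_extract/zero_extract with BITS_BIG_ENDIAN 0), not the ram/srad encoding itself:

/* Illustrative sketch of the field extraction the extv/extzv patterns
   below implement (LSB-based position, 32-bit word, size + pos <= 32).
   Assumes the usual two's-complement wrap on the uint32_t-to-int32_t
   conversion and an arithmetic right shift of negative values.  */
#include <stdio.h>
#include <stdint.h>

static int32_t sign_extract (uint32_t x, int size, int pos)
{
  /* Shift the field to the top, then arithmetic-shift it back down.  */
  return (int32_t) (x << (32 - size - pos)) >> (32 - size);
}

static uint32_t zero_extract (uint32_t x, int size, int pos)
{
  return (x >> pos) & ((size == 32) ? 0xffffffffu : ((1u << size) - 1));
}

int main (void)
{
  uint32_t x = 0x0000ff00u;
  printf ("sign_extract (x, 8, 8) = %d\n", (int) sign_extract (x, 8, 8));       /* -1  */
  printf ("zero_extract (x, 8, 8) = %u\n", (unsigned) zero_extract (x, 8, 8));  /* 255 */
  return 0;
}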
+(define_insn "extv" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")))] + "" + { + static char buf[100]; + int strpntr; + int size = INTVAL (operands[2]); + int pos = INTVAL (operands[3]); + strpntr = sprintf (buf, "ram\t$%d, $31, $%d, %%1, %%0\;", + BITS_PER_WORD - (size + pos), BITS_PER_WORD - size); + sprintf (buf + strpntr, "srad\t$%d, %%0", BITS_PER_WORD - size); + return buf; + } + [(set_attr "length" "6")] +) + +(define_insn "extzv" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")))] + "" + { + static char buf[40]; + int size = INTVAL (operands[2]); + int pos = INTVAL (operands[3]); + sprintf (buf, "ram\t$%d, $%d, $0, %%1, %%0", + (BITS_PER_WORD - pos) % BITS_PER_WORD, size - 1); + return buf; + } + [(set_attr "length" "4")] +) + +(define_insn "insv" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "register_operand" "r"))] + "" + { + static char buf[40]; + int size = INTVAL (operands[1]); + int pos = INTVAL (operands[2]); + sprintf (buf, "rim\t$%d, $%d, $%d, %%3, %%0", + pos, size + pos - 1, pos); + return buf; + } + [(set_attr "length" "4")] +) + +;; Move Instructions + +(define_expand "mov" + [(set (match_operand:ALLMT 0 "nonimmediate_operand" "") + (match_operand:ALLMT 1 "general_operand" ""))] + "" + { + if (!(reload_in_progress || reload_completed)) + { + if (!register_operand (operands[0], mode)) + { + if (push_operand (operands[0], mode) ? 
+ !nosp_reg_operand (operands[1], mode) : + !reg_or_u4bits_operand (operands[1], mode)) + { + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + } + } + } +) + +(define_insn "push_internal" + [(set (match_operand:ALLMT 0 "push_operand" "=") + (match_operand:ALLMT 1 "nosp_reg_operand" "b"))] + "" + "push\t%p1" + [(set_attr "length" "")] +) + +(define_insn "mov_regs" + [(set (match_operand:SISFM 0 "register_operand" "=r, r, r, k") + (match_operand:SISFM 1 "nonmemory_operand" "r, , k, r"))] + "" + "@ + movd\t%1, %0 + movd\t%1, %0 + mfpr\t%1, %0 + mtpr\t%1, %0" + [(set_attr "length" "2,6,4,4")] +) + +(define_insn "mov_regs" + [(set (match_operand:DIDFM 0 "register_operand" "=r, r, r, k") + (match_operand:DIDFM 1 "nonmemory_operand" "r, , k, r"))] + "" + { + switch (which_alternative) + { + case 0: if (REGNO (operands[0]) > REGNO (operands[1])) + return "movd\t%H1, %H0\;movd\t%L1, %L0"; + else + return "movd\t%L1, %L0\;movd\t%H1, %H0"; + case 1: return "movd\t%H1, %H0\;movd\t%L1, %L0"; + case 2: return "mfpr\t%H1, %H0\;mfpr\t%L1, %L0"; + case 3: return "mtpr\t%H1, %H0\;mtpr\t%L1, %L0"; + default: gcc_unreachable (); + } + } + [(set_attr "length" "4,12,8,8")] +) + +(define_insn "mov_regs" ; no HI/QI mode in HILO regs + [(set (match_operand:SHORT 0 "register_operand" "=r, r") + (match_operand:SHORT 1 "nonmemory_operand" "r, i"))] + "" + "mov\t%1, %0" + [(set_attr "length" "2,")] +) + +(define_insn "mov_load" + [(set (match_operand:CRXMM 0 "register_operand" "=r") + (match_operand:CRXMM 1 "memory_operand" "m"))] + "" + "load\t%1, %0" + [(set_attr "length" "6")] +) + +(define_insn "mov_load" + [(set (match_operand:DIDFM 0 "register_operand" "=r") + (match_operand:DIDFM 1 "memory_operand" "m"))] + "" + { + rtx first_dest_reg = gen_rtx_REG (SImode, REGNO (operands[0])); + if (reg_overlap_mentioned_p (first_dest_reg, operands[1])) + return "loadd\t%H1, %H0\;loadd\t%L1, %L0"; + return "loadd\t%L1, %L0\;loadd\t%H1, %H0"; + } + [(set_attr "length" "12")] +) + +(define_insn "mov_store" + [(set (match_operand:CRXMM 0 "store_operand" "=m, m") + (match_operand:CRXMM 1 "reg_or_u4bits_operand" "r, "))] + "" + "stor\t%1, %0" + [(set_attr "length" "6")] +) + +(define_insn "mov_store" + [(set (match_operand:DIDFM 0 "store_operand" "=m, m") + (match_operand:DIDFM 1 "reg_or_u4bits_operand" "r, "))] + "" + "stord\t%H1, %H0\;stord\t%L1, %L0" + [(set_attr "length" "12")] +) + +;; Movmem Instruction + +(define_expand "movmemsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))] + "" + { + if (crx_expand_movmem (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; + } +) + +;; Compare and Branch Instructions + +(define_insn "cbranchcc4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:CC 1 "cc_reg_operand" "r") + (match_operand 2 "cst4_operand" "L")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" + "b%d0\t%l3" + [(set_attr "length" "6")] +) + +(define_insn "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:CRXIM 1 "register_operand" "r") + (match_operand:CRXIM 2 "reg_or_cst4_operand" "rL")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "" + "cmpb%d0\t%2, %1, %l3" + [(set_attr "length" "6")] +) + + +;; Scond Instructions + +(define_expand "cstore4" + [(set (reg:CC CC_REGNUM) + (compare:CC 
(match_operand:CRXIM 2 "register_operand" "") + (match_operand:CRXIM 3 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(reg:CC CC_REGNUM) (const_int 0)]))] + "" + "" +) + +(define_insn "cmp_internal" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:CRXIM 0 "register_operand" "r,r") + (match_operand:CRXIM 1 "nonmemory_operand" "r,i")))] + "" + "cmp\t%1, %0" + [(set_attr "length" "2,")] +) + +(define_insn "sCOND_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "ordered_comparison_operator" + [(reg:CC CC_REGNUM) (const_int 0)]))] + "" + "s%d1\t%0" + [(set_attr "length" "2")] +) + +;; Jumps and Branches + +(define_insn "indirect_jump_return" + [(parallel + [(set (pc) + (reg:SI RA_REGNUM)) + (return)]) + ] + "reload_completed" + "jump\tra" + [(set_attr "length" "2")] +) + +(define_insn "indirect_jump" + [(set (pc) + (match_operand:SI 0 "reg_or_sym_operand" "r,i"))] + "" + "@ + jump\t%0 + br\t%a0" + [(set_attr "length" "2,6")] +) + +(define_insn "interrupt_return" + [(parallel + [(unspec_volatile [(const_int 0)] 0) + (return)])] + "" + { + return crx_prepare_push_pop_string (1); + } + [(set_attr "length" "14")] +) + +(define_insn "jump_to_imm" + [(set (pc) + (match_operand 0 "immediate_operand" "i"))] + "" + "br\t%c0" + [(set_attr "length" "6")] +) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "br\t%l0" + [(set_attr "length" "6")] +) + +;; Function Prologue and Epilogue + +(define_expand "prologue" + [(const_int 0)] + "" + { + crx_expand_prologue (); + DONE; + } +) + +(define_insn "push_for_prologue" + [(parallel + [(set (reg:SI SP_REGNUM) + (minus:SI (reg:SI SP_REGNUM) + (match_operand:SI 0 "immediate_operand" "i")))])] + "reload_completed" + { + return crx_prepare_push_pop_string (0); + } + [(set_attr "length" "4")] +) + +(define_expand "epilogue" + [(return)] + "" + { + crx_expand_epilogue (); + DONE; + } +) + +(define_insn "pop_and_popret_return" + [(parallel + [(set (reg:SI SP_REGNUM) + (plus:SI (reg:SI SP_REGNUM) + (match_operand:SI 0 "immediate_operand" "i"))) + (use (reg:SI RA_REGNUM)) + (return)]) + ] + "reload_completed" + { + return crx_prepare_push_pop_string (1); + } + [(set_attr "length" "4")] +) + +(define_insn "popret_RA_return" + [(parallel + [(use (reg:SI RA_REGNUM)) + (return)]) + ] + "reload_completed" + "popret\tra" + [(set_attr "length" "2")] +) + +;; Table Jump + +(define_insn "tablejump" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref:SI (match_operand 1 "" "" )))] + "" + "jump\t%0" + [(set_attr "length" "2")] +) + +;; Call Instructions + +(define_expand "call" + [(call (match_operand:QI 0 "memory_operand" "") + (match_operand 1 "" ""))] + "" + { + emit_call_insn (gen_crx_call (operands[0], operands[1])); + DONE; + } +) + +(define_expand "crx_call" + [(parallel + [(call (match_operand:QI 0 "memory_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI RA_REGNUM))])] + "" + "" +) + +(define_insn "crx_call_insn_branch" + [(call (mem:QI (match_operand:SI 0 "immediate_operand" "i")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "" + "bal\tra, %a0" + [(set_attr "length" "6")] +) + +(define_insn "crx_call_insn_jump" + [(call (mem:QI (match_operand:SI 0 "register_operand" "r")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "" + "jal\t%0" + [(set_attr "length" "2")] +) + +(define_insn "crx_call_insn_jalid" 
+ [(call (mem:QI (mem:SI (plus:SI + (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")))) + (match_operand 2 "" "")) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "" + "jalid\t%0, %1" + [(set_attr "length" "4")] +) + +;; Call Value Instructions + +(define_expand "call_value" + [(set (match_operand 0 "general_operand" "") + (call (match_operand:QI 1 "memory_operand" "") + (match_operand 2 "" "")))] + "" + { + emit_call_insn (gen_crx_call_value (operands[0], operands[1], operands[2])); + DONE; + } +) + +(define_expand "crx_call_value" + [(parallel + [(set (match_operand 0 "general_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI RA_REGNUM))])] + "" + "" +) + +(define_insn "crx_call_value_insn_branch" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "immediate_operand" "i")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "" + "bal\tra, %a1" + [(set_attr "length" "6")] +) + +(define_insn "crx_call_value_insn_jump" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "register_operand" "r")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "" + "jal\t%1" + [(set_attr "length" "2")] +) + +(define_insn "crx_call_value_insn_jalid" + [(set (match_operand 0 "" "=g") + (call (mem:QI (mem:SI (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))) + (match_operand 3 "" ""))) + (clobber (match_operand:SI 4 "register_operand" "+r"))] + "" + "jalid\t%0, %1" + [(set_attr "length" "4")] +) + +;; Nop + +(define_insn "nop" + [(const_int 0)] + "" + "" +) + +;; Multiply and Accumulate Instructions + +(define_insn "madsidi3" + [(set (match_operand:DI 0 "register_operand" "+k") + (plus:DI + (mult:DI (sz_xtnd:DI (match_operand:SI 1 "register_operand" "%r")) + (sz_xtnd:DI (match_operand:SI 2 "register_operand" "r"))) + (match_dup 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_MAC" + "macd\t%2, %1" + [(set_attr "length" "4")] +) + +(define_insn "madhisi3" + [(set (match_operand:SI 0 "register_operand" "+l") + (plus:SI + (mult:SI (sz_xtnd:SI (match_operand:HI 1 "register_operand" "%r")) + (sz_xtnd:SI (match_operand:HI 2 "register_operand" "r"))) + (match_dup 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_MAC" + "macw\t%2, %1" + [(set_attr "length" "4")] +) + +(define_insn "madqihi3" + [(set (match_operand:HI 0 "register_operand" "+l") + (plus:HI + (mult:HI (sz_xtnd:HI (match_operand:QI 1 "register_operand" "%r")) + (sz_xtnd:HI (match_operand:QI 2 "register_operand" "r"))) + (match_dup 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_MAC" + "macb\t%2, %1" + [(set_attr "length" "4")] +) + +;; Loop Instructions + +(define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" "")) ; iterations; zero if unknown + (use (match_operand 2 "" "")) ; max iterations + (use (match_operand 3 "" "")) ; loop level + (use (match_operand 4 "" ""))] ; label + "" + { + if (INTVAL (operands[3]) > crx_loop_nesting) + FAIL; + switch (GET_MODE (operands[0])) + { + case SImode: + emit_jump_insn (gen_doloop_end_si (operands[4], operands[0])); + break; + case HImode: + emit_jump_insn (gen_doloop_end_hi (operands[4], operands[0])); + break; + case QImode: + emit_jump_insn (gen_doloop_end_qi (operands[4], operands[0])); + break; + default: + FAIL; + } + DONE; + } +) + +; CRX dbnz[bwd] used explicitly (see above) but also by the combiner. 
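;; A minimal C sketch of the loop shape these patterns implement, assuming a
;; C-level counter variable that starts at the iteration count (illustrative,
;; not taken from the port):
;;
;;   do
;;     {
;;       /* loop body */
;;     }
;;   while (--count != 0);   /* one dbnz: decrement, branch back if not done */
;;
;; The doloop_end expander above emits this form only for QI/HI/SI counters
;; and only while the loop nesting level is within -mloop-nesting=
;; (crx_loop_nesting); deeper loops FAIL and fall back to ordinary
;; compare-and-branch code.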
+ +(define_insn "doloop_end_" + [(set (pc) + (if_then_else (ne (match_operand:CRXIM 1 "register_operand" "+r,!m") + (const_int 1)) + (label_ref (match_operand 0 "" "")) + (pc))) + (set (match_dup 1) (plus:CRXIM (match_dup 1) (const_int -1))) + (clobber (match_scratch:CRXIM 2 "=X,r")) + (clobber (reg:CC CC_REGNUM))] + "" + "@ + dbnz\t%1, %l0 + load\t%1, %2\;add\t$-1, %2\;stor\t%2, %1\;bne\t%l0" + [(set_attr "length" "6, 12")] +) diff --git a/gcc/config/crx/crx.opt b/gcc/config/crx/crx.opt new file mode 100644 index 000000000..7ff0be0e3 --- /dev/null +++ b/gcc/config/crx/crx.opt @@ -0,0 +1,34 @@ +; Options for the National Semiconductor CRX port of the compiler. + +; Copyright (C) 2005, 2007 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mmac +Target Report Mask(MAC) +Support multiply accumulate instructions + +mno-push-args +Target Report RejectNegative Mask(NO_PUSH_ARGS) +Do not use push to store function arguments + +mloop-nesting= +Common RejectNegative Joined UInteger Var(crx_loop_nesting) Init(12) +Restrict doloop to the given nesting level + +mdebug-addr +Target RejectNegative Var(TARGET_DEBUG_ADDR) Undocumented diff --git a/gcc/config/crx/t-crx b/gcc/config/crx/t-crx new file mode 100644 index 000000000..8bb62c652 --- /dev/null +++ b/gcc/config/crx/t-crx @@ -0,0 +1,37 @@ +# CRX Target Makefile +# +# Copyright (C) 2005 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Mingw specific compilation fixes +USE_COLLECT2 = +STMP_FIXINC = + +# Software emulation for integer div and mod +LIB2FUNCS_EXTRA = $(srcdir)/config/udivmodsi4.c $(srcdir)/config/udivmod.c $(srcdir)/config/divmod.c + +# Build the floating point emulation libraries. +FPBIT = fp-bit.c +DPBIT = dp-bit.c + +fp-bit.c: $(srcdir)/config/fp-bit.c + echo '#define FLOAT' > fp-bit.c + cat $(srcdir)/config/fp-bit.c >> fp-bit.c + +dp-bit.c: $(srcdir)/config/fp-bit.c + cat $(srcdir)/config/fp-bit.c > dp-bit.c diff --git a/gcc/config/darwin-64.c b/gcc/config/darwin-64.c new file mode 100644 index 000000000..a012e9dbc --- /dev/null +++ b/gcc/config/darwin-64.c @@ -0,0 +1,72 @@ +/* Functions shipped in the ppc64 and x86_64 version of libgcc_s.1.dylib + in older Mac OS X versions, preserved for backwards compatibility. + Copyright (C) 2006, 2009 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#if defined (__ppc64__) || defined (__x86_64__) +/* Many of these functions have probably never been used by anyone + anywhere on these targets, but it's hard to prove this, so they're defined + here. None are actually necessary, as demonstrated below by defining + each function using the operation it implements. */ + +typedef long DI; +typedef unsigned long uDI; +typedef int SI; +typedef unsigned int uSI; +typedef int word_type __attribute__ ((mode (__word__))); + +DI __ashldi3 (DI x, word_type c); +DI __ashrdi3 (DI x, word_type c); +int __clzsi2 (uSI x); +word_type __cmpdi2 (DI x, DI y); +int __ctzsi2 (uSI x); +DI __divdi3 (DI x, DI y); +uDI __lshrdi3 (uDI x, word_type c); +DI __moddi3 (DI x, DI y); +DI __muldi3 (DI x, DI y); +DI __negdi2 (DI x); +int __paritysi2 (uSI x); +int __popcountsi2 (uSI x); +word_type __ucmpdi2 (uDI x, uDI y); +uDI __udivdi3 (uDI x, uDI y); +uDI __udivmoddi4 (uDI x, uDI y, uDI *r); +uDI __umoddi3 (uDI x, uDI y); + +DI __ashldi3 (DI x, word_type c) { return x << c; } +DI __ashrdi3 (DI x, word_type c) { return x >> c; } +int __clzsi2 (uSI x) { return __builtin_clz (x); } +word_type __cmpdi2 (DI x, DI y) { return x < y ? 0 : x == y ? 1 : 2; } +int __ctzsi2 (uSI x) { return __builtin_ctz (x); } +DI __divdi3 (DI x, DI y) { return x / y; } +uDI __lshrdi3 (uDI x, word_type c) { return x >> c; } +DI __moddi3 (DI x, DI y) { return x % y; } +DI __muldi3 (DI x, DI y) { return x * y; } +DI __negdi2 (DI x) { return -x; } +int __paritysi2 (uSI x) { return __builtin_parity (x); } +int __popcountsi2 (uSI x) { return __builtin_popcount (x); } +word_type __ucmpdi2 (uDI x, uDI y) { return x < y ? 0 : x == y ? 1 : 2; } +uDI __udivdi3 (uDI x, uDI y) { return x / y; } +uDI __udivmoddi4 (uDI x, uDI y, uDI *r) { *r = x % y; return x / y; } +uDI __umoddi3 (uDI x, uDI y) { return x % y; } + +#endif /* __ppc64__ || __x86_64__ */ diff --git a/gcc/config/darwin-c.c b/gcc/config/darwin-c.c new file mode 100644 index 000000000..0c713ba7e --- /dev/null +++ b/gcc/config/darwin-c.c @@ -0,0 +1,717 @@ +/* Darwin support needed only by C/C++ frontends. + Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008, 2010 + Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "cpplib.h" +#include "tree.h" +#include "incpath.h" +#include "c-family/c-common.h" +#include "c-family/c-pragma.h" +#include "c-family/c-format.h" +#include "diagnostic-core.h" +#include "flags.h" +#include "tm_p.h" +#include "cppdefault.h" +#include "prefix.h" +#include "target.h" +#include "target-def.h" + +/* Pragmas. */ + +#define BAD(gmsgid) do { warning (OPT_Wpragmas, gmsgid); return; } while (0) +#define BAD2(msgid, arg) do { warning (OPT_Wpragmas, msgid, arg); return; } while (0) + +static bool using_frameworks = false; + +static const char *find_subframework_header (cpp_reader *pfile, const char *header, + cpp_dir **dirp); + +typedef struct align_stack +{ + int alignment; + struct align_stack * prev; +} align_stack; + +static struct align_stack * field_align_stack = NULL; + +/* Maintain a small stack of alignments. This is similar to pragma + pack's stack, but simpler. */ + +static void +push_field_alignment (int bit_alignment) +{ + align_stack *entry = XNEW (align_stack); + + entry->alignment = maximum_field_alignment; + entry->prev = field_align_stack; + field_align_stack = entry; + + maximum_field_alignment = bit_alignment; +} + +static void +pop_field_alignment (void) +{ + if (field_align_stack) + { + align_stack *entry = field_align_stack; + + maximum_field_alignment = entry->alignment; + field_align_stack = entry->prev; + free (entry); + } + else + error ("too many #pragma options align=reset"); +} + +/* Handlers for Darwin-specific pragmas. */ + +void +darwin_pragma_ignore (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + /* Do nothing. */ +} + +/* #pragma options align={mac68k|power|reset} */ + +void +darwin_pragma_options (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + const char *arg; + tree t, x; + + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma options', ignoring"); + arg = IDENTIFIER_POINTER (t); + if (strcmp (arg, "align")) + BAD ("malformed '#pragma options', ignoring"); + if (pragma_lex (&t) != CPP_EQ) + BAD ("malformed '#pragma options', ignoring"); + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma options', ignoring"); + + if (pragma_lex (&x) != CPP_EOF) + warning (OPT_Wpragmas, "junk at end of '#pragma options'"); + + arg = IDENTIFIER_POINTER (t); + if (!strcmp (arg, "mac68k")) + push_field_alignment (16); + else if (!strcmp (arg, "power")) + push_field_alignment (0); + else if (!strcmp (arg, "reset")) + pop_field_alignment (); + else + BAD ("malformed '#pragma options align={mac68k|power|reset}', ignoring"); +} + +/* #pragma unused ([var {, var}*]) */ + +void +darwin_pragma_unused (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + tree decl, x; + int tok; + + if (pragma_lex (&x) != CPP_OPEN_PAREN) + BAD ("missing '(' after '#pragma unused', ignoring"); + + while (1) + { + tok = pragma_lex (&decl); + if (tok == CPP_NAME && decl) + { + tree local = lookup_name (decl); + if (local && (TREE_CODE (local) == PARM_DECL + || TREE_CODE (local) == VAR_DECL)) + { + TREE_USED (local) = 1; + DECL_READ_P (local) = 1; + } + tok = pragma_lex (&x); + if (tok != CPP_COMMA) + break; + } + } + + if (tok != CPP_CLOSE_PAREN) + BAD ("missing ')' after '#pragma unused', ignoring"); + + if (pragma_lex (&x) != CPP_EOF) + BAD ("junk at end of '#pragma unused'"); +} + +/* Parse the ms_struct pragma. 
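   A short usage sketch of the pragma handlers above, with hypothetical user
   code (the struct and identifier names are illustrative, not from GCC):

       #pragma options align=mac68k
       struct rec { char kind; long size; };   // fields get mac68k (2-byte) alignment
       #pragma options align=reset             // restore the previous alignment

       void f (int arg)
       {
       #pragma unused (arg)
       }

   Each align= value pushes the current maximum_field_alignment onto
   field_align_stack and reset pops it; #pragma unused marks the named locals
   or parameters as used so no unused warning is issued for them.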
*/ +void +darwin_pragma_ms_struct (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + const char *arg; + tree t; + + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma ms_struct', ignoring"); + arg = IDENTIFIER_POINTER (t); + + if (!strcmp (arg, "on")) + darwin_ms_struct = true; + else if (!strcmp (arg, "off") || !strcmp (arg, "reset")) + darwin_ms_struct = false; + else + BAD ("malformed '#pragma ms_struct {on|off|reset}', ignoring"); + + if (pragma_lex (&t) != CPP_EOF) + BAD ("junk at end of '#pragma ms_struct'"); +} + +static struct frameworks_in_use { + size_t len; + const char *name; + cpp_dir* dir; +} *frameworks_in_use; +static int num_frameworks = 0; +static int max_frameworks = 0; + + +/* Remember which frameworks have been seen, so that we can ensure + that all uses of that framework come from the same framework. DIR + is the place where the named framework NAME, which is of length + LEN, was found. We copy the directory name from NAME, as it will be + freed by others. */ + +static void +add_framework (const char *name, size_t len, cpp_dir *dir) +{ + char *dir_name; + int i; + for (i = 0; i < num_frameworks; ++i) + { + if (len == frameworks_in_use[i].len + && strncmp (name, frameworks_in_use[i].name, len) == 0) + { + return; + } + } + if (i >= max_frameworks) + { + max_frameworks = i*2; + max_frameworks += i == 0; + frameworks_in_use = XRESIZEVEC (struct frameworks_in_use, + frameworks_in_use, max_frameworks); + } + dir_name = XNEWVEC (char, len + 1); + memcpy (dir_name, name, len); + dir_name[len] = '\0'; + frameworks_in_use[num_frameworks].name = dir_name; + frameworks_in_use[num_frameworks].len = len; + frameworks_in_use[num_frameworks].dir = dir; + ++num_frameworks; +} + +/* Recall if we have seen the named framework NAME, before, and where + we saw it. NAME is LEN bytes long. The return value is the place + where it was seen before. */ + +static struct cpp_dir* +find_framework (const char *name, size_t len) +{ + int i; + for (i = 0; i < num_frameworks; ++i) + { + if (len == frameworks_in_use[i].len + && strncmp (name, frameworks_in_use[i].name, len) == 0) + { + return frameworks_in_use[i].dir; + } + } + return 0; +} + +/* There are two directories in a framework that contain header files, + Headers and PrivateHeaders. We search Headers first as it is more + common to upgrade a header from PrivateHeaders to Headers and when + that is done, the old one might hang around and be out of data, + causing grief. */ + +struct framework_header {const char * dirName; int dirNameLen; }; +static struct framework_header framework_header_dirs[] = { + { "Headers", 7 }, + { "PrivateHeaders", 14 }, + { NULL, 0 } +}; + +/* Returns a pointer to a malloced string that contains the real pathname + to the file, given the base name and the name. */ + +static char * +framework_construct_pathname (const char *fname, cpp_dir *dir) +{ + char *buf; + size_t fname_len, frname_len; + cpp_dir *fast_dir; + char *frname; + struct stat st; + int i; + + /* Framework names must have a / in them. */ + buf = strchr (fname, '/'); + if (buf) + fname_len = buf - fname; + else + return 0; + + fast_dir = find_framework (fname, fname_len); + + /* Framework includes must all come from one framework. 
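   As a worked example of the construction done below (the paths are
   illustrative): for the include name "Foundation/Foundation.h" searched
   against the framework directory "/System/Library/Frameworks", the
   candidates tried, in order, are

       /System/Library/Frameworks/Foundation.framework/Headers/Foundation.h
       /System/Library/Frameworks/Foundation.framework/PrivateHeaders/Foundation.h

   and the first one that stat() finds is returned; once the .framework
   directory itself is found, it is remembered via add_framework so that later
   includes of the same framework must resolve there.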
*/ + if (fast_dir && dir != fast_dir) + return 0; + + frname = XNEWVEC (char, strlen (fname) + dir->len + 2 + + strlen(".framework/") + strlen("PrivateHeaders")); + strncpy (&frname[0], dir->name, dir->len); + frname_len = dir->len; + if (frname_len && frname[frname_len-1] != '/') + frname[frname_len++] = '/'; + strncpy (&frname[frname_len], fname, fname_len); + frname_len += fname_len; + strncpy (&frname[frname_len], ".framework/", strlen (".framework/")); + frname_len += strlen (".framework/"); + + if (fast_dir == 0) + { + frname[frname_len-1] = 0; + if (stat (frname, &st) == 0) + { + /* As soon as we find the first instance of the framework, + we stop and never use any later instance of that + framework. */ + add_framework (fname, fname_len, dir); + } + else + { + /* If we can't find the parent directory, no point looking + further. */ + free (frname); + return 0; + } + frname[frname_len-1] = '/'; + } + + /* Append framework_header_dirs and header file name */ + for (i = 0; framework_header_dirs[i].dirName; i++) + { + strncpy (&frname[frname_len], + framework_header_dirs[i].dirName, + framework_header_dirs[i].dirNameLen); + strcpy (&frname[frname_len + framework_header_dirs[i].dirNameLen], + &fname[fname_len]); + + if (stat (frname, &st) == 0) + return frname; + } + + free (frname); + return 0; +} + +/* Search for FNAME in sub-frameworks. pname is the context that we + wish to search in. Return the path the file was found at, + otherwise return 0. */ + +static const char* +find_subframework_file (const char *fname, const char *pname) +{ + char *sfrname; + const char *dot_framework = ".framework/"; + char *bufptr; + int sfrname_len, i, fname_len; + struct cpp_dir *fast_dir; + static struct cpp_dir subframe_dir; + struct stat st; + + bufptr = strchr (fname, '/'); + + /* Subframework files must have / in the name. */ + if (bufptr == 0) + return 0; + + fname_len = bufptr - fname; + fast_dir = find_framework (fname, fname_len); + + /* Sub framework header filename includes parent framework name and + header name in the "CarbonCore/OSUtils.h" form. If it does not + include slash it is not a sub framework include. */ + bufptr = strstr (pname, dot_framework); + + /* If the parent header is not of any framework, then this header + cannot be part of any subframework. */ + if (!bufptr) + return 0; + + /* Now translate. 
For example, +- bufptr + fname = CarbonCore/OSUtils.h | + pname = /System/Library/Frameworks/Foundation.framework/Headers/Foundation.h + into + sfrname = /System/Library/Frameworks/Foundation.framework/Frameworks/CarbonCore.framework/Headers/OSUtils.h */ + + sfrname = XNEWVEC (char, strlen (pname) + strlen (fname) + 2 + + strlen ("Frameworks/") + strlen (".framework/") + + strlen ("PrivateHeaders")); + + bufptr += strlen (dot_framework); + + sfrname_len = bufptr - pname; + + strncpy (&sfrname[0], pname, sfrname_len); + + strncpy (&sfrname[sfrname_len], "Frameworks/", strlen ("Frameworks/")); + sfrname_len += strlen("Frameworks/"); + + strncpy (&sfrname[sfrname_len], fname, fname_len); + sfrname_len += fname_len; + + strncpy (&sfrname[sfrname_len], ".framework/", strlen (".framework/")); + sfrname_len += strlen (".framework/"); + + /* Append framework_header_dirs and header file name */ + for (i = 0; framework_header_dirs[i].dirName; i++) + { + strncpy (&sfrname[sfrname_len], + framework_header_dirs[i].dirName, + framework_header_dirs[i].dirNameLen); + strcpy (&sfrname[sfrname_len + framework_header_dirs[i].dirNameLen], + &fname[fname_len]); + + if (stat (sfrname, &st) == 0) + { + if (fast_dir != &subframe_dir) + { + if (fast_dir) + warning (0, "subframework include %s conflicts with framework include", + fname); + else + add_framework (fname, fname_len, &subframe_dir); + } + + return sfrname; + } + } + free (sfrname); + + return 0; +} + +/* Add PATH to the system includes. PATH must be malloc-ed and + NUL-terminated. System framework paths are C++ aware. */ + +static void +add_system_framework_path (char *path) +{ + int cxx_aware = 1; + cpp_dir *p; + + p = XNEW (cpp_dir); + p->next = NULL; + p->name = path; + p->sysp = 1 + !cxx_aware; + p->construct = framework_construct_pathname; + using_frameworks = 1; + + add_cpp_dir_path (p, SYSTEM); +} + +/* Add PATH to the bracket includes. PATH must be malloc-ed and + NUL-terminated. */ + +void +add_framework_path (char *path) +{ + cpp_dir *p; + + p = XNEW (cpp_dir); + p->next = NULL; + p->name = path; + p->sysp = 0; + p->construct = framework_construct_pathname; + using_frameworks = 1; + + add_cpp_dir_path (p, BRACKET); +} + +static const char *framework_defaults [] = + { + "/System/Library/Frameworks", + "/Library/Frameworks", + }; + +/* Register the GNU objective-C runtime include path if STDINC. */ + +void +darwin_register_objc_includes (const char *sysroot, const char *iprefix, + int stdinc) +{ + const char *fname; + size_t len; + /* We do not do anything if we do not want the standard includes. */ + if (!stdinc) + return; + + fname = GCC_INCLUDE_DIR "-gnu-runtime"; + + /* Register the GNU OBJC runtime include path if we are compiling OBJC + with GNU-runtime. */ + + if (c_dialect_objc () && !flag_next_runtime) + { + char *str; + /* See if our directory starts with the standard prefix. + "Translate" them, i.e. replace /usr/local/lib/gcc... with + IPREFIX and search them first. */ + if (iprefix && (len = cpp_GCC_INCLUDE_DIR_len) != 0 && !sysroot + && !strncmp (fname, cpp_GCC_INCLUDE_DIR, len)) + { + str = concat (iprefix, fname + len, NULL); + /* FIXME: wrap the headers for C++awareness. */ + add_path (str, SYSTEM, /*c++aware=*/false, false); + } + + /* Should this directory start with the sysroot? 
*/ + if (sysroot) + str = concat (sysroot, fname, NULL); + else + str = update_path (fname, ""); + + add_path (str, SYSTEM, /*c++aware=*/false, false); + } +} + + +/* Register all the system framework paths if STDINC is true and setup + the missing_header callback for subframework searching if any + frameworks had been registered. */ + +void +darwin_register_frameworks (const char *sysroot, + const char *iprefix ATTRIBUTE_UNUSED, int stdinc) +{ + if (stdinc) + { + size_t i; + + /* Setup default search path for frameworks. */ + for (i=0; imissing_header = find_subframework_header; +} + +/* Search for HEADER in context dependent way. The return value is + the malloced name of a header to try and open, if any, or NULL + otherwise. This is called after normal header lookup processing + fails to find a header. We search each file in the include stack, + using FUNC, starting from the most deeply nested include and + finishing with the main input file. We stop searching when FUNC + returns nonzero. */ + +static const char* +find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp) +{ + const char *fname = header; + struct cpp_buffer *b; + const char *n; + + for (b = cpp_get_buffer (pfile); + b && cpp_get_file (b) && cpp_get_path (cpp_get_file (b)); + b = cpp_get_prev (b)) + { + n = find_subframework_file (fname, cpp_get_path (cpp_get_file (b))); + if (n) + { + /* Logically, the place where we found the subframework is + the place where we found the Framework that contains the + subframework. This is useful for tracking wether or not + we are in a system header. */ + *dirp = cpp_get_dir (cpp_get_file (b)); + return n; + } + } + + return 0; +} + +/* Return the value of darwin_macosx_version_min suitable for the + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, + so '10.4.2' becomes 1040. The lowest digit is always zero. + Print a warning if the version number can't be understood. */ +static const char * +version_as_macro (void) +{ + static char result[] = "1000"; + + if (strncmp (darwin_macosx_version_min, "10.", 3) != 0) + goto fail; + if (! ISDIGIT (darwin_macosx_version_min[3])) + goto fail; + result[2] = darwin_macosx_version_min[3]; + if (darwin_macosx_version_min[4] != '\0' + && darwin_macosx_version_min[4] != '.') + goto fail; + + return result; + + fail: + error ("unknown value %qs of -mmacosx-version-min", + darwin_macosx_version_min); + return "1000"; +} + +/* Define additional CPP flags for Darwin. */ + +#define builtin_define(TXT) cpp_define (pfile, TXT) + +void +darwin_cpp_builtins (cpp_reader *pfile) +{ + builtin_define ("__MACH__"); + builtin_define ("__APPLE__"); + + /* __APPLE_CC__ is defined as some old Apple include files expect it + to be defined and won't work if it isn't. */ + builtin_define_with_value ("__APPLE_CC__", "1", false); + + if (darwin_constant_cfstrings) + builtin_define ("__CONSTANT_CFSTRINGS__"); + + builtin_define_with_value ("__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__", + version_as_macro(), false); + + /* Since we do not (at 4.6) support ObjC gc for the NeXT runtime, the + following will cause a syntax error if one tries to compile gc attributed + items. However, without this, NeXT system headers cannot be parsed + properly (on systems >= darwin 9). 
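   As a short illustration of the effect (the declaration is illustrative, not
   from GCC): with -fobjc-gc the source line

       __strong id cache;

   reaches the parser as

       __attribute__((objc_gc(strong))) id cache;

   while without -fobjc-gc both __strong and __weak expand to nothing, so the
   same NeXT headers still parse.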
*/ + if (flag_objc_gc) + { + builtin_define ("__strong=__attribute__((objc_gc(strong)))"); + builtin_define ("__weak=__attribute__((objc_gc(weak)))"); + builtin_define ("__OBJC_GC__"); + } + else + { + builtin_define ("__strong="); + builtin_define ("__weak="); + } + + if (flag_objc_abi == 2) + builtin_define ("__OBJC2__"); +} + +/* Handle C family front-end options. */ + +static bool +handle_c_option (size_t code, + const char *arg, + int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + default: + /* Unrecognized options that we said we'd handle turn into + errors if not listed here. */ + return false; + + case OPT_iframework: + add_system_framework_path (xstrdup (arg)); + break; + + case OPT_fapple_kext: + ; + } + + /* We recognized the option. */ + return true; +} + +#undef TARGET_HANDLE_C_OPTION +#define TARGET_HANDLE_C_OPTION handle_c_option + +struct gcc_targetcm targetcm = TARGETCM_INITIALIZER; + +/* Allow ObjC* access to CFStrings. */ +tree +darwin_objc_construct_string (tree str) +{ + if (!darwin_constant_cfstrings) + { + /* Even though we are not using CFStrings, place our literal + into the cfstring_htab hash table, so that the + darwin_constant_cfstring_p() function will see it. */ + darwin_enter_string_into_cfstring_table (str); + /* Fall back to NSConstantString. */ + return NULL_TREE; + } + + return darwin_build_constant_cfstring (str); +} + +/* The string ref type is created as CFStringRef by therefore, we + must match for it explicitly, since it's outside the gcc code. */ + +bool +darwin_cfstring_ref_p (const_tree strp) +{ + tree tn; + if (!strp || TREE_CODE (strp) != POINTER_TYPE) + return false; + + tn = TYPE_NAME (strp); + if (tn) + tn = DECL_NAME (tn); + return (tn + && IDENTIFIER_POINTER (tn) + && !strncmp (IDENTIFIER_POINTER (tn), "CFStringRef", 8)); +} + +/* At present the behavior of this is undefined and it does nothing. */ +void +darwin_check_cfstring_format_arg (tree ARG_UNUSED (format_arg), + tree ARG_UNUSED (args_list)) +{ +} + +/* The extra format types we recognize. */ +EXPORTED_CONST format_kind_info darwin_additional_format_types[] = { + { "CFString", NULL, NULL, NULL, NULL, + NULL, NULL, + FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0, + NULL, NULL + } +}; diff --git a/gcc/config/darwin-crt2.c b/gcc/config/darwin-crt2.c new file mode 100644 index 000000000..f4a584a8f --- /dev/null +++ b/gcc/config/darwin-crt2.c @@ -0,0 +1,153 @@ +/* KeyMgr backwards-compatibility support for Darwin. + Copyright (C) 2001, 2002, 2004, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +/* It is incorrect to include config.h here, because this file is being + compiled for the target, and hence definitions concerning only the host + do not apply. */ + +#include "tconfig.h" +#include "tsystem.h" + +/* This file doesn't do anything useful on non-powerpc targets, since they + don't have backwards compatibility anyway. */ + +#ifdef __ppc__ + +/* Homemade decls substituting for getsect.h and dyld.h, so cross + compilation works. */ +struct mach_header; +extern char *getsectdatafromheader (struct mach_header *, const char *, + const char *, unsigned long *); +extern void _dyld_register_func_for_add_image + (void (*) (struct mach_header *, unsigned long)); +extern void _dyld_register_func_for_remove_image + (void (*) (struct mach_header *, unsigned long)); + +extern void __darwin_gcc3_preregister_frame_info (void); + +/* These are from "keymgr.h". */ +extern void _init_keymgr (void); +extern void *_keymgr_get_and_lock_processwide_ptr (unsigned key); +extern void _keymgr_set_and_unlock_processwide_ptr (unsigned key, void *ptr); + +extern void *__keymgr_global[]; +typedef struct _Sinfo_Node { + unsigned int size ; /*size of this node*/ + unsigned short major_version ; /*API major version.*/ + unsigned short minor_version ; /*API minor version.*/ + } _Tinfo_Node ; + +/* KeyMgr 3.x is the first one supporting GCC3 stuff natively. */ +#define KEYMGR_API_MAJOR_GCC3 3 +/* ... with these keys. */ +#define KEYMGR_GCC3_LIVE_IMAGE_LIST 301 /* loaded images */ +#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */ + +/* Node of KEYMGR_GCC3_LIVE_IMAGE_LIST. Info about each resident image. */ +struct live_images { + unsigned long this_size; /* sizeof (live_images) */ + struct mach_header *mh; /* the image info */ + unsigned long vm_slide; + void (*destructor)(struct live_images *); /* destructor for this */ + struct live_images *next; + unsigned int examined_p; + void *fde; + void *object_info; + unsigned long info[2]; /* Future use. */ +}; + + +/* These routines are used only on Darwin versions before 10.2. + Later versions have equivalent code in the system. + Eventually, they might go away, although it might be a long time... */ + +static void darwin_unwind_dyld_remove_image_hook + (struct mach_header *m, unsigned long s); +static void darwin_unwind_dyld_remove_image_hook + (struct mach_header *m, unsigned long s); +extern void __darwin_gcc3_preregister_frame_info (void); + +static void +darwin_unwind_dyld_add_image_hook (struct mach_header *mh, unsigned long slide) +{ + struct live_images *l = (struct live_images *)calloc (1, sizeof (*l)); + l->mh = mh; + l->vm_slide = slide; + l->this_size = sizeof (*l); + l->next = (struct live_images *) + _keymgr_get_and_lock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST); + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST, l); +} + +static void +darwin_unwind_dyld_remove_image_hook (struct mach_header *m, unsigned long s) +{ + struct live_images *top, **lip, *destroy = NULL; + + /* Look for it in the list of live images and delete it. 
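   The removal below uses the same keymgr protocol as the add hook above; a
   minimal sketch of that protocol (the datum pointer P is illustrative):

       void *p = _keymgr_get_and_lock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST);
       ... inspect or rewrite the list hanging off p ...
       _keymgr_set_and_unlock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST, p);

   The lock is only released by the matching set-and-unlock call, which is why
   the code below stores TOP back before freeing the unlinked entry.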
*/ + + top = (struct live_images *) + _keymgr_get_and_lock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST); + for (lip = ⊤ *lip != NULL; lip = &(*lip)->next) + { + if ((*lip)->mh == m && (*lip)->vm_slide == s) + { + destroy = *lip; + *lip = destroy->next; /* unlink DESTROY */ + + if (destroy->this_size != sizeof (*destroy)) /* sanity check */ + abort (); + + break; + } + } + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST, top); + + /* Now that we have unlinked this from the image list, toss it. */ + if (destroy != NULL) + { + if (destroy->destructor != NULL) + (*destroy->destructor) (destroy); + free (destroy); + } +} + +void +__darwin_gcc3_preregister_frame_info (void) +{ + const _Tinfo_Node *info; + _init_keymgr (); + info = (_Tinfo_Node *)__keymgr_global[2]; + if (info != NULL) + { + if (info->major_version >= KEYMGR_API_MAJOR_GCC3) + return; + /* Otherwise, use our own add_image_hooks. */ + } + + _dyld_register_func_for_add_image (darwin_unwind_dyld_add_image_hook); + _dyld_register_func_for_remove_image (darwin_unwind_dyld_remove_image_hook); +} + +#endif /* __ppc__ */ diff --git a/gcc/config/darwin-crt3.c b/gcc/config/darwin-crt3.c new file mode 100644 index 000000000..9b64f2aa8 --- /dev/null +++ b/gcc/config/darwin-crt3.c @@ -0,0 +1,532 @@ +/* __cxa_atexit backwards-compatibility support for Darwin. + Copyright (C) 2006, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Don't do anything if we are compiling for a kext multilib. */ +#ifdef __PIC__ + +/* It is incorrect to include config.h here, because this file is being + compiled for the target, and hence definitions concerning only the host + do not apply. */ + +#include "tconfig.h" +#include "tsystem.h" + +#include +#include +#include +#include + +/* This file works around two different problems. + + The first problem is that there is no __cxa_atexit on Mac OS versions + before 10.4. It fixes this by providing a complete atexit and + __cxa_atexit emulation called from the regular atexit. + + The second problem is that on all shipping versions of Mac OS, + __cxa_finalize and exit() don't work right: they don't run routines + that were registered while other atexit routines are running. This + is worked around by wrapping each atexit/__cxa_atexit routine with + our own routine which ensures that any __cxa_atexit calls while it + is running are honoured. + + There are still problems which this does not solve. Before 10.4, + shared objects linked with previous compilers won't have their + atexit calls properly interleaved with code compiled with newer + compilers. 
Also, atexit routines registered from shared objects + linked with previous compilers won't get the bug fix. */ + +typedef int (*cxa_atexit_p)(void (*func) (void*), void* arg, const void* dso); +typedef void (*cxa_finalize_p)(const void *dso); +typedef int (*atexit_p)(void (*func)(void)); + +/* These are from "keymgr.h". */ +extern void *_keymgr_get_and_lock_processwide_ptr (unsigned key); +extern int _keymgr_get_and_lock_processwide_ptr_2 (unsigned, void **); +extern int _keymgr_set_and_unlock_processwide_ptr (unsigned key, void *ptr); + +extern void *__keymgr_global[]; +typedef struct _Sinfo_Node { + unsigned int size ; /*size of this node*/ + unsigned short major_version ; /*API major version.*/ + unsigned short minor_version ; /*API minor version.*/ + } _Tinfo_Node ; + +#ifdef __ppc__ +#define CHECK_KEYMGR_ERROR(e) \ + (((_Tinfo_Node *)__keymgr_global[2])->major_version >= 4 ? (e) : 0) +#else +#define CHECK_KEYMGR_ERROR(e) (e) +#endif + +/* Our globals are stored under this keymgr index. */ +#define KEYMGR_ATEXIT_LIST 14 + +/* The different kinds of callback routines. */ +typedef void (*atexit_callback)(void); +typedef void (*cxa_atexit_callback)(void *); + +/* This structure holds a routine to call. There may be extra fields + at the end of the structure that this code doesn't know about. */ +struct one_atexit_routine +{ + union { + atexit_callback ac; + cxa_atexit_callback cac; + } callback; + /* has_arg is 0/2/4 if 'ac' is live, 1/3/5 if 'cac' is live. + Higher numbers indicate a later version of the structure that this + code doesn't understand and will ignore. */ + int has_arg; + void * arg; +}; + +struct atexit_routine_list +{ + struct atexit_routine_list * next; + struct one_atexit_routine r; +}; + +/* The various possibilities for status of atexit(). */ +enum atexit_status { + atexit_status_unknown = 0, + atexit_status_missing = 1, + atexit_status_broken = 2, + atexit_status_working = 16 +}; + +struct keymgr_atexit_list +{ + /* Version of this list. This code knows only about version 0. + If the version is higher than 0, this code may add new atexit routines + but should not attempt to run the list. */ + short version; + /* 1 if an atexit routine is currently being run by this code, 0 + otherwise. */ + char running_routines; + /* Holds a value from 'enum atexit_status'. */ + unsigned char atexit_status; + /* The list of atexit and cxa_atexit routines registered. If + atexit_status_missing it contains all routines registered while + linked with this code. If atexit_status_broken it contains all + routines registered during cxa_finalize while linked with this + code. */ + struct atexit_routine_list *l; + /* &__cxa_atexit; set if atexit_status >= atexit_status_broken. */ + cxa_atexit_p cxa_atexit_f; + /* &__cxa_finalize; set if atexit_status >= atexit_status_broken. */ + cxa_finalize_p cxa_finalize_f; + /* &atexit; set if atexit_status >= atexit_status_working + or atexit_status == atexit_status_missing. */ + atexit_p atexit_f; +}; + +/* Return 0 if __cxa_atexit has the bug it has in Mac OS 10.4: it + fails to call routines registered while an atexit routine is + running. Return 1 if it works properly, and -1 if an error occurred. 
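   In user-level terms, the broken behaviour being probed looks like this
   (illustrative program, not part of this file):

       static void inner (void) { ... }
       static void outer (void) { atexit (inner); }
       int main (void) { atexit (outer); return 0; }

   With an atexit/__cxa_atexit that this file considers working, exit() runs
   outer and then inner; with the broken __cxa_finalize, inner is silently
   skipped because it was registered while the list was already being drained.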
*/ + +struct atexit_data +{ + int result; + cxa_atexit_p cxa_atexit; +}; + +static void cxa_atexit_check_2 (void *arg) +{ + ((struct atexit_data *)arg)->result = 1; +} + +static void cxa_atexit_check_1 (void *arg) +{ + struct atexit_data * aed = arg; + if (aed->cxa_atexit (cxa_atexit_check_2, arg, arg) != 0) + aed->result = -1; +} + +static int +check_cxa_atexit (cxa_atexit_p cxa_atexit, cxa_finalize_p cxa_finalize) +{ + struct atexit_data aed = { 0, cxa_atexit }; + + /* We re-use &aed as the 'dso' parameter, since it's a unique address. */ + if (cxa_atexit (cxa_atexit_check_1, &aed, &aed) != 0) + return -1; + cxa_finalize (&aed); + if (aed.result == 0) + { + /* Call __cxa_finalize again to make sure that cxa_atexit_check_2 + is removed from the list before AED goes out of scope. */ + cxa_finalize (&aed); + aed.result = 0; + } + return aed.result; +} + +#ifdef __ppc__ +/* This comes from Csu. It works only before 10.4. The prototype has + been altered a bit to avoid casting. */ +extern int _dyld_func_lookup(const char *dyld_func_name, + void *address) __attribute__((visibility("hidden"))); + +static void our_atexit (void); + +/* We're running on 10.3.9. Find the address of the system atexit() + function. So easy to say, so hard to do. */ +static atexit_p +find_atexit_10_3 (void) +{ + unsigned int (*dyld_image_count_fn)(void); + const char *(*dyld_get_image_name_fn)(unsigned int image_index); + const void *(*dyld_get_image_header_fn)(unsigned int image_index); + const void *(*NSLookupSymbolInImage_fn)(const void *image, + const char *symbolName, + unsigned int options); + void *(*NSAddressOfSymbol_fn)(const void *symbol); + unsigned i, count; + + /* Find some dyld functions. */ + _dyld_func_lookup("__dyld_image_count", &dyld_image_count_fn); + _dyld_func_lookup("__dyld_get_image_name", &dyld_get_image_name_fn); + _dyld_func_lookup("__dyld_get_image_header", &dyld_get_image_header_fn); + _dyld_func_lookup("__dyld_NSLookupSymbolInImage", &NSLookupSymbolInImage_fn); + _dyld_func_lookup("__dyld_NSAddressOfSymbol", &NSAddressOfSymbol_fn); + + /* If any of these don't exist, that's an error. */ + if (! dyld_image_count_fn || ! dyld_get_image_name_fn + || ! dyld_get_image_header_fn || ! NSLookupSymbolInImage_fn + || ! NSAddressOfSymbol_fn) + return NULL; + + count = dyld_image_count_fn (); + for (i = 0; i < count; i++) + { + const char * path = dyld_get_image_name_fn (i); + const void * image; + const void * symbol; + + if (strcmp (path, "/usr/lib/libSystem.B.dylib") != 0) + continue; + image = dyld_get_image_header_fn (i); + if (! image) + return NULL; + /* '4' is NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR. */ + symbol = NSLookupSymbolInImage_fn (image, "_atexit", 4); + if (! symbol) + return NULL; + return NSAddressOfSymbol_fn (symbol); + } + return NULL; +} +#endif + +/* Create (if necessary), find, lock, fill in, and return our globals. + Return NULL on error, in which case the globals will not be locked. + The caller should call keymgr_set_and_unlock. */ +static struct keymgr_atexit_list * +get_globals (void) +{ + struct keymgr_atexit_list * r; + +#ifdef __ppc__ + /* 10.3.9 doesn't have _keymgr_get_and_lock_processwide_ptr_2 so the + PPC side can't use it. On 10.4 this just means the error gets + reported a little later when + _keymgr_set_and_unlock_processwide_ptr finds that the key was + never locked. 
*/ + r = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST); +#else + void * rr; + if (_keymgr_get_and_lock_processwide_ptr_2 (KEYMGR_ATEXIT_LIST, &rr)) + return NULL; + r = rr; +#endif + + if (r == NULL) + { + r = calloc (sizeof (struct keymgr_atexit_list), 1); + if (! r) + return NULL; + } + + if (r->atexit_status == atexit_status_unknown) + { + void *handle; + + handle = dlopen ("/usr/lib/libSystem.B.dylib", RTLD_NOLOAD); + if (!handle) + { +#ifdef __ppc__ + r->atexit_status = atexit_status_missing; + r->atexit_f = find_atexit_10_3 (); + if (! r->atexit_f) + goto error; + if (r->atexit_f (our_atexit)) + goto error; +#else + goto error; +#endif + } + else + { + int chk_result; + + r->cxa_atexit_f = (cxa_atexit_p)dlsym (handle, "__cxa_atexit"); + r->cxa_finalize_f = (cxa_finalize_p)dlsym (handle, "__cxa_finalize"); + if (! r->cxa_atexit_f || ! r->cxa_finalize_f) + goto error; + + chk_result = check_cxa_atexit (r->cxa_atexit_f, r->cxa_finalize_f); + if (chk_result == -1) + goto error; + else if (chk_result == 0) + r->atexit_status = atexit_status_broken; + else + { + r->atexit_f = (atexit_p)dlsym (handle, "atexit"); + if (! r->atexit_f) + goto error; + r->atexit_status = atexit_status_working; + } + } + } + + return r; + + error: + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, r); + return NULL; +} + +/* Add TO_ADD to ATEXIT_LIST. ATEXIT_LIST may be NULL but is + always the result of calling _keymgr_get_and_lock_processwide_ptr and + so KEYMGR_ATEXIT_LIST is known to be locked; this routine is responsible + for unlocking it. */ + +static int +add_routine (struct keymgr_atexit_list * g, + const struct one_atexit_routine * to_add) +{ + struct atexit_routine_list * s + = malloc (sizeof (struct atexit_routine_list)); + int result; + + if (!s) + { + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g); + return -1; + } + s->r = *to_add; + s->next = g->l; + g->l = s; + result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g); + return CHECK_KEYMGR_ERROR (result) == 0 ? 0 : -1; +} + +/* This runs the routines in G->L up to STOP. */ +static struct keymgr_atexit_list * +run_routines (struct keymgr_atexit_list *g, + struct atexit_routine_list *stop) +{ + for (;;) + { + struct atexit_routine_list * cur = g->l; + if (! cur || cur == stop) + break; + g->l = cur->next; + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g); + + switch (cur->r.has_arg) { + case 0: case 2: case 4: + cur->r.callback.ac (); + break; + case 1: case 3: case 5: + cur->r.callback.cac (cur->r.arg); + break; + default: + /* Don't understand, so don't call it. */ + break; + } + free (cur); + + g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST); + if (! g) + break; + } + return g; +} + +/* Call the routine described by ROUTINE_PARAM and then call any + routines added to KEYMGR_ATEXIT_LIST while that routine was + running, all with in_cxa_finalize set. 
*/ + +static void +cxa_atexit_wrapper (void* routine_param) +{ + struct one_atexit_routine * routine = routine_param; + struct keymgr_atexit_list *g; + struct atexit_routine_list * base = NULL; + char prev_running = 0; + + g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST); + if (g) + { + prev_running = g->running_routines; + g->running_routines = 1; + base = g->l; + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g); + } + + if (routine->has_arg) + routine->callback.cac (routine->arg); + else + routine->callback.ac (); + + if (g) + g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST); + if (g) + g = run_routines (g, base); + if (g) + { + g->running_routines = prev_running; + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g); + } +} + +#ifdef __ppc__ +/* This code is used while running on 10.3.9, when __cxa_atexit doesn't + exist in the system library. 10.3.9 only supported regular PowerPC, + so this code isn't necessary on x86 or ppc64. */ + +/* This routine is called from the system atexit(); it runs everything + registered on the KEYMGR_ATEXIT_LIST. */ + +static void +our_atexit (void) +{ + struct keymgr_atexit_list *g; + char prev_running; + + g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST); + if (! g || g->version != 0 || g->atexit_status != atexit_status_missing) + return; + + prev_running = g->running_routines; + g->running_routines = 1; + g = run_routines (g, NULL); + if (! g) + return; + g->running_routines = prev_running; + _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g); +} +#endif + +/* This is our wrapper around atexit and __cxa_atexit. It will return + nonzero if an error occurs, and otherwise: + - if in_cxa_finalize is set, or running on 10.3.9, add R to + KEYMGR_ATEXIT_LIST; or + - call the system __cxa_atexit to add cxa_atexit_wrapper with an argument + that indicates how cxa_atexit_wrapper should call R. */ + +static int +atexit_common (const struct one_atexit_routine *r, const void *dso) +{ + struct keymgr_atexit_list *g = get_globals (); + + if (! g) + return -1; + + if (g->running_routines || g->atexit_status == atexit_status_missing) + return add_routine (g, r); + + if (g->atexit_status >= atexit_status_working) + { + int result; + if (r->has_arg) + { + cxa_atexit_p cxa_atexit = g->cxa_atexit_f; + result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, + g); + if (CHECK_KEYMGR_ERROR (result)) + return -1; + return cxa_atexit (r->callback.cac, r->arg, dso); + } + else + { + atexit_p atexit_f = g->atexit_f; + result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, + g); + if (CHECK_KEYMGR_ERROR (result)) + return -1; + return atexit_f (r->callback.ac); + } + } + else + { + cxa_atexit_p cxa_atexit = g->cxa_atexit_f; + struct one_atexit_routine *alloced; + int result; + + result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g); + if (CHECK_KEYMGR_ERROR (result)) + return -1; + + alloced = malloc (sizeof (struct one_atexit_routine)); + if (! alloced) + return -1; + *alloced = *r; + return cxa_atexit (cxa_atexit_wrapper, alloced, dso); + } +} + +/* These are the actual replacement routines; they just funnel into + atexit_common. 
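   A minimal sketch of that funnelling, using struct one_atexit_routine as
   defined above (plain_handler, dtor, obj and dso are illustrative names):

       struct one_atexit_routine r;

       r.callback.ac = plain_handler;        // atexit () flavour
       r.has_arg = 0;
       atexit_common (&r, &__dso_handle);

       r.callback.cac = dtor;                // __cxa_atexit () flavour
       r.arg = obj;
       r.has_arg = 1;
       atexit_common (&r, dso);

   has_arg selects which union member is live; run_routines dispatches on the
   same encoding when the deferred list is finally drained.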
*/ + +int __cxa_atexit (cxa_atexit_callback func, void* arg, + const void* dso) __attribute__((visibility("hidden"))); + +int +__cxa_atexit (cxa_atexit_callback func, void* arg, const void* dso) +{ + struct one_atexit_routine r; + r.callback.cac = func; + r.has_arg = 1; + r.arg = arg; + return atexit_common (&r, dso); +} + +int atexit (atexit_callback func) __attribute__((visibility("hidden"))); + +/* Use __dso_handle to allow even bundles that call atexit() to be unloaded + on 10.4. */ +extern void __dso_handle; + +int +atexit (atexit_callback func) +{ + struct one_atexit_routine r; + r.callback.ac = func; + r.has_arg = 0; + return atexit_common (&r, &__dso_handle); +} + +#endif /* __PIC__ */ diff --git a/gcc/config/darwin-driver.c b/gcc/config/darwin-driver.c new file mode 100644 index 000000000..1eb920106 --- /dev/null +++ b/gcc/config/darwin-driver.c @@ -0,0 +1,189 @@ +/* Additional functions for the GCC driver on Darwin native. + Copyright (C) 2006, 2007, 2008, 2010 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "gcc.h" +#include "opts.h" + +#ifndef CROSS_DIRECTORY_STRUCTURE +#include +#include "xregex.h" + +/* When running on a Darwin system and using that system's headers and + libraries, default the -mmacosx-version-min flag to be the version + of the system on which the compiler is running. */ + +static void +darwin_default_min_version (unsigned int *decoded_options_count, + struct cl_decoded_option **decoded_options) +{ + const unsigned int argc = *decoded_options_count; + struct cl_decoded_option *const argv = *decoded_options; + unsigned int i; + char osversion[32]; + size_t osversion_len = sizeof (osversion) - 1; + static int osversion_name[2] = { CTL_KERN, KERN_OSRELEASE }; + char * version_p; + char * version_pend; + int major_vers; + char minor_vers[6]; + static char new_flag[sizeof ("10.0.0") + 6]; + + /* If the command-line is empty, just return. */ + if (argc <= 1) + return; + + /* Don't do this if the user specified -mmacosx-version-min= or + -mno-macosx-version-min. */ + for (i = 1; i < argc; i++) + if (argv[i].opt_index == OPT_mmacosx_version_min_) + return; + + /* Retrieve the deployment target from the environment and insert + it as a flag. */ + { + const char * macosx_deployment_target; + macosx_deployment_target = getenv ("MACOSX_DEPLOYMENT_TARGET"); + if (macosx_deployment_target + /* Apparently, an empty string for MACOSX_DEPLOYMENT_TARGET means + "use the default". Or, possibly "use 10.1". We choose + to ignore the environment variable, as if it was never set. 
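+       A non-empty value is simply turned into the corresponding driver
+       option: for example, MACOSX_DEPLOYMENT_TARGET=10.5 behaves as if
+       -mmacosx-version-min=10.5 had been given on the command line.
+       (An explicit -mmacosx-version-min= always wins, since we returned
+       above before consulting the environment.)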
*/ + && macosx_deployment_target[0]) + { + ++*decoded_options_count; + *decoded_options = XNEWVEC (struct cl_decoded_option, + *decoded_options_count); + (*decoded_options)[0] = argv[0]; + generate_option (OPT_mmacosx_version_min_, macosx_deployment_target, + 1, CL_DRIVER, &(*decoded_options)[1]); + memcpy (*decoded_options + 2, argv + 1, + (argc - 1) * sizeof (struct cl_decoded_option)); + return; + } + } + + /* Determine the version of the running OS. If we can't, warn user, + and do nothing. */ + if (sysctl (osversion_name, ARRAY_SIZE (osversion_name), osversion, + &osversion_len, NULL, 0) == -1) + { + warning (0, "sysctl for kern.osversion failed: %m"); + return; + } + + /* Try to parse the first two parts of the OS version number. Warn + user and return if it doesn't make sense. */ + if (! ISDIGIT (osversion[0])) + goto parse_failed; + major_vers = osversion[0] - '0'; + version_p = osversion + 1; + if (ISDIGIT (*version_p)) + major_vers = major_vers * 10 + (*version_p++ - '0'); + if (major_vers > 4 + 9) + goto parse_failed; + if (*version_p++ != '.') + goto parse_failed; + version_pend = strchr(version_p, '.'); + if (!version_pend) + goto parse_failed; + if (! ISDIGIT (*version_p)) + goto parse_failed; + strncpy(minor_vers, version_p, version_pend - version_p); + minor_vers[version_pend - version_p] = '\0'; + + /* The major kernel version number is 4 plus the second OS version + component. */ + if (major_vers - 4 <= 4) + /* On 10.4 and earlier, the old linker is used which does not + support three-component system versions. */ + sprintf (new_flag, "10.%d", major_vers - 4); + else + sprintf (new_flag, "10.%d.%s", major_vers - 4, + minor_vers); + + /* Add the new flag. */ + ++*decoded_options_count; + *decoded_options = XNEWVEC (struct cl_decoded_option, + *decoded_options_count); + (*decoded_options)[0] = argv[0]; + generate_option (OPT_mmacosx_version_min_, new_flag, + 1, CL_DRIVER, &(*decoded_options)[1]); + memcpy (*decoded_options + 2, argv + 1, + (argc - 1) * sizeof (struct cl_decoded_option)); + return; + + parse_failed: + warning (0, "couldn%'t understand kern.osversion %q.*s", + (int) osversion_len, osversion); + return; +} + +#endif /* CROSS_DIRECTORY_STRUCTURE */ + +/* Translate -filelist and -framework options in *DECODED_OPTIONS + (size *DECODED_OPTIONS_COUNT) to use -Xlinker so that they are + considered to be linker inputs in the case that no other inputs are + specified. Handling these options in DRIVER_SELF_SPECS does not + suffice because specs are too late to add linker inputs, and + handling them in LINK_SPEC does not suffice because the linker will + not be called if there are no other inputs. When native, also + default the -mmacosx-version-min flag. 
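+
+   For example, "-framework Foo" on the driver command line is rewritten
+   as "-Xlinker -framework -Xlinker Foo", so the pair still reaches the
+   linker even when no object files are named.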
*/ + +void +darwin_driver_init (unsigned int *decoded_options_count, + struct cl_decoded_option **decoded_options) +{ + unsigned int i; + + for (i = 1; i < *decoded_options_count; i++) + { + if ((*decoded_options)[i].errors & CL_ERR_MISSING_ARG) + continue; + switch ((*decoded_options)[i].opt_index) + { + case OPT_filelist: + case OPT_framework: + ++*decoded_options_count; + *decoded_options = XRESIZEVEC (struct cl_decoded_option, + *decoded_options, + *decoded_options_count); + memmove (*decoded_options + i + 2, + *decoded_options + i + 1, + ((*decoded_options_count - i - 2) + * sizeof (struct cl_decoded_option))); + generate_option (OPT_Xlinker, (*decoded_options)[i].arg, 1, + CL_DRIVER, &(*decoded_options)[i + 1]); + generate_option (OPT_Xlinker, + (*decoded_options)[i].canonical_option[0], 1, + CL_DRIVER, &(*decoded_options)[i]); + break; + + default: + break; + } + } + +#ifndef CROSS_DIRECTORY_STRUCTURE + darwin_default_min_version (decoded_options_count, decoded_options); +#endif +} diff --git a/gcc/config/darwin-f.c b/gcc/config/darwin-f.c new file mode 100644 index 000000000..24ed674d7 --- /dev/null +++ b/gcc/config/darwin-f.c @@ -0,0 +1,60 @@ +/* Darwin support needed only by Fortran frontends. + Copyright (C) 2008 Free Software Foundation, Inc. + Contributed by Daniel Franke. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* Provide stubs for the hooks defined by darwin.h + TARGET_EXTRA_PRE_INCLUDES, TARGET_EXTRA_INCLUDES + + As both, gcc and gfortran link in incpath.o, we can not + conditionally undefine said hooks if fortran is build. + However, we can define do-nothing stubs of said hooks as + we are not interested in objc include files in Fortran. + + The hooks original purpose (see also darwin-c.c): + * darwin_register_objc_includes + Register the GNU objective-C runtime include path if STDINC. + + * darwin_register_frameworks + Register all the system framework paths if STDINC is true and setup + the missing_header callback for subframework searching if any + frameworks had been registered. */ + + +#include "ansidecl.h" + +/* Prototypes for functions below to avoid a lengthy list of includes + to achieve the same. */ +void darwin_register_objc_includes (const char *, const char *, int); +void darwin_register_frameworks (const char *, const char *, int); + + +void +darwin_register_objc_includes (const char *sysroot ATTRIBUTE_UNUSED, + const char *iprefix ATTRIBUTE_UNUSED, + int stdinc ATTRIBUTE_UNUSED) +{ +} + +void +darwin_register_frameworks (const char *sysroot ATTRIBUTE_UNUSED, + const char *iprefix ATTRIBUTE_UNUSED, + int stdinc ATTRIBUTE_UNUSED) +{ +} diff --git a/gcc/config/darwin-ppc-ldouble-patch.def b/gcc/config/darwin-ppc-ldouble-patch.def new file mode 100644 index 000000000..ba5751e41 --- /dev/null +++ b/gcc/config/darwin-ppc-ldouble-patch.def @@ -0,0 +1,113 @@ +/* Copyright (C) 2008 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +PATCH_BUILTIN (BUILT_IN_ACOSHL) +PATCH_BUILTIN (BUILT_IN_ACOSL) +PATCH_BUILTIN (BUILT_IN_ASINHL) +PATCH_BUILTIN (BUILT_IN_ASINL) +PATCH_BUILTIN (BUILT_IN_ATAN2L) +PATCH_BUILTIN (BUILT_IN_ATANHL) +PATCH_BUILTIN (BUILT_IN_ATANL) +PATCH_BUILTIN (BUILT_IN_CABSL) +PATCH_BUILTIN (BUILT_IN_CACOSHL) +PATCH_BUILTIN (BUILT_IN_CACOSL) +PATCH_BUILTIN (BUILT_IN_CARGL) +PATCH_BUILTIN (BUILT_IN_CASINHL) +PATCH_BUILTIN (BUILT_IN_CASINL) +PATCH_BUILTIN (BUILT_IN_CATANHL) +PATCH_BUILTIN (BUILT_IN_CATANL) +PATCH_BUILTIN (BUILT_IN_CBRTL) +PATCH_BUILTIN (BUILT_IN_CCOSHL) +PATCH_BUILTIN (BUILT_IN_CCOSL) +PATCH_BUILTIN (BUILT_IN_CEILL) +PATCH_BUILTIN (BUILT_IN_CEXPL) +PATCH_BUILTIN (BUILT_IN_CIMAGL) +PATCH_BUILTIN (BUILT_IN_CLOGL) +PATCH_BUILTIN (BUILT_IN_CONJL) +PATCH_BUILTIN (BUILT_IN_COPYSIGNL) +PATCH_BUILTIN (BUILT_IN_COSHL) +PATCH_BUILTIN (BUILT_IN_COSL) +PATCH_BUILTIN (BUILT_IN_CPOWL) +PATCH_BUILTIN (BUILT_IN_CPROJL) +PATCH_BUILTIN (BUILT_IN_CREALL) +PATCH_BUILTIN (BUILT_IN_CSINHL) +PATCH_BUILTIN (BUILT_IN_CSINL) +PATCH_BUILTIN (BUILT_IN_CSQRTL) +PATCH_BUILTIN (BUILT_IN_CTANHL) +PATCH_BUILTIN (BUILT_IN_CTANL) +PATCH_BUILTIN (BUILT_IN_ERFCL) +PATCH_BUILTIN (BUILT_IN_ERFL) +PATCH_BUILTIN (BUILT_IN_EXP2L) +PATCH_BUILTIN (BUILT_IN_EXPL) +PATCH_BUILTIN (BUILT_IN_EXPM1L) +PATCH_BUILTIN (BUILT_IN_FABSL) +PATCH_BUILTIN (BUILT_IN_FDIML) +PATCH_BUILTIN (BUILT_IN_FLOORL) +PATCH_BUILTIN (BUILT_IN_FMAL) +PATCH_BUILTIN (BUILT_IN_FMAXL) +PATCH_BUILTIN (BUILT_IN_FMINL) +PATCH_BUILTIN (BUILT_IN_FMODL) +PATCH_BUILTIN (BUILT_IN_FREXPL) +PATCH_BUILTIN (BUILT_IN_HYPOTL) +PATCH_BUILTIN (BUILT_IN_ILOGBL) +PATCH_BUILTIN (BUILT_IN_LDEXPL) +PATCH_BUILTIN (BUILT_IN_LGAMMAL) +PATCH_BUILTIN (BUILT_IN_LLRINTL) +PATCH_BUILTIN (BUILT_IN_LLROUNDL) +PATCH_BUILTIN (BUILT_IN_LOG10L) +PATCH_BUILTIN (BUILT_IN_LOG1PL) +PATCH_BUILTIN (BUILT_IN_LOG2L) +PATCH_BUILTIN (BUILT_IN_LOGBL) +PATCH_BUILTIN (BUILT_IN_LOGL) +PATCH_BUILTIN (BUILT_IN_LRINTL) +PATCH_BUILTIN (BUILT_IN_LROUNDL) +PATCH_BUILTIN (BUILT_IN_MODFL) +PATCH_BUILTIN (BUILT_IN_NANL) +PATCH_BUILTIN (BUILT_IN_NEARBYINTL) +PATCH_BUILTIN (BUILT_IN_NEXTAFTERL) +PATCH_BUILTIN (BUILT_IN_NEXTTOWARDL) +PATCH_BUILTIN (BUILT_IN_POWL) +PATCH_BUILTIN (BUILT_IN_REMAINDERL) +PATCH_BUILTIN (BUILT_IN_REMQUOL) +PATCH_BUILTIN (BUILT_IN_RINTL) +PATCH_BUILTIN (BUILT_IN_ROUNDL) +PATCH_BUILTIN (BUILT_IN_SCALBLNL) +PATCH_BUILTIN (BUILT_IN_SCALBNL) +PATCH_BUILTIN (BUILT_IN_SINHL) +PATCH_BUILTIN (BUILT_IN_SINL) +PATCH_BUILTIN (BUILT_IN_SQRTL) +PATCH_BUILTIN (BUILT_IN_TANHL) +PATCH_BUILTIN (BUILT_IN_TANL) +PATCH_BUILTIN (BUILT_IN_TGAMMAL) +PATCH_BUILTIN (BUILT_IN_TRUNCL) + +PATCH_BUILTIN_NO64 (BUILT_IN_VFPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VFSCANF) +PATCH_BUILTIN_NO64 (BUILT_IN_VPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSCANF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSNPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSSCANF) + +PATCH_BUILTIN_VARIADIC (BUILT_IN_FPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_FSCANF) 
+PATCH_BUILTIN_VARIADIC (BUILT_IN_PRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SCANF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SNPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SSCANF) diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h new file mode 100644 index 000000000..4a9961119 --- /dev/null +++ b/gcc/config/darwin-protos.h @@ -0,0 +1,127 @@ +/* Prototypes. + Copyright (C) 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +extern void darwin_init_sections (void); +extern int name_needs_quotes (const char *); + +extern void machopic_validate_stub_or_non_lazy_ptr (const char *); + +extern void machopic_output_function_base_name (FILE *); +extern const char *machopic_indirection_name (rtx, bool); +extern const char *machopic_mcount_stub_name (void); + +#ifdef RTX_CODE + +extern rtx machopic_gen_offset (rtx); +extern int machopic_operand_p (rtx); +extern int machopic_symbol_defined_p (rtx sym_ref); +extern enum machopic_addr_class machopic_classify_symbol (rtx); + +extern rtx machopic_indirect_data_reference (rtx, rtx); +extern rtx machopic_indirect_call_target (rtx); +extern rtx machopic_legitimize_pic_address (rtx, enum machine_mode, rtx); + +extern void machopic_asm_out_constructor (rtx, int); +extern void machopic_asm_out_destructor (rtx, int); +#endif /* RTX_CODE */ + +#ifdef TREE_CODE + +extern void machopic_define_symbol (rtx); +extern void darwin_encode_section_info (tree, rtx, int); +extern void darwin_set_default_type_attributes (tree); + +#endif /* TREE_CODE */ + +extern void machopic_finish (FILE *); + +extern int machopic_reloc_rw_mask (void); +extern section *machopic_select_section (tree, int, unsigned HOST_WIDE_INT); +extern section *machopic_select_rtx_section (enum machine_mode, rtx, + unsigned HOST_WIDE_INT); + +extern section *darwin_function_section (tree, enum node_frequency, bool, bool); +extern void darwin_function_switched_text_sections (FILE *, tree, bool); + +extern void darwin_unique_section (tree decl, int reloc); +extern void darwin_asm_named_section (const char *, unsigned int, tree); +extern void darwin_non_lazy_pcrel (FILE *, rtx); + +extern void darwin_emit_unwind_label (FILE *, tree, int, int); +extern void darwin_emit_except_table_label (FILE *); + +extern void darwin_pragma_ignore (struct cpp_reader *); +extern void darwin_pragma_options (struct cpp_reader *); +extern void darwin_pragma_unused (struct cpp_reader *); +extern void darwin_pragma_ms_struct (struct cpp_reader *); + +extern void darwin_file_start (void); +extern void darwin_file_end (void); + +extern void darwin_asm_lto_start (void); +extern void darwin_asm_lto_end (void); + +extern void darwin_mark_decl_preserved (const char *); + +extern tree darwin_handle_kext_attribute (tree *, tree, tree, int, bool *); +extern tree darwin_handle_weak_import_attribute (tree *node, tree name, + tree args, int flags, + bool * 
no_add_attrs); +extern void machopic_output_stub (FILE *, const char *, const char *); +extern void darwin_globalize_label (FILE *, const char *); +extern void darwin_assemble_visibility (tree, int); + +extern void darwin_asm_output_dwarf_delta (FILE *, int, const char *, + const char *); +extern void darwin_asm_output_dwarf_offset (FILE *, int, const char *, + section *); + +extern void darwin_asm_declare_object_name (FILE *, const char *, tree); +extern void darwin_asm_declare_constant_name (FILE *, const char *, + const_tree, HOST_WIDE_INT); + +extern void darwin_output_aligned_bss (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, unsigned int); + +extern void darwin_asm_output_aligned_decl_local (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern void darwin_asm_output_aligned_decl_common (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); + +extern bool darwin_binds_local_p (const_tree); +extern void darwin_cpp_builtins (struct cpp_reader *); + +extern tree darwin_init_cfstring_builtins (unsigned); +extern tree darwin_fold_builtin (tree, int, tree *, bool); +extern tree darwin_objc_construct_string (tree); +extern bool darwin_cfstring_p (tree); +extern bool darwin_cfstring_ref_p (const_tree); +extern void darwin_check_cfstring_format_arg (tree, tree); +extern tree darwin_build_constant_cfstring (tree); +extern void darwin_enter_string_into_cfstring_table (tree); + +extern void darwin_asm_output_anchor (rtx symbol); +extern bool darwin_use_anchors_for_symbol_p (const_rtx symbol); +extern bool darwin_kextabi_p (void); +extern void darwin_override_options (void); +extern void darwin_patch_builtins (void); +extern void darwin_rename_builtins (void); diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def new file mode 100644 index 000000000..61b6f69b1 --- /dev/null +++ b/gcc/config/darwin-sections.def @@ -0,0 +1,195 @@ +/* Copyright (C) 2005, 2006, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Since Darwin's ld will not allow zero-sized objects, and gcc wants them, + we emit one byte (in darwin.c) when such an object is encountered. + + This messes up section anchoring because the emitted byte is not counted + outside the port. To cope with this, we set aside sections for zero-sized + objects and disallow those sections from participating in section anchors + ("zobj_" sections, below). + + Items that might be coalesced by the linker are prevented from participating, + (and those in mergeable sections are disallowed in varasm.c). 
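+
+   Each DEF_SECTION (NAME, FLAGS, DIRECTIVE, OBJC) entry below is expanded
+   by darwin_init_sections in darwin.c, which defines DEF_SECTION so that
+   darwin_sections[NAME] becomes an unnamed section emitting "\t" DIRECTIVE;
+   a non-zero OBJC argument selects the ObjC-aware section callback.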
*/ + +/* The .text section is generated in varasm.c */ +DEF_SECTION (text_coal_section, SECTION_CODE|SECTION_NO_ANCHOR, + ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", 0) + +DEF_SECTION (text_hot_section, SECTION_CODE, + ".section __TEXT,__text_hot,regular,pure_instructions", 0) +DEF_SECTION (text_cold_section, SECTION_CODE, + ".section __TEXT,__text_cold,regular,pure_instructions", 0) +DEF_SECTION (text_startup_section, SECTION_CODE, + ".section __TEXT,__text_startup,regular,pure_instructions", 0) +DEF_SECTION (text_exit_section, SECTION_CODE, + ".section __TEXT,__text_exit,regular,pure_instructions", 0) + +DEF_SECTION (text_hot_coal_section, SECTION_CODE, + ".section __TEXT,__text_hot_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_cold_coal_section, SECTION_CODE, + ".section __TEXT,__text_cold_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_startup_coal_section, SECTION_CODE, + ".section __TEXT,__text_stt_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_exit_coal_section, SECTION_CODE, + ".section __TEXT,__text_exit_coal,coalesced,pure_instructions", 0) + +/* const */ +DEF_SECTION (const_section, 0, ".const", 0) +DEF_SECTION (const_coal_section, SECTION_NO_ANCHOR, + ".section __TEXT,__const_coal,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. */ +DEF_SECTION (zobj_const_section, SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_const", 0) + +/* Write-able data. '.data' handled in varasm.c */ +DEF_SECTION (static_data_section, SECTION_WRITE, ".static_data", 0) +DEF_SECTION (data_coal_section, SECTION_WRITE|SECTION_NO_ANCHOR, + ".section __DATA,__datacoal_nt,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. */ +DEF_SECTION (zobj_data_section, SECTION_WRITE|SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_data", 0) + +/* BSS - .lcomm / .zerofill __DATA,__bss sections cannot be switched to + explicitly (will create an assembler error). */ +DEF_SECTION (zobj_bss_section, SECTION_WRITE|SECTION_BSS|SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_bss", 0) + +/* const data */ +DEF_SECTION (const_data_section, 0, ".const_data", 0) +DEF_SECTION (const_data_coal_section, SECTION_NO_ANCHOR, + ".section __DATA,__const_coal,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. */ +DEF_SECTION (zobj_const_data_section, SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_const_data", 0) + +/* Strings and other literals. */ +DEF_SECTION (cstring_section, SECTION_MERGE | SECTION_STRINGS, ".cstring", 0) +DEF_SECTION (literal4_section, SECTION_MERGE, ".literal4", 0) +DEF_SECTION (literal8_section, SECTION_MERGE, ".literal8", 0) +DEF_SECTION (literal16_section, SECTION_MERGE, ".literal16", 0) +/* Unlike constant NSStrings, constant CFStrings do not live in the + __OBJC segment since they may also occur in pure C or C++ programs. */ +DEF_SECTION (cfstring_constant_object_section, 0, + ".section __DATA, __cfstring", 0) + +/* Module init, term, constructors & destructors. */ +DEF_SECTION (mod_init_section, 0, ".mod_init_func", 0) +DEF_SECTION (mod_term_section, 0, ".mod_term_func", 0) +DEF_SECTION (constructor_section, 0, ".constructor", 0) +DEF_SECTION (destructor_section, 0, ".destructor", 0) + +/* Objective-C ABI=0 (Original version) sections. 
*/ +DEF_SECTION (objc_class_section, 0, ".objc_class", 1) +DEF_SECTION (objc_meta_class_section, 0, ".objc_meta_class", 1) +DEF_SECTION (objc_category_section, 0, ".objc_category", 1) +DEF_SECTION (objc_class_vars_section, 0, ".objc_class_vars", 1) +DEF_SECTION (objc_instance_vars_section, 0, ".objc_instance_vars", 1) +DEF_SECTION (objc_cls_meth_section, 0, ".objc_cls_meth", 1) +DEF_SECTION (objc_inst_meth_section, 0, ".objc_inst_meth", 1) +DEF_SECTION (objc_cat_cls_meth_section, 0, ".objc_cat_cls_meth", 1) +DEF_SECTION (objc_cat_inst_meth_section, 0, ".objc_cat_inst_meth", 1) +DEF_SECTION (objc_selector_refs_section, SECTION_MERGE, ".objc_message_refs", 1) +DEF_SECTION (objc_selector_fixup_section, 0, + ".section __OBJC, __sel_fixup, regular, no_dead_strip", 1) +DEF_SECTION (objc_symbols_section, 0, ".objc_symbols", 1) +DEF_SECTION (objc_module_info_section, 0, ".objc_module_info", 1) +DEF_SECTION (objc_protocol_section, 0, ".objc_protocol", 1) +DEF_SECTION (objc_string_object_section, 0, ".objc_string_object", 1) +DEF_SECTION (objc_constant_string_object_section, 0, + ".section __OBJC, __cstring_object, regular, no_dead_strip", 0) + +/* Fix-and-Continue image marker. */ +DEF_SECTION (objc_image_info_section, 0, + ".section __OBJC, __image_info, regular, no_dead_strip", 1) +DEF_SECTION (objc_class_names_section, 0, ".objc_class_names", 1) +DEF_SECTION (objc_meth_var_names_section, 0, ".objc_meth_var_names", 1) +DEF_SECTION (objc_meth_var_types_section, 0, ".objc_meth_var_types", 1) +DEF_SECTION (objc_cls_refs_section, SECTION_MERGE, ".objc_cls_refs", 1) + +/* Stubs and symbol indirection sections. */ +/* lazy symbol pointers. */ +DEF_SECTION (machopic_lazy_symbol_ptr_section, SECTION_NO_ANCHOR, + ".lazy_symbol_pointer", 0) +DEF_SECTION (machopic_lazy_symbol_ptr2_section, SECTION_NO_ANCHOR, + ".section __DATA, __la_sym_ptr2,lazy_symbol_pointers", 0) +DEF_SECTION (machopic_lazy_symbol_ptr3_section, SECTION_NO_ANCHOR, + ".section __DATA, __la_sym_ptr3,lazy_symbol_pointers", 0) +/* non-lazy symbol pointers. */ +DEF_SECTION (machopic_nl_symbol_ptr_section, SECTION_NO_ANCHOR, + MACHOPIC_NL_SYMBOL_PTR_SECTION, 0) +/* Symbol stubs. */ +DEF_SECTION (machopic_symbol_stub_section, SECTION_NO_ANCHOR, + ".symbol_stub", 0) +DEF_SECTION (machopic_symbol_stub1_section, SECTION_NO_ANCHOR, + ".section __TEXT,__symbol_stub1,symbol_stubs," + "pure_instructions,16", 0) +/* PIC symbol stubs. */ +DEF_SECTION (machopic_picsymbol_stub_section, SECTION_NO_ANCHOR, + ".picsymbol_stub", 0) +DEF_SECTION (machopic_picsymbol_stub1_section, SECTION_NO_ANCHOR, + ".section __TEXT,__picsymbolstub1,symbol_stubs," + "pure_instructions,32", 0) +DEF_SECTION (machopic_picsymbol_stub2_section, SECTION_NO_ANCHOR, + ".section __TEXT,__picsymbolstub2,symbol_stubs,pure_instructions,25", 0) +DEF_SECTION (machopic_picsymbol_stub3_section, SECTION_NO_ANCHOR, + ".section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5", 0) + +/* Exception-related. 
*/ +DEF_SECTION (darwin_exception_section, SECTION_NO_ANCHOR, + ".section __DATA,__gcc_except_tab", 0) +DEF_SECTION (darwin_eh_frame_section, SECTION_NO_ANCHOR, + ".section " EH_FRAME_SECTION_NAME ",__eh_frame" + EH_FRAME_SECTION_ATTR, 0) + +/* Sections for ObjC ABI=1 (ObjC 'V1' extensions) */ +DEF_SECTION (objc1_class_ext_section, 0, + ".section __OBJC, __class_ext, regular, no_dead_strip", 1) +DEF_SECTION (objc1_prop_list_section, 0, + ".section __OBJC, __property, regular, no_dead_strip", 1) +DEF_SECTION (objc1_protocol_ext_section, 0, + ".section __OBJC, __protocol_ext, regular, no_dead_strip", 1) + +/* Sections for ObjC ABI=2 (m64). */ +DEF_SECTION (objc2_message_refs_section, 0, + ".section __DATA, __objc_msgrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_classdefs_section, 0, ".section __DATA, __objc_data", 1) +DEF_SECTION (objc2_metadata_section, 0, ".section __DATA, __objc_const", 1) + +DEF_SECTION (objc2_classrefs_section, 0, + ".section __DATA, __objc_classrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_classlist_section, 0, + ".section __DATA, __objc_classlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_categorylist_section, 0, + ".section __DATA, __objc_catlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_selector_refs_section, 0, + ".section __DATA, __objc_selrefs, literal_pointers, no_dead_strip", 1) +DEF_SECTION (objc2_nonlazy_class_section, 0, + ".section __DATA, __objc_nlclslist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_nonlazy_category_section, 0, + ".section __DATA, __objc_nlcatlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_protocollist_section, 0, + ".section __DATA, __objc_protolist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_protocolrefs_section, 0, + ".section __DATA, __objc_protorefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_super_classrefs_section, 0, + ".section __DATA, __objc_superrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_image_info_section, 0, + ".section __DATA, __objc_imageinfo, regular, no_dead_strip", 1) +DEF_SECTION (objc2_constant_string_object_section, 0, + ".section __DATA, __objc_stringobj, regular, no_dead_strip", 1) diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c new file mode 100644 index 000000000..3b065e5b9 --- /dev/null +++ b/gcc/config/darwin.c @@ -0,0 +1,3472 @@ +/* Functions for generic Darwin as target machine for GNU C compiler. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "expr.h" +#include "reload.h" +#include "function.h" +#include "ggc.h" +#include "langhooks.h" +#include "target.h" +#include "tm_p.h" +#include "diagnostic-core.h" +#include "toplev.h" +#include "hashtab.h" +#include "df.h" +#include "debug.h" +#include "obstack.h" +#include "lto-streamer.h" + +/* Darwin supports a feature called fix-and-continue, which is used + for rapid turn around debugging. When code is compiled with the + -mfix-and-continue flag, two changes are made to the generated code + that allow the system to do things that it would normally not be + able to do easily. These changes allow gdb to load in + recompilation of a translation unit that has been changed into a + running program and replace existing functions and methods of that + translation unit with versions of those functions and methods + from the newly compiled translation unit. The new functions access + the existing static symbols from the old translation unit, if the + symbol existed in the unit to be replaced, and from the new + translation unit, otherwise. + + The changes are to insert 5 nops at the beginning of all functions + and to use indirection to get at static symbols. The 5 nops + are required by consumers of the generated code. Currently, gdb + uses this to patch in a jump to the overriding function, this + allows all uses of the old name to forward to the replacement, + including existing function pointers and virtual methods. See + rs6000_emit_prologue for the code that handles the nop insertions. + + The added indirection allows gdb to redirect accesses to static + symbols from the newly loaded translation unit to the existing + symbol, if any. @code{static} symbols are special and are handled by + setting the second word in the .non_lazy_symbol_pointer data + structure to symbol. See indirect_data for the code that handles + the extra indirection, and machopic_output_indirection and its use + of MACHO_SYMBOL_STATIC for the code that handles @code{static} + symbol indirection. */ + +/* For darwin >= 9 (OSX 10.5) the linker is capable of making the necessary + branch islands and we no longer need to emit darwin stubs. + However, if we are generating code for earlier systems (or for use in the + kernel) the stubs might still be required, and this will be set true. */ +int darwin_emit_branch_islands = false; + +/* A flag to determine whether we are running c++ or obj-c++. This has to be + settable from non-c-family contexts too (i.e. we can't use the c_dialect_ + functions). */ +int darwin_running_cxx; + +/* Some code-gen now depends on OS major version numbers (at least). */ +int generating_for_darwin_version ; + +/* Section names. */ +section * darwin_sections[NUM_DARWIN_SECTIONS]; + +/* While we transition to using in-tests instead of ifdef'd code. */ +#ifndef HAVE_lo_sum +#define HAVE_lo_sum 0 +#define gen_macho_high(a,b) (a) +#define gen_macho_low(a,b,c) (a) +#endif + +/* True if we're setting __attribute__ ((ms_struct)). */ +int darwin_ms_struct = false; + +/* Earlier versions of Darwin as do not recognize an alignment field in + .comm directives, this should be set for versions that allow it. 
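+   (Roughly: whether the assembler accepts an extra alignment operand,
+   e.g. something like ".comm _x,8,3" with a log2 alignment, rather than
+   plain ".comm _x,8"; the exact directive syntax shown here is
+   illustrative only.)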
*/ +int emit_aligned_common = false; + +/* A get_unnamed_section callback used to switch to an ObjC section. + DIRECTIVE is as for output_section_asm_op. */ + +static void +output_objc_section_asm_op (const void *directive) +{ + static bool been_here = false; + + /* The NeXT ObjC Runtime requires these sections to be present and in + order in the object. The code below implements this by emitting + a section header for each ObjC section the first time that an ObjC + section is requested. */ + if (! been_here) + { + section *saved_in_section = in_section; + static const enum darwin_section_enum tomark[] = + { + /* written, cold -> hot */ + objc_cat_cls_meth_section, + objc_cat_inst_meth_section, + objc_string_object_section, + objc_constant_string_object_section, + objc_selector_refs_section, + objc_selector_fixup_section, + objc_cls_refs_section, + objc_class_section, + objc_meta_class_section, + /* shared, hot -> cold */ + objc_cls_meth_section, + objc_inst_meth_section, + objc_protocol_section, + objc_class_names_section, + objc_meth_var_types_section, + objc_meth_var_names_section, + objc_category_section, + objc_class_vars_section, + objc_instance_vars_section, + objc_module_info_section, + objc_symbols_section, + }; + /* ABI=1 */ + static const enum darwin_section_enum tomarkv1[] = + { + objc1_protocol_ext_section, + objc1_class_ext_section, + objc1_prop_list_section + } ; + /* ABI=2 */ + static const enum darwin_section_enum tomarkv2[] = + { + objc2_message_refs_section, + objc2_classdefs_section, + objc2_metadata_section, + objc2_classrefs_section, + objc2_classlist_section, + objc2_categorylist_section, + objc2_selector_refs_section, + objc2_nonlazy_class_section, + objc2_nonlazy_category_section, + objc2_protocollist_section, + objc2_protocolrefs_section, + objc2_super_classrefs_section, + objc2_image_info_section, + objc2_constant_string_object_section + } ; + size_t i; + + been_here = true; + if (flag_objc_abi < 2) + { + for (i = 0; i < ARRAY_SIZE (tomark); i++) + switch_to_section (darwin_sections[tomark[i]]); + if (flag_objc_abi == 1) + for (i = 0; i < ARRAY_SIZE (tomarkv1); i++) + switch_to_section (darwin_sections[tomarkv1[i]]); + } + else + for (i = 0; i < ARRAY_SIZE (tomarkv2); i++) + switch_to_section (darwin_sections[tomarkv2[i]]); + /* Make sure we don't get varasm.c out of sync with us. */ + switch_to_section (saved_in_section); + } + output_section_asm_op (directive); +} + + +/* Private flag applied to disable section-anchors in a particular section. */ +#define SECTION_NO_ANCHOR SECTION_MACH_DEP + + +/* Implement TARGET_ASM_INIT_SECTIONS. */ + +void +darwin_init_sections (void) +{ +#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) \ + darwin_sections[NAME] = \ + get_unnamed_section (FLAGS, (OBJC \ + ? output_objc_section_asm_op \ + : output_section_asm_op), \ + "\t" DIRECTIVE); +#include "config/darwin-sections.def" +#undef DEF_SECTION + + readonly_data_section = darwin_sections[const_section]; + exception_section = darwin_sections[darwin_exception_section]; + eh_frame_section = darwin_sections[darwin_eh_frame_section]; +} + +int +name_needs_quotes (const char *name) +{ + int c; + while ((c = *name++) != '\0') + if (! ISIDNUM (c) + && c != '.' && c != '$' && c != '_' ) + return 1; + return 0; +} + +/* Return true if SYM_REF can be used without an indirection. 
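+   That is, whether the symbol is known to be defined by the code we are
+   emitting, so that references to it need not go through a non-lazy
+   pointer or a stub.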
*/ +int +machopic_symbol_defined_p (rtx sym_ref) +{ + if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_DEFINED) + return true; + + /* If a symbol references local and is not an extern to this + file, then the symbol might be able to declared as defined. */ + if (SYMBOL_REF_LOCAL_P (sym_ref) && ! SYMBOL_REF_EXTERNAL_P (sym_ref)) + { + /* If the symbol references a variable and the variable is a + common symbol, then this symbol is not defined. */ + if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_VARIABLE) + { + tree decl = SYMBOL_REF_DECL (sym_ref); + if (!decl) + return true; + if (DECL_COMMON (decl)) + return false; + } + return true; + } + return false; +} + +/* This module assumes that (const (symbol_ref "foo")) is a legal pic + reference, which will not be changed. */ + +enum machopic_addr_class +machopic_classify_symbol (rtx sym_ref) +{ + bool function_p; + + function_p = SYMBOL_REF_FUNCTION_P (sym_ref); + if (machopic_symbol_defined_p (sym_ref)) + return (function_p + ? MACHOPIC_DEFINED_FUNCTION : MACHOPIC_DEFINED_DATA); + else + return (function_p + ? MACHOPIC_UNDEFINED_FUNCTION : MACHOPIC_UNDEFINED_DATA); +} + +#ifndef TARGET_FIX_AND_CONTINUE +#define TARGET_FIX_AND_CONTINUE 0 +#endif + +/* Indicate when fix-and-continue style code generation is being used + and when a reference to data should be indirected so that it can be + rebound in a new translation unit to reference the original instance + of that data. Symbol names that are for code generation local to + the translation unit are bound to the new translation unit; + currently this means symbols that begin with L or _OBJC_; + otherwise, we indicate that an indirect reference should be made to + permit the runtime to rebind new instances of the translation unit + to the original instance of the data. */ + +static int +indirect_data (rtx sym_ref) +{ + int lprefix; + const char *name; + + /* If we aren't generating fix-and-continue code, don't do anything + special. */ + if (TARGET_FIX_AND_CONTINUE == 0) + return 0; + + /* Otherwise, all symbol except symbols that begin with L or _OBJC_ + are indirected. Symbols that begin with L and _OBJC_ are always + bound to the current translation unit as they are used for + generated local data of the translation unit. */ + + name = XSTR (sym_ref, 0); + + lprefix = (((name[0] == '*' || name[0] == '&') + && (name[1] == 'L' || (name[1] == '"' && name[2] == 'L'))) + || (strncmp (name, "_OBJC_", 6) == 0)); + + return ! lprefix; +} + +static int +machopic_data_defined_p (rtx sym_ref) +{ + if (indirect_data (sym_ref)) + return 0; + + switch (machopic_classify_symbol (sym_ref)) + { + case MACHOPIC_DEFINED_DATA: + case MACHOPIC_DEFINED_FUNCTION: + return 1; + default: + return 0; + } +} + +void +machopic_define_symbol (rtx mem) +{ + rtx sym_ref; + + gcc_assert (GET_CODE (mem) == MEM); + sym_ref = XEXP (mem, 0); + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED; +} + +/* Return either ORIG or: + + (const:P (unspec:P [ORIG] UNSPEC_MACHOPIC_OFFSET)) + + depending on MACHO_DYNAMIC_NO_PIC_P. */ +rtx +machopic_gen_offset (rtx orig) +{ + if (MACHO_DYNAMIC_NO_PIC_P) + return orig; + else + { + /* Play games to avoid marking the function as needing pic if we + are being called as part of the cost-estimation process. 
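+	 In other words, only set crtl->uses_pic_offset_table when we are
+	 genuinely in (or expanding to) RTL, not when this is merely being
+	 invoked to cost an address during GIMPLE optimization.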
*/ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + crtl->uses_pic_offset_table = 1; + orig = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), + UNSPEC_MACHOPIC_OFFSET); + return gen_rtx_CONST (Pmode, orig); + } +} + +static GTY(()) const char * function_base_func_name; +static GTY(()) int current_pic_label_num; + +void +machopic_output_function_base_name (FILE *file) +{ + const char *current_name; + + /* If dynamic-no-pic is on, we should not get here. */ + gcc_assert (!MACHO_DYNAMIC_NO_PIC_P); + /* When we are generating _get_pc thunks within stubs, there is no current + function. */ + if (current_function_decl) + { + current_name = + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)); + if (function_base_func_name != current_name) + { + ++current_pic_label_num; + function_base_func_name = current_name; + } + } + else + { + ++current_pic_label_num; + function_base_func_name = "L_machopic_stub_dummy"; + } + fprintf (file, "L%011d$pb", current_pic_label_num); +} + +/* The suffix attached to non-lazy pointer symbols. */ +#define NON_LAZY_POINTER_SUFFIX "$non_lazy_ptr" +/* The suffix attached to stub symbols. */ +#define STUB_SUFFIX "$stub" + +typedef struct GTY (()) machopic_indirection +{ + /* The SYMBOL_REF for the entity referenced. */ + rtx symbol; + /* The name of the stub or non-lazy pointer. */ + const char * ptr_name; + /* True iff this entry is for a stub (as opposed to a non-lazy + pointer). */ + bool stub_p; + /* True iff this stub or pointer pointer has been referenced. */ + bool used; +} machopic_indirection; + +/* A table mapping stub names and non-lazy pointer names to + SYMBOL_REFs for the stubbed-to and pointed-to entities. */ + +static GTY ((param_is (struct machopic_indirection))) htab_t + machopic_indirections; + +/* Return a hash value for a SLOT in the indirections hash table. */ + +static hashval_t +machopic_indirection_hash (const void *slot) +{ + const machopic_indirection *p = (const machopic_indirection *) slot; + return htab_hash_string (p->ptr_name); +} + +/* Returns true if the KEY is the same as that associated with + SLOT. */ + +static int +machopic_indirection_eq (const void *slot, const void *key) +{ + return strcmp (((const machopic_indirection *) slot)->ptr_name, + (const char *) key) == 0; +} + +/* Return the name of the non-lazy pointer (if STUB_P is false) or + stub (if STUB_B is true) corresponding to the given name. */ + +const char * +machopic_indirection_name (rtx sym_ref, bool stub_p) +{ + char *buffer; + const char *name = XSTR (sym_ref, 0); + size_t namelen = strlen (name); + machopic_indirection *p; + void ** slot; + bool needs_quotes; + const char *suffix; + const char *prefix = user_label_prefix; + const char *quote = ""; + tree id; + + id = maybe_get_identifier (name); + if (id) + { + tree id_orig = id; + + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + if (id != id_orig) + { + name = IDENTIFIER_POINTER (id); + namelen = strlen (name); + } + } + + if (name[0] == '*') + { + prefix = ""; + ++name; + --namelen; + } + + needs_quotes = name_needs_quotes (name); + if (needs_quotes) + { + quote = "\""; + } + + if (stub_p) + suffix = STUB_SUFFIX; + else + suffix = NON_LAZY_POINTER_SUFFIX; + + buffer = XALLOCAVEC (char, strlen ("&L") + + strlen (prefix) + + namelen + + strlen (suffix) + + 2 * strlen (quote) + + 1 /* '\0' */); + + /* Construct the name of the non-lazy pointer or stub. 
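+     For an undecorated symbol "foo" this yields "&L_foo$stub" when STUB_P
+     is true and "&L_foo$non_lazy_ptr" otherwise, assuming the usual "_"
+     user label prefix and a name that needs no quoting.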
*/ + sprintf (buffer, "&%sL%s%s%s%s", quote, prefix, name, suffix, quote); + + if (!machopic_indirections) + machopic_indirections = htab_create_ggc (37, + machopic_indirection_hash, + machopic_indirection_eq, + /*htab_del=*/NULL); + + slot = htab_find_slot_with_hash (machopic_indirections, buffer, + htab_hash_string (buffer), INSERT); + if (*slot) + { + p = (machopic_indirection *) *slot; + } + else + { + p = ggc_alloc_machopic_indirection (); + p->symbol = sym_ref; + p->ptr_name = xstrdup (buffer); + p->stub_p = stub_p; + p->used = false; + *slot = p; + } + + return p->ptr_name; +} + +/* Return the name of the stub for the mcount function. */ + +const char* +machopic_mcount_stub_name (void) +{ + rtx symbol = gen_rtx_SYMBOL_REF (Pmode, "*mcount"); + return machopic_indirection_name (symbol, /*stub_p=*/true); +} + +/* If NAME is the name of a stub or a non-lazy pointer , mark the stub + or non-lazy pointer as used -- and mark the object to which the + pointer/stub refers as used as well, since the pointer/stub will + emit a reference to it. */ + +void +machopic_validate_stub_or_non_lazy_ptr (const char *name) +{ + machopic_indirection *p; + + p = ((machopic_indirection *) + (htab_find_with_hash (machopic_indirections, name, + htab_hash_string (name)))); + if (p && ! p->used) + { + const char *real_name; + tree id; + + p->used = true; + + /* Do what output_addr_const will do when we actually call it. */ + if (SYMBOL_REF_DECL (p->symbol)) + mark_decl_referenced (SYMBOL_REF_DECL (p->symbol)); + + real_name = targetm.strip_name_encoding (XSTR (p->symbol, 0)); + + id = maybe_get_identifier (real_name); + if (id) + mark_referenced (id); + } +} + +/* Transform ORIG, which may be any data source, to the corresponding + source using indirections. */ + +rtx +machopic_indirect_data_reference (rtx orig, rtx reg) +{ + rtx ptr_ref = orig; + + if (! MACHOPIC_INDIRECT) + return orig; + + if (GET_CODE (orig) == SYMBOL_REF) + { + int defined = machopic_data_defined_p (orig); + + if (defined && MACHO_DYNAMIC_NO_PIC_P) + { + if (DARWIN_PPC) + { + /* Create a new register for CSE opportunities. */ + rtx hi_reg = (!can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode)); + emit_insn (gen_macho_high (hi_reg, orig)); + emit_insn (gen_macho_low (reg, hi_reg, orig)); + return reg; + } + else if (DARWIN_X86) + return orig; + else + /* some other cpu -- writeme! */ + gcc_unreachable (); + } + else if (defined) + { + rtx offset = NULL; + if (DARWIN_PPC || HAVE_lo_sum) + offset = machopic_gen_offset (orig); + + if (DARWIN_PPC) + { + rtx hi_sum_reg = (!can_create_pseudo_p () + ? 
reg + : gen_reg_rtx (Pmode)); + + gcc_assert (reg); + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, + gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_HIGH (Pmode, offset)))); + emit_insn (gen_rtx_SET (Pmode, reg, + gen_rtx_LO_SUM (Pmode, hi_sum_reg, + copy_rtx (offset)))); + + orig = reg; + } + else if (HAVE_lo_sum) + { + gcc_assert (reg); + + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, offset))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + copy_rtx (offset)))); + emit_use (pic_offset_table_rtx); + + orig = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, reg); + } + return orig; + } + + ptr_ref = (gen_rtx_SYMBOL_REF + (Pmode, + machopic_indirection_name (orig, /*stub_p=*/false))); + + SYMBOL_REF_DATA (ptr_ref) = SYMBOL_REF_DATA (orig); + + ptr_ref = gen_const_mem (Pmode, ptr_ref); + machopic_define_symbol (ptr_ref); + + if (DARWIN_X86 + && reg + && MACHO_DYNAMIC_NO_PIC_P) + { + emit_insn (gen_rtx_SET (Pmode, reg, ptr_ref)); + ptr_ref = reg; + } + + return ptr_ref; + } + else if (GET_CODE (orig) == CONST) + { + /* If "(const (plus ...", walk the PLUS and return that result. + PLUS processing (below) will restore the "(const ..." if + appropriate. */ + if (GET_CODE (XEXP (orig, 0)) == PLUS) + return machopic_indirect_data_reference (XEXP (orig, 0), reg); + else + return orig; + } + else if (GET_CODE (orig) == MEM) + { + XEXP (ptr_ref, 0) = + machopic_indirect_data_reference (XEXP (orig, 0), reg); + return ptr_ref; + } + else if (GET_CODE (orig) == PLUS) + { + rtx base, result; + /* When the target is i386, this code prevents crashes due to the + compiler's ignorance on how to move the PIC base register to + other registers. (The reload phase sometimes introduces such + insns.) */ + if (GET_CODE (XEXP (orig, 0)) == REG + && REGNO (XEXP (orig, 0)) == PIC_OFFSET_TABLE_REGNUM + /* Prevent the same register from being erroneously used + as both the base and index registers. */ + && (DARWIN_X86 && (GET_CODE (XEXP (orig, 1)) == CONST)) + && reg) + { + emit_move_insn (reg, XEXP (orig, 0)); + XEXP (ptr_ref, 0) = reg; + return ptr_ref; + } + + /* Legitimize both operands of the PLUS. */ + base = machopic_indirect_data_reference (XEXP (orig, 0), reg); + orig = machopic_indirect_data_reference (XEXP (orig, 1), + (base == reg ? 0 : reg)); + if (MACHOPIC_INDIRECT && (GET_CODE (orig) == CONST_INT)) + result = plus_constant (base, INTVAL (orig)); + else + result = gen_rtx_PLUS (Pmode, base, orig); + + if (MACHOPIC_JUST_INDIRECT && GET_CODE (base) == MEM) + { + if (reg) + { + emit_move_insn (reg, result); + result = reg; + } + else + { + result = force_reg (GET_MODE (result), result); + } + } + + return result; + } + return ptr_ref; +} + +/* Transform TARGET (a MEM), which is a function call target, to the + corresponding symbol_stub if necessary. Return a new MEM. */ + +rtx +machopic_indirect_call_target (rtx target) +{ + if (! 
darwin_emit_branch_islands) + return target; + + if (GET_CODE (target) != MEM) + return target; + + if (MACHOPIC_INDIRECT + && GET_CODE (XEXP (target, 0)) == SYMBOL_REF + && !(SYMBOL_REF_FLAGS (XEXP (target, 0)) + & MACHO_SYMBOL_FLAG_DEFINED)) + { + rtx sym_ref = XEXP (target, 0); + const char *stub_name = machopic_indirection_name (sym_ref, + /*stub_p=*/true); + enum machine_mode mode = GET_MODE (sym_ref); + + XEXP (target, 0) = gen_rtx_SYMBOL_REF (mode, stub_name); + SYMBOL_REF_DATA (XEXP (target, 0)) = SYMBOL_REF_DATA (sym_ref); + MEM_READONLY_P (target) = 1; + MEM_NOTRAP_P (target) = 1; + } + + return target; +} + +rtx +machopic_legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + rtx pic_ref = orig; + + if (! MACHOPIC_INDIRECT) + return orig; + + /* First handle a simple SYMBOL_REF or LABEL_REF */ + if (GET_CODE (orig) == LABEL_REF + || (GET_CODE (orig) == SYMBOL_REF + )) + { + /* addr(foo) = &func+(foo-func) */ + orig = machopic_indirect_data_reference (orig, reg); + + if (GET_CODE (orig) == PLUS + && GET_CODE (XEXP (orig, 0)) == REG) + { + if (reg == 0) + return force_reg (mode, orig); + + emit_move_insn (reg, orig); + return reg; + } + + if (GET_CODE (orig) == MEM) + { + if (reg == 0) + { + gcc_assert (!reload_in_progress); + reg = gen_reg_rtx (Pmode); + } + +#if HAVE_lo_sum + if (MACHO_DYNAMIC_NO_PIC_P + && (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF)) + { +#if defined (TARGET_TOC) /* ppc */ + rtx temp_reg = (!can_create_pseudo_p () + ? reg : + gen_reg_rtx (Pmode)); + rtx asym = XEXP (orig, 0); + rtx mem; + + emit_insn (gen_macho_high (temp_reg, asym)); + mem = gen_const_mem (GET_MODE (orig), + gen_rtx_LO_SUM (Pmode, temp_reg, + copy_rtx (asym))); + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); +#else + /* Some other CPU -- WriteMe! but right now there are no other + platforms that can use dynamic-no-pic */ + gcc_unreachable (); +#endif + pic_ref = reg; + } + else + if (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF) + { + rtx offset = machopic_gen_offset (XEXP (orig, 0)); +#if defined (TARGET_TOC) /* i.e., PowerPC */ + /* Generating a new reg may expose opportunities for + common subexpression elimination. */ + rtx hi_sum_reg = (!can_create_pseudo_p () + ? reg + : gen_reg_rtx (Pmode)); + rtx mem; + rtx insn; + rtx sum; + + sum = gen_rtx_HIGH (Pmode, offset); + if (! 
MACHO_DYNAMIC_NO_PIC_P) + sum = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, sum); + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, sum)); + + mem = gen_const_mem (GET_MODE (orig), + gen_rtx_LO_SUM (Pmode, + hi_sum_reg, + copy_rtx (offset))); + insn = emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); + set_unique_reg_note (insn, REG_EQUAL, pic_ref); + + pic_ref = reg; +#else + emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); + + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, + gen_rtx_CONST (Pmode, + offset)))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + gen_rtx_CONST (Pmode, + copy_rtx (offset))))); + pic_ref = gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, reg); +#endif + } + else +#endif /* HAVE_lo_sum */ + { + rtx pic = pic_offset_table_rtx; + if (GET_CODE (pic) != REG) + { + emit_move_insn (reg, pic); + pic = reg; + } +#if 0 + emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); +#endif + + if (reload_in_progress) + df_set_regs_ever_live (REGNO (pic), true); + pic_ref = gen_rtx_PLUS (Pmode, pic, + machopic_gen_offset (XEXP (orig, 0))); + } + +#if !defined (TARGET_TOC) + emit_move_insn (reg, pic_ref); + pic_ref = gen_const_mem (GET_MODE (orig), reg); +#endif + } + else + { + +#if HAVE_lo_sum + if (GET_CODE (orig) == SYMBOL_REF + || GET_CODE (orig) == LABEL_REF) + { + rtx offset = machopic_gen_offset (orig); +#if defined (TARGET_TOC) /* i.e., PowerPC */ + rtx hi_sum_reg; + + if (reg == 0) + { + gcc_assert (!reload_in_progress); + reg = gen_reg_rtx (Pmode); + } + + hi_sum_reg = reg; + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, + (MACHO_DYNAMIC_NO_PIC_P) + ? gen_rtx_HIGH (Pmode, offset) + : gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + gen_rtx_HIGH (Pmode, + offset)))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, + hi_sum_reg, + copy_rtx (offset)))); + pic_ref = reg; +#else + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, offset))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + copy_rtx (offset)))); + pic_ref = gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, reg); +#endif + } + else +#endif /* HAVE_lo_sum */ + { + if (REG_P (orig) + || GET_CODE (orig) == SUBREG) + { + return orig; + } + else + { + rtx pic = pic_offset_table_rtx; + if (GET_CODE (pic) != REG) + { + emit_move_insn (reg, pic); + pic = reg; + } +#if 0 + emit_use (pic_offset_table_rtx); +#endif + if (reload_in_progress) + df_set_regs_ever_live (REGNO (pic), true); + pic_ref = gen_rtx_PLUS (Pmode, + pic, + machopic_gen_offset (orig)); + } + } + } + + if (GET_CODE (pic_ref) != REG) + { + if (reg != 0) + { + emit_move_insn (reg, pic_ref); + return reg; + } + else + { + return force_reg (mode, pic_ref); + } + } + else + { + return pic_ref; + } + } + + else if (GET_CODE (orig) == SYMBOL_REF) + return orig; + + else if (GET_CODE (orig) == PLUS + && (GET_CODE (XEXP (orig, 0)) == MEM + || GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF) + && XEXP (orig, 0) != pic_offset_table_rtx + && GET_CODE (XEXP (orig, 1)) != REG) + + { + rtx base; + int is_complex = (GET_CODE (XEXP (orig, 0)) == MEM); + + base = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + orig = machopic_legitimize_pic_address (XEXP (orig, 1), + Pmode, (base == reg ? 
0 : reg)); + if (GET_CODE (orig) == CONST_INT) + { + pic_ref = plus_constant (base, INTVAL (orig)); + is_complex = 1; + } + else + pic_ref = gen_rtx_PLUS (Pmode, base, orig); + + if (reg && is_complex) + { + emit_move_insn (reg, pic_ref); + pic_ref = reg; + } + /* Likewise, should we set special REG_NOTEs here? */ + } + + else if (GET_CODE (orig) == CONST) + { + return machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + } + + else if (GET_CODE (orig) == MEM + && GET_CODE (XEXP (orig, 0)) == SYMBOL_REF) + { + rtx addr = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + addr = replace_equiv_address (orig, addr); + emit_move_insn (reg, addr); + pic_ref = reg; + } + + return pic_ref; +} + +/* Output the stub or non-lazy pointer in *SLOT, if it has been used. + DATA is the FILE* for assembly output. Called from + htab_traverse. */ + +static int +machopic_output_indirection (void **slot, void *data) +{ + machopic_indirection *p = *((machopic_indirection **) slot); + FILE *asm_out_file = (FILE *) data; + rtx symbol; + const char *sym_name; + const char *ptr_name; + + if (!p->used) + return 1; + + symbol = p->symbol; + sym_name = XSTR (symbol, 0); + ptr_name = p->ptr_name; + + if (p->stub_p) + { + char *sym; + char *stub; + tree id; + + id = maybe_get_identifier (sym_name); + if (id) + { + tree id_orig = id; + + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + if (id != id_orig) + sym_name = IDENTIFIER_POINTER (id); + } + + sym = XALLOCAVEC (char, strlen (sym_name) + 2); + if (sym_name[0] == '*' || sym_name[0] == '&') + strcpy (sym, sym_name + 1); + else if (sym_name[0] == '-' || sym_name[0] == '+') + strcpy (sym, sym_name); + else + sprintf (sym, "%s%s", user_label_prefix, sym_name); + + stub = XALLOCAVEC (char, strlen (ptr_name) + 2); + if (ptr_name[0] == '*' || ptr_name[0] == '&') + strcpy (stub, ptr_name + 1); + else + sprintf (stub, "%s%s", user_label_prefix, ptr_name); + + machopic_output_stub (asm_out_file, sym, stub); + } + else if (! indirect_data (symbol) + && (machopic_symbol_defined_p (symbol) + || SYMBOL_REF_LOCAL_P (symbol))) + { + switch_to_section (data_section); + assemble_align (GET_MODE_ALIGNMENT (Pmode)); + assemble_label (asm_out_file, ptr_name); + assemble_integer (gen_rtx_SYMBOL_REF (Pmode, sym_name), + GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); + } + else + { + rtx init = const0_rtx; + + switch_to_section (darwin_sections[machopic_nl_symbol_ptr_section]); + + /* Mach-O symbols are passed around in code through indirect + references and the original symbol_ref hasn't passed through + the generic handling and reference-catching in + output_operand, so we need to manually mark weak references + as such. */ + if (SYMBOL_REF_WEAK (symbol)) + { + tree decl = SYMBOL_REF_DECL (symbol); + gcc_assert (DECL_P (decl)); + + if (decl != NULL_TREE + && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) + /* Handle only actual external-only definitions, not + e.g. extern inline code or variables for which + storage has been allocated. 
*/ + && !TREE_STATIC (decl)) + { + fputs ("\t.weak_reference ", asm_out_file); + assemble_name (asm_out_file, sym_name); + fputc ('\n', asm_out_file); + } + } + + assemble_name (asm_out_file, ptr_name); + fprintf (asm_out_file, ":\n"); + + fprintf (asm_out_file, "\t.indirect_symbol "); + assemble_name (asm_out_file, sym_name); + fprintf (asm_out_file, "\n"); + + /* Variables that are marked with MACHO_SYMBOL_STATIC need to + have their symbol name instead of 0 in the second entry of + the non-lazy symbol pointer data structure when they are + defined. This allows the runtime to rebind newer instances + of the translation unit with the original instance of the + symbol. */ + + if ((SYMBOL_REF_FLAGS (symbol) & MACHO_SYMBOL_STATIC) + && machopic_symbol_defined_p (symbol)) + init = gen_rtx_SYMBOL_REF (Pmode, sym_name); + + assemble_integer (init, GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); + } + + return 1; +} + +void +machopic_finish (FILE *asm_out_file) +{ + if (machopic_indirections) + htab_traverse_noresize (machopic_indirections, + machopic_output_indirection, + asm_out_file); +} + +int +machopic_operand_p (rtx op) +{ + if (MACHOPIC_JUST_INDIRECT) + return (GET_CODE (op) == SYMBOL_REF + && machopic_symbol_defined_p (op)); + else + return (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == UNSPEC + && XINT (XEXP (op, 0), 1) == UNSPEC_MACHOPIC_OFFSET); +} + +/* This function records whether a given name corresponds to a defined + or undefined function or variable, for machopic_classify_ident to + use later. */ + +void +darwin_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED) +{ + rtx sym_ref; + + /* Do the standard encoding things first. */ + default_encode_section_info (decl, rtl, first); + + if (TREE_CODE (decl) != FUNCTION_DECL && TREE_CODE (decl) != VAR_DECL) + return; + + sym_ref = XEXP (rtl, 0); + if (TREE_CODE (decl) == VAR_DECL) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_VARIABLE; + + if (!DECL_EXTERNAL (decl) + && (!TREE_PUBLIC (decl) || !DECL_WEAK (decl)) + && ! lookup_attribute ("weakref", DECL_ATTRIBUTES (decl)) + && ((TREE_STATIC (decl) + && (!DECL_COMMON (decl) || !TREE_PUBLIC (decl))) + || (!DECL_COMMON (decl) && DECL_INITIAL (decl) + && DECL_INITIAL (decl) != error_mark_node))) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED; + + if (! TREE_PUBLIC (decl)) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_STATIC; +} + +void +darwin_mark_decl_preserved (const char *name) +{ + fprintf (asm_out_file, "\t.no_dead_strip "); + assemble_name (asm_out_file, name); + fputc ('\n', asm_out_file); +} + +static section * +darwin_rodata_section (int weak, bool zsize) +{ + return (weak + ? darwin_sections[const_coal_section] + : (zsize ? darwin_sections[zobj_const_section] + : darwin_sections[const_section])); +} + +static section * +darwin_mergeable_string_section (tree exp, + unsigned HOST_WIDE_INT align) +{ + /* Darwin's ld expects to see non-writable string literals in the .cstring + section. Later versions of ld check and complain when CFStrings are + enabled. Therefore we shall force the strings into .cstring since we + don't support writable ones anyway. 
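A minimal standalone restatement of the .cstring eligibility test that follows (illustrative only; the real check works on STRING_CST trees rather than raw byte buffers, and 256 is the alignment bound used in the condition below):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Sketch: a literal is eligible for .cstring when it is not over-aligned,
   has no tail padding, and holds exactly one terminating NUL with no
   embedded NULs.  */
static bool
cstring_eligible_sketch (const char *bytes, size_t object_size,
                         size_t string_length, unsigned align_bits)
{
  return align_bits <= 256
         && object_size == string_length          /* no padding */
         && string_length == strlen (bytes) + 1;  /* single NUL at the end */
}

int
main (void)
{
  printf ("%d\n", cstring_eligible_sketch ("hi", 3, 3, 8));   /* prints 1 */
  return 0;
}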
*/ + if ((darwin_constant_cfstrings || flag_merge_constants) + && TREE_CODE (exp) == STRING_CST + && TREE_CODE (TREE_TYPE (exp)) == ARRAY_TYPE + && align <= 256 + && (int_size_in_bytes (TREE_TYPE (exp)) + == TREE_STRING_LENGTH (exp)) + && ((size_t) TREE_STRING_LENGTH (exp) + == strlen (TREE_STRING_POINTER (exp)) + 1)) + return darwin_sections[cstring_section]; + + if (DARWIN_SECTION_ANCHORS && flag_section_anchors + && TREE_CODE (exp) == STRING_CST + && TREE_STRING_LENGTH (exp) == 0) + return darwin_sections[zobj_const_section]; + + return readonly_data_section; +} + +#ifndef HAVE_GAS_LITERAL16 +#define HAVE_GAS_LITERAL16 0 +#endif + +static section * +darwin_mergeable_constant_section (tree exp, + unsigned HOST_WIDE_INT align, + bool zsize) +{ + enum machine_mode mode = DECL_MODE (exp); + unsigned int modesize = GET_MODE_BITSIZE (mode); + + if (DARWIN_SECTION_ANCHORS + && flag_section_anchors + && zsize) + return darwin_sections[zobj_const_section]; + + if (flag_merge_constants + && mode != VOIDmode + && mode != BLKmode + && modesize <= align + && align >= 8 + && align <= 256 + && (align & (align -1)) == 0) + { + tree size = TYPE_SIZE_UNIT (TREE_TYPE (exp)); + + if (TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 4 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal4_section]; + else if (TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 8 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal8_section]; + else if (HAVE_GAS_LITERAL16 + && TARGET_64BIT + && TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 16 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal16_section]; + else + return readonly_data_section; + } + + return readonly_data_section; +} + +int +machopic_reloc_rw_mask (void) +{ + return MACHOPIC_INDIRECT ? 3 : 0; +} + +/* We have to deal with ObjC/C++ metadata section placement in the common + code, since it will also be called from LTO. + + Return metadata attributes, if present (searching for ABI=2 first) + Return NULL_TREE if no such attributes are found. */ + +static tree +is_objc_metadata (tree decl) +{ + if (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + && DECL_ATTRIBUTES (decl)) + { + tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl)); + if (meta) + return meta; + meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl)); + if (meta) + return meta; + } + return NULL_TREE; +} + +/* Return the section required for Objective C ABI 2 metadata. */ +static section * +darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) +{ + const char *p; + tree ident = TREE_VALUE (meta); + gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); + p = IDENTIFIER_POINTER (ident); + + /* If we are in LTO, then we don't know the state of flag_next_runtime + or flag_objc_abi when the code was generated. We set these from the + meta-data - which is needed to deal with const string constructors. */ + + flag_next_runtime = 1; + flag_objc_abi = 2; + + if (base == data_section) + base = darwin_sections[objc2_metadata_section]; + + /* Most of the OBJC2 META-data end up in the base section, so check it + first. 
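The 7-character tag dispatch that follows can be pictured as a small table lookup; the section names in this sketch are illustrative placeholders, not the exact strings that darwin.h configures for darwin_sections[]:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

static const struct { const char *tag; const char *section; } objc2_map[] = {
  { "V2_STRG", "__TEXT,__cstring"        },
  { "V2_MREF", "__DATA,__objc_msgrefs"   },
  { "V2_CLRF", "__DATA,__objc_classrefs" },
  { "V2_SRFS", "__DATA,__objc_selrefs"   },
  { "V2_INFO", "__DATA,__objc_imageinfo" },
};

static const char *
objc2_section_for_tag_sketch (const char *tag, const char *base)
{
  for (size_t i = 0; i < sizeof objc2_map / sizeof objc2_map[0]; i++)
    if (!strncmp (tag, objc2_map[i].tag, 7))
      return objc2_map[i].section;
  return base;  /* V2_BASE and unrecognised tags fall back to the base section.  */
}

int
main (void)
{
  puts (objc2_section_for_tag_sketch ("V2_SRFS", "__DATA,__objc_const"));
  return 0;
}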
*/ + if (!strncmp (p, "V2_BASE", 7)) + return base; + else if (!strncmp (p, "V2_STRG", 7)) + return darwin_sections[cstring_section]; + + else if (!strncmp (p, "G2_META", 7) || !strncmp (p, "G2_CLAS", 7)) + return darwin_sections[objc2_classdefs_section]; + else if (!strncmp (p, "V2_MREF", 7)) + return darwin_sections[objc2_message_refs_section]; + else if (!strncmp (p, "V2_CLRF", 7)) + return darwin_sections[objc2_classrefs_section]; + else if (!strncmp (p, "V2_SURF", 7)) + return darwin_sections[objc2_super_classrefs_section]; + else if (!strncmp (p, "V2_NLCL", 7)) + return darwin_sections[objc2_nonlazy_class_section]; + else if (!strncmp (p, "V2_CLAB", 7)) + return darwin_sections[objc2_classlist_section]; + else if (!strncmp (p, "V2_SRFS", 7)) + return darwin_sections[objc2_selector_refs_section]; + else if (!strncmp (p, "V2_NLCA", 7)) + return darwin_sections[objc2_nonlazy_category_section]; + else if (!strncmp (p, "V2_CALA", 7)) + return darwin_sections[objc2_categorylist_section]; + + else if (!strncmp (p, "V2_PLST", 7)) + return darwin_sections[objc2_protocollist_section]; + else if (!strncmp (p, "V2_PRFS", 7)) + return darwin_sections[objc2_protocolrefs_section]; + + else if (!strncmp (p, "V2_INFO", 7)) + return darwin_sections[objc2_image_info_section]; + + else if (!strncmp (p, "V2_EHTY", 7)) + return darwin_sections[data_coal_section]; + + else if (!strncmp (p, "V2_CSTR", 7)) + return darwin_sections[objc2_constant_string_object_section]; + + /* Not recognized, default. */ + return base; +} + +/* Return the section required for Objective C ABI 0/1 metadata. */ +static section * +darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) +{ + const char *p; + tree ident = TREE_VALUE (meta); + gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); + p = IDENTIFIER_POINTER (ident); + + /* If we are in LTO, then we don't know the state of flag_next_runtime + or flag_objc_abi when the code was generated. We set these from the + meta-data - which is needed to deal with const string constructors. */ + flag_next_runtime = 1; + if (!global_options_set.x_flag_objc_abi) + flag_objc_abi = 1; + + /* String sections first, cos there are lots of strings. 
*/ + if (!strncmp (p, "V1_STRG", 7)) + return darwin_sections[cstring_section]; + else if (!strncmp (p, "V1_CLSN", 7)) + return darwin_sections[objc_class_names_section]; + else if (!strncmp (p, "V1_METN", 7)) + return darwin_sections[objc_meth_var_names_section]; + else if (!strncmp (p, "V1_METT", 7)) + return darwin_sections[objc_meth_var_types_section]; + + else if (!strncmp (p, "V1_CLAS", 7)) + return darwin_sections[objc_class_section]; + else if (!strncmp (p, "V1_META", 7)) + return darwin_sections[objc_meta_class_section]; + else if (!strncmp (p, "V1_CATG", 7)) + return darwin_sections[objc_category_section]; + else if (!strncmp (p, "V1_PROT", 7)) + return darwin_sections[objc_protocol_section]; + + else if (!strncmp (p, "V1_CLCV", 7)) + return darwin_sections[objc_class_vars_section]; + else if (!strncmp (p, "V1_CLIV", 7)) + return darwin_sections[objc_instance_vars_section]; + + else if (!strncmp (p, "V1_CLCM", 7)) + return darwin_sections[objc_cls_meth_section]; + else if (!strncmp (p, "V1_CLIM", 7)) + return darwin_sections[objc_inst_meth_section]; + else if (!strncmp (p, "V1_CACM", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CAIM", 7)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (p, "V1_PNSM", 7)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (p, "V1_PCLM", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + + else if (!strncmp (p, "V1_CLPR", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CAPR", 7)) + return darwin_sections[objc_category_section]; /* ??? CHECK me. */ + + else if (!strncmp (p, "V1_PRFS", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CLRF", 7)) + return darwin_sections[objc_cls_refs_section]; + else if (!strncmp (p, "V1_SRFS", 7)) + return darwin_sections[objc_selector_refs_section]; + + else if (!strncmp (p, "V1_MODU", 7)) + return darwin_sections[objc_module_info_section]; + else if (!strncmp (p, "V1_SYMT", 7)) + return darwin_sections[objc_symbols_section]; + else if (!strncmp (p, "V1_INFO", 7)) + return darwin_sections[objc_image_info_section]; + + else if (!strncmp (p, "V1_PLST", 7)) + return darwin_sections[objc1_prop_list_section]; + else if (!strncmp (p, "V1_PEXT", 7)) + return darwin_sections[objc1_protocol_ext_section]; + else if (!strncmp (p, "V1_CEXT", 7)) + return darwin_sections[objc1_class_ext_section]; + + else if (!strncmp (p, "V2_CSTR", 7)) + return darwin_sections[objc_constant_string_object_section]; + + return base; +} + +section * +machopic_select_section (tree decl, + int reloc, + unsigned HOST_WIDE_INT align) +{ + bool zsize, one, weak, ro; + section *base_section = NULL; + + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); + + zsize = (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + && tree_low_cst (DECL_SIZE_UNIT (decl), 1) == 0); + + one = DECL_P (decl) + && TREE_CODE (decl) == VAR_DECL + && DECL_ONE_ONLY (decl); + + ro = TREE_READONLY (decl) || TREE_CONSTANT (decl) ; + + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_TEXT: + gcc_unreachable (); + break; + + case SECCAT_RODATA: + case SECCAT_SRODATA: + base_section = darwin_rodata_section (weak, zsize); + break; + + case SECCAT_RODATA_MERGE_STR: + base_section = darwin_mergeable_string_section (decl, align); + break; + + case SECCAT_RODATA_MERGE_STR_INIT: + base_section = 
darwin_mergeable_string_section (DECL_INITIAL (decl), align); + break; + + case SECCAT_RODATA_MERGE_CONST: + base_section = darwin_mergeable_constant_section (decl, align, zsize); + break; + + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + case SECCAT_SDATA: + case SECCAT_TDATA: + if (weak || one) + { + if (ro) + base_section = darwin_sections[const_data_coal_section]; + else + base_section = darwin_sections[data_coal_section]; + } + else if (DARWIN_SECTION_ANCHORS + && flag_section_anchors + && zsize) + { + /* If we're doing section anchors, then punt zero-sized objects into + their own sections so that they don't interfere with offset + computation for the remaining vars. This does not need to be done + for stuff in mergeable sections, since these are ineligible for + anchors. */ + if (ro) + base_section = darwin_sections[zobj_const_data_section]; + else + base_section = darwin_sections[zobj_data_section]; + } + else if (ro) + base_section = darwin_sections[const_data_section]; + else + base_section = data_section; + break; + case SECCAT_BSS: + case SECCAT_SBSS: + case SECCAT_TBSS: + if (weak || one) + base_section = darwin_sections[data_coal_section]; + else + { + if (!TREE_PUBLIC (decl)) + base_section = lcomm_section; + else if (bss_noswitch_section) + base_section = bss_noswitch_section; + else + base_section = data_section; + } + break; + + default: + gcc_unreachable (); + } + + /* Darwin weird special cases. + a) OBJC Meta-data. */ + if (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL + || TREE_CODE (decl) == CONST_DECL) + && DECL_ATTRIBUTES (decl)) + { + tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl)); + if (meta) + return darwin_objc2_section (decl, meta, base_section); + meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl)); + if (meta) + return darwin_objc1_section (decl, meta, base_section); + meta = lookup_attribute ("OBJC1METG", DECL_ATTRIBUTES (decl)); + if (meta) + return base_section; /* GNU runtime is happy with it all in one pot. */ + } + + /* b) Constant string objects. */ + if (TREE_CODE (decl) == CONSTRUCTOR + && TREE_TYPE (decl) + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE + && TYPE_NAME (TREE_TYPE (decl))) + { + tree name = TYPE_NAME (TREE_TYPE (decl)); + if (TREE_CODE (name) == TYPE_DECL) + name = DECL_NAME (name); + + /* FIXME: This is unsatisfactory for LTO, since it relies on other + metadata determining the source FE. */ + if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_ObjCString")) + { + if (flag_next_runtime) + { + if (flag_objc_abi == 2) + return darwin_sections[objc2_constant_string_object_section]; + else + return darwin_sections[objc_constant_string_object_section]; + } + else + return darwin_sections[objc_string_object_section]; + } + else if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_CFString")) + return darwin_sections[cfstring_constant_object_section]; + else + return base_section; + } + /* c) legacy meta-data selection. */ + else if (TREE_CODE (decl) == VAR_DECL + && DECL_NAME (decl) + && TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE + && IDENTIFIER_POINTER (DECL_NAME (decl)) + && flag_next_runtime + && !strncmp (IDENTIFIER_POINTER (DECL_NAME (decl)), "_OBJC_", 6)) + { + const char *name = IDENTIFIER_POINTER (DECL_NAME (decl)); + static bool warned_objc_46 = false; + /* We shall assert that zero-sized objects are an error in ObjC + meta-data. */ + gcc_assert (tree_low_cst (DECL_SIZE_UNIT (decl), 1) != 0); + + /* ??? 
This mechanism for determining the metadata section is + broken when LTO is in use, since the frontend that generated + the data is not identified. We will keep the capability for + the short term - in case any non-Objective-C programs are using + it to place data in specified sections. */ + if (!warned_objc_46) + { + location_t loc = DECL_SOURCE_LOCATION (decl); + warning_at (loc, 0, "the use of _OBJC_-prefixed variable names" + " to select meta-data sections is deprecated at 4.6" + " and will be removed in 4.7"); + warned_objc_46 = true; + } + + if (!strncmp (name, "_OBJC_CLASS_METHODS_", 20)) + return darwin_sections[objc_cls_meth_section]; + else if (!strncmp (name, "_OBJC_INSTANCE_METHODS_", 23)) + return darwin_sections[objc_inst_meth_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_CLASS_METHODS_", 29)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_INSTANCE_METHODS_", 32)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (name, "_OBJC_CLASS_VARIABLES_", 22)) + return darwin_sections[objc_class_vars_section]; + else if (!strncmp (name, "_OBJC_INSTANCE_VARIABLES_", 25)) + return darwin_sections[objc_instance_vars_section]; + else if (!strncmp (name, "_OBJC_CLASS_PROTOCOLS_", 22)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_CLASS_NAME_", 17)) + return darwin_sections[objc_class_names_section]; + else if (!strncmp (name, "_OBJC_METH_VAR_NAME_", 20)) + return darwin_sections[objc_meth_var_names_section]; + else if (!strncmp (name, "_OBJC_METH_VAR_TYPE_", 20)) + return darwin_sections[objc_meth_var_types_section]; + else if (!strncmp (name, "_OBJC_CLASS_REFERENCES", 22)) + return darwin_sections[objc_cls_refs_section]; + else if (!strncmp (name, "_OBJC_CLASS_", 12)) + return darwin_sections[objc_class_section]; + else if (!strncmp (name, "_OBJC_METACLASS_", 16)) + return darwin_sections[objc_meta_class_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_", 15)) + return darwin_sections[objc_category_section]; + else if (!strncmp (name, "_OBJC_SELECTOR_REFERENCES", 25)) + return darwin_sections[objc_selector_refs_section]; + else if (!strncmp (name, "_OBJC_SELECTOR_FIXUP", 20)) + return darwin_sections[objc_selector_fixup_section]; + else if (!strncmp (name, "_OBJC_SYMBOLS", 13)) + return darwin_sections[objc_symbols_section]; + else if (!strncmp (name, "_OBJC_MODULES", 13)) + return darwin_sections[objc_module_info_section]; + else if (!strncmp (name, "_OBJC_IMAGE_INFO", 16)) + return darwin_sections[objc_image_info_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_INSTANCE_METHODS_", 32)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_CLASS_METHODS_", 29)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_REFS_", 20)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_", 15)) + return darwin_sections[objc_protocol_section]; + else + return base_section; + } + + return base_section; +} + +/* This can be called with address expressions as "rtx". + They must go in "const". 
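The constant-pool placement implemented by machopic_select_rtx_section, which follows, can be sketched as below; the decision order is simplified and the section names are descriptive stand-ins rather than the darwin_sections[] entries used by the real code:

#include <stdio.h>

static const char *
literal_section_for_size_sketch (unsigned size_in_bytes, int is_symbolic,
                                 int pic_indirect, int have_literal16)
{
  /* Symbolic constants under Mach-O PIC indirection go to writable const
     data; plain scalars go to the literal pool matching their size.  */
  if (is_symbolic)
    return pic_indirect ? "__DATA,__const" : "__TEXT,__const";
  if (size_in_bytes == 8)
    return "__TEXT,__literal8";
  if (size_in_bytes == 4)
    return "__TEXT,__literal4";
  if (size_in_bytes == 16 && have_literal16)
    return "__TEXT,__literal16";
  return "__TEXT,__const";
}

int
main (void)
{
  puts (literal_section_for_size_sketch (8, 0, 0, 1));  /* __TEXT,__literal8 */
  return 0;
}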
*/ + +section * +machopic_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + if (GET_MODE_SIZE (mode) == 8 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE)) + return darwin_sections[literal8_section]; + else if (GET_MODE_SIZE (mode) == 4 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE)) + return darwin_sections[literal4_section]; + else if (HAVE_GAS_LITERAL16 + && TARGET_64BIT + && GET_MODE_SIZE (mode) == 16 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE + || GET_CODE (x) == CONST_VECTOR)) + return darwin_sections[literal16_section]; + else if (MACHOPIC_INDIRECT + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == CONST + || GET_CODE (x) == LABEL_REF)) + return darwin_sections[const_data_section]; + else + return darwin_sections[const_section]; +} + +void +machopic_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + if (MACHOPIC_INDIRECT) + switch_to_section (darwin_sections[mod_init_section]); + else + switch_to_section (darwin_sections[constructor_section]); + assemble_align (POINTER_SIZE); + assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); + + if (! MACHOPIC_INDIRECT) + fprintf (asm_out_file, ".reference .constructors_used\n"); +} + +void +machopic_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + if (MACHOPIC_INDIRECT) + switch_to_section (darwin_sections[mod_term_section]); + else + switch_to_section (darwin_sections[destructor_section]); + assemble_align (POINTER_SIZE); + assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); + + if (! MACHOPIC_INDIRECT) + fprintf (asm_out_file, ".reference .destructors_used\n"); +} + +void +darwin_globalize_label (FILE *stream, const char *name) +{ + if (!!strncmp (name, "_OBJC_", 6)) + default_globalize_label (stream, name); +} + +/* This routine returns non-zero if 'name' starts with the special objective-c + anonymous file-scope static name. It accommodates c++'s mangling of such + symbols (in this case the symbols will have form _ZL{d}*_OBJC_* d=digit). */ + +int +darwin_label_is_anonymous_local_objc_name (const char *name) +{ + const unsigned char *p = (const unsigned char *) name; + if (*p != '_') + return 0; + if (p[1] == 'Z' && p[2] == 'L') + { + p += 3; + while (*p >= '0' && *p <= '9') + p++; + } + return (!strncmp ((const char *)p, "_OBJC_", 6)); +} + +/* LTO support for Mach-O. + + This version uses three mach-o sections to encapsulate the (unlimited + number of) lto sections. + + __GNU_LTO, __lto_sections contains the concatented GNU LTO section data. + __GNU_LTO, __section_names contains the GNU LTO section names. + __GNU_LTO, __section_index contains an array of values that index these. + + Indexed thus: +
<section offset>, <section length>, + <name offset>, <name length>
+ . + + At present, for both m32 and m64 mach-o files each of these fields is + represented by a uint32_t. This is because, AFAICT, a mach-o object + cannot exceed 4Gb because the section_64 offset field (see below) is 32bits. + + uint32_t offset; + "offset An integer specifying the offset to this section in the file." */ + +/* Count lto section numbers. */ +static unsigned int lto_section_num = 0; + +/* A vector of information about LTO sections, at present, we only have + the name. TODO: see if we can get the data length somehow. */ +typedef struct GTY (()) darwin_lto_section_e { + const char *sectname; +} darwin_lto_section_e ; +DEF_VEC_O(darwin_lto_section_e); +DEF_VEC_ALLOC_O(darwin_lto_section_e, gc); + +static GTY (()) VEC (darwin_lto_section_e, gc) * lto_section_names; + +/* Segment for LTO data. */ +#define LTO_SEGMENT_NAME "__GNU_LTO" + +/* Section wrapper scheme (used here to wrap the unlimited number of LTO + sections into three Mach-O ones). + NOTE: These names MUST be kept in sync with those in + libiberty/simple-object-mach-o. */ +#define LTO_SECTS_SECTION "__wrapper_sects" +#define LTO_NAMES_SECTION "__wrapper_names" +#define LTO_INDEX_SECTION "__wrapper_index" + +/* File to temporarily store LTO data. This is appended to asm_out_file + in darwin_end_file. */ +static FILE *lto_asm_out_file, *saved_asm_out_file; +static char *lto_asm_out_name; + +/* Prepare asm_out_file for LTO output. For darwin, this means hiding + asm_out_file and switching to an alternative output file. */ +void +darwin_asm_lto_start (void) +{ + gcc_assert (! saved_asm_out_file); + saved_asm_out_file = asm_out_file; + if (! lto_asm_out_name) + lto_asm_out_name = make_temp_file (".lto.s"); + lto_asm_out_file = fopen (lto_asm_out_name, "a"); + if (lto_asm_out_file == NULL) + fatal_error ("failed to open temporary file %s for LTO output", + lto_asm_out_name); + asm_out_file = lto_asm_out_file; +} + +/* Restore asm_out_file. */ +void +darwin_asm_lto_end (void) +{ + gcc_assert (saved_asm_out_file); + fclose (lto_asm_out_file); + asm_out_file = saved_asm_out_file; + saved_asm_out_file = NULL; +} + +static void +darwin_asm_dwarf_section (const char *name, unsigned int flags, tree decl); + +/* Called for the TARGET_ASM_NAMED_SECTION hook. */ + +void +darwin_asm_named_section (const char *name, + unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + /* LTO sections go in a special section that encapsulates the (unlimited) + number of GNU LTO sections within a single mach-o one. */ + if (strncmp (name, LTO_SECTION_NAME_PREFIX, + strlen (LTO_SECTION_NAME_PREFIX)) == 0) + { + darwin_lto_section_e e; + /* We expect certain flags to be set... */ + gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED)) + == (SECTION_DEBUG | SECTION_NAMED)); + + /* Switch to our combined section. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_SECTS_SECTION); + /* Output a label for the start of this sub-section. */ + fprintf (asm_out_file, "L_GNU_LTO%d:\t;# %s\n", + lto_section_num, name); + /* We have to jump through hoops to get the values of the intra-section + offsets... */ + fprintf (asm_out_file, "\t.set L$gnu$lto$offs%d,L_GNU_LTO%d-L_GNU_LTO0\n", + lto_section_num, lto_section_num); + fprintf (asm_out_file, + "\t.set L$gnu$lto$size%d,L_GNU_LTO%d-L_GNU_LTO%d\n", + lto_section_num, lto_section_num+1, lto_section_num); + lto_section_num++; + e.sectname = xstrdup (name); + /* Keep the names, we'll need to make a table later. 
+ TODO: check that we do not revisit sections, that would break + the assumption of how this is done. */ + if (lto_section_names == NULL) + lto_section_names = VEC_alloc (darwin_lto_section_e, gc, 16); + VEC_safe_push (darwin_lto_section_e, gc, lto_section_names, &e); + } + else if (strncmp (name, "__DWARF,", 8) == 0) + darwin_asm_dwarf_section (name, flags, decl); + else + fprintf (asm_out_file, "\t.section %s\n", name); +} + +void +darwin_unique_section (tree decl ATTRIBUTE_UNUSED, int reloc ATTRIBUTE_UNUSED) +{ + /* Darwin does not use unique sections. */ +} + +/* Handle __attribute__ ((apple_kext_compatibility)). + This only applies to darwin kexts for 2.95 compatibility -- it shrinks the + vtable for classes with this attribute (and their descendants) by not + outputting the new 3.0 nondeleting destructor. This means that such + objects CANNOT be allocated on the stack or as globals UNLESS they have + a completely empty `operator delete'. + Luckily, this fits in with the Darwin kext model. + + This attribute also disables gcc3's potential overlaying of derived + class data members on the padding at the end of the base class. */ + +tree +darwin_handle_kext_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + /* APPLE KEXT stuff -- only applies with pure static C++ code. */ + if (! TARGET_KEXTABI) + { + warning (0, "%qE 2.95 vtable-compatibility attribute applies " + "only when compiling a kext", name); + + *no_add_attrs = true; + } + else if (TREE_CODE (*node) != RECORD_TYPE) + { + warning (0, "%qE 2.95 vtable-compatibility attribute applies " + "only to C++ classes", name); + + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle a "weak_import" attribute; arguments as in + struct attribute_spec.handler. */ + +tree +darwin_handle_weak_import_attribute (tree *node, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool * no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL && TREE_CODE (*node) != VAR_DECL) + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + else + declare_weak (*node); + + return NULL_TREE; +} + +/* Emit a label for an FDE, making it global and/or weak if appropriate. + The third parameter is nonzero if this is for exception handling. + The fourth parameter is nonzero if this is just a placeholder for an + FDE that we are omitting. */ + +void +darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) +{ + char *lab ; + char buf[32]; + static int invok_count = 0; + static tree last_fun_decl = NULL_TREE; + + /* We use the linker to emit the .eh labels for Darwin 9 and above. */ + if (! for_eh || generating_for_darwin_version >= 9) + return; + + /* FIXME: This only works when the eh for all sections of a function is + emitted at the same time. If that changes, we would need to use a lookup + table of some form to determine what to do. Also, we should emit the + unadorned label for the partition containing the public label for a + function. This is of limited use, probably, since we do not currently + enable partitioning. 
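For reference, a tiny sketch of the FDE label naming implemented below; the helper and its arguments are hypothetical:

#include <stddef.h>
#include <stdio.h>

/* The first FDE for a function is named "<asm name>.eh"; further partitions
   of the same function are named "<asm name>$$part$$<n>.eh".  */
static void
format_eh_label_sketch (char *out, size_t outsz, const char *asm_name,
                        int partition)
{
  if (partition == 0)
    snprintf (out, outsz, "%s.eh", asm_name);
  else
    snprintf (out, outsz, "%s$$part$$%d.eh", asm_name, partition);
}

int
main (void)
{
  char lab[64];
  format_eh_label_sketch (lab, sizeof lab, "_main", 1);
  puts (lab);   /* prints _main$$part$$1.eh */
  return 0;
}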
*/ + strcpy (buf, ".eh"); + if (decl && TREE_CODE (decl) == FUNCTION_DECL) + { + if (decl == last_fun_decl) + { + invok_count++; + snprintf (buf, 31, "$$part$$%d.eh", invok_count); + } + else + { + last_fun_decl = decl; + invok_count = 0; + } + } + + lab = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), buf, NULL); + + if (TREE_PUBLIC (decl)) + { + targetm.asm_out.globalize_label (file, lab); + if (DECL_VISIBILITY (decl) == VISIBILITY_HIDDEN) + { + fputs ("\t.private_extern ", file); + assemble_name (file, lab); + fputc ('\n', file); + } + } + + if (DECL_WEAK (decl)) + { + fputs ("\t.weak_definition ", file); + assemble_name (file, lab); + fputc ('\n', file); + } + + assemble_name (file, lab); + if (empty) + { + fputs (" = 0\n", file); + + /* Mark the absolute .eh and .eh1 style labels as needed to + ensure that we don't dead code strip them and keep such + labels from another instantiation point until we can fix this + properly with group comdat support. */ + darwin_mark_decl_preserved (lab); + } + else + fputs (":\n", file); + + free (lab); +} + +static GTY(()) unsigned long except_table_label_num; + +void +darwin_emit_except_table_label (FILE *file) +{ + char section_start_label[30]; + + ASM_GENERATE_INTERNAL_LABEL (section_start_label, "GCC_except_table", + except_table_label_num++); + ASM_OUTPUT_LABEL (file, section_start_label); +} +/* Generate a PC-relative reference to a Mach-O non-lazy-symbol. */ + +void +darwin_non_lazy_pcrel (FILE *file, rtx addr) +{ + const char *nlp_name; + + gcc_assert (GET_CODE (addr) == SYMBOL_REF); + + nlp_name = machopic_indirection_name (addr, /*stub_p=*/false); + fputs ("\t.long\t", file); + ASM_OUTPUT_LABELREF (file, nlp_name); + fputs ("-.", file); +} + +/* If this is uncommented, details of each allocation will be printed + in the asm right before the actual code. WARNING - this will cause some + test-suite fails (since the printout will contain items that some tests + are not expecting) -- so don't leave it on by default (it bloats the + asm too). */ +/*#define DEBUG_DARWIN_MEM_ALLOCATORS*/ + +/* The first two of these routines are ostensibly just intended to put + names into the asm. However, they are both hijacked in order to ensure + that zero-sized items do not make their way into the output. Consequently, + we also need to make these participate in provisions for dealing with + such items in section anchors. */ + +/* The implementation of ASM_DECLARE_OBJECT_NAME. */ +/* The RTTI data (e.g., __ti4name) is common and public (and static), + but it does need to be referenced via indirect PIC data pointers. + The machopic_define_symbol calls are telling the machopic subsystem + that the name *is* defined in this module, so it doesn't need to + make them indirect. 
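A condensed sketch of the zero-size handling in darwin_asm_declare_object_name, which follows: the object keeps its requested alignment, is bumped to one byte of storage, and (as the code asserts) must land in a section excluded from anchoring. The helper name and the assembly text are illustrative:

#include <stdio.h>

static unsigned
floor_log2_sketch (unsigned x)
{
  unsigned l = 0;
  while (x > 1)
    {
      x >>= 1;
      l++;
    }
  return l;
}

int
main (void)
{
  unsigned align_bits = 32;          /* hypothetical DECL_ALIGN, in bits */
  unsigned l2align = floor_log2_sketch (align_bits / 8);

  printf ("\t.align\t%u\n", l2align);   /* honour the alignment even at size 0 */
  printf ("_zero_sized_obj:\n");
  printf ("\t.space\t1\n");             /* give the object one byte */
  return 0;
}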
*/ +void +darwin_asm_declare_object_name (FILE *file, + const char *nam, tree decl) +{ + const char *xname = nam; + unsigned HOST_WIDE_INT size; + bool local_def, weak; + + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + + local_def = DECL_INITIAL (decl) || (TREE_STATIC (decl) + && (!DECL_COMMON (decl) + || !TREE_PUBLIC (decl))); + + if (GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) + xname = IDENTIFIER_POINTER (DECL_NAME (decl)); + + if (local_def) + { + (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); + if (!weak) + machopic_define_symbol (DECL_RTL (decl)); + } + + size = tree_low_cst (DECL_SIZE_UNIT (decl), 1); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d" + " stat %d com %d pub %d t-const %d t-ro %d init %lx\n", + xname, (TREE_CODE (decl) == VAR_DECL?"var":"const"), + (unsigned long long)size, DECL_ALIGN (decl), local_def, + DECL_WEAK (decl), TREE_STATIC (decl), DECL_COMMON (decl), + TREE_PUBLIC (decl), TREE_CONSTANT (decl), TREE_READONLY (decl), + (unsigned long)DECL_INITIAL (decl)); +#endif + + /* Darwin needs help to support local zero-sized objects. + They must be made at least one byte, and the section containing must be + marked as unsuitable for section-anchors (see storage allocators below). + + For non-zero objects this output is handled by varasm.c. + */ + if (!size) + { + unsigned int l2align = 0; + + /* The align must be honored, even for zero-sized. */ + if (DECL_ALIGN (decl)) + { + l2align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT); + fprintf (file, "\t.align\t%u\n", l2align); + } + + ASM_OUTPUT_LABEL (file, xname); + size = 1; + fprintf (file, "\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + + /* Check that we've correctly picked up the zero-sized item and placed it + properly. */ + gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + || (in_section + && (in_section->common.flags & SECTION_NO_ANCHOR))); + } + else + ASM_OUTPUT_LABEL (file, xname); +} + +/* The implementation of ASM_DECLARE_CONSTANT_NAME. */ +void +darwin_asm_declare_constant_name (FILE *file, const char *name, + const_tree exp ATTRIBUTE_UNUSED, + HOST_WIDE_INT size) +{ + assemble_label (file, name); + /* As for other items, we need at least one byte. */ + if (!size) + { + fputs ("\t.space\t1\n", file); + /* Check that we've correctly picked up the zero-sized item and placed it + properly. */ + gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + || (in_section + && (in_section->common.flags & SECTION_NO_ANCHOR))); + } +} + +/* Darwin storage allocators. + + Zerofill sections are desirable for large blank data since, otherwise, these + data bloat objects (PR33210). + + However, section anchors don't work in .zerofill sections (one cannot switch + to a zerofill section). Ergo, for Darwin targets using section anchors we need + to put (at least some) data into 'normal' switchable sections. + + Here we set a relatively arbitrary value for the size of an object to trigger + zerofill when section anchors are enabled (anything bigger than a page for + current Darwin implementations). FIXME: there ought to be some objective way + to make this choice. + + When section anchor are off this is ignored anyway. */ + +#define BYTES_ZFILL 4096 + +/* Emit a chunk of data for items coalesced by the linker. 
*/ +static void +darwin_emit_weak_or_comdat (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + /* Since the sections used here are coalesed, they will not be eligible + for section anchors, and therefore we don't need to break that out. */ + if (TREE_READONLY (decl) || TREE_CONSTANT (decl)) + switch_to_section (darwin_sections[const_data_coal_section]); + else + switch_to_section (darwin_sections[data_coal_section]); + + /* To be consistent, we'll allow darwin_asm_declare_object_name to assemble + the align info for zero-sized items... but do it here otherwise. */ + if (size && align) + fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT)); + + if (TREE_PUBLIC (decl)) + darwin_globalize_label (fp, name); + + /* ... and we let it deal with outputting one byte of zero for them too. */ + darwin_asm_declare_object_name (fp, name, decl); + if (size) + assemble_zeros (size); +} + +/* Emit a chunk of data for ObjC meta-data that got placed in BSS erroneously. */ +static void +darwin_emit_objc_zeroed (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align, tree meta) +{ + section *ocs = data_section; + + if (TREE_PURPOSE (meta) == get_identifier("OBJC2META")) + ocs = darwin_objc2_section (decl, meta, ocs); + else + ocs = darwin_objc1_section (decl, meta, ocs); + + switch_to_section (ocs); + + /* We shall declare that zero-sized meta-data are not valid (yet). */ + gcc_assert (size); + fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT)); + + /* ... and we let it deal with outputting one byte of zero for them too. */ + darwin_asm_declare_object_name (fp, name, decl); + assemble_zeros (size); +} + +/* This routine emits 'local' storage: + + When Section Anchors are off this routine emits .zerofill commands in + sections named for their alignment. + + When Section Anchors are on, smaller (non-zero-sized) items are placed in + the .static_data section so that the section anchoring system can see them. + Larger items are still placed in .zerofill sections, addressing PR33210. + The routine has no checking - it is all assumed to be done by the caller. +*/ +static void +darwin_emit_local_bss (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int l2align) +{ + /* FIXME: We have a fudge to make this work with Java even when the target does + not use sections anchors -- Java seems to need at least one small item in a + non-zerofill segment. */ + if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) + || (size && size <= 2)) + { + /* Put smaller objects in _static_data, where the section anchors system + can get them. + However, if they are zero-sized punt them to yet a different section + (that is not allowed to participate in anchoring). */ + if (!size) + { + fputs ("\t.section\t__DATA,__zobj_bss\n", fp); + in_section = darwin_sections[zobj_bss_section]; + size = 1; + } + else + { + fputs ("\t.static_data\n", fp); + in_section = darwin_sections[static_data_section]; + } + + if (l2align) + fprintf (fp, "\t.align\t%u\n", l2align); + + assemble_name (fp, name); + fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + else + { + /* When we are on a non-section anchor target, we can get zero-sized + items here. However, all we need to do is to bump them to one byte + and the section alignment will take care of the rest. 
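A condensed sketch of the placement decision being made here, under the simplifying assumption of a non-zero-sized object (the real code also punts zero-sized items to their own non-anchored zobj sections); the 4096-byte threshold mirrors BYTES_ZFILL above and the emitted text is illustrative:

#include <stdbool.h>
#include <stdio.h>

#define BYTES_ZFILL_SKETCH 4096

static void
place_local_bss_sketch (FILE *out, const char *name, unsigned long size,
                        unsigned l2align, bool section_anchors)
{
  /* Small objects stay in a switchable section so anchors can see them;
     large ones go to a .zerofill section named after their alignment.  */
  if ((section_anchors && size < BYTES_ZFILL_SKETCH) || size <= 2)
    {
      fprintf (out, "\t.static_data\n");
      if (l2align)
        fprintf (out, "\t.align\t%u\n", l2align);
      fprintf (out, "%s:\n\t.space\t%lu\n", name, size);
    }
  else
    fprintf (out, "\t.zerofill __DATA,__bss%u,%s,%lu,%u\n",
             l2align, name, size, l2align);
}

int
main (void)
{
  place_local_bss_sketch (stdout, "_big_buffer", 65536, 5, true);
  return 0;
}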
*/ + char secnam[64]; + unsigned int flags ; + snprintf (secnam, 64, "__DATA,__%sbss%u", ((size)?"":"zo_"), + (unsigned) l2align); + /* We can't anchor (yet, if ever) in zerofill sections, because we can't + switch to them and emit a label. */ + flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + in_section = get_section (secnam, flags, NULL); + fprintf (fp, "\t.zerofill %s,", secnam); + assemble_name (fp, name); + if (!size) + size = 1; + + if (l2align) + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", + size, (unsigned) l2align); + else + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + + (*targetm.encode_section_info) (decl, DECL_RTL (decl), false); + /* This is defined as a file-scope var, so we know to notify machopic. */ + machopic_define_symbol (DECL_RTL (decl)); +} + +/* Emit a chunk of common. */ +static void +darwin_emit_common (FILE *fp, const char *name, + unsigned HOST_WIDE_INT size, unsigned int align) +{ + unsigned HOST_WIDE_INT rounded; + unsigned int l2align; + + /* Earlier systems complain if the alignment exceeds the page size. + The magic number is 4096 * 8 - hard-coded for legacy systems. */ + if (!emit_aligned_common && (align > 32768UL)) + align = 4096UL; /* In units. */ + else + align /= BITS_PER_UNIT; + + /* Make sure we have a meaningful align. */ + if (!align) + align = 1; + + /* For earlier toolchains, we need to emit the var as a rounded size to + tell ld the alignment. */ + if (size < align) + rounded = align; + else + rounded = (size + (align-1)) & ~(align-1); + + l2align = floor_log2 (align); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + in_section = comm_section; + /* We mustn't allow multiple public symbols to share an address when using + the normal OSX toolchain. */ + if (!size) + { + /* Put at least one byte. */ + size = 1; + /* This section can no longer participate in section anchoring. */ + comm_section->common.flags |= SECTION_NO_ANCHOR; + } + + fputs ("\t.comm\t", fp); + assemble_name (fp, name); + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED, + emit_aligned_common?size:rounded); + if (l2align && emit_aligned_common) + fprintf (fp, ",%u", l2align); + fputs ("\n", fp); +} + +/* Output a var which is all zero - into aligned BSS sections, common, lcomm + or coalescable data sections (for weak or comdat) as appropriate. */ + +void +darwin_output_aligned_bss (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, unsigned int align) +{ + unsigned int l2align; + bool one, pub, weak; + tree meta; + + pub = TREE_PUBLIC (decl); + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" + " pub %d weak %d one %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), + pub, weak, one, (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* Check that any initializer is valid. 
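A worked restatement of the size rounding used by darwin_emit_common above, for toolchains that cannot express alignment on .comm directly and so let ld infer it from a rounded size (the function name here is hypothetical):

#include <stdio.h>

static unsigned long
round_common_size_sketch (unsigned long size, unsigned long align_bytes)
{
  if (size < align_bytes)
    return align_bytes;
  return (size + (align_bytes - 1)) & ~(align_bytes - 1);
}

int
main (void)
{
  /* 10 bytes at 8-byte alignment rounds up to 16.  */
  printf ("%lu\n", round_common_size_sketch (10, 8));
  return 0;
}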
*/ + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl))); + + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + gcc_assert (!DECL_COMMON (decl)); + + /* Pick up the correct alignment. */ + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + last_assemble_variable_decl = decl; + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergeable sections. */ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* If this is not public, then emit according to local rules. */ + if (!pub) + { + darwin_emit_local_bss (fp, decl, name, size, l2align); + return; + } + + /* So we have a public symbol (small item fudge for Java, see above). */ + if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) + || (size && size <= 2)) + { + /* Put smaller objects in data, where the section anchors system can get + them. However, if they are zero-sized punt them to yet a different + section (that is not allowed to participate in anchoring). */ + if (!size) + { + fputs ("\t.section\t__DATA,__zobj_data\n", fp); + in_section = darwin_sections[zobj_data_section]; + size = 1; + } + else + { + fputs ("\t.data\n", fp); + in_section = data_section; + } + + if (l2align) + fprintf (fp, "\t.align\t%u\n", l2align); + + assemble_name (fp, name); + fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + else + { + char secnam[64]; + unsigned int flags ; + /* When we are on a non-section anchor target, we can get zero-sized + items here. However, all we need to do is to bump them to one byte + and the section alignment will take care of the rest. */ + snprintf (secnam, 64, "__DATA,__%spu_bss%u", ((size)?"":"zo_"), l2align); + + /* We can't anchor in zerofill sections, because we can't switch + to them and emit a label. */ + flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + in_section = get_section (secnam, flags, NULL); + fprintf (fp, "\t.zerofill %s,", secnam); + assemble_name (fp, name); + if (!size) + size = 1; + + if (l2align) + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, l2align); + else + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); +} + +/* Output a chunk of common, with alignment specified (where the target + supports this). */ +void +darwin_asm_output_aligned_decl_common (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + unsigned int l2align; + bool one, weak; + tree meta; + + /* No corresponding var. 
*/ + if (decl==NULL) + { +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adcom: %s (%d,%d) decl=0x0\n", name, (int)size, (int)align); +#endif + darwin_emit_common (fp, name, size, align); + return; + } + + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adcom: %s (%lld,%d) ro %d cst %d stat %d com %d pub %d" + " weak %d one %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), + TREE_PUBLIC (decl), weak, one, (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* We shouldn't be messing with this if the decl has a section name. */ + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergable sections. */ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* We should only get here for DECL_COMMON, with a zero init (and, in + principle, only for public symbols too - although we deal with local + ones below). */ + + /* Check the initializer is OK. */ + gcc_assert (DECL_COMMON (decl) + && ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl)))); + + last_assemble_variable_decl = decl; + + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + /* Check we aren't asking for more aligment than the platform allows. */ + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + if (TREE_PUBLIC (decl) != 0) + darwin_emit_common (fp, name, size, align); + else + darwin_emit_local_bss (fp, decl, name, size, l2align); +} + +/* Output a chunk of BSS with alignment specfied. */ +void +darwin_asm_output_aligned_decl_local (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + unsigned long l2align; + bool one, weak; + tree meta; + + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adloc: %s (%lld,%d) ro %d cst %d stat %d one %d pub %d" + " weak %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), one, TREE_PUBLIC (decl), + weak , (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* We shouldn't be messing with this if the decl has a section name. */ + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergable sections. 
*/ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* .. and it should be suitable for placement in local mem. */ + gcc_assert(!TREE_PUBLIC (decl) && !DECL_COMMON (decl)); + /* .. and any initializer must be all-zero. */ + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl))); + + last_assemble_variable_decl = decl; + + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + darwin_emit_local_bss (fp, decl, name, size, l2align); +} + +/* Emit an assembler directive to set visibility for a symbol. The + only supported visibilities are VISIBILITY_DEFAULT and + VISIBILITY_HIDDEN; the latter corresponds to Darwin's "private + extern". There is no MACH-O equivalent of ELF's + VISIBILITY_INTERNAL or VISIBILITY_PROTECTED. */ + +void +darwin_assemble_visibility (tree decl, int vis) +{ + if (vis == VISIBILITY_DEFAULT) + ; + else if (vis == VISIBILITY_HIDDEN) + { + fputs ("\t.private_extern ", asm_out_file); + assemble_name (asm_out_file, + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)))); + fputs ("\n", asm_out_file); + } + else + warning (OPT_Wattributes, "internal and protected visibility attributes " + "not supported in this configuration; ignored"); +} + +/* VEC Used by darwin_asm_dwarf_section. + Maybe a hash tab would be better here - but the intention is that this is + a very short list (fewer than 16 items) and each entry should (ideally, + eventually) only be presented once. + + A structure to hold a dwarf debug section used entry. */ + +typedef struct GTY(()) dwarf_sect_used_entry { + const char *name; + unsigned count; +} +dwarf_sect_used_entry; + +DEF_VEC_O(dwarf_sect_used_entry); +DEF_VEC_ALLOC_O(dwarf_sect_used_entry, gc); + +/* A list of used __DWARF sections. */ +static GTY (()) VEC (dwarf_sect_used_entry, gc) * dwarf_sect_names_table; + +/* This is called when we are asked to assemble a named section and the + name begins with __DWARF,. We keep a list of the section names (without + the __DWARF, prefix) and use this to emit our required start label on the + first switch to each section. */ + +static void +darwin_asm_dwarf_section (const char *name, unsigned int flags, + tree ARG_UNUSED (decl)) +{ + unsigned i; + int namelen; + const char * sname; + dwarf_sect_used_entry *ref; + bool found = false; + gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED)) + == (SECTION_DEBUG | SECTION_NAMED)); + /* We know that the name starts with __DWARF, */ + sname = name + 8; + namelen = strchr (sname, ',') - sname; + gcc_assert (namelen); + if (dwarf_sect_names_table == NULL) + dwarf_sect_names_table = VEC_alloc (dwarf_sect_used_entry, gc, 16); + else + for (i = 0; + VEC_iterate (dwarf_sect_used_entry, dwarf_sect_names_table, i, ref); + i++) + { + if (!ref) + break; + if (!strcmp (ref->name, sname)) + { + found = true; + ref->count++; + break; + } + } + + fprintf (asm_out_file, "\t.section %s\n", name); + if (!found) + { + dwarf_sect_used_entry e; + fprintf (asm_out_file, "Lsection%.*s:\n", namelen, sname); + e.count = 1; + e.name = xstrdup (sname); + VEC_safe_push (dwarf_sect_used_entry, gc, dwarf_sect_names_table, &e); + } +} + +/* Output a difference of two labels that will be an assembly time + constant if the two labels are local. 
(.long lab1-lab2 will be + very different if lab1 is at the boundary between two sections; it + will be relocated according to the second section, not the first, + so one ends up with a difference between labels in different + sections, which is bad in the dwarf2 eh context for instance.) */ + +static int darwin_dwarf_label_counter; + +void +darwin_asm_output_dwarf_delta (FILE *file, int size, + const char *lab1, const char *lab2) +{ + int islocaldiff = (lab1[0] == '*' && lab1[1] == 'L' + && lab2[0] == '*' && lab2[1] == 'L'); + const char *directive = (size == 8 ? ".quad" : ".long"); + + if (islocaldiff) + fprintf (file, "\t.set L$set$%d,", darwin_dwarf_label_counter); + else + fprintf (file, "\t%s\t", directive); + + assemble_name_raw (file, lab1); + fprintf (file, "-"); + assemble_name_raw (file, lab2); + if (islocaldiff) + fprintf (file, "\n\t%s L$set$%d", directive, darwin_dwarf_label_counter++); +} + +/* Output an offset in a DWARF section on Darwin. On Darwin, DWARF section + offsets are not represented using relocs in .o files; either the + section never leaves the .o file, or the linker or other tool is + responsible for parsing the DWARF and updating the offsets. */ + +void +darwin_asm_output_dwarf_offset (FILE *file, int size, const char * lab, + section *base) +{ + char sname[64]; + int namelen; + + gcc_assert (base->common.flags & SECTION_NAMED); + gcc_assert (strncmp (base->named.name, "__DWARF,", 8) == 0); + gcc_assert (strchr (base->named.name + 8, ',')); + + namelen = strchr (base->named.name + 8, ',') - (base->named.name + 8); + sprintf (sname, "*Lsection%.*s", namelen, base->named.name + 8); + darwin_asm_output_dwarf_delta (file, size, lab, sname); +} + +/* Called from the within the TARGET_ASM_FILE_START for each target. */ + +void +darwin_file_start (void) +{ + /* Nothing to do. */ +} + +/* Called for the TARGET_ASM_FILE_END hook. + Emit the mach-o pic indirection data, the lto data and, finally a flag + to tell the linker that it can break the file object into sections and + move those around for efficiency. */ + +void +darwin_file_end (void) +{ + machopic_finish (asm_out_file); + if (strcmp (lang_hooks.name, "GNU C++") == 0) + { + switch_to_section (darwin_sections[constructor_section]); + switch_to_section (darwin_sections[destructor_section]); + ASM_OUTPUT_ALIGN (asm_out_file, 1); + } + + /* If there was LTO assembler output, append it to asm_out_file. */ + if (lto_asm_out_name) + { + int n; + char *buf, *lto_asm_txt; + + /* Shouldn't be here if we failed to switch back. */ + gcc_assert (! saved_asm_out_file); + + lto_asm_out_file = fopen (lto_asm_out_name, "r"); + if (lto_asm_out_file == NULL) + fatal_error ("failed to open temporary file %s with LTO output", + lto_asm_out_name); + fseek (lto_asm_out_file, 0, SEEK_END); + n = ftell (lto_asm_out_file); + if (n > 0) + { + fseek (lto_asm_out_file, 0, SEEK_SET); + lto_asm_txt = buf = (char *) xmalloc (n + 1); + while (fgets (lto_asm_txt, n, lto_asm_out_file)) + fputs (lto_asm_txt, asm_out_file); + /* Put a termination label. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_SECTS_SECTION); + fprintf (asm_out_file, "L_GNU_LTO%d:\t;# end of lto\n", + lto_section_num); + /* Make sure our termination label stays in this section. */ + fputs ("\t.space\t1\n", asm_out_file); + } + + /* Remove the temporary file. */ + fclose (lto_asm_out_file); + unlink_if_ordinary (lto_asm_out_name); + free (lto_asm_out_name); + } + + /* Output the names and indices. 
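For clarity, the 16-byte index record emitted further below can be pictured as the following struct; the type name is illustrative, since the real code simply emits four 4-byte integers with integer_asm_op:

#include <stdint.h>
#include <stdio.h>

struct gnu_lto_index_entry_sketch
{
  uint32_t section_offset;   /* offset of the LTO payload in __wrapper_sects */
  uint32_t section_length;   /* its length                                   */
  uint32_t name_offset;      /* offset of its name in __wrapper_names        */
  uint32_t name_length;      /* length of that name                          */
};

int
main (void)
{
  printf ("index entry size: %zu bytes\n",
          sizeof (struct gnu_lto_index_entry_sketch));   /* 16 */
  return 0;
}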
*/ + if (lto_section_names && VEC_length (darwin_lto_section_e, lto_section_names)) + { + int count; + darwin_lto_section_e *ref; + /* For now, we'll make the offsets 4 bytes and unaligned - we'll fix + the latter up ourselves. */ + const char *op = integer_asm_op (4,0); + + /* Emit the names. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_NAMES_SECTION); + FOR_EACH_VEC_ELT (darwin_lto_section_e, lto_section_names, count, ref) + { + fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\n", count); + /* We have to jump through hoops to get the values of the intra-section + offsets... */ + fprintf (asm_out_file, + "\t.set L$gnu$lto$noff%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME0\n", + count, count); + fprintf (asm_out_file, + "\t.set L$gnu$lto$nsiz%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME%d\n", + count, count+1, count); + fprintf (asm_out_file, "\t.asciz\t\"%s\"\n", ref->sectname); + } + fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\t;# end\n", lto_section_num); + /* make sure our termination label stays in this section. */ + fputs ("\t.space\t1\n", asm_out_file); + + /* Emit the Index. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_INDEX_SECTION); + fputs ("\t.align\t2\n", asm_out_file); + fputs ("# Section offset, Section length, Name offset, Name length\n", + asm_out_file); + FOR_EACH_VEC_ELT (darwin_lto_section_e, lto_section_names, count, ref) + { + fprintf (asm_out_file, "%s L$gnu$lto$offs%d\t;# %s\n", + op, count, ref->sectname); + fprintf (asm_out_file, "%s L$gnu$lto$size%d\n", op, count); + fprintf (asm_out_file, "%s L$gnu$lto$noff%d\n", op, count); + fprintf (asm_out_file, "%s L$gnu$lto$nsiz%d\n", op, count); + } + } + + /* If we have section anchors, then we must prevent the linker from + re-arranging data. */ + if (!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + fprintf (asm_out_file, "\t.subsections_via_symbols\n"); +} + +/* TODO: Add a language hook for identifying if a decl is a vtable. */ +#define DARWIN_VTABLE_P(DECL) 0 + +/* Cross-module name binding. Darwin does not support overriding + functions at dynamic-link time, except for vtables in kexts. */ + +bool +darwin_binds_local_p (const_tree decl) +{ + return default_binds_local_p_1 (decl, + TARGET_KEXTABI && DARWIN_VTABLE_P (decl)); +} + +/* The Darwin's implementation of TARGET_ASM_OUTPUT_ANCHOR. Define the + anchor relative to ".", the current section position. We cannot use + the default one because ASM_OUTPUT_DEF is wrong for Darwin. */ +void +darwin_asm_output_anchor (rtx symbol) +{ + fprintf (asm_out_file, "\t.set\t"); + assemble_name (asm_out_file, XSTR (symbol, 0)); + fprintf (asm_out_file, ", . + " HOST_WIDE_INT_PRINT_DEC "\n", + SYMBOL_REF_BLOCK_OFFSET (symbol)); +} + +/* Disable section anchoring on any section containing a zero-sized + object. */ +bool +darwin_use_anchors_for_symbol_p (const_rtx symbol) +{ + if (DARWIN_SECTION_ANCHORS && flag_section_anchors) + { + section *sect; + /* If the section contains a zero-sized object it's ineligible. */ + sect = SYMBOL_REF_BLOCK (symbol)->sect; + /* This should have the effect of disabling anchors for vars that follow + any zero-sized one, in a given section. */ + if (sect->common.flags & SECTION_NO_ANCHOR) + return false; + + /* Also check the normal reasons for suppressing. */ + return default_use_anchors_for_symbol_p (symbol); + } + else + return false; +} + +/* Set the darwin specific attributes on TYPE. 
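A standalone sketch of the -mmacosx-version-min mapping performed in darwin_override_options further below: 10.6 and later select Darwin 10, 10.5 selects Darwin 9, and earlier versions keep the caller's default. It assumes a GNU-style strverscmp is available, which is the comparison routine the code below calls:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

static int
darwin_major_for_min_sketch (const char *version_min, int dflt)
{
  if (strverscmp (version_min, "10.6") >= 0)
    return 10;
  if (strverscmp (version_min, "10.5") >= 0)
    return 9;
  return dflt;
}

int
main (void)
{
  printf ("%d\n", darwin_major_for_min_sketch ("10.6.8", 8));  /* prints 10 */
  return 0;
}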
*/ +void +darwin_set_default_type_attributes (tree type) +{ + if (darwin_ms_struct + && TREE_CODE (type) == RECORD_TYPE) + TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("ms_struct"), + NULL_TREE, + TYPE_ATTRIBUTES (type)); +} + +/* True, iff we're generating code for loadable kernel extensions. */ + +bool +darwin_kextabi_p (void) { + return flag_apple_kext; +} + +void +darwin_override_options (void) +{ + /* Keep track of which (major) version we're generating code for. */ + if (darwin_macosx_version_min) + { + if (strverscmp (darwin_macosx_version_min, "10.6") >= 0) + generating_for_darwin_version = 10; + else if (strverscmp (darwin_macosx_version_min, "10.5") >= 0) + generating_for_darwin_version = 9; + + /* Earlier versions are not specifically accounted, until required. */ + } + + /* Don't emit DWARF3/4 unless specifically selected. This is a + workaround for tool bugs. */ + if (!global_options_set.x_dwarf_strict) + dwarf_strict = 1; + + /* Do not allow unwind tables to be generated by default for m32. + fnon-call-exceptions will override this, regardless of what we do. */ + if (generating_for_darwin_version < 10 + && !global_options_set.x_flag_asynchronous_unwind_tables + && !TARGET_64BIT) + global_options.x_flag_asynchronous_unwind_tables = 0; + + /* Disable -freorder-blocks-and-partition when unwind tables are being + emitted for Darwin < 9 (OSX 10.5). + The strategy is, "Unless the User has specifically set/unset an unwind + flag we will switch off -freorder-blocks-and-partition when unwind tables + will be generated". If the User specifically sets flags... we assume + (s)he knows why... */ + if (generating_for_darwin_version < 9 + && global_options_set.x_flag_reorder_blocks_and_partition + && ((global_options.x_flag_exceptions /* User, c++, java */ + && !global_options_set.x_flag_exceptions) /* User specified... */ + || (global_options.x_flag_unwind_tables + && !global_options_set.x_flag_unwind_tables) + || (global_options.x_flag_non_call_exceptions + && !global_options_set.x_flag_non_call_exceptions) + || (global_options.x_flag_asynchronous_unwind_tables + && !global_options_set.x_flag_asynchronous_unwind_tables))) + { + inform (input_location, + "-freorder-blocks-and-partition does not work with exceptions " + "on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + if (flag_mkernel || flag_apple_kext) + { + /* -mkernel implies -fapple-kext for C++ */ + if (strcmp (lang_hooks.name, "GNU C++") == 0) + flag_apple_kext = 1; + + flag_no_common = 1; + + /* No EH in kexts. */ + flag_exceptions = 0; + /* No -fnon-call-exceptions data in kexts. */ + flag_non_call_exceptions = 0; + /* so no tables either.. */ + flag_unwind_tables = 0; + flag_asynchronous_unwind_tables = 0; + /* We still need to emit branch islands for kernel context. */ + darwin_emit_branch_islands = true; + } + + if (flag_var_tracking + && generating_for_darwin_version >= 9 + && (flag_gtoggle ? (debug_info_level == DINFO_LEVEL_NONE) + : (debug_info_level >= DINFO_LEVEL_NORMAL)) + && write_symbols == DWARF2_DEBUG) + flag_var_tracking_uninit = 1; + + if (MACHO_DYNAMIC_NO_PIC_P) + { + if (flag_pic) + warning (0, "-mdynamic-no-pic overrides -fpic or -fPIC"); + flag_pic = 0; + } + else if (flag_pic == 1) + { + /* Darwin's -fpic is -fPIC. */ + flag_pic = 2; + } + + /* It is assumed that branch island stubs are needed for earlier systems. 
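The version mapping in darwin_override_options above reduces to two strverscmp comparisons. A minimal sketch, assuming a glibc host (strverscmp is a GNU extension) and using 0 to stand for "not specifically accounted for":

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

static int
darwin_version_from_min (const char *version_min)
{
  /* 10.6 and later -> 10; 10.5.x -> 9; earlier versions left alone.  */
  if (version_min == NULL)
    return 0;
  if (strverscmp (version_min, "10.6") >= 0)
    return 10;
  if (strverscmp (version_min, "10.5") >= 0)
    return 9;
  return 0;
}

int
main (void)
{
  const char *tests[] = { "10.4", "10.5", "10.5.8", "10.6", "10.7" };
  unsigned int i;
  for (i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("%-8s -> generating_for_darwin_version %d\n",
            tests[i], darwin_version_from_min (tests[i]));
  return 0;
}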
*/ + if (generating_for_darwin_version < 9) + darwin_emit_branch_islands = true; + else + emit_aligned_common = true; /* Later systems can support aligned common. */ + + /* The c_dialect...() macros are not available to us here. */ + darwin_running_cxx = (strstr (lang_hooks.name, "C++") != 0); +} + +/* Add $LDBL128 suffix to long double builtins. */ + +static void +darwin_patch_builtin (int fncode) +{ + tree fn = built_in_decls[fncode]; + tree sym; + char *newname; + + if (!fn) + return; + + sym = DECL_ASSEMBLER_NAME (fn); + newname = ACONCAT (("_", IDENTIFIER_POINTER (sym), "$LDBL128", NULL)); + + set_user_assembler_name (fn, newname); + + fn = implicit_built_in_decls[fncode]; + if (fn) + set_user_assembler_name (fn, newname); +} + +void +darwin_patch_builtins (void) +{ + if (LONG_DOUBLE_TYPE_SIZE != 128) + return; + +#define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_NO64(fncode) \ + if (!TARGET_64BIT) \ + darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_VARIADIC(fncode) \ + if (!TARGET_64BIT \ + && (strverscmp (darwin_macosx_version_min, "10.3.9") >= 0)) \ + darwin_patch_builtin (fncode); +#include "darwin-ppc-ldouble-patch.def" +#undef PATCH_BUILTIN +#undef PATCH_BUILTIN_NO64 +#undef PATCH_BUILTIN_VARIADIC +} + +/* CFStrings implementation. */ +static GTY(()) tree cfstring_class_reference = NULL_TREE; +static GTY(()) tree cfstring_type_node = NULL_TREE; +static GTY(()) tree ccfstring_type_node = NULL_TREE; +static GTY(()) tree pccfstring_type_node = NULL_TREE; +static GTY(()) tree pcint_type_node = NULL_TREE; +static GTY(()) tree pcchar_type_node = NULL_TREE; + +static enum built_in_function darwin_builtin_cfstring; + +/* Store all constructed constant CFStrings in a hash table so that + they get uniqued properly. */ + +typedef struct GTY (()) cfstring_descriptor { + /* The string literal. */ + tree literal; + /* The resulting constant CFString. */ + tree constructor; +} cfstring_descriptor; + +static GTY ((param_is (struct cfstring_descriptor))) htab_t cfstring_htab; + +static hashval_t cfstring_hash (const void *); +static int cfstring_eq (const void *, const void *); + +static tree +add_builtin_field_decl (tree type, const char *name, tree **chain) +{ + tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier (name), type); + + if (*chain != NULL) + **chain = field; + *chain = &DECL_CHAIN (field); + + return field; +} + +tree +darwin_init_cfstring_builtins (unsigned builtin_cfstring) +{ + tree cfsfun, fields, pccfstring_ftype_pcchar; + tree *chain = NULL; + + darwin_builtin_cfstring = + (enum built_in_function) builtin_cfstring; + + /* struct __builtin_CFString { + const int *isa; (will point at + int flags; __CFConstantStringClassReference) + const char *str; + long length; + }; */ + + pcint_type_node = build_pointer_type + (build_qualified_type (integer_type_node, TYPE_QUAL_CONST)); + + pcchar_type_node = build_pointer_type + (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); + + cfstring_type_node = (*lang_hooks.types.make_type) (RECORD_TYPE); + + /* Have to build backwards for finish struct. 
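For reference, the record assembled by these builtins corresponds to the following plain-C layout. The 0x000007C8 flags value and the class-reference symbol are the ones used further down in darwin_build_constant_cfstring; the example literal is purely illustrative and will only link where CoreFoundation supplies the class object.

/* Plain-C picture of the compiler-built constant CFString record.  */
extern int __CFConstantStringClassReference[];

struct __builtin_CFString_layout
{
  const int *isa;    /* points at __CFConstantStringClassReference */
  int flags;         /* 0x000007C8 for compile-time constants */
  const char *str;   /* the literal's bytes */
  long length;       /* length of the literal, excluding the NUL */
};

/* Hand-written equivalent of what the compiler emits for
   __builtin___CFStringMakeConstantString ("hello").  */
static const struct __builtin_CFString_layout hello_cfstring =
  { __CFConstantStringClassReference, 0x000007C8, "hello", 5 };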
*/ + fields = add_builtin_field_decl (long_integer_type_node, "length", &chain); + add_builtin_field_decl (pcchar_type_node, "str", &chain); + add_builtin_field_decl (integer_type_node, "flags", &chain); + add_builtin_field_decl (pcint_type_node, "isa", &chain); + finish_builtin_struct (cfstring_type_node, "__builtin_CFString", + fields, NULL_TREE); + + /* const struct __builtin_CFstring * + __builtin___CFStringMakeConstantString (const char *); */ + + ccfstring_type_node = build_qualified_type + (cfstring_type_node, TYPE_QUAL_CONST); + pccfstring_type_node = build_pointer_type (ccfstring_type_node); + pccfstring_ftype_pcchar = build_function_type_list + (pccfstring_type_node, pcchar_type_node, NULL_TREE); + + cfsfun = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier ("__builtin___CFStringMakeConstantString"), + pccfstring_ftype_pcchar); + + TREE_PUBLIC (cfsfun) = 1; + DECL_EXTERNAL (cfsfun) = 1; + DECL_ARTIFICIAL (cfsfun) = 1; + /* Make a lang-specific section - dup_lang_specific_decl makes a new node + in place of the existing, which may be NULL. */ + DECL_LANG_SPECIFIC (cfsfun) = NULL; + (*lang_hooks.dup_lang_specific_decl) (cfsfun); + DECL_BUILT_IN_CLASS (cfsfun) = BUILT_IN_MD; + DECL_FUNCTION_CODE (cfsfun) = darwin_builtin_cfstring; + lang_hooks.builtin_function (cfsfun); + + /* extern int __CFConstantStringClassReference[]; */ + cfstring_class_reference = build_decl (BUILTINS_LOCATION, VAR_DECL, + get_identifier ("__CFConstantStringClassReference"), + build_array_type (integer_type_node, NULL_TREE)); + + TREE_PUBLIC (cfstring_class_reference) = 1; + DECL_ARTIFICIAL (cfstring_class_reference) = 1; + (*lang_hooks.decls.pushdecl) (cfstring_class_reference); + DECL_EXTERNAL (cfstring_class_reference) = 1; + rest_of_decl_compilation (cfstring_class_reference, 0, 0); + + /* Initialize the hash table used to hold the constant CFString objects. */ + cfstring_htab = htab_create_ggc (31, cfstring_hash, cfstring_eq, NULL); + + return cfstring_type_node; +} + +tree +darwin_fold_builtin (tree fndecl, int n_args, tree *argp, + bool ARG_UNUSED (ignore)) +{ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + if (fcode == darwin_builtin_cfstring) + { + if (!darwin_constant_cfstrings) + { + error ("built-in function %qD requires the" + " %<-mconstant-cfstrings%> flag", fndecl); + return error_mark_node; + } + + if (n_args != 1) + { + error ("built-in function %qD takes one argument only", fndecl); + return error_mark_node; + } + + return darwin_build_constant_cfstring (*argp); + } + + return NULL_TREE; +} + +void +darwin_rename_builtins (void) +{ + /* The system ___divdc3 routine in libSystem on darwin10 is not + accurate to 1ulp, ours is, so we avoid ever using the system name + for this routine and instead install a non-conflicting name that + is accurate. + + When -ffast-math or -funsafe-math-optimizations is given, we can + use the faster version. */ + if (!flag_unsafe_math_optimizations) + { + int dcode = (BUILT_IN_COMPLEX_DIV_MIN + + DCmode - MIN_MODE_COMPLEX_FLOAT); + tree fn = built_in_decls[dcode]; + /* Fortran and c call TARGET_INIT_BUILTINS and + TARGET_INIT_LIBFUNCS at different times, so we have to put a + call into each to ensure that at least one of them is called + after build_common_builtin_nodes. A better fix is to add a + new hook to run after build_common_builtin_nodes runs. 
*/ + if (fn) + set_user_assembler_name (fn, "___ieee_divdc3"); + fn = implicit_built_in_decls[dcode]; + if (fn) + set_user_assembler_name (fn, "___ieee_divdc3"); + } +} + +static hashval_t +cfstring_hash (const void *ptr) +{ + tree str = ((const struct cfstring_descriptor *)ptr)->literal; + const unsigned char *p = (const unsigned char *) TREE_STRING_POINTER (str); + int i, len = TREE_STRING_LENGTH (str); + hashval_t h = len; + + for (i = 0; i < len; i++) + h = ((h * 613) + p[i]); + + return h; +} + +static int +cfstring_eq (const void *ptr1, const void *ptr2) +{ + tree str1 = ((const struct cfstring_descriptor *)ptr1)->literal; + tree str2 = ((const struct cfstring_descriptor *)ptr2)->literal; + int len1 = TREE_STRING_LENGTH (str1); + + return (len1 == TREE_STRING_LENGTH (str2) + && !memcmp (TREE_STRING_POINTER (str1), TREE_STRING_POINTER (str2), + len1)); +} + +tree +darwin_build_constant_cfstring (tree str) +{ + struct cfstring_descriptor *desc, key; + void **loc; + tree addr; + + if (!str) + { + error ("CFString literal is missing"); + return error_mark_node; + } + + STRIP_NOPS (str); + + if (TREE_CODE (str) == ADDR_EXPR) + str = TREE_OPERAND (str, 0); + + if (TREE_CODE (str) != STRING_CST) + { + error ("CFString literal expression is not a string constant"); + return error_mark_node; + } + + /* Perhaps we already constructed a constant CFString just like this one? */ + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, INSERT); + desc = (struct cfstring_descriptor *) *loc; + + if (!desc) + { + tree var, constructor, field; + VEC(constructor_elt,gc) *v = NULL; + int length = TREE_STRING_LENGTH (str) - 1; + + if (darwin_warn_nonportable_cfstrings) + { + const char *s = TREE_STRING_POINTER (str); + int l = 0; + + for (l = 0; l < length; l++) + if (!s[l] || !isascii (s[l])) + { + warning (darwin_warn_nonportable_cfstrings, "%s in CFString literal", + s[l] ? "non-ASCII character" : "embedded NUL"); + break; + } + } + + *loc = desc = ggc_alloc_cleared_cfstring_descriptor (); + desc->literal = str; + + /* isa *. */ + field = TYPE_FIELDS (ccfstring_type_node); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build1 (ADDR_EXPR, TREE_TYPE (field), + cfstring_class_reference)); + /* flags */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build_int_cst (TREE_TYPE (field), 0x000007c8)); + /* string *. */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build1 (ADDR_EXPR, TREE_TYPE (field), str)); + /* length */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build_int_cst (TREE_TYPE (field), length)); + + constructor = build_constructor (ccfstring_type_node, v); + TREE_READONLY (constructor) = 1; + TREE_CONSTANT (constructor) = 1; + TREE_STATIC (constructor) = 1; + + /* Fromage: The C++ flavor of 'build_unary_op' expects constructor nodes + to have the TREE_HAS_CONSTRUCTOR (...) bit set. However, this file is + being built without any knowledge of C++ tree accessors; hence, we shall + use the generic accessor that TREE_HAS_CONSTRUCTOR actually maps to! */ + if (darwin_running_cxx) + TREE_LANG_FLAG_4 (constructor) = 1; /* TREE_HAS_CONSTRUCTOR */ + + /* Create an anonymous global variable for this CFString. */ + var = build_decl (input_location, CONST_DECL, + NULL, TREE_TYPE (constructor)); + DECL_ARTIFICIAL (var) = 1; + TREE_STATIC (var) = 1; + DECL_INITIAL (var) = constructor; + /* FIXME: This should use a translation_unit_decl to indicate file scope. 
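The uniquing of constant CFStrings hinges on cfstring_hash and cfstring_eq below. Here is a standalone sketch of the same hashing and equality, working on plain (bytes, length) pairs instead of STRING_CST trees:

#include <stdio.h>
#include <string.h>

typedef unsigned int hashval_t;

static hashval_t
literal_hash (const unsigned char *p, int len)
{
  hashval_t h = (hashval_t) len;   /* seed with the length, as cfstring_hash does */
  int i;
  for (i = 0; i < len; i++)
    h = h * 613 + p[i];
  return h;
}

static int
literal_eq (const unsigned char *p1, int len1,
            const unsigned char *p2, int len2)
{
  return len1 == len2 && memcmp (p1, p2, len1) == 0;
}

int
main (void)
{
  const unsigned char a[] = "hello";
  const unsigned char b[] = "hello";
  printf ("hash=%u equal=%d\n", literal_hash (a, sizeof a),
          literal_eq (a, sizeof a, b, sizeof b));
  return 0;
}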
*/ + DECL_CONTEXT (var) = NULL_TREE; + desc->constructor = var; + } + + addr = build1 (ADDR_EXPR, pccfstring_type_node, desc->constructor); + TREE_CONSTANT (addr) = 1; + + return addr; +} + +bool +darwin_cfstring_p (tree str) +{ + struct cfstring_descriptor key; + void **loc; + + if (!str) + return false; + + STRIP_NOPS (str); + + if (TREE_CODE (str) == ADDR_EXPR) + str = TREE_OPERAND (str, 0); + + if (TREE_CODE (str) != STRING_CST) + return false; + + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, NO_INSERT); + + if (loc) + return true; + + return false; +} + +void +darwin_enter_string_into_cfstring_table (tree str) +{ + struct cfstring_descriptor key; + void **loc; + + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, INSERT); + + if (!*loc) + { + *loc = ggc_alloc_cleared_cfstring_descriptor (); + ((struct cfstring_descriptor *)*loc)->literal = str; + } +} + +/* Choose named function section based on its frequency. */ + +section * +darwin_function_section (tree decl, enum node_frequency freq, + bool startup, bool exit) +{ + /* Decide if we need to put this in a coalescable section. */ + bool weak = (decl + && DECL_WEAK (decl) + && (!DECL_ATTRIBUTES (decl) + || !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl)))); + + /* If there is a specified section name, we should not be trying to + override. */ + if (decl && DECL_SECTION_NAME (decl) != NULL_TREE) + return get_named_section (decl, NULL, 0); + + /* Default when there is no function re-ordering. */ + if (!flag_reorder_functions) + return (weak) + ? darwin_sections[text_coal_section] + : text_section; + + /* Startup code should go to startup subsection unless it is + unlikely executed (this happens especially with function splitting + where we can split away unnecesary parts of static constructors). */ + if (startup && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED) + return (weak) + ? darwin_sections[text_startup_coal_section] + : darwin_sections[text_startup_section]; + + /* Similarly for exit. */ + if (exit && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED) + return (weak) + ? darwin_sections[text_exit_coal_section] + : darwin_sections[text_exit_section]; + + /* Group cold functions together, similarly for hot code. */ + switch (freq) + { + case NODE_FREQUENCY_UNLIKELY_EXECUTED: + return (weak) + ? darwin_sections[text_cold_coal_section] + : darwin_sections[text_cold_section]; + break; + case NODE_FREQUENCY_HOT: + return (weak) + ? darwin_sections[text_hot_coal_section] + : darwin_sections[text_hot_section]; + break; + default: + return (weak) + ? darwin_sections[text_coal_section] + : text_section; + break; + } +} + +/* When a function is partitioned between sections, we need to insert a label + at the start of each new chunk - so that it may become a valid 'atom' for + eh and debug purposes. Without this the linker will emit warnings if one + tries to add line location information (since the switched fragment will + be anonymous). */ + +void +darwin_function_switched_text_sections (FILE *fp, tree decl, bool new_is_cold) +{ + char buf[128]; + snprintf (buf, 128, "%s%s",new_is_cold?"__cold_sect_of_":"__hot_sect_of_", + IDENTIFIER_POINTER (DECL_NAME (decl))); + /* Make sure we pick up all the relevant quotes etc. 
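A minimal sketch of the fragment-label naming used above, with a hypothetical function name and the label printed directly rather than passed through assemble_name_raw:

#include <stdio.h>

static void
print_switched_section_label (FILE *fp, const char *fn_name, int new_is_cold)
{
  char buf[128];

  /* Same scheme as darwin_function_switched_text_sections: prefix the
     function's identifier so each fragment gets its own atom label.  */
  snprintf (buf, sizeof buf, "%s%s",
            new_is_cold ? "__cold_sect_of_" : "__hot_sect_of_", fn_name);
  fprintf (fp, "%s:\n", buf);
}

int
main (void)
{
  print_switched_section_label (stdout, "_main", 1);  /* cold fragment */
  print_switched_section_label (stdout, "_main", 0);  /* hot fragment */
  return 0;
}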
*/ + assemble_name_raw (fp, (const char *) buf); + fputs (":\n", fp); +} + +#include "gt-darwin.h" diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h new file mode 100644 index 000000000..0526d851f --- /dev/null +++ b/gcc/config/darwin.h @@ -0,0 +1,990 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef CONFIG_DARWIN_H +#define CONFIG_DARWIN_H + +/* The definitions in this file are common to all processor types + running Darwin, which is the kernel for Mac OS X. Darwin is + basically a BSD user layer laid over a Mach kernel, then evolved + for many years (at NeXT) in parallel with other Unix systems. So + while the runtime is a somewhat idiosyncratic Mach-based thing, + other definitions look like they would for a BSD variant. */ + +/* Although NeXT ran on many different architectures, as of Jan 2001 + the only supported Darwin targets are PowerPC and x86. */ + +/* One of Darwin's NeXT legacies is the Mach-O format, which is partly + like a.out and partly like COFF, with additional features like + multi-architecture binary support. */ + +#define DARWIN_X86 0 +#define DARWIN_PPC 0 + +/* Don't assume anything about the header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* Suppress g++ attempt to link in the math library automatically. */ +#define MATH_LIBRARY "" + +/* We have atexit. */ + +#define HAVE_ATEXIT + +/* Define an empty body for the function do_global_dtors() in libgcc2.c. */ + +#define DO_GLOBAL_DTORS_BODY + +/* The string value for __SIZE_TYPE__. */ + +#ifndef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" +#endif + +/* Type used for ptrdiff_t, as a string used in a declaration. */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* wchar_t is int. 
*/ + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#define INT8_TYPE "signed char" +#define INT16_TYPE "short int" +#define INT32_TYPE "int" +#define INT64_TYPE "long long int" +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE "short unsigned int" +#define UINT32_TYPE "unsigned int" +#define UINT64_TYPE "long long unsigned int" + +#define INT_LEAST8_TYPE "signed char" +#define INT_LEAST16_TYPE "short int" +#define INT_LEAST32_TYPE "int" +#define INT_LEAST64_TYPE "long long int" +#define UINT_LEAST8_TYPE "unsigned char" +#define UINT_LEAST16_TYPE "short unsigned int" +#define UINT_LEAST32_TYPE "unsigned int" +#define UINT_LEAST64_TYPE "long long unsigned int" + +#define INT_FAST8_TYPE "signed char" +#define INT_FAST16_TYPE "short int" +#define INT_FAST32_TYPE "int" +#define INT_FAST64_TYPE "long long int" +#define UINT_FAST8_TYPE "unsigned char" +#define UINT_FAST16_TYPE "short unsigned int" +#define UINT_FAST32_TYPE "unsigned int" +#define UINT_FAST64_TYPE "long long unsigned int" + +#define INTPTR_TYPE "long int" +#define UINTPTR_TYPE "long unsigned int" + +#define SIG_ATOMIC_TYPE "int" + +/* Default to using the NeXT-style runtime, since that's what is + pre-installed on Darwin systems. */ + +#define NEXT_OBJC_RUNTIME + +/* Don't default to pcc-struct-return, because gcc is the only compiler, and + we want to retain compatibility with older gcc versions. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* True if pragma ms_struct is in effect. */ +extern GTY(()) int darwin_ms_struct; + +#define DRIVER_SELF_SPECS \ + "%{gfull:-g -fno-eliminate-unused-debug-symbols} % >= 2 is only" \ + " supported on %<-m64%> targets for" \ + " %<-fnext-runtime%>"); \ + /* Sort out ObjC exceptions: If the runtime is NeXT we default to \ + sjlj for m32 only. */ \ + if (!global_options_set.x_flag_objc_sjlj_exceptions) \ + global_options.x_flag_objc_sjlj_exceptions = \ + flag_next_runtime && !TARGET_64BIT; \ + if (flag_mkernel || flag_apple_kext) \ + { \ + if (flag_use_cxa_atexit == 2) \ + flag_use_cxa_atexit = 0; \ + /* kexts should always be built without the coalesced sections \ + because the kernel loader doesn't grok such sections. */ \ + flag_weak = 0; \ + /* No RTTI in kexts. */ \ + flag_rtti = 0; \ + } \ + } while (0) + +/* Machine dependent cpp options. Don't add more options here, add + them to darwin_cpp_builtins in darwin-c.c. */ + +#undef CPP_SPEC +#define CPP_SPEC "%{static:%{!dynamic:-D__STATIC__}}%{!static:-D__DYNAMIC__}" \ + " %{pthread:-D_REENTRANT}" + +/* This is mostly a clone of the standard LINK_COMMAND_SPEC, plus + precomp, libtool, and fat build additions. + + In general, random Darwin linker flags should go into LINK_SPEC + instead of LINK_COMMAND_SPEC. The command spec is better for + specifying the handling of options understood by generic Unix + linkers, and for positional arguments like libraries. 
*/ + +#define LINK_COMMAND_SPEC_A \ + "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %(linker) \ + %{flto*:% 10.5 mmacosx-version-min= -lgcc_s.10.4) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ + %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ + -lgcc ; \ + :%:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_s.10.4) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ + %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ + -lgcc }" + +/* We specify crt0.o as -lcrt0.o so that ld will search the library path. + + crt3.o provides __cxa_atexit on systems that don't have it. Since + it's only used with C++, which requires passing -shared-libgcc, key + off that to avoid unnecessarily adding a destructor to every + powerpc program built. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{Zdynamiclib: %(darwin_dylib1) } \ + %{!Zdynamiclib:%{Zbundle:%{!static:-lbundle1.o}} \ + %{!Zbundle:%{pg:%{static:-lgcrt0.o} \ + %{!static:%{object:-lgcrt0.o} \ + %{!object:%{preload:-lgcrt0.o} \ + %{!preload:-lgcrt1.o %(darwin_crt2)}}}} \ + %{!pg:%{static:-lcrt0.o} \ + %{!static:%{object:-lcrt0.o} \ + %{!object:%{preload:-lcrt0.o} \ + %{!preload: %(darwin_crt1) \ + %(darwin_crt2)}}}}}} \ + %{shared-libgcc:%:version-compare(< 10.5 mmacosx-version-min= crt3.o%s)}" + +/* The native Darwin linker doesn't necessarily place files in the order + that they're specified on the link line. Thus, it is pointless + to put anything in ENDFILE_SPEC. */ +/* #define ENDFILE_SPEC "" */ + +#define DARWIN_EXTRA_SPECS \ + { "darwin_crt1", DARWIN_CRT1_SPEC }, \ + { "darwin_dylib1", DARWIN_DYLIB1_SPEC }, \ + { "darwin_minversion", DARWIN_MINVERSION_SPEC }, + +#define DARWIN_DYLIB1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \ + %:version-compare(>= 10.5 mmacosx-version-min= -ldylib1.10.5.o)" + +#define DARWIN_CRT1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -lcrt1.o) \ + %:version-compare(>= 10.5 mmacosx-version-min= -lcrt1.10.5.o)" + +/* Default Darwin ASM_SPEC, very simple. */ +#define ASM_SPEC "-arch %(darwin_arch) \ + %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL} \ + %{static}" + +/* We still allow output of STABS. */ + +#define DBX_DEBUGGING_INFO 1 + +#define DWARF2_DEBUGGING_INFO 1 +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + +#define DEBUG_FRAME_SECTION "__DWARF,__debug_frame,regular,debug" +#define DEBUG_INFO_SECTION "__DWARF,__debug_info,regular,debug" +#define DEBUG_ABBREV_SECTION "__DWARF,__debug_abbrev,regular,debug" +#define DEBUG_ARANGES_SECTION "__DWARF,__debug_aranges,regular,debug" +#define DEBUG_MACINFO_SECTION "__DWARF,__debug_macinfo,regular,debug" +#define DEBUG_LINE_SECTION "__DWARF,__debug_line,regular,debug" +#define DEBUG_LOC_SECTION "__DWARF,__debug_loc,regular,debug" +#define DEBUG_PUBNAMES_SECTION "__DWARF,__debug_pubnames,regular,debug" +#define DEBUG_PUBTYPES_SECTION "__DWARF,__debug_pubtypes,regular,debug" +#define DEBUG_STR_SECTION "__DWARF,__debug_str,regular,debug" +#define DEBUG_RANGES_SECTION "__DWARF,__debug_ranges,regular,debug" + +#define TARGET_WANT_DEBUG_PUB_SECTIONS true + +/* When generating stabs debugging, use N_BINCL entries. */ + +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#define DBX_CONTIN_LENGTH 0 + +/* gdb needs a null N_SO at the end of each file for scattered loading. 
*/ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +/* GCC's definition of 'one_only' is the same as its definition of 'weak'. */ +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + +/* Mach-O supports 'weak imports', and 'weak definitions' in coalesced + sections. machopic_select_section ensures that weak variables go in + coalesced sections. Weak aliases (or any other kind of aliases) are + not supported. Weak symbols that aren't visible outside the .s file + are not supported. */ +#define ASM_WEAKEN_DECL(FILE, DECL, NAME, ALIAS) \ + do { \ + if (ALIAS) \ + { \ + warning (0, "alias definitions not supported in Mach-O; ignored"); \ + break; \ + } \ + \ + if (! DECL_EXTERNAL (DECL) && TREE_PUBLIC (DECL)) \ + targetm.asm_out.globalize_label (FILE, NAME); \ + if (DECL_EXTERNAL (DECL)) \ + fputs ("\t.weak_reference ", FILE); \ + else if (lookup_attribute ("weak_import", DECL_ATTRIBUTES (DECL))) \ + break; \ + else if (TREE_PUBLIC (DECL)) \ + fputs ("\t.weak_definition ", FILE); \ + else \ + break; \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + } while (0) + +/* Darwin has the pthread routines in libSystem, which every program + links to, so there's no need for weak-ness for that. */ +#define GTHREAD_USE_WEAK 0 + +/* The Darwin linker imposes two limitations on common symbols: they + can't have hidden visibility, and they can't appear in dylibs. As + a consequence, we should never use common symbols to represent + vague linkage. */ +#undef USE_COMMON_FOR_ONE_ONLY +#define USE_COMMON_FOR_ONE_ONLY 0 + +/* The Darwin linker doesn't want coalesced symbols to appear in + a static archive's table of contents. */ +#undef TARGET_WEAK_NOT_IN_ARCHIVE_TOC +#define TARGET_WEAK_NOT_IN_ARCHIVE_TOC 1 + +/* On Darwin, we don't (at the time of writing) have linkonce sections + with names, so it's safe to make the class data not comdat. */ +#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT hook_bool_void_false + +/* For efficiency, on Darwin the RTTI information that is always + emitted in the standard C++ library should not be COMDAT. */ +#define TARGET_CXX_LIBRARY_RTTI_COMDAT hook_bool_void_false + +/* We make exception information linkonce. */ +#undef TARGET_USES_WEAK_UNWIND_INFO +#define TARGET_USES_WEAK_UNWIND_INFO 1 + +/* We need to use a nonlocal label for the start of an EH frame: the + Darwin linker requires that a coalesced section start with a label. + Unfortunately, it also requires that 'debug' sections don't contain + labels. */ +#undef FRAME_BEGIN_LABEL +#define FRAME_BEGIN_LABEL (for_eh ? "EH_frame" : "Lframe") + +/* Emit a label for the FDE corresponding to DECL. EMPTY means + emit a label for an empty FDE. */ +#define TARGET_ASM_EMIT_UNWIND_LABEL darwin_emit_unwind_label + +/* Emit a label to separate the exception table. */ +#define TARGET_ASM_EMIT_EXCEPT_TABLE_LABEL darwin_emit_except_table_label + +/* Our profiling scheme doesn't LP labels and counter words. */ + +#define NO_PROFILE_COUNTERS 1 + +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP + +#undef INVOKE__main + +#define TARGET_ASM_CONSTRUCTOR machopic_asm_out_constructor +#define TARGET_ASM_DESTRUCTOR machopic_asm_out_destructor + +/* Always prefix with an underscore. */ + +#define USER_LABEL_PREFIX "_" + +/* A dummy symbol that will be replaced with the function base name. */ +#define MACHOPIC_FUNCTION_BASE_NAME "" + +/* Don't output a .file directive. That is only used by the assembler for + error reporting. 
*/ +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END darwin_file_end + +/* Because Mach-O relocations have a counter from 1 to 255 for the + section number they apply to, it is necessary to output all + normal sections before the LTO sections, to make sure that the + sections that may have relocations always have a section number + smaller than 255. */ +#undef TARGET_ASM_LTO_START +#define TARGET_ASM_LTO_START darwin_asm_lto_start +#undef TARGET_ASM_LTO_END +#define TARGET_ASM_LTO_END darwin_asm_lto_end + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", SIZE) + +/* Give ObjC methods pretty symbol names. */ + +#undef OBJC_GEN_METHOD_LABEL +#define OBJC_GEN_METHOD_LABEL(BUF,IS_INST,CLASS_NAME,CAT_NAME,SEL_NAME,NUM) \ + do { if (CAT_NAME) \ + sprintf (BUF, "%c[%s(%s) %s]", (IS_INST) ? '-' : '+', \ + (CLASS_NAME), (CAT_NAME), (SEL_NAME)); \ + else \ + sprintf (BUF, "%c[%s %s]", (IS_INST) ? '-' : '+', \ + (CLASS_NAME), (SEL_NAME)); \ + } while (0) + +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + darwin_asm_declare_object_name ((FILE), (NAME), (DECL)) + +/* The RTTI data (e.g., __ti4name) is common and public (and static), + but it does need to be referenced via indirect PIC data pointers. + The machopic_define_symbol calls are telling the machopic subsystem + that the name *is* defined in this module, so it doesn't need to + make them indirect. */ + +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do { \ + const char *xname = NAME; \ + if (GET_CODE (XEXP (DECL_RTL (DECL), 0)) != SYMBOL_REF) \ + xname = IDENTIFIER_POINTER (DECL_NAME (DECL)); \ + if (! DECL_WEAK (DECL) \ + && ((TREE_STATIC (DECL) \ + && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \ + || DECL_INITIAL (DECL))) \ + machopic_define_symbol (DECL_RTL (DECL)); \ + if ((TREE_STATIC (DECL) \ + && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \ + || DECL_INITIAL (DECL)) \ + (* targetm.encode_section_info) (DECL, DECL_RTL (DECL), false); \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, xname, DECL); \ + } while (0) + +#undef TARGET_ASM_DECLARE_CONSTANT_NAME +#define TARGET_ASM_DECLARE_CONSTANT_NAME darwin_asm_declare_constant_name + +/* Wrap new method names in quotes so the assembler doesn't gag. + Make Objective-C internal symbols local and in doing this, we need + to accommodate the name mangling done by c++ on file scope locals. */ + +int darwin_label_is_anonymous_local_objc_name (const char *name); + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { \ + const char *xname = (NAME); \ + if (! 
strcmp (xname, MACHOPIC_FUNCTION_BASE_NAME)) \ + machopic_output_function_base_name(FILE); \ + else if (xname[0] == '&' || xname[0] == '*') \ + { \ + int len = strlen (xname); \ + if (len > 6 && !strcmp ("$stub", xname + len - 5)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 7 && !strcmp ("$stub\"", xname + len - 6)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 14 && !strcmp ("$non_lazy_ptr", xname + len - 13)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 15 && !strcmp ("$non_lazy_ptr\"", xname + len - 14)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + if (xname[1] != '"' && name_needs_quotes (&xname[1])) \ + fprintf (FILE, "\"%s\"", &xname[1]); \ + else \ + fputs (&xname[1], FILE); \ + } \ + else if (xname[0] == '+' || xname[0] == '-') \ + fprintf (FILE, "\"%s\"", xname); \ + else if (darwin_label_is_anonymous_local_objc_name (xname)) \ + fprintf (FILE, "L%s", xname); \ + else if (!strncmp (xname, ".objc_class_name_", 17)) \ + fprintf (FILE, "%s", xname); \ + else if (xname[0] != '"' && name_needs_quotes (xname)) \ + fprintf (FILE, "\"%s\"", xname); \ + else \ + asm_fprintf (FILE, "%U%s", xname); \ + } while (0) + +/* Output before executable code. */ +#undef TEXT_SECTION_ASM_OP +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before writable data. */ + +#undef DATA_SECTION_ASM_OP +#define DATA_SECTION_ASM_OP "\t.data" + +#undef ALIGN_ASM_OP +#define ALIGN_ASM_OP ".align" + +#undef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t%s\t%d\n", ALIGN_ASM_OP, (LOG)) + +/* The maximum alignment which the object file format can support in + bits. For Mach-O, this is 2^15 bytes. */ + +#undef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (0x8000 * 8) + +#define L2_MAX_OFILE_ALIGNMENT 15 + +/* These are the three variants that emit referenced blank space. */ +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_asm_output_aligned_decl_local \ + ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_asm_output_aligned_decl_common \ + ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +/* The generic version, archs should over-ride where required. */ +#define MACHOPIC_NL_SYMBOL_PTR_SECTION ".non_lazy_symbol_pointer" + +/* Declare the section variables. 
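The quoting rules in ASM_OUTPUT_LABELREF above can be approximated outside the compiler. In the sketch below, name_needs_quotes is a crude stand-in (the real predicate lives elsewhere in the Darwin support code) and only a few of the macro's cases are covered:

#include <stdio.h>
#include <ctype.h>

/* Hypothetical approximation: quote anything that is not a plain
   identifier-ish character.  */
static int
name_needs_quotes_approx (const char *name)
{
  for (; *name; name++)
    if (!isalnum ((unsigned char) *name) && *name != '_'
        && *name != '$' && *name != '.')
      return 1;
  return 0;
}

static void
output_labelref (FILE *f, const char *name)
{
  if (name[0] == '+' || name[0] == '-')                   /* ObjC method names */
    fprintf (f, "\"%s\"", name);
  else if (name[0] != '"' && name_needs_quotes_approx (name))
    fprintf (f, "\"%s\"", name);
  else
    fprintf (f, "_%s", name);     /* USER_LABEL_PREFIX is "_" on Darwin */
}

int
main (void)
{
  output_labelref (stdout, "foo");          fputc ('\n', stdout);
  output_labelref (stdout, "-[Foo bar:]");  fputc ('\n', stdout);
  return 0;
}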
*/ +#ifndef USED_FOR_TARGET +enum darwin_section_enum { +#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) NAME, +#include "darwin-sections.def" +#undef DEF_SECTION + NUM_DARWIN_SECTIONS +}; +extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS]; +#endif + +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION machopic_select_section + +#undef TARGET_ASM_FUNCTION_SECTION +#define TARGET_ASM_FUNCTION_SECTION darwin_function_section + +#undef TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS +#define TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS \ + darwin_function_switched_text_sections + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION machopic_select_rtx_section +#undef TARGET_ASM_UNIQUE_SECTION +#define TARGET_ASM_UNIQUE_SECTION darwin_unique_section +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK machopic_reloc_rw_mask + + +#define ASM_DECLARE_UNRESOLVED_REFERENCE(FILE,NAME) \ + do { \ + if (FILE) { \ + if (MACHOPIC_INDIRECT) \ + fprintf (FILE, "\t.lazy_reference "); \ + else \ + fprintf (FILE, "\t.reference "); \ + assemble_name (FILE, NAME); \ + fprintf (FILE, "\n"); \ + } \ + } while (0) + +#define ASM_DECLARE_CLASS_REFERENCE(FILE,NAME) \ + do { \ + if (FILE) { \ + fprintf (FILE, "\t"); \ + assemble_name (FILE, NAME); \ + fprintf (FILE, "=0\n"); \ + (*targetm.asm_out.globalize_label) (FILE, NAME); \ + } \ + } while (0) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " +#define TARGET_ASM_GLOBALIZE_LABEL darwin_globalize_label + +/* Emit an assembler directive to set visibility for a symbol. Used + to support visibility attribute and Darwin's private extern + feature. */ +#undef TARGET_ASM_ASSEMBLE_VISIBILITY +#define TARGET_ASM_ASSEMBLE_VISIBILITY darwin_assemble_visibility + +/* Extra attributes for Darwin. */ +#define SUBTARGET_ATTRIBUTE_TABLE \ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ \ + { "apple_kext_compatibility", 0, 0, false, true, false, \ + darwin_handle_kext_attribute }, \ + { "weak_import", 0, 0, true, false, false, \ + darwin_handle_weak_import_attribute } + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM)) + +#undef TARGET_ASM_MARK_DECL_PRESERVED +#define TARGET_ASM_MARK_DECL_PRESERVED darwin_mark_decl_preserved + +/* Set on a symbol with SYMBOL_FLAG_FUNCTION or + MACHO_SYMBOL_FLAG_VARIABLE to indicate that the function or + variable has been defined in this translation unit. + When porting Mach-O to new architectures you need to make + sure these aren't clobbered by the backend. */ + +#define MACHO_SYMBOL_FLAG_VARIABLE (SYMBOL_FLAG_MACH_DEP) +#define MACHO_SYMBOL_FLAG_DEFINED ((SYMBOL_FLAG_MACH_DEP) << 1) + +/* Set on a symbol to indicate when fix-and-continue style code + generation is being used and the symbol refers to a static symbol + that should be rebound from new instances of a translation unit to + the original instance of the data. */ + +#define MACHO_SYMBOL_STATIC ((SYMBOL_FLAG_MACH_DEP) << 2) + +/* Symbolic names for various things we might know about a symbol. */ + +enum machopic_addr_class { + MACHOPIC_UNDEFINED, + MACHOPIC_DEFINED_DATA, + MACHOPIC_UNDEFINED_DATA, + MACHOPIC_DEFINED_FUNCTION, + MACHOPIC_UNDEFINED_FUNCTION +}; + +/* Macros defining the various PIC cases. 
*/ + +#undef MACHO_DYNAMIC_NO_PIC_P +#define MACHO_DYNAMIC_NO_PIC_P (TARGET_MACHO_DYNAMIC_NO_PIC) +#undef MACHOPIC_INDIRECT +#define MACHOPIC_INDIRECT (flag_pic || MACHO_DYNAMIC_NO_PIC_P) +#define MACHOPIC_JUST_INDIRECT (MACHO_DYNAMIC_NO_PIC_P) +#undef MACHOPIC_PURE +#define MACHOPIC_PURE (flag_pic && ! MACHO_DYNAMIC_NO_PIC_P) + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO darwin_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING default_strip_name_encoding + +#define GEN_BINDER_NAME_FOR_STUB(BUF,STUB,STUB_LENGTH) \ + do { \ + const char *const stub_ = (STUB); \ + char *buffer_ = (BUF); \ + strcpy (buffer_, stub_); \ + if (stub_[0] == '"') \ + { \ + strcpy (buffer_ + (STUB_LENGTH) - 1, "_binder\""); \ + } \ + else \ + { \ + strcpy (buffer_ + (STUB_LENGTH), "_binder"); \ + } \ + } while (0) + +#define GEN_SYMBOL_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \ + do { \ + const char *const symbol_ = (SYMBOL); \ + char *buffer_ = (BUF); \ + if (name_needs_quotes (symbol_) && symbol_[0] != '"') \ + { \ + sprintf (buffer_, "\"%s\"", symbol_); \ + } \ + else \ + { \ + strcpy (buffer_, symbol_); \ + } \ + } while (0) + +/* Given a symbol name string, create the lazy pointer version + of the symbol name. */ + +#define GEN_LAZY_PTR_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \ + do { \ + const char *symbol_ = (SYMBOL); \ + char *buffer_ = (BUF); \ + if (symbol_[0] == '"') \ + { \ + strcpy (buffer_, "\"L"); \ + strcpy (buffer_ + 2, symbol_ + 1); \ + strcpy (buffer_ + (SYMBOL_LENGTH), "$lazy_ptr\""); \ + } \ + else if (name_needs_quotes (symbol_)) \ + { \ + strcpy (buffer_, "\"L"); \ + strcpy (buffer_ + 2, symbol_); \ + strcpy (buffer_ + (SYMBOL_LENGTH) + 2, "$lazy_ptr\""); \ + } \ + else \ + { \ + strcpy (buffer_, "L"); \ + strcpy (buffer_ + 1, symbol_); \ + strcpy (buffer_ + (SYMBOL_LENGTH) + 1, "$lazy_ptr"); \ + } \ + } while (0) + +#define EH_FRAME_SECTION_NAME "__TEXT" +#define EH_FRAME_SECTION_ATTR ",coalesced,no_toc+strip_static_syms+live_support" + +/* Java runtime class list. */ +#define JCR_SECTION_NAME "__DATA,jcr,regular,no_dead_strip" + +#undef ASM_PREFERRED_EH_DATA_FORMAT +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((CODE) == 2 && (GLOBAL) == 1) \ + ? (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4) : \ + ((CODE) == 1 || (GLOBAL) == 0) ? DW_EH_PE_pcrel : DW_EH_PE_absptr) + +#define ASM_OUTPUT_DWARF_DELTA(FILE,SIZE,LABEL1,LABEL2) \ + darwin_asm_output_dwarf_delta (FILE, SIZE, LABEL1, LABEL2) + +#define ASM_OUTPUT_DWARF_OFFSET(FILE,SIZE,LABEL,BASE) \ + darwin_asm_output_dwarf_offset (FILE, SIZE, LABEL, BASE) + +#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(ASM_OUT_FILE, ENCODING, SIZE, ADDR, DONE) \ + if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1)) { \ + darwin_non_lazy_pcrel (ASM_OUT_FILE, ADDR); \ + goto DONE; \ + } + +/* Experimentally, putting jump tables in text is faster on SPEC. + Also this is needed for correctness for coalesced functions. 
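GEN_LAZY_PTR_NAME_FOR_SYMBOL above juggles three quoting cases. A standalone sketch, with a crude stand-in for name_needs_quotes and a caller-supplied buffer assumed large enough, as in the macro:

#include <stdio.h>
#include <string.h>

static int
needs_quotes (const char *name)   /* crude stand-in for name_needs_quotes */
{
  return strpbrk (name, "+- ") != NULL;
}

static void
gen_lazy_ptr_name (char *buf, const char *symbol)
{
  size_t len = strlen (symbol);

  if (symbol[0] == '"')
    /* "sym"  ->  "Lsym$lazy_ptr"  (re-open and re-close the quotes) */
    sprintf (buf, "\"L%.*s$lazy_ptr\"", (int) (len - 2), symbol + 1);
  else if (needs_quotes (symbol))
    /* sym  ->  "Lsym$lazy_ptr"  */
    sprintf (buf, "\"L%s$lazy_ptr\"", symbol);
  else
    /* sym  ->  Lsym$lazy_ptr  */
    sprintf (buf, "L%s$lazy_ptr", symbol);
}

int
main (void)
{
  char buf[128];
  gen_lazy_ptr_name (buf, "_objc_msgSend");
  puts (buf);
  gen_lazy_ptr_name (buf, "\"_c++ mangled name\"");
  puts (buf);
  return 0;
}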
*/ + +#ifndef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION 1 +#endif + +#define TARGET_TERMINATE_DW2_EH_FRAME_INFO false + +#define TARGET_ASM_INIT_SECTIONS darwin_init_sections +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION darwin_asm_named_section + +#define DARWIN_REGISTER_TARGET_PRAGMAS() \ + do { \ + if (!flag_preprocess_only) \ + cpp_register_pragma (parse_in, NULL, "mark", \ + darwin_pragma_ignore, false); \ + c_register_pragma (0, "options", darwin_pragma_options); \ + c_register_pragma (0, "segment", darwin_pragma_ignore); \ + c_register_pragma (0, "unused", darwin_pragma_unused); \ + c_register_pragma (0, "ms_struct", darwin_pragma_ms_struct); \ + } while (0) + +#undef ASM_APP_ON +#define ASM_APP_ON "" +#undef ASM_APP_OFF +#define ASM_APP_OFF "" + +void darwin_register_frameworks (const char *, const char *, int); +void darwin_register_objc_includes (const char *, const char *, int); +#define TARGET_EXTRA_PRE_INCLUDES darwin_register_objc_includes +#define TARGET_EXTRA_INCLUDES darwin_register_frameworks + +void add_framework_path (char *); +#define TARGET_OPTF add_framework_path + +#define TARGET_POSIX_IO + +/* All new versions of Darwin have C99 functions. */ + +#define TARGET_C99_FUNCTIONS 1 + +#define WINT_TYPE "int" + +/* Every program on darwin links against libSystem which contains the pthread + routines, so there's no need to explicitly call out when doing threaded + work. */ + +#undef GOMP_SELF_SPECS +#define GOMP_SELF_SPECS "" + +/* Darwin disables section anchors by default. + They should be enabled per arch where support exists in that arch. */ +#define TARGET_ASM_OUTPUT_ANCHOR NULL +#define DARWIN_SECTION_ANCHORS 0 + +/* Attempt to turn on execute permission for the stack. This may be + used by TARGET_TRAMPOLINE_INIT if the target needs it (that is, + if the target machine can change execute permissions on a page). + + There is no way to query the execute permission of the stack, so + we always issue the mprotect() call. + + Unfortunately it is not possible to make this namespace-clean. + + Also note that no errors should be emitted by this code; it is + considered dangerous for library calls to send messages to + stdout/stderr. */ + +#define ENABLE_EXECUTE_STACK \ +extern void __enable_execute_stack (void *); \ +void \ +__enable_execute_stack (void *addr) \ +{ \ + extern int mprotect (void *, size_t, int); \ + extern int getpagesize (void); \ + static int size; \ + static long mask; \ + \ + char *page, *end; \ + \ + if (size == 0) \ + { \ + size = getpagesize(); \ + mask = ~((long) size - 1); \ + } \ + \ + page = (char *) (((long) addr) & mask); \ + end = (char *) ((((long) (addr + (TARGET_64BIT ? 48 : 40))) & mask) + size); \ + \ + /* 7 == PROT_READ | PROT_WRITE | PROT_EXEC */ \ + (void) mprotect (page, end - page, 7); \ +} + +/* For Apple KEXTs, we make the constructors return this to match gcc + 2.95. */ +#define TARGET_CXX_CDTOR_RETURNS_THIS (darwin_kextabi_p) +#define TARGET_KEXTABI flag_apple_kext + +/* We have target-specific builtins. 
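The page rounding done by ENABLE_EXECUTE_STACK above is shown standalone below. The 48/40-byte window mirrors the trampoline sizes assumed in the macro; sysconf and the PROT_* constants replace the hard-coded getpagesize()/7, and the perror call is for the demo only (the in-tree version deliberately stays silent).

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static void
enable_execute (void *addr, int is_64bit)
{
  long size = sysconf (_SC_PAGESIZE);
  long mask = ~(size - 1);
  /* Round the trampoline's address range out to whole pages.  */
  char *page = (char *) ((long) addr & mask);
  char *end  = (char *) ((((long) addr + (is_64bit ? 48 : 40)) & mask) + size);

  if (mprotect (page, end - page, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
    perror ("mprotect");
}

int
main (void)
{
  static char trampoline[64];
  enable_execute (trampoline, sizeof (void *) == 8);
  return 0;
}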
*/ +#define TARGET_FOLD_BUILTIN darwin_fold_builtin + +#define TARGET_OBJC_CONSTRUCT_STRING_OBJECT \ + darwin_objc_construct_string + +#define TARGET_STRING_OBJECT_REF_TYPE_P \ + darwin_cfstring_ref_p + +#define TARGET_N_FORMAT_TYPES 1 +#define TARGET_FORMAT_TYPES darwin_additional_format_types + +#define TARGET_CHECK_STRING_OBJECT_FORMAT_ARG \ + darwin_check_cfstring_format_arg + +#define TARGET_HAS_TARGETCM 1 + +#ifndef USED_FOR_TARGET +extern void darwin_driver_init (unsigned int *,struct cl_decoded_option **); +#define GCC_DRIVER_HOST_INITIALIZATION \ + darwin_driver_init (&decoded_options_count, &decoded_options) +#endif + +/* The Apple assembler and linker do not support constructor priorities. */ +#undef SUPPORTS_INIT_PRIORITY +#define SUPPORTS_INIT_PRIORITY 0 + +#endif /* CONFIG_DARWIN_H */ diff --git a/gcc/config/darwin.opt b/gcc/config/darwin.opt new file mode 100644 index 000000000..a31ee074c --- /dev/null +++ b/gcc/config/darwin.opt @@ -0,0 +1,390 @@ +; Processor-independent options for Darwin. + +; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; Various linker options have a -Z added so that they can get to specs +; processing without interference. Note that an option name with a +; prefix that matches another option name, that also takes an +; argument, being mapped to a -Z linker option, needs to be modified +; so the prefix is different, otherwise a '*' after the shorter option +; will match with the longer one. 
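The prefix hazard described above is mechanical to check. The sketch below flags it for a small hypothetical sample: the second entry is the un-renamed spelling that would collide with Zseg_addr_table, the third is the Zfn_ spelling actually used below.

#include <stdio.h>
#include <string.h>

int
main (void)
{
  const char *zopts[] = { "Zseg_addr_table",
                          "Zseg_addr_table_filename",      /* hypothetical, un-renamed */
                          "Zfn_seg_addr_table_filename" }; /* actual spelling */
  int n = (int) (sizeof zopts / sizeof zopts[0]);
  int i, j;

  /* If one option name is a prefix of another, a spec pattern for the
     shorter one with a trailing '*' would also match the longer one.  */
  for (i = 0; i < n; i++)
    for (j = 0; j < n; j++)
      if (i != j && strncmp (zopts[i], zopts[j], strlen (zopts[i])) == 0)
        printf ("prefix clash: %s matches %s\n", zopts[i], zopts[j]);
  return 0;
}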
+ +all_load +Driver Alias(Zall_load) + +allowable_client +Driver Separate Alias(Zallowable_client) + +arch_errors_fatal +Driver Alias(Zarch_errors_fatal) + +bind_at_load +Driver Alias(Zbind_at_load) + +bundle +Driver Alias(Zbundle) + +bundle_loader +Driver Separate Alias(Zbundle_loader) + +dead_strip +Driver Alias(Zdead_strip) + +dependency-file +C ObjC C++ ObjC++ Separate Alias(MF) MissingArgError(missing filename after %qs) + +dylib_file +Driver Separate Alias(Zdylib_file) + +dylinker +Driver + +dynamic +Driver Alias(Zdynamic) + +dynamiclib +Driver Alias(Zdynamiclib) + +exported_symbols_list +Driver Separate Alias(Zexported_symbols_list) + +filelist +Driver RejectNegative Separate + +findirect-virtual-calls +Driver RejectNegative + +flat_namespace +Driver RejectNegative Alias(Zflat_namespace) + +force_cpusubtype_ALL +Driver RejectNegative Alias(Zforce_cpusubtype_ALL) + +force_flat_namespace +Driver RejectNegative Alias(Zforce_flat_namespace) + +framework +Driver RejectNegative Separate + +fterminated-vtables +Driver RejectNegative + +gfull +Driver + +gused +Driver + +headerpad_max_install_names +Driver + +image_base +Driver Separate Alias(Zimage_base) + +init +Driver Separate Alias(Zinit) + +install_name +Driver Separate Alias(Zinstall_name) + +keep_private_externs +Driver + +mconstant-cfstrings +Target Report Var(darwin_constant_cfstrings) Init(1) +Generate compile-time CFString objects + +multi_module +Driver RejectNegative Alias(Zmulti_module) + +multiply_defined +Driver RejectNegative Separate Alias(Zmultiply_defined) + +multiply_defined_unused +Driver RejectNegative Separate Alias(Zmultiplydefinedunused) + +no_dead_strip_inits_and_terms +Driver Alias(Zno_dead_strip_inits_and_terms) + +nofixprebinding +Driver + +nomultidefs +Driver + +noprebind +Driver + +noseglinkedit +Driver + +object +Driver + +prebind +Driver + +prebind_all_twolevel_modules +Driver + +preload +Driver + +private_bundle +Driver + +pthread +Driver + +seg_addr_table +Driver Separate Alias(Zseg_addr_table) + +seg_addr_table_filename +Driver Separate Alias(Zfn_seg_addr_table_filename) + +segaddr +Driver Separate Args(2) Alias(Zsegaddr) + +seglinkedit +Driver + +segs_read_only_addr +Driver Separate Alias(Zsegs_read_only_addr) + +segs_read_write_addr +Driver Separate Alias(Zsegs_read_write_addr) + +single_module +Driver Alias(Zsingle_module) + +twolevel_namespace +Driver + +twolevel_namespace_hints +Driver + +umbrella +Driver Separate Alias(Zumbrella) + +unexported_symbols_list +Driver Separate Alias(Zunexported_symbols_list) + +weak_reference_mismatches +Driver Separate Alias(Zweak_reference_mismatches) + +whatsloaded +Driver + +whyload +Driver + +y +Driver Joined + +Mach +Driver + +Wnonportable-cfstrings +Target Report Var(darwin_warn_nonportable_cfstrings) Init(1) Warning +Warn if constant CFString objects contain non-portable characters + +; Use new-style pic stubs if this is true, x86 only so far. +matt-stubs +Target Report Var(darwin_macho_att_stub) Init(1) +Generate AT&T-style stubs for Mach-O + +mdynamic-no-pic +Target Common Report Mask(MACHO_DYNAMIC_NO_PIC) +Generate code suitable for executables (NOT shared libs) + +mfix-and-continue +Target Report Var(darwin_fix_and_continue) +Generate code suitable for fast turn around debugging + +; The Init here is for the convenience of GCC developers, so that cc1 +; and cc1plus don't crash if no -mmacosx-version-min is passed. The +; driver will always pass a -mmacosx-version-min, so in normal use the +; Init is never used. 
Useful for setting the OS on which people +; ususally debug. +mmacosx-version-min= +Target Joined Report Var(darwin_macosx_version_min) Init("10.6") +The earliest MacOS X version on which this program will run + +mone-byte-bool +Target RejectNegative Report Var(darwin_one_byte_bool) +Set sizeof(bool) to 1 + +fapple-kext +Target Report C++ Var(flag_apple_kext) +Generate code for darwin loadable kernel extensions + +mkernel +Target Report Var(flag_mkernel) +Generate code for the kernel or loadable kernel extensions + +iframework +Target RejectNegative C ObjC C++ ObjC++ Joined Separate +-iframework Add to the end of the system framework include path + +X +Driver + +Zall_load +Driver + +Zallowable_client +Driver Separate + +Zarch_errors_fatal +Driver + +Zbind_at_load +Driver + +Zbundle +Driver + +Zbundle_loader +Driver Separate + +Zdead_strip +Driver + +Zdylib_file +Driver Separate + +Zdynamic +Driver + +Zdynamiclib +Driver + +Zexported_symbols_list +Driver Separate + +Zfn_seg_addr_table_filename +Driver Separate + +Zflat_namespace +Driver + +Zforce_cpusubtype_ALL +Driver + +Zforce_flat_namespace +Driver + +Zimage_base +Driver Separate + +Zinit +Driver Separate + +Zinstall_name +Driver Separate + +Zmulti_module +Driver + +Zmultiply_defined +Driver Separate + +Zmultiplydefinedunused +Driver Separate + +Zno_dead_strip_inits_and_terms +Driver + +Zseg_addr_table +Driver Separate + +Zsegaddr +Driver Separate Args(2) + +Zsegs_read_only_addr +Driver Separate + +Zsegs_read_write_addr +Driver Separate + +Zsingle_module +Driver + +Zumbrella +Driver Separate + +Zunexported_symbols_list +Driver Separate + +Zweak_reference_mismatches +Driver Separate + +client_name +Driver Separate + +compatibility_version +Driver Separate + +current_version +Driver Separate + +dylinker_install_name +Driver Separate + +pagezero_size +Driver Separate + +read_only_relocs +Driver Separate + +sectalign +Driver Separate Args(3) + +sectcreate +Driver Separate Args(3) + +sectobjectsymbols +Driver Separate Args(2) + +sectorder +Driver Separate Args(3) + +seg1addr +Driver Separate + +segcreate +Driver Separate Args(3) + +segprot +Driver Separate Args(3) + +segs_read_only_addr +Driver Separate + +segs_read_write_addr +Driver Separate + +sub_library +Driver Separate + +sub_umbrella +Driver Separate + +undefined +Driver Separate diff --git a/gcc/config/darwin10.h b/gcc/config/darwin10.h new file mode 100644 index 000000000..5c205945c --- /dev/null +++ b/gcc/config/darwin10.h @@ -0,0 +1,32 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. + Contributed by Jack Howarth . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef LIB_SPEC +#define LIB_SPEC "%{!static: -lSystem }" + +/* Fix PR41260 by passing -no_compact_unwind on darwin10 and later until + unwinder in libSystem is fixed to digest new epilog unwinding notes. + + Fix PR47558 by linking against libSystem ahead of libgcc_ext. 
*/ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ +"%:version-compare(>= 10.6 mmacosx-version-min= -no_compact_unwind) \ + %{!static:%{!static-libgcc: \ + %:version-compare(>= 10.6 mmacosx-version-min= -lSystem) } } %G %L" diff --git a/gcc/config/darwin9.h b/gcc/config/darwin9.h new file mode 100644 index 000000000..828118372 --- /dev/null +++ b/gcc/config/darwin9.h @@ -0,0 +1,50 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 2006, 2007, 2010 Free Software Foundation, Inc. + Contributed by Apple Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Prefer DWARF2. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#define DARWIN_PREFER_DWARF + +/* Since DWARF2 is default, conditions for running dsymutil are different. */ +#undef DSYMUTIL_SPEC +#define DSYMUTIL_SPEC \ + "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %{v} \ + %{g*:%{!gstabs*:%{!g0: -idsym}}}\ + %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.f|.f90|.f95|.f03|.f77|.for|.F|.F90|.F95|.F03: \ + %{g*:%{!gstabs*:%{!g0: -dsym}}}}}}}}}}}" + +/* Tell collect2 to run dsymutil for us as necessary. */ +#define COLLECT_RUN_DSYMUTIL 1 + +/* libSystem contains unwind information for signal frames. */ +#define DARWIN_LIBSYSTEM_HAS_UNWIND + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do { \ + unsigned HOST_WIDE_INT _new_size = (SIZE); \ + fprintf ((FILE), "\t.comm "); \ + assemble_name ((FILE), (NAME)); \ + if (_new_size == 0) _new_size = 1; \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + _new_size, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ + } while (0) diff --git a/gcc/config/dbx.h b/gcc/config/dbx.h new file mode 100644 index 000000000..0f86e164c --- /dev/null +++ b/gcc/config/dbx.h @@ -0,0 +1,27 @@ +/* Prefer DBX (stabs) debugging information. + Copyright (C) 1996, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file causes gcc to prefer using DBX (stabs) debugging + information. The configure script will add a #include of this file + to tm.h when --with-stabs is used for certain targets. 
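The aligned-common emission in darwin9.h above reduces to a .comm directive whose third operand is the log2 of the alignment in bytes. A standalone sketch, with plain C replacing floor_log2 and the host-wide-int formatting, and a leading underscore standing in for Darwin's user label prefix:

#include <stdio.h>

static unsigned int
floor_log2_ul (unsigned long x)   /* stand-in for GCC's floor_log2 */
{
  unsigned int l = 0;
  while (x >>= 1)
    l++;
  return l;
}

static void
output_aligned_common (FILE *f, const char *name,
                       unsigned long size, unsigned int align_bits)
{
  if (size == 0)
    size = 1;                     /* mirror the zero-size bump in the macro */
  fprintf (f, "\t.comm _%s,%lu,%u\n", name, size,
           floor_log2_ul (align_bits / 8));   /* BITS_PER_UNIT == 8 */
}

int
main (void)
{
  output_aligned_common (stdout, "buffer", 4096, 128);  /* 16-byte aligned */
  output_aligned_common (stdout, "flag", 0, 8);         /* size bumped to 1 */
  return 0;
}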
*/ + +#define DBX_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG diff --git a/gcc/config/dbxcoff.h b/gcc/config/dbxcoff.h new file mode 100644 index 000000000..a2b36c0d6 --- /dev/null +++ b/gcc/config/dbxcoff.h @@ -0,0 +1,62 @@ +/* Definitions needed when using stabs embedded in COFF sections. + Copyright (C) 1996, 2004, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file may be included by any COFF target which wishes to + support -gstabs generating stabs in sections, as produced by gas + and understood by gdb. */ + +/* Output DBX (stabs) debugging information if doing -gstabs. */ + +#define DBX_DEBUGGING_INFO 1 + +/* Generate SDB debugging information by default. */ + +#ifndef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE SDB_DEBUG +#endif + +/* Be function-relative for block and source line stab directives. */ + +#define DBX_BLOCKS_FUNCTION_RELATIVE 1 + +/* but, to make this work, functions must appear prior to line info. */ + +#define DBX_FUNCTION_FIRST + +/* Generate a blank trailing N_SO to mark the end of the .o file, since + we can't depend upon the linker to mark .o file boundaries with + embedded stabs. */ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +/* Like block addresses, stabs line numbers are relative to the + current function. */ + +#define DBX_LINES_FUNCTION_RELATIVE 1 + +/* When generating stabs debugging, use N_BINCL entries. */ + +#undef DBX_USE_BINCL +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif diff --git a/gcc/config/dbxelf.h b/gcc/config/dbxelf.h new file mode 100644 index 000000000..8d3c26507 --- /dev/null +++ b/gcc/config/dbxelf.h @@ -0,0 +1,68 @@ +/* Definitions needed when using stabs embedded in ELF sections. + Copyright (C) 1999, 2004, 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +/* This file may be included by any ELF target which wishes to + support -gstabs generating stabs in sections, as produced by gas + and understood by gdb. */ + +#ifndef GCC_DBX_ELF_H +#define GCC_DBX_ELF_H + +/* Output DBX (stabs) debugging information if doing -gstabs. */ + +#define DBX_DEBUGGING_INFO 1 + +/* Make LBRAC and RBRAC addresses relative to the start of the + function. The native Solaris stabs debugging format works this + way, gdb expects it, and it reduces the number of relocation + entries... */ + +#define DBX_BLOCKS_FUNCTION_RELATIVE 1 + +/* ... but, to make this work, functions must appear prior to line info. */ + +#define DBX_FUNCTION_FIRST + +/* When generating stabs debugging, use N_BINCL entries. */ + +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif + +/* Like block addresses, stabs line numbers are relative to the + current function. */ + +#define DBX_LINES_FUNCTION_RELATIVE 1 + +/* Generate a blank trailing N_SO to mark the end of the .o file, since + we can't depend upon the linker to mark .o file boundaries with + embedded stabs. */ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +#endif /* ! GCC_DBX_ELF_H */ diff --git a/gcc/config/dfp-bit.c b/gcc/config/dfp-bit.c new file mode 100644 index 000000000..19f2fdae6 --- /dev/null +++ b/gcc/config/dfp-bit.c @@ -0,0 +1,680 @@ +/* This is a software decimal floating point library. + Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* This implements IEEE 754 decimal floating point arithmetic, but + does not provide a mechanism for setting the rounding mode, or for + generating or handling exceptions. Conversions between decimal + floating point types and other types depend on C library functions. + + Contributed by Ben Elliston . */ + +#include +#include +/* FIXME: compile with -std=gnu99 to get these from stdlib.h */ +extern float strtof (const char *, char **); +extern long double strtold (const char *, char **); +#include +#include + +#include "config/dfp-bit.h" + +/* Forward declarations. */ +#if WIDTH == 32 || WIDTH_TO == 32 +void __host_to_ieee_32 (_Decimal32 in, decimal32 *out); +void __ieee_to_host_32 (decimal32 in, _Decimal32 *out); +#endif +#if WIDTH == 64 || WIDTH_TO == 64 +void __host_to_ieee_64 (_Decimal64 in, decimal64 *out); +void __ieee_to_host_64 (decimal64 in, _Decimal64 *out); +#endif +#if WIDTH == 128 || WIDTH_TO == 128 +void __host_to_ieee_128 (_Decimal128 in, decimal128 *out); +void __ieee_to_host_128 (decimal128 in, _Decimal128 *out); +#endif + +/* A pointer to a binary decFloat operation. 
*/ +typedef decFloat* (*dfp_binary_func) + (decFloat *, const decFloat *, const decFloat *, decContext *); + +/* Binary operations. */ + +/* Use a decFloat (decDouble or decQuad) function to perform a DFP + binary operation. */ +static inline decFloat +dfp_binary_op (dfp_binary_func op, decFloat arg_a, decFloat arg_b) +{ + decFloat result; + decContext context; + + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + + /* Perform the operation. */ + op (&result, &arg_a, &arg_b, &context); + + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + { + /* decNumber exception flags we care about here. */ + int ieee_flags; + int dec_flags = DEC_IEEE_854_Division_by_zero | DEC_IEEE_854_Inexact + | DEC_IEEE_854_Invalid_operation | DEC_IEEE_854_Overflow + | DEC_IEEE_854_Underflow; + dec_flags &= context.status; + ieee_flags = DFP_IEEE_FLAGS (dec_flags); + if (ieee_flags != 0) + DFP_HANDLE_EXCEPTIONS (ieee_flags); + } + + return result; +} + +#if WIDTH == 32 +/* The decNumber package doesn't provide arithmetic for decSingle (32 bits); + convert to decDouble, use the operation for that, and convert back. */ +static inline _Decimal32 +d32_binary_op (dfp_binary_func op, _Decimal32 arg_a, _Decimal32 arg_b) +{ + union { _Decimal32 c; decSingle f; } a32, b32, res32; + decDouble a, b, res; + decContext context; + + /* Widen the operands and perform the operation. */ + a32.c = arg_a; + b32.c = arg_b; + decSingleToWider (&a32.f, &a); + decSingleToWider (&b32.f, &b); + res = dfp_binary_op (op, a, b); + + /* Narrow the result, which might result in an underflow or overflow. */ + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + decSingleFromWider (&res32.f, &res, &context); + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + { + /* decNumber exception flags we care about here. */ + int ieee_flags; + int dec_flags = DEC_IEEE_854_Inexact | DEC_IEEE_854_Overflow + | DEC_IEEE_854_Underflow; + dec_flags &= context.status; + ieee_flags = DFP_IEEE_FLAGS (dec_flags); + if (ieee_flags != 0) + DFP_HANDLE_EXCEPTIONS (ieee_flags); + } + + return res32.c; +} +#else +/* decFloat operations are supported for decDouble (64 bits) and + decQuad (128 bits). The bit patterns for the types are the same. */ +static inline DFP_C_TYPE +dnn_binary_op (dfp_binary_func op, DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + union { DFP_C_TYPE c; decFloat f; } a, b, result; + + a.c = arg_a; + b.c = arg_b; + result.f = dfp_binary_op (op, a.f, b.f); + return result.c; +} +#endif + +/* Comparison operations. */ + +/* Use a decFloat (decDouble or decQuad) function to perform a DFP + comparison. */ +static inline CMPtype +dfp_compare_op (dfp_binary_func op, decFloat arg_a, decFloat arg_b) +{ + decContext context; + decFloat res; + int result; + + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + + /* Perform the comparison. */ + op (&res, &arg_a, &arg_b, &context); + + if (DEC_FLOAT_IS_SIGNED (&res)) + result = -1; + else if (DEC_FLOAT_IS_ZERO (&res)) + result = 0; + else if (DEC_FLOAT_IS_NAN (&res)) + result = -2; + else + result = 1; + + return (CMPtype) result; +} + +#if WIDTH == 32 +/* The decNumber package doesn't provide comparisons for decSingle (32 bits); + convert to decDouble, use the operation for that, and convert back. 
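The wrappers above (dfp_binary_op plus the d32_/dnn_ front ends) are what the exported arithmetic entry points in this file reduce to. A minimal caller-level sketch, assuming the DPD symbol naming that dfp-bit.h selects further below and a target that relies on this software DFP implementation; sum_dd is an illustrative name only:

/* Hedged sketch, not part of the patch: with WIDTH == 64 the compiler
   lowers the addition below to __dpd_adddd3 (DFP_ADD in dfp-bit.h),
   which is dnn_binary_op (decDoubleAdd, a, b) as defined above.  */
_Decimal64
sum_dd (_Decimal64 a, _Decimal64 b)
{
  return a + b;
}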
*/ +static inline CMPtype +d32_compare_op (dfp_binary_func op, _Decimal32 arg_a, _Decimal32 arg_b) +{ + union { _Decimal32 c; decSingle f; } a32, b32; + decDouble a, b; + + a32.c = arg_a; + b32.c = arg_b; + decSingleToWider (&a32.f, &a); + decSingleToWider (&b32.f, &b); + return dfp_compare_op (op, a, b); +} +#else +/* decFloat comparisons are supported for decDouble (64 bits) and + decQuad (128 bits). The bit patterns for the types are the same. */ +static inline CMPtype +dnn_compare_op (dfp_binary_func op, DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + union { DFP_C_TYPE c; decFloat f; } a, b; + + a.c = arg_a; + b.c = arg_b; + return dfp_compare_op (op, a.f, b.f); +} +#endif + +#if defined(L_conv_sd) +void +__host_to_ieee_32 (_Decimal32 in, decimal32 *out) +{ + memcpy (out, &in, 4); +} + +void +__ieee_to_host_32 (decimal32 in, _Decimal32 *out) +{ + memcpy (out, &in, 4); +} +#endif /* L_conv_sd */ + +#if defined(L_conv_dd) +void +__host_to_ieee_64 (_Decimal64 in, decimal64 *out) +{ + memcpy (out, &in, 8); +} + +void +__ieee_to_host_64 (decimal64 in, _Decimal64 *out) +{ + memcpy (out, &in, 8); +} +#endif /* L_conv_dd */ + +#if defined(L_conv_td) +void +__host_to_ieee_128 (_Decimal128 in, decimal128 *out) +{ + memcpy (out, &in, 16); +} + +void +__ieee_to_host_128 (decimal128 in, _Decimal128 *out) +{ + memcpy (out, &in, 16); +} +#endif /* L_conv_td */ + +#if defined(L_addsub_sd) || defined(L_addsub_dd) || defined(L_addsub_td) +DFP_C_TYPE +DFP_ADD (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + return DFP_BINARY_OP (DEC_FLOAT_ADD, arg_a, arg_b); +} + +DFP_C_TYPE +DFP_SUB (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + return DFP_BINARY_OP (DEC_FLOAT_SUBTRACT, arg_a, arg_b); +} +#endif /* L_addsub */ + +#if defined(L_mul_sd) || defined(L_mul_dd) || defined(L_mul_td) +DFP_C_TYPE +DFP_MULTIPLY (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + return DFP_BINARY_OP (DEC_FLOAT_MULTIPLY, arg_a, arg_b); +} +#endif /* L_mul */ + +#if defined(L_div_sd) || defined(L_div_dd) || defined(L_div_td) +DFP_C_TYPE +DFP_DIVIDE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + return DFP_BINARY_OP (DEC_FLOAT_DIVIDE, arg_a, arg_b); +} +#endif /* L_div */ + +#if defined (L_eq_sd) || defined (L_eq_dd) || defined (L_eq_td) +CMPtype +DFP_EQ (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + CMPtype stat; + stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b); + /* For EQ return zero for true, nonzero for false. */ + return stat != 0; +} +#endif /* L_eq */ + +#if defined (L_ne_sd) || defined (L_ne_dd) || defined (L_ne_td) +CMPtype +DFP_NE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + int stat; + stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b); + /* For NE return zero for true, nonzero for false. */ + if (__builtin_expect (stat == -2, 0)) /* An operand is NaN. */ + return 1; + return stat != 0; +} +#endif /* L_ne */ + +#if defined (L_lt_sd) || defined (L_lt_dd) || defined (L_lt_td) +CMPtype +DFP_LT (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + int stat; + stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b); + /* For LT return -1 (<0) for true, 1 for false. */ + return (stat == -1) ? -1 : 1; +} +#endif /* L_lt */ + +#if defined (L_gt_sd) || defined (L_gt_dd) || defined (L_gt_td) +CMPtype +DFP_GT (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + int stat; + stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b); + /* For GT return 1 (>0) for true, -1 for false. */ + return (stat == 1) ? 
1 : -1; +} +#endif + +#if defined (L_le_sd) || defined (L_le_dd) || defined (L_le_td) +CMPtype +DFP_LE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + int stat; + stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b); + /* For LE return 0 (<= 0) for true, 1 for false. */ + if (__builtin_expect (stat == -2, 0)) /* An operand is NaN. */ + return 1; + return stat == 1; +} +#endif /* L_le */ + +#if defined (L_ge_sd) || defined (L_ge_dd) || defined (L_ge_td) +CMPtype +DFP_GE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + int stat; + stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b); + /* For GE return 1 (>=0) for true, -1 for false. */ + if (__builtin_expect (stat == -2, 0)) /* An operand is NaN. */ + return -1; + return (stat != -1) ? 1 : -1; +} +#endif /* L_ge */ + +#define BUFMAX 128 + +/* Check for floating point exceptions that are relevant for conversions + between decimal float values and handle them. */ +static inline void +dfp_conversion_exceptions (const int status) +{ + /* decNumber exception flags we care about here. */ + int ieee_flags; + int dec_flags = DEC_IEEE_854_Inexact | DEC_IEEE_854_Invalid_operation + | DEC_IEEE_854_Overflow; + dec_flags &= status; + ieee_flags = DFP_IEEE_FLAGS (dec_flags); + if (ieee_flags != 0) + DFP_HANDLE_EXCEPTIONS (ieee_flags); +} + +#if defined (L_sd_to_dd) +/* Use decNumber to convert directly from _Decimal32 to _Decimal64. */ +_Decimal64 +DFP_TO_DFP (_Decimal32 f_from) +{ + union { _Decimal32 c; decSingle f; } from; + union { _Decimal64 c; decDouble f; } to; + + from.c = f_from; + to.f = *decSingleToWider (&from.f, &to.f); + return to.c; +} +#endif + +#if defined (L_sd_to_td) +/* Use decNumber to convert directly from _Decimal32 to _Decimal128. */ +_Decimal128 +DFP_TO_DFP (_Decimal32 f_from) +{ + union { _Decimal32 c; decSingle f; } from; + union { _Decimal128 c; decQuad f; } to; + decDouble temp; + + from.c = f_from; + temp = *decSingleToWider (&from.f, &temp); + to.f = *decDoubleToWider (&temp, &to.f); + return to.c; +} +#endif + +#if defined (L_dd_to_td) +/* Use decNumber to convert directly from _Decimal64 to _Decimal128. */ +_Decimal128 +DFP_TO_DFP (_Decimal64 f_from) +{ + union { _Decimal64 c; decDouble f; } from; + union { _Decimal128 c; decQuad f; } to; + + from.c = f_from; + to.f = *decDoubleToWider (&from.f, &to.f); + return to.c; +} +#endif + +#if defined (L_dd_to_sd) +/* Use decNumber to convert directly from _Decimal64 to _Decimal32. */ +_Decimal32 +DFP_TO_DFP (_Decimal64 f_from) +{ + union { _Decimal32 c; decSingle f; } to; + union { _Decimal64 c; decDouble f; } from; + decContext context; + + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + from.c = f_from; + to.f = *decSingleFromWider (&to.f, &from.f, &context); + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + dfp_conversion_exceptions (context.status); + return to.c; +} +#endif + +#if defined (L_td_to_sd) +/* Use decNumber to convert directly from _Decimal128 to _Decimal32. 
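The comparison entry points above collapse dfp_compare_op's four-way result (with -2 flagging a NaN operand) into values that compiled code tests against zero. A hedged caller-side sketch; the wrapper names dd_less and dd_equal are illustrative, and CMPtype is shown as plain int although the real type uses the target's __libgcc_cmp_return__ mode (see dfp-bit.h below):

/* Hedged sketch, not part of the patch (DPD names, WIDTH == 64).  */
extern int __dpd_ltdd2 (_Decimal64, _Decimal64);   /* DFP_LT */
extern int __dpd_eqdd2 (_Decimal64, _Decimal64);   /* DFP_EQ */

int dd_less  (_Decimal64 a, _Decimal64 b) { return __dpd_ltdd2 (a, b) < 0; }
int dd_equal (_Decimal64 a, _Decimal64 b) { return __dpd_eqdd2 (a, b) == 0; }

/* With a NaN operand DFP_LT returns 1 and DFP_EQ returns nonzero, so both
   tests come out false, as IEEE 754 unordered comparisons require.  */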
*/ +_Decimal32 +DFP_TO_DFP (_Decimal128 f_from) +{ + union { _Decimal32 c; decSingle f; } to; + union { _Decimal128 c; decQuad f; } from; + decDouble temp; + decContext context; + + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + from.c = f_from; + temp = *decDoubleFromWider (&temp, &from.f, &context); + to.f = *decSingleFromWider (&to.f, &temp, &context); + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + dfp_conversion_exceptions (context.status); + return to.c; +} +#endif + +#if defined (L_td_to_dd) +/* Use decNumber to convert directly from _Decimal128 to _Decimal64. */ +_Decimal64 +DFP_TO_DFP (_Decimal128 f_from) +{ + union { _Decimal64 c; decDouble f; } to; + union { _Decimal128 c; decQuad f; } from; + decContext context; + + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + from.c = f_from; + to.f = *decDoubleFromWider (&to.f, &from.f, &context); + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + dfp_conversion_exceptions (context.status); + return to.c; +} +#endif + +#if defined (L_dd_to_si) || defined (L_td_to_si) \ + || defined (L_dd_to_usi) || defined (L_td_to_usi) +/* Use decNumber to convert directly from decimal float to integer types. */ +INT_TYPE +DFP_TO_INT (DFP_C_TYPE x) +{ + union { DFP_C_TYPE c; decFloat f; } u; + decContext context; + INT_TYPE i; + + decContextDefault (&context, DEC_INIT_DECIMAL128); + context.round = DEC_ROUND_DOWN; + u.c = x; + i = DEC_FLOAT_TO_INT (&u.f, &context, context.round); + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + dfp_conversion_exceptions (context.status); + return i; +} +#endif + +#if defined (L_sd_to_si) || (L_sd_to_usi) +/* Use decNumber to convert directly from decimal float to integer types. */ +INT_TYPE +DFP_TO_INT (_Decimal32 x) +{ + union { _Decimal32 c; decSingle f; } u32; + decDouble f64; + decContext context; + INT_TYPE i; + + decContextDefault (&context, DEC_INIT_DECIMAL128); + context.round = DEC_ROUND_DOWN; + u32.c = x; + f64 = *decSingleToWider (&u32.f, &f64); + i = DEC_FLOAT_TO_INT (&f64, &context, context.round); + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + dfp_conversion_exceptions (context.status); + return i; +} +#endif + +#if defined (L_sd_to_di) || defined (L_dd_to_di) || defined (L_td_to_di) \ + || defined (L_sd_to_udi) || defined (L_dd_to_udi) || defined (L_td_to_udi) +/* decNumber doesn't provide support for conversions to 64-bit integer + types, so do it the hard way. */ +INT_TYPE +DFP_TO_INT (DFP_C_TYPE x) +{ + /* decNumber's decimal* types have the same format as C's _Decimal* + types, but they have different calling conventions. */ + + /* TODO: Decimal float to integer conversions should raise FE_INVALID + if the result value does not fit into the result type. */ + + IEEE_TYPE s; + char buf[BUFMAX]; + char *pos; + decNumber qval, n1, n2; + decContext context; + + /* Use a large context to avoid losing precision. */ + decContextDefault (&context, DEC_INIT_DECIMAL128); + /* Need non-default rounding mode here. */ + context.round = DEC_ROUND_DOWN; + + HOST_TO_IEEE (x, &s); + TO_INTERNAL (&s, &n1); + /* Rescale if the exponent is less than zero. */ + decNumberToIntegralValue (&n2, &n1, &context); + /* Get a value to use for the quantize call. */ + decNumberFromString (&qval, "1.", &context); + /* Force the exponent to zero. */ + decNumberQuantize (&n1, &n2, &qval, &context); + /* Get a string, which at this point will not include an exponent. */ + decNumberToString (&n1, buf); + /* Ignore the fractional part. 
*/ + pos = strchr (buf, '.'); + if (pos) + *pos = 0; + /* Use a C library function to convert to the integral type. */ + return STR_TO_INT (buf, NULL, 10); +} +#endif + +#if defined (L_si_to_dd) || defined (L_si_to_td) \ + || defined (L_usi_to_dd) || defined (L_usi_to_td) +/* Use decNumber to convert directly from integer to decimal float types. */ +DFP_C_TYPE +INT_TO_DFP (INT_TYPE i) +{ + union { DFP_C_TYPE c; decFloat f; } u; + + u.f = *DEC_FLOAT_FROM_INT (&u.f, i); + return u.c; +} +#endif + +#if defined (L_si_to_sd) || defined (L_usi_to_sd) +_Decimal32 +/* Use decNumber to convert directly from integer to decimal float types. */ +INT_TO_DFP (INT_TYPE i) +{ + union { _Decimal32 c; decSingle f; } u32; + decDouble f64; + decContext context; + + decContextDefault (&context, DEC_INIT_DECIMAL128); + f64 = *DEC_FLOAT_FROM_INT (&f64, i); + u32.f = *decSingleFromWider (&u32.f, &f64, &context); + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + dfp_conversion_exceptions (context.status); + return u32.c; +} +#endif + +#if defined (L_di_to_sd) || defined (L_di_to_dd) || defined (L_di_to_td) \ + || defined (L_udi_to_sd) || defined (L_udi_to_dd) || defined (L_udi_to_td) +/* decNumber doesn't provide support for conversions from 64-bit integer + types, so do it the hard way. */ +DFP_C_TYPE +INT_TO_DFP (INT_TYPE i) +{ + DFP_C_TYPE f; + IEEE_TYPE s; + char buf[BUFMAX]; + decContext context; + + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + + /* Use a C library function to get a floating point string. */ + sprintf (buf, INT_FMT ".", CAST_FOR_FMT(i)); + /* Convert from the floating point string to a decimal* type. */ + FROM_STRING (&s, buf, &context); + IEEE_TO_HOST (s, &f); + + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + dfp_conversion_exceptions (context.status); + + return f; +} +#endif + +#if defined (L_sd_to_sf) || defined (L_dd_to_sf) || defined (L_td_to_sf) \ + || defined (L_sd_to_df) || defined (L_dd_to_df) || defined (L_td_to_df) \ + || ((defined (L_sd_to_xf) || defined (L_dd_to_xf) || defined (L_td_to_xf)) \ + && LONG_DOUBLE_HAS_XF_MODE) \ + || ((defined (L_sd_to_tf) || defined (L_dd_to_tf) || defined (L_td_to_tf)) \ + && LONG_DOUBLE_HAS_TF_MODE) +BFP_TYPE +DFP_TO_BFP (DFP_C_TYPE f) +{ + IEEE_TYPE s; + char buf[BUFMAX]; + + HOST_TO_IEEE (f, &s); + /* Write the value to a string. */ + TO_STRING (&s, buf); + /* Read it as the binary floating point type and return that. */ + return STR_TO_BFP (buf, NULL); +} +#endif + +#if defined (L_sf_to_sd) || defined (L_sf_to_dd) || defined (L_sf_to_td) \ + || defined (L_df_to_sd) || defined (L_df_to_dd) || defined (L_df_to_td) \ + || ((defined (L_xf_to_sd) || defined (L_xf_to_dd) || defined (L_xf_to_td)) \ + && LONG_DOUBLE_HAS_XF_MODE) \ + || ((defined (L_tf_to_sd) || defined (L_tf_to_dd) || defined (L_tf_to_td)) \ + && LONG_DOUBLE_HAS_TF_MODE) +DFP_C_TYPE +BFP_TO_DFP (BFP_TYPE x) +{ + DFP_C_TYPE f; + IEEE_TYPE s; + char buf[BUFMAX]; + decContext context; + + decContextDefault (&context, CONTEXT_INIT); + DFP_INIT_ROUNDMODE (context.round); + + /* Use a C library function to write the floating point value to a string. */ + sprintf (buf, BFP_FMT, (BFP_VIA_TYPE) x); + + /* Convert from the floating point string to a decimal* type. */ + FROM_STRING (&s, buf, &context); + IEEE_TO_HOST (s, &f); + + if (DFP_EXCEPTIONS_ENABLED && context.status != 0) + { + /* decNumber exception flags we care about here. 
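The 64-bit integer conversion completed above has to detour through decNumber's string form, with DEC_ROUND_DOWN forcing truncation toward zero. A hedged usage sketch (WIDTH == 64, INT_KIND == 2, DPD naming); the program is illustrative and assumes a DFP-enabled compiler and runtime:

#include <stdio.h>

/* Hedged sketch, not part of the patch: the cast below is what reaches
   DFP_TO_INT, i.e. __dpd_fixdddi in this configuration.  */
int
main (void)
{
  _Decimal64 d = -9.75DD;
  long long i = (long long) d;   /* lowers to __dpd_fixdddi (d)         */
  printf ("%lld\n", i);          /* prints -9: truncation, not rounding */
  return 0;
}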
*/ + int ieee_flags; + int dec_flags = DEC_IEEE_854_Inexact | DEC_IEEE_854_Invalid_operation + | DEC_IEEE_854_Overflow | DEC_IEEE_854_Underflow; + dec_flags &= context.status; + ieee_flags = DFP_IEEE_FLAGS (dec_flags); + if (ieee_flags != 0) + DFP_HANDLE_EXCEPTIONS (ieee_flags); + } + + return f; +} +#endif + +#if defined (L_unord_sd) || defined (L_unord_dd) || defined (L_unord_td) +CMPtype +DFP_UNORD (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b) +{ + decNumber arg1, arg2; + IEEE_TYPE a, b; + + HOST_TO_IEEE (arg_a, &a); + HOST_TO_IEEE (arg_b, &b); + TO_INTERNAL (&a, &arg1); + TO_INTERNAL (&b, &arg2); + return (decNumberIsNaN (&arg1) || decNumberIsNaN (&arg2)); +} +#endif /* L_unord_sd || L_unord_dd || L_unord_td */ diff --git a/gcc/config/dfp-bit.h b/gcc/config/dfp-bit.h new file mode 100644 index 000000000..45b79086b --- /dev/null +++ b/gcc/config/dfp-bit.h @@ -0,0 +1,626 @@ +/* Header file for dfp-bit.c. + Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef _DFPBIT_H +#define _DFPBIT_H + +#include +#include +#include +#include +#include "tconfig.h" +#include "coretypes.h" +#include "tm.h" + +#ifndef LIBGCC2_LONG_DOUBLE_TYPE_SIZE +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE LONG_DOUBLE_TYPE_SIZE +#endif + +/* We need to know the size of long double that the C library supports. + Don't use LIBGCC2_HAS_XF_MODE or LIBGCC2_HAS_TF_MODE here because + some targets set both of those. */ + +#define LONG_DOUBLE_HAS_XF_MODE \ + (BITS_PER_UNIT == 8 && LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 80) + +#define LONG_DOUBLE_HAS_TF_MODE \ + (BITS_PER_UNIT == 8 && LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 128) + +/* Depending on WIDTH, define a number of macros: + + DFP_C_TYPE: type of the arguments to the libgcc functions; + (eg _Decimal32) + + IEEE_TYPE: the corresponding (encoded) IEEE754 type; + (eg decimal32) + + TO_INTERNAL: the name of the decNumber function to convert an + encoded value into the decNumber internal representation; + + TO_ENCODED: the name of the decNumber function to convert an + internally represented decNumber into the encoded + representation. + + FROM_STRING: the name of the decNumber function to read an + encoded value from a string. + + TO_STRING: the name of the decNumber function to write an + encoded value to a string. 
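Each object built from dfp-bit.c sees exactly one WIDTH, so the macros documented above resolve to a single set of names per compilation. A hedged sketch of the WIDTH == 64 binding; decode_dd is an illustrative helper, WIDTH is normally passed on the command line by the libgcc build rather than #defined, and the include only resolves inside a GCC build tree:

/* Hedged sketch, not part of the patch.  */
#define WIDTH 64
#include "config/dfp-bit.h"   /* DFP_C_TYPE -> _Decimal64, IEEE_TYPE -> decimal64 */

static decNumber *
decode_dd (_Decimal64 x, decNumber *n)
{
  IEEE_TYPE enc;                             /* decimal64 at this width       */
  __builtin_memcpy (&enc, &x, sizeof enc);   /* same job as __host_to_ieee_64 */
  return TO_INTERNAL (&enc, n);              /* __decimal64ToNumber           */
}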
*/ + +#if WIDTH == 32 +#define DFP_C_TYPE _Decimal32 +#define IEEE_TYPE decimal32 +#define HOST_TO_IEEE __host_to_ieee_32 +#define IEEE_TO_HOST __ieee_to_host_32 +#define TO_INTERNAL __decimal32ToNumber +#define TO_ENCODED __decimal32FromNumber +#define FROM_STRING __decimal32FromString +#define TO_STRING __decimal32ToString +#elif WIDTH == 64 +#define DFP_C_TYPE _Decimal64 +#define IEEE_TYPE decimal64 +#define HOST_TO_IEEE __host_to_ieee_64 +#define IEEE_TO_HOST __ieee_to_host_64 +#define TO_INTERNAL __decimal64ToNumber +#define TO_ENCODED __decimal64FromNumber +#define FROM_STRING __decimal64FromString +#define TO_STRING __decimal64ToString +#elif WIDTH == 128 +#define DFP_C_TYPE _Decimal128 +#define IEEE_TYPE decimal128 +#define HOST_TO_IEEE __host_to_ieee_128 +#define IEEE_TO_HOST __ieee_to_host_128 +#define TO_INTERNAL __decimal128ToNumber +#define TO_ENCODED __decimal128FromNumber +#define FROM_STRING __decimal128FromString +#define TO_STRING __decimal128ToString +#else +#error invalid decimal float word width +#endif + +/* We define __DEC_EVAL_METHOD__ to 2, saying that we evaluate all + operations and constants to the range and precision of the _Decimal128 + type. Make it so. */ +#if WIDTH == 32 +#define CONTEXT_INIT DEC_INIT_DECIMAL32 +#elif WIDTH == 64 +#define CONTEXT_INIT DEC_INIT_DECIMAL64 +#elif WIDTH == 128 +#define CONTEXT_INIT DEC_INIT_DECIMAL128 +#endif + +#ifndef DFP_INIT_ROUNDMODE +#define DFP_INIT_ROUNDMODE(A) A = DEC_ROUND_HALF_EVEN +#endif + +#ifdef DFP_EXCEPTIONS_ENABLED +/* Return IEEE exception flags based on decNumber status flags. */ +#define DFP_IEEE_FLAGS(DEC_FLAGS) __extension__ \ +({int _fe_flags = 0; \ + if ((dec_flags & DEC_IEEE_854_Division_by_zero) != 0) \ + _fe_flags |= FE_DIVBYZERO; \ + if ((dec_flags & DEC_IEEE_854_Inexact) != 0) \ + _fe_flags |= FE_INEXACT; \ + if ((dec_flags & DEC_IEEE_854_Invalid_operation) != 0) \ + _fe_flags |= FE_INVALID; \ + if ((dec_flags & DEC_IEEE_854_Overflow) != 0) \ + _fe_flags |= FE_OVERFLOW; \ + if ((dec_flags & DEC_IEEE_854_Underflow) != 0) \ + _fe_flags |= FE_UNDERFLOW; \ + _fe_flags; }) +#else +#define DFP_EXCEPTIONS_ENABLED 0 +#define DFP_IEEE_FLAGS(A) 0 +#define DFP_HANDLE_EXCEPTIONS(A) do {} while (0) +#endif + +/* Conversions between different decimal float types use WIDTH_TO to + determine additional macros to define. */ + +#if defined (L_dd_to_sd) || defined (L_td_to_sd) +#define WIDTH_TO 32 +#elif defined (L_sd_to_dd) || defined (L_td_to_dd) +#define WIDTH_TO 64 +#elif defined (L_sd_to_td) || defined (L_dd_to_td) +#define WIDTH_TO 128 +#endif + +/* If WIDTH_TO is defined, define additional macros: + + DFP_C_TYPE_TO: type of the result of dfp to dfp conversion. + + IEEE_TYPE_TO: the corresponding (encoded) IEEE754 type. + + TO_ENCODED_TO: the name of the decNumber function to convert an + internally represented decNumber into the encoded representation + for the destination. 
*/ + +#if WIDTH_TO == 32 +#define DFP_C_TYPE_TO _Decimal32 +#define IEEE_TYPE_TO decimal32 +#define TO_ENCODED_TO __decimal32FromNumber +#define IEEE_TO_HOST_TO __ieee_to_host_32 +#elif WIDTH_TO == 64 +#define DFP_C_TYPE_TO _Decimal64 +#define IEEE_TYPE_TO decimal64 +#define TO_ENCODED_TO __decimal64FromNumber +#define IEEE_TO_HOST_TO __ieee_to_host_64 +#elif WIDTH_TO == 128 +#define DFP_C_TYPE_TO _Decimal128 +#define IEEE_TYPE_TO decimal128 +#define TO_ENCODED_TO __decimal128FromNumber +#define IEEE_TO_HOST_TO __ieee_to_host_128 +#endif + +/* Conversions between decimal float types and integral types use INT_KIND + to determine the data type and C functions to use. */ + +#if defined (L_sd_to_si) || defined (L_dd_to_si) || defined (L_td_to_si) \ + || defined (L_si_to_sd) || defined (L_si_to_dd) || defined (L_si_to_td) +#define INT_KIND 1 +#elif defined (L_sd_to_di) || defined (L_dd_to_di) || defined (L_td_to_di) \ + || defined (L_di_to_sd) || defined (L_di_to_dd) || defined (L_di_to_td) +#define INT_KIND 2 +#elif defined (L_sd_to_usi) || defined (L_dd_to_usi) || defined (L_td_to_usi) \ + || defined (L_usi_to_sd) || defined (L_usi_to_dd) || defined (L_usi_to_td) +#define INT_KIND 3 +#elif defined (L_sd_to_udi) || defined (L_dd_to_udi) || defined (L_td_to_udi) \ + || defined (L_udi_to_sd) || defined (L_udi_to_dd) || defined (L_udi_to_td) +#define INT_KIND 4 +#endif + +/* If INT_KIND is defined, define additional macros: + + INT_TYPE: The integer data type. + + INT_FMT: The format string for writing the integer to a string. + + CAST_FOR_FMT: Cast variable of INT_KIND to C type for sprintf. + This works for ILP32 and LP64, won't for other type size systems. + + STR_TO_INT: The function to read the integer from a string. */ + +#if INT_KIND == 1 +#define INT_TYPE SItype +#define INT_FMT "%d" +#define CAST_FOR_FMT(A) (int)A +#define STR_TO_INT strtol +#elif INT_KIND == 2 +#define INT_TYPE DItype +#define INT_FMT "%lld" +#define CAST_FOR_FMT(A) (long long)A +#define STR_TO_INT strtoll +#elif INT_KIND == 3 +#define INT_TYPE USItype +#define INT_FMT "%u" +#define CAST_FOR_FMT(A) (unsigned int)A +#define STR_TO_INT strtoul +#elif INT_KIND == 4 +#define INT_TYPE UDItype +#define INT_FMT "%llu" +#define CAST_FOR_FMT(A) (unsigned long long)A +#define STR_TO_INT strtoull +#endif + +/* Conversions between decimal float types and binary float types use + BFP_KIND to determine the data type and C functions to use. */ + +#if defined (L_sd_to_sf) || defined (L_dd_to_sf) || defined (L_td_to_sf) \ + || defined (L_sf_to_sd) || defined (L_sf_to_dd) || defined (L_sf_to_td) +#define BFP_KIND 1 +#elif defined (L_sd_to_df) || defined (L_dd_to_df ) || defined (L_td_to_df) \ + || defined (L_df_to_sd) || defined (L_df_to_dd) || defined (L_df_to_td) +#define BFP_KIND 2 +#elif defined (L_sd_to_xf) || defined (L_dd_to_xf ) || defined (L_td_to_xf) \ + || defined (L_xf_to_sd) || defined (L_xf_to_dd) || defined (L_xf_to_td) +#define BFP_KIND 3 +#elif defined (L_sd_to_tf) || defined (L_dd_to_tf) || defined (L_td_to_tf) \ + || defined (L_tf_to_sd) || defined (L_tf_to_dd) || defined (L_tf_to_td) +#define BFP_KIND 4 +#endif + +/* If BFP_KIND is defined, define additional macros: + + BFP_TYPE: The binary floating point data type. + + BFP_FMT: The format string for writing the value to a string. + The number of decimal digits printed is + ceil (nbits / log2 (10.) + 1) + as described in David Matula's CACM 19(3) 716-723 June 1968 paper. + + BFP_VIA_TYPE: Type to which to cast a variable of BPF_TYPE for a + call to sprintf. 
+ + STR_TO_BFP: The function to read the value from a string. */ + +#if BFP_KIND == 1 +#define BFP_TYPE SFtype +#define BFP_FMT "%.9e" +#define BFP_VIA_TYPE double +#define STR_TO_BFP strtof + +#elif BFP_KIND == 2 +#define BFP_TYPE DFtype +#define BFP_FMT "%.17e" +#define BFP_VIA_TYPE double +#define STR_TO_BFP strtod + +#elif BFP_KIND == 3 +#if LONG_DOUBLE_HAS_XF_MODE +#define BFP_TYPE XFtype +#define BFP_FMT "%.21Le" +#define BFP_VIA_TYPE long double +#define STR_TO_BFP strtold +#endif /* LONG_DOUBLE_HAS_XF_MODE */ + +#elif BFP_KIND == 4 +#if LONG_DOUBLE_HAS_TF_MODE +#define BFP_TYPE TFtype +#if LDBL_MANT_DIG == 106 +#define BFP_FMT "%.33Le" +#elif LDBL_MANT_DIG == 113 +#define BFP_FMT "%.36Le" +#else +#error "unknown long double size, cannot define BFP_FMT" +#endif /* LDBL_MANT_DIG */ +#define STR_TO_BFP strtold +#define BFP_VIA_TYPE long double +#endif /* LONG_DOUBLE_HAS_TF_MODE */ + +#endif /* BFP_KIND */ + +#if WIDTH == 128 || WIDTH_TO == 128 +#include "decimal128.h" +#include "decQuad.h" +#endif +#if WIDTH == 64 || WIDTH_TO == 64 +#include "decimal64.h" +#include "decDouble.h" +#endif +#if WIDTH == 32 || WIDTH_TO == 32 +#include "decimal32.h" +#include "decSingle.h" +#endif +#include "decNumber.h" + +/* Names of arithmetic functions. */ + +#if ENABLE_DECIMAL_BID_FORMAT +#define DPD_BID_NAME(DPD,BID) BID +#else +#define DPD_BID_NAME(DPD,BID) DPD +#endif + +#if WIDTH == 32 +#define DFP_ADD DPD_BID_NAME(__dpd_addsd3,__bid_addsd3) +#define DFP_SUB DPD_BID_NAME(__dpd_subsd3,__bid_subsd3) +#define DFP_MULTIPLY DPD_BID_NAME(__dpd_mulsd3,__bid_mulsd3) +#define DFP_DIVIDE DPD_BID_NAME(__dpd_divsd3,__bid_divsd3) +#define DFP_EQ DPD_BID_NAME(__dpd_eqsd2,__bid_eqsd2) +#define DFP_NE DPD_BID_NAME(__dpd_nesd2,__bid_nesd2) +#define DFP_LT DPD_BID_NAME(__dpd_ltsd2,__bid_ltsd2) +#define DFP_GT DPD_BID_NAME(__dpd_gtsd2,__bid_gtsd2) +#define DFP_LE DPD_BID_NAME(__dpd_lesd2,__bid_lesd2) +#define DFP_GE DPD_BID_NAME(__dpd_gesd2,__bid_gesd2) +#define DFP_UNORD DPD_BID_NAME(__dpd_unordsd2,__bid_unordsd2) +#elif WIDTH == 64 +#define DFP_ADD DPD_BID_NAME(__dpd_adddd3,__bid_adddd3) +#define DFP_SUB DPD_BID_NAME(__dpd_subdd3,__bid_subdd3) +#define DFP_MULTIPLY DPD_BID_NAME(__dpd_muldd3,__bid_muldd3) +#define DFP_DIVIDE DPD_BID_NAME(__dpd_divdd3,__bid_divdd3) +#define DFP_EQ DPD_BID_NAME(__dpd_eqdd2,__bid_eqdd2) +#define DFP_NE DPD_BID_NAME(__dpd_nedd2,__bid_nedd2) +#define DFP_LT DPD_BID_NAME(__dpd_ltdd2,__bid_ltdd2) +#define DFP_GT DPD_BID_NAME(__dpd_gtdd2,__bid_gtdd2) +#define DFP_LE DPD_BID_NAME(__dpd_ledd2,__bid_ledd2) +#define DFP_GE DPD_BID_NAME(__dpd_gedd2,__bid_gedd2) +#define DFP_UNORD DPD_BID_NAME(__dpd_unorddd2,__bid_unorddd2) +#elif WIDTH == 128 +#define DFP_ADD DPD_BID_NAME(__dpd_addtd3,__bid_addtd3) +#define DFP_SUB DPD_BID_NAME(__dpd_subtd3,__bid_subtd3) +#define DFP_MULTIPLY DPD_BID_NAME(__dpd_multd3,__bid_multd3) +#define DFP_DIVIDE DPD_BID_NAME(__dpd_divtd3,__bid_divtd3) +#define DFP_EQ DPD_BID_NAME(__dpd_eqtd2,__bid_eqtd2) +#define DFP_NE DPD_BID_NAME(__dpd_netd2,__bid_netd2) +#define DFP_LT DPD_BID_NAME(__dpd_lttd2,__bid_lttd2) +#define DFP_GT DPD_BID_NAME(__dpd_gttd2,__bid_gttd2) +#define DFP_LE DPD_BID_NAME(__dpd_letd2,__bid_letd2) +#define DFP_GE DPD_BID_NAME(__dpd_getd2,__bid_getd2) +#define DFP_UNORD DPD_BID_NAME(__dpd_unordtd2,__bid_unordtd2) +#endif + +/* Names of decNumber functions for DPD arithmetic. 
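The BFP_FMT precisions chosen just above can be checked against the quoted ceil (nbits / log2 (10.) + 1) rule; a small hedged verification program, not part of libgcc, built with -lm:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* Mantissa widths for SFmode, DFmode, XFmode and the two TFmode cases.  */
  static const int nbits[] = { 24, 53, 64, 106, 113 };
  unsigned i;

  for (i = 0; i < sizeof nbits / sizeof nbits[0]; i++)
    printf ("%3d bits -> %%.%de\n", nbits[i],
            (int) ceil (nbits[i] / log2 (10.) + 1));
  /* Prints 9, 17, 21, 33 and 36, matching "%.9e" through "%.36Le" above.  */
  return 0;
}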
*/ + +#if WIDTH == 32 +#define decFloat decDouble +#define DFP_BINARY_OP d32_binary_op +#define DFP_COMPARE_OP d32_compare_op +#define DEC_FLOAT_ADD decDoubleAdd +#define DEC_FLOAT_SUBTRACT decDoubleSubtract +#define DEC_FLOAT_MULTIPLY decDoubleMultiply +#define DEC_FLOAT_DIVIDE decDoubleDivide +#define DEC_FLOAT_COMPARE decDoubleCompare +#define DEC_FLOAT_IS_ZERO decDoubleIsZero +#define DEC_FLOAT_IS_NAN decDoubleIsNaN +#define DEC_FLOAT_IS_SIGNED decDoubleIsSigned +#elif WIDTH == 64 +#define DFP_BINARY_OP dnn_binary_op +#define DFP_COMPARE_OP dnn_compare_op +#define decFloat decDouble +#define DEC_FLOAT_ADD decDoubleAdd +#define DEC_FLOAT_SUBTRACT decDoubleSubtract +#define DEC_FLOAT_MULTIPLY decDoubleMultiply +#define DEC_FLOAT_DIVIDE decDoubleDivide +#define DEC_FLOAT_COMPARE decDoubleCompare +#define DEC_FLOAT_IS_ZERO decDoubleIsZero +#define DEC_FLOAT_IS_NAN decDoubleIsNaN +#define DEC_FLOAT_IS_SIGNED decDoubleIsSigned +#elif WIDTH == 128 +#define DFP_BINARY_OP dnn_binary_op +#define DFP_COMPARE_OP dnn_compare_op +#define decFloat decQuad +#define DEC_FLOAT_ADD decQuadAdd +#define DEC_FLOAT_SUBTRACT decQuadSubtract +#define DEC_FLOAT_MULTIPLY decQuadMultiply +#define DEC_FLOAT_DIVIDE decQuadDivide +#define DEC_FLOAT_COMPARE decQuadCompare +#define DEC_FLOAT_IS_ZERO decQuadIsZero +#define DEC_FLOAT_IS_NAN decQuadIsNaN +#define DEC_FLOAT_IS_SIGNED decQuadIsSigned +#endif + +/* Names of functions to convert between different decimal float types. */ + +#if WIDTH == 32 +#if WIDTH_TO == 64 +#define DFP_TO_DFP DPD_BID_NAME(__dpd_extendsddd2,__bid_extendsddd2) +#elif WIDTH_TO == 128 +#define DFP_TO_DFP DPD_BID_NAME(__dpd_extendsdtd2,__bid_extendsdtd2) +#endif +#elif WIDTH == 64 +#if WIDTH_TO == 32 +#define DFP_TO_DFP DPD_BID_NAME(__dpd_truncddsd2,__bid_truncddsd2) +#elif WIDTH_TO == 128 +#define DFP_TO_DFP DPD_BID_NAME(__dpd_extendddtd2,__bid_extendddtd2) +#endif +#elif WIDTH == 128 +#if WIDTH_TO == 32 +#define DFP_TO_DFP DPD_BID_NAME(__dpd_trunctdsd2,__bid_trunctdsd2) +#elif WIDTH_TO == 64 +#define DFP_TO_DFP DPD_BID_NAME(__dpd_trunctddd2,__bid_trunctddd2) +#endif +#endif + +/* Names of functions to convert between decimal float and integers. 
*/ + +#if WIDTH == 32 +#if INT_KIND == 1 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatsisd,__bid_floatsisd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixsdsi,__bid_fixsdsi) +#define DEC_FLOAT_FROM_INT decDoubleFromInt32 +#define DEC_FLOAT_TO_INT decDoubleToInt32 +#elif INT_KIND == 2 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatdisd,__bid_floatdisd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixsddi,__bid_fixsddi) +#elif INT_KIND == 3 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunssisd,__bid_floatunssisd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunssdsi,__bid_fixunssdsi) +#define DEC_FLOAT_FROM_INT decDoubleFromUInt32 +#define DEC_FLOAT_TO_INT decDoubleToUInt32 +#elif INT_KIND == 4 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunsdisd,__bid_floatunsdisd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunssddi,__bid_fixunssddi) +#endif +#elif WIDTH == 64 +#define decFloat decDouble +#if INT_KIND == 1 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatsidd,__bid_floatsidd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixddsi,__bid_fixddsi) +#define DEC_FLOAT_FROM_INT decDoubleFromInt32 +#define DEC_FLOAT_TO_INT decDoubleToInt32 +#elif INT_KIND == 2 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatdidd,__bid_floatdidd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixdddi,__bid_fixdddi) +#elif INT_KIND == 3 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunssidd,__bid_floatunssidd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunsddsi,__bid_fixunsddsi) +#define DEC_FLOAT_FROM_INT decDoubleFromUInt32 +#define DEC_FLOAT_TO_INT decDoubleToUInt32 +#elif INT_KIND == 4 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunsdidd,__bid_floatunsdidd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunsdddi,__bid_fixunsdddi) +#endif +#elif WIDTH == 128 +#define decFloat decQuad +#if INT_KIND == 1 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatsitd,__bid_floatsitd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixtdsi,__bid_fixtdsi) +#define DEC_FLOAT_FROM_INT decQuadFromInt32 +#define DEC_FLOAT_TO_INT decQuadToInt32 +#elif INT_KIND == 2 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatditd,__bid_floatditd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixtddi,__bid_fixtddi) +#elif INT_KIND == 3 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunssitd,__bid_floatunssitd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunstdsi,__bid_fixunstdsi) +#define DEC_FLOAT_FROM_INT decQuadFromUInt32 +#define DEC_FLOAT_TO_INT decQuadToUInt32 +#elif INT_KIND == 4 +#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunsditd,__bid_floatunsditd) +#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunstddi,__bid_fixunstddi) +#endif +#endif + +/* Names of functions to convert between decimal float and binary float. 
*/ + +#if WIDTH == 32 +#if BFP_KIND == 1 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendsfsd,__bid_extendsfsd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_truncsdsf,__bid_truncsdsf) +#elif BFP_KIND == 2 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_truncdfsd,__bid_truncdfsd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendsddf,__bid_extendsddf) +#elif BFP_KIND == 3 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_truncxfsd,__bid_truncxfsd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendsdxf,__bid_extendsdxf) +#elif BFP_KIND == 4 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_trunctfsd,__bid_trunctfsd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendsdtf,__bid_extendsdtf) +#endif /* BFP_KIND */ + +#elif WIDTH == 64 +#if BFP_KIND == 1 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendsfdd,__bid_extendsfdd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_truncddsf,__bid_truncddsf) +#elif BFP_KIND == 2 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_extenddfdd,__bid_extenddfdd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_truncdddf,__bid_truncdddf) +#elif BFP_KIND == 3 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_truncxfdd,__bid_truncxfdd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendddxf,__bid_extendddxf) +#elif BFP_KIND == 4 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_trunctfdd,__bid_trunctfdd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendddtf,__bid_extendddtf) +#endif /* BFP_KIND */ + +#elif WIDTH == 128 +#if BFP_KIND == 1 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendsftd,__bid_extendsftd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctdsf,__bid_trunctdsf) +#elif BFP_KIND == 2 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_extenddftd,__bid_extenddftd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctddf,__bid_trunctddf) +#elif BFP_KIND == 3 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendxftd,__bid_extendxftd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctdxf,__bid_trunctdxf) +#elif BFP_KIND == 4 +#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendtftd,__bid_extendtftd) +#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctdtf,__bid_trunctdtf) +#endif /* BFP_KIND */ + +#endif /* WIDTH */ + +/* Some handy typedefs. */ + +typedef float SFtype __attribute__ ((mode (SF))); +typedef float DFtype __attribute__ ((mode (DF))); +#if LONG_DOUBLE_HAS_XF_MODE +typedef float XFtype __attribute__ ((mode (XF))); +#endif /* LONG_DOUBLE_HAS_XF_MODE */ +#if LONG_DOUBLE_HAS_TF_MODE +typedef float TFtype __attribute__ ((mode (TF))); +#endif /* LONG_DOUBLE_HAS_TF_MODE */ + +typedef int SItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); + +/* The type of the result of a decimal float comparison. This must + match `__libgcc_cmp_return__' in GCC for the target. */ + +typedef int CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); + +/* Prototypes. 
*/ + +#if defined (L_mul_sd) || defined (L_mul_dd) || defined (L_mul_td) +extern DFP_C_TYPE DFP_MULTIPLY (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_div_sd) || defined (L_div_dd) || defined (L_div_td) +extern DFP_C_TYPE DFP_DIVIDE (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_addsub_sd) || defined (L_addsub_dd) || defined (L_addsub_td) +extern DFP_C_TYPE DFP_ADD (DFP_C_TYPE, DFP_C_TYPE); +extern DFP_C_TYPE DFP_SUB (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_eq_sd) || defined (L_eq_dd) || defined (L_eq_td) +extern CMPtype DFP_EQ (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_ne_sd) || defined (L_ne_dd) || defined (L_ne_td) +extern CMPtype DFP_NE (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_lt_sd) || defined (L_lt_dd) || defined (L_lt_td) +extern CMPtype DFP_LT (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_gt_sd) || defined (L_gt_dd) || defined (L_gt_td) +extern CMPtype DFP_GT (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_le_sd) || defined (L_le_dd) || defined (L_le_td) +extern CMPtype DFP_LE (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_ge_sd) || defined (L_ge_dd) || defined (L_ge_td) +extern CMPtype DFP_GE (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_unord_sd) || defined (L_unord_dd) || defined (L_unord_td) +extern CMPtype DFP_UNORD (DFP_C_TYPE, DFP_C_TYPE); +#endif + +#if defined (L_sd_to_dd) || defined (L_sd_to_td) || defined (L_dd_to_sd) \ + || defined (L_dd_to_td) || defined (L_td_to_sd) || defined (L_td_to_dd) +extern DFP_C_TYPE_TO DFP_TO_DFP (DFP_C_TYPE); +#endif + +#if defined (L_sd_to_si) || defined (L_dd_to_si) || defined (L_td_to_si) \ + || defined (L_sd_to_di) || defined (L_dd_to_di) || defined (L_td_to_di) \ + || defined (L_sd_to_usi) || defined (L_dd_to_usi) || defined (L_td_to_usi) \ + || defined (L_sd_to_udi) || defined (L_dd_to_udi) || defined (L_td_to_udi) +extern INT_TYPE DFP_TO_INT (DFP_C_TYPE); +#endif + +#if defined (L_si_to_sd) || defined (L_si_to_dd) || defined (L_si_to_td) \ + || defined (L_di_to_sd) || defined (L_di_to_dd) || defined (L_di_to_td) \ + || defined (L_usi_to_sd) || defined (L_usi_to_dd) || defined (L_usi_to_td) \ + || defined (L_udi_to_sd) || defined (L_udi_to_dd) || defined (L_udi_to_td) +extern DFP_C_TYPE INT_TO_DFP (INT_TYPE); +#endif + +#if defined (L_sd_to_sf) || defined (L_dd_to_sf) || defined (L_td_to_sf) \ + || defined (L_sd_to_df) || defined (L_dd_to_df) || defined (L_td_to_df) \ + || ((defined (L_sd_to_xf) || defined (L_dd_to_xf) || defined (L_td_to_xf)) \ + && LONG_DOUBLE_HAS_XF_MODE) \ + || ((defined (L_sd_to_tf) || defined (L_dd_to_tf) || defined (L_td_to_tf)) \ + && LONG_DOUBLE_HAS_TF_MODE) +extern BFP_TYPE DFP_TO_BFP (DFP_C_TYPE); +#endif + +#if defined (L_sf_to_sd) || defined (L_sf_to_dd) || defined (L_sf_to_td) \ + || defined (L_df_to_sd) || defined (L_df_to_dd) || defined (L_df_to_td) \ + || ((defined (L_xf_to_sd) || defined (L_xf_to_dd) || defined (L_xf_to_td)) \ + && LONG_DOUBLE_HAS_XF_MODE) \ + || ((defined (L_tf_to_sd) || defined (L_tf_to_dd) || defined (L_tf_to_td)) \ + && LONG_DOUBLE_HAS_TF_MODE) +extern DFP_C_TYPE BFP_TO_DFP (BFP_TYPE); +#endif + +#endif /* _DFPBIT_H */ diff --git a/gcc/config/divmod.c b/gcc/config/divmod.c new file mode 100644 index 000000000..c227b99cc --- /dev/null +++ b/gcc/config/divmod.c @@ -0,0 +1,73 @@ +/* Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +long udivmodsi4 (); + +long +__divsi3 (long a, long b) +{ + int neg = 0; + long res; + + if (a < 0) + { + a = -a; + neg = !neg; + } + + if (b < 0) + { + b = -b; + neg = !neg; + } + + res = udivmodsi4 (a, b, 0); + + if (neg) + res = -res; + + return res; +} + +long +__modsi3 (long a, long b) +{ + int neg = 0; + long res; + + if (a < 0) + { + a = -a; + neg = 1; + } + + if (b < 0) + b = -b; + + res = udivmodsi4 (a, b, 1); + + if (neg) + res = -res; + + return res; +} diff --git a/gcc/config/elfos.h b/gcc/config/elfos.h new file mode 100644 index 000000000..e483216ef --- /dev/null +++ b/gcc/config/elfos.h @@ -0,0 +1,531 @@ +/* elfos.h -- operating system specific defines to be used when + targeting GCC for some generic ELF system + Copyright (C) 1991, 1994, 1995, 1999, 2000, 2001, 2002, 2003, 2004, + 2007, 2009, 2010 Free Software Foundation, Inc. + Based on svr4.h contributed by Ron Guilmette (rfg@netcom.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define TARGET_OBJFMT_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__ELF__"); \ + } \ + while (0) + +/* Define a symbol indicating that we are using elfos.h. + Some CPU specific configuration files use this. */ +#define USING_ELFOS_H + +/* The prefix to add to user-visible assembler symbols. + + For ELF systems the convention is *not* to prepend a leading + underscore onto user-level symbol names. */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* The biggest alignment supported by ELF in bits. 32-bit ELF + supports section alignment up to (0x80000000 * 8), while + 64-bit ELF supports (0x8000000000000000 * 8). If this macro + is not defined, the default is the largest alignment supported + by 32-bit ELF and representable on a 32-bit host. Use this + macro to limit the alignment which can be specified using + the `__attribute__ ((aligned (N)))' construct. 
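A hedged note on the __divsi3 and __modsi3 routines above: the sign juggling reproduces C's truncating division, with the remainder taking the sign of the dividend only. A small illustrative check (check_signs is a made-up name; udivmodsi4 is supplied elsewhere in libgcc):

/* Hedged sketch, not part of the patch.  */
extern long __divsi3 (long, long);
extern long __modsi3 (long, long);

static int
check_signs (void)
{
  return __divsi3 (-7, 3) == -2     /* quotient truncates toward zero  */
      && __modsi3 (-7, 3) == -1     /* remainder follows the dividend  */
      && __modsi3 (7, -3) == 1;     /* divisor's sign is discarded     */
}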
*/ +#ifndef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (((unsigned int) 1 << 28) * 8) +#endif + +/* Use periods rather than dollar signs in special g++ assembler names. */ + +#define NO_DOLLAR_IN_LABEL + +/* Writing `int' for a bit-field forces int alignment for the structure. */ + +#ifndef PCC_BITFIELD_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS 1 +#endif + +/* All ELF targets can support DWARF-2. */ + +#define DWARF2_DEBUGGING_INFO 1 + +/* The GNU tools operate better with dwarf2, and it is required by some + psABI's. Since we don't have any native tools to be compatible with, + default to dwarf2. */ + +#ifndef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#endif + +/* All SVR4 targets use the ELF object file format. */ +#define OBJECT_FORMAT_ELF + + +/* Output #ident as a .ident. */ + +#define ASM_OUTPUT_IDENT(FILE, NAME) \ + fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME); + +#define IDENT_ASM_OP "\t.ident\t" + +#undef SET_ASM_OP +#define SET_ASM_OP "\t.set\t" + +/* Most svr4 assemblers want a .file directive at the beginning of + their input file. */ +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + +/* This is how to allocate empty space in some section. The .zero + pseudo-op is used for this on most svr4 assemblers. */ + +#define SKIP_ASM_OP "\t.zero\t" + +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(FILE, SIZE) \ + fprintf ((FILE), "%s"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\ + SKIP_ASM_OP, (SIZE)) + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. + + For most svr4 systems, the convention is that any symbol which begins + with a period is not put into the linker symbol table by the assembler. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + do \ + { \ + sprintf (LABEL, "*.%s%u", PREFIX, (unsigned) (NUM)); \ + } \ + while (0) + +/* Output the label which precedes a jumptable. Note that for all svr4 + systems where we actually generate jumptables (which is to say every + svr4 target except i386, where we use casesi instead) we put the jump- + tables into the .rodata section and since other stuff could have been + put into the .rodata section prior to any given jumptable, we have to + make sure that the location counter for the .rodata section gets pro- + perly re-aligned prior to the actual beginning of the jump table. */ + +#undef ALIGN_ASM_OP +#define ALIGN_ASM_OP "\t.align\t" + +#ifndef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN ((FILE), 2); +#endif + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \ + do \ + { \ + ASM_OUTPUT_BEFORE_CASE_LABEL (FILE, PREFIX, NUM, JUMPTABLE) \ + (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \ + } \ + while (0) + +/* The standard SVR4 assembler seems to require that certain builtin + library routines (e.g. .udiv) be explicitly declared as .globl + in each assembly file where they are referenced. */ + +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ + (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. 
*/ + +#define COMMON_ASM_OP "\t.comm\t" + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + (SIZE), (ALIGN) / BITS_PER_UNIT); \ + } \ + while (0) + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#define LOCAL_ASM_OP "\t.local\t" + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf ((FILE), "%s", LOCAL_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "\n"); \ + ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \ + } \ + while (0) + +/* This is the pseudo-op used to generate a contiguous sequence of byte + values from a double-quoted string WITHOUT HAVING A TERMINATING NUL + AUTOMATICALLY APPENDED. This is the same for most svr4 assemblers. */ + +#undef ASCII_DATA_ASM_OP +#define ASCII_DATA_ASM_OP "\t.ascii\t" + +/* Support a read-only data section. */ +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" + +/* On svr4, we *do* have support for the .init and .fini sections, and we + can put stuff in there to be executed before and after `main'. We let + crtstuff.c and other files know this by defining the following symbols. + The definitions say how to change sections to the .init and .fini + sections. This is the same for all known svr4 assemblers. */ + +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Output assembly directive to move to the beginning of current section. */ +#ifdef HAVE_GAS_SUBSECTION_ORDERING +# define ASM_SECTION_START_OP "\t.subsection\t-1" +# define ASM_OUTPUT_SECTION_START(FILE) \ + fprintf ((FILE), "%s\n", ASM_SECTION_START_OP) +#endif + +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION default_elf_select_rtx_section +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION default_elf_select_section +#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS +#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS true + +/* Define the strings used for the special svr4 .type and .size directives. + These strings generally do not vary from one system running svr4 to + another, but if a given system (e.g. m88k running svr) needs to use + different pseudo-op names for these, they may be overridden in the + file which includes this one. */ + +#define TYPE_ASM_OP "\t.type\t" +#define SIZE_ASM_OP "\t.size\t" + +/* This is how we tell the assembler that a symbol is weak. */ + +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do \ + { \ + fputs ("\t.weak\t", (FILE)); \ + assemble_name ((FILE), (NAME)); \ + fputc ('\n', (FILE)); \ + } \ + while (0) + +/* The following macro defines the format used to output the second + operand of the .type assembler directive. Different svr4 assemblers + expect various different forms for this operand. The one given here + is just a default. You may need to override it in your machine- + specific tm.h file (depending upon the particulars of your assembler). 
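A hedged illustration of what the macros above produce for a target that uses elfos.h unmodified; the identifier names are made up, and GCC of this generation defaults to -fcommon, so the tentative definition below becomes a common symbol:

/* Hedged sketch, not part of the patch.  */
int tentative_def;   /* .comm tentative_def,4,4   (ASM_OUTPUT_ALIGNED_COMMON)          */

void                 /* .type fn, @function       (TYPE_ASM_OP, TYPE_OPERAND_FMT)      */
fn (void)
{
}                    /* .size fn, .-fn            (SIZE_ASM_OP, via the function-size
                                                    macro defined below)               */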
*/ + +#define TYPE_OPERAND_FMT "@%s" + +/* Write the extra assembler code needed to declare a function's result. + Most svr4 assemblers don't require any special declaration of the + result value, but there are exceptions. */ + +#ifndef ASM_DECLARE_RESULT +#define ASM_DECLARE_RESULT(FILE, RESULT) +#endif + +/* These macros generate the special .type and .size directives which + are used to set the corresponding fields of the linker symbol table + entries in an ELF object file under SVR4. These macros also output + the starting labels for the relevant functions/objects. */ + +/* Write the extra assembler code needed to declare a function properly. + Some svr4 assemblers need to also have something extra said about the + function's return value. We allow for that here. */ + +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \ + } \ + while (0) +#endif + +/* Write the extra assembler code needed to declare an object properly. */ + +#ifdef HAVE_GAS_GNU_UNIQUE_OBJECT +#define USE_GNU_UNIQUE_OBJECT 1 +#else +#define USE_GNU_UNIQUE_OBJECT 0 +#endif + +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + do \ + { \ + HOST_WIDE_INT size; \ + \ + /* For template static data member instantiations or \ + inline fn local statics and their guard variables, use \ + gnu_unique_object so that they will be combined even under \ + RTLD_LOCAL. Don't use gnu_unique_object for typeinfo, \ + vtables and other read-only artificial decls. */ \ + if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (DECL) \ + && (!DECL_ARTIFICIAL (DECL) || !TREE_READONLY (DECL))) \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "gnu_unique_object"); \ + else \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive \ + && (DECL) && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ + } \ + \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) + +/* Output the size directive for a decl in rest_of_decl_compilation + in the case where we did not do so before the initializer. + Once we find the error_mark_node, we know that the value of + size_directive_output was set + by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */ + +#undef ASM_FINISH_DECLARE_OBJECT +#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END)\ + do \ + { \ + const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + HOST_WIDE_INT size; \ + \ + if (!flag_inhibit_size_directive \ + && DECL_SIZE (DECL) \ + && ! AT_END && TOP_LEVEL \ + && DECL_INITIAL (DECL) == error_mark_node \ + && !size_directive_output) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ + } \ + } \ + while (0) + +/* This is how to declare the size of a function. */ +#ifndef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ + } \ + while (0) +#endif + +/* A table of bytes codes used by the ASM_OUTPUT_ASCII and + ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table + corresponds to a particular byte value [0..255]. 
For any + given byte value, if the value in the corresponding table + position is zero, the given character can be output directly. + If the table value is 1, the byte must be output as a \ooo + octal escape. If the tables value is anything else, then the + byte value should be output as a \ followed by the value + in the table. Note that we can use standard UN*X escape + sequences for many control characters, but we don't use + \a to represent BEL because some svr4 assemblers (e.g. on + the i386) don't know about that. Also, we don't use \v + since some versions of gas, such as 2.2 did not accept it. */ + +#define ESCAPES \ +"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" + +/* Some svr4 assemblers have a limit on the number of characters which + can appear in the operand of a .string directive. If your assembler + has such a limitation, you should define STRING_LIMIT to reflect that + limit. Note that at least some svr4 assemblers have a limit on the + actual number of bytes in the double-quoted string, and that they + count each character in an escape sequence as one byte. Thus, an + escape sequence like \377 would count as four bytes. + + If your target assembler doesn't support the .string directive, you + should define this to zero. +*/ + +#define STRING_LIMIT ((unsigned) 256) + +#define STRING_ASM_OP "\t.string\t" + +/* The routine used to output NUL terminated strings. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable, especially for targets like the i386 + (where the only alternative is to output character sequences as + comma separated lists of numbers). */ + +#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \ + do \ + { \ + register const unsigned char *_limited_str = \ + (const unsigned char *) (STR); \ + register unsigned ch; \ + \ + fprintf ((FILE), "%s\"", STRING_ASM_OP); \ + \ + for (; (ch = *_limited_str); _limited_str++) \ + { \ + register int escape; \ + \ + switch (escape = ESCAPES[ch]) \ + { \ + case 0: \ + putc (ch, (FILE)); \ + break; \ + case 1: \ + fprintf ((FILE), "\\%03o", ch); \ + break; \ + default: \ + putc ('\\', (FILE)); \ + putc (escape, (FILE)); \ + break; \ + } \ + } \ + \ + fprintf ((FILE), "\"\n"); \ + } \ + while (0) + +/* The routine used to output sequences of byte values. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable. Note that if we find subparts of the + character sequence which end with NUL (and which are shorter than + STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. 
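A minimal stand-alone sketch of the per-character decision described above, using isprint in place of the ESCAPES lookup table (so short escapes such as tab come out as octal here); emit_string is an illustrative name, not part of this interface:

#include <ctype.h>
#include <stdio.h>

static void
emit_string (FILE *f, const unsigned char *s)
{
  fprintf (f, "\t.string\t\"");
  for (; *s; s++)
    {
      if (*s == '"' || *s == '\\')
        fprintf (f, "\\%c", *s);     // printable, but must be escaped
      else if (isprint (*s))
        putc (*s, f);                // table entry 0: output directly
      else
        fprintf (f, "\\%03o", *s);   // table entry 1: octal escape
    }
  fprintf (f, "\"\n");
}

int
main (void)
{
  emit_string (stdout, (const unsigned char *) "hi\tthere");
  return 0;
}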
*/ + +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ + do \ + { \ + const unsigned char *_ascii_bytes = \ + (const unsigned char *) (STR); \ + const unsigned char *limit = _ascii_bytes + (LENGTH); \ + const unsigned char *last_null = NULL; \ + unsigned bytes_in_chunk = 0; \ + \ + for (; _ascii_bytes < limit; _ascii_bytes++) \ + { \ + const unsigned char *p; \ + \ + if (bytes_in_chunk >= 60) \ + { \ + fprintf ((FILE), "\"\n"); \ + bytes_in_chunk = 0; \ + } \ + \ + if (_ascii_bytes > last_null) \ + { \ + for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \ + continue; \ + last_null = p; \ + } \ + else \ + p = last_null; \ + \ + if (p < limit && (p - _ascii_bytes) <= (long)STRING_LIMIT) \ + { \ + if (bytes_in_chunk > 0) \ + { \ + fprintf ((FILE), "\"\n"); \ + bytes_in_chunk = 0; \ + } \ + \ + ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \ + _ascii_bytes = p; \ + } \ + else \ + { \ + register int escape; \ + register unsigned ch; \ + \ + if (bytes_in_chunk == 0) \ + fprintf ((FILE), "%s\"", ASCII_DATA_ASM_OP); \ + \ + switch (escape = ESCAPES[ch = *_ascii_bytes]) \ + { \ + case 0: \ + putc (ch, (FILE)); \ + bytes_in_chunk++; \ + break; \ + case 1: \ + fprintf ((FILE), "\\%03o", ch); \ + bytes_in_chunk += 4; \ + break; \ + default: \ + putc ('\\', (FILE)); \ + putc (escape, (FILE)); \ + bytes_in_chunk += 2; \ + break; \ + } \ + } \ + } \ + \ + if (bytes_in_chunk > 0) \ + fprintf ((FILE), "\"\n"); \ + } \ + while (0) + +/* Allow the use of the -frecord-gcc-switches switch via the + elf_record_gcc_switches function defined in varasm.c. */ +#undef TARGET_ASM_RECORD_GCC_SWITCHES +#define TARGET_ASM_RECORD_GCC_SWITCHES elf_record_gcc_switches + +/* A C statement (sans semicolon) to output to the stdio stream STREAM + any text necessary for declaring the name of an external symbol + named NAME which is referenced in this compilation but not defined. + It is needed to properly support non-default visibility. */ + +#ifndef ASM_OUTPUT_EXTERNAL +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + default_elf_asm_output_external (FILE, DECL, NAME) +#endif diff --git a/gcc/config/fixed-bit.c b/gcc/config/fixed-bit.c new file mode 100644 index 000000000..40ac2e29f --- /dev/null +++ b/gcc/config/fixed-bit.c @@ -0,0 +1,1216 @@ +/* This is a software fixed-point library. + Copyright (C) 2007, 2009 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* This implements fixed-point arithmetic. + + Contributed by Chao-ying Fu . 
*/ + +/* To use this file, we need to define one of the following: + QQ_MODE, UQQ_MODE, HQ_MODE, UHQ_MODE, SQ_MODE, USQ_MODE, DQ_MODE, UDQ_MODE, + TQ_MODE, UTQ_MODE, HA_MODE, UHA_MODE, SA_MODE, USA_MODE, DA_MODE, UDA_MODE, + TA_MODE, UTA_MODE. + Then, all operators for this machine mode will be created. + + Or, we need to define FROM_* TO_* for conversions from one mode to another + mode. The mode could be one of the following: + Fract: QQ, UQQ, HQ, UHQ, SQ, USQ, DQ, UDQ, TQ, UTQ + Accum: HA, UHA, SA, USA, DA, UDA, TA, UTA + Signed integer: QI, HI, SI, DI, TI + Unsigned integer: UQI, UHI, USI, UDI, UTI + Floating-point: SF, DF + Ex: If we define FROM_QQ and TO_SI, the conversion from QQ to SI is + generated. */ + +#include "tconfig.h" +#include "tsystem.h" +#include "coretypes.h" +#include "tm.h" + +#ifndef MIN_UNITS_PER_WORD +#define MIN_UNITS_PER_WORD UNITS_PER_WORD +#endif + +#include "config/fixed-bit.h" + +#if defined(FIXED_ADD) && defined(L_add) +FIXED_C_TYPE +FIXED_ADD (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y, z; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + z = x + y; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_ADD */ + +#if defined(FIXED_SSADD) && defined(L_ssadd) +FIXED_C_TYPE +FIXED_SSADD (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y, z; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + z = x + y; + if ((((x ^ y) >> I_F_BITS) & 1) == 0) + { + if (((z ^ x) >> I_F_BITS) & 1) + { + z = 1; + z = z << I_F_BITS; + if (x >= 0) + z--; + } + } +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_SSADD */ + +#if defined(FIXED_USADD) && defined(L_usadd) +FIXED_C_TYPE +FIXED_USADD (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y, z; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + z = x + y; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + if (z < x || z < y) /* max */ + { + z = -1; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + } + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_USADD */ + +#if defined(FIXED_SUB) && defined(L_sub) +FIXED_C_TYPE +FIXED_SUB (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y, z; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + z = x - y; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_SUB */ + +#if defined(FIXED_SSSUB) && defined(L_sssub) +FIXED_C_TYPE +FIXED_SSSUB (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y, z; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + z = x - y; + if (((x ^ y) >> I_F_BITS) & 1) + { + if (((z ^ x) >> I_F_BITS) & 1) + { + z = 1; + z = z << I_F_BITS; + if (x >= 0) + z--; + } + } +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_SSSUB */ + +#if defined(FIXED_USSUB) && defined(L_ussub) +FIXED_C_TYPE +FIXED_USSUB (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y, z; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + z = x - y; + if (x < y) + z = 0; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, 
FIXED_SIZE); + return c; +} +#endif /* FIXED_USSUB */ + +#if defined(FIXED_SATURATE1) && defined(L_saturate1) +void +FIXED_SATURATE1 (DINT_C_TYPE *a) +{ + DINT_C_TYPE max, min; + max = (DINT_C_TYPE)1 << I_F_BITS; + max = max - 1; +#if MODE_UNSIGNED == 0 + min = (DINT_C_TYPE)1 << (2 * FIXED_WIDTH - 1); + min = min >> (2 * FIXED_WIDTH - 1 - I_F_BITS); +#else + min = 0; +#endif + if (*a > max) + *a = max; + else if (*a < min) + *a = min; +} +#endif /* FIXED_SATURATE1 */ + +#if defined(FIXED_SATURATE2) && defined(L_saturate2) +void +FIXED_SATURATE2 (INT_C_TYPE *high, INT_C_TYPE *low) +{ + INT_C_TYPE r_max, s_max, r_min, s_min; + r_max = 0; +#if (MODE_UNSIGNED == 0) || HAVE_PADDING_BITS + s_max = (INT_C_TYPE)1 << I_F_BITS; + s_max = s_max - 1; +#else + s_max = -1; +#endif +#if MODE_UNSIGNED == 0 + r_min = -1; + s_min = (INT_C_TYPE)1 << (FIXED_WIDTH - 1); + s_min = s_min >> (FIXED_WIDTH - 1 - I_F_BITS); +#else + r_min = 0; + s_min = 0; +#endif + + if (*high > r_max + || (*high == r_max && (UINT_C_TYPE)(*low) > (UINT_C_TYPE)s_max)) + { + *high = r_max; + *low = s_max; + } + else if (*high < r_min || + (*high == r_min && (UINT_C_TYPE)(*low) < (UINT_C_TYPE)s_min)) + { + *high = r_min; + *low = s_min; + } +} +#endif /* FIXED_SATURATE2 */ + +#if defined(FIXED_MULHELPER) && defined(L_mulhelper) +FIXED_C_TYPE +FIXED_MULHELPER (FIXED_C_TYPE a, FIXED_C_TYPE b, word_type satp) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y; + +#if defined (DINT_C_TYPE) + INT_C_TYPE z; + DINT_C_TYPE dx, dy, dz; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + dx = (DINT_C_TYPE) x; + dy = (DINT_C_TYPE) y; + dz = dx * dy; + /* Round the result by adding (1 << (FBITS -1)). */ + dz += ((DINT_C_TYPE) 1 << (FBITS - 1)); + dz = dz >> FBITS; + if (satp) + FIXED_SATURATE1 (&dz); + + z = (INT_C_TYPE) dz; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; + +#else /* No DINT_C_TYPE */ + /* The result of multiplication expands to two INT_C_TYPE. */ + INTunion aa, bb; + INTunion a_high, a_low, b_high, b_low; + INTunion high_high, high_low, low_high, low_low; + INTunion r, s, temp1, temp2; + INT_C_TYPE carry = 0; + INT_C_TYPE z; + + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + + /* Decompose a and b. */ + aa.ll = x; + bb.ll = y; + + a_high.s.low = aa.s.high; + a_high.s.high = 0; + a_low.s.low = aa.s.low; + a_low.s.high = 0; + b_high.s.low = bb.s.high; + b_high.s.high = 0; + b_low.s.low = bb.s.low; + b_low.s.high = 0; + + /* Perform four multiplications. */ + low_low.ll = a_low.ll * b_low.ll; + low_high.ll = a_low.ll * b_high.ll; + high_low.ll = a_high.ll * b_low.ll; + high_high.ll = a_high.ll * b_high.ll; + + /* Accumulate four results to {r, s}. */ + temp1.s.high = high_low.s.low; + temp1.s.low = 0; + s.ll = low_low.ll + temp1.ll; + if ((UINT_C_TYPE) s.ll < (UINT_C_TYPE) low_low.ll + || (UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp1.ll) + carry ++; /* Carry. */ + temp1.ll = s.ll; + temp2.s.high = low_high.s.low; + temp2.s.low = 0; + s.ll = temp1.ll + temp2.ll; + if ((UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp1.ll + || (UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp2.ll) + carry ++; /* Carry. */ + + temp1.s.low = high_low.s.high; + temp1.s.high = 0; + r.ll = high_high.ll + temp1.ll; + temp1.s.low = low_high.s.high; + temp1.s.high = 0; + r.ll = r.ll + temp1.ll + carry; + +#if MODE_UNSIGNED == 0 + /* For signed types, we need to add neg(y) to r, if x < 0. */ + if (x < 0) + r.ll = r.ll - y; + /* We need to add neg(x) to r, if y < 0. 
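A minimal stand-alone check of this sign-correction step, shrunk to illustrative 16-bit operands with the {r, s} pair collapsed into one 32-bit value (and assuming the usual two's-complement wrap on the final conversion):

#include <stdint.h>
#include <stdio.h>

static int32_t
signed_mul_via_unsigned (int16_t x, int16_t y)
{
  // Unsigned 16x16 product, then fix up the high half as described:
  // subtract (y << 16) when x < 0 and (x << 16) when y < 0.
  uint32_t prod = (uint32_t) (uint16_t) x * (uint16_t) y;
  if (x < 0)
    prod -= (uint32_t) (uint16_t) y << 16;
  if (y < 0)
    prod -= (uint32_t) (uint16_t) x << 16;
  return (int32_t) prod;
}

int
main (void)
{
  printf ("%d\n", signed_mul_via_unsigned (-300, 500));   // -150000
  return 0;
}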
*/ + if (y < 0) + r.ll = r.ll - x; +#endif + + /* Round the result by adding (1 << (FBITS -1)). */ + temp1.ll = s.ll; + s.ll += ((INT_C_TYPE) 1 << (FBITS -1)); + if ((UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp1.ll + || (UINT_C_TYPE) s.ll < (UINT_C_TYPE) ((INT_C_TYPE) 1 << (FBITS -1))) + r.ll += 1; + + /* Shift right the result by FBITS. */ +#if FBITS == FIXED_WIDTH + /* This happens only for unsigned types without any padding bits. + So, it is safe to set r.ll to 0 as it is logically shifted right. */ + s.ll = r.ll; + r.ll = 0; +#else + s.ll = ((UINT_C_TYPE)s.ll) >> FBITS; + temp1.ll = r.ll << (FIXED_WIDTH - FBITS); + s.ll = s.ll | temp1.ll; + r.ll = r.ll >> FBITS; +#endif + + if (satp) + FIXED_SATURATE2 (&r.ll, &s.ll); + + z = (INT_C_TYPE) s.ll; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +#endif +} +#endif /* FIXED_MULHELPER */ + +#if defined(FIXED_MUL) && defined(L_mul) +FIXED_C_TYPE +FIXED_MUL (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + return FIXED_MULHELPER (a, b, 0); +} +#endif /* FIXED_MUL */ + +#if defined(FIXED_SSMUL) && defined(L_ssmul) +FIXED_C_TYPE +FIXED_SSMUL (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + return FIXED_MULHELPER (a, b, 1); +} +#endif /* FIXED_SSMUL */ + +#if defined(FIXED_USMUL) && defined(L_usmul) +FIXED_C_TYPE +FIXED_USMUL (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + return FIXED_MULHELPER (a, b, 1); +} +#endif /* FIXED_USMUL */ + +#if defined(FIXED_DIVHELPER) && defined(L_divhelper) +FIXED_C_TYPE +FIXED_DIVHELPER (FIXED_C_TYPE a, FIXED_C_TYPE b, word_type satp) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y; + INT_C_TYPE z; + +#if defined (DINT_C_TYPE) + DINT_C_TYPE dx, dy, dz; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + dx = (DINT_C_TYPE) x; + dy = (DINT_C_TYPE) y; + dx = dx << FBITS; + dz = dx / dy; + if (satp) + FIXED_SATURATE1 (&dz); + z = (INT_C_TYPE) dz; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; + +#else /* No DINT_C_TYPE */ + INT_C_TYPE pos_a, pos_b, r, s; + INT_C_TYPE quo_r, quo_s, mod, temp; + word_type i; +#if MODE_UNSIGNED == 0 + word_type num_of_neg = 0; +#endif + + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + pos_a = x; + pos_b = y; + +#if MODE_UNSIGNED == 0 + /* If a < 0, negate a. */ + if (pos_a < 0) + { + pos_a = -pos_a; + num_of_neg ++; + } + /* If b < 0, negate b. */ + if (pos_b < 0) + { + pos_b = -pos_b; + num_of_neg ++; + } +#endif + + /* Left shift pos_a to {r, s} by FBITS. */ +#if FBITS == FIXED_WIDTH + /* This happens only for unsigned types without any padding bits. */ + r = pos_a; + s = 0; +#else + s = pos_a << FBITS; + r = pos_a >> (FIXED_WIDTH - FBITS); +#endif + + /* Unsigned divide r by pos_b to quo_r. The remainder is in mod. */ + quo_r = (UINT_C_TYPE)r / (UINT_C_TYPE)pos_b; + mod = (UINT_C_TYPE)r % (UINT_C_TYPE)pos_b; + quo_s = 0; + + for (i = 0; i < FIXED_WIDTH; i++) + { + /* Record the leftmost bit of mod. */ + word_type leftmost_mode = (mod >> (FIXED_WIDTH - 1)) & 1; + /* Shift left mod by 1 bit. */ + mod = mod << 1; + /* Test the leftmost bit of s to add to mod. */ + if ((s >> (FIXED_WIDTH - 1)) & 1) + mod ++; + /* Shift left quo_s by 1 bit. */ + quo_s = quo_s << 1; + /* Try to calculate (mod - pos_b). */ + temp = mod - pos_b; + if (leftmost_mode || (UINT_C_TYPE)mod >= (UINT_C_TYPE)pos_b) + { + quo_s ++; + mod = temp; + } + /* Shift left s by 1 bit. 
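This loop rebuilds, one bit at a time, what the double-width branch above computes directly: the quotient of (a << FBITS) by b. A minimal sketch of that direct form, for an illustrative Q8.8 format rather than a real libgcc mode:

#include <stdint.h>
#include <stdio.h>

static int16_t
fx_div (int16_t a, int16_t b)
{
  int32_t wide = (int32_t) a << 8;   // pre-shift the dividend by FBITS
  return (int16_t) (wide / b);       // quotient keeps 8 fraction bits
}

int
main (void)
{
  // 3.0 / 2.0 == 1.5, i.e. 0x0180 in Q8.8.
  printf ("0x%04x\n", (uint16_t) fx_div (3 << 8, 2 << 8));
  return 0;
}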
*/ + s = s << 1; + } + +#if MODE_UNSIGNED == 0 + if (num_of_neg == 1) + { + quo_s = -quo_s; + if (quo_s == 0) + quo_r = -quo_r; + else + quo_r = ~quo_r; + } +#endif + if (satp) + FIXED_SATURATE2 (&quo_r, &quo_s); + z = quo_s; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +#endif +} +#endif /* FIXED_DIVHELPER */ + +#if defined(FIXED_DIV) && defined(L_div) +FIXED_C_TYPE +FIXED_DIV (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + return FIXED_DIVHELPER (a, b, 0); +} +#endif /* FIXED_DIV */ + + +#if defined(FIXED_UDIV) && defined(L_udiv) +FIXED_C_TYPE +FIXED_UDIV (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + return FIXED_DIVHELPER (a, b, 0); +} +#endif /* FIXED_UDIV */ + +#if defined(FIXED_SSDIV) && defined(L_ssdiv) +FIXED_C_TYPE +FIXED_SSDIV (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + return FIXED_DIVHELPER (a, b, 1); +} +#endif /* FIXED_SSDIV */ + +#if defined(FIXED_USDIV) && defined(L_usdiv) +FIXED_C_TYPE +FIXED_USDIV (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + return FIXED_DIVHELPER (a, b, 1); +} +#endif /* FIXED_USDIV */ + +#if defined(FIXED_NEG) && defined(L_neg) +FIXED_C_TYPE +FIXED_NEG (FIXED_C_TYPE a) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, z; + memcpy (&x, &a, FIXED_SIZE); + z = -x; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_NEG */ + +#if defined(FIXED_SSNEG) && defined(L_ssneg) +FIXED_C_TYPE +FIXED_SSNEG (FIXED_C_TYPE a) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, y, z; + memcpy (&y, &a, FIXED_SIZE); + x = 0; + z = x - y; + if (((x ^ y) >> I_F_BITS) & 1) + { + if (((z ^ x) >> I_F_BITS) & 1) + { + z = 1; + z = z << I_F_BITS; + if (x >= 0) + z--; + } + } +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_SSNEG */ + +#if defined(FIXED_USNEG) && defined(L_usneg) +FIXED_C_TYPE +FIXED_USNEG (FIXED_C_TYPE a __attribute__ ((__unused__))) +{ + FIXED_C_TYPE c; + INT_C_TYPE z; + z = 0; + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_USNEG */ + +#if defined(FIXED_ASHLHELPER) && defined(L_ashlhelper) +FIXED_C_TYPE +FIXED_ASHLHELPER (FIXED_C_TYPE a, word_type b, word_type satp) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, z; + +#if defined (DINT_C_TYPE) + DINT_C_TYPE dx, dz; + memcpy (&x, &a, FIXED_SIZE); + dx = (DINT_C_TYPE) x; + if (b >= FIXED_WIDTH) + dz = dx << FIXED_WIDTH; + else + dz = dx << b; + if (satp) + FIXED_SATURATE1 (&dz); + z = (INT_C_TYPE) dz; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; + +#else /* No DINT_C_TYPE */ + INT_C_TYPE r, s; + memcpy (&x, &a, FIXED_SIZE); + /* We need to shift left x by b bits to {r, s}. 
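A minimal sketch of that split shift with illustrative 16-bit words and 0 < b < 16 (the b >= FIXED_WIDTH case is handled separately, as above); shl_pair is a made-up helper name:

#include <stdint.h>
#include <stdio.h>

static void
shl_pair (uint16_t x, unsigned b, uint16_t *r, uint16_t *s)
{
  *s = (uint16_t) (x << b);          // low word: the shifted value
  *r = (uint16_t) (x >> (16 - b));   // high word: the bits shifted out of s
}

int
main (void)
{
  uint16_t r, s;
  shl_pair (0x1234, 8, &r, &s);
  printf ("r=0x%04x s=0x%04x\n", r, s);   // r=0x0012 s=0x3400
  return 0;
}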
*/ + if (b >= FIXED_WIDTH) + { + r = b; + s = 0; + } + else + { + s = x << b; + r = x >> (FIXED_WIDTH - b); + } + if (satp) + FIXED_SATURATE2 (&r, &s); + z = s; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +#endif +} +#endif /* FIXED_ASHLHELPER */ + +#if defined(FIXED_ASHL) && defined(L_ashl) +FIXED_C_TYPE +FIXED_ASHL (FIXED_C_TYPE a, word_type b) +{ + return FIXED_ASHLHELPER (a, b, 0); +} +#endif /* FIXED_ASHL */ + +#if defined(FIXED_ASHR) && defined(L_ashr) +FIXED_C_TYPE +FIXED_ASHR (FIXED_C_TYPE a, word_type b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, z; + memcpy (&x, &a, FIXED_SIZE); + z = x >> b; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_ASHR */ + +#if defined(FIXED_LSHR) && defined(L_lshr) +FIXED_C_TYPE +FIXED_LSHR (FIXED_C_TYPE a, word_type b) +{ + FIXED_C_TYPE c; + INT_C_TYPE x, z; + memcpy (&x, &a, FIXED_SIZE); + z = x >> b; +#if HAVE_PADDING_BITS + z = z << PADDING_BITS; + z = z >> PADDING_BITS; +#endif + memcpy (&c, &z, FIXED_SIZE); + return c; +} +#endif /* FIXED_LSHR */ + +#if defined(FIXED_SSASHL) && defined(L_ssashl) +FIXED_C_TYPE +FIXED_SSASHL (FIXED_C_TYPE a, word_type b) +{ + return FIXED_ASHLHELPER (a, b, 1); +} +#endif /* FIXED_SSASHL */ + +#if defined(FIXED_USASHL) && defined(L_usashl) +FIXED_C_TYPE +FIXED_USASHL (FIXED_C_TYPE a, word_type b) +{ + return FIXED_ASHLHELPER (a, b, 1); +} +#endif /* FIXED_USASHL */ + +#if defined(FIXED_CMP) && defined(L_cmp) +word_type +FIXED_CMP (FIXED_C_TYPE a, FIXED_C_TYPE b) +{ + INT_C_TYPE x, y; + memcpy (&x, &a, FIXED_SIZE); + memcpy (&y, &b, FIXED_SIZE); + + if (x < y) + return 0; + else if (x > y) + return 2; + + return 1; +} +#endif /* FIXED_CMP */ + +/* Fixed -> Fixed. */ +#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 4 && TO_TYPE == 4 +TO_FIXED_C_TYPE +FRACT (FROM_FIXED_C_TYPE a) +{ + TO_FIXED_C_TYPE c; + FROM_INT_C_TYPE x; + TO_INT_C_TYPE z; + int shift_amount; + memcpy (&x, &a, FROM_FIXED_SIZE); +#if TO_FBITS > FROM_FBITS /* Need left shift. */ + shift_amount = TO_FBITS - FROM_FBITS; + z = (TO_INT_C_TYPE) x; + z = z << shift_amount; +#else /* TO_FBITS <= FROM_FBITS. Need right Shift. */ + shift_amount = FROM_FBITS - TO_FBITS; + x = x >> shift_amount; + z = (TO_INT_C_TYPE) x; +#endif /* TO_FBITS > FROM_FBITS */ + +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* FRACT && FROM_TYPE == 4 && TO_TYPE == 4 */ + +/* Fixed -> Fixed with saturation. */ +#if defined(SATFRACT) && defined(L_satfract) && FROM_TYPE == 4 && TO_TYPE == 4 +TO_FIXED_C_TYPE +SATFRACT (FROM_FIXED_C_TYPE a) +{ + TO_FIXED_C_TYPE c; + TO_INT_C_TYPE z; + FROM_INT_C_TYPE x; +#if FROM_MODE_UNSIGNED == 0 + BIG_SINT_C_TYPE high, low; + BIG_SINT_C_TYPE max_high, max_low; + BIG_SINT_C_TYPE min_high, min_low; +#else + BIG_UINT_C_TYPE high, low; + BIG_UINT_C_TYPE max_high, max_low; + BIG_UINT_C_TYPE min_high, min_low; +#endif +#if TO_FBITS > FROM_FBITS + BIG_UINT_C_TYPE utemp; +#endif +#if TO_MODE_UNSIGNED == 0 + BIG_SINT_C_TYPE stemp; +#endif +#if TO_FBITS != FROM_FBITS + int shift_amount; +#endif + memcpy (&x, &a, FROM_FIXED_SIZE); + + /* Step 1. We need to store x to {high, low}. */ +#if FROM_MODE_UNSIGNED == 0 + low = (BIG_SINT_C_TYPE) x; + if (x < 0) + high = -1; + else + high = 0; +#else + low = (BIG_UINT_C_TYPE) x; + high = 0; +#endif + + /* Step 2. We need to shift {high, low}. 
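The shift in this step realigns the binary point by the difference in fraction bits, exactly as the unsaturated conversion above does in a single step. A minimal sketch with illustrative Q7 and Q15 formats (not tied to the real modes here):

#include <stdint.h>
#include <stdio.h>

static int16_t q7_to_q15 (int8_t a)  { return (int16_t) ((int16_t) a << 8); }
static int8_t  q15_to_q7 (int16_t a) { return (int8_t) (a >> 8); }

int
main (void)
{
  // 0.5 in Q7 is 64; the same value in Q15 is 16384.
  printf ("%d %d\n", q7_to_q15 (64), q15_to_q7 (16384));   // 16384 64
  return 0;
}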
*/ +#if TO_FBITS > FROM_FBITS /* Left shift. */ + shift_amount = TO_FBITS - FROM_FBITS; + utemp = (BIG_UINT_C_TYPE) low; + utemp = utemp >> (BIG_WIDTH - shift_amount); + high = ((BIG_UINT_C_TYPE)(high << shift_amount)) | utemp; + low = low << shift_amount; +#elif TO_FBITS < FROM_FBITS /* Right shift. */ + shift_amount = FROM_FBITS - TO_FBITS; + low = low >> shift_amount; +#endif + + /* Step 3. Compare {high, low} with max and min of TO_FIXED_C_TYPE. */ + max_high = 0; +#if BIG_WIDTH > TO_FIXED_WIDTH || TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS + max_low = (BIG_UINT_C_TYPE)1 << TO_I_F_BITS; + max_low = max_low - 1; +#else + max_low = -1; +#endif + +#if TO_MODE_UNSIGNED == 0 + min_high = -1; + stemp = (BIG_SINT_C_TYPE)1 << (BIG_WIDTH - 1); + stemp = stemp >> (BIG_WIDTH - 1 - TO_I_F_BITS); + min_low = stemp; +#else + min_high = 0; + min_low = 0; +#endif + +#if FROM_MODE_UNSIGNED == 0 && TO_MODE_UNSIGNED == 0 + /* Signed -> Signed. */ + if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high + || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ + else if ((BIG_SINT_C_TYPE) high < (BIG_SINT_C_TYPE) min_high + || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) min_high + && (BIG_UINT_C_TYPE) low < (BIG_UINT_C_TYPE) min_low)) + low = min_low; /* Minimum. */ +#elif FROM_MODE_UNSIGNED == 1 && TO_MODE_UNSIGNED == 1 + /* Unigned -> Unsigned. */ + if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high + || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ +#elif FROM_MODE_UNSIGNED == 0 && TO_MODE_UNSIGNED == 1 + /* Signed -> Unsigned. */ + if (x < 0) + low = 0; /* Minimum. */ + else if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high + || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ +#elif FROM_MODE_UNSIGNED == 1 && TO_MODE_UNSIGNED == 0 + /* Unsigned -> Signed. */ + if ((BIG_SINT_C_TYPE) high < 0) + low = max_low; /* Maximum. */ + else if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high + || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ +#endif + + /* Step 4. Store the result. */ + z = (TO_INT_C_TYPE) low; +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* defined(SATFRACT) && FROM_TYPE == 4 && TO_TYPE == 4 */ + +/* Fixed -> Int. */ +#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 4 && TO_TYPE == 1 +TO_INT_C_TYPE +FRACT (FROM_FIXED_C_TYPE a) +{ + FROM_INT_C_TYPE x; + TO_INT_C_TYPE z; + FROM_INT_C_TYPE i = 0; + memcpy (&x, &a, FROM_FIXED_SIZE); + +#if FROM_MODE_UNSIGNED == 0 + if (x < 0) + { +#if FROM_FIXED_WIDTH == FROM_FBITS + if (x != 0) + i = 1; +#else + if (((FROM_INT_C_TYPE)(x << (FROM_FIXED_WIDTH - FROM_FBITS))) != 0) + i = 1; +#endif + } +#endif + +#if FROM_FIXED_WIDTH == FROM_FBITS + x = 0; +#else + x = x >> FROM_FBITS; +#endif + x = x + i; + z = (TO_INT_C_TYPE) x; + return z; +} +#endif /* defined(FRACT) && FROM_TYPE == 4 && TO_TYPE == 1 */ + +/* Fixed -> Unsigned int. 
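The signed conversion just above rounds toward zero: the arithmetic shift alone would round toward minus infinity, so 1 is added back for a negative value with a nonzero fraction. A minimal sketch (illustrative Q8.8, assuming an arithmetic right shift of negative values, as the code above also does):

#include <stdint.h>
#include <stdio.h>

static int
fx_to_int (int16_t x)
{
  int i = (x < 0 && (x & 0xff) != 0) ? 1 : 0;   // nonzero fraction?
  return (x >> 8) + i;                          // drop FBITS, round toward zero
}

int
main (void)
{
  // -1.5 truncates to -1, +1.5 truncates to +1.
  printf ("%d %d\n", fx_to_int (-384), fx_to_int (384));   // -1 1
  return 0;
}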
*/ +#if defined(FRACTUNS) && defined(L_fractuns) && FROM_TYPE == 4 && TO_TYPE == 2 +TO_INT_C_TYPE +FRACTUNS (FROM_FIXED_C_TYPE a) +{ + FROM_INT_C_TYPE x; + TO_INT_C_TYPE z; + FROM_INT_C_TYPE i = 0; + memcpy (&x, &a, FROM_FIXED_SIZE); + +#if FROM_MODE_UNSIGNED == 0 + if (x < 0) + { +#if FROM_FIXED_WIDTH == FROM_FBITS + if (x != 0) + i = 1; +#else + if (((FROM_INT_C_TYPE)(x << (FROM_FIXED_WIDTH - FROM_FBITS))) != 0) + i = 1; +#endif + } +#endif + +#if FROM_FIXED_WIDTH == FROM_FBITS + x = 0; +#else + x = x >> FROM_FBITS; +#endif + x = x + i; + z = (TO_INT_C_TYPE) x; + return z; +} +#endif /* defined(FRACTUNS) && FROM_TYPE == 4 && TO_TYPE == 2 */ + +/* Int -> Fixed. */ +#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 1 && TO_TYPE == 4 +TO_FIXED_C_TYPE +FRACT (FROM_INT_C_TYPE a) +{ + TO_FIXED_C_TYPE c; + TO_INT_C_TYPE z; + z = (TO_INT_C_TYPE) a; +#if TO_FIXED_WIDTH == TO_FBITS + z = 0; +#else + z = z << TO_FBITS; +#endif +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* defined(FRACT) && FROM_TYPE == 1 && TO_TYPE == 4 */ + +/* Signed int -> Fixed with saturation. */ +#if defined(SATFRACT) && defined(L_satfract) &&FROM_TYPE == 1 && TO_TYPE == 4 +TO_FIXED_C_TYPE +SATFRACT (FROM_INT_C_TYPE a) +{ + TO_FIXED_C_TYPE c; + TO_INT_C_TYPE z; + FROM_INT_C_TYPE x = a; + BIG_SINT_C_TYPE high, low; + BIG_SINT_C_TYPE max_high, max_low; + BIG_SINT_C_TYPE min_high, min_low; +#if TO_MODE_UNSIGNED == 0 + BIG_SINT_C_TYPE stemp; +#endif +#if BIG_WIDTH != TO_FBITS + BIG_UINT_C_TYPE utemp; + int shift_amount; +#endif + + /* Step 1. We need to store x to {high, low}. */ + low = (BIG_SINT_C_TYPE) x; + if (x < 0) + high = -1; + else + high = 0; + + /* Step 2. We need to left shift {high, low}. */ +#if BIG_WIDTH == TO_FBITS + high = low; + low = 0; +#else + shift_amount = TO_FBITS; + utemp = (BIG_UINT_C_TYPE) low; + utemp = utemp >> (BIG_WIDTH - shift_amount); + high = ((BIG_UINT_C_TYPE)(high << shift_amount)) | utemp; + low = low << shift_amount; +#endif + + /* Step 3. Compare {high, low} with max and min of TO_FIXED_C_TYPE. */ + max_high = 0; +#if BIG_WIDTH > TO_FIXED_WIDTH || TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS + max_low = (BIG_UINT_C_TYPE)1 << TO_I_F_BITS; + max_low = max_low - 1; +#else + max_low = -1; +#endif + +#if TO_MODE_UNSIGNED == 0 + min_high = -1; + stemp = (BIG_SINT_C_TYPE)1 << (BIG_WIDTH - 1); + stemp = stemp >> (BIG_WIDTH - 1 - TO_I_F_BITS); + min_low = stemp; +#else + min_high = 0; + min_low = 0; +#endif + +#if TO_MODE_UNSIGNED == 0 + /* Signed -> Signed. */ + if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high + || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ + else if ((BIG_SINT_C_TYPE) high < (BIG_SINT_C_TYPE) min_high + || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) min_high + && (BIG_UINT_C_TYPE) low < (BIG_UINT_C_TYPE) min_low)) + low = min_low; /* Minimum. */ +#else + /* Signed -> Unsigned. */ + if (x < 0) + low = 0; /* Minimum. */ + else if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high + || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ +#endif + + /* Step 4. Store the result. 
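A minimal stand-alone version of the same clamping, for an illustrative signed Q8.8 format held in int16_t (so only integers in [-128, 127] are representable); int_to_q8_8_sat is a made-up name:

#include <stdint.h>
#include <stdio.h>

static int16_t
int_to_q8_8_sat (int a)
{
  int64_t wide = (int64_t) a << 8;   // place the binary point
  if (wide > INT16_MAX)
    return INT16_MAX;                // clamp to the maximum, about 127.996
  if (wide < INT16_MIN)
    return INT16_MIN;                // clamp to the minimum, -128.0
  return (int16_t) wide;
}

int
main (void)
{
  printf ("%d %d %d\n", int_to_q8_8_sat (3),
          int_to_q8_8_sat (1000), int_to_q8_8_sat (-1000));   // 768 32767 -32768
  return 0;
}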
*/ + z = (TO_INT_C_TYPE) low; +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* defined(SATFRACT) && FROM_TYPE == 1 && TO_TYPE == 4 */ + +/* Unsigned int -> Fixed. */ +#if defined(FRACTUNS) && defined(L_fractuns) &&FROM_TYPE == 2 && TO_TYPE == 4 +TO_FIXED_C_TYPE +FRACTUNS (FROM_INT_C_TYPE a) +{ + TO_FIXED_C_TYPE c; + TO_INT_C_TYPE z; + z = (TO_INT_C_TYPE) a; +#if TO_FIXED_WIDTH == TO_FBITS + z = 0; +#else + z = z << TO_FBITS; +#endif +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* defined(FRACTUNS) && FROM_TYPE == 2 && TO_TYPE == 4 */ + +/* Unsigned int -> Fixed with saturation. */ +#if defined(SATFRACTUNS) && defined(L_satfractuns) && FROM_TYPE == 2 && TO_TYPE == 4 +TO_FIXED_C_TYPE +SATFRACTUNS (FROM_INT_C_TYPE a) +{ + TO_FIXED_C_TYPE c; + TO_INT_C_TYPE z; + FROM_INT_C_TYPE x = a; + BIG_UINT_C_TYPE high, low; + BIG_UINT_C_TYPE max_high, max_low; +#if BIG_WIDTH != TO_FBITS + BIG_UINT_C_TYPE utemp; + int shift_amount; +#endif + + /* Step 1. We need to store x to {high, low}. */ + low = (BIG_UINT_C_TYPE) x; + high = 0; + + /* Step 2. We need to left shift {high, low}. */ +#if BIG_WIDTH == TO_FBITS + high = low; + low = 0; +#else + shift_amount = TO_FBITS; + utemp = (BIG_UINT_C_TYPE) low; + utemp = utemp >> (BIG_WIDTH - shift_amount); + high = ((BIG_UINT_C_TYPE)(high << shift_amount)) | utemp; + low = low << shift_amount; +#endif + + /* Step 3. Compare {high, low} with max and min of TO_FIXED_C_TYPE. */ + max_high = 0; +#if BIG_WIDTH > TO_FIXED_WIDTH || TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS + max_low = (BIG_UINT_C_TYPE)1 << TO_I_F_BITS; + max_low = max_low - 1; +#else + max_low = -1; +#endif + +#if TO_MODE_UNSIGNED == 1 + /* Unigned -> Unsigned. */ + if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high + || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ +#else + /* Unsigned -> Signed. */ + if ((BIG_SINT_C_TYPE) high < 0) + low = max_low; /* Maximum. */ + else if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high + || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high + && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low)) + low = max_low; /* Maximum. */ +#endif + + /* Step 4. Store the result. */ + z = (TO_INT_C_TYPE) low; +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* defined(SATFRACTUNS) && FROM_TYPE == 2 && TO_TYPE == 4 */ + +/* Fixed -> Float. */ +#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 4 && TO_TYPE == 3 +TO_FLOAT_C_TYPE +FRACT (FROM_FIXED_C_TYPE a) +{ + FROM_INT_C_TYPE x; + TO_FLOAT_C_TYPE z; + memcpy (&x, &a, FROM_FIXED_SIZE); + z = (TO_FLOAT_C_TYPE) x; + z = z / BASE; + return z; +} +#endif /* defined(FRACT) && FROM_TYPE == 4 && TO_TYPE == 3 */ + +/* Float -> Fixed. */ +#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 3 && TO_TYPE == 4 +TO_FIXED_C_TYPE +FRACT (FROM_FLOAT_C_TYPE a) +{ + FROM_FLOAT_C_TYPE temp; + TO_INT_C_TYPE z; + TO_FIXED_C_TYPE c; + + temp = a * BASE; + z = (TO_INT_C_TYPE) temp; +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* defined(FRACT) && FROM_TYPE == 3 && TO_TYPE == 4 */ + +/* Float -> Fixed with saturation. 
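In both directions the conversion is just a scale by BASE, which is 2 raised to FBITS. A minimal sketch for an illustrative Q8.8 format (BASE = 256); the helper names are made up:

#include <stdint.h>
#include <stdio.h>

static float   q8_8_to_float (int16_t x) { return (float) x / 256.0f; }
static int16_t float_to_q8_8 (float a)   { return (int16_t) (a * 256.0f); }

int
main (void)
{
  printf ("%.2f %d\n", q8_8_to_float (384), float_to_q8_8 (1.5f));   // 1.50 384
  return 0;
}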
*/ +#if defined(SATFRACT) && defined(L_satfract) && FROM_TYPE == 3 && TO_TYPE == 4 +TO_FIXED_C_TYPE +SATFRACT (FROM_FLOAT_C_TYPE a) +{ + FROM_FLOAT_C_TYPE temp; + TO_INT_C_TYPE z; + TO_FIXED_C_TYPE c; + + if (a >= FIXED_MAX) + { +#if TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS + z = (TO_INT_C_TYPE)1 << TO_I_F_BITS; + z = z - 1; +#else + z = -1; +#endif + } + else if (a <= FIXED_MIN) + { +#if TO_MODE_UNSIGNED == 0 + z = (TO_INT_C_TYPE)1 << TO_I_F_BITS; +#else + z = 0; +#endif + } + else + { + temp = a * BASE; + z = (TO_INT_C_TYPE) temp; + } + +#if TO_HAVE_PADDING_BITS + z = z << TO_PADDING_BITS; + z = z >> TO_PADDING_BITS; +#endif + memcpy (&c, &z, TO_FIXED_SIZE); + return c; +} +#endif /* defined(SATFRACT) && FROM_TYPE == 3 && TO_TYPE == 4 */ + diff --git a/gcc/config/fixed-bit.h b/gcc/config/fixed-bit.h new file mode 100644 index 000000000..562772d71 --- /dev/null +++ b/gcc/config/fixed-bit.h @@ -0,0 +1,1273 @@ +/* This is a software fixed-point library. + Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef _FIXED_BIT_H +#define _FIXED_BIT_H + +/* To use this file we need to define one of the following: + QQ_MODE, UQQ_MODE, HQ_MODE, UHQ_MODE, SQ_MODE, USQ_MODE, DQ_MODE, UDQ_MODE, + TQ_MODE, UTQ_MODE, HA_MODE, UHA_MODE, SA_MODE, USA_MODE, DA_MODE, UDA_MODE, + TA_MODE, UTA_MODE. + Then, all operators for this machine mode will be created. + + Or, we need to define FROM_* TO_* for conversions from one mode to another + mode. The mode could be one of the following: + Fract: QQ, UQQ, HQ, UHQ, SQ, USQ, DQ, UDQ, TQ, UTQ + Accum: HA, UHA, SA, USA, DA, UDA, TA, UTA + Signed integer: QI, HI, SI, DI, TI + Unsigned integer: UQI, UHI, USI, UDI, UTI + Floating-point: SF, DF + Ex: If we define FROM_QQ and TO_SI, the conversion from QQ to SI is + generated. 
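Defining SQ_MODE, for example, makes the macros later in this header expand FIXED_C_TYPE to SQtype and FIXED_ADD to __addsq3, so the objects built from this library end up exporting entry points along the lines of (illustrative list, not exhaustive):

  SQtype __addsq3 (SQtype, SQtype);
  SQtype __ssaddsq3 (SQtype, SQtype);
  SQtype __mulsq3 (SQtype, SQtype);

which the compiler then calls when it expands _Fract arithmetic out of line.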
*/ + +#ifndef LIBGCC2_LONG_DOUBLE_TYPE_SIZE +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE LONG_DOUBLE_TYPE_SIZE +#endif + +#ifndef LIBGCC2_HAS_SF_MODE +#define LIBGCC2_HAS_SF_MODE (BITS_PER_UNIT == 8) +#endif + +#ifndef LIBGCC2_HAS_DF_MODE +#define LIBGCC2_HAS_DF_MODE \ + (BITS_PER_UNIT == 8 \ + && (__SIZEOF_DOUBLE__ * __CHAR_BIT__ == 64 \ + || LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 64)) +#endif + +typedef int QItype __attribute__ ((mode (QI))); +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int HItype __attribute__ ((mode (HI))); +typedef unsigned int UHItype __attribute__ ((mode (HI))); +typedef _Fract QQtype __attribute__ ((mode (QQ))); +typedef unsigned _Fract UQQtype __attribute__ ((mode (UQQ))); +typedef _Fract HQtype __attribute__ ((mode (HQ))); +typedef unsigned _Fract UHQtype __attribute__ ((mode (UHQ))); +typedef _Fract HAtype __attribute__ ((mode (HA))); +typedef unsigned _Fract UHAtype __attribute__ ((mode (UHA))); +#define HAVE_QQ 1 +#define HAVE_UQQ 1 +#define HAVE_HQ 1 +#define HAVE_UHQ 1 +#define HAVE_HA 1 +#define HAVE_UHA 1 +#define HAVE_QI 1 +#define HAVE_UQI 1 +#define HAVE_HI 1 +#define HAVE_UHI 1 +#if MIN_UNITS_PER_WORD > 1 +/* These typedefs are usually forbidden on dsp's with UNITS_PER_WORD 1. */ +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef _Fract SQtype __attribute__ ((mode (SQ))); +typedef unsigned _Fract USQtype __attribute__ ((mode (USQ))); +typedef _Fract SAtype __attribute__ ((mode (SA))); +typedef unsigned _Fract USAtype __attribute__ ((mode (USA))); +#define HAVE_SQ 1 +#define HAVE_USQ 1 +#define HAVE_SA 1 +#define HAVE_USA 1 +#define HAVE_SI 1 +#define HAVE_USI 1 +#if LONG_LONG_TYPE_SIZE > 32 +/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 2. */ +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +typedef _Fract DQtype __attribute__ ((mode (DQ))); +typedef unsigned _Fract UDQtype __attribute__ ((mode (UDQ))); +typedef _Fract DAtype __attribute__ ((mode (DA))); +typedef unsigned _Fract UDAtype __attribute__ ((mode (UDA))); +#define HAVE_DQ 1 +#define HAVE_UDQ 1 +#define HAVE_DA 1 +#define HAVE_UDA 1 +#define HAVE_DI 1 +#define HAVE_UDI 1 +#if MIN_UNITS_PER_WORD > 4 +/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 4. */ +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); +typedef _Fract TQtype __attribute__ ((mode (TQ))); +typedef unsigned _Fract UTQtype __attribute__ ((mode (UTQ))); +typedef _Fract TAtype __attribute__ ((mode (TA))); +typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA))); +#define HAVE_TQ 1 +#define HAVE_UTQ 1 +#define HAVE_TA 1 +#define HAVE_UTA 1 +#define HAVE_TI 1 +#define HAVE_UTI 1 +#endif +#endif +#endif + +#if LIBGCC2_HAS_SF_MODE +typedef float SFtype __attribute__ ((mode (SF))); +#define HAVE_SF 1 +#endif +#if LIBGCC2_HAS_DF_MODE +typedef float DFtype __attribute__ ((mode (DF))); +#define HAVE_DF 1 +#endif + +typedef int word_type __attribute__ ((mode (__word__))); + +/* Based on modes, we create many defines. */ + +#if defined (QQ_MODE) && (HAVE_QQ == 1) +#define FIXED_SIZE 1 /* in bytes. */ +#define INT_C_TYPE QItype +#define UINT_C_TYPE UQItype +#define DINT_C_TYPE HItype +#define DUINT_C_TYPE UHItype +#define MODE_NAME QQ +#define MODE_NAME_S qq +#define MODE_UNSIGNED 0 +#endif + +#if defined (UQQ_MODE) && (HAVE_UQQ == 1) +#define FIXED_SIZE 1 /* in bytes. 
*/ +#define INT_C_TYPE UQItype +#define UINT_C_TYPE UQItype +#define DINT_C_TYPE UHItype +#define DUINT_C_TYPE UHItype +#define MODE_NAME UQQ +#define MODE_NAME_S uqq +#define MODE_UNSIGNED 1 +#endif + +#if defined (HQ_MODE) && (HAVE_HQ == 1) +#define FIXED_SIZE 2 /* in bytes. */ +#define INT_C_TYPE HItype +#define UINT_C_TYPE UHItype + +#if HAVE_SI == 1 +#define DINT_C_TYPE SItype +#define DUINT_C_TYPE USItype +#else +#define HINT_C_TYPE QItype +#define HUINT_C_TYPE UQItype +#endif + +#define MODE_NAME HQ +#define MODE_NAME_S hq +#define MODE_UNSIGNED 0 +#endif + +#if defined (UHQ_MODE) && (HAVE_UHQ == 1) +#define FIXED_SIZE 2 /* in bytes. */ +#define INT_C_TYPE UHItype +#define UINT_C_TYPE UHItype + +#if HAVE_SI == 1 +#define DINT_C_TYPE USItype +#define DUINT_C_TYPE USItype +#else +#define HINT_C_TYPE UQItype +#define HUINT_C_TYPE UQItype +#endif + +#define MODE_NAME UHQ +#define MODE_NAME_S uhq +#define MODE_UNSIGNED 1 +#endif + +#if defined (SQ_MODE) && (HAVE_SQ == 1) +#define FIXED_SIZE 4 /* in bytes. */ +#define INT_C_TYPE SItype +#define UINT_C_TYPE USItype + +#if HAVE_DI == 1 +#define DINT_C_TYPE DItype +#define DUINT_C_TYPE UDItype +#else +#define HINT_C_TYPE HItype +#define HUINT_C_TYPE UHItype +#endif + +#define MODE_NAME SQ +#define MODE_NAME_S sq +#define MODE_UNSIGNED 0 +#endif + +#if defined (USQ_MODE) && (HAVE_USQ == 1) +#define FIXED_SIZE 4 /* in bytes. */ +#define INT_C_TYPE USItype +#define UINT_C_TYPE USItype + +#if HAVE_DI == 1 +#define DINT_C_TYPE UDItype +#define DUINT_C_TYPE UDItype +#else +#define HINT_C_TYPE UHItype +#define HUINT_C_TYPE UHItype +#endif + +#define MODE_NAME USQ +#define MODE_NAME_S usq +#define MODE_UNSIGNED 1 +#endif + +#if defined (DQ_MODE) && (HAVE_DQ == 1) +#define FIXED_SIZE 8 /* in bytes. */ +#define INT_C_TYPE DItype +#define UINT_C_TYPE UDItype + +#if HAVE_TI == 1 +#define DINT_C_TYPE TItype +#define DUINT_C_TYPE UTItype +#else +#define HINT_C_TYPE SItype +#define HUINT_C_TYPE USItype +#endif + +#define MODE_NAME DQ +#define MODE_NAME_S dq +#define MODE_UNSIGNED 0 +#endif + +#if defined (UDQ_MODE) && (HAVE_UDQ == 1) +#define FIXED_SIZE 8 /* in bytes. */ +#define INT_C_TYPE UDItype +#define UINT_C_TYPE UDItype + +#if HAVE_TI == 1 +#define DINT_C_TYPE UTItype +#define DUINT_C_TYPE UTItype +#else +#define HINT_C_TYPE USItype +#define HUINT_C_TYPE USItype +#endif + +#define MODE_NAME UDQ +#define MODE_NAME_S udq +#define MODE_UNSIGNED 1 +#endif + +#if defined (TQ_MODE) && (HAVE_TQ == 1) +#define FIXED_SIZE 16 /* in bytes. */ +#define INT_C_TYPE TItype +#define UINT_C_TYPE UTItype +#define HINT_C_TYPE DItype +#define HUINT_C_TYPE UDItype +#define MODE_NAME TQ +#define MODE_NAME_S tq +#define MODE_UNSIGNED 0 +#endif + +#if defined (UTQ_MODE) && (HAVE_UTQ == 1) +#define FIXED_SIZE 16 /* in bytes. */ +#define INT_C_TYPE UTItype +#define UINT_C_TYPE UTItype +#define HINT_C_TYPE UDItype +#define HUINT_C_TYPE UDItype +#define MODE_NAME UTQ +#define MODE_NAME_S utq +#define MODE_UNSIGNED 1 +#endif + +#if defined (HA_MODE) && (HAVE_HA == 1) +#define FIXED_SIZE 2 /* in bytes. */ +#define INT_C_TYPE HItype +#define UINT_C_TYPE UHItype + +#if HAVE_SI == 1 +#define DINT_C_TYPE SItype +#define DUINT_C_TYPE USItype +#else +#define HINT_C_TYPE QItype +#define HUINT_C_TYPE UQItype +#endif + +#define MODE_NAME HA +#define MODE_NAME_S ha +#define MODE_UNSIGNED 0 +#endif + +#if defined (UHA_MODE) && (HAVE_UHA == 1) +#define FIXED_SIZE 2 /* in bytes. 
*/ +#define INT_C_TYPE UHItype +#define UINT_C_TYPE UHItype + +#if HAVE_SI == 1 +#define DINT_C_TYPE USItype +#define DUINT_C_TYPE USItype +#else +#define HINT_C_TYPE UQItype +#define HUINT_C_TYPE UQItype +#endif + +#define MODE_NAME UHA +#define MODE_NAME_S uha +#define MODE_UNSIGNED 1 +#endif + +#if defined (SA_MODE) && (HAVE_SA == 1) +#define FIXED_SIZE 4 /* in bytes. */ +#define INT_C_TYPE SItype +#define UINT_C_TYPE USItype + +#if HAVE_DI == 1 +#define DINT_C_TYPE DItype +#define DUINT_C_TYPE UDItype +#else +#define HINT_C_TYPE HItype +#define HUINT_C_TYPE UHItype +#endif + +#define MODE_NAME SA +#define MODE_NAME_S sa +#define MODE_UNSIGNED 0 +#endif + +#if defined (USA_MODE) && (HAVE_USA == 1) +#define FIXED_SIZE 4 /* in bytes. */ +#define INT_C_TYPE USItype +#define UINT_C_TYPE USItype + +#if HAVE_DI == 1 +#define DINT_C_TYPE UDItype +#define DUINT_C_TYPE UDItype +#else +#define HINT_C_TYPE UHItype +#define HUINT_C_TYPE UHItype +#endif + +#define MODE_NAME USA +#define MODE_NAME_S usa +#define MODE_UNSIGNED 1 +#endif + +#if defined (DA_MODE) && (HAVE_DA == 1) +#define FIXED_SIZE 8 /* in bytes. */ +#define INT_C_TYPE DItype +#define UINT_C_TYPE UDItype + +#if HAVE_TI == 1 +#define DINT_C_TYPE TItype +#define DUINT_C_TYPE UTItype +#else +#define HINT_C_TYPE SItype +#define HUINT_C_TYPE USItype +#endif + +#define MODE_NAME DA +#define MODE_NAME_S da +#define MODE_UNSIGNED 0 +#endif + +#if defined (UDA_MODE) && (HAVE_UDA == 1) +#define FIXED_SIZE 8 /* in bytes. */ +#define INT_C_TYPE UDItype +#define UINT_C_TYPE UDItype + +#if HAVE_TI == 1 +#define DINT_C_TYPE UTItype +#define DUINT_C_TYPE UTItype +#else +#define HINT_C_TYPE USItype +#define HUINT_C_TYPE USItype +#endif + +#define MODE_NAME UDA +#define MODE_NAME_S uda +#define MODE_UNSIGNED 1 +#endif + +#if defined (TA_MODE) && (HAVE_TA == 1) +#define FIXED_SIZE 16 /* in bytes. */ +#define INT_C_TYPE TItype +#define UINT_C_TYPE UTItype +#define HINT_C_TYPE DItype +#define HUINT_C_TYPE UDItype +#define MODE_NAME TA +#define MODE_NAME_S ta +#define MODE_UNSIGNED 0 +#endif + +#if defined (UTA_MODE) && (HAVE_UTA == 1) +#define FIXED_SIZE 16 /* in bytes. */ +#define INT_C_TYPE UTItype +#define UINT_C_TYPE UTItype +#define HINT_C_TYPE UDItype +#define HUINT_C_TYPE UDItype +#define MODE_NAME UTA +#define MODE_NAME_S uta +#define MODE_UNSIGNED 1 +#endif + +/* The following defines are based on the previous defines. */ + +#if defined (HINT_C_TYPE) +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + struct INTstruct {HINT_C_TYPE high, low;}; +#else + struct INTstruct {HINT_C_TYPE low, high;}; +#endif + +typedef union +{ + struct INTstruct s; + INT_C_TYPE ll; +} INTunion; +#endif + +#define FIXED_WIDTH (FIXED_SIZE * BITS_PER_UNIT) /* in bits. 
*/ +#define FIXED_C_TYPE1(NAME) NAME ## type +#define FIXED_C_TYPE2(NAME) FIXED_C_TYPE1(NAME) +#define FIXED_C_TYPE FIXED_C_TYPE2(MODE_NAME) +#define FBITS1(NAME) __ ## NAME ## _FBIT__ +#define FBITS2(NAME) FBITS1(NAME) +#define FBITS FBITS2(MODE_NAME) +#define IBITS1(NAME) __ ## NAME ## _IBIT__ +#define IBITS2(NAME) IBITS1(NAME) +#define IBITS IBITS2(MODE_NAME) +#define I_F_BITS (FBITS + IBITS) + +#define FIXED_OP(OP,MODE,NUM) OP ## MODE ## NUM + +#define FIXED_SATURATE1_TEMP(NAME) FIXED_OP(__saturate1,NAME,) +#define FIXED_SATURATE2_TEMP(NAME) FIXED_OP(__saturate2,NAME,) +#define FIXED_MULHELPER_TEMP(NAME) FIXED_OP(__mulhelper,NAME,) +#define FIXED_DIVHELPER_TEMP(NAME) FIXED_OP(__divhelper,NAME,) +#define FIXED_ASHLHELPER_TEMP(NAME) FIXED_OP(__ashlhelper,NAME,) +#define FIXED_ADD_TEMP(NAME) FIXED_OP(__add,NAME,3) +#define FIXED_SSADD_TEMP(NAME) FIXED_OP(__ssadd,NAME,3) +#define FIXED_USADD_TEMP(NAME) FIXED_OP(__usadd,NAME,3) +#define FIXED_SUB_TEMP(NAME) FIXED_OP(__sub,NAME,3) +#define FIXED_SSSUB_TEMP(NAME) FIXED_OP(__sssub,NAME,3) +#define FIXED_USSUB_TEMP(NAME) FIXED_OP(__ussub,NAME,3) +#define FIXED_MUL_TEMP(NAME) FIXED_OP(__mul,NAME,3) +#define FIXED_SSMUL_TEMP(NAME) FIXED_OP(__ssmul,NAME,3) +#define FIXED_USMUL_TEMP(NAME) FIXED_OP(__usmul,NAME,3) +#define FIXED_DIV_TEMP(NAME) FIXED_OP(__div,NAME,3) +#define FIXED_UDIV_TEMP(NAME) FIXED_OP(__udiv,NAME,3) +#define FIXED_SSDIV_TEMP(NAME) FIXED_OP(__ssdiv,NAME,3) +#define FIXED_USDIV_TEMP(NAME) FIXED_OP(__usdiv,NAME,3) +#define FIXED_NEG_TEMP(NAME) FIXED_OP(__neg,NAME,2) +#define FIXED_SSNEG_TEMP(NAME) FIXED_OP(__ssneg,NAME,2) +#define FIXED_USNEG_TEMP(NAME) FIXED_OP(__usneg,NAME,2) +#define FIXED_ASHL_TEMP(NAME) FIXED_OP(__ashl,NAME,3) +#define FIXED_ASHR_TEMP(NAME) FIXED_OP(__ashr,NAME,3) +#define FIXED_LSHR_TEMP(NAME) FIXED_OP(__lshr,NAME,3) +#define FIXED_SSASHL_TEMP(NAME) FIXED_OP(__ssashl,NAME,3) +#define FIXED_USASHL_TEMP(NAME) FIXED_OP(__usashl,NAME,3) +#define FIXED_CMP_TEMP(NAME) FIXED_OP(__cmp,NAME,2) + +#if defined (MODE_NAME) +#if defined (DINT_C_TYPE) +#define FIXED_SATURATE1 FIXED_SATURATE1_TEMP(MODE_NAME_S) +#else +#define FIXED_SATURATE2 FIXED_SATURATE2_TEMP(MODE_NAME_S) +#endif +#define FIXED_MULHELPER FIXED_MULHELPER_TEMP(MODE_NAME_S) +#define FIXED_DIVHELPER FIXED_DIVHELPER_TEMP(MODE_NAME_S) +#define FIXED_ASHLHELPER FIXED_ASHLHELPER_TEMP(MODE_NAME_S) +#define FIXED_ADD FIXED_ADD_TEMP(MODE_NAME_S) +#define FIXED_SUB FIXED_SUB_TEMP(MODE_NAME_S) +#define FIXED_MUL FIXED_MUL_TEMP(MODE_NAME_S) +#define FIXED_NEG FIXED_NEG_TEMP(MODE_NAME_S) +#define FIXED_ASHL FIXED_ASHL_TEMP(MODE_NAME_S) +#define FIXED_CMP FIXED_CMP_TEMP(MODE_NAME_S) + +/* The following functions are for all fixed-point modes. */ +#if defined (DINT_C_TYPE) +extern void FIXED_SATURATE1 (DINT_C_TYPE *); +#else +extern void FIXED_SATURATE2 (INT_C_TYPE *, INT_C_TYPE *); +#endif +extern FIXED_C_TYPE FIXED_MULHELPER (FIXED_C_TYPE, FIXED_C_TYPE, word_type); +extern FIXED_C_TYPE FIXED_DIVHELPER (FIXED_C_TYPE, FIXED_C_TYPE, word_type); +extern FIXED_C_TYPE FIXED_ASHLHELPER (FIXED_C_TYPE, word_type, word_type); +extern FIXED_C_TYPE FIXED_ADD (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_SUB (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_MUL (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_NEG (FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_ASHL (FIXED_C_TYPE, word_type); +extern word_type FIXED_CMP (FIXED_C_TYPE, FIXED_C_TYPE); +#endif + +#if MODE_UNSIGNED == 0 /* Signed types. 
*/ +#define PADDING_BITS (FIXED_WIDTH - 1 - I_F_BITS) +#define NONPADDING_BITS (1 + I_F_BITS) + +#if defined (MODE_NAME) +#define FIXED_DIV FIXED_DIV_TEMP(MODE_NAME_S) +#define FIXED_ASHR FIXED_ASHR_TEMP(MODE_NAME_S) +#define FIXED_SSADD FIXED_SSADD_TEMP(MODE_NAME_S) +#define FIXED_SSSUB FIXED_SSSUB_TEMP(MODE_NAME_S) +#define FIXED_SSMUL FIXED_SSMUL_TEMP(MODE_NAME_S) +#define FIXED_SSDIV FIXED_SSDIV_TEMP(MODE_NAME_S) +#define FIXED_SSNEG FIXED_SSNEG_TEMP(MODE_NAME_S) +#define FIXED_SSASHL FIXED_SSASHL_TEMP(MODE_NAME_S) + +/* The following functions are for signed fixed-point modes. */ +extern FIXED_C_TYPE FIXED_DIV (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_ASHR (FIXED_C_TYPE, word_type); +extern FIXED_C_TYPE FIXED_SSADD (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_SSSUB (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_SSMUL (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_SSDIV (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_SSNEG (FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_SSASHL (FIXED_C_TYPE, word_type); +#endif + +#else /* Unsigned types. */ +#define PADDING_BITS (FIXED_WIDTH - I_F_BITS) +#define NONPADDING_BITS (I_F_BITS) + +#if defined (MODE_NAME) +#define FIXED_UDIV FIXED_UDIV_TEMP(MODE_NAME_S) +#define FIXED_LSHR FIXED_LSHR_TEMP(MODE_NAME_S) +#define FIXED_USDIV FIXED_USDIV_TEMP(MODE_NAME_S) +#define FIXED_USADD FIXED_USADD_TEMP(MODE_NAME_S) +#define FIXED_USSUB FIXED_USSUB_TEMP(MODE_NAME_S) +#define FIXED_USMUL FIXED_USMUL_TEMP(MODE_NAME_S) +#define FIXED_USNEG FIXED_USNEG_TEMP(MODE_NAME_S) +#define FIXED_USASHL FIXED_USASHL_TEMP(MODE_NAME_S) + +/* The following functions are for unsigned fixed-point modes. */ +extern FIXED_C_TYPE FIXED_UDIV (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_LSHR (FIXED_C_TYPE, word_type); +extern FIXED_C_TYPE FIXED_USADD (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_USSUB (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_USMUL (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_USDIV (FIXED_C_TYPE, FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_USNEG (FIXED_C_TYPE); +extern FIXED_C_TYPE FIXED_USASHL (FIXED_C_TYPE, word_type); +#endif + +#endif /* End of testing MODE_UNSIGNED. */ + +/* This define is to check if this mode have any padding bits. */ +#define HAVE_PADDING_BITS (PADDING_BITS > 0) + +/* ------------------------------------------------------------------------ */ +/* The following defines are for conversions. */ + +#if defined (FROM_QI) && HAVE_QI == 1 +#define FROM_TYPE 1 /* Signed integer. */ +#define FROM_INT_C_TYPE QItype +#define FROM_SINT_C_TYPE QItype +#define FROM_UINT_C_TYPE UQItype +#define FROM_MODE_NAME_S qi +#define FROM_INT_SIZE 1 /* in bytes. */ + +#elif defined (FROM_HI) && HAVE_HI == 1 +#define FROM_TYPE 1 /* Signed integer. */ +#define FROM_INT_C_TYPE HItype +#define FROM_SINT_C_TYPE HItype +#define FROM_UINT_C_TYPE UHItype +#define FROM_MODE_NAME_S hi +#define FROM_INT_SIZE 2 /* in bytes. */ + +#elif defined (FROM_SI) && HAVE_SI == 1 +#define FROM_TYPE 1 /* Signed integer. */ +#define FROM_INT_C_TYPE SItype +#define FROM_SINT_C_TYPE SItype +#define FROM_UINT_C_TYPE USItype +#define FROM_MODE_NAME_S si +#define FROM_INT_SIZE 4 /* in bytes. */ + +#elif defined (FROM_DI) && HAVE_DI == 1 +#define FROM_TYPE 1 /* Signed integer. */ +#define FROM_INT_C_TYPE DItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_NAME_S di +#define FROM_INT_SIZE 8 /* in bytes. 
*/ + +#elif defined (FROM_TI) && HAVE_TI == 1 +#define FROM_TYPE 1 /* Signed integer. */ +#define FROM_INT_C_TYPE TItype +#define FROM_SINT_C_TYPE TItype +#define FROM_UINT_C_TYPE UTItype +#define FROM_MODE_NAME_S ti +#define FROM_INT_SIZE 16 /* in bytes. */ + +#elif defined (FROM_UQI) && HAVE_UQI == 1 +#define FROM_TYPE 2 /* Unsigned integer. */ +#define FROM_INT_C_TYPE QItype +#define FROM_SINT_C_TYPE QItype +#define FROM_UINT_C_TYPE UQItype +#define FROM_MODE_NAME_S qi +#define FROM_INT_SIZE 1 /* in bytes. */ + +#elif defined (FROM_UHI) && HAVE_UHI == 1 +#define FROM_TYPE 2 /* Unsigned integer. */ +#define FROM_INT_C_TYPE UHItype +#define FROM_SINT_C_TYPE HItype +#define FROM_UINT_C_TYPE UHItype +#define FROM_MODE_NAME_S hi +#define FROM_INT_SIZE 2 /* in bytes. */ + +#elif defined (FROM_USI) && HAVE_USI == 1 +#define FROM_TYPE 2 /* Unsigned integer. */ +#define FROM_INT_C_TYPE USItype +#define FROM_SINT_C_TYPE SItype +#define FROM_UINT_C_TYPE USItype +#define FROM_MODE_NAME_S si +#define FROM_INT_SIZE 4 /* in bytes. */ + +#elif defined (FROM_UDI) && HAVE_UDI == 1 +#define FROM_TYPE 2 /* Unsigned integer. */ +#define FROM_INT_C_TYPE UDItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_NAME_S di +#define FROM_INT_SIZE 8 /* in bytes. */ + +#elif defined (FROM_UTI) && HAVE_UTI == 1 +#define FROM_TYPE 2 /* Unsigned integer. */ +#define FROM_INT_C_TYPE UTItype +#define FROM_SINT_C_TYPE TItype +#define FROM_UINT_C_TYPE UTItype +#define FROM_MODE_NAME_S ti +#define FROM_INT_SIZE 16 /* in bytes. */ + +#elif defined (FROM_SF) && HAVE_SF == 1 +#define FROM_TYPE 3 /* Floating-point. */ +#define FROM_FLOAT_C_TYPE SFtype +#define FROM_MODE_NAME_S sf + +#elif defined (FROM_DF) && HAVE_DF == 1 +#define FROM_TYPE 3 /* Floating-point. */ +#define FROM_FLOAT_C_TYPE DFtype +#define FROM_MODE_NAME_S df + +#elif defined (FROM_QQ) && HAVE_QQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME QQ +#define FROM_MODE_NAME_S qq +#define FROM_INT_C_TYPE QItype +#define FROM_SINT_C_TYPE QItype +#define FROM_UINT_C_TYPE UQItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 1 /* in bytes. */ + +#elif defined (FROM_HQ) && HAVE_HQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME HQ +#define FROM_MODE_NAME_S hq +#define FROM_INT_C_TYPE HItype +#define FROM_SINT_C_TYPE HItype +#define FROM_UINT_C_TYPE UHItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (FROM_SQ) && HAVE_SQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME SQ +#define FROM_MODE_NAME_S sq +#define FROM_INT_C_TYPE SItype +#define FROM_SINT_C_TYPE SItype +#define FROM_UINT_C_TYPE USItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 4 /* in bytes. */ + +#elif defined (FROM_DQ) && HAVE_DQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME DQ +#define FROM_MODE_NAME_S dq +#define FROM_INT_C_TYPE DItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (FROM_TQ) && HAVE_TQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME TQ +#define FROM_MODE_NAME_S tq +#define FROM_INT_C_TYPE TItype +#define FROM_SINT_C_TYPE TItype +#define FROM_UINT_C_TYPE UTItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 16 /* in bytes. */ + +#elif defined (FROM_UQQ) && HAVE_UQQ == 1 +#define FROM_TYPE 4 /* Fixed-point. 
*/ +#define FROM_MODE_NAME UQQ +#define FROM_MODE_NAME_S uqq +#define FROM_INT_C_TYPE UQItype +#define FROM_SINT_C_TYPE QItype +#define FROM_UINT_C_TYPE UQItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 1 /* in bytes. */ + +#elif defined (FROM_UHQ) && HAVE_UHQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME UHQ +#define FROM_MODE_NAME_S uhq +#define FROM_INT_C_TYPE UHItype +#define FROM_SINT_C_TYPE HItype +#define FROM_UINT_C_TYPE UHItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (FROM_USQ) && HAVE_USQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME USQ +#define FROM_MODE_NAME_S usq +#define FROM_INT_C_TYPE USItype +#define FROM_SINT_C_TYPE SItype +#define FROM_UINT_C_TYPE USItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 4 /* in bytes. */ + +#elif defined (FROM_UDQ) && HAVE_UDQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME UDQ +#define FROM_MODE_NAME_S udq +#define FROM_INT_C_TYPE UDItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (FROM_UTQ) && HAVE_UTQ == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME UTQ +#define FROM_MODE_NAME_S utq +#define FROM_INT_C_TYPE UTItype +#define FROM_SINT_C_TYPE TItype +#define FROM_UINT_C_TYPE UTItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 16 /* in bytes. */ + +#elif defined (FROM_HA) && HAVE_HA == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME HA +#define FROM_MODE_NAME_S ha +#define FROM_INT_C_TYPE HItype +#define FROM_SINT_C_TYPE HItype +#define FROM_UINT_C_TYPE UHItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (FROM_SA) && HAVE_SA == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME SA +#define FROM_MODE_NAME_S sa +#define FROM_INT_C_TYPE SItype +#define FROM_SINT_C_TYPE SItype +#define FROM_UINT_C_TYPE USItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 4 /* in bytes. */ + +#elif defined (FROM_DA) && HAVE_DA == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME DA +#define FROM_MODE_NAME_S da +#define FROM_INT_C_TYPE DItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (FROM_TA) && HAVE_TA == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME TA +#define FROM_MODE_NAME_S ta +#define FROM_INT_C_TYPE TItype +#define FROM_SINT_C_TYPE TItype +#define FROM_UINT_C_TYPE UTItype +#define FROM_MODE_UNSIGNED 0 +#define FROM_FIXED_SIZE 16 /* in bytes. */ + +#elif defined (FROM_UHA) && HAVE_UHA == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME UHA +#define FROM_MODE_NAME_S uha +#define FROM_INT_C_TYPE UHItype +#define FROM_SINT_C_TYPE HItype +#define FROM_UINT_C_TYPE UHItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (FROM_USA) && HAVE_USA == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME USA +#define FROM_MODE_NAME_S usa +#define FROM_INT_C_TYPE USItype +#define FROM_SINT_C_TYPE SItype +#define FROM_UINT_C_TYPE USItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 4 /* in bytes. */ + +#elif defined (FROM_UDA) && HAVE_UDA == 1 +#define FROM_TYPE 4 /* Fixed-point. 
*/ +#define FROM_MODE_NAME UDA +#define FROM_MODE_NAME_S uda +#define FROM_INT_C_TYPE UDItype +#define FROM_SINT_C_TYPE DItype +#define FROM_UINT_C_TYPE UDItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (FROM_UTA) && HAVE_UTA == 1 +#define FROM_TYPE 4 /* Fixed-point. */ +#define FROM_MODE_NAME UTA +#define FROM_MODE_NAME_S uta +#define FROM_INT_C_TYPE UTItype +#define FROM_SINT_C_TYPE TItype +#define FROM_UINT_C_TYPE UTItype +#define FROM_MODE_UNSIGNED 1 +#define FROM_FIXED_SIZE 16 /* in bytes. */ + +#endif + +#if defined (TO_QI) && HAVE_QI == 1 && !defined (FROM_QI) +#define TO_TYPE 1 /* Signed integer. */ +#define TO_INT_C_TYPE QItype +#define TO_SINT_C_TYPE QItype +#define TO_UINT_C_TYPE UQItype +#define TO_MODE_NAME_S qi + +#elif defined (TO_HI) && HAVE_HI == 1 && !defined (FROM_HI) +#define TO_TYPE 1 /* Signed integer. */ +#define TO_INT_C_TYPE HItype +#define TO_SINT_C_TYPE HItype +#define TO_UINT_C_TYPE UHItype +#define TO_MODE_NAME_S hi + +#elif defined (TO_SI) && HAVE_SI == 1 && !defined (FROM_SI) +#define TO_TYPE 1 /* Signed integer. */ +#define TO_INT_C_TYPE SItype +#define TO_SINT_C_TYPE SItype +#define TO_UINT_C_TYPE USItype +#define TO_MODE_NAME_S si + +#elif defined (TO_DI) && HAVE_DI == 1 && !defined (FROM_DI) +#define TO_TYPE 1 /* Signed integer. */ +#define TO_INT_C_TYPE DItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_NAME_S di + +#elif defined (TO_TI) && HAVE_TI == 1 && !defined (FROM_TI) +#define TO_TYPE 1 /* Signed integer. */ +#define TO_INT_C_TYPE TItype +#define TO_SINT_C_TYPE TItype +#define TO_UINT_C_TYPE UTItype +#define TO_MODE_NAME_S ti + +#elif defined (TO_UQI) && HAVE_UQI == 1 && !defined (FROM_UQI) +#define TO_TYPE 2 /* Unsigned integer. */ +#define TO_INT_C_TYPE UQItype +#define TO_SINT_C_TYPE QItype +#define TO_UINT_C_TYPE UQItype +#define TO_MODE_NAME_S qi + +#elif defined (TO_UHI) && HAVE_UHI == 1 && !defined (FROM_UHI) +#define TO_TYPE 2 /* Unsigned integer. */ +#define TO_INT_C_TYPE UHItype +#define TO_SINT_C_TYPE HItype +#define TO_UINT_C_TYPE UHItype +#define TO_MODE_NAME_S hi + +#elif defined (TO_USI) && HAVE_USI == 1 && !defined (FROM_USI) +#define TO_TYPE 2 /* Unsigned integer. */ +#define TO_INT_C_TYPE USItype +#define TO_SINT_C_TYPE SItype +#define TO_UINT_C_TYPE USItype +#define TO_MODE_NAME_S si + +#elif defined (TO_UDI) && HAVE_UDI == 1 && !defined (FROM_UDI) +#define TO_TYPE 2 /* Unsigned integer. */ +#define TO_INT_C_TYPE UDItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_NAME_S di + +#elif defined (TO_UTI) && HAVE_UTI == 1 && !defined (FROM_UTI) +#define TO_TYPE 2 /* Unsigned integer. */ +#define TO_INT_C_TYPE UTItype +#define TO_SINT_C_TYPE TItype +#define TO_UINT_C_TYPE UTItype +#define TO_MODE_NAME_S ti + +#elif defined (TO_SF) && HAVE_SF == 1 && !defined (FROM_SF) +#define TO_TYPE 3 /* Floating-point. */ +#define TO_FLOAT_C_TYPE SFtype +#define TO_MODE_NAME_S sf + +#elif defined (TO_DF) && HAVE_DF == 1 && !defined (FROM_DF) +#define TO_TYPE 3 /* Floating-point. */ +#define TO_FLOAT_C_TYPE DFtype +#define TO_MODE_NAME_S df + +#elif defined (TO_QQ) && HAVE_QQ == 1 && !defined (FROM_QQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME QQ +#define TO_MODE_NAME_S qq +#define TO_INT_C_TYPE QItype +#define TO_SINT_C_TYPE QItype +#define TO_UINT_C_TYPE UQItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 1 /* in bytes. 
*/ + +#elif defined (TO_HQ) && HAVE_HQ == 1 && !defined (FROM_HQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME HQ +#define TO_MODE_NAME_S hq +#define TO_INT_C_TYPE HItype +#define TO_SINT_C_TYPE HItype +#define TO_UINT_C_TYPE UHItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (TO_SQ) && HAVE_SQ == 1 && !defined (FROM_SQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME SQ +#define TO_MODE_NAME_S sq +#define TO_INT_C_TYPE SItype +#define TO_SINT_C_TYPE SItype +#define TO_UINT_C_TYPE USItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 4 /* in bytes. */ + +#elif defined (TO_DQ) && HAVE_DQ == 1 && !defined (FROM_DQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME DQ +#define TO_MODE_NAME_S dq +#define TO_INT_C_TYPE DItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (TO_TQ) && HAVE_TQ == 1 && !defined (FROM_TQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME TQ +#define TO_MODE_NAME_S tq +#define TO_INT_C_TYPE TItype +#define TO_SINT_C_TYPE TItype +#define TO_UINT_C_TYPE UTItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 16 /* in bytes. */ + +#elif defined (TO_UQQ) && HAVE_UQQ == 1 && !defined (FROM_UQQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME UQQ +#define TO_MODE_NAME_S uqq +#define TO_INT_C_TYPE UQItype +#define TO_SINT_C_TYPE QItype +#define TO_UINT_C_TYPE UQItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 1 /* in bytes. */ + +#elif defined (TO_UHQ) && HAVE_UHQ == 1 && !defined (FROM_UHQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME UHQ +#define TO_MODE_NAME_S uhq +#define TO_INT_C_TYPE UHItype +#define TO_SINT_C_TYPE HItype +#define TO_UINT_C_TYPE UHItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (TO_USQ) && HAVE_USQ == 1 && !defined (FROM_USQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME USQ +#define TO_MODE_NAME_S usq +#define TO_INT_C_TYPE USItype +#define TO_SINT_C_TYPE SItype +#define TO_UINT_C_TYPE USItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 4 /* in bytes. */ + +#elif defined (TO_UDQ) && HAVE_UDQ == 1 && !defined (FROM_UDQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME UDQ +#define TO_MODE_NAME_S udq +#define TO_INT_C_TYPE UDItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (TO_UTQ) && HAVE_UTQ == 1 && !defined (FROM_UTQ) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME UTQ +#define TO_MODE_NAME_S utq +#define TO_INT_C_TYPE UTItype +#define TO_SINT_C_TYPE TItype +#define TO_UINT_C_TYPE UTItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 16 /* in bytes. */ + +#elif defined (TO_HA) && HAVE_HA == 1 && !defined (FROM_HA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME HA +#define TO_MODE_NAME_S ha +#define TO_INT_C_TYPE HItype +#define TO_SINT_C_TYPE HItype +#define TO_UINT_C_TYPE UHItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (TO_SA) && HAVE_SA == 1 && !defined (FROM_SA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME SA +#define TO_MODE_NAME_S sa +#define TO_INT_C_TYPE SItype +#define TO_SINT_C_TYPE SItype +#define TO_UINT_C_TYPE USItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 4 /* in bytes. 
*/ + +#elif defined (TO_DA) && HAVE_DA == 1 && !defined (FROM_DA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME DA +#define TO_MODE_NAME_S da +#define TO_INT_C_TYPE DItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (TO_TA) && HAVE_TA == 1 && !defined (FROM_TA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME TA +#define TO_MODE_NAME_S ta +#define TO_INT_C_TYPE TItype +#define TO_SINT_C_TYPE TItype +#define TO_UINT_C_TYPE UTItype +#define TO_MODE_UNSIGNED 0 +#define TO_FIXED_SIZE 16 /* in bytes. */ + +#elif defined (TO_UHA) && HAVE_UHA == 1 && !defined (FROM_UHA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME UHA +#define TO_MODE_NAME_S uha +#define TO_INT_C_TYPE UHItype +#define TO_SINT_C_TYPE HItype +#define TO_UINT_C_TYPE UHItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 2 /* in bytes. */ + +#elif defined (TO_USA) && HAVE_USA == 1 && !defined (FROM_USA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME USA +#define TO_MODE_NAME_S usa +#define TO_INT_C_TYPE USItype +#define TO_SINT_C_TYPE SItype +#define TO_UINT_C_TYPE USItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 4 /* in bytes. */ + +#elif defined (TO_UDA) && HAVE_UDA == 1 && !defined (FROM_UDA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME UDA +#define TO_MODE_NAME_S uda +#define TO_INT_C_TYPE UDItype +#define TO_SINT_C_TYPE DItype +#define TO_UINT_C_TYPE UDItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 8 /* in bytes. */ + +#elif defined (TO_UTA) && HAVE_UTA == 1 && !defined (FROM_UTA) +#define TO_TYPE 4 /* Fixed-point. */ +#define TO_MODE_NAME UTA +#define TO_MODE_NAME_S uta +#define TO_INT_C_TYPE UTItype +#define TO_SINT_C_TYPE TItype +#define TO_UINT_C_TYPE UTItype +#define TO_MODE_UNSIGNED 1 +#define TO_FIXED_SIZE 16 /* in bytes. */ + +#endif + +#if defined (FROM_MODE_NAME_S) && defined (TO_MODE_NAME_S) + +#if FROM_TYPE == 1 /* Signed integer. */ +#define FROM_INT_WIDTH (FROM_INT_SIZE * BITS_PER_UNIT) +#endif + +#if FROM_TYPE == 2 /* Unsigned integer. */ +#define FROM_INT_WIDTH (FROM_INT_SIZE * BITS_PER_UNIT) +#endif + +#if FROM_TYPE == 4 /* Fixed-point. */ +#define FROM_FIXED_C_TYPE FIXED_C_TYPE2(FROM_MODE_NAME) +#define FROM_FBITS FBITS2(FROM_MODE_NAME) +#define FROM_FIXED_WIDTH (FROM_FIXED_SIZE * BITS_PER_UNIT) +#define FROM_FBITS FBITS2(FROM_MODE_NAME) +#define FROM_IBITS IBITS2(FROM_MODE_NAME) +#define FROM_I_F_BITS (FROM_FBITS + FROM_IBITS) + +#if FROM_MODE_UNSIGNED == 0 /* Signed types. */ +#define FROM_PADDING_BITS (FROM_FIXED_WIDTH - 1 - FROM_I_F_BITS) +#define FROM_NONPADDING_BITS (1 + FROM_I_F_BITS) +#else /* Unsigned types. */ +#define FROM_PADDING_BITS (FROM_FIXED_WIDTH - FROM_I_F_BITS) +#define FROM_NONPADDING_BITS (FROM_I_F_BITS) +#endif +#define FROM_HAVE_PADDING_BITS (FROM_PADDING_BITS > 0) +#endif /* FROM_TYPE == 4 */ + +#if TO_TYPE == 4 /* Fixed-point. */ +#define TO_FIXED_C_TYPE FIXED_C_TYPE2(TO_MODE_NAME) +#define TO_FBITS FBITS2(TO_MODE_NAME) +#define TO_FIXED_WIDTH (TO_FIXED_SIZE * BITS_PER_UNIT) +#define TO_FBITS FBITS2(TO_MODE_NAME) +#define TO_IBITS IBITS2(TO_MODE_NAME) +#define TO_I_F_BITS (TO_FBITS + TO_IBITS) + +#if TO_MODE_UNSIGNED == 0 /* Signed types. */ +#define TO_PADDING_BITS (TO_FIXED_WIDTH - 1 - TO_I_F_BITS) +#define TO_NONPADDING_BITS (1 + TO_I_F_BITS) +#else /* Unsigned types. 
*/ +#define TO_PADDING_BITS (TO_FIXED_WIDTH - TO_I_F_BITS) +#define TO_NONPADDING_BITS (TO_I_F_BITS) +#endif +#define TO_HAVE_PADDING_BITS (TO_PADDING_BITS > 0) +#endif /* TO_TYPE == 4 */ + +#define FIXED_CONVERT_OP(OP,FROM,TO) OP ## FROM ## TO +#define FIXED_CONVERT_OP2(OP,FROM,TO) OP ## FROM ## TO ## 2 +#define FRACT_TEMP(N1,N2) FIXED_CONVERT_OP(__fract,N1,N2) +#define FRACT2_TEMP(N1,N2) FIXED_CONVERT_OP2(__fract,N1,N2) +#define SATFRACT_TEMP(N1,N2) FIXED_CONVERT_OP(__satfract,N1,N2) +#define SATFRACT2_TEMP(N1,N2) FIXED_CONVERT_OP2(__satfract,N1,N2) +#define FRACTUNS_TEMP(N1,N2) FIXED_CONVERT_OP(__fractuns,N1,N2) +#define SATFRACTUNS_TEMP(N1,N2) FIXED_CONVERT_OP(__satfractuns,N1,N2) + +/* Define conversions from fixed-point to fixed-point. */ +#if FROM_TYPE == 4 && TO_TYPE == 4 + +#if FROM_FIXED_SIZE > TO_FIXED_SIZE +#define BIG_SINT_C_TYPE FROM_SINT_C_TYPE +#define BIG_UINT_C_TYPE FROM_UINT_C_TYPE +#define BIG_WIDTH FROM_FIXED_WIDTH +#else +#define BIG_SINT_C_TYPE TO_SINT_C_TYPE +#define BIG_UINT_C_TYPE TO_UINT_C_TYPE +#define BIG_WIDTH TO_FIXED_WIDTH +#endif + +/* Check if FROM* and TO* are in the same machine class. */ +#if ((FROM_MODE_UNSIGNED == TO_MODE_UNSIGNED) \ + && ((FROM_IBITS == 0) == (TO_IBITS == 0))) +/* Same modes: append '2' to conversion function names */ +#define FRACT FRACT2_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +#define SATFRACT SATFRACT2_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +#else +/* Different modes: don't append '2' to conversion function names */ +#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +#define SATFRACT SATFRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +#endif + +extern TO_FIXED_C_TYPE FRACT (FROM_FIXED_C_TYPE); +extern TO_FIXED_C_TYPE SATFRACT (FROM_FIXED_C_TYPE); +#endif /* FROM_TYPE == 4 && TO_TYPE == 4 */ + +/* Define conversions from fixed-point to signed integer. */ +#if FROM_TYPE == 4 && TO_TYPE == 1 +#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +extern TO_INT_C_TYPE FRACT (FROM_FIXED_C_TYPE); +#endif /* FROM_TYPE == 4 && TO_TYPE == 1 */ + +/* Define conversions from fixed-point to unsigned integer. */ +#if FROM_TYPE == 4 && TO_TYPE == 2 +#define FRACTUNS FRACTUNS_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +extern TO_INT_C_TYPE FRACTUNS (FROM_FIXED_C_TYPE); +#endif /* FROM_TYPE == 4 && TO_TYPE == 2 */ + +/* Define conversions from fixed-point to floating-point. */ +#if FROM_TYPE == 4 && TO_TYPE == 3 +#define BASE1(NUM) 0x1.0p ## NUM +#define BASE2(NUM) BASE1(NUM) +#define BASE BASE2(FROM_FBITS) +#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +extern TO_FLOAT_C_TYPE FRACT (FROM_FIXED_C_TYPE); +#endif /* FROM_TYPE == 4 && TO_TYPE == 3 */ + +/* Define conversions from signed integer to fixed-point. */ +#if FROM_TYPE == 1 && TO_TYPE == 4 + +#if FROM_INT_SIZE > TO_FIXED_SIZE +#define BIG_SINT_C_TYPE FROM_SINT_C_TYPE +#define BIG_UINT_C_TYPE FROM_UINT_C_TYPE +#define BIG_WIDTH FROM_INT_WIDTH +#else +#define BIG_SINT_C_TYPE TO_SINT_C_TYPE +#define BIG_UINT_C_TYPE TO_UINT_C_TYPE +#define BIG_WIDTH TO_FIXED_WIDTH +#endif + +#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +#define SATFRACT SATFRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +extern TO_FIXED_C_TYPE FRACT (FROM_INT_C_TYPE); +extern TO_FIXED_C_TYPE SATFRACT (FROM_INT_C_TYPE); +#endif /* FROM_TYPE == 1 && TO_TYPE == 4 */ + +/* Define conversions from unsigned integer to fixed-point. 
*/ +#if FROM_TYPE == 2 && TO_TYPE == 4 + +#if FROM_INT_SIZE > TO_FIXED_SIZE +#define BIG_SINT_C_TYPE FROM_SINT_C_TYPE +#define BIG_UINT_C_TYPE FROM_UINT_C_TYPE +#define BIG_WIDTH FROM_INT_WIDTH +#else +#define BIG_SINT_C_TYPE TO_SINT_C_TYPE +#define BIG_UINT_C_TYPE TO_UINT_C_TYPE +#define BIG_WIDTH TO_FIXED_WIDTH +#endif + +#define FRACTUNS FRACTUNS_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +#define SATFRACTUNS SATFRACTUNS_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +extern TO_FIXED_C_TYPE FRACTUNS (FROM_INT_C_TYPE); +extern TO_FIXED_C_TYPE SATFRACTUNS (FROM_INT_C_TYPE); +#endif /* FROM_TYPE == 2 && TO_TYPE == 4 */ + +/* Define conversions from floating-point to fixed-point. */ +#if FROM_TYPE == 3 && TO_TYPE == 4 + +#define BASE1(NUM) (0x1.0p ## NUM) +#define BASE2(NUM) BASE1(NUM) +#define BASE BASE2(TO_FBITS) + +#define FIXED_MAX1(NUM1,NUM2) (0x1.0p ## NUM1 - 0x1.0p- ## NUM2) +#define FIXED_MAX2(NUM1,NUM2) FIXED_MAX1(NUM1,NUM2) +#define FIXED_MAX FIXED_MAX2(TO_IBITS,TO_FBITS) + +#define FIXED_MIN1(NUM) (-0x1.0p ## NUM) +#define FIXED_MIN2(NUM) FIXED_MIN1(NUM) +#if TO_MODE_UNSIGNED == 0 +#define FIXED_MIN FIXED_MIN2(TO_IBITS) +#else +#define FIXED_MIN 0.0 +#endif + +#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +#define SATFRACT SATFRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S) +extern TO_FIXED_C_TYPE FRACT (FROM_FLOAT_C_TYPE); +extern TO_FIXED_C_TYPE SATFRACT (FROM_FLOAT_C_TYPE); +#endif /* FROM_TYPE == 3 && TO_TYPE == 4 */ + +#endif /* defined (FROM_MODE_NAME_S) && defined (TO_MODE_NAME_S) */ + +#endif /* _FIXED_BIT_H */ diff --git a/gcc/config/flat.h b/gcc/config/flat.h new file mode 100644 index 000000000..9c9ae751d --- /dev/null +++ b/gcc/config/flat.h @@ -0,0 +1,22 @@ +/* Defines to be used for targets that support flat executables. + Copyright (C) 2006, 2007 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This macro applies on top of OBJECT_FORMAT_ELF and indicates that + we want to support both flat and ELF output. */ +#define OBJECT_FORMAT_FLAT diff --git a/gcc/config/floatunsidf.c b/gcc/config/floatunsidf.c new file mode 100644 index 000000000..ff2811250 --- /dev/null +++ b/gcc/config/floatunsidf.c @@ -0,0 +1,15 @@ +/* Public domain. */ +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef float DFtype __attribute__ ((mode (DF))); + +DFtype +__floatunsidf (USItype u) +{ + SItype s = (SItype) u; + DFtype r = (DFtype) s; + if (s < 0) + r += (DFtype)2.0 * (DFtype) ((USItype) 1 + << (sizeof (USItype) * __CHAR_BIT__ - 1)); + return r; +} diff --git a/gcc/config/floatunsisf.c b/gcc/config/floatunsisf.c new file mode 100644 index 000000000..11d4aa78c --- /dev/null +++ b/gcc/config/floatunsisf.c @@ -0,0 +1,18 @@ +/* Public domain. 
*/ +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef float SFtype __attribute__ ((mode (SF))); + +SFtype +__floatunsisf (USItype u) +{ + SItype s = (SItype) u; + if (s < 0) + { + /* As in expand_float, compute (u & 1) | (u >> 1) to ensure + correct rounding if a nonzero bit is shifted out. */ + return (SFtype) 2.0 * (SFtype) (SItype) ((u & 1) | (u >> 1)); + } + else + return (SFtype) s; +} diff --git a/gcc/config/floatunsitf.c b/gcc/config/floatunsitf.c new file mode 100644 index 000000000..955d67666 --- /dev/null +++ b/gcc/config/floatunsitf.c @@ -0,0 +1,15 @@ +/* Public domain. */ +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef float TFtype __attribute__ ((mode (TF))); + +TFtype +__floatunsitf (USItype u) +{ + SItype s = (SItype) u; + TFtype r = (TFtype) s; + if (s < 0) + r += (TFtype)2.0 * (TFtype) ((USItype) 1 + << (sizeof (USItype) * __CHAR_BIT__ - 1)); + return r; +} diff --git a/gcc/config/floatunsixf.c b/gcc/config/floatunsixf.c new file mode 100644 index 000000000..52511688d --- /dev/null +++ b/gcc/config/floatunsixf.c @@ -0,0 +1,15 @@ +/* Public domain. */ +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef float XFtype __attribute__ ((mode (XF))); + +XFtype +__floatunsixf (USItype u) +{ + SItype s = (SItype) u; + XFtype r = (XFtype) s; + if (s < 0) + r += (XFtype)2.0 * (XFtype) ((USItype) 1 + << (sizeof (USItype) * __CHAR_BIT__ - 1)); + return r; +} diff --git a/gcc/config/fp-bit.c b/gcc/config/fp-bit.c new file mode 100644 index 000000000..82d924ec2 --- /dev/null +++ b/gcc/config/fp-bit.c @@ -0,0 +1,1657 @@ +/* This is a software floating point library which can be used + for targets without hardware floating point. + Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, + 2004, 2005, 2008, 2009, 2010 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* This implements IEEE 754 format arithmetic, but does not provide a + mechanism for setting the rounding mode, or for generating or handling + exceptions. + + The original code by Steve Chamberlain, hacked by Mark Eichin and Jim + Wilson, all of Cygnus Support. */ + +/* The intended way to use this file is to make two copies, add `#define FLOAT' + to one copy, then compile both copies and add them to libgcc.a. */ + +#include "tconfig.h" +#include "coretypes.h" +#include "tm.h" +#include "config/fp-bit.h" + +/* The following macros can be defined to change the behavior of this file: + FLOAT: Implement a `float', aka SFmode, fp library. 
If this is not + defined, then this file implements a `double', aka DFmode, fp library. + FLOAT_ONLY: Used with FLOAT, to implement a `float' only library, i.e. + don't include float->double conversion which requires the double library. + This is useful only for machines which can't support doubles, e.g. some + 8-bit processors. + CMPtype: Specify the type that floating point compares should return. + This defaults to SItype, aka int. + _DEBUG_BITFLOAT: This makes debugging the code a little easier, by adding + two integers to the FLO_union_type. + NO_DENORMALS: Disable handling of denormals. + NO_NANS: Disable nan and infinity handling + SMALL_MACHINE: Useful when operations on QIs and HIs are faster + than on an SI */ + +/* We don't currently support extended floats (long doubles) on machines + without hardware to deal with them. + + These stubs are just to keep the linker from complaining about unresolved + references which can be pulled in from libio & libstdc++, even if the + user isn't using long doubles. However, they may generate an unresolved + external to abort if abort is not used by the function, and the stubs + are referenced from within libc, since libgcc goes before and after the + system library. */ + +#ifdef DECLARE_LIBRARY_RENAMES + DECLARE_LIBRARY_RENAMES +#endif + +#ifdef EXTENDED_FLOAT_STUBS +extern void abort (void); +void __extendsfxf2 (void) { abort(); } +void __extenddfxf2 (void) { abort(); } +void __truncxfdf2 (void) { abort(); } +void __truncxfsf2 (void) { abort(); } +void __fixxfsi (void) { abort(); } +void __floatsixf (void) { abort(); } +void __addxf3 (void) { abort(); } +void __subxf3 (void) { abort(); } +void __mulxf3 (void) { abort(); } +void __divxf3 (void) { abort(); } +void __negxf2 (void) { abort(); } +void __eqxf2 (void) { abort(); } +void __nexf2 (void) { abort(); } +void __gtxf2 (void) { abort(); } +void __gexf2 (void) { abort(); } +void __lexf2 (void) { abort(); } +void __ltxf2 (void) { abort(); } + +void __extendsftf2 (void) { abort(); } +void __extenddftf2 (void) { abort(); } +void __trunctfdf2 (void) { abort(); } +void __trunctfsf2 (void) { abort(); } +void __fixtfsi (void) { abort(); } +void __floatsitf (void) { abort(); } +void __addtf3 (void) { abort(); } +void __subtf3 (void) { abort(); } +void __multf3 (void) { abort(); } +void __divtf3 (void) { abort(); } +void __negtf2 (void) { abort(); } +void __eqtf2 (void) { abort(); } +void __netf2 (void) { abort(); } +void __gttf2 (void) { abort(); } +void __getf2 (void) { abort(); } +void __letf2 (void) { abort(); } +void __lttf2 (void) { abort(); } +#else /* !EXTENDED_FLOAT_STUBS, rest of file */ + +/* IEEE "special" number predicates */ + +#ifdef NO_NANS + +#define nan() 0 +#define isnan(x) 0 +#define isinf(x) 0 +#else + +#if defined L_thenan_sf +const fp_number_type __thenan_sf = { CLASS_SNAN, 0, 0, {(fractype) 0} }; +#elif defined L_thenan_df +const fp_number_type __thenan_df = { CLASS_SNAN, 0, 0, {(fractype) 0} }; +#elif defined L_thenan_tf +const fp_number_type __thenan_tf = { CLASS_SNAN, 0, 0, {(fractype) 0} }; +#elif defined TFLOAT +extern const fp_number_type __thenan_tf; +#elif defined FLOAT +extern const fp_number_type __thenan_sf; +#else +extern const fp_number_type __thenan_df; +#endif + +INLINE +static const fp_number_type * +makenan (void) +{ +#ifdef TFLOAT + return & __thenan_tf; +#elif defined FLOAT + return & __thenan_sf; +#else + return & __thenan_df; +#endif +} + +INLINE +static int +isnan (const fp_number_type *x) +{ + return __builtin_expect (x->class == CLASS_SNAN || x->class 
== CLASS_QNAN, + 0); +} + +INLINE +static int +isinf (const fp_number_type * x) +{ + return __builtin_expect (x->class == CLASS_INFINITY, 0); +} + +#endif /* NO_NANS */ + +INLINE +static int +iszero (const fp_number_type * x) +{ + return x->class == CLASS_ZERO; +} + +INLINE +static void +flip_sign ( fp_number_type * x) +{ + x->sign = !x->sign; +} + +/* Count leading zeroes in N. */ +INLINE +static int +clzusi (USItype n) +{ + extern int __clzsi2 (USItype); + if (sizeof (USItype) == sizeof (unsigned int)) + return __builtin_clz (n); + else if (sizeof (USItype) == sizeof (unsigned long)) + return __builtin_clzl (n); + else if (sizeof (USItype) == sizeof (unsigned long long)) + return __builtin_clzll (n); + else + return __clzsi2 (n); +} + +extern FLO_type pack_d (const fp_number_type * ); + +#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf) +FLO_type +pack_d (const fp_number_type *src) +{ + FLO_union_type dst; + fractype fraction = src->fraction.ll; /* wasn't unsigned before? */ + int sign = src->sign; + int exp = 0; + + if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src))) + { + /* We can't represent these values accurately. By using the + largest possible magnitude, we guarantee that the conversion + of infinity is at least as big as any finite number. */ + exp = EXPMAX; + fraction = ((fractype) 1 << FRACBITS) - 1; + } + else if (isnan (src)) + { + exp = EXPMAX; + if (src->class == CLASS_QNAN || 1) + { +#ifdef QUIET_NAN_NEGATED + fraction |= QUIET_NAN - 1; +#else + fraction |= QUIET_NAN; +#endif + } + } + else if (isinf (src)) + { + exp = EXPMAX; + fraction = 0; + } + else if (iszero (src)) + { + exp = 0; + fraction = 0; + } + else if (fraction == 0) + { + exp = 0; + } + else + { + if (__builtin_expect (src->normal_exp < NORMAL_EXPMIN, 0)) + { +#ifdef NO_DENORMALS + /* Go straight to a zero representation if denormals are not + supported. The denormal handling would be harmless but + isn't unnecessary. */ + exp = 0; + fraction = 0; +#else /* NO_DENORMALS */ + /* This number's exponent is too low to fit into the bits + available in the number, so we'll store 0 in the exponent and + shift the fraction to the right to make up for it. */ + + int shift = NORMAL_EXPMIN - src->normal_exp; + + exp = 0; + + if (shift > FRAC_NBITS - NGARDS) + { + /* No point shifting, since it's more that 64 out. */ + fraction = 0; + } + else + { + int lowbit = (fraction & (((fractype)1 << shift) - 1)) ? 1 : 0; + fraction = (fraction >> shift) | lowbit; + } + if ((fraction & GARDMASK) == GARDMSB) + { + if ((fraction & (1 << NGARDS))) + fraction += GARDROUND + 1; + } + else + { + /* Add to the guards to round up. */ + fraction += GARDROUND; + } + /* Perhaps the rounding means we now need to change the + exponent, because the fraction is no longer denormal. */ + if (fraction >= IMPLICIT_1) + { + exp += 1; + } + fraction >>= NGARDS; +#endif /* NO_DENORMALS */ + } + else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) + && __builtin_expect (src->normal_exp > EXPBIAS, 0)) + { + exp = EXPMAX; + fraction = 0; + } + else + { + exp = src->normal_exp + EXPBIAS; + if (!ROUND_TOWARDS_ZERO) + { + /* IF the gard bits are the all zero, but the first, then we're + half way between two numbers, choose the one which makes the + lsb of the answer 0. 
*/ + if ((fraction & GARDMASK) == GARDMSB) + { + if (fraction & (1 << NGARDS)) + fraction += GARDROUND + 1; + } + else + { + /* Add a one to the guards to round up */ + fraction += GARDROUND; + } + if (fraction >= IMPLICIT_2) + { + fraction >>= 1; + exp += 1; + } + } + fraction >>= NGARDS; + + if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX) + { + /* Saturate on overflow. */ + exp = EXPMAX; + fraction = ((fractype) 1 << FRACBITS) - 1; + } + } + } + + /* We previously used bitfields to store the number, but this doesn't + handle little/big endian systems conveniently, so use shifts and + masks */ +#ifdef FLOAT_BIT_ORDER_MISMATCH + dst.bits.fraction = fraction; + dst.bits.exp = exp; + dst.bits.sign = sign; +#else +# if defined TFLOAT && defined HALFFRACBITS + { + halffractype high, low, unity; + int lowsign, lowexp; + + unity = (halffractype) 1 << HALFFRACBITS; + + /* Set HIGH to the high double's significand, masking out the implicit 1. + Set LOW to the low double's full significand. */ + high = (fraction >> (FRACBITS - HALFFRACBITS)) & (unity - 1); + low = fraction & (unity * 2 - 1); + + /* Get the initial sign and exponent of the low double. */ + lowexp = exp - HALFFRACBITS - 1; + lowsign = sign; + + /* HIGH should be rounded like a normal double, making |LOW| <= + 0.5 ULP of HIGH. Assume round-to-nearest. */ + if (exp < EXPMAX) + if (low > unity || (low == unity && (high & 1) == 1)) + { + /* Round HIGH up and adjust LOW to match. */ + high++; + if (high == unity) + { + /* May make it infinite, but that's OK. */ + high = 0; + exp++; + } + low = unity * 2 - low; + lowsign ^= 1; + } + + high |= (halffractype) exp << HALFFRACBITS; + high |= (halffractype) sign << (HALFFRACBITS + EXPBITS); + + if (exp == EXPMAX || exp == 0 || low == 0) + low = 0; + else + { + while (lowexp > 0 && low < unity) + { + low <<= 1; + lowexp--; + } + + if (lowexp <= 0) + { + halffractype roundmsb, round; + int shift; + + shift = 1 - lowexp; + roundmsb = (1 << (shift - 1)); + round = low & ((roundmsb << 1) - 1); + + low >>= shift; + lowexp = 0; + + if (round > roundmsb || (round == roundmsb && (low & 1) == 1)) + { + low++; + if (low == unity) + /* LOW rounds up to the smallest normal number. 
*/ + lowexp++; + } + } + + low &= unity - 1; + low |= (halffractype) lowexp << HALFFRACBITS; + low |= (halffractype) lowsign << (HALFFRACBITS + EXPBITS); + } + dst.value_raw = ((fractype) high << HALFSHIFT) | low; + } +# else + dst.value_raw = fraction & ((((fractype)1) << FRACBITS) - (fractype)1); + dst.value_raw |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << FRACBITS; + dst.value_raw |= ((fractype) (sign & 1)) << (FRACBITS | EXPBITS); +# endif +#endif + +#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT) +#ifdef TFLOAT + { + qrtrfractype tmp1 = dst.words[0]; + qrtrfractype tmp2 = dst.words[1]; + dst.words[0] = dst.words[3]; + dst.words[1] = dst.words[2]; + dst.words[2] = tmp2; + dst.words[3] = tmp1; + } +#else + { + halffractype tmp = dst.words[0]; + dst.words[0] = dst.words[1]; + dst.words[1] = tmp; + } +#endif +#endif + + return dst.value; +} +#endif + +#if defined(L_unpack_df) || defined(L_unpack_sf) || defined(L_unpack_tf) +void +unpack_d (FLO_union_type * src, fp_number_type * dst) +{ + /* We previously used bitfields to store the number, but this doesn't + handle little/big endian systems conveniently, so use shifts and + masks */ + fractype fraction; + int exp; + int sign; + +#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT) + FLO_union_type swapped; + +#ifdef TFLOAT + swapped.words[0] = src->words[3]; + swapped.words[1] = src->words[2]; + swapped.words[2] = src->words[1]; + swapped.words[3] = src->words[0]; +#else + swapped.words[0] = src->words[1]; + swapped.words[1] = src->words[0]; +#endif + src = &swapped; +#endif + +#ifdef FLOAT_BIT_ORDER_MISMATCH + fraction = src->bits.fraction; + exp = src->bits.exp; + sign = src->bits.sign; +#else +# if defined TFLOAT && defined HALFFRACBITS + { + halffractype high, low; + + high = src->value_raw >> HALFSHIFT; + low = src->value_raw & (((fractype)1 << HALFSHIFT) - 1); + + fraction = high & ((((fractype)1) << HALFFRACBITS) - 1); + fraction <<= FRACBITS - HALFFRACBITS; + exp = ((int)(high >> HALFFRACBITS)) & ((1 << EXPBITS) - 1); + sign = ((int)(high >> (((HALFFRACBITS + EXPBITS))))) & 1; + + if (exp != EXPMAX && exp != 0 && low != 0) + { + int lowexp = ((int)(low >> HALFFRACBITS)) & ((1 << EXPBITS) - 1); + int lowsign = ((int)(low >> (((HALFFRACBITS + EXPBITS))))) & 1; + int shift; + fractype xlow; + + xlow = low & ((((fractype)1) << HALFFRACBITS) - 1); + if (lowexp) + xlow |= (((halffractype)1) << HALFFRACBITS); + else + lowexp = 1; + shift = (FRACBITS - HALFFRACBITS) - (exp - lowexp); + if (shift > 0) + xlow <<= shift; + else if (shift < 0) + xlow >>= -shift; + if (sign == lowsign) + fraction += xlow; + else if (fraction >= xlow) + fraction -= xlow; + else + { + /* The high part is a power of two but the full number is lower. + This code will leave the implicit 1 in FRACTION, but we'd + have added that below anyway. */ + fraction = (((fractype) 1 << FRACBITS) - xlow) << 1; + exp--; + } + } + } +# else + fraction = src->value_raw & ((((fractype)1) << FRACBITS) - 1); + exp = ((int)(src->value_raw >> FRACBITS)) & ((1 << EXPBITS) - 1); + sign = ((int)(src->value_raw >> (FRACBITS + EXPBITS))) & 1; +# endif +#endif + + dst->sign = sign; + if (exp == 0) + { + /* Hmm. Looks like 0 */ + if (fraction == 0 +#ifdef NO_DENORMALS + || 1 +#endif + ) + { + /* tastes like zero */ + dst->class = CLASS_ZERO; + } + else + { + /* Zero exponent with nonzero fraction - it's denormalized, + so there isn't a leading implicit one - we'll shift it so + it gets one. 
*/ + dst->normal_exp = exp - EXPBIAS + 1; + fraction <<= NGARDS; + + dst->class = CLASS_NUMBER; +#if 1 + while (fraction < IMPLICIT_1) + { + fraction <<= 1; + dst->normal_exp--; + } +#endif + dst->fraction.ll = fraction; + } + } + else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) + && __builtin_expect (exp == EXPMAX, 0)) + { + /* Huge exponent*/ + if (fraction == 0) + { + /* Attached to a zero fraction - means infinity */ + dst->class = CLASS_INFINITY; + } + else + { + /* Nonzero fraction, means nan */ +#ifdef QUIET_NAN_NEGATED + if ((fraction & QUIET_NAN) == 0) +#else + if (fraction & QUIET_NAN) +#endif + { + dst->class = CLASS_QNAN; + } + else + { + dst->class = CLASS_SNAN; + } + /* Keep the fraction part as the nan number */ + dst->fraction.ll = fraction; + } + } + else + { + /* Nothing strange about this number */ + dst->normal_exp = exp - EXPBIAS; + dst->class = CLASS_NUMBER; + dst->fraction.ll = (fraction << NGARDS) | IMPLICIT_1; + } +} +#endif /* L_unpack_df || L_unpack_sf */ + +#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf) +static const fp_number_type * +_fpadd_parts (fp_number_type * a, + fp_number_type * b, + fp_number_type * tmp) +{ + intfrac tfraction; + + /* Put commonly used fields in local variables. */ + int a_normal_exp; + int b_normal_exp; + fractype a_fraction; + fractype b_fraction; + + if (isnan (a)) + { + return a; + } + if (isnan (b)) + { + return b; + } + if (isinf (a)) + { + /* Adding infinities with opposite signs yields a NaN. */ + if (isinf (b) && a->sign != b->sign) + return makenan (); + return a; + } + if (isinf (b)) + { + return b; + } + if (iszero (b)) + { + if (iszero (a)) + { + *tmp = *a; + tmp->sign = a->sign & b->sign; + return tmp; + } + return a; + } + if (iszero (a)) + { + return b; + } + + /* Got two numbers. shift the smaller and increment the exponent till + they're the same */ + { + int diff; + int sdiff; + + a_normal_exp = a->normal_exp; + b_normal_exp = b->normal_exp; + a_fraction = a->fraction.ll; + b_fraction = b->fraction.ll; + + diff = a_normal_exp - b_normal_exp; + sdiff = diff; + + if (diff < 0) + diff = -diff; + if (diff < FRAC_NBITS) + { + if (sdiff > 0) + { + b_normal_exp += diff; + LSHIFT (b_fraction, diff); + } + else if (sdiff < 0) + { + a_normal_exp += diff; + LSHIFT (a_fraction, diff); + } + } + else + { + /* Somethings's up.. 
choose the biggest */ + if (a_normal_exp > b_normal_exp) + { + b_normal_exp = a_normal_exp; + b_fraction = 0; + } + else + { + a_normal_exp = b_normal_exp; + a_fraction = 0; + } + } + } + + if (a->sign != b->sign) + { + if (a->sign) + { + tfraction = -a_fraction + b_fraction; + } + else + { + tfraction = a_fraction - b_fraction; + } + if (tfraction >= 0) + { + tmp->sign = 0; + tmp->normal_exp = a_normal_exp; + tmp->fraction.ll = tfraction; + } + else + { + tmp->sign = 1; + tmp->normal_exp = a_normal_exp; + tmp->fraction.ll = -tfraction; + } + /* and renormalize it */ + + while (tmp->fraction.ll < IMPLICIT_1 && tmp->fraction.ll) + { + tmp->fraction.ll <<= 1; + tmp->normal_exp--; + } + } + else + { + tmp->sign = a->sign; + tmp->normal_exp = a_normal_exp; + tmp->fraction.ll = a_fraction + b_fraction; + } + tmp->class = CLASS_NUMBER; + /* Now the fraction is added, we have to shift down to renormalize the + number */ + + if (tmp->fraction.ll >= IMPLICIT_2) + { + LSHIFT (tmp->fraction.ll, 1); + tmp->normal_exp++; + } + return tmp; +} + +FLO_type +add (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + fp_number_type tmp; + const fp_number_type *res; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + res = _fpadd_parts (&a, &b, &tmp); + + return pack_d (res); +} + +FLO_type +sub (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + fp_number_type tmp; + const fp_number_type *res; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + b.sign ^= 1; + + res = _fpadd_parts (&a, &b, &tmp); + + return pack_d (res); +} +#endif /* L_addsub_sf || L_addsub_df */ + +#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf) +static inline __attribute__ ((__always_inline__)) const fp_number_type * +_fpmul_parts ( fp_number_type * a, + fp_number_type * b, + fp_number_type * tmp) +{ + fractype low = 0; + fractype high = 0; + + if (isnan (a)) + { + a->sign = a->sign != b->sign; + return a; + } + if (isnan (b)) + { + b->sign = a->sign != b->sign; + return b; + } + if (isinf (a)) + { + if (iszero (b)) + return makenan (); + a->sign = a->sign != b->sign; + return a; + } + if (isinf (b)) + { + if (iszero (a)) + { + return makenan (); + } + b->sign = a->sign != b->sign; + return b; + } + if (iszero (a)) + { + a->sign = a->sign != b->sign; + return a; + } + if (iszero (b)) + { + b->sign = a->sign != b->sign; + return b; + } + + /* Calculate the mantissa by multiplying both numbers to get a + twice-as-wide number. */ + { +#if defined(NO_DI_MODE) || defined(TFLOAT) + { + fractype x = a->fraction.ll; + fractype ylow = b->fraction.ll; + fractype yhigh = 0; + int bit; + + /* ??? This does multiplies one bit at a time. Optimize. */ + for (bit = 0; bit < FRAC_NBITS; bit++) + { + int carry; + + if (x & 1) + { + carry = (low += ylow) < ylow; + high += yhigh + carry; + } + yhigh <<= 1; + if (ylow & FRACHIGH) + { + yhigh |= 1; + } + ylow <<= 1; + x >>= 1; + } + } +#elif defined(FLOAT) + /* Multiplying two USIs to get a UDI, we're safe. */ + { + UDItype answer = (UDItype)a->fraction.ll * (UDItype)b->fraction.ll; + + high = answer >> BITS_PER_SI; + low = answer; + } +#else + /* fractype is DImode, but we need the result to be twice as wide. + Assuming a widening multiply from DImode to TImode is not + available, build one by hand. 
*/ + { + USItype nl = a->fraction.ll; + USItype nh = a->fraction.ll >> BITS_PER_SI; + USItype ml = b->fraction.ll; + USItype mh = b->fraction.ll >> BITS_PER_SI; + UDItype pp_ll = (UDItype) ml * nl; + UDItype pp_hl = (UDItype) mh * nl; + UDItype pp_lh = (UDItype) ml * nh; + UDItype pp_hh = (UDItype) mh * nh; + UDItype res2 = 0; + UDItype res0 = 0; + UDItype ps_hh__ = pp_hl + pp_lh; + if (ps_hh__ < pp_hl) + res2 += (UDItype)1 << BITS_PER_SI; + pp_hl = (UDItype)(USItype)ps_hh__ << BITS_PER_SI; + res0 = pp_ll + pp_hl; + if (res0 < pp_ll) + res2++; + res2 += (ps_hh__ >> BITS_PER_SI) + pp_hh; + high = res2; + low = res0; + } +#endif + } + + tmp->normal_exp = a->normal_exp + b->normal_exp + + FRAC_NBITS - (FRACBITS + NGARDS); + tmp->sign = a->sign != b->sign; + while (high >= IMPLICIT_2) + { + tmp->normal_exp++; + if (high & 1) + { + low >>= 1; + low |= FRACHIGH; + } + high >>= 1; + } + while (high < IMPLICIT_1) + { + tmp->normal_exp--; + + high <<= 1; + if (low & FRACHIGH) + high |= 1; + low <<= 1; + } + + if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB) + { + if (high & (1 << NGARDS)) + { + /* Because we're half way, we would round to even by adding + GARDROUND + 1, except that's also done in the packing + function, and rounding twice will lose precision and cause + the result to be too far off. Example: 32-bit floats with + bit patterns 0xfff * 0x3f800400 ~= 0xfff (less than 0.5ulp + off), not 0x1000 (more than 0.5ulp off). */ + } + else if (low) + { + /* We're a further than half way by a small amount corresponding + to the bits set in "low". Knowing that, we round here and + not in pack_d, because there we don't have "low" available + anymore. */ + high += GARDROUND + 1; + + /* Avoid further rounding in pack_d. */ + high &= ~(fractype) GARDMASK; + } + } + tmp->fraction.ll = high; + tmp->class = CLASS_NUMBER; + return tmp; +} + +FLO_type +multiply (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + fp_number_type tmp; + const fp_number_type *res; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + res = _fpmul_parts (&a, &b, &tmp); + + return pack_d (res); +} +#endif /* L_mul_sf || L_mul_df || L_mul_tf */ + +#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf) +static inline __attribute__ ((__always_inline__)) const fp_number_type * +_fpdiv_parts (fp_number_type * a, + fp_number_type * b) +{ + fractype bit; + fractype numerator; + fractype denominator; + fractype quotient; + + if (isnan (a)) + { + return a; + } + if (isnan (b)) + { + return b; + } + + a->sign = a->sign ^ b->sign; + + if (isinf (a) || iszero (a)) + { + if (a->class == b->class) + return makenan (); + return a; + } + + if (isinf (b)) + { + a->fraction.ll = 0; + a->normal_exp = 0; + return a; + } + if (iszero (b)) + { + a->class = CLASS_INFINITY; + return a; + } + + /* Calculate the mantissa by multiplying both 64bit numbers to get a + 128 bit number */ + { + /* quotient = + ( numerator / denominator) * 2^(numerator exponent - denominator exponent) + */ + + a->normal_exp = a->normal_exp - b->normal_exp; + numerator = a->fraction.ll; + denominator = b->fraction.ll; + + if (numerator < denominator) + { + /* Fraction will be less than 1.0 */ + numerator *= 2; + a->normal_exp--; + } + bit = IMPLICIT_1; + quotient = 0; + /* ??? Does divide one bit at a time. Optimize. 
*/
+      while (bit)
+        {
+          if (numerator >= denominator)
+            {
+              quotient |= bit;
+              numerator -= denominator;
+            }
+          bit >>= 1;
+          numerator *= 2;
+        }
+
+      if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB)
+        {
+          if (quotient & (1 << NGARDS))
+            {
+              /* Because we're half way, we would round to even by adding
+                 GARDROUND + 1, except that's also done in the packing
+                 function, and rounding twice will lose precision and cause
+                 the result to be too far off. */
+            }
+          else if (numerator)
+            {
+              /* We're a further than half way by the small amount
+                 corresponding to the bits set in "numerator". Knowing
+                 that, we round here and not in pack_d, because there we
+                 don't have "numerator" available anymore. */
+              quotient += GARDROUND + 1;
+
+              /* Avoid further rounding in pack_d. */
+              quotient &= ~(fractype) GARDMASK;
+            }
+        }
+
+      a->fraction.ll = quotient;
+      return (a);
+    }
+}
+
+FLO_type
+divide (FLO_type arg_a, FLO_type arg_b)
+{
+  fp_number_type a;
+  fp_number_type b;
+  const fp_number_type *res;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
+
+  res = _fpdiv_parts (&a, &b);
+
+  return pack_d (res);
+}
+#endif /* L_div_sf || L_div_df */
+
+#if defined(L_fpcmp_parts_sf) || defined(L_fpcmp_parts_df) \
+    || defined(L_fpcmp_parts_tf)
+/* according to the demo, fpcmp returns a comparison with 0... thus
+   a<b -> -1
+   a==b -> 0
+   a>b -> +1
+ */
+
+int
+__fpcmp_parts (fp_number_type * a, fp_number_type * b)
+{
+#if 0
+  /* either nan -> unordered. Must be checked outside of this routine. */
+  if (isnan (a) && isnan (b))
+    {
+      return 1;   /* still unordered! */
+    }
+#endif
+
+  if (isnan (a) || isnan (b))
+    {
+      return 1;   /* how to indicate unordered compare? */
+    }
+  if (isinf (a) && isinf (b))
+    {
+      /* +inf > -inf, but +inf != +inf */
+      /* b \a| +inf(0)| -inf(1)
+       ______\+--------+--------
+       +inf(0)| a==b(0)| a<b(-1)
+       -------+--------+--------
+       -inf(1)| a>b(1) | a==b(0)
+       -------+--------+--------
+       So since unordered must be nonzero, just line up the columns...
+      */
+      return b->sign - a->sign;
+    }
+  /* but not both... */
+  if (isinf (a))
+    {
+      return a->sign ? -1 : 1;
+    }
+  if (isinf (b))
+    {
+      return b->sign ? 1 : -1;
+    }
+  if (iszero (a) && iszero (b))
+    {
+      return 0;
+    }
+  if (iszero (a))
+    {
+      return b->sign ? 1 : -1;
+    }
+  if (iszero (b))
+    {
+      return a->sign ? -1 : 1;
+    }
+  /* now both are "normal". */
+  if (a->sign != b->sign)
+    {
+      /* opposite signs */
+      return a->sign ? -1 : 1;
+    }
+  /* same sign; exponents? */
+  if (a->normal_exp > b->normal_exp)
+    {
+      return a->sign ? -1 : 1;
+    }
+  if (a->normal_exp < b->normal_exp)
+    {
+      return a->sign ? 1 : -1;
+    }
+  /* same exponents; check size. */
+  if (a->fraction.ll > b->fraction.ll)
+    {
+      return a->sign ? -1 : 1;
+    }
+  if (a->fraction.ll < b->fraction.ll)
+    {
+      return a->sign ? 1 : -1;
+    }
+  /* after all that, they're equal. */
+  return 0;
+}
+#endif
+
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compare_tf)
+CMPtype
+compare (FLO_type arg_a, FLO_type arg_b)
+{
+  fp_number_type a;
+  fp_number_type b;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
+
+  return __fpcmp_parts (&a, &b);
+}
+#endif /* L_compare_sf || L_compare_df */
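A note on how the three-way result of __fpcmp_parts is meant to be consumed by the wrappers that follow. The sketch below is editorial only and is not part of the imported fp-bit.c: demo_gt is a hypothetical stand-in with the same contract as _gt_f2 above, returning -1 for unordered operands so that the caller's test "result > 0" comes out false when either argument is NaN.

#include <math.h>
#include <stdio.h>

/* Hypothetical stand-in mirroring the _gt_f2 contract shown above. */
static int demo_gt (double a, double b)
{
  if (isnan (a) || isnan (b))
    return -1;                /* unordered: forces "greater than" to be false */
  return (a > b) - (a < b);   /* -1, 0 or +1, like __fpcmp_parts */
}

int main (void)
{
  printf ("%d\n", demo_gt (2.0, 1.0) > 0);   /* 1 */
  printf ("%d\n", demo_gt (1.0, 2.0) > 0);   /* 0 */
  printf ("%d\n", demo_gt (NAN, 1.0) > 0);   /* 0: comparisons with NaN are false */
  return 0;
}

+
+/* These should be optimized for their specific tasks someday.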
*/ + +#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf) +CMPtype +_eq_f2 (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + if (isnan (&a) || isnan (&b)) + return 1; /* false, truth == 0 */ + + return __fpcmp_parts (&a, &b) ; +} +#endif /* L_eq_sf || L_eq_df */ + +#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf) +CMPtype +_ne_f2 (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + if (isnan (&a) || isnan (&b)) + return 1; /* true, truth != 0 */ + + return __fpcmp_parts (&a, &b) ; +} +#endif /* L_ne_sf || L_ne_df */ + +#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf) +CMPtype +_gt_f2 (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + if (isnan (&a) || isnan (&b)) + return -1; /* false, truth > 0 */ + + return __fpcmp_parts (&a, &b); +} +#endif /* L_gt_sf || L_gt_df */ + +#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf) +CMPtype +_ge_f2 (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + if (isnan (&a) || isnan (&b)) + return -1; /* false, truth >= 0 */ + return __fpcmp_parts (&a, &b) ; +} +#endif /* L_ge_sf || L_ge_df */ + +#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf) +CMPtype +_lt_f2 (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + if (isnan (&a) || isnan (&b)) + return 1; /* false, truth < 0 */ + + return __fpcmp_parts (&a, &b); +} +#endif /* L_lt_sf || L_lt_df */ + +#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf) +CMPtype +_le_f2 (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + if (isnan (&a) || isnan (&b)) + return 1; /* false, truth <= 0 */ + + return __fpcmp_parts (&a, &b) ; +} +#endif /* L_le_sf || L_le_df */ + +#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf) +CMPtype +_unord_f2 (FLO_type arg_a, FLO_type arg_b) +{ + fp_number_type a; + fp_number_type b; + FLO_union_type au, bu; + + au.value = arg_a; + bu.value = arg_b; + + unpack_d (&au, &a); + unpack_d (&bu, &b); + + return (isnan (&a) || isnan (&b)); +} +#endif /* L_unord_sf || L_unord_df */ + +#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf) +FLO_type +si_to_float (SItype arg_a) +{ + fp_number_type in; + + in.class = CLASS_NUMBER; + in.sign = arg_a < 0; + if (!arg_a) + { + in.class = CLASS_ZERO; + } + else + { + USItype uarg; + int shift; + in.normal_exp = FRACBITS + NGARDS; + if (in.sign) + { + /* Special case for minint, since there is no +ve integer + representation for it */ + if (arg_a == (- MAX_SI_INT - 1)) + { + return (FLO_type)(- MAX_SI_INT - 1); + } + uarg = (-arg_a); + } + else + uarg = arg_a; + + in.fraction.ll = uarg; + shift = clzusi (uarg) - (BITS_PER_SI - 1 - FRACBITS - NGARDS); + if (shift > 0) + { + in.fraction.ll <<= shift; + in.normal_exp -= shift; + } + } + 
return pack_d (&in); +} +#endif /* L_si_to_sf || L_si_to_df */ + +#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf) +FLO_type +usi_to_float (USItype arg_a) +{ + fp_number_type in; + + in.sign = 0; + if (!arg_a) + { + in.class = CLASS_ZERO; + } + else + { + int shift; + in.class = CLASS_NUMBER; + in.normal_exp = FRACBITS + NGARDS; + in.fraction.ll = arg_a; + + shift = clzusi (arg_a) - (BITS_PER_SI - 1 - FRACBITS - NGARDS); + if (shift < 0) + { + fractype guard = in.fraction.ll & (((fractype)1 << -shift) - 1); + in.fraction.ll >>= -shift; + in.fraction.ll |= (guard != 0); + in.normal_exp -= shift; + } + else if (shift > 0) + { + in.fraction.ll <<= shift; + in.normal_exp -= shift; + } + } + return pack_d (&in); +} +#endif + +#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si) +SItype +float_to_si (FLO_type arg_a) +{ + fp_number_type a; + SItype tmp; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &a); + + if (iszero (&a)) + return 0; + if (isnan (&a)) + return 0; + /* get reasonable MAX_SI_INT... */ + if (isinf (&a)) + return a.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT; + /* it is a number, but a small one */ + if (a.normal_exp < 0) + return 0; + if (a.normal_exp > BITS_PER_SI - 2) + return a.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT; + tmp = a.fraction.ll >> ((FRACBITS + NGARDS) - a.normal_exp); + return a.sign ? (-tmp) : (tmp); +} +#endif /* L_sf_to_si || L_df_to_si */ + +#if defined(L_tf_to_usi) +USItype +float_to_usi (FLO_type arg_a) +{ + fp_number_type a; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &a); + + if (iszero (&a)) + return 0; + if (isnan (&a)) + return 0; + /* it is a negative number */ + if (a.sign) + return 0; + /* get reasonable MAX_USI_INT... */ + if (isinf (&a)) + return MAX_USI_INT; + /* it is a number, but a small one */ + if (a.normal_exp < 0) + return 0; + if (a.normal_exp > BITS_PER_SI - 1) + return MAX_USI_INT; + else if (a.normal_exp > (FRACBITS + NGARDS)) + return a.fraction.ll << (a.normal_exp - (FRACBITS + NGARDS)); + else + return a.fraction.ll >> ((FRACBITS + NGARDS) - a.normal_exp); +} +#endif /* L_tf_to_usi */ + +#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf) +FLO_type +negate (FLO_type arg_a) +{ + fp_number_type a; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &a); + + flip_sign (&a); + return pack_d (&a); +} +#endif /* L_negate_sf || L_negate_df */ + +#ifdef FLOAT + +#if defined(L_make_sf) +SFtype +__make_fp(fp_class_type class, + unsigned int sign, + int exp, + USItype frac) +{ + fp_number_type in; + + in.class = class; + in.sign = sign; + in.normal_exp = exp; + in.fraction.ll = frac; + return pack_d (&in); +} +#endif /* L_make_sf */ + +#ifndef FLOAT_ONLY + +/* This enables one to build an fp library that supports float but not double. + Otherwise, we would get an undefined reference to __make_dp. + This is needed for some 8-bit ports that can't handle well values that + are 8-bytes in size, so we just don't support double for them at all. 
*/ + +#if defined(L_sf_to_df) +DFtype +sf_to_df (SFtype arg_a) +{ + fp_number_type in; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &in); + + return __make_dp (in.class, in.sign, in.normal_exp, + ((UDItype) in.fraction.ll) << F_D_BITOFF); +} +#endif /* L_sf_to_df */ + +#if defined(L_sf_to_tf) && defined(TMODES) +TFtype +sf_to_tf (SFtype arg_a) +{ + fp_number_type in; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &in); + + return __make_tp (in.class, in.sign, in.normal_exp, + ((UTItype) in.fraction.ll) << F_T_BITOFF); +} +#endif /* L_sf_to_df */ + +#endif /* ! FLOAT_ONLY */ +#endif /* FLOAT */ + +#ifndef FLOAT + +extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype); + +#if defined(L_make_df) +DFtype +__make_dp (fp_class_type class, unsigned int sign, int exp, UDItype frac) +{ + fp_number_type in; + + in.class = class; + in.sign = sign; + in.normal_exp = exp; + in.fraction.ll = frac; + return pack_d (&in); +} +#endif /* L_make_df */ + +#if defined(L_df_to_sf) +SFtype +df_to_sf (DFtype arg_a) +{ + fp_number_type in; + USItype sffrac; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &in); + + sffrac = in.fraction.ll >> F_D_BITOFF; + + /* We set the lowest guard bit in SFFRAC if we discarded any non + zero bits. */ + if ((in.fraction.ll & (((USItype) 1 << F_D_BITOFF) - 1)) != 0) + sffrac |= 1; + + return __make_fp (in.class, in.sign, in.normal_exp, sffrac); +} +#endif /* L_df_to_sf */ + +#if defined(L_df_to_tf) && defined(TMODES) \ + && !defined(FLOAT) && !defined(TFLOAT) +TFtype +df_to_tf (DFtype arg_a) +{ + fp_number_type in; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &in); + + return __make_tp (in.class, in.sign, in.normal_exp, + ((UTItype) in.fraction.ll) << D_T_BITOFF); +} +#endif /* L_sf_to_df */ + +#ifdef TFLOAT +#if defined(L_make_tf) +TFtype +__make_tp(fp_class_type class, + unsigned int sign, + int exp, + UTItype frac) +{ + fp_number_type in; + + in.class = class; + in.sign = sign; + in.normal_exp = exp; + in.fraction.ll = frac; + return pack_d (&in); +} +#endif /* L_make_tf */ + +#if defined(L_tf_to_df) +DFtype +tf_to_df (TFtype arg_a) +{ + fp_number_type in; + UDItype sffrac; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &in); + + sffrac = in.fraction.ll >> D_T_BITOFF; + + /* We set the lowest guard bit in SFFRAC if we discarded any non + zero bits. */ + if ((in.fraction.ll & (((UTItype) 1 << D_T_BITOFF) - 1)) != 0) + sffrac |= 1; + + return __make_dp (in.class, in.sign, in.normal_exp, sffrac); +} +#endif /* L_tf_to_df */ + +#if defined(L_tf_to_sf) +SFtype +tf_to_sf (TFtype arg_a) +{ + fp_number_type in; + USItype sffrac; + FLO_union_type au; + + au.value = arg_a; + unpack_d (&au, &in); + + sffrac = in.fraction.ll >> F_T_BITOFF; + + /* We set the lowest guard bit in SFFRAC if we discarded any non + zero bits. */ + if ((in.fraction.ll & (((UTItype) 1 << F_T_BITOFF) - 1)) != 0) + sffrac |= 1; + + return __make_fp (in.class, in.sign, in.normal_exp, sffrac); +} +#endif /* L_tf_to_sf */ +#endif /* TFLOAT */ + +#endif /* ! FLOAT */ +#endif /* !EXTENDED_FLOAT_STUBS */ diff --git a/gcc/config/fp-bit.h b/gcc/config/fp-bit.h new file mode 100644 index 000000000..f0b07e91f --- /dev/null +++ b/gcc/config/fp-bit.h @@ -0,0 +1,499 @@ +/* Header file for fp-bit.c. */ +/* Copyright (C) 2000, 2002, 2003, 2006, 2009, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef GCC_FP_BIT_H +#define GCC_FP_BIT_H + +/* Defining FINE_GRAINED_LIBRARIES allows one to select which routines + from this file are compiled via additional -D options. + + This avoids the need to pull in the entire fp emulation library + when only a small number of functions are needed. + + If FINE_GRAINED_LIBRARIES is not defined, then compile every + suitable routine. */ +#ifndef FINE_GRAINED_LIBRARIES +#define L_pack_df +#define L_unpack_df +#define L_pack_sf +#define L_unpack_sf +#define L_addsub_sf +#define L_addsub_df +#define L_mul_sf +#define L_mul_df +#define L_div_sf +#define L_div_df +#define L_fpcmp_parts_sf +#define L_fpcmp_parts_df +#define L_compare_sf +#define L_compare_df +#define L_eq_sf +#define L_eq_df +#define L_ne_sf +#define L_ne_df +#define L_gt_sf +#define L_gt_df +#define L_ge_sf +#define L_ge_df +#define L_lt_sf +#define L_lt_df +#define L_le_sf +#define L_le_df +#define L_unord_sf +#define L_unord_df +#define L_usi_to_sf +#define L_usi_to_df +#define L_si_to_sf +#define L_si_to_df +#define L_sf_to_si +#define L_df_to_si +#define L_f_to_usi +#define L_df_to_usi +#define L_negate_sf +#define L_negate_df +#define L_make_sf +#define L_make_df +#define L_sf_to_df +#define L_df_to_sf +#ifdef FLOAT +#define L_thenan_sf +#else +#define L_thenan_df +#endif +#endif /* ! FINE_GRAINED_LIBRARIES */ + +#if __LDBL_MANT_DIG__ == 113 || __LDBL_MANT_DIG__ == 106 +# if defined(TFLOAT) || defined(L_sf_to_tf) || defined(L_df_to_tf) +# define TMODES +# endif +#endif + +typedef float SFtype __attribute__ ((mode (SF))); +typedef float DFtype __attribute__ ((mode (DF))); +#ifdef TMODES +typedef float TFtype __attribute__ ((mode (TF))); +#endif + +typedef int HItype __attribute__ ((mode (HI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +#ifdef TMODES +typedef int TItype __attribute__ ((mode (TI))); +#endif + +/* The type of the result of a floating point comparison. This must + match `__libgcc_cmp_return__' in GCC for the target. 
*/ +#ifndef CMPtype +typedef int CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#endif + +typedef unsigned int UHItype __attribute__ ((mode (HI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +#ifdef TMODES +typedef unsigned int UTItype __attribute__ ((mode (TI))); +#endif + +#define MAX_USI_INT (~(USItype)0) +#define MAX_SI_INT ((SItype) (MAX_USI_INT >> 1)) +#define BITS_PER_SI (4 * BITS_PER_UNIT) +#ifdef TMODES +#define MAX_UDI_INT (~(UDItype)0) +#define MAX_DI_INT ((DItype) (MAX_UDI_INT >> 1)) +#define BITS_PER_DI (8 * BITS_PER_UNIT) +#endif + +#ifdef FLOAT_ONLY +#define NO_DI_MODE +#endif + +#ifdef TFLOAT +# ifndef TMODES +# error "TFLOAT requires long double to have 113 bits of mantissa" +# endif + +# define PREFIXFPDP tp +# define PREFIXSFDF tf +# define NGARDS 10L /* Is this right? */ +# define GARDROUND 0x1ff +# define GARDMASK 0x3ff +# define GARDMSB 0x200 +# define FRAC_NBITS 128 + +# if __LDBL_MANT_DIG__ == 113 /* IEEE quad */ +# define EXPBITS 15 +# define EXPBIAS 16383 +# define EXPMAX (0x7fff) +# define QUIET_NAN ((TItype)0x8 << 108) +# define FRACHIGH ((TItype)0x8 << 124) +# define FRACHIGH2 ((TItype)0xc << 124) +# define FRACBITS 112 +# endif + +# if __LDBL_MANT_DIG__ == 106 /* IBM extended (double+double) */ +# define EXPBITS 11 +# define EXPBIAS 1023 +# define EXPMAX (0x7ff) +# define QUIET_NAN ((TItype)0x8 << (48 + 64)) +# define FRACHIGH ((TItype)0x8 << 124) +# define FRACHIGH2 ((TItype)0xc << 124) +# define FRACBITS 105 +# define HALFFRACBITS 52 +# define HALFSHIFT 64 +# endif + +# define pack_d __pack_t +# define unpack_d __unpack_t +# define __fpcmp_parts __fpcmp_parts_t + typedef UTItype fractype; + typedef UDItype halffractype; + typedef USItype qrtrfractype; +#define qrtrfractype qrtrfractype + typedef TFtype FLO_type; + typedef TItype intfrac; +#elif defined FLOAT +# define NGARDS 7L +# define GARDROUND 0x3f +# define GARDMASK 0x7f +# define GARDMSB 0x40 +# define EXPBITS 8 +# define EXPBIAS 127 +# define FRACBITS 23 +# define EXPMAX (0xff) +# define QUIET_NAN 0x100000L +# define FRAC_NBITS 32 +# define FRACHIGH 0x80000000L +# define FRACHIGH2 0xc0000000L +# define pack_d __pack_f +# define unpack_d __unpack_f +# define __fpcmp_parts __fpcmp_parts_f + typedef USItype fractype; + typedef UHItype halffractype; + typedef SFtype FLO_type; + typedef SItype intfrac; + +#else +# define PREFIXFPDP dp +# define PREFIXSFDF df +# define NGARDS 8L +# define GARDROUND 0x7f +# define GARDMASK 0xff +# define GARDMSB 0x80 +# define EXPBITS 11 +# define EXPBIAS 1023 +# define FRACBITS 52 +# define EXPMAX (0x7ff) +# define QUIET_NAN 0x8000000000000LL +# define FRAC_NBITS 64 +# define FRACHIGH 0x8000000000000000LL +# define FRACHIGH2 0xc000000000000000LL +# define pack_d __pack_d +# define unpack_d __unpack_d +# define __fpcmp_parts __fpcmp_parts_d + typedef UDItype fractype; + typedef USItype halffractype; + typedef DFtype FLO_type; + typedef DItype intfrac; +#endif /* FLOAT */ + +#ifdef TFLOAT +# define add __addtf3 +# define sub __subtf3 +# define multiply __multf3 +# define divide __divtf3 +# define compare __cmptf2 +# define _eq_f2 __eqtf2 +# define _ne_f2 __netf2 +# define _gt_f2 __gttf2 +# define _ge_f2 __getf2 +# define _lt_f2 __lttf2 +# define _le_f2 __letf2 +# define _unord_f2 __unordtf2 +# define usi_to_float __floatunsitf +# define si_to_float __floatsitf +# define float_to_si __fixtfsi +# define float_to_usi __fixunstfsi +# define negate __negtf2 +# define tf_to_sf __trunctfsf2 +# 
define tf_to_df __trunctfdf2 +#elif defined FLOAT +# define add __addsf3 +# define sub __subsf3 +# define multiply __mulsf3 +# define divide __divsf3 +# define compare __cmpsf2 +# define _eq_f2 __eqsf2 +# define _ne_f2 __nesf2 +# define _gt_f2 __gtsf2 +# define _ge_f2 __gesf2 +# define _lt_f2 __ltsf2 +# define _le_f2 __lesf2 +# define _unord_f2 __unordsf2 +# define usi_to_float __floatunsisf +# define si_to_float __floatsisf +# define float_to_si __fixsfsi +# define float_to_usi __fixunssfsi +# define negate __negsf2 +# define sf_to_df __extendsfdf2 +# define sf_to_tf __extendsftf2 +#else +# define add __adddf3 +# define sub __subdf3 +# define multiply __muldf3 +# define divide __divdf3 +# define compare __cmpdf2 +# define _eq_f2 __eqdf2 +# define _ne_f2 __nedf2 +# define _gt_f2 __gtdf2 +# define _ge_f2 __gedf2 +# define _lt_f2 __ltdf2 +# define _le_f2 __ledf2 +# define _unord_f2 __unorddf2 +# define usi_to_float __floatunsidf +# define si_to_float __floatsidf +# define float_to_si __fixdfsi +# define float_to_usi __fixunsdfsi +# define negate __negdf2 +# define df_to_sf __truncdfsf2 +# define df_to_tf __extenddftf2 +#endif /* FLOAT */ + +#ifndef INLINE +#define INLINE __inline__ +#endif + +/* Preserve the sticky-bit when shifting fractions to the right. */ +#define LSHIFT(a, s) { a = (a >> s) | !!(a & (((fractype) 1 << s) - 1)); } + +/* numeric parameters */ +/* F_D_BITOFF is the number of bits offset between the MSB of the mantissa + of a float and of a double. Assumes there are only two float types. + (double::FRAC_BITS+double::NGARDS-(float::FRAC_BITS-float::NGARDS)) + */ +#define F_D_BITOFF (52+8-(23+7)) + +#ifdef TMODES +# define F_T_BITOFF (__LDBL_MANT_DIG__-1+10-(23+7)) +# define D_T_BITOFF (__LDBL_MANT_DIG__-1+10-(52+8)) +#endif + + +#define NORMAL_EXPMIN (-(EXPBIAS)+1) +#define IMPLICIT_1 ((fractype)1<<(FRACBITS+NGARDS)) +#define IMPLICIT_2 ((fractype)1<<(FRACBITS+1+NGARDS)) + +/* common types */ + +typedef enum +{ + CLASS_SNAN, + CLASS_QNAN, + CLASS_ZERO, + CLASS_NUMBER, + CLASS_INFINITY +} fp_class_type; + +typedef struct +{ +#ifdef SMALL_MACHINE + char class; + unsigned char sign; + short normal_exp; +#else + fp_class_type class; + unsigned int sign; + int normal_exp; +#endif + + union + { + fractype ll; + halffractype l[2]; + } fraction; +} fp_number_type; + +typedef union +{ + FLO_type value; + fractype value_raw; + +#ifndef FLOAT +# ifdef qrtrfractype + qrtrfractype qwords[4]; +# else + halffractype words[2]; +# endif +#endif + +#ifdef FLOAT_BIT_ORDER_MISMATCH + struct + { + fractype fraction:FRACBITS __attribute__ ((packed)); + unsigned int exp:EXPBITS __attribute__ ((packed)); + unsigned int sign:1 __attribute__ ((packed)); + } + bits; +#endif + +#ifdef _DEBUG_BITFLOAT + struct + { + unsigned int sign:1 __attribute__ ((packed)); + unsigned int exp:EXPBITS __attribute__ ((packed)); + fractype fraction:FRACBITS __attribute__ ((packed)); + } + bits_big_endian; + + struct + { + fractype fraction:FRACBITS __attribute__ ((packed)); + unsigned int exp:EXPBITS __attribute__ ((packed)); + unsigned int sign:1 __attribute__ ((packed)); + } + bits_little_endian; +#endif +} +FLO_union_type; + +/* Prototypes. 
*/ + +#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf) +extern FLO_type pack_d (const fp_number_type *); +#endif + +extern void unpack_d (FLO_union_type *, fp_number_type *); + +#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf) +extern FLO_type add (FLO_type, FLO_type); +extern FLO_type sub (FLO_type, FLO_type); +#endif + +#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf) +extern FLO_type multiply (FLO_type, FLO_type); +#endif + +#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf) +extern FLO_type divide (FLO_type, FLO_type); +#endif + +extern int __fpcmp_parts (fp_number_type *, fp_number_type *); + +#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compare_tf) +extern CMPtype compare (FLO_type, FLO_type); +#endif + +#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf) +extern CMPtype _eq_f2 (FLO_type, FLO_type); +#endif + +#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf) +extern CMPtype _ne_f2 (FLO_type, FLO_type); +#endif + +#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf) +extern CMPtype _gt_f2 (FLO_type, FLO_type); +#endif + +#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf) +extern CMPtype _ge_f2 (FLO_type, FLO_type); +#endif + +#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf) +extern CMPtype _lt_f2 (FLO_type, FLO_type); +#endif + +#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf) +extern CMPtype _le_f2 (FLO_type, FLO_type); +#endif + +#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf) +extern CMPtype _unord_f2 (FLO_type, FLO_type); +#endif + +#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf) +extern FLO_type si_to_float (SItype); +#endif + +#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si) +extern SItype float_to_si (FLO_type); +#endif + +#if defined(L_tf_to_usi) +extern USItype float_to_usi (FLO_type); +#endif + +#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf) +extern FLO_type usi_to_float (USItype); +#endif + +#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf) +extern FLO_type negate (FLO_type); +#endif + +#ifdef FLOAT +#if defined(L_make_sf) +extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype); +#endif +#ifndef FLOAT_ONLY +extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype); +#if defined(L_sf_to_df) +extern DFtype sf_to_df (SFtype); +#endif +#if defined(L_sf_to_tf) && defined(TMODES) +extern TFtype sf_to_tf (SFtype); +#endif +#endif /* ! FLOAT_ONLY */ +#endif /* FLOAT */ + +#ifndef FLOAT +extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype); +#if defined(L_make_df) +extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype); +#endif +#if defined(L_df_to_sf) +extern SFtype df_to_sf (DFtype); +#endif +#if defined(L_df_to_tf) && defined(TMODES) +extern TFtype df_to_tf (DFtype); +#endif +#endif /* ! FLOAT */ + +#ifdef TMODES +extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype); +extern TFtype __make_tp (fp_class_type, unsigned int, int, UTItype); +#ifdef TFLOAT +#if defined(L_tf_to_sf) +extern SFtype tf_to_sf (TFtype); +#endif +#if defined(L_tf_to_df) +extern DFtype tf_to_df (TFtype); +#endif +#if defined(L_di_to_tf) +extern TFtype di_to_df (DItype); +#endif +#endif /* TFLOAT */ +#endif /* TMODES */ + +#endif /* ! 
GCC_FP_BIT_H */ diff --git a/gcc/config/fr30/constraints.md b/gcc/config/fr30/constraints.md new file mode 100644 index 000000000..2fb049159 --- /dev/null +++ b/gcc/config/fr30/constraints.md @@ -0,0 +1,72 @@ +;; Constraint definitions for the FR30. +;; Copyright (C) 2011 +;; Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints. +(define_register_constraint "d" "MULTIPLY_64_REG" + "The MDH,MDL register pair as used by MUL and MULU.") + +(define_register_constraint "e" "MULTIPLY_32_REG" + "The MDL register as used by MULH and MULUH.") + +(define_register_constraint "h" "HIGH_REGS" + "Registers 8 through 15.") + +(define_register_constraint "l" "LOW_REGS" + "Registers 0 through 7.") + +(define_register_constraint "a" "ALL_REGS" + "@internal") + +;; Integer constraints. +(define_constraint "I" + "An integer in the range 0 to 15." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 15)"))) + +(define_constraint "J" + "An integer in the range -16 to -1." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -16, -1)"))) + +(define_constraint "K" + "An integer in the range 16 to 31." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 16, 31)"))) + +(define_constraint "L" + "An integer in the range 0 to 255." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "M" + "An integer in the range 0 to 1048575." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 1048575)"))) + +(define_constraint "P" + "An integer in the range -256 to 255." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -256, 255)"))) + +;; Extra constraints. +(define_constraint "Q" + "@internal" + (and (match_code "mem") + (match_code "symbol_ref" "0"))) diff --git a/gcc/config/fr30/crti.asm b/gcc/config/fr30/crti.asm new file mode 100644 index 000000000..4ce61231b --- /dev/null +++ b/gcc/config/fr30/crti.asm @@ -0,0 +1,61 @@ +# crti.s for ELF + +# Copyright (C) 1992, 1998, 1999, 2008, 2009 Free Software Foundation, Inc. +# Written By David Vinayak Henkel-Wallace, June 1992 +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. 
+# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + +# This file just make a stack frame for the contents of the .fini and +# .init sections. Users may put any desired instructions in those +# sections. + + .section ".init" + .global _init + .type _init,#function + .align 4 +_init: + st rp, @-r15 + enter #4 + + # These nops are here to align the end of this code with a 16 byte + # boundary. The linker will start inserting code into the .init + # section at such a boundary. + + nop + nop + nop + nop + nop + nop + + + .section ".fini" + .global _fini + .type _fini,#function + .align 4 +_fini: + st rp, @-r15 + enter #4 + nop + nop + nop + nop + nop + nop diff --git a/gcc/config/fr30/crtn.asm b/gcc/config/fr30/crtn.asm new file mode 100644 index 000000000..ac2712186 --- /dev/null +++ b/gcc/config/fr30/crtn.asm @@ -0,0 +1,44 @@ +# crtn.asm for ELF + +# Copyright (C) 1992, 1999, 2008, 2009 Free Software Foundation, Inc. +# Written By David Vinayak Henkel-Wallace, June 1992 +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# . + +# This file just makes sure that the .fini and .init sections do in +# fact return. Users may put any desired instructions in those sections. +# This file is the last thing linked into any executable. + + .section ".init" + .align 4 + + leave + ld @r15+,rp + ret + + + .section ".fini" + .align 4 + + leave + ld @r15+,rp + ret + +# Th-th-th-that is all folks! diff --git a/gcc/config/fr30/fr30-protos.h b/gcc/config/fr30/fr30-protos.h new file mode 100644 index 000000000..45bb3444e --- /dev/null +++ b/gcc/config/fr30/fr30-protos.h @@ -0,0 +1,35 @@ +/* Prototypes for fr30.c functions used in the md file & elsewhere. + Copyright (C) 1999, 2000, 2002, 2004, 2007, 2010 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +extern void fr30_expand_prologue (void); +extern void fr30_expand_epilogue (void); +extern unsigned int fr30_compute_frame_size (int, int); + +#ifdef RTX_CODE +extern int fr30_check_multiple_regs (rtx *, int, int); +extern void fr30_print_operand (FILE *, rtx, int); +extern void fr30_print_operand_address (FILE *, rtx); +extern rtx fr30_move_double (rtx *); +#ifdef HAVE_MACHINE_MODES +#define Mmode enum machine_mode +extern int fr30_const_double_is_zero (rtx); +#undef Mmode +#endif /* HAVE_MACHINE_MODES */ +#endif /* RTX_CODE */ diff --git a/gcc/config/fr30/fr30.c b/gcc/config/fr30/fr30.c new file mode 100644 index 000000000..74585b5dc --- /dev/null +++ b/gcc/config/fr30/fr30.c @@ -0,0 +1,1066 @@ +/* FR30 specific functions. + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2009, + 2010 Free Software Foundation, Inc. + Contributed by Cygnus Solutions. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/*{{{ Includes */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "tree.h" +#include "output.h" +#include "expr.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "df.h" +#include "diagnostic-core.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" + +/*}}}*/ +/*{{{ Function Prologues & Epilogues */ + +/* The FR30 stack looks like this: + + Before call After call + FP ->| | | | + +-----------------------+ +-----------------------+ high + | | | | memory + | local variables, | | local variables, | + | reg save area, etc. | | reg save area, etc. | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | args to the func that | | args to this func. | + | is being called that | | | + SP ->| do not fit in regs | | | + +-----------------------+ +-----------------------+ + | args that used to be | \ + | in regs; only created | | pretend_size + AP-> | for vararg funcs | / + +-----------------------+ + | | \ + | register save area | | + | | | + +-----------------------+ | reg_size + | return address | | + +-----------------------+ | + FP ->| previous frame ptr | / + +-----------------------+ + | | \ + | local variables | | var_size + | | / + +-----------------------+ + | | \ + low | room for args to | | + memory | other funcs called | | args_size + | from this one | | + SP ->| | / + +-----------------------+ + + Note, AP is a fake hard register. It will be eliminated in favor of + SP or FP as appropriate. + + Note, Some or all of the stack sections above may be omitted if they + are not needed. */ + +/* Structure to be filled in by fr30_compute_frame_size() with register + save masks, and offsets for the current function. */ +struct fr30_frame_info +{ + unsigned int total_size; /* # Bytes that the entire frame takes up. 
*/ + unsigned int pretend_size; /* # Bytes we push and pretend caller did. */ + unsigned int args_size; /* # Bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # Bytes needed to store regs. */ + unsigned int var_size; /* # Bytes that variables take up. */ + unsigned int frame_size; /* # Bytes in current frame. */ + unsigned int gmask; /* Mask of saved registers. */ + unsigned int save_fp; /* Nonzero if frame pointer must be saved. */ + unsigned int save_rp; /* Nonzero if return pointer must be saved. */ + int initialised; /* Nonzero if frame size already calculated. */ +}; + +/* Current frame information calculated by fr30_compute_frame_size(). */ +static struct fr30_frame_info current_frame_info; + +/* Zero structure to initialize current_frame_info. */ +static struct fr30_frame_info zero_frame_info; + +static void fr30_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, + tree, int *, int); +static bool fr30_must_pass_in_stack (enum machine_mode, const_tree); +static int fr30_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, + tree, bool); +static rtx fr30_function_arg (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static void fr30_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, + const_tree, bool); +static bool fr30_frame_pointer_required (void); +static rtx fr30_function_value (const_tree, const_tree, bool); +static rtx fr30_libcall_value (enum machine_mode, const_rtx); +static bool fr30_function_value_regno_p (const unsigned int); +static bool fr30_can_eliminate (const int, const int); +static void fr30_asm_trampoline_template (FILE *); +static void fr30_trampoline_init (rtx, tree, rtx); +static int fr30_num_arg_regs (enum machine_mode, const_tree); + +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_POINTER_MASK (1 << (RETURN_POINTER_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. + The return address and frame pointer are treated separately. + Don't consider them here. */ +#define MUST_SAVE_REGISTER(regno) \ + ( (regno) != RETURN_POINTER_REGNUM \ + && (regno) != FRAME_POINTER_REGNUM \ + && df_regs_ever_live_p (regno) \ + && ! call_used_regs [regno] ) + +#define MUST_SAVE_FRAME_POINTER (df_regs_ever_live_p (FRAME_POINTER_REGNUM) || frame_pointer_needed) +#define MUST_SAVE_RETURN_POINTER (df_regs_ever_live_p (RETURN_POINTER_REGNUM) || crtl->profile) + +#if UNITS_PER_WORD == 4 +#define WORD_ALIGN(SIZE) (((SIZE) + 3) & ~3) +#endif + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options fr30_option_optimization_table[] = + { + { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + +/* Initialize the GCC target structure. 
*/ +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES fr30_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG fr30_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE fr30_function_arg_advance + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE fr30_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE fr30_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P fr30_function_value_regno_p + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS fr30_setup_incoming_varargs +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK fr30_must_pass_in_stack + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED fr30_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE fr30_can_eliminate + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE fr30_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT fr30_trampoline_init + +#undef TARGET_EXCEPT_UNWIND_INFO +#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE fr30_option_optimization_table + +struct gcc_target targetm = TARGET_INITIALIZER; + + +/* Worker function for TARGET_CAN_ELIMINATE. */ + +bool +fr30_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == FRAME_POINTER_REGNUM || ! frame_pointer_needed); +} + +/* Returns the number of bytes offset between FROM_REG and TO_REG + for the current function. As a side effect it fills in the + current_frame_info structure, if the data is available. */ +unsigned int +fr30_compute_frame_size (int from_reg, int to_reg) +{ + int regno; + unsigned int return_value; + unsigned int var_size; + unsigned int args_size; + unsigned int pretend_size; + unsigned int reg_size; + unsigned int gmask; + + var_size = WORD_ALIGN (get_frame_size ()); + args_size = WORD_ALIGN (crtl->outgoing_args_size); + pretend_size = crtl->args.pretend_args_size; + + reg_size = 0; + gmask = 0; + + /* Calculate space needed for registers. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++) + { + if (MUST_SAVE_REGISTER (regno)) + { + reg_size += UNITS_PER_WORD; + gmask |= 1 << regno; + } + } + + current_frame_info.save_fp = MUST_SAVE_FRAME_POINTER; + current_frame_info.save_rp = MUST_SAVE_RETURN_POINTER; + + reg_size += (current_frame_info.save_fp + current_frame_info.save_rp) + * UNITS_PER_WORD; + + /* Save computed information. */ + current_frame_info.pretend_size = pretend_size; + current_frame_info.var_size = var_size; + current_frame_info.args_size = args_size; + current_frame_info.reg_size = reg_size; + current_frame_info.frame_size = args_size + var_size; + current_frame_info.total_size = args_size + var_size + reg_size + pretend_size; + current_frame_info.gmask = gmask; + current_frame_info.initialised = reload_completed; + + /* Calculate the required distance. 
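Worked numbers for the bookkeeping just above may help; the totals are plain word-aligned sums over the regions in the stack diagram earlier in this file. Everything below is illustrative (the EX_ names are local to the sketch), not target code:

#include <assert.h>

#define EX_UNITS_PER_WORD 4
#define EX_WORD_ALIGN(SIZE) (((SIZE) + 3) & ~3)

int
main (void)
{
  /* Say 10 bytes of locals, 8 bytes of outgoing arguments, two
     call-saved registers plus the frame pointer and the return
     pointer, and no pretend args.  */
  unsigned int var_size     = EX_WORD_ALIGN (10);               /* 12 */
  unsigned int args_size    = EX_WORD_ALIGN (8);                /*  8 */
  unsigned int reg_size     = (2 + 1 + 1) * EX_UNITS_PER_WORD;  /* 16 */
  unsigned int pretend_size = 0;

  unsigned int frame_size = args_size + var_size;
  unsigned int total_size = frame_size + reg_size + pretend_size;

  assert (frame_size == 20 && total_size == 36);

  /* The returned elimination offset is args_size + var_size when the
     target of the elimination is the stack pointer, plus reg_size when
     the source is the fake argument pointer.  */
  assert (args_size + var_size + reg_size == 36);
  return 0;
}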
*/ + return_value = 0; + + if (to_reg == STACK_POINTER_REGNUM) + return_value += args_size + var_size; + + if (from_reg == ARG_POINTER_REGNUM) + return_value += reg_size; + + return return_value; +} + +/* Called after register allocation to add any instructions needed for the + prologue. Using a prologue insn is favored compared to putting all of the + instructions in output_function_prologue(), since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + +void +fr30_expand_prologue (void) +{ + int regno; + rtx insn; + + if (! current_frame_info.initialised) + fr30_compute_frame_size (0, 0); + + /* This cases shouldn't happen. Catch it now. */ + gcc_assert (current_frame_info.total_size || !current_frame_info.gmask); + + /* Allocate space for register arguments if this is a variadic function. */ + if (current_frame_info.pretend_size) + { + int regs_to_save = current_frame_info.pretend_size / UNITS_PER_WORD; + + /* Push argument registers into the pretend arg area. */ + for (regno = FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS; regno --, regs_to_save --;) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (current_frame_info.gmask) + { + /* Save any needed call-saved regs. */ + for (regno = STACK_POINTER_REGNUM; regno--;) + { + if ((current_frame_info.gmask & (1 << regno)) != 0) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + + /* Save return address if necessary. */ + if (current_frame_info.save_rp) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, + RETURN_POINTER_REGNUM))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Save old frame pointer and create new one, if necessary. */ + if (current_frame_info.save_fp) + { + if (current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD)) + { + int enter_size = current_frame_info.frame_size + UNITS_PER_WORD; + rtx pattern; + + insn = emit_insn (gen_enter_func (GEN_INT (enter_size))); + RTX_FRAME_RELATED_P (insn) = 1; + + pattern = PATTERN (insn); + + /* Also mark all 3 subexpressions as RTX_FRAME_RELATED_P. */ + if (GET_CODE (pattern) == PARALLEL) + { + int x; + for (x = XVECLEN (pattern, 0); x--;) + { + rtx part = XVECEXP (pattern, 0, x); + + /* One of the insns in the ENTER pattern updates the + frame pointer. If we do not actually need the frame + pointer in this function then this is a side effect + rather than a desired effect, so we do not mark that + insn as being related to the frame set up. Doing this + allows us to compile the crash66.C test file in the + G++ testsuite. */ + if (! frame_pointer_needed + && GET_CODE (part) == SET + && SET_DEST (part) == hard_frame_pointer_rtx) + RTX_FRAME_RELATED_P (part) = 0; + else + RTX_FRAME_RELATED_P (part) = 1; + } + } + } + else + { + insn = emit_insn (gen_movsi_push (frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_pointer_needed) + { + insn = emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + + /* Allocate the stack frame. */ + if (current_frame_info.frame_size == 0) + ; /* Nothing to do. */ + else if (current_frame_info.save_fp + && current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD)) + ; /* Nothing to do. 
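Stepping back from the allocation chain, the frame-pointer handling above chooses between the ENTER instruction and an explicit push. A scalar paraphrase of that decision (illustrative only; choose_fp_save is not a function in this port, and the 4 comes from UNITS_PER_WORD as defined in fr30.h):

#include <assert.h>

enum fp_save_kind { FP_NOT_SAVED, FP_VIA_ENTER, FP_PUSH_THEN_COPY };

static enum fp_save_kind
choose_fp_save (int save_fp, unsigned int frame_size)
{
  if (!save_fp)
    return FP_NOT_SAVED;
  /* ENTER saves the old FP, establishes the new one and allocates the
     frame in one instruction, but only below the (1 << 10) - 4 byte
     limit checked above.  */
  if (frame_size < (1u << 10) - 4)
    return FP_VIA_ENTER;
  /* Otherwise push the old FP and, when a frame pointer is really
     needed, copy SP into it; the frame itself is allocated afterwards.  */
  return FP_PUSH_THEN_COPY;
}

int
main (void)
{
  assert (choose_fp_save (0, 64)   == FP_NOT_SAVED);
  assert (choose_fp_save (1, 64)   == FP_VIA_ENTER);
  assert (choose_fp_save (1, 4096) == FP_PUSH_THEN_COPY);
  return 0;
}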
*/ + else if (current_frame_info.frame_size <= 512) + { + insn = emit_insn (gen_add_to_stack + (GEN_INT (- (signed) current_frame_info.frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM); + insn = emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (crtl->profile) + emit_insn (gen_blockage ()); +} + +/* Called after register allocation to add any instructions needed for the + epilogue. Using an epilogue insn is favored compared to putting all of the + instructions in output_function_epilogue(), since it allows the scheduler + to intermix instructions with the restores of the caller saved registers. + In some cases, it might be necessary to emit a barrier instruction as the + first insn to prevent such scheduling. */ +void +fr30_expand_epilogue (void) +{ + int regno; + + /* Perform the inversion operations of the prologue. */ + gcc_assert (current_frame_info.initialised); + + /* Pop local variables and arguments off the stack. + If frame_pointer_needed is TRUE then the frame pointer register + has actually been used as a frame pointer, and we can recover + the stack pointer from it, otherwise we must unwind the stack + manually. */ + if (current_frame_info.frame_size > 0) + { + if (current_frame_info.save_fp && frame_pointer_needed) + { + emit_insn (gen_leave_func ()); + current_frame_info.save_fp = 0; + } + else if (current_frame_info.frame_size <= 508) + emit_insn (gen_add_to_stack + (GEN_INT (current_frame_info.frame_size))); + else + { + rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM); + emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size))); + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp)); + } + } + + if (current_frame_info.save_fp) + emit_insn (gen_movsi_pop (frame_pointer_rtx)); + + /* Pop all the registers that were pushed. */ + if (current_frame_info.save_rp) + emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, RETURN_POINTER_REGNUM))); + + for (regno = 0; regno < STACK_POINTER_REGNUM; regno ++) + if (current_frame_info.gmask & (1 << regno)) + emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, regno))); + + if (current_frame_info.pretend_size) + emit_insn (gen_add_to_stack (GEN_INT (current_frame_info.pretend_size))); + + /* Reset state info for each function. */ + current_frame_info = zero_frame_info; + + emit_jump_insn (gen_return_from_func ()); +} + +/* Do any needed setup for a variadic function. We must create a register + parameter block, and then copy any anonymous arguments, plus the last + named argument, from registers into memory. * copying actually done in + fr30_expand_prologue(). + + ARG_REGS_USED_SO_FAR has *not* been updated for the last named argument + which has type TYPE and mode MODE, and we rely on this fact. */ +void +fr30_setup_incoming_varargs (CUMULATIVE_ARGS *arg_regs_used_so_far, + enum machine_mode mode, + tree type ATTRIBUTE_UNUSED, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) +{ + int size; + + /* All BLKmode values are passed by reference. */ + gcc_assert (mode != BLKmode); + + /* ??? This run-time test as well as the code inside the if + statement is probably unnecessary. */ + if (targetm.calls.strict_argument_naming (arg_regs_used_so_far)) + /* If TARGET_STRICT_ARGUMENT_NAMING returns true, then the last named + arg must not be treated as an anonymous arg. 
*/ + arg_regs_used_so_far += fr30_num_arg_regs (mode, type); + + size = FR30_NUM_ARG_REGS - (* arg_regs_used_so_far); + + if (size <= 0) + return; + + * pretend_size = (size * UNITS_PER_WORD); +} + +/*}}}*/ +/*{{{ Printing operands */ + +/* Print a memory address as an operand to reference that memory location. */ + +void +fr30_print_operand_address (FILE *stream, rtx address) +{ + switch (GET_CODE (address)) + { + case SYMBOL_REF: + output_addr_const (stream, address); + break; + + default: + fprintf (stderr, "code = %x\n", GET_CODE (address)); + debug_rtx (address); + output_operand_lossage ("fr30_print_operand_address: unhandled address"); + break; + } +} + +/* Print an operand. */ + +void +fr30_print_operand (FILE *file, rtx x, int code) +{ + rtx x0; + + switch (code) + { + case '#': + /* Output a :D if this instruction is delayed. */ + if (dbr_sequence_length () != 0) + fputs (":D", file); + return; + + case 'p': + /* Compute the register name of the second register in a hi/lo + register pair. */ + if (GET_CODE (x) != REG) + output_operand_lossage ("fr30_print_operand: unrecognized %%p code"); + else + fprintf (file, "r%d", REGNO (x) + 1); + return; + + case 'b': + /* Convert GCC's comparison operators into FR30 comparison codes. */ + switch (GET_CODE (x)) + { + case EQ: fprintf (file, "eq"); break; + case NE: fprintf (file, "ne"); break; + case LT: fprintf (file, "lt"); break; + case LE: fprintf (file, "le"); break; + case GT: fprintf (file, "gt"); break; + case GE: fprintf (file, "ge"); break; + case LTU: fprintf (file, "c"); break; + case LEU: fprintf (file, "ls"); break; + case GTU: fprintf (file, "hi"); break; + case GEU: fprintf (file, "nc"); break; + default: + output_operand_lossage ("fr30_print_operand: unrecognized %%b code"); + break; + } + return; + + case 'B': + /* Convert GCC's comparison operators into the complimentary FR30 + comparison codes. */ + switch (GET_CODE (x)) + { + case EQ: fprintf (file, "ne"); break; + case NE: fprintf (file, "eq"); break; + case LT: fprintf (file, "ge"); break; + case LE: fprintf (file, "gt"); break; + case GT: fprintf (file, "le"); break; + case GE: fprintf (file, "lt"); break; + case LTU: fprintf (file, "nc"); break; + case LEU: fprintf (file, "hi"); break; + case GTU: fprintf (file, "ls"); break; + case GEU: fprintf (file, "c"); break; + default: + output_operand_lossage ("fr30_print_operand: unrecognized %%B code"); + break; + } + return; + + case 'A': + /* Print a signed byte value as an unsigned value. */ + if (GET_CODE (x) != CONST_INT) + output_operand_lossage ("fr30_print_operand: invalid operand to %%A code"); + else + { + HOST_WIDE_INT val; + + val = INTVAL (x); + + val &= 0xff; + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); + } + return; + + case 'x': + if (GET_CODE (x) != CONST_INT + || INTVAL (x) < 16 + || INTVAL (x) > 32) + output_operand_lossage ("fr30_print_operand: invalid %%x code"); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) - 16); + return; + + case 'F': + if (GET_CODE (x) != CONST_DOUBLE) + output_operand_lossage ("fr30_print_operand: invalid %%F code"); + else + { + char str[30]; + + real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), + sizeof (str), 0, 1); + fputs (str, file); + } + return; + + case 0: + /* Handled below. 
*/ + break; + + default: + fprintf (stderr, "unknown code = %x\n", code); + output_operand_lossage ("fr30_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + x0 = XEXP (x,0); + + switch (GET_CODE (x0)) + { + case REG: + gcc_assert ((unsigned) REGNO (x0) < ARRAY_SIZE (reg_names)); + fprintf (file, "@%s", reg_names [REGNO (x0)]); + break; + + case PLUS: + if (GET_CODE (XEXP (x0, 0)) != REG + || REGNO (XEXP (x0, 0)) < FRAME_POINTER_REGNUM + || REGNO (XEXP (x0, 0)) > STACK_POINTER_REGNUM + || GET_CODE (XEXP (x0, 1)) != CONST_INT) + { + fprintf (stderr, "bad INDEXed address:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + else if (REGNO (XEXP (x0, 0)) == FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT val = INTVAL (XEXP (x0, 1)); + if (val < -(1 << 9) || val > ((1 << 9) - 4)) + { + fprintf (stderr, "frame INDEX out of range:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + fprintf (file, "@(r14, #" HOST_WIDE_INT_PRINT_DEC ")", val); + } + else + { + HOST_WIDE_INT val = INTVAL (XEXP (x0, 1)); + if (val < 0 || val > ((1 << 6) - 4)) + { + fprintf (stderr, "stack INDEX out of range:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + fprintf (file, "@(r15, #" HOST_WIDE_INT_PRINT_DEC ")", val); + } + break; + + case SYMBOL_REF: + output_address (x0); + break; + + default: + fprintf (stderr, "bad MEM code = %x\n", GET_CODE (x0)); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + break; + } + break; + + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "0x%08lx", l); + break; + } + + /* Fall through. Let output_addr_const deal with it. */ + default: + output_addr_const (file, x); + break; + } + + return; +} + +/*}}}*/ + +/* Implements TARGET_FUNCTION_VALUE. */ + +static rtx +fr30_function_value (const_tree valtype, + const_tree fntype_or_decli ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (valtype), RETURN_VALUE_REGNUM); +} + +/* Implements TARGET_LIBCALL_VALUE. */ + +static rtx +fr30_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, RETURN_VALUE_REGNUM); +} + +/* Implements TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +fr30_function_value_regno_p (const unsigned int regno) +{ + return (regno == RETURN_VALUE_REGNUM); +} + +/*{{{ Function arguments */ + +/* Return true if we should pass an argument on the stack rather than + in registers. */ + +static bool +fr30_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + if (mode == BLKmode) + return true; + if (type == NULL) + return false; + return AGGREGATE_TYPE_P (type); +} + +/* Compute the number of word sized registers needed to hold a + function argument of mode INT_MODE and tree type TYPE. 
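The helper defined just below simply rounds the argument size up to whole words. With UNITS_PER_WORD of 4, as fr30.h defines it, that gives (regs_for_size is an illustrative stand-in):

#include <assert.h>

#define EX_UNITS_PER_WORD 4

static int
regs_for_size (int size_in_bytes)
{
  return (size_in_bytes + EX_UNITS_PER_WORD - 1) / EX_UNITS_PER_WORD;
}

int
main (void)
{
  assert (regs_for_size (1) == 1);   /* QImode value  */
  assert (regs_for_size (4) == 1);   /* SImode/SFmode */
  assert (regs_for_size (8) == 2);   /* DImode/DFmode */
  return 0;
}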
*/ +static int +fr30_num_arg_regs (enum machine_mode mode, const_tree type) +{ + int size; + + if (targetm.calls.must_pass_in_stack (mode, type)) + return 0; + + if (type && mode == BLKmode) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; +} + +/* Returns the number of bytes in which *part* of a parameter of machine + mode MODE and tree type TYPE (which may be NULL if the type is not known). + If the argument fits entirely in the argument registers, or entirely on + the stack, then 0 is returned. + CUM is the number of argument registers already used by earlier + parameters to the function. */ + +static int +fr30_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, + tree type, bool named) +{ + /* Unnamed arguments, i.e. those that are prototyped as ... + are always passed on the stack. + Also check here to see if all the argument registers are full. */ + if (named == 0 || *cum >= FR30_NUM_ARG_REGS) + return 0; + + /* Work out how many argument registers would be needed if this + parameter were to be passed entirely in registers. If there + are sufficient argument registers available (or if no registers + are needed because the parameter must be passed on the stack) + then return zero, as this parameter does not require partial + register, partial stack stack space. */ + if (*cum + fr30_num_arg_regs (mode, type) <= FR30_NUM_ARG_REGS) + return 0; + + return (FR30_NUM_ARG_REGS - *cum) * UNITS_PER_WORD; +} + +static rtx +fr30_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named) +{ + if (!named + || fr30_must_pass_in_stack (mode, type) + || *cum >= FR30_NUM_ARG_REGS) + return NULL_RTX; + else + return gen_rtx_REG (mode, *cum + FIRST_ARG_REGNUM); +} + +/* A C statement (sans semicolon) to update the summarizer variable CUM to + advance past an argument in the argument list. The values MODE, TYPE and + NAMED describe that argument. Once this is done, the variable CUM is + suitable for analyzing the *following* argument with `FUNCTION_ARG', etc. + + This macro need not do anything if the argument in question was passed on + the stack. The compiler knows how to track the amount of stack space used + for arguments without any special help. */ +static void +fr30_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, + const_tree type, bool named) +{ + *cum += named * fr30_num_arg_regs (mode, type); +} + +/*}}}*/ +/*{{{ Operand predicates */ + +#ifndef Mmode +#define Mmode enum machine_mode +#endif + +/* Returns true iff all the registers in the operands array + are in descending or ascending order. 
*/ +int +fr30_check_multiple_regs (rtx *operands, int num_operands, int descending) +{ + if (descending) + { + unsigned int prev_regno = 0; + + while (num_operands --) + { + if (GET_CODE (operands [num_operands]) != REG) + return 0; + + if (REGNO (operands [num_operands]) < prev_regno) + return 0; + + prev_regno = REGNO (operands [num_operands]); + } + } + else + { + unsigned int prev_regno = CONDITION_CODE_REGNUM; + + while (num_operands --) + { + if (GET_CODE (operands [num_operands]) != REG) + return 0; + + if (REGNO (operands [num_operands]) > prev_regno) + return 0; + + prev_regno = REGNO (operands [num_operands]); + } + } + + return 1; +} + +int +fr30_const_double_is_zero (rtx operand) +{ + REAL_VALUE_TYPE d; + + if (operand == NULL || GET_CODE (operand) != CONST_DOUBLE) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (d, operand); + + return REAL_VALUES_EQUAL (d, dconst0); +} + +/*}}}*/ +/*{{{ Instruction Output Routines */ + +/* Output a double word move. + It must be REG<-REG, REG<-MEM, MEM<-REG or REG<-CONST. + On the FR30 we are constrained by the fact that it does not + support offsetable addresses, and so we have to load the + address of the secnd word into the second destination register + before we can use it. */ + +rtx +fr30_move_double (rtx * operands) +{ + rtx src = operands[1]; + rtx dest = operands[0]; + enum rtx_code src_code = GET_CODE (src); + enum rtx_code dest_code = GET_CODE (dest); + enum machine_mode mode = GET_MODE (dest); + rtx val; + + start_sequence (); + + if (dest_code == REG) + { + if (src_code == REG) + { + int reverse = (REGNO (dest) == REGNO (src) + 1); + + /* We normally copy the low-numbered register first. However, if + the first register of operand 0 is the same as the second register + of operand 1, we must copy in the opposite order. */ + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + operand_subword (src, reverse, TRUE, mode))); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, !reverse, TRUE, mode), + operand_subword (src, !reverse, TRUE, mode))); + } + else if (src_code == MEM) + { + rtx addr = XEXP (src, 0); + rtx dest0 = operand_subword (dest, 0, TRUE, mode); + rtx dest1 = operand_subword (dest, 1, TRUE, mode); + rtx new_mem; + + gcc_assert (GET_CODE (addr) == REG); + + /* Copy the address before clobbering it. See PR 34174. 
*/ + emit_insn (gen_rtx_SET (SImode, dest1, addr)); + emit_insn (gen_rtx_SET (VOIDmode, dest0, + adjust_address (src, SImode, 0))); + emit_insn (gen_rtx_SET (SImode, dest1, + plus_constant (dest1, UNITS_PER_WORD))); + + new_mem = gen_rtx_MEM (SImode, dest1); + MEM_COPY_ATTRIBUTES (new_mem, src); + + emit_insn (gen_rtx_SET (VOIDmode, dest1, new_mem)); + } + else if (src_code == CONST_INT || src_code == CONST_DOUBLE) + { + rtx words[2]; + split_double (src, &words[0], &words[1]); + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), + words[0])); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), + words[1])); + } + } + else if (src_code == REG && dest_code == MEM) + { + rtx addr = XEXP (dest, 0); + rtx src0; + rtx src1; + + gcc_assert (GET_CODE (addr) == REG); + + src0 = operand_subword (src, 0, TRUE, mode); + src1 = operand_subword (src, 1, TRUE, mode); + + emit_move_insn (adjust_address (dest, SImode, 0), src0); + + if (REGNO (addr) == STACK_POINTER_REGNUM + || REGNO (addr) == FRAME_POINTER_REGNUM) + emit_insn (gen_rtx_SET (VOIDmode, + adjust_address (dest, SImode, UNITS_PER_WORD), + src1)); + else + { + rtx new_mem; + rtx scratch_reg_r0 = gen_rtx_REG (SImode, 0); + + /* We need a scratch register to hold the value of 'address + 4'. + We use r0 for this purpose. It is used for example for long + jumps and is already marked to not be used by normal register + allocation. */ + emit_insn (gen_movsi_internal (scratch_reg_r0, addr)); + emit_insn (gen_addsi_small_int (scratch_reg_r0, scratch_reg_r0, + GEN_INT (UNITS_PER_WORD))); + new_mem = gen_rtx_MEM (SImode, scratch_reg_r0); + MEM_COPY_ATTRIBUTES (new_mem, dest); + emit_move_insn (new_mem, src1); + emit_insn (gen_blockage ()); + } + } + else + /* This should have been prevented by the constraints on movdi_insn. */ + gcc_unreachable (); + + val = get_insns (); + end_sequence (); + + return val; +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. */ + +bool +fr30_frame_pointer_required (void) +{ + return (flag_omit_frame_pointer == 0 || crtl->args.pretend_args_size > 0); +} + +/*}}}*/ +/*{{{ Trampoline Output Routines */ + +/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. + On the FR30, the trampoline is: + + nop + ldi:32 STATIC, r12 + nop + ldi:32 FUNCTION, r0 + jmp @r0 + + The no-ops are to guarantee that the static chain and final + target are 32 bit aligned within the trampoline. That allows us to + initialize those locations with simple SImode stores. The alternative + would be to use HImode stores. */ + +static void +fr30_asm_trampoline_template (FILE *f) +{ + fprintf (f, "\tnop\n"); + fprintf (f, "\tldi:32\t#0, %s\n", reg_names [STATIC_CHAIN_REGNUM]); + fprintf (f, "\tnop\n"); + fprintf (f, "\tldi:32\t#0, %s\n", reg_names [COMPILER_SCRATCH_REGISTER]); + fprintf (f, "\tjmp\t@%s\n", reg_names [COMPILER_SCRATCH_REGISTER]); +} + +/* Implement TARGET_TRAMPOLINE_INIT. 
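fr30_trampoline_init just below patches the assembled template at byte offsets 4 and 12. Those offsets are consistent with 2-byte nop and ldi:32 opcodes followed by inline 4-byte immediates; that encoding is an assumption of this note, not something stated in the patch:

#include <assert.h>

int
main (void)
{
  const unsigned int chain_slot = 2 /* nop */ + 2 /* ldi:32 opcode */;
  const unsigned int func_slot  = chain_slot + 4 /* chain word */
                                  + 2 /* nop */ + 2 /* ldi:32 opcode */;

  assert (chain_slot == 4);    /* adjust_address (m_tramp, SImode, 4)  */
  assert (func_slot  == 12);   /* adjust_address (m_tramp, SImode, 12) */
  return 0;
}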
*/ + +static void +fr30_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx mem; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + mem = adjust_address (m_tramp, SImode, 4); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 12); + emit_move_insn (mem, fnaddr); +} + +/*}}}*/ +/* Local Variables: */ +/* folded-file: t */ +/* End: */ diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h new file mode 100644 index 000000000..d20d7fb79 --- /dev/null +++ b/gcc/config/fr30/fr30.h @@ -0,0 +1,871 @@ +/*{{{ Comment. */ + +/* Definitions of FR30 target. + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by Cygnus Solutions. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/*}}}*/ +/*{{{ Run-time target specifications. */ + +#undef ASM_SPEC +#define ASM_SPEC "" + +/* Define this to be a string constant containing `-D' options to define the + predefined macros that identify this machine and system. These macros will + be predefined unless the `-ansi' option is specified. */ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("fr30"); \ + builtin_assert ("machine=fr30"); \ + } \ + while (0) + +#define TARGET_VERSION fprintf (stderr, " (fr30)"); + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s" + +/* Include the OS stub library, so that the code can be simulated. + This is not the right way to do this. Ideally this kind of thing + should be done in the linker script - but I have not worked out how + to specify the location of a linker script in a gcc command line yet... */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!mno-lsim:-lsim} crtend.o%s crtn.o%s" + +#undef LIB_SPEC +#define LIB_SPEC "-lc" + +#undef LINK_SPEC +#define LINK_SPEC "%{h*} %{v:-V} \ + %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}" + +/*}}}*/ +/*{{{ Storage Layout. */ + +#define BITS_BIG_ENDIAN 1 + +#define BYTES_BIG_ENDIAN 1 + +#define WORDS_BIG_ENDIAN 1 + +#define UNITS_PER_WORD 4 + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + do \ + { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + (MODE) = SImode; \ + } \ + while (0) + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY 32 + +#define FUNCTION_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 32 + +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define STRICT_ALIGNMENT 1 + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/*}}}*/ +/*{{{ Layout of Source Language Data Types. 
*/ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/*}}}*/ +/*{{{ REGISTER BASICS. */ + +/* Number of hardware registers known to the compiler. They receive numbers 0 + through `FIRST_PSEUDO_REGISTER-1'; thus, the first pseudo register's number + really is assigned the number `FIRST_PSEUDO_REGISTER'. */ +#define FIRST_PSEUDO_REGISTER 21 + +/* Fixed register assignments: */ + +/* Here we do a BAD THING - reserve a register for use by the machine + description file. There are too many places in compiler where it + assumes that it can issue a branch or jump instruction without + providing a scratch register for it, and reload just cannot cope, so + we keep a register back for these situations. */ +#define COMPILER_SCRATCH_REGISTER 0 + +/* The register that contains the result of a function call. */ +#define RETURN_VALUE_REGNUM 4 + +/* The first register that can contain the arguments to a function. */ +#define FIRST_ARG_REGNUM 4 + +/* A call-used register that can be used during the function prologue. */ +#define PROLOGUE_TMP_REGNUM COMPILER_SCRATCH_REGISTER + +/* Register numbers used for passing a function's static chain pointer. If + register windows are used, the register number as seen by the called + function is `STATIC_CHAIN_INCOMING_REGNUM', while the register number as + seen by the calling function is `STATIC_CHAIN_REGNUM'. If these registers + are the same, `STATIC_CHAIN_INCOMING_REGNUM' need not be defined. + + The static chain register need not be a fixed register. + + If the static chain is passed in memory, these macros should not be defined; + instead, the next two macros should be defined. */ +#define STATIC_CHAIN_REGNUM 12 +/* #define STATIC_CHAIN_INCOMING_REGNUM */ + +/* An FR30 specific hardware register. */ +#define ACCUMULATOR_REGNUM 13 + +/* The register number of the frame pointer register, which is used to access + automatic variables in the stack frame. On some machines, the hardware + determines which register this is. On other machines, you can choose any + register you wish for this purpose. */ +#define FRAME_POINTER_REGNUM 14 + +/* The register number of the stack pointer register, which must also be a + fixed register according to `FIXED_REGISTERS'. On most machines, the + hardware determines which register this is. */ +#define STACK_POINTER_REGNUM 15 + +/* The following a fake hard registers that describe some of the dedicated + registers on the FR30. */ +#define CONDITION_CODE_REGNUM 16 +#define RETURN_POINTER_REGNUM 17 +#define MD_HIGH_REGNUM 18 +#define MD_LOW_REGNUM 19 + +/* An initializer that says which registers are used for fixed purposes all + throughout the compiled code and are therefore not available for general + allocation. These would include the stack pointer, the frame pointer + (except on machines where that can be used as a general register when no + frame pointer is needed), the program counter on machines where that is + considered one of the addressable registers, and any other numbered register + with a standard use. + + This information is expressed as a sequence of numbers, separated by commas + and surrounded by braces. 
The Nth number is 1 if register N is fixed, 0 + otherwise. + + The table initialized from this macro, and the table initialized by the + following one, may be overridden at run time either automatically, by the + actions of the macro `TARGET_CONDITIONAL_REGISTER_USAGE', or by the user + with the command options `-ffixed-REG', `-fcall-used-REG' and + `-fcall-saved-REG'. */ +#define FIXED_REGISTERS \ + { 1, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ \ + 0, 0, 0, 0, 0, 0, 0, 1, /* 8 - 15 */ \ + 1, 1, 1, 1, 1 } /* 16 - 20 */ + +/* XXX - MDL and MDH set as fixed for now - this is until I can get the + mul patterns working. */ + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in + general) by function calls as well as for fixed registers. This macro + therefore identifies the registers that are not available for general + allocation of values that must live across function calls. + + If a register has 0 in `CALL_USED_REGISTERS', the compiler automatically + saves it on function entry and restores it on function exit, if the register + is used within the function. */ +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, 1, 1, 1, 1, /* 0 - 7 */ \ + 0, 0, 0, 0, 1, 1, 0, 1, /* 8 - 15 */ \ + 1, 1, 1, 1, 1 } /* 16 - 20 */ + +/* A C initializer containing the assembler's names for the machine registers, + each one as a C string constant. This is what translates register numbers + in the compiler into assembler language. */ +#define REGISTER_NAMES \ +{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "ac", "fp", "sp", \ + "cc", "rp", "mdh", "mdl", "ap" \ +} + +/* If defined, a C initializer for an array of structures containing a name and + a register number. This macro defines additional names for hard registers, + thus allowing the `asm' option in declarations to refer to registers using + alternate names. */ +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + {"r13", 13}, {"r14", 14}, {"r15", 15}, {"usp", 15}, {"ps", 16}\ +} + +/*}}}*/ +/*{{{ How Values Fit in Registers. */ + +/* A C expression for the number of consecutive hard registers, starting at + register number REGNO, required to hold a value of mode MODE. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that is nonzero if it is permissible to store a value of mode + MODE in hard register number REGNO (or in several registers starting with + that one). */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) 1 + +/* A C expression that is nonzero if it is desirable to choose register + allocation so as to avoid move instructions between a value of mode MODE1 + and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are + ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be + zero. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +/*}}}*/ +/*{{{ Register Classes. */ + +/* An enumeral type that must be defined with all the register class names as + enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last + register class, followed by one more enumeral value, `LIM_REG_CLASSES', + which is not a register class but rather tells how many classes there are. + + Each register class has a number, which is the value of casting the class + name to type `int'. The number serves as an index in many of the tables + described below. 
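One invariant worth noting in the two register tables above: every register marked fixed is also marked call-used, as the CALL_USED_REGISTERS comment requires. A standalone check, with the values transcribed from those initializers:

#include <assert.h>

#define EX_NREGS 21

int
main (void)
{
  static const int fixed[EX_NREGS] =
    { 1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 1,  1, 1, 1, 1, 1 };
  static const int call_used[EX_NREGS] =
    { 1, 1, 1, 1, 1, 1, 1, 1,  0, 0, 0, 0, 1, 1, 0, 1,  1, 1, 1, 1, 1 };
  int i;

  for (i = 0; i < EX_NREGS; i++)
    if (fixed[i])
      assert (call_used[i]);
  return 0;
}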
*/ +enum reg_class +{ + NO_REGS, + MULTIPLY_32_REG, /* the MDL register as used by the MULH, MULUH insns */ + MULTIPLY_64_REG, /* the MDH,MDL register pair as used by MUL and MULU */ + LOW_REGS, /* registers 0 through 7 */ + HIGH_REGS, /* registers 8 through 15 */ + REAL_REGS, /* i.e. all the general hardware registers on the FR30 */ + ALL_REGS, + LIM_REG_CLASSES +}; + +#define GENERAL_REGS REAL_REGS +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +#define IRA_COVER_CLASSES \ +{ \ + REAL_REGS, MULTIPLY_64_REG, LIM_REG_CLASSES \ +} + +/* An initializer containing the names of the register classes as C string + constants. These names are used in writing some of the debugging dumps. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "MULTIPLY_32_REG", \ + "MULTIPLY_64_REG", \ + "LOW_REGS", \ + "HIGH_REGS", \ + "REAL_REGS", \ + "ALL_REGS" \ + } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. + + When the machine has more than 32 registers, an integer does not suffice. + Then the integers are replaced by sub-initializers, braced groupings + containing several integers. Each sub-initializer must be suitable as an + initializer for the type `HARD_REG_SET' which is defined in + `hard-reg-set.h'. */ +#define REG_CLASS_CONTENTS \ +{ \ + { 0 }, \ + { 1 << MD_LOW_REGNUM }, \ + { (1 << MD_LOW_REGNUM) | (1 << MD_HIGH_REGNUM) }, \ + { (1 << 8) - 1 }, \ + { ((1 << 8) - 1) << 8 }, \ + { (1 << CONDITION_CODE_REGNUM) - 1 }, \ + { (1 << FIRST_PSEUDO_REGISTER) - 1 } \ +} + +/* A C expression whose value is a register class containing hard register + REGNO. In general there is more than one such class; choose a class which + is "minimal", meaning that no smaller class also contains the register. */ +#define REGNO_REG_CLASS(REGNO) \ + ( (REGNO) < 8 ? LOW_REGS \ + : (REGNO) < CONDITION_CODE_REGNUM ? HIGH_REGS \ + : (REGNO) == MD_LOW_REGNUM ? MULTIPLY_32_REG \ + : (REGNO) == MD_HIGH_REGNUM ? MULTIPLY_64_REG \ + : ALL_REGS) + +/* A macro whose definition is the name of the class to which a valid base + register must belong. A base register is one used in an address which is + the register value plus a displacement. */ +#define BASE_REG_CLASS REAL_REGS + +/* A macro whose definition is the name of the class to which a valid index + register must belong. An index register is one used in an address where its + value is either multiplied by a scale factor or added to another register + (as well as added to a displacement). */ +#define INDEX_REG_CLASS REAL_REGS + +/* A C expression which is nonzero if register number NUM is suitable for use + as a base register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard register. */ +#define REGNO_OK_FOR_BASE_P(NUM) 1 + +/* A C expression which is nonzero if register number NUM is suitable for use + as an index register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard register. + + The difference between an index register and a base register is that the + index register may be scaled. 
If an address involves the sum of two + registers, neither one of them scaled, then either one may be labeled the + "base" and the other the "index"; but whichever labeling is used must fit + the machine's constraints of which registers may serve in each capacity. + The compiler will try both labelings, looking for one that is valid, and + will reload one or both registers only if neither labeling works. */ +#define REGNO_OK_FOR_INDEX_P(NUM) 1 + +/* A C expression for the maximum number of consecutive registers of + class CLASS needed to hold a value of mode MODE. + + This is closely related to the macro `HARD_REGNO_NREGS'. In fact, the value + of the macro `CLASS_MAX_NREGS (CLASS, MODE)' should be the maximum value of + `HARD_REGNO_NREGS (REGNO, MODE)' for all REGNO values in the class CLASS. + + This macro helps control the handling of multiple-word values in + the reload pass. */ +#define CLASS_MAX_NREGS(CLASS, MODE) HARD_REGNO_NREGS (0, MODE) + +/*}}}*/ +/*{{{ Basic Stack Layout. */ + +/* Define this macro if pushing a word onto the stack moves the stack pointer + to a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to macro nonzero if the addresses of local variable slots + are at negative offsets from the frame pointer. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset from the frame pointer to the first local variable slot to be + allocated. + + If `FRAME_GROWS_DOWNWARD', find the next slot's offset by subtracting the + first slot's length from `STARTING_FRAME_OFFSET'. Otherwise, it is found by + adding the length of the first slot to the value `STARTING_FRAME_OFFSET'. */ +/* #define STARTING_FRAME_OFFSET -4 */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. If not specified, the default value of zero + is used. This is the proper value for most machines. + + If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first + location at which outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET 0 + +/* Offset from the argument pointer register to the first argument's address. + On some machines it may depend on the data type of the function. + + If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first + argument's address. */ +#define FIRST_PARM_OFFSET(FUNDECL) 0 + +/* A C expression whose value is RTL representing the location of the incoming + return address at the beginning of any function, before the prologue. This + RTL is either a `REG', indicating that the return value is saved in `REG', + or a `MEM' representing a location in the stack. + + You only need to define this macro if you want to support call frame + debugging information like that provided by DWARF 2. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, RETURN_POINTER_REGNUM) + +/*}}}*/ +/*{{{ Register That Address the Stack Frame. */ + +/* The register number of the arg pointer register, which is used to access the + function's argument list. On some machines, this is the same as the frame + pointer register. On some machines, the hardware determines which register + this is. On other machines, you can choose any register you wish for this + purpose. If this is not the same register as the frame pointer register, + then you must mark it as a fixed register according to `FIXED_REGISTERS', or + arrange to be able to eliminate it. */ +#define ARG_POINTER_REGNUM 20 + +/*}}}*/ +/*{{{ Eliminating the Frame Pointer and the Arg Pointer. 
*/ + +/* If defined, this macro specifies a table of register pairs used to eliminate + unneeded registers that point into the stack frame. If it is not defined, + the only elimination attempted by the compiler is to replace references to + the frame pointer with references to the stack pointer. + + The definition of this macro is a list of structure initializations, each of + which specifies an original and replacement register. + + On some machines, the position of the argument pointer is not known until + the compilation is completed. In such a case, a separate hard register must + be used for the argument pointer. This register can be eliminated by + replacing it with either the frame pointer or the argument pointer, + depending on whether or not the frame pointer has been eliminated. + + In this case, you might specify: + #define ELIMINABLE_REGS \ + {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + + Note that the elimination of the argument pointer with the stack pointer is + specified first since that is the preferred elimination. */ + +#define ELIMINABLE_REGS \ +{ \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ +} + +/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the + initial difference between the specified pair of registers. This macro must + be defined if `ELIMINABLE_REGS' is defined. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = fr30_compute_frame_size (FROM, TO) + +/*}}}*/ +/*{{{ Passing Function Arguments on the Stack. */ + +/* If defined, the maximum amount of space required for outgoing arguments will + be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed onto the + stack for each call; instead, the function prologue should increase the + stack frame size by this amount. + + Defining both `PUSH_ROUNDING' and `ACCUMULATE_OUTGOING_ARGS' is not + proper. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/*}}}*/ +/*{{{ Function Arguments in Registers. */ + +/* The number of register assigned to holding function arguments. */ + +#define FR30_NUM_ARG_REGS 4 + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. For some target machines, the type + `int' suffices and can hold the number of bytes of argument so far. + + There is no need to record in `CUMULATIVE_ARGS' anything about the arguments + that have been passed on the stack. The compiler has other variables to + keep track of that. For target machines on which all arguments are passed + on the stack, there is no need to store anything in `CUMULATIVE_ARGS'; + however, the data structure must exist and should not be empty, so use + `int'. */ +/* On the FR30 this value is an accumulating count of the number of argument + registers that have been filled with argument values, as opposed to say, + the number of bytes of argument accumulated so far. */ +#define CUMULATIVE_ARGS int + +/* A C statement (sans semicolon) for initializing the variable CUM for the + state at the beginning of the argument list. The variable has type + `CUMULATIVE_ARGS'. The value of FNTYPE is the tree node for the data type + of the function which will receive the args, or 0 if the args are to a + compiler support library function. 
The value of INDIRECT is nonzero when + processing an indirect call, for example a call through a function pointer. + The value of INDIRECT is zero for a call to an explicitly named function, a + library function call, or when `INIT_CUMULATIVE_ARGS' is used to find + arguments for the function being compiled. + + When processing a call to a compiler support library function, LIBNAME + identifies which one. It is a `symbol_ref' rtx which contains the name of + the function, as a string. LIBNAME is 0 when an ordinary C function call is + being processed. Thus, each time this macro is called, either LIBNAME or + FNTYPE is nonzero, but never both of them at once. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which function arguments are sometimes passed. This does *not* include + implicit arguments such as the static chain and the structure-value address. + On many machines, no registers can be used for this purpose since all + function arguments are pushed on the stack. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + ((REGNO) >= FIRST_ARG_REGNUM && ((REGNO) < FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS)) + +/*}}}*/ +/*{{{ How Large Values are Returned. */ + +/* Define this macro to be 1 if all structure and union return values must be + in memory. Since this results in slower code, this should be defined only + if needed for compatibility with other compilers or with an ABI. If you + define this macro to be 0, then the conventions used for structure and union + return values are decided by the `TARGET_RETURN_IN_MEMORY' macro. + + If not defined, this defaults to the value 1. */ +#define DEFAULT_PCC_STRUCT_RETURN 1 + +/*}}}*/ +/*{{{ Generating Code for Profiling. */ + +/* A C statement or compound statement to output to FILE some assembler code to + call the profiling subroutine `mcount'. Before calling, the assembler code + must load the address of a counter variable into a register where `mcount' + expects to find the address. The name of this variable is `LP' followed by + the number LABELNO, so you would generate the name using `LP%d' in a + `fprintf'. + + The details of how the address should be passed to `mcount' are determined + by your operating system environment, not by GCC. To figure them out, + compile a small program for profiling using the system's installed C + compiler and look at the assembler code that results. */ +#define FUNCTION_PROFILER(FILE, LABELNO) \ +{ \ + fprintf (FILE, "\t mov rp, r1\n" ); \ + fprintf (FILE, "\t ldi:32 mcount, r0\n" ); \ + fprintf (FILE, "\t call @r0\n" ); \ + fprintf (FILE, ".word\tLP%d\n", LABELNO); \ +} + +/*}}}*/ +/*{{{ Trampolines for Nested Functions. */ + +/* A C expression for the size in bytes of the trampoline, as an integer. */ +#define TRAMPOLINE_SIZE 18 + +/* We want the trampoline to be aligned on a 32bit boundary so that we can + make sure the location of the static chain & target function within + the trampoline is also aligned on a 32bit boundary. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/*}}}*/ +/*{{{ Addressing Modes. */ + +/* A number, the maximum number of registers that can appear in a valid memory + address. Note that it is up to you to specify a value equal to the maximum + number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. 
*/ +#define MAX_REGS_PER_ADDRESS 1 + +/* A C compound statement with a conditional `goto LABEL;' executed if X (an + RTX) is a legitimate memory address on the target machine for a memory + operand of mode MODE. */ + +/* On the FR30 we only have one real addressing mode - an address in a + register. There are three special cases however: + + * indexed addressing using small positive offsets from the stack pointer + + * indexed addressing using small signed offsets from the frame pointer + + * register plus register addressing using R13 as the base register. + + At the moment we only support the first two of these special cases. */ + +#ifdef REG_OK_STRICT +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ + do \ + { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \ + goto LABEL; \ + } \ + while (0) +#else +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ + do \ + { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && (REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \ + || REGNO (XEXP (X, 0)) == ARG_POINTER_REGNUM) \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \ + goto LABEL; \ + } \ + while (0) +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as a base register. For hard registers, it should always accept those + which the hardware permits and reject the others. Whether the macro accepts + or rejects pseudo registers must be controlled by `REG_OK_STRICT' as + described above. This usually requires two variant definitions, of which + `REG_OK_STRICT' controls the one actually used. */ +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) (((unsigned) REGNO (X)) <= STACK_POINTER_REGNUM) +#else +#define REG_OK_FOR_BASE_P(X) 1 +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as an index register. + + The difference between an index register and a base register is that the + index register may be scaled. If an address involves the sum of two + registers, neither one of them scaled, then either one may be labeled the + "base" and the other the "index"; but whichever labeling is used must fit + the machine's constraints of which registers may serve in each capacity. + The compiler will try both labelings, looking for one that is valid, and + will reload one or both registers only if neither labeling works. */ +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X) + +/* A C expression that is nonzero if X is a legitimate constant for an + immediate operand on the target machine. 
You can assume that X satisfies + `CONSTANT_P', so you need not check this. In fact, `1' is a suitable + definition for this macro on machines where anything `CONSTANT_P' is valid. */ +#define LEGITIMATE_CONSTANT_P(X) 1 + +/*}}}*/ +/*{{{ Describing Relative Costs of Operations */ + +/* Define this macro as a C expression which is nonzero if accessing less than + a word of memory (i.e. a `char' or a `short') is no faster than accessing a + word of memory, i.e., if such access require more than one instruction or if + there is no difference in cost between byte and (aligned) word loads. + + When this macro is not defined, the compiler will access a field by finding + the smallest containing object; when it is defined, a fullword load will be + used if alignment permits. Unless bytes accesses are faster than word + accesses, using word accesses is preferable since it may eliminate + subsequent memory access if subsequent accesses occur to other fields in the + same word of the structure, but to different bytes. */ +#define SLOW_BYTE_ACCESS 1 + +/*}}}*/ +/*{{{ Dividing the output into sections. */ + +/* A C expression whose value is a string containing the assembler operation + that should precede instructions and read-only data. Normally `".text"' is + right. */ +#define TEXT_SECTION_ASM_OP "\t.text" + +/* A C expression whose value is a string containing the assembler operation to + identify the following data as writable initialized data. Normally + `".data"' is right. */ +#define DATA_SECTION_ASM_OP "\t.data" + +/* If defined, a C expression whose value is a string containing the + assembler operation to identify the following data as + uninitialized global data. If not defined, and neither + `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined, + uninitialized global data will be output in the data section if + `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be + used. */ +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/*}}}*/ +/*{{{ The Overall Framework of an Assembler File. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at the + end of the line. */ +#define ASM_COMMENT_START ";" + +/* A C string constant for text to be output before each `asm' statement or + group of consecutive ones. Normally this is `"#APP"', which is a comment + that has no effect on most assemblers but tells the GNU assembler that it + must check the lines that follow for all valid assembler constructs. */ +#define ASM_APP_ON "#APP\n" + +/* A C string constant for text to be output after each `asm' statement or + group of consecutive ones. Normally this is `"#NO_APP"', which tells the + GNU assembler to resume making the time-saving assumptions that are valid + for ordinary compiler output. */ +#define ASM_APP_OFF "#NO_APP\n" + +/*}}}*/ +/*{{{ Output and Generation of Labels. */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/*}}}*/ +/*{{{ Output of Assembler Instructions. */ + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand X. X is an RTL expression. + + CODE is a value that can be used to specify one of several ways of printing + the operand. It is used when identical operands must be printed differently + depending on the context. CODE comes from the `%' specification that was + used to request printing of the operand. 
If the specification was just + `%DIGIT' then CODE is 0; if the specification was `%LTR DIGIT' then CODE is + the ASCII code for LTR. + + If X is a register, this macro should print the register's name. The names + can be found in an array `reg_names' whose type is `char *[]'. `reg_names' + is initialized from `REGISTER_NAMES'. + + When the machine description has a specification `%PUNCT' (a `%' followed by + a punctuation character), this macro is called with a null pointer for X and + the punctuation character for CODE. */ +#define PRINT_OPERAND(STREAM, X, CODE) fr30_print_operand (STREAM, X, CODE) + +/* A C expression which evaluates to true if CODE is a valid punctuation + character for use in the `PRINT_OPERAND' macro. If + `PRINT_OPERAND_PUNCT_VALID_P' is not defined, it means that no punctuation + characters (except for the standard one, `%') are used in this way. */ +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) (CODE == '#') + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand that is a memory reference whose address is X. X + is an RTL expression. */ + +#define PRINT_OPERAND_ADDRESS(STREAM, X) fr30_print_operand_address (STREAM, X) + +#define REGISTER_PREFIX "%" +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" +#define IMMEDIATE_PREFIX "" + +/*}}}*/ +/*{{{ Output of Dispatch Tables. */ + +/* This macro should be provided on machines where the addresses in a dispatch + table are relative to the table's own address. + + The definition should be a C statement to output to the stdio stream STREAM + an assembler pseudo-instruction to generate a difference between two labels. + VALUE and REL are the numbers of two internal labels. The definitions of + these labels are output using `(*targetm.asm_out.internal_label)', and they must be + printed in the same way here. For example, + + fprintf (STREAM, "\t.word L%d-L%d\n", VALUE, REL) */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ +fprintf (STREAM, "\t.word .L%d-.L%d\n", VALUE, REL) + +/* This macro should be provided on machines where the addresses in a dispatch + table are absolute. + + The definition should be a C statement to output to the stdio stream STREAM + an assembler pseudo-instruction to generate a reference to a label. VALUE + is the number of an internal label whose definition is output using + `(*targetm.asm_out.internal_label)'. For example, + + fprintf (STREAM, "\t.word L%d\n", VALUE) */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ +fprintf (STREAM, "\t.word .L%d\n", VALUE) + +/*}}}*/ +/*{{{ Assembler Commands for Alignment. */ + +/* A C statement to output to the stdio stream STREAM an assembler command to + advance the location counter to a multiple of 2 to the POWER bytes. POWER + will be a C expression of type `int'. */ +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf ((STREAM), "\t.p2align %d\n", (POWER)) + +/*}}}*/ +/*{{{ Miscellaneous Parameters. */ + +/* An alias for a machine mode name. This is the machine mode that elements of + a jump-table should have. */ +#define CASE_VECTOR_MODE SImode + +/* The maximum number of bytes that a single instruction can move quickly from + memory to memory. */ +#define MOVE_MAX 8 + +/* A C expression which is nonzero if on this machine it is safe to "convert" + an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller + than INPREC) by merely operating on it as if it had only OUTPREC bits. + + On many machines, this expression can be 1. 
+ + When `TRULY_NOOP_TRUNCATION' returns 1 for a pair of sizes for modes for + which `MODES_TIEABLE_P' is 0, suboptimal code can result. If this is the + case, making `TRULY_NOOP_TRUNCATION' return 0 in such cases may improve + things. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* An alias for the machine mode for pointers. On most machines, define this + to be the integer mode corresponding to the width of a hardware pointer; + `SImode' on 32-bit machine or `DImode' on 64-bit machines. On some machines + you must define this to be one of the partial integer modes, such as + `PSImode'. + + The width of `Pmode' must be at least as large as the value of + `POINTER_SIZE'. If it is not equal, you must define the macro + `POINTERS_EXTEND_UNSIGNED' to specify how pointers are extended to `Pmode'. */ +#define Pmode SImode + +/* An alias for the machine mode used for memory references to functions being + called, in `call' RTL expressions. On most machines this should be + `QImode'. */ +#define FUNCTION_MODE QImode + +/* If cross-compiling, don't require stdio.h etc to build libgcc.a. */ +#if defined CROSS_DIRECTORY_STRUCTURE && ! defined inhibit_libc +#define inhibit_libc +#endif + +/*}}}*/ + +/* Local Variables: */ +/* folded-file: t */ +/* End: */ diff --git a/gcc/config/fr30/fr30.md b/gcc/config/fr30/fr30.md new file mode 100644 index 000000000..6b3559983 --- /dev/null +++ b/gcc/config/fr30/fr30.md @@ -0,0 +1,1268 @@ +;; FR30 machine description. +;; Copyright (C) 1998, 1999, 2000, 2002, 2004, 2005, 2007, 2010 +;; Free Software Foundation, Inc. +;; Contributed by Cygnus Solutions. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;;{{{ Attributes + +(define_attr "length" "" (const_int 2)) + +;; Used to distinguish between small memory model targets and big mode targets. + +(define_attr "size" "small,big" + (const (if_then_else (symbol_ref "TARGET_SMALL_MODEL") + (const_string "small") + (const_string "big")))) + + +;; Define an attribute to be used by the delay slot code. +;; An instruction by default is considered to be 'delayable' +;; that is, it can be placed into a delay slot, but it is not +;; itself a delayed branch type instruction. An instruction +;; whose type is 'delayed' is one which has a delay slot, and +;; an instruction whose delay_type is 'other' is one which does +;; not have a delay slot, nor can it be placed into a delay slot. 
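+;; For example, a plain two-byte register-to-register "mov" keeps the default
+;; 'delayable' attribute and the default length of 2, so it is a candidate for
+;; the delay slot of a 'delayed' branch (see the define_delay below), whereas
+;; the load/store multiple peepholes later in this file mark themselves as
+;; 'other' and can never be placed in a delay slot.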
+ +(define_attr "delay_type" "delayable,delayed,other" (const_string "delayable")) + +;;}}} +;;{{{ Delay Slot Specifications + +(define_delay (eq_attr "delay_type" "delayed") + [(and (eq_attr "delay_type" "delayable") + (eq_attr "length" "2")) + (nil) + (nil)] +) + +(include "predicates.md") +(include "constraints.md") + +;;}}} +;;{{{ Moves + +;;{{{ Comment + +;; Wrap moves in define_expand to prevent memory->memory moves from being +;; generated at the RTL level, which generates better code for most machines +;; which can't do mem->mem moves. + +;; If operand 0 is a `subreg' with mode M of a register whose own mode is wider +;; than M, the effect of this instruction is to store the specified value in +;; the part of the register that corresponds to mode M. The effect on the rest +;; of the register is undefined. + +;; This class of patterns is special in several ways. First of all, each of +;; these names *must* be defined, because there is no other way to copy a datum +;; from one place to another. + +;; Second, these patterns are not used solely in the RTL generation pass. Even +;; the reload pass can generate move insns to copy values from stack slots into +;; temporary registers. When it does so, one of the operands is a hard +;; register and the other is an operand that can need to be reloaded into a +;; register. + +;; Therefore, when given such a pair of operands, the pattern must +;; generate RTL which needs no reloading and needs no temporary +;; registers--no registers other than the operands. For example, if +;; you support the pattern with a `define_expand', then in such a +;; case the `define_expand' mustn't call `force_reg' or any other such +;; function which might generate new pseudo registers. + +;; This requirement exists even for subword modes on a RISC machine +;; where fetching those modes from memory normally requires several +;; insns and some temporary registers. Look in `spur.md' to see how +;; the requirement can be satisfied. + +;; During reload a memory reference with an invalid address may be passed as an +;; operand. Such an address will be replaced with a valid address later in the +;; reload pass. In this case, nothing may be done with the address except to +;; use it as it stands. If it is copied, it will not be replaced with a valid +;; address. No attempt should be made to make such an address into a valid +;; address and no routine (such as `change_address') that will do so may be +;; called. Note that `general_operand' will fail when applied to such an +;; address. +;; +;; The global variable `reload_in_progress' (which must be explicitly declared +;; if required) can be used to determine whether such special handling is +;; required. +;; +;; The variety of operands that have reloads depends on the rest of +;; the machine description, but typically on a RISC machine these can +;; only be pseudo registers that did not get hard registers, while on +;; other machines explicit memory references will get optional +;; reloads. +;; +;; If a scratch register is required to move an object to or from memory, it +;; can be allocated using `gen_reg_rtx' prior to reload. But this is +;; impossible during and after reload. If there are cases needing scratch +;; registers after reload, you must define `SECONDARY_INPUT_RELOAD_CLASS' and +;; perhaps also `SECONDARY_OUTPUT_RELOAD_CLASS' to detect them, and provide +;; patterns `reload_inM' or `reload_outM' to handle them. 
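+
+;; As a concrete illustration for this port: a memory-to-memory copy (for
+;; instance, *p = *q on char operands) is rewritten by the movqi, movhi and
+;; movsi expanders below into a load into a fresh pseudo register followed by
+;; a store, since none of the move alternatives in this file accept two
+;; memory operands.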
+ +;; The constraints on a `moveM' must permit moving any hard register to any +;; other hard register provided that `HARD_REGNO_MODE_OK' permits mode M in +;; both registers and `REGISTER_MOVE_COST' applied to their classes returns a +;; value of 2. + +;; It is obligatory to support floating point `moveM' instructions +;; into and out of any registers that can hold fixed point values, +;; because unions and structures (which have modes `SImode' or +;; `DImode') can be in those registers and they may have floating +;; point members. + +;; There may also be a need to support fixed point `moveM' instructions in and +;; out of floating point registers. Unfortunately, I have forgotten why this +;; was so, and I don't know whether it is still true. If `HARD_REGNO_MODE_OK' +;; rejects fixed point values in floating point registers, then the constraints +;; of the fixed point `moveM' instructions must be designed to avoid ever +;; trying to reload into a floating point register. + +;;}}} +;;{{{ Push and Pop + +;; Push a register onto the stack +(define_insn "movsi_push" + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "register_operand" "a"))] + "" + "st %0, @-r15" +) + +;; Pop a register off the stack +(define_insn "movsi_pop" + [(set (match_operand:SI 0 "register_operand" "=a") + (mem:SI (post_inc:SI (reg:SI 15))))] + "" + "ld @r15+, %0" +) + +;;}}} +;;{{{ 1 Byte Moves + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + if (!reload_in_progress + && !reload_completed + && GET_CODE (operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], QImode))) + operands[1] = copy_to_mode_reg (QImode, operands[1]); +}") + +(define_insn "movqi_unsigned_register_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "memory_operand" "m")))] + "" + "ldub %1, %0" +) + +(define_expand "movqi_signed_register_load" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))] + "" + " + emit_insn (gen_movqi_unsigned_register_load (operands[0], operands[1])); + emit_insn (gen_extendqisi2 (operands[0], operands[0])); + DONE; + " +) + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,red,m,r") + (match_operand:QI 1 "general_operand" "i,red,r,rm"))] + "" + "@ + ldi:8\\t#%A1, %0 + mov \\t%1, %0 + stb \\t%1, %0 + ldub \\t%1, %0" +) + +;;}}} +;;{{{ 2 Byte Moves + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + if (!reload_in_progress + && !reload_completed + && GET_CODE (operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], HImode))) + operands[1] = copy_to_mode_reg (HImode, operands[1]); +}") + +(define_insn "movhi_unsigned_register_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))] + "" + "lduh %1, %0" +) + +(define_expand "movhi_signed_register_load" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "memory_operand" "")))] + "" + " + emit_insn (gen_movhi_unsigned_register_load (operands[0], operands[1])); + emit_insn (gen_extendhisi2 (operands[0], operands[0])); + DONE; + " +) + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,red,m,r") + (match_operand:HI 1 "general_operand" 
"L,M,n,red,r,rm"))] + "" + "@ + ldi:8 \\t#%1, %0 + ldi:20\\t#%1, %0 + ldi:32\\t#%1, %0 + mov \\t%1, %0 + sth \\t%1, %0 + lduh \\t%1, %0" + [(set_attr "length" "*,4,6,*,*,*")] +) + +;;}}} +;;{{{ 4 Byte Moves + +;; If the destination is a MEM and the source is a +;; MEM or an CONST_INT move the source into a register. +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + "{ + if (!reload_in_progress + && !reload_completed + && GET_CODE(operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], SImode))) + operands[1] = copy_to_mode_reg (SImode, operands[1]); + }" +) + +;; We can do some clever tricks when loading certain immediate +;; values. We implement these tricks as define_splits, rather +;; than putting the code into the define_expand "movsi" above, +;; because if we put them there, they will be evaluated at RTL +;; generation time and then the combiner pass will come along +;; and replace the multiple insns that have been generated with +;; the original, slower, load insns. (The combiner pass only +;; cares about reducing the number of instructions, it does not +;; care about instruction lengths or speeds). Splits are +;; evaluated after the combine pass and before the scheduling +;; passes, so that they are the perfect place to put this +;; intelligence. +;; +;; XXX we probably ought to implement these for QI and HI mode +;; loads as well. + +;; If we are loading a small negative constant we can save space +;; and time by loading the positive value and then sign extending it. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "INTVAL (operands[1]) <= -1 && INTVAL (operands[1]) >= -128 + && (GET_CODE (operands[0]) != SUBREG + || SCALAR_INT_MODE_P (GET_MODE (XEXP (operands[0], 0))))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] + "{ + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + operands[2] = gen_lowpart (QImode, operands[0]); + }" +) + +;; If we are loading a large negative constant, one which does +;; not have any of its bottom 24 bit set, then we can save time +;; and space by loading the byte value and shifting it into place. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "(INTVAL (operands[1]) < 0) && ((INTVAL (operands[1]) & 0x00ffffff) == 0)" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (const_int 24))) + (clobber (reg:CC 16))])] + "{ + HOST_WIDE_INT val = INTVAL (operands[1]); + operands[2] = GEN_INT (val >> 24); + }" +) + +;; If we are loading a large positive constant, one which has bits +;; in the top byte set, but whose set bits all lie within an 8 bit +;; range, then we can save time and space by loading the byte value +;; and shifting it into place. 
+(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "(INTVAL (operands[1]) > 0x00ffffff) + && ((INTVAL (operands[1]) >> exact_log2 (INTVAL (operands[1]) & (- INTVAL (operands[1])))) < 0x100)" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC 16))])] + "{ + HOST_WIDE_INT val = INTVAL (operands[1]); + int shift = exact_log2 (val & ( - val)); + operands[2] = GEN_INT (val >> shift); + operands[3] = GEN_INT (shift); + }" +) + +;; When TARGET_SMALL_MODEL is defined we assume that all symbolic +;; values are addresses which will fit in 20 bits. + +(define_insn "movsi_internal" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,red,V,r,m") + (match_operand:SI 1 "general_operand" "L,M,n,i,rde,r,rm,r"))] + "" + "* + { + switch (which_alternative) + { + case 0: return \"ldi:8 \\t#%1, %0\"; + case 1: return \"ldi:20\\t#%1, %0\"; + case 2: return \"ldi:32\\t#%1, %0\"; + case 3: if (TARGET_SMALL_MODEL) + return \"ldi:20\\t%1, %0\"; + else + return \"ldi:32\\t%1, %0\"; + case 4: return \"mov \\t%1, %0\"; + case 5: return \"st \\t%1, %0\"; + case 6: return \"ld \\t%1, %0\"; + case 7: return \"st \\t%1, %0\"; + default: gcc_unreachable (); + } + }" + [(set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 4) + (eq_attr "alternative" "2") (const_int 6) + (eq_attr "alternative" "3") + (if_then_else (eq_attr "size" "small") + (const_int 4) + (const_int 6))] + (const_int 2)))] +) + +;;}}} +;;{{{ 8 Byte Moves + +;; Note - the FR30 does not have an 8 byte load/store instruction +;; but we have to support this pattern because some other patterns +;; (e.g. muldisi2) can produce a DImode result. +;; (This code is stolen from the M32R port.) + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (DImode, operands[1]); + " +) + +;; We use an insn and a split so that we can generate +;; RTL rather than text from fr30_move_double(). + +(define_insn "*movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,m,r") + (match_operand:DI 1 "di_operand" "r,m,r,nF"))] + "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)" + "#" + [(set_attr "length" "4,8,12,12")] +) + +(define_split + [(set (match_operand:DI 0 "nonimmediate_di_operand" "") + (match_operand:DI 1 "di_operand" ""))] + "reload_completed" + [(match_dup 2)] + "operands[2] = fr30_move_double (operands);" +) + +;;}}} +;;{{{ Load & Store Multiple Registers + +;; The load multiple and store multiple patterns are implemented +;; as peepholes because the only time they are expected to occur +;; is during function prologues and epilogues. 
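+;; For example, four consecutive pops of high registers in an epilogue (four
+;; separate "ld @r15+, ..." instructions) can be combined by the peepholes
+;; below into a single "ldm1", and four consecutive pushes ("st ..., @-r15")
+;; into a single "stm1", provided fr30_check_multiple_regs accepts the
+;; register combination.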
+ +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 3 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 4, 1)" + "stm1 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 3, 1)" + "stm1 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 2, 1)" + "stm1 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 2 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 3 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 4, 0)" + "ldm1 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 2 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 3, 0)" + "ldm1 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 2, 0)" + "ldm1 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 3 "low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 4, 1)" + "stm0 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 3, 1)" + "stm0 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 
"low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 2, 1)" + "stm0 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +;;}}} +;;{{{ Floating Point Moves + +;; Note - Patterns for SF mode moves are compulsory, but +;; patterns for DF are optional, as GCC can synthesize them. + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "{ + if (!reload_in_progress && !reload_completed + && memory_operand (operands[0], SFmode) + && memory_operand (operands[1], SFmode)) + operands[1] = copy_to_mode_reg (SFmode, operands[1]); + }" +) + +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,red,m,r") + (match_operand:SF 1 "general_operand" "Fn,i,rde,r,rm"))] + "" + "* + { + switch (which_alternative) + { + case 0: return \"ldi:32\\t%1, %0\"; + case 1: if (TARGET_SMALL_MODEL) + return \"ldi:20\\t%1, %0\"; + else + return \"ldi:32\\t%1, %0\"; + case 2: return \"mov \\t%1, %0\"; + case 3: return \"st \\t%1, %0\"; + case 4: return \"ld \\t%1, %0\"; + default: gcc_unreachable (); + } + }" + [(set (attr "length") (cond [(eq_attr "alternative" "0") (const_int 6) + (eq_attr "alternative" "1") + (if_then_else (eq_attr "size" "small") + (const_int 4) + (const_int 6))] + (const_int 2)))] +) + +(define_insn "*movsf_constant_store" + [(set (match_operand:SF 0 "memory_operand" "=m") + (match_operand:SF 1 "immediate_operand" "F"))] + "" + "* + { + const char * ldi_instr; + const char * tmp_reg; + static char buffer[100]; + + ldi_instr = fr30_const_double_is_zero (operands[1]) ? \"ldi:8\" : \"ldi:32\"; + + tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER]; + + sprintf (buffer, \"%s\\t#%%1, %s\\t;\\n\\tst\\t%s, %%0\\t; Created by movsf_constant_store\", + ldi_instr, tmp_reg, tmp_reg); + + return buffer; + }" + [(set_attr "length" "8")] +) + +;;}}} + +;;}}} +;;{{{ Conversions + +;; Signed conversions from a smaller integer to a larger integer + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "register_operand" "0")))] + "" + "extsb %0" +) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "0")))] + "" + "extsh %0" +) + +;; Unsigned conversions from a smaller integer to a larger integer + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "register_operand" "0")))] + "" + "extub %0" +) + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))] + "" + "extuh %0" +) + +;;}}} +;;{{{ Arithmetic + +;;{{{ Addition + +;; This is a special pattern just for adjusting the stack size. +(define_insn "add_to_stack" + [(set (reg:SI 15) + (plus:SI (reg:SI 15) + (match_operand:SI 0 "stack_add_operand" "i")))] + "" + "addsp %0" +) + +;; We need some trickery to be able to handle the addition of +;; large (i.e. outside +/- 16) constants. We need to be able to +;; handle this because reload assumes that it can generate add +;; instructions with arbitrary sized constants. 
+(define_expand "addsi3" + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + "{ + if ( GET_CODE (operands[2]) == REG + || GET_CODE (operands[2]) == SUBREG) + emit_insn (gen_addsi_regs (operands[0], operands[1], operands[2])); + else if (GET_CODE (operands[2]) != CONST_INT) + emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2])); + else if (INTVAL (operands[2]) >= -16 + && INTVAL (operands[2]) <= 15 + && (!REG_P (operands[1]) + || !REGNO_PTR_FRAME_P (REGNO (operands[1])) + || REGNO (operands[1]) == STACK_POINTER_REGNUM)) + emit_insn (gen_addsi_small_int (operands[0], operands[1], operands[2])); + else + emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2])); + DONE; + }" +) + +(define_insn "addsi_regs" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r")))] + "" + "addn %2, %0" +) + +;; Do not allow an eliminable register in the source register. It +;; might be eliminated in favor of the stack pointer, probably +;; increasing the offset, and so rendering the instruction illegal. +(define_insn "addsi_small_int" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "add_immediate_operand" "I,J")))] + "!REG_P (operands[1]) + || !REGNO_PTR_FRAME_P (REGNO (operands[1])) + || REGNO (operands[1]) == STACK_POINTER_REGNUM" + "@ + addn %2, %0 + addn2 %2, %0" +) + +(define_expand "addsi_big_int" + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")))] + "" + "{ + /* Cope with the possibility that ops 0 and 1 are the same register. 
*/ + if (rtx_equal_p (operands[0], operands[1])) + { + if (reload_in_progress || reload_completed) + { + rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/); + + emit_insn (gen_movsi (reg, operands[2])); + emit_insn (gen_addsi_regs (operands[0], operands[0], reg)); + } + else + { + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_addsi_regs (operands[0], operands[0], operands[2])); + } + } + else + { + emit_insn (gen_movsi (operands[0], operands[2])); + emit_insn (gen_addsi_regs (operands[0], operands[0], operands[1])); + } + DONE; + }" +) + +(define_insn "*addsi_for_reload" + [(set (match_operand:SI 0 "register_operand" "=&r,r,r") + (plus:SI (match_operand:SI 1 "register_operand" "r,r,r") + (match_operand:SI 2 "immediate_operand" "L,M,n")))] + "reload_in_progress || reload_completed" + "@ + ldi:8\\t#%2, %0 \\n\\taddn\\t%1, %0 + ldi:20\\t#%2, %0 \\n\\taddn\\t%1, %0 + ldi:32\\t#%2, %0 \\n\\taddn\\t%1, %0" + [(set_attr "length" "4,6,8")] +) + +;;}}} +;;{{{ Subtraction + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "r")))] + "" + "subn %2, %0" +) + +;;}}} +;;{{{ Multiplication + +;; Signed multiplication producing 64-bit results from 32-bit inputs +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (reg:CC 16))] + "" + "mul %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0" + [(set_attr "length" "6")] +) + +;; Unsigned multiplication producing 64-bit results from 32-bit inputs +(define_insn "umulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%r")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (reg:CC 16))] + "" + "mulu %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0" + [(set_attr "length" "6")] +) + +;; Signed multiplication producing 32-bit result from 16-bit inputs +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%r")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "r")))) + (clobber (reg:CC 16))] + "" + "mulh %2, %1\\n\\tmov\\tmdl, %0" + [(set_attr "length" "4")] +) + +;; Unsigned multiplication producing 32-bit result from 16-bit inputs +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%r")) + (zero_extend:SI (match_operand:HI 2 "register_operand" "r")))) + (clobber (reg:CC 16))] + "" + "muluh %2, %1\\n\\tmov\\tmdl, %0" + [(set_attr "length" "4")] +) + +;; Signed multiplication producing 32-bit result from 32-bit inputs +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "r"))) + (clobber (reg:CC 16))] + "" + "mul %2, %1\\n\\tmov\\tmdl, %0" + [(set_attr "length" "4")] +) + +;;}}} +;;}}} +;;{{{ Shifts + +;; Arithmetic Shift Left +(define_insn "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (ashift:SI (match_operand:SI 1 "register_operand" "0,0,0") + (match_operand:SI 2 "nonmemory_operand" "r,I,K"))) + (clobber (reg:CC 16))] + "" + "@ + lsl %2, %0 + lsl %2, %0 + lsl2 %x2, %0" +) + +;; Arithmetic Shift Right +(define_insn 
"ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0") + (match_operand:SI 2 "nonmemory_operand" "r,I,K"))) + (clobber (reg:CC 16))] + "" + "@ + asr %2, %0 + asr %2, %0 + asr2 %x2, %0" +) + +;; Logical Shift Right +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0") + (match_operand:SI 2 "nonmemory_operand" "r,I,K"))) + (clobber (reg:CC 16))] + "" + "@ + lsr %2, %0 + lsr %2, %0 + lsr2 %x2, %0" +) + +;;}}} +;;{{{ Logical Operations + +;; Logical AND, 32-bit integers +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "0"))) + (clobber (reg:CC 16))] + "" + "and %1, %0" +) + +;; Inclusive OR, 32-bit integers +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "0"))) + (clobber (reg:CC 16))] + "" + "or %1, %0" +) + +;; Exclusive OR, 32-bit integers +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" "0"))) + (clobber (reg:CC 16))] + "" + "eor %1, %0" +) + +;; One's complement, 32-bit integers +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (match_operand:SI 1 "register_operand" "")))] + "" + "{ + if (rtx_equal_p (operands[0], operands[1])) + { + if (reload_in_progress || reload_completed) + { + rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/); + + emit_insn (gen_movsi (reg, constm1_rtx)); + emit_insn (gen_xorsi3 (operands[0], operands[0], reg)); + } + else + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (reg, constm1_rtx)); + emit_insn (gen_xorsi3 (operands[0], operands[0], reg)); + } + } + else + { + emit_insn (gen_movsi_internal (operands[0], constm1_rtx)); + emit_insn (gen_xorsi3 (operands[0], operands[1], operands[0])); + } + DONE; + }" +) + +;;}}} +;;{{{ Comparisons + +;; The actual comparisons, generated by the cbranch and/or cstore expanders + +(define_insn "*cmpsi_internal" + [(set (reg:CC 16) + (compare:CC (match_operand:SI 0 "register_operand" "r,r,r") + (match_operand:SI 1 "nonmemory_operand" "r,I,J")))] + "" + "@ + cmp %1, %0 + cmp %1, %0 + cmp2 %1, %0" +) + +;;}}} +;;{{{ Branches + +;; Define_expands called by the machine independent part of the compiler +;; to allocate a new comparison register + +(define_expand "cbranchsi4" + [(set (reg:CC 16) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else (match_operator:CC 0 "ordered_comparison_operator" + [(reg:CC 16) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "" +) + + +;; Actual branches. We must allow for the (label_ref) and the (pc) to be +;; swapped. If they are swapped, it reverses the sense of the branch. + +;; This pattern matches the (branch-if-true) branches generated above. +;; It generates two different instruction sequences depending upon how +;; far away the destination is. 
+ +;; The calculation for the instruction length is derived as follows: +;; The branch instruction has a 9-bit signed displacement so we have +;; this inequality for the displacement: +;; +;; -256 <= pc < 256 +;; or +;; -256 + 256 <= pc + 256 < 256 + 256 +;; i.e. +;; 0 <= pc + 256 < 512 +;; +;; if we consider the displacement as an unsigned value, then negative +;; displacements become very large positive displacements, and the +;; inequality becomes: +;; +;; pc + 256 < 512 +;; +;; In order to allow for the fact that the real branch instruction works +;; from pc + 2, we increase the offset to 258. +;; +;; Note - we do not have to worry about whether the branch is delayed or +;; not, as branch shortening happens after delay slot reorganization. + +(define_insn "*branch_true" + [(set (pc) + (if_then_else (match_operator:CC 0 "comparison_operator" + [(reg:CC 16) + (const_int 0)]) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "* + { + if (get_attr_length (insn) == 2) + return \"b%b0%#\\t%l1\"; + else + { + static char buffer [100]; + const char * tmp_reg; + const char * ldi_insn; + + tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER]; + + ldi_insn = TARGET_SMALL_MODEL ? \"ldi:20\" : \"ldi:32\"; + + /* The code produced here is, for say the EQ case: + + Bne 1f + LDI