From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- gcc/config/ia64/lib1funcs.asm | 795 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 795 insertions(+) create mode 100644 gcc/config/ia64/lib1funcs.asm (limited to 'gcc/config/ia64/lib1funcs.asm') diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm new file mode 100644 index 000000000..b7eaa6eca --- /dev/null +++ b/gcc/config/ia64/lib1funcs.asm @@ -0,0 +1,795 @@ +/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc. + Contributed by James E. Wilson . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifdef L__divxf3 +// Compute a 80-bit IEEE double-extended quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. +// +// __divtf3 is an alternate symbol name for backward compatibility. + + .text + .align 16 + .global __divxf3 + .proc __divxf3 +__divxf3: +#ifdef SHARED + .global __divtf3 +__divtf3: +#endif + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fnma.s1 f11 = farg1, f10, f1 +(p6) fma.s1 f12 = farg0, f10, f0 + ;; +(p6) fma.s1 f13 = f11, f11, f0 +(p6) fma.s1 f14 = f11, f11, f11 + ;; +(p6) fma.s1 f11 = f13, f13, f11 +(p6) fma.s1 f13 = f14, f10, f10 + ;; +(p6) fma.s1 f10 = f13, f11, f10 +(p6) fnma.s1 f11 = farg1, f12, farg0 + ;; +(p6) fma.s1 f11 = f11, f10, f12 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fnma.s1 f12 = farg1, f11, farg0 + ;; +(p6) fma.s0 fret0 = f12, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + .endp __divxf3 +#endif + +#ifdef L__divdf3 +// Compute a 64-bit IEEE double quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. 
+ + .text + .align 16 + .global __divdf3 + .proc __divdf3 +__divdf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f11 = farg0, f10 +(p6) fnma.s1 f12 = farg1, f10, f1 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fmpy.s1 f13 = f12, f12 + ;; +(p6) fma.s1 f10 = f12, f10, f10 +(p6) fma.s1 f11 = f13, f11, f11 + ;; +(p6) fmpy.s1 f12 = f13, f13 +(p6) fma.s1 f10 = f13, f10, f10 + ;; +(p6) fma.d.s1 f11 = f12, f11, f11 +(p6) fma.s1 f10 = f12, f10, f10 + ;; +(p6) fnma.d.s1 f8 = farg1, f11, farg0 + ;; +(p6) fma.d fret0 = f8, f10, f11 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divdf3 +#endif + +#ifdef L__divsf3 +// Compute a 32-bit IEEE float quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. + + .text + .align 16 + .global __divsf3 + .proc __divsf3 +__divsf3: + cmp.eq p7, p0 = r0, r0 + frcpa.s0 f10, p6 = farg0, farg1 + ;; +(p6) cmp.ne p7, p0 = r0, r0 + .pred.rel.mutex p6, p7 +(p6) fmpy.s1 f8 = farg0, f10 +(p6) fnma.s1 f9 = farg1, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fmpy.s1 f9 = f9, f9 + ;; +(p6) fma.d.s1 f10 = f9, f8, f8 + ;; +(p6) fnorm.s.s0 fret0 = f10 +(p7) mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divsf3 +#endif + +#ifdef L__divdi3 +// Compute a 64-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divdi3 + .proc __divdi3 +__divdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an integer. + fcvt.fx.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divdi3 +#endif + +#ifdef L__moddi3 +// Compute a 64-bit integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __moddi3 + .proc __moddi3 +__moddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f14 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, so that they won't be treated as unsigned. + fcvt.xf f8 = f14 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. 
+(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __moddi3 +#endif + +#ifdef L__udivdi3 +// Compute a 64-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivdi3 + .proc __udivdi3 +__udivdi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + fcvt.xuf.s1 f8 = f8 + fcvt.xuf.s1 f9 = f9 +(p7) break 1 + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fnma.s1 f11 = f9, f10, f1 +(p6) fmpy.s1 f12 = f8, f10 + ;; +(p6) fmpy.s1 f13 = f11, f11 +(p6) fma.s1 f12 = f11, f12, f12 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivdi3 +#endif + +#ifdef L__umoddi3 +// Compute a 64-bit unsigned integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend (a). in1 holds the divisor (b). + + .text + .align 16 + .global __umoddi3 + .proc __umoddi3 +__umoddi3: + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f14 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + // Convert the inputs to FP, to avoid FP software assist faults. + fcvt.xuf.s1 f8 = f14 + fcvt.xuf.s1 f9 = f9 +(p7) break 1; + ;; + // Compute the reciprocal approximation. + frcpa.s1 f10, p6 = f8, f9 + ;; + // 3 Newton-Raphson iterations. +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f11 = f9, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f12, f12 +(p6) fmpy.s1 f13 = f11, f11 + ;; +(p6) fma.s1 f10 = f11, f10, f10 +(p6) fma.s1 f11 = f13, f12, f12 + ;; + sub in1 = r0, in1 +(p6) fma.s1 f10 = f13, f10, f10 +(p6) fnma.s1 f12 = f9, f11, f8 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f10 = f12, f10, f11 + ;; + // Round quotient to an unsigned integer. + fcvt.fxu.trunc.s1 f10 = f10 + ;; + // r = q * (-b) + a + xma.l f10 = f10, f9, f14 + ;; + // Transfer result to GP registers. + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umoddi3 +#endif + +#ifdef L__divsi3 +// Compute a 32-bit integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __divsi3 + .proc __divsi3 +__divsi3: + .regstk 2,0,0,0 + // Check divide by zero. 
+ cmp.ne.unc p0,p7=0,in1 + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +(p7) break 1 + ;; + mov r2 = 0x0ffdd + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __divsi3 +#endif + +#ifdef L__modsi3 +// Compute a 32-bit integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __modsi3 + .proc __modsi3 +__modsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + sxt4 in0 = in0 + sxt4 in1 = in1 + ;; + setf.sig f13 = r32 + setf.sig f9 = r33 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 +(p7) break 1 + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fx.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __modsi3 +#endif + +#ifdef L__udivsi3 +// Compute a 32-bit unsigned integer quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __udivsi3 + .proc __udivsi3 +__udivsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + fcvt.xf f8 = f8 + fcvt.xf f9 = f9 +(p7) break 1 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 + ;; +(p6) fmpy.s1 f8 = f8, f10 +(p6) fnma.s1 f9 = f9, f10, f1 + ;; +(p6) fma.s1 f8 = f9, f8, f8 +(p6) fma.s1 f9 = f9, f9, f11 + ;; +(p6) fma.s1 f10 = f9, f8, f8 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __udivsi3 +#endif + +#ifdef L__umodsi3 +// Compute a 32-bit unsigned integer modulus. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// in0 holds the dividend. in1 holds the divisor. + + .text + .align 16 + .global __umodsi3 + .proc __umodsi3 +__umodsi3: + .regstk 2,0,0,0 + mov r2 = 0x0ffdd + zxt4 in0 = in0 + zxt4 in1 = in1 + ;; + setf.sig f13 = in0 + setf.sig f9 = in1 + // Check divide by zero. + cmp.ne.unc p0,p7=0,in1 + ;; + sub in1 = r0, in1 + fcvt.xf f8 = f13 + fcvt.xf f9 = f9 + ;; + setf.exp f11 = r2 + frcpa.s1 f10, p6 = f8, f9 +(p7) break 1; + ;; +(p6) fmpy.s1 f12 = f8, f10 +(p6) fnma.s1 f10 = f9, f10, f1 + ;; + setf.sig f9 = in1 +(p6) fma.s1 f12 = f10, f12, f12 +(p6) fma.s1 f10 = f10, f10, f11 + ;; +(p6) fma.s1 f10 = f10, f12, f12 + ;; + fcvt.fxu.trunc.s1 f10 = f10 + ;; + xma.l f10 = f10, f9, f13 + ;; + getf.sig ret0 = f10 + br.ret.sptk rp + ;; + .endp __umodsi3 +#endif + +#ifdef L__save_stack_nonlocal +// Notes on save/restore stack nonlocal: We read ar.bsp but write +// ar.bspstore. This is because ar.bsp can be read at all times +// (independent of the RSE mode) but since it's read-only we need to +// restore the value via ar.bspstore. This is OK because +// ar.bsp==ar.bspstore after executing "flushrs". 
+ +// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) + + .text + .align 16 + .global __ia64_save_stack_nonlocal + .proc __ia64_save_stack_nonlocal +__ia64_save_stack_nonlocal: + { .mmf + alloc r18 = ar.pfs, 2, 0, 0, 0 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + st8 [in0] = in1, 24 + and r19 = 0x1c, r19 + ;; + } + { .mmi + st8 [in0] = r18, -16 + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov r16 = ar.bsp + mov r17 = ar.rnat + adds r2 = 8, in0 + ;; + } + { .mmi + st8 [in0] = r16 + st8 [r2] = r17 + } + { .mib + mov ar.rsc = r19 + br.ret.sptk.few rp + ;; + } + .endp __ia64_save_stack_nonlocal +#endif + +#ifdef L__nonlocal_goto +// void __ia64_nonlocal_goto(void *target_label, void *save_area, +// void *static_chain); + + .text + .align 16 + .global __ia64_nonlocal_goto + .proc __ia64_nonlocal_goto +__ia64_nonlocal_goto: + { .mmi + alloc r20 = ar.pfs, 3, 0, 0, 0 + ld8 r12 = [in1], 8 + mov.ret.sptk rp = in0, .L0 + ;; + } + { .mmf + ld8 r16 = [in1], 8 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in1], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmi + ld8 r18 = [in1] + mov ar.rsc = r19 + or r19 = 0x3, r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + ;; + } + { .mmi + loadrs + invala + mov r15 = in2 + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_nonlocal_goto +#endif + +#ifdef L__restore_stack_nonlocal +// This is mostly the same as nonlocal_goto above. +// ??? This has not been tested yet. + +// void __ia64_restore_stack_nonlocal(void *save_area) + + .text + .align 16 + .global __ia64_restore_stack_nonlocal + .proc __ia64_restore_stack_nonlocal +__ia64_restore_stack_nonlocal: + { .mmf + alloc r20 = ar.pfs, 4, 0, 0, 0 + ld8 r12 = [in0], 8 + ;; + } + { .mmb + ld8 r16=[in0], 8 + mov r19 = ar.rsc + ;; + } + { .mmi + flushrs + ld8 r17 = [in0], 8 + and r19 = 0x1c, r19 + ;; + } + { .mmf + ld8 r18 = [in0] + mov ar.rsc = r19 + ;; + } + { .mmi + mov ar.bspstore = r16 + ;; + mov ar.rnat = r17 + or r19 = 0x3, r19 + ;; + } + { .mmf + loadrs + invala + ;; + } +.L0: { .mib + mov ar.rsc = r19 + mov ar.pfs = r18 + br.ret.sptk.few rp + ;; + } + .endp __ia64_restore_stack_nonlocal +#endif + +#ifdef L__trampoline +// Implement the nested function trampoline. This is out of line +// so that we don't have to bother with flushing the icache, as +// well as making the on-stack trampoline smaller. +// +// The trampoline has the following form: +// +// +-------------------+ > +// TRAMP: | __ia64_trampoline | | +// +-------------------+ > fake function descriptor +// | TRAMP+16 | | +// +-------------------+ > +// | target descriptor | +// +-------------------+ +// | static link | +// +-------------------+ + + .text + .align 16 + .global __ia64_trampoline + .proc __ia64_trampoline +__ia64_trampoline: + { .mmi + ld8 r2 = [r1], 8 + ;; + ld8 r15 = [r1] + } + { .mmi + ld8 r3 = [r2], 8 + ;; + ld8 r1 = [r2] + mov b6 = r3 + } + { .bbb + br.sptk.many b6 + ;; + } + .endp __ia64_trampoline +#endif + +#ifdef SHARED +// Thunks for backward compatibility. 
+#ifdef L_fixtfdi + .text + .align 16 + .global __fixtfti + .proc __fixtfti +__fixtfti: + { .bbb + br.sptk.many __fixxfti + ;; + } + .endp __fixtfti +#endif +#ifdef L_fixunstfdi + .align 16 + .global __fixunstfti + .proc __fixunstfti +__fixunstfti: + { .bbb + br.sptk.many __fixunsxfti + ;; + } + .endp __fixunstfti +#endif +#ifdef L_floatditf + .align 16 + .global __floattitf + .proc __floattitf +__floattitf: + { .bbb + br.sptk.many __floattixf + ;; + } + .endp __floattitf +#endif +#endif -- cgit v1.2.3
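Editorial note: the division routines in this patch all follow the same pattern, commented above as "3 Newton-Raphson iterations": frcpa produces a rough reciprocal of the divisor, fma/fnma steps square away the error, and a final residual correction produces the rounded quotient. The following is a minimal C sketch of that scheme, not part of the patch and not bit-exact: recip_estimate() is a hypothetical stand-in for the frcpa instruction, and ordinary C arithmetic replaces the fused multiply-adds.

```c
/* Sketch of the reciprocal-refinement division used by __divsf3/__divdf3/
 * __divxf3 above.  Assumptions: recip_estimate() stands in for frcpa, and
 * plain C rounding replaces the fused fma/fnma steps, so results are only
 * approximately those of the assembly sequence. */
#include <stdio.h>

static double recip_estimate(double b)
{
    return 1.0 / b;                 /* frcpa gives only ~8 valid bits; exact here */
}

static double nr_divide(double a, double b)
{
    double y = recip_estimate(b);   /* y ~= 1/b                          */
    double q = a * y;               /* q ~= a/b          (fmpy)          */
    for (int i = 0; i < 3; i++) {   /* "3 Newton-Raphson iterations"     */
        double e = 1.0 - b * y;     /* error term        (fnma: 1 - b*y) */
        y += y * e;                 /* refine reciprocal (fma)           */
        q += q * e;                 /* refine quotient   (fma)           */
    }
    double r = a - b * q;           /* residual          (fnma: a - b*q) */
    return q + r * y;               /* final correction  (fma)           */
}

int main(void)
{
    printf("355/113 ~= %.17g\n", nr_divide(355.0, 113.0));
    return 0;
}
```

Because the error term e is squared by each full iteration, an initial estimate good to about 8 bits reaches double-extended precision after three steps, which is why the assembly needs no more.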
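The integer routines (__divdi3, __moddi3, and friends) wrap the same quotient computation: convert the operands to floating point (fcvt.xf / fcvt.xuf), refine the quotient, truncate it back to an integer (fcvt.fx.trunc), and for the modulus recover the remainder as r = q*(-b) + a with xma.l. A small C sketch of just that truncation and remainder identity, again illustrative and not part of the patch, with the host FP divide standing in for the refined quotient and no attempt to preserve full 64-bit precision:

```c
/* Sketch of the __divdi3/__moddi3 structure: quotient formed in FP,
 * truncated toward zero, remainder recovered as r = q*(-b) + a.
 * Assumption: the host double divide replaces the frcpa refinement, so
 * this only demonstrates the truncation and remainder identity. */
#include <stdio.h>
#include <stdint.h>

static void divmod64(int64_t a, int64_t b, int64_t *q, int64_t *r)
{
    double fa = (double)a, fb = (double)b;  /* fcvt.xf: operands to FP        */
    *q = (int64_t)(fa / fb);                /* fcvt.fx.trunc: round toward 0  */
    *r = *q * (-b) + a;                     /* xma.l: r = q*(-b) + a          */
}

int main(void)
{
    int64_t q, r;
    divmod64(-7, 3, &q, &r);
    printf("q = %lld, r = %lld\n", (long long)q, (long long)r);  /* -2, -1 */
    return 0;
}
```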