# exp(x) = 2^hi + 2^hi (2^lo - 1)# where hi+lo = log2e*x with 128bit precision# exact log2e*x calculation depends on nearest rounding mode# using the exact multiplication method of Dekker and Veltkamp.global expl.type expl,@functionexpl:fldt 4(%esp)# interesting case: 0x1p-32 <= |x| < 16384# check if (exponent|0x8000) is in [0xbfff-32, 0xbfff+13]mov 12(%esp), %axor $0x8000, %axsub $0xbfdf, %axcmp $45, %axjbe 2ftest %ax, %axfld1js 1f# if |x|>=0x1p14 or nan return 2^trunc(x)fscalefstp %st(1)ret# if |x|<0x1p-32 return 1+x1: faddpret# should be 0x1.71547652b82fe178p0L == 0x3fff b8aa3b29 5c17f0bc# it will be wrong on non-nearest rounding mode2: fldl2esubl $44, %esp# hi = log2e_hi*x# 2^hi = exp2l(hi)fmul %st(1),%stfld %st(0)fstpt (%esp)fstpt 16(%esp)fstpt 32(%esp).hidden __exp2lcall __exp2l# if 2^hi == inf return 2^hifld %st(0)fstpt (%esp)cmpw $0x7fff, 8(%esp)je 1ffldt 32(%esp)fldt 16(%esp)# fpu stack: 2^hi x hi# exact mult: x*log2efld %st(1)# c = 0x1p32+1pushl $0x41f00000pushl $0x00100000fldl (%esp)# xh = x - c*x + c*x# xl = x - xhfmulpfld %st(2)fsub %st(1), %stfaddpfld %st(2)fsub %st(1), %st# yh = log2e_hi - c*log2e_hi + c*log2e_hipushl $0x3ff71547pushl $0x65200000fldl (%esp)# fpu stack: 2^hi x hi xh xl yh# lo = hi - xh*yh + xl*yhfld %st(2)fmul %st(1), %stfsubp %st, %st(4)fmul %st(1), %stfaddp %st, %st(3)# yl = log2e_hi - yhpushl $0x3de705fcpushl $0x2f000000fldl (%esp)# fpu stack: 2^hi x lo xh xl yl# lo += xh*yl + xl*ylfmul %st, %st(2)fmulp %st, %st(1)fxch %st(2)faddpfaddp# log2e_lopushl $0xbfbepushl $0x82f0025fpushl $0x2dc582eefldt (%esp)addl $36,%esp# fpu stack: 2^hi x lo log2e_lo# lo += log2e_lo*x# return 2^hi + 2^hi (2^lo - 1)fmulp %st, %st(2)faddpf2xm1fmul %st(1), %stfaddp1: addl $44, %espret