1: # 2: # Copyright (c) 1985 Regents of the University of California. 3: # 4: # Use and reproduction of this software are granted in accordance with 5: # the terms and conditions specified in the Berkeley Software License 6: # Agreement (in particular, this entails acknowledgement of the programs' 7: # source, and inclusion of this notice) with the additional understanding 8: # that all recipients should regard themselves as participants in an 9: # ongoing research project and hence should feel obligated to report 10: # their experiences (good or bad) with these elementary function codes, 11: # using "sendbug 4bsd-bugs@BERKELEY", to the authors. 12: # 13: 14: # @(#)atan2.s 1.2 (Berkeley) 8/21/85 15: 16: # ATAN2(Y,X) 17: # RETURN ARG (X+iY) 18: # VAX D FORMAT (56 BITS PRECISION) 19: # CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85; 20: # 21: # 22: # Method : 23: # 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). 24: # 2. Reduce x to positive by (if x and y are unexceptional): 25: # ARG (x+iy) = arctan(y/x) ... if x > 0, 26: # ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, 27: # 3. According to the integer k=4t+0.25 truncated , t=y/x, the argument 28: # is further reduced to one of the following intervals and the 29: # arctangent of y/x is evaluated by the corresponding formula: 30: # 31: # [0,7/16] atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) 32: # [7/16,11/16] atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) ) 33: # [11/16.19/16] atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) ) 34: # [19/16,39/16] atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) ) 35: # [39/16,INF] atan(y/x) = atan(INF) + atan( -x/y ) 36: # 37: # Special cases: 38: # Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y). 39: # 40: # ARG( NAN , (anything) ) is NaN; 41: # ARG( (anything), NaN ) is NaN; 42: # ARG(+(anything but NaN), +-0) is +-0 ; 43: # ARG(-(anything but NaN), +-0) is +-PI ; 44: # ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2; 45: # ARG( +INF,+-(anything but INF and NaN) ) is +-0 ; 46: # ARG( -INF,+-(anything but INF and NaN) ) is +-PI; 47: # ARG( +INF,+-INF ) is +-PI/4 ; 48: # ARG( -INF,+-INF ) is +-3PI/4; 49: # ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2; 50: # 51: # Accuracy: 52: # atan2(y,x) returns the exact ARG(x+iy) nearly rounded. 53: # 54: .text 55: .align 1 56: .globl _atan2 57: _atan2 : 58: .word 0x0ff4 59: movq 4(ap),r2 # r2 = y 60: movq 12(ap),r4 # r4 = x 61: bicw3 $0x7f,r2,r0 62: bicw3 $0x7f,r4,r1 63: cmpw r0,$0x8000 # y is the reserved operand 64: jeql resop 65: cmpw r1,$0x8000 # x is the reserved operand 66: jeql resop 67: subl2 $8,sp 68: bicw3 $0x7fff,r2,-4(fp) # copy y sign bit to -4(fp) 69: bicw3 $0x7fff,r4,-8(fp) # copy x sign bit to -8(fp) 70: cmpd r4,$0x4080 # x = 1.0 ? 71: bneq xnot1 72: movq r2,r0 73: bicw2 $0x8000,r0 # t = |y| 74: movq r0,r2 # y = |y| 75: brb begin 76: xnot1: 77: bicw3 $0x807f,r2,r11 # yexp 78: jeql yeq0 # if y=0 goto yeq0 79: bicw3 $0x807f,r4,r10 # xexp 80: jeql pio2 # if x=0 goto pio2 81: subw2 r10,r11 # k = yexp - xexp 82: cmpw r11,$0x2000 # k >= 64 (exp) ? 83: jgeq pio2 # atan2 = +-pi/2 84: divd3 r4,r2,r0 # t = y/x never overflow 85: bicw2 $0x8000,r0 # t > 0 86: bicw2 $0xff80,r2 # clear the exponent of y 87: bicw2 $0xff80,r4 # clear the exponent of x 88: bisw2 $0x4080,r2 # normalize y to [1,2) 89: bisw2 $0x4080,r4 # normalize x to [1,2) 90: subw2 r11,r4 # scale x so that yexp-xexp=k 91: begin: 92: cmpw r0,$0x411c # t : 39/16 93: jgeq L50 94: addl3 $0x180,r0,r10 # 8*t 95: cvtrfl r10,r10 # [8*t] rounded to int 96: ashl $-1,r10,r10 # [8*t]/2 97: casel r10,$0,$4 98: L1: 99: .word L20-L1 100: .word L20-L1 101: .word L30-L1 102: .word L40-L1 103: .word L40-L1 104: L10: 105: movq $0xb4d9940f985e407b,r6 # Hi=.98279372324732906796d0 106: movq $0x21b1879a3bc2a2fc,r8 # Lo=-.17092002525602665777d-17 107: subd3 r4,r2,r0 # y-x 108: addw2 $0x80,r0 # 2(y-x) 109: subd2 r4,r0 # 2(y-x)-x 110: addw2 $0x80,r4 # 2x 111: movq r2,r10 112: addw2 $0x80,r10 # 2y 113: addd2 r10,r2 # 3y 114: addd2 r4,r2 # 3y+2x 115: divd2 r2,r0 # (2y-3x)/(2x+3y) 116: brw L60 117: L20: 118: cmpw r0,$0x3280 # t : 2**(-28) 119: jlss L80 120: clrq r6 # Hi=r6=0, Lo=r8=0 121: clrq r8 122: brw L60 123: L30: 124: movq $0xda7b2b0d63383fed,r6 # Hi=.46364760900080611433d0 125: movq $0xf0ea17b2bf912295,r8 # Lo=.10147340032515978826d-17 126: movq r2,r0 127: addw2 $0x80,r0 # 2y 128: subd2 r4,r0 # 2y-x 129: addw2 $0x80,r4 # 2x 130: addd2 r2,r4 # 2x+y 131: divd2 r4,r0 # (2y-x)/(2x+y) 132: brb L60 133: L50: 134: movq $0x68c2a2210fda40c9,r6 # Hi=1.5707963267948966135d1 135: movq $0x06e0145c26332326,r8 # Lo=.22517417741562176079d-17 136: cmpw r0,$0x5100 # y : 2**57 137: bgeq L90 138: divd3 r2,r4,r0 139: bisw2 $0x8000,r0 # -x/y 140: brb L60 141: L40: 142: movq $0x68c2a2210fda4049,r6 # Hi=.78539816339744830676d0 143: movq $0x06e0145c263322a6,r8 # Lo=.11258708870781088040d-17 144: subd3 r4,r2,r0 # y-x 145: addd2 r4,r2 # y+x 146: divd2 r2,r0 # (y-x)/(y+x) 147: L60: 148: movq r0,r10 149: muld2 r0,r0 150: polyd r0,$12,ptable 151: muld2 r10,r0 152: subd2 r0,r8 153: addd3 r8,r10,r0 154: addd2 r6,r0 155: L80: 156: movw -8(fp),r2 157: bneq pim 158: bisw2 -4(fp),r0 # return sign(y)*r0 159: ret 160: L90: # x >= 2**25 161: movq r6,r0 162: brb L80 163: pim: 164: subd3 r0,$0x68c2a2210fda4149,r0 # pi-t 165: bisw2 -4(fp),r0 166: ret 167: yeq0: 168: movw -8(fp),r2 169: beql zero # if sign(x)=1 return pi 170: movq $0x68c2a2210fda4149,r0 # pi=3.1415926535897932270d1 171: ret 172: zero: 173: clrq r0 # return 0 174: ret 175: pio2: 176: movq $0x68c2a2210fda40c9,r0 # pi/2=1.5707963267948966135d1 177: bisw2 -4(fp),r0 # return sign(y)*pi/2 178: ret 179: resop: 180: movq $0x8000,r0 # propagate the reserved operand 181: ret 182: .align 2 183: ptable: 184: .quad 0xb50f5ce96e7abd60 185: .quad 0x51e44a42c1073e02 186: .quad 0x3487e3289643be35 187: .quad 0xdb62066dffba3e54 188: .quad 0xcf8e2d5199abbe70 189: .quad 0x26f39cb884883e88 190: .quad 0x135117d18998be9d 191: .quad 0x602ce9742e883eba 192: .quad 0xa35ad0be8e38bee3 193: .quad 0xffac922249243f12 194: .quad 0x7f14ccccccccbf4c 195: .quad 0xaa8faaaaaaaa3faa 196: .quad 0x0000000000000000