/*
 * Program: uldiv.s
 * Copyright 1993, GTE Government Systems
 * Author:  Steven M. Schultz
 *
 *  Version	Date		Modification
 *	0.0	02Feb91		1. Initial inspiration struck.
 *	1.0	05Jun93		2. Released into the Public Domain.
 */

#include "DEFS.h"

/*
 * All routines have both a C interface and an assembly interface.  Normally
 * the two are the same.  In the case of 'ulsh' the compiler has placed one
 * of the operands in r0 and r1 so the assembly interface differs from the
 * C interface.
 */

/*
 * u_long uldiv(lhs, rhs)
 *	u_long	lhs, rhs;
 *
 * unsigned 32-bit "/" routine.  Calls to uldiv are generated automatically
 * by the C compiler.
 *
 * Arguments are passed on the stack, high word first: on entry 2(sp) is
 * hi(lhs), 4(sp) is lo(lhs), 6(sp) is hi(rhs) and 8(sp) is lo(rhs).  The
 * quotient is returned in r0 (high word) and r1 (low word).  All other
 * registers used are saved and restored.
 *
 * Division by zero does not trap: both versions return lhs unchanged.
 */

#if	!defined(KERNEL)
/*
 * uldiv for applications (uses floating point)
 *
 * l2f and l6f are external helpers; per their use here they load the
 * unsigned long at 2(sp) into fr0 and the one at 6(sp) into fr3.  The
 * trailing seti presumably undoes a long-integer mode set by those helpers
 * (needed for the 32-bit movfi) - confirm against their source.
 */
	.globl	l2f, l6f
	.globl	uldiv
uldiv:
ENTRY(uldiv)
	jsr	pc,l2f		/ 2(sp) -> fr0
	jsr	pc,l6f		/ 6(sp) -> fr3
	tstf	fr3		/ check for zero divisor
	cfcc			/   don't want to have an FP fault
	beq	1f		/   in integer arithmetic
	divf	fr3,fr0		/ fr0 /= rhs
1:
	movfi	fr0,-(sp)	/ convert quotient to integer on the stack
	mov	(sp)+,r0	/ return result
	mov	(sp)+,r1
	seti			/ back to short integer mode
	rts	pc
#else
/*
 * uldiv for the kernel (fixed point only - no FP)
 *
 * After the three register saves below the stack holds:
 *	0(sp) r4, 2(sp) r3, 4(sp) r2, 6(sp) return pc,
 *	8.(sp) hi(lhs), 10.(sp) lo(lhs), 12.(sp) hi(rhs), 14.(sp) lo(rhs)
 *
 * Three cases:
 *   rhs >= 2^15	shift and subtract (slowuldiv)
 *   rhs <= 1		quotient is lhs, returned directly.  The hardware
 *			divide cannot be used here: with rhs == 1 and
 *			hi(lhs) >= 2^15 the first div below would need a
 *			16 bit quotient and overflow, and with rhs == 0 it
 *			fails outright.  Returning lhs for a zero divisor
 *			matches the floating point version above.
 *   otherwise		two hardware divides (schoolbook long division with
 *			16 bit digits), with a correction step when the
 *			second quotient does not fit in 15 bits.
 */

	.globl	uldiv
uldiv:
ENTRY(uldiv)
	mov	r2,-(sp)	/ faster than csv/cret ...
	mov	r3,-(sp)
	mov	r4,-(sp)
	mov	14.(sp),r3	/ r3 = lo(rhs)
	bmi	slowuldiv	/  rhs >= 2^15
	tst	12.(sp)		/ hi(rhs) empty?
	bne	slowuldiv	/   no, rhs >= 2^16

	cmp	r3,$1		/ rhs is 0 or 1?
	bhi	4f		/   no, use the hardware divide
	mov	8.(sp),r0	/ yes, quotient is lhs (div would overflow
	mov	10.(sp),r1	/   or fail, see block comment above)
	br	9f
4:
	mov	10.(sp),r2	/ r2 = lo(lhs)
	mov	8.(sp),r1	/ r1 = hi(lhs)

	clr	r0		/ r0 = hi(lhs) / lo(rhs)
	div	r3,r0		/ r1 = hi(lhs) % lo(rhs)
	mov	r0,r4		/ save high quotient
	mov	r1,-(sp)	/ stash hi(tmp)
	mov	r1,r0		/ tmp=(hi(lhs)%lo(rhs))<<16 | lo(lhs)
	mov	r2,r1		/ (r0:r1 = tmp)
	div	r3,r0		/ r0 = tmp / lo(rhs)
	bvc	3f		/ done if tmp/lo(rhs) < 2^15

	mov	(sp),r0		/ reload r0:r1 with tmp (regs may be
	mov	r2,r1		/   clobbered by failed div)
	sub	r3,r0		/ r0:r1 -= 2^16 * lo(rhs)
	div	r3,r0
	tst	r1		/ if (negative) remainder, subtract one from
	sxt	r1		/   quotient
	add	r1,r0		/ cannot overflow!
3:
	tst	(sp)+		/ pop hi(tmp) off stack
	mov	r0,r1		/ r1 (lo(quo)) = tmp / lo(rhs)
	mov	r4,r0		/ r0 (hi(quo)) = hi(lhs) / lo(rhs)
9:
	mov	(sp)+,r4	/ restore registers
	mov	(sp)+,r3
	mov	(sp)+,r2
	rts	pc

/*
 * The divisor (rhs) is known to be >= 2^15 so we perform a shift and
 * subtract algorithm.  It's slow - feel free to improve it.
 *
 * The algorithm for signed divide broke down for unsigned operands, a slower
 * larger, more painful algorithm was implemented using scaling and
 * repetitive subtraction/shifting.  Works best for large numbers (fewer
 * shifts that way).
 *
 * Outline: count how far lhs and rhs can each be shifted left before a bit
 * falls off the top (their "scale").  The difference of the two scales is
 * used to shift both rhs and a quotient adder (initially 1).  Then, while
 * the adder is non zero: subtract the scaled rhs from lhs as often as it
 * fits, adding the adder to the quotient each time, then shift both the
 * scaled rhs and the adder right by one.
 *
 * Stack while in the main loop (after the three pushes made here):
 *	0(sp) quo hi, 2(sp) quoadder hi, 4(sp) quoadder lo
 * with quo lo kept in r4, the remaining lhs in r0:r1 and the scaled rhs
 * in r2:r3.
 */
slowuldiv:
	mov	8.(sp),r0	/ r0 = hi(lhs)
	mov	10.(sp),r1	/ r1 = lo(lhs)
	mov	12.(sp),r2	/ r2 = hi(rhs)
				/ r3 = lo(rhs) - already done

	clr	r4		/ init scale of lhs
2:
	ashc	$1,r0
	blos	1f		/ check for zero at same time
	inc	r4
	br	2b
1:
	mov	r4,-(sp)	/ save scale of lhs
	clr	r4
2:
	asl	r3
	rol	r2
	bcs	1f
	inc	r4		/ bump rhs scale
	br	2b
1:
	clr	r0
	mov	$1,r1
	sub	(sp)+,r4	/ difference in scale (rhs - lhs)
	ashc	r4,r0		/ initial quotient adder
	mov	r1,-(sp)	/ quoadder lo
	mov	r0,-(sp)	/ quoadder hi
	mov	12.(sp),r0	/ r0 = hi(lhs)	(offsets are +4 now that the
	mov	14.(sp),r1	/ r1 = lo(lhs)	 quoadder is on the stack)
	mov	16.(sp),r2	/ r2 = hi(rhs)
	mov	18.(sp),r3	/ r3 = lo(rhs)

	ashc	r4,r2		/ scale rhs up for repetitive subtraction
	clr	r4		/ quo lo
	clr	-(sp)		/ quo hi
docmp:
	cmp	r2,r0		/ unsigned 32 bit compare of rhs with lhs
	bhi	noadd		/ rhs > lhs, nothing to subtract
	blo	dosub
	cmp	r3,r1		/ high words equal, decide on the low words
	bhi	noadd
dosub:
	sub	r3,r1		/ lhs -= scaled rhs
	sbc	r0
	sub	r2,r0
	add	4(sp),r4	/ quo lo += quoadder lo
	adc	(sp)		/ quo hi
	add	2(sp),(sp)	/ quo hi += quoadder hi
	br	docmp
noadd:
	clc			/ right shift rhs
	ror	r2
	ror	r3
	clc			/ right shift quotient adder
	ror	2(sp)
	ror	4(sp)
	bne	docmp		/ quo adder not 0 means more to do
	tst	2(sp)
	bne	docmp
	mov	(sp)+,r0	/ quo hi
	mov	r4,r1		/ quo lo
	cmp	(sp)+,(sp)+	/ remove quot adder
	br	9b
#endif	/* KERNEL */

/*
 * u_long ualdiv(lhs, rhs)
 *	u_long	*lhs, rhs;
 *
 * 32-bit "/=" routine.  Calls to ualdiv are generated automatically by the C
 * compiler.
 *
 * On entry 2(sp) is the pointer to lhs, 4(sp) is hi(rhs) and 6(sp) is
 * lo(rhs).  The routine builds a uldiv(*lhs, rhs) call frame, stores the
 * quotient back through the pointer and also returns it in r0:r1.  r2 is
 * saved and restored.
 *
 * Each of the three 8.(sp) operands below names a different word because
 * the stack pointer moves with every push: in turn they fetch lo(rhs),
 * hi(rhs) and the lhs pointer.
 */

	.globl	ualdiv
ualdiv:
ENTRY(ualdiv)
	mov	r2,-(sp)	/ need a register to point at the lhs
	mov	8.(sp),-(sp)	/ The divide algorithm is long
	mov	8.(sp),-(sp)	/   enough that it just doesn't make sense
	mov	8.(sp),r2	/   to bother repeating it.  We just translate
	mov	2(r2),-(sp)	/   the call for uldiv and let it do the work
	mov	(r2),-(sp)	/   and return its results (also stuffing it
	jsr	pc,uldiv	/   into *lhs)
	add	$8.,sp		/ clean up stack
	mov	r0,(r2)+	/ store high word,
	mov	r1,(r2)		/   and low
	mov	(sp)+,r2	/ restore r2
	rts	pc		/   and return