/*
 * Copyright (c) 1987 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#ifdef LIBC_SCCS
	<@(#)lrem.s 2.4 (GTE) 12/26/92\0>
	.even
#endif LIBC_SCCS

/*
 * lrem(lhs, rhs)
 *	long	lhs, rhs;
 *
 * 32-bit "%" routine.  Calls to lrem are generated automatically by the C
 * compiler.
 *
 * On entry the stack holds, above the return address:
 *	2(sp) = hiint(lhs)	4(sp) = loint(lhs)
 *	6(sp) = hiint(rhs)	8.(sp) = loint(rhs)
 * The 32-bit result is returned in r0 (high word) and r1 (low word) and
 * always carries the sign of lhs.
 */
#include "DEFS.h"

#if	!defined(KERNEL)
/*
 * Lrem for floating point hardware.  Check for divide by zero.  Don't want
 * floating point divide trap in integer math.
 *
 * A zero rhs returns lhs unchanged.
 *
 * NOTE(review): an exact result for full 32-bit operands presumably relies
 * on the floating point unit being in double precision mode on entry --
 * confirm against the C runtime startup code.
 */

#define	one	040200		/* high-order word of floating point 1.0 */

ASENTRY(lrem)
	tst	6(sp)		/ divide by zero check
	bne	1f
	tst	8.(sp)
	bne	1f
	mov	2(sp),r0
	mov	4(sp),r1	/ return lhs
	rts	pc
1:
	setl			/ integer operands are 32 bits from here on
	movif	2(sp),fr0	/ fr0 = lhs
	movf	fr0,fr2		/ fr2 = lhs
	movif	6(sp),fr3	/ fr3 = rhs
	divf	fr3,fr0		/ fr0 = lhs/rhs
	modf	$one,fr0	/ fr1 = integer((lhs/rhs) * 1.0), fr0 = fraction
	mulf	fr3,fr1		/ fr1 = integer(lhs/rhs) * rhs
	subf	fr1,fr2		/ fr2 = lhs - (integer(lhs/rhs) * rhs)
	movfi	fr2,-(sp)	/ (result)
	mov	(sp)+,r0
	mov	(sp)+,r1
	seti			/ back to 16 bit integer mode
	rts	pc
#else
/*
 * Lrem for fixed point hardware.
 */
#define	negl(high, low)	neg	high; \
			neg	low; \
			sbc	high	/ high -= (low != 0)

ASENTRY(lrem)
	mov	r2,-(sp)	/ faster than csv/cret ...
	mov	r3,-(sp)
	mov	r4,-(sp)
	mov	14.(sp),r3	/ r3 = loint(rhs)
	sxt	r4		/ r4 = sign(rhs)
	bpl	1f		/ if (int)loint(rhs) < 0
	neg	r3		/   r3 = abs(loint(rhs))
1:
	cmp	r4,12.(sp)	/ hiint(rhs) all sign bits?
	bne	hardlrem	/   no, rhs >= 2^15

	mov	10.(sp),r2	/ r2 = loint(lhs)
	mov	8.(sp),r1	/ r1 = hiint(lhs)
	bge	2f		/ if lhs < 0
	negl(r1, r2)		/   r1:r2 = abs(lhs)
2:
	/*
	 * At this point we know what the sign of the result is going to be
	 * (r4), abs(rhs) < 2^15, we have the absolute value of rhs in
	 * r3 as a single word integer and the absolute value of lhs in
	 * r1 (hiint) and r2 (loint).  References to hiint(rhs), loint(lhs)
	 * and hiint(lhs) in the following comments actually refer to the
	 * absolute value of rhs and lhs.
	 *
	 * We perform a long remainder via:
	 *	tmp        = (hiint(lhs) % loint(rhs))<<16 | loint(lhs)
	 *	loint(rem) = tmp % loint(rhs)
	 *
	 * NOTE(review): rhs == -32768 also reaches this path, with r3 left
	 * at 0100000 by the neg above; that value is a negative divisor to
	 * div, so "abs(rhs) < 2^15" does not appear to hold for it.  Not
	 * verified here -- confirm before relying on that case.
	 */
	clr	r0
	div	r3,r0		/ r1 = hiint(lhs) % loint(rhs)
	mov	r1,r4		/ stash hiint(tmp)
	mov	r1,r0		/ tmp=(hiint(lhs)%loint(rhs))<<16 | loint(lhs)
	mov	r2,r1		/ (r0:r1 = tmp)
	div	r3,r0		/ r1 = tmp % loint(rhs)
	bvc	3f		/ done if tmp/loint(rhs) < 2^15
	/*
	 * Our second division overflowed leaving undefined values in
	 * registers.  This can only happen when:
	 *	tmp/loint(rhs) >= 2^15
	 *	tmp >= 2^15 * loint(rhs)
	 *	tmp >= 2^16 * (loint(rhs)/2)
	 *
	 * If we subtract 2^16 * loint(rhs) from both sides however, we get:
	 *	tmp - (2^16 * loint(rhs)) >= -(2^16 * (loint(rhs)/2))
	 *
	 * and then divide both sides by loint(rhs):
	 *	tmp/loint(rhs) - 2^16 >= -(2^15)
	 *
	 * which is a division that won't generate an overflow.  Finally:
	 *	tmp = quo*loint(rhs) + rem
	 *	tmp - (2^16 * loint(rhs)) = (quo - 2^16) * loint(rhs) + rem
	 *
	 * Since we're now dividing a negative number and since the div
	 * instruction always generates a remainder the same sign as the
	 * dividend, if we get a non-zero remainder, we'll actually get:
	 *	(quo+1 - 2^16) * loint(rhs) + rem-loint(rhs)
	 *
	 * which means we'll have to adjust the remainder returned by
	 * adding loint(rhs) ...
	 */
	mov	r4,r0		/ reload r0:r1 with tmp (regs may be
	mov	r2,r1		/   clobbered by failed div)
	sub	r3,r0		/ r0:r1 -= 2^16 * loint(rhs)
	div	r3,r0
	tst	r1		/ if no remainder (0), bop out immediately,
	beq	4f		/   otherwise add loint(rhs)
	add	r3,r1
3:
	tst	8.(sp)		/ if lhs < 0 (result always sign of lhs)
	bpl	4f		/   rem = -rem
	neg	r1
4:
	sxt	r0		/ sign extend remainder
ret:
	mov	(sp)+,r4	/ restore registers
	mov	(sp)+,r3
	mov	(sp)+,r2
	rts	pc

/*
 * The divisor (rhs) is known to be >= 2^15 so we perform a bit shift
 * algorithm as only 16 cycles are needed:
 *	long
 *	hardlrem(lhs, rhs)
 *		long	lhs, rhs;
 *	{
 *		long		hi_sreg, lo_sreg;
 *		unsigned int	cnt;
 *
 *		if (lhs < 0)
 *			lhs = -lhs;
 *		if (rhs < 0)
 *			rhs = -rhs;
 *		hi_sreg = hiint(lhs);
 *		lo_sreg = loint(lhs)<<16;
 *		for (cnt = 16; cnt; cnt--) {
 *			qshiftl(&hi_sreg, &lo_sreg);
 *			if (hi_sreg >= rhs)
 *				hi_sreg -= rhs;
 *		}
 *		return((long)((lhs < 0) ? -hi_sreg : hi_sreg));
 *	}
 * The assembly version of the above algorithm uses r0, r1 and r2 to implement
 * hi_sreg and lo_sreg by putting lhs into r1:r2 and zeroing r0 thereby
 * creating a three word register r0:r1:r2 with hi_sreg = r0:r1 and lo_sreg =
 * r1:r2 ...
 *
 * Both abs(lhs) and abs(rhs) are unsigned 32 bit magnitudes here: negating
 * the most negative long leaves 2^31, whose high word is 0100000.  The
 * compare of hi_sreg against rhs therefore has to use the unsigned branches
 * for the high words as well as the low words.
 *
 * The negation of rhs is done in place on the argument words on the stack.
 *
 * NOTE(review): when lhs is the most negative long and abs(rhs) is exactly
 * 2^15 the initial hi_sreg equals rhs rather than being below it, and the
 * loop appears to return -32768 instead of 0.  Left as is; confirm and
 * handle separately if that corner matters.
 */
hardlrem:
	mov	10.(sp),r2	/ r2 = loint(lhs)
	mov	8.(sp),r1	/ r1 = hiint(lhs)
	bpl	1f		/ if lhs < 0
	negl(r1, r2)		/   r1:r2 = abs(lhs)
1:
	mov	12.(sp),r3	/ r3 = hiint(rhs)
	bge	2f		/ if rhs < 0
	negl(r3, 14.(sp))	/   rhs = -rhs (r3:loint(rhs))
2:
	clr	r0		/ clear top of shift register
	mov	$16.,r4		/ loop 16 times
3:
	clc			/ shift combined shift register and quotient
	rol	r2		/   left one place
	rol	r1
	rol	r0
	cmp	r3,r0		/ How do r0:r1 (hi_sreg) and rhs compare?
	bhi	4f		/   hiint(rhs) above r0 (unsigned): less
	blo	5f		/   hiint(rhs) below r0 (unsigned): greater
	cmp	14.(sp),r1	/   high words equal, low words decide
	blos	5f
4:
	sob	r4,3b		/ r0:r1 (hi_sreg) < rhs:
	br	6f		/   just loop
5:
	sub	14.(sp),r1	/ r0:r1 (hi_sreg) >= rhs
	sbc	r0		/   subtract rhs from r0:r1 (hi_sreg)
	sub	r3,r0
	sob	r4,3b		/   and loop
6:
	tst	8.(sp)		/ if lhs >= 0
	bge	ret		/   return immediately
	negl(r0, r1)		/   else negate answer before returning
	br	ret
#endif