1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GR 2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP 3 M68000 Hi-Performance Microprocessor Division 3 M68000 Hi-Performance Microprocessor Division 4 M68060 Software Package 4 M68060 Software Package 5 Production Release P1.00 -- October 10, 1994 5 Production Release P1.00 -- October 10, 1994 6 6 7 M68060 Software Package Copyright © 1993, 199 7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. 8 8 9 THE SOFTWARE is provided on an "AS IS" basis a 9 THE SOFTWARE is provided on an "AS IS" basis and without warranty. 10 To the maximum extent permitted by applicable 10 To the maximum extent permitted by applicable law, 11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPR 11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, 12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILIT 12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE 13 and any warranty against infringement with reg 13 and any warranty against infringement with regard to the SOFTWARE 14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and 14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 15 15 16 To the maximum extent permitted by applicable 16 To the maximum extent permitted by applicable law, 17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY D 17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOS 18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, 19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORM 19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) 20 ARISING OF THE USE OR INABILITY TO USE THE SOF 20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. 
21 Motorola assumes no responsibility for the mai 21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. 22 22 23 You are hereby granted a copyright license to 23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE 24 so long as this entire notice is retained with 24 so long as this entire notice is retained without alteration in any modified and/or 25 redistributed versions, and that such modified 25 redistributed versions, and that such modified versions are clearly identified as such. 26 No licenses are granted by implication, estopp 26 No licenses are granted by implication, estoppel or otherwise under any patents 27 or trademarks of Motorola, Inc. 27 or trademarks of Motorola, Inc. 28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29 # litop.s: 29 # litop.s: 30 # This file is appended to the top of th 30 # This file is appended to the top of the 060FPLSP package 31 # and contains the entry points into the packa 31 # and contains the entry points into the package. The user, in 32 # effect, branches to one of the branch table 32 # effect, branches to one of the branch table entries located here. 
#

# Entry-point branch table.
# Every slot is a bra.l followed by one zero word of padding, so a caller
# enters the package by branching to a fixed slot instead of to the routine.
        bra.l           _060LSP__idivs64_
        short           0x0000
        bra.l           _060LSP__idivu64_
        short           0x0000

        bra.l           _060LSP__imuls64_
        short           0x0000
        bra.l           _060LSP__imulu64_
        short           0x0000

        bra.l           _060LSP__cmp2_Ab_
        short           0x0000
        bra.l           _060LSP__cmp2_Aw_
        short           0x0000
        bra.l           _060LSP__cmp2_Al_
        short           0x0000
        bra.l           _060LSP__cmp2_Db_
        short           0x0000
        bra.l           _060LSP__cmp2_Dw_
        short           0x0000
        bra.l           _060LSP__cmp2_Dl_
        short           0x0000

# leave room for future possible additions.
        align           0x200

#########################################################################
# XDEF ****************************************************************
#       _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.
#       _060LSP__idivs64_(): Emulate 64-bit signed div instruction.
#
#       This is the library version which is accessed as a subroutine
#       and therefore does not work exactly like the 680X0 div{s,u}.l
#       64-bit divide instruction.
#
# XREF ****************************************************************
#       None.
#
# INPUT ***************************************************************
#       0x4(sp)  = divisor
#       0x8(sp)  = hi(dividend)
#       0xc(sp)  = lo(dividend)
#       0x10(sp) = pointer to location to place quotient/remainder
#
# OUTPUT **************************************************************
#       0x10(sp) = points to location of remainder/quotient.
#                  remainder is in first longword, quotient is in 2nd.
#
# ALGORITHM ***********************************************************
#       If the operands are signed, make them unsigned and save the
# sign info for later. Separate out special cases like divide-by-zero
# or 32-bit divides if possible. Else, use a special math algorithm
# to calculate the result.
#       Restore sign info if signed instruction. Set the condition
# codes before performing the final "rts". If the divisor was equal to
# zero, then perform a divide-by-zero using a 16-bit implemented
# divide instruction. This way, the operating system can record that
# the event occurred even though it may not point to the correct place.
#
# NOTES (added in review; comment text differs from Motorola P1.00, the
# instruction sequence does not) ***************************************
#       After "link.w %a6,&-16" the stacked arguments are addressed as
#       0x8(%a6) = divisor, 0xc(%a6) = hi(dividend),
#       0x10(%a6) = lo(dividend), 0x14(%a6) = result pointer.
#       Working registers: %d7 = divisor, %d5:%d6 = dividend on the way
#       in and remainder:quotient on the way out. %d2-%d7 are saved and
#       restored; %d0/%d1 are used as scratch and are not preserved.
#       NOTE(review): the conditional branches below only agree with their
#       comments if this assembler dialect takes cmp operands in the
#       reverse of standard Motorola order (the immediate is the second
#       operand of cmpi below, which suggests so); the annotations are
#       written on that assumption - confirm against the assembler manual.
#########################################################################

# Stack-frame locals, as negative offsets from %a6.
        set     POSNEG,         -1      # byte: set (st) for divs, clear (sf) for divu
        set     NDIVISOR,       -2      # byte: set if the divisor was negative
        set     NDIVIDEND,      -3      # byte: set if the dividend was negative
        set     DDSECOND,       -4      # byte: set once the 1st quotient word is done
        set     DDNORMAL,       -8      # long: normalization shift count
        set     DDQUOTIENT,     -12     # long: quotient, stored one word at a time
        set     DIV64_CC,       -16     # word: condition codes on entry

##########
# divs.l #
##########
        global          _060LSP__idivs64_
_060LSP__idivs64_:
# PROLOGUE BEGIN ########################################################
        link.w          %a6,&-16
        movm.l          &0x3f00,-(%sp)          # save d2-d7
#       fmovm.l         &0x0,-(%sp)             # save no fpregs
# PROLOGUE END ##########################################################

        mov.w           %cc,DIV64_CC(%a6)       # save incoming ccodes
        st              POSNEG(%a6)             # signed operation
        bra.b           ldiv64_cont

##########
# divu.l #
##########
        global          _060LSP__idivu64_
_060LSP__idivu64_:
# PROLOGUE BEGIN ########################################################
        link.w          %a6,&-16
        movm.l          &0x3f00,-(%sp)          # save d2-d7
#       fmovm.l         &0x0,-(%sp)             # save no fpregs
# PROLOGUE END ##########################################################

        mov.w           %cc,DIV64_CC(%a6)       # save incoming ccodes
        sf              POSNEG(%a6)             # unsigned operation

# common body for both entry points
ldiv64_cont:
        mov.l           0x8(%a6),%d7            # fetch divisor

        beq.w           ldiv64eq0               # divisor is = 0!!!

        mov.l           0xc(%a6), %d5           # get dividend hi
        mov.l           0x10(%a6), %d6          # get dividend lo

# separate signed and unsigned divide
        tst.b           POSNEG(%a6)             # signed or unsigned?
        beq.b           ldspecialcases          # use positive divide

# save the sign of the divisor
# make divisor unsigned if it's negative
        tst.l           %d7                     # chk sign of divisor
        slt             NDIVISOR(%a6)           # save sign of divisor
        bpl.b           ldsgndividend
        neg.l           %d7                     # complement negative divisor

# save the sign of the dividend
# make dividend unsigned if it's negative
ldsgndividend:
        tst.l           %d5                     # chk sign of hi(dividend)
        slt             NDIVIDEND(%a6)          # save sign of dividend
        bpl.b           ldspecialcases

        mov.w           &0x0, %cc               # clear 'X' cc bit
        negx.l          %d6                     # complement signed dividend
        negx.l          %d5                     # (64-bit negate, borrow via X)

# extract some special cases:
#       - is (dividend == 0) ?
#       - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
ldspecialcases:
        tst.l           %d5                     # is (hi(dividend) == 0)
        bne.b           ldnormaldivide          # no, so try it the long way

        tst.l           %d6                     # is (lo(dividend) == 0), too
        beq.w           lddone                  # yes, so (dividend == 0)

        cmp.l           %d7,%d6                 # is (divisor <= lo(dividend))
        bls.b           ld32bitdivide           # yes, so use 32 bit divide

        exg             %d5,%d6                 # q = 0, r = dividend
        bra.w           ldivfinish              # can't divide, we're done.

ld32bitdivide:
        tdivu.l         %d7, %d5:%d6            # it's only a 32/32 bit div!

        bra.b           ldivfinish

ldnormaldivide:
# last special case:
#       - is hi(dividend) >= divisor ? if yes, then overflow
        cmp.l           %d7,%d5
        bls.b           lddovf                  # answer won't fit in 32 bits

# perform the divide algorithm:
        bsr.l           ldclassical             # do int divide

# separate into signed and unsigned finishes.
ldivfinish:
        tst.b           POSNEG(%a6)             # do divs, divu separately
        beq.b           lddone                  # divu has no processing!!!

# it was a divs.l, so ccode setting is a little more complicated...
        tst.b           NDIVIDEND(%a6)          # remainder has same sign
        beq.b           ldcc                    # as dividend.
        neg.l           %d5                     # sgn(rem) = sgn(dividend)
ldcc:
        mov.b           NDIVISOR(%a6), %d0
        eor.b           %d0, NDIVIDEND(%a6)     # chk if quotient is negative
        beq.b           ldqpos                  # branch to quot positive

# 0x80000000 is the largest number representable as a 32-bit negative
# number. the negative of 0x80000000 is 0x80000000.
        cmpi.l          %d6, &0x80000000        # will (-quot) fit in 32 bits?
        bhi.b           lddovf

        neg.l           %d6                     # make (-quot) 2's comp

        bra.b           lddone

ldqpos:
        btst            &0x1f, %d6              # will (+quot) fit in 32 bits?
        bne.b           lddovf

lddone:
# if the register numbers are the same, only the quotient gets saved.
# so, if we always save the quotient second, we save ourselves a cmp&beq
# NOTE(review): the two lines above look inherited from the instruction-
# emulation variant; this subroutine version has no register-number operand.
        andi.w          &0x10,DIV64_CC(%a6)     # keep only the caller's 'X' bit
        mov.w           DIV64_CC(%a6),%cc
        tst.l           %d6                     # may set 'N' ccode bit

# here, the remainder is in d5 and the quotient is in d6. they are stored,
# d5 first, at the location addressed by the pointer argument at 0x14(a6).
# use movm here to not disturb the condition codes.
ldexit:
        movm.l          &0x0060,([0x14,%a6])    # save result (d5,d6)

# EPILOGUE BEGIN ########################################################
#       fmovm.l         (%sp)+,&0x0             # restore no fpregs
        movm.l          (%sp)+,&0x00fc          # restore d2-d7
        unlk            %a6
# EPILOGUE END ##########################################################

        rts

# the result should be the unchanged dividend
lddovf:
        mov.l           0xc(%a6), %d5           # get dividend hi
        mov.l           0x10(%a6), %d6          # get dividend lo

        andi.w          &0x1c,DIV64_CC(%a6)     # keep caller's X,N,Z; drop V,C
        ori.w           &0x02,DIV64_CC(%a6)     # set 'V' ccode bit
        mov.w           DIV64_CC(%a6),%cc

        bra.b           ldexit

# divide-by-zero: hand back the unchanged dividend, restore the caller's
# condition codes, unwind the frame, then trap.
ldiv64eq0:
        mov.l           0xc(%a6),([0x14,%a6])
        mov.l           0x10(%a6),([0x14,%a6],0x4)

        mov.w           DIV64_CC(%a6),%cc

# EPILOGUE BEGIN ########################################################
#       fmovm.l         (%sp)+,&0x0             # restore no fpregs
        movm.l          (%sp)+,&0x00fc          # restore d2-d7
        unlk            %a6
# EPILOGUE END ##########################################################

        divu.w          &0x0,%d0                # force a divbyzero exception
        rts

###########################################################################
#########################################################################
# This routine uses the 'classical' Algorithm D from Donald Knuth's
# Art of Computer Programming, vol II, Seminumerical Algorithms.
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,
# where U,V are words of the quadword dividend and longword divisor,
# and U1, V1 are the most significant words.
#
# INPUT:
#       %d5 = most sig. longword of the 64 bit dividend
#       %d6 = least sig. longword of the 64 bit dividend
#       %d7 = divisor
#       %a6 = frame pointer; DDNORMAL, DDSECOND and DDQUOTIENT are used
#             as scratch storage at their offsets from it
# OUTPUT:
#       %d6 = quotient, %d5 = remainder
# CLOBBERS:
#       %d1 on the word-divisor path; %d0-%d4 and %d7 on the Knuth path.
#
# No overflow check is made here.
# NOTE(review): the code assumes hi(dividend) < divisor, so that the
# quotient fits in 32 bits; the visible caller tests for that before
# branching here - confirm for any other caller.
#
# Modified relative to Motorola P1.00: comments revised, and the two
# carry-propagation steps in ldmm2 use addx.l instead of addx.w.
#########################################################################
ldclassical:
# if the divisor msw is 0, use simpler algorithm then the full blown
# one at ddknuth:

        cmpi.l          %d7, &0xffff
        bhi.b           lddknuth                # go use D. Knuth algorithm

# Since the divisor is only a word (and larger than the mslw of the dividend),
# a simpler algorithm may be used :
# In the general case, four quotient words would be created by
# dividing the divisor word into each dividend word. In this case,
# the first two quotient words must be zero, or overflow would occur.
# Since we already checked this case above, we can treat the most significant
# longword of the dividend as (0) remainder (see Knuth) and merely complete
# the last two divisions to get a quotient longword and word remainder:

        clr.l           %d1
        swap            %d5                     # same as r*b if previous step rqd
        swap            %d6                     # get u3 to lsw position
        mov.w           %d6, %d5                # rb + u3

        divu.w          %d7, %d5

        mov.w           %d5, %d1                # first quotient word
        swap            %d6                     # get u4
        mov.w           %d6, %d5                # rb + u4

        divu.w          %d7, %d5

        swap            %d1
        mov.w           %d5, %d1                # 2nd quotient 'digit'
        clr.w           %d5
        swap            %d5                     # now remainder
        mov.l           %d1, %d6                # and quotient

        rts

lddknuth:
# In this algorithm, the divisor is treated as a 2 digit (word) number
# which is divided into a 3 digit (word) dividend to get one quotient
# digit (word). After subtraction, the dividend is shifted and the
# process repeated. Before beginning, the divisor and quotient are
# 'normalized' so that the process of estimating the quotient digit
# will yield verifiably correct results..

        clr.l           DDNORMAL(%a6)           # count of shifts for normalization
        clr.b           DDSECOND(%a6)           # clear flag for quotient digits
        clr.l           %d1                     # %d1 will hold trial quotient
lddnchk:
        btst            &31, %d7                # must we normalize? first word of
        bne.b           lddnormalized           # divisor (V1) must be >= 65536/2
        addq.l          &0x1, DDNORMAL(%a6)     # count normalization shifts
        lsl.l           &0x1, %d7               # shift the divisor
        lsl.l           &0x1, %d6               # shift u4,u3 with overflow to u2
        roxl.l          &0x1, %d5               # shift u1,u2
        bra.w           lddnchk
lddnormalized:

# Now calculate an estimate of the quotient words (msw first, then lsw).
# The comments use subscripts for the first quotient digit determination.
        mov.l           %d7, %d3                # divisor
        mov.l           %d5, %d2                # dividend mslw
        swap            %d2
        swap            %d3
        cmp.w           %d2, %d3                # V1 = U1 ?
        bne.b           lddqcalc1
        mov.w           &0xffff, %d1            # use max trial quotient word
        bra.b           lddadj0
lddqcalc1:
        mov.l           %d5, %d1

        divu.w          %d3, %d1                # use quotient of mslw/msw

        andi.l          &0x0000ffff, %d1        # zero any remainder
lddadj0:

# now test the trial quotient and adjust. This step plus the
# normalization assures (according to Knuth) that the trial
# quotient will be at worst 1 too large.
        mov.l           %d6, -(%sp)             # save %d6; popped after ldmm2 below
        clr.w           %d6                     # word u3 left
        swap            %d6                     # in lsw position
lddadj1:
        mov.l           %d7, %d3
        mov.l           %d1, %d2
        mulu.w          %d7, %d2                # V2q
        swap            %d3
        mulu.w          %d1, %d3                # V1q
        mov.l           %d5, %d4                # U1U2
        sub.l           %d3, %d4                # U1U2 - V1q

        swap            %d4

        mov.w           %d4,%d0                 # %d0 = upper word of (U1U2 - V1q)
        mov.w           %d6,%d4                 # insert lower word (U3)

        tst.w           %d0                     # is upper word set?
        bne.w           lddadjd1                # yes, skip the comparison below

#       add.l           %d6, %d4                # (U1U2 - V1q) + U3

        cmp.l           %d2, %d4
        bls.b           lddadjd1                # is V2q > (U1U2-V1q) + U3 ?
        subq.l          &0x1, %d1               # yes, decrement and recheck
        bra.b           lddadj1
lddadjd1:
# now test the word by multiplying it by the divisor (V1V2) and comparing
# the 3 digit (word) result with the current dividend words
        mov.l           %d5, -(%sp)             # save %d5 (%d6 already saved)
        mov.l           %d1, %d6
        swap            %d6                     # shift answer to ms 3 words
        mov.l           %d7, %d5
        bsr.l           ldmm2
        mov.l           %d5, %d2                # now %d2,%d3 are trial*divisor
        mov.l           %d6, %d3
        mov.l           (%sp)+, %d5             # restore dividend
        mov.l           (%sp)+, %d6
        sub.l           %d3, %d6
        subx.l          %d2, %d5                # subtract double precision
        bcc             ldd2nd                  # no carry, do next quotient digit
        subq.l          &0x1, %d1               # q is one too large
# need to add back divisor longword to current ms 3 digits of dividend
# - according to Knuth, this is done only 2 out of 65536 times for random
# divisor, dividend selection.
        clr.l           %d2
        mov.l           %d7, %d3
        swap            %d3
        clr.w           %d3                     # %d3 now ls word of divisor
        add.l           %d3, %d6                # aligned with 3rd word of dividend
        addx.l          %d2, %d5
        mov.l           %d7, %d3
        clr.w           %d3                     # %d3 now ms word of divisor
        swap            %d3                     # aligned with 2nd word of dividend
        add.l           %d3, %d5
ldd2nd:
        tst.b           DDSECOND(%a6)           # both q words done?
        bne.b           lddremain
# first quotient digit now correct. store digit and shift the
# (subtracted) dividend
        mov.w           %d1, DDQUOTIENT(%a6)
        clr.l           %d1
        swap            %d5
        swap            %d6
        mov.w           %d6, %d5
        clr.w           %d6
        st              DDSECOND(%a6)           # second digit
        bra.w           lddnormalized
lddremain:
# add 2nd word to quotient, get the remainder.
        mov.w           %d1, DDQUOTIENT+2(%a6)
# shift down one word/digit to renormalize remainder.
        mov.w           %d5, %d6
        swap            %d6
        swap            %d5
        mov.l           DDNORMAL(%a6), %d7      # get norm shift count
        beq.b           lddrn
        subq.l          &0x1, %d7               # set for loop count
lddnlp:
        lsr.l           &0x1, %d5               # shift into %d6
        roxr.l          &0x1, %d6
        dbf             %d7, lddnlp
lddrn:
        mov.l           %d6, %d5                # remainder
        mov.l           DDQUOTIENT(%a6), %d6    # quotient

        rts

ldmm2:
# factors for the 32X32->64 multiplication are in %d5 and %d6.
# returns 64 bit result in %d5 (hi) %d6(lo).
# destroys %d2,%d3,%d4.

# multiply hi,lo words of each factor to get 4 intermediate products
        mov.l           %d6, %d2
        mov.l           %d6, %d3
        mov.l           %d5, %d4
        swap            %d3
        swap            %d4
        mulu.w          %d5, %d6                # %d6 <- lsw*lsw
        mulu.w          %d3, %d5                # %d5 <- msw-dest*lsw-source
        mulu.w          %d4, %d2                # %d2 <- msw-source*lsw-dest
        mulu.w          %d4, %d3                # %d3 <- msw*msw
# now use swap and addx to consolidate to two longwords
# The carries are folded into the msw*msw product with a longword addx:
# a word-sized addx updates only the low 16 bits of %d3 and would lose the
# carry whenever that low word is 0xffff.
        clr.l           %d4
        swap            %d6
        add.w           %d5, %d6                # add msw of l*l to lsw of m*l product
        addx.l          %d4, %d3                # add any carry to m*m product
        add.w           %d2, %d6                # add in lsw of other m*l product
        addx.l          %d4, %d3                # add any carry to m*m product
        swap            %d6                     # %d6 is low 32 bits of final product
        clr.w           %d5
        clr.w           %d2                     # lsw of two mixed products used,
        swap            %d5                     # now use msws of longwords
        swap            %d2
        add.l           %d2, %d5
        add.l           %d3, %d5                # %d5 now ms 32 bits of final product
        rts

#########################################################################
# XDEF ****************************************************************
#       _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction
#       _060LSP__imuls64_(): Emulate 64-bit signed mul instruction.
# 482 # 482 # # 483 # This is the library version which is a 483 # This is the library version which is accessed as a subroutine # 484 # and therefore does not work exactly li 484 # and therefore does not work exactly like the 680X0 mul{s,u}.l # 485 # 64-bit multiply instruction. 485 # 64-bit multiply instruction. # 486 # 486 # # 487 # XREF *************************************** 487 # XREF **************************************************************** # 488 # None 488 # None # 489 # 489 # # 490 # INPUT ************************************** 490 # INPUT *************************************************************** # 491 # 0x4(sp) = multiplier 491 # 0x4(sp) = multiplier # 492 # 0x8(sp) = multiplicand 492 # 0x8(sp) = multiplicand # 493 # 0xc(sp) = pointer to location to place 493 # 0xc(sp) = pointer to location to place 64-bit result # 494 # 494 # # 495 # OUTPUT ************************************* 495 # OUTPUT ************************************************************** # 496 # 0xc(sp) = points to location of 64-bit 496 # 0xc(sp) = points to location of 64-bit result # 497 # 497 # # 498 # ALGORITHM ********************************** 498 # ALGORITHM *********************************************************** # 499 # Perform the multiply in pieces using 1 499 # Perform the multiply in pieces using 16x16->32 unsigned # 500 # multiplies and "add" instructions. 500 # multiplies and "add" instructions. # 501 # Set the condition codes as appropriate 501 # Set the condition codes as appropriate before performing an # 502 # "rts". 502 # "rts". 
#									#
#########################################################################

	set	MUL64_CC, -4			# frame offset of saved incoming ccodes

#
# _060LSP__imulu64_(): unsigned 32 x 32 -> 64 bit multiply.
#
# In:	0x08(%a6) = multiplier			(0x4(sp) at entry)
#	0x0c(%a6) = multiplicand		(0x8(sp) at entry)
#	0x10(%a6) = pointer to 64-bit result	(0xc(sp) at entry)
# Out:	hi(result) stored at 0(ptr), lo(result) stored at 4(ptr).
#	ccodes: 'X' is kept from entry; 'Z' is set for a zero result;
#	'N' is set when bit 63 of the result is set; 'V' and 'C' are cleared.
# Regs:	d0/d1 are overwritten (hi/lo of the result on return);
#	d2-d4 are saved in the prologue and restored in the epilogue.
#
	global		_060LSP__imulu64_
_060LSP__imulu64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4			# 4 bytes of locals: MUL64_CC
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		mulu64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		mulu64_zero		# handle zero separately

#########################################################################
#	63			   32				0	#
#	----------------------------					#
#	| hi(mplier) * hi(mplicand)|					#
#	----------------------------					#
#		     -----------------------------			#
#		     | hi(mplier) * lo(mplicand) |			#
#		     -----------------------------			#
#		     -----------------------------			#
#		     | lo(mplier) * hi(mplicand) |			#
#		     -----------------------------			#
#	  |			   -----------------------------	#
#	--|--			   | lo(mplier) * lo(mplicand) |	#
#	  |			   -----------------------------	#
#	========================================================	#
#	--------------------------------------------------------	#
#	|	hi(result)	   |	    lo(result)         |	#
#	--------------------------------------------------------	#
#########################################################################
mulu64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies (16x16->32, unsigned):
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# ... + [4] = hi(result)

# now, build the condition codes. both operands are non-zero here, so
# the only result-dependent bit that can be set is 'N' (bit 63 set,
# even though the operation is unsigned).
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		mulu64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
mulu64_ddone:
	mov.w		%d4,%cc

# here, lo(result) is in d0 and hi(result) is in d1. swap them so that
# the movm below writes hi(result) first, then store both longwords at
# the location given by the pointer argument at 0x10(%a6).
# exg and movm are used here so the condition codes are not disturbed.
mulu64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# store the zero result through mulu64_end and set the 'Z' ccode bit.
mulu64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		mulu64_end

##########
# muls.l #
##########
#
# _060LSP__imuls64_(): signed 32 x 32 -> 64 bit multiply.
#
# Same stack arguments and result layout as _060LSP__imulu64_().
# Both operands are made positive, multiplied with the same unsigned
# algorithm, and the 64-bit product is negated when exactly one of the
# operands was negative (sign tag kept in the low byte of d5).
# ccodes: 'X' is kept from entry; 'Z' is set for a zero result;
# 'N' is set when the result is negative; 'V' and 'C' are cleared.
# Regs:	d0/d1 are overwritten (hi/lo of the result on return);
#	d2-d5 are saved in the prologue and restored in the epilogue.
#
	global		_060LSP__imuls64_
_060LSP__imuls64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4			# 4 bytes of locals: MUL64_CC
	movm.l		&0x3c00,-(%sp)		# save d2-d5
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

# the zero exits go to muls64_zero/muls64_end so that the epilogue that
# runs restores the same register set (d2-d5) that the prologue saved.
	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		muls64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		muls64_zero		# handle zero separately

	clr.b		%d5			# clear sign tag
	tst.l		%d0			# is multiplier negative?
	bge.b		muls64_chk_md_sgn	# no
	neg.l		%d0			# make multiplier positive

	ori.b		&0x1,%d5		# save multiplier sgn

# the result sign is the exclusive or of the operand sign bits.
muls64_chk_md_sgn:
	tst.l		%d1			# is multiplicand negative?
	bge.b		muls64_alg		# no
	neg.l		%d1			# make multiplicand positive

	eori.b		&0x1,%d5		# calculate correct sign

#########################################################################
#	63			   32				0	#
#	----------------------------					#
#	| hi(mplier) * hi(mplicand)|					#
#	----------------------------					#
#		     -----------------------------			#
#		     | hi(mplier) * lo(mplicand) |			#
#		     -----------------------------			#
#		     -----------------------------			#
#		     | lo(mplier) * hi(mplicand) |			#
#		     -----------------------------			#
#	  |			   -----------------------------	#
#	--|--			   | lo(mplier) * lo(mplicand) |	#
#	  |			   -----------------------------	#
#	========================================================	#
#	--------------------------------------------------------	#
#	|	hi(result)	   |	    lo(result)         |	#
#	--------------------------------------------------------	#
#########################################################################
muls64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies (16x16->32, unsigned):
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# ... + [4] = hi(result)

	tst.b		%d5			# should result be negative?
	beq.b		muls64_done		# no

# result should be a signed negative number.
# compute 2's complement of the unsigned number:
#   -negate all bits and add 1
# d4 is still zero here, so the addx only propagates the carry ('X')
# out of lo(result) into hi(result).
muls64_neg:
	not.l		%d0			# negate lo(result) bits
	not.l		%d1			# negate hi(result) bits
	addq.l		&1,%d0			# add 1 to lo(result)
	addx.l		%d4,%d1			# add carry to hi(result)

muls64_done:
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		muls64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
muls64_ddone:
	mov.w		%d4,%cc

# here, lo(result) is in d0 and hi(result) is in d1. swap them so that
# the movm below writes hi(result) first, then store both longwords at
# the location given by the pointer argument at 0x10(%a6).
# exg and movm are used here so the condition codes are not disturbed.
muls64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# store the zero result through muls64_end and set the 'Z' ccode bit.
muls64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		muls64_end

#########################################################################
# XDEF ****************************************************************	#
#	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
#	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
#	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
#	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
#	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
#	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
#									#
#	This is the library version which is accessed as a subroutine	#
#	and therefore does not work exactly like the 680X0 "cmp2"	#
#	instruction.							#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	0x4(sp) = Rn							#
#	0x8(sp) = pointer to boundary pair				#
#									#
# OUTPUT **************************************************************	#
#	cc = condition codes are set correctly				#
#									#
# ALGORITHM ***********************************************************	#
#	In the interest of simplicity, all operands are converted to	#
# longword size whether the operation is byte, word, or long. The	#
# bounds are sign extended accordingly. If Rn is a data register, Rn is	#
# also sign extended. If Rn is an address register, it need not be sign	#
# extended since the full register is always used.			#
#	The condition codes are set correctly before the final "rts".	#
#									#
#########################################################################

	set	CMP2_CC, -4			# frame offset of saved incoming ccodes

#
# Every entry point below builds the same frame and then joins the
# common tail l_cmp2_cmp with:
#	d0 = lower bound   (longword, sign extended)
#	d1 = upper bound   (longword, sign extended)
#	d2 = Rn            (longword; sign extended for the Dn byte/word forms)
# After "link", Rn is at 0x8(%a6) and the bounds pointer at 0xc(%a6).
# d0/d1 are overwritten; d2-d4 are saved here and restored by the tail.
#

# cmp2.b An: byte bounds, full address register value.
	global		_060LSP__cmp2_Ab_
_060LSP__cmp2_Ab_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)	# remember caller's ccodes

	mov.b		([0xc,%a6],0x0),%d0	# lo bnd = byte 0 of pair
	extb.l		%d0			# lo bnd: byte -> long
	mov.b		([0xc,%a6],0x1),%d1	# hi bnd = byte 1 of pair
	extb.l		%d1			# hi bnd: byte -> long

	mov.l		0x8(%a6), %d2		# regval, used at full width
	bra.w		l_cmp2_cmp		# join the common compare

# cmp2.w An: word bounds, full address register value.
	global		_060LSP__cmp2_Aw_
_060LSP__cmp2_Aw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)	# remember caller's ccodes

	mov.w		([0xc,%a6],0x0),%d0	# lo bnd = word 0 of pair
	ext.l		%d0			# lo bnd: word -> long
	mov.w		([0xc,%a6],0x2),%d1	# hi bnd = word 1 of pair
	ext.l		%d1			# hi bnd: word -> long

	mov.l		0x8(%a6), %d2		# regval, used at full width
	bra.w		l_cmp2_cmp		# join the common compare

# cmp2.l An: longword bounds, no extension needed.
	global		_060LSP__cmp2_Al_
_060LSP__cmp2_Al_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)	# remember caller's ccodes

	mov.l		([0xc,%a6],0x0),%d0	# lo bnd = long 0 of pair
	mov.l		([0xc,%a6],0x4),%d1	# hi bnd = long 1 of pair

	mov.l		0x8(%a6), %d2		# regval
	bra.w		l_cmp2_cmp		# join the common compare

# cmp2.b Dn: byte bounds; only the low byte of Rn takes part.
	global		_060LSP__cmp2_Db_
_060LSP__cmp2_Db_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)	# remember caller's ccodes

	mov.b		([0xc,%a6],0x0),%d0	# lo bnd = byte 0 of pair
	extb.l		%d0			# lo bnd: byte -> long
	mov.b		([0xc,%a6],0x1),%d1	# hi bnd = byte 1 of pair
	extb.l		%d1			# hi bnd: byte -> long

# data register form: widen the operand byte as well, so the common
# tail can work on plain longwords.
	mov.l		0x8(%a6), %d2		# regval
	extb.l		%d2			# data byte -> long
	bra.w		l_cmp2_cmp		# join the common compare

# cmp2.w Dn: word bounds; only the low word of Rn takes part.
	global		_060LSP__cmp2_Dw_
_060LSP__cmp2_Dw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)	# remember caller's ccodes

	mov.w		([0xc,%a6],0x0),%d0	# lo bnd = word 0 of pair
	ext.l		%d0			# lo bnd: word -> long
	mov.w		([0xc,%a6],0x2),%d1	# hi bnd = word 1 of pair
	ext.l		%d1			# hi bnd: word -> long

# data register form: widen the operand word as well, so the common
# tail can work on plain longwords.
	mov.l		0x8(%a6), %d2		# regval
	ext.l		%d2			# data word -> long
	bra.w		l_cmp2_cmp		# join the common compare

# cmp2.l Dn: longword bounds; falls straight through into the tail.
	global		_060LSP__cmp2_Dl_
_060LSP__cmp2_Dl_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)	# remember caller's ccodes

	mov.l		([0xc,%a6],0x0),%d0	# lo bnd = long 0 of pair
	mov.l		([0xc,%a6],0x4),%d1	# hi bnd = long 1 of pair

	mov.l		0x8(%a6), %d2		# regval

#
# Common tail. To build the final ccodes:
#	(1) take 'Z' from (Rn - lo)
#	(2) take 'Z' and 'C' from comparing (hi - lo) with (Rn - lo)
#	(3) keep 'X', 'N', and 'V' as they were on entry
#	(4) merge (1)-(3) and load the result into the ccr
# NOTE(review): 'C' presumably ends up set when Rn is outside [lo,hi],
# as for the real cmp2 -- this depends on the cmp operand order of the
# assembler syntax in use; confirm before touching the compare.
#
l_cmp2_cmp:
	sub.l		%d0, %d1		# d1 = (hi - lo)
	sub.l		%d0, %d2		# d2 = (Rn - lo)
	mov.w		%cc, %d3		# ccodes of (Rn - lo)
	andi.b		&0x4, %d3		# only 'Z' is wanted from it
	cmp.l		%d1,%d2			# (hi - lo) vs (Rn - lo)

	mov.w		%cc, %d4		# ccodes of the compare
	or.b		%d4, %d3		# merge with the earlier 'Z'
	andi.b		&0x5, %d3		# keep 'Z' and 'C' only

	mov.w		CMP2_CC(%a6), %d4	# ccodes from entry
	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
	or.b		%d3, %d4		# add the new 'Z' and 'C'
	mov.w		%d4,%cc			# install final ccodes

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.