|
|	round.sa 3.4 7/29/91
|
|	handle rounding and normalization tasks
|
|
|
|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|
|	round --- round result according to precision/mode
|
|	Input:
|	a0 points to the input operand in the internal extended format
|	d1(high word) contains rounding precision:
|		ext = $0000xxxx
|		sgl = $0001xxxx
|		dbl = $0002xxxx
|	d1(low word) contains rounding mode (it is used directly as the
|	index into mode_tab, so the values are 0..3):
|		RN  = $xxxx0000
|		RZ  = $xxxx0001
|		RM  = $xxxx0002
|		RP  = $xxxx0003
|	d0{31:29} contains the g,r,s bits (extended)
|
|	Output:
|	On return the value pointed to by a0 is correctly rounded and
|	a0 is preserved.
|	The result is not typed - the tag field is invalid.  The
|	result is still in the internal extended format.
|
|	The INEX bit of USER_FPSR will be set if the rounded result was
|	inexact (i.e. if any of the g-r-s bits were set).
|
|	Clobbers: a1 (jump-table pointer), d0.  d1 is returned with its
|	words swapped (precision in the low word).
|	NOTE(review): the original header stated that the g-r-s bits in
|	d0 are cleared on return.  The round-to-plus/minus paths leave
|	d0 = $ffffffff and the round-to-nearest path leaves d0 shifted
|	left by one, so callers should not rely on d0 after the call.
|

	.global	round
round:
|
| If g=r=s=0 then result is exact and round is done, else set
| the inex flag in status reg and continue.
|
	bsrs	ext_grs			|this subroutine looks at the
|					;rounding precision and sets
|					;the appropriate g-r-s bits.
	tstl	%d0			|if grs are zero, go force
	bne	rnd_cont		|lower bits to zero for size

	swap	%d1			|set up d1.w for round prec.
	bra	truncate

rnd_cont:
|
| Use rounding mode as an index into a jump table for these modes.
|
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	lea	mode_tab,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)
|
| Jump table indexed by rounding mode in d1.w.  All following assumes
| grs != 0.
|
mode_tab:
	.long	rnd_near
	.long	rnd_zero
	.long	rnd_mnus
	.long	rnd_plus
|
|	ROUND PLUS INFINITY
|
|	If sign of fp number = 0 (positive), then add 1 to l.
|
rnd_plus:
	swap	%d1			|set up d1 for round prec.
	tstb	LOCAL_SGN(%a0)		|check for sign
	bmi	truncate		|if negative then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's so the
|					;add_xxx code never clears the l-bit
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)
|
|	ROUND MINUS INFINITY
|
|	If sign of fp number = 1 (negative), then add 1 to l.
|
rnd_mnus:
	swap	%d1			|set up d1 for round prec.
	tstb	LOCAL_SGN(%a0)		|check for sign
	bpl	truncate		|if positive then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's so the
|					;add_xxx code never clears the l-bit
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)
|
|	ROUND ZERO
|
|	Always truncate.
rnd_zero:
	swap	%d1			|set up d1 for round prec.
	bra	truncate
|
|
|	ROUND NEAREST
|
|	If (g=1), then add 1 to l and if (r=s=0), then clear l
|	Note that this will round to even in case of a tie.
|
rnd_near:
	swap	%d1			|set up d1 for round prec.
	asll	#1,%d0			|shift g-bit to c-bit; d0 now holds r,s
	bcc	truncate		|if (g=0) then truncate, else add 1 to l
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)

|
|	ext_grs --- extract guard, round and sticky bits
|
| Input:	d1 =		PREC:ROUND
|		d0{31:29} =	incoming (extended) guard, round, sticky
|		a0 =		pointer to operand (internal extended format)
| Output:	d0{31:29}=	guard, round, sticky
|
| The ext_grs extract the guard/round/sticky bits according to the
| selected rounding precision. It is called by the round subroutine
| only.  All registers except d0 are kept intact. d0 becomes an
| updated guard,round,sticky in d0{31:29}
|
| For extended precision d0 is returned unchanged.  For single and
| double precision g and r are taken from the two mantissa bits just
| below the precision limit, and s is the OR of every lower mantissa
| bit together with the incoming d0.
|
| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
|	 prior to usage, and needs to restore d1 to original.
|
ext_grs:
	swap	%d1			|have d1.w point to round precision
	cmpiw	#0,%d1
	bnes	sgl_or_dbl
	bras	end_ext_grs

sgl_or_dbl:
	moveml	%d2/%d3,-(%a7)		|make some temp registers
	cmpiw	#1,%d1
	bnes	grs_dbl
grs_sgl:
	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|sgl prec. g-r are 2 bits right
	movel	#30,%d2			|of the sgl prec. limits
	lsll	%d2,%d3			|shift g-r bits to MSB of d3
	movel	LOCAL_HI(%a0),%d2	|get word 2 for s-bit test
	andil	#0x0000003f,%d2		|s bit is the or of all other
	bnes	st_stky			|bits to the right of g-r
	tstl	LOCAL_LO(%a0)		|test lower mantissa
	bnes	st_stky			|if any are set, set sticky
	tstl	%d0			|test original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if words 3 and 4 are clr, exit
grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|dbl-prec. g-r are 2 bits right
	movel	#30,%d2			|of the dbl prec. limits
	lsll	%d2,%d3			|shift g-r bits to the MSB of d3
	movel	LOCAL_LO(%a0),%d2	|get lower mantissa for s-bit test
	andil	#0x000001ff,%d2		|s bit is the or-ing of all
	bnes	st_stky			|other bits to the right of g-r
	tstl	%d0			|test original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if clear, exit
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			|return grs to d0
	moveml	(%a7)+,%d2/%d3		|restore scratch registers
end_ext_grs:
	swap	%d1			|restore d1 to original
	rts

|*******************  Local Equates
	.set	ad_1_sgl,0x00000100	|  constant to add 1 to l-bit in sgl prec
	.set	ad_1_dbl,0x00000800	|  constant to add 1 to l-bit in dbl prec


|Jump table for adding 1 to the l-bit indexed by rnd prec
|(index 3 is not a defined precision; it is routed to add_dbl)

add_to_l:
	.long	add_ext
	.long	add_sgl
	.long	add_dbl
	.long	add_dbl
|
|	ADD SINGLE
|
|	Add 1 to the sgl l-bit.  On carry out of the mantissa the
|	carry is rotated back in and the exponent is incremented.
|	If d0 (r,s after rnd_near) is zero this was a tie, so the
|	l-bit is cleared to round to even.
|
add_sgl:
	addl	#ad_1_sgl,LOCAL_HI(%a0)
	bccs	scc_clr			|no mantissa overflow
	roxrw	LOCAL_HI(%a0)		|shift v-bit back in
	roxrw	LOCAL_HI+2(%a0)		|shift v-bit back in
	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
scc_clr:
	tstl	%d0			|test for rs = 0
	bnes	sgl_done
	andiw	#0xfe00,LOCAL_HI+2(%a0)	|clear the l-bit
sgl_done:
	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
	clrl	LOCAL_LO(%a0)		|clear ls mant
	rts

|
|	ADD EXTENDED
|
|	Same scheme as add_sgl, with the l-bit at bit 0 of LOCAL_LO.
|
add_ext:
	addql	#1,LOCAL_LO(%a0)	|add 1 to l-bit
	bccs	xcc_clr			|test for carry out
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	xcc_clr
	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|and inc exp
xcc_clr:
	tstl	%d0			|test rs = 0
	bnes	add_ext_done
	andib	#0xfe,LOCAL_LO+3(%a0)	|clear the l bit
add_ext_done:
	rts
|
|	ADD DOUBLE
|
|	Same scheme as add_sgl, with the l-bit at bit 11 of LOCAL_LO.
|
add_dbl:
	addl	#ad_1_dbl,LOCAL_LO(%a0)
	bccs	dcc_clr
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	dcc_clr
	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|incr exponent
dcc_clr:
	tstl	%d0			|test for rs = 0
	bnes	dbl_done
	andiw	#0xf000,LOCAL_LO+2(%a0)	|clear the l-bit

dbl_done:
	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
	rts

error:
	rts
|
| Truncate all other bits
|
| Jump table indexed by rounding precision in d1.w; extended needs
| no truncation, sgl/dbl reuse the tails of add_sgl/add_dbl.
|
trunct:
	.long	end_rnd
	.long	sgl_done
	.long	dbl_done
	.long	dbl_done

truncate:
	lea	trunct,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)

end_rnd:
	rts

|
|	NORMALIZE
|
| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
| is done by shifting the mantissa left while decrementing the
| exponent.
|
| NRM_SET shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1).
|
| NRM_ZERO shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1) unless this would mean the exponent
| would go less than 0.  In that case the number becomes a denorm - the
| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
| normalized.
|
| Note that both routines have been optimized (for the worst case) and
| therefore do not have the easy to follow decrement/shift loop.
|
|	NRM_ZERO
|
|	Distance to first 1 bit in mantissa = X
|	Distance to 0 from exponent = Y
|	If X < Y
|	Then
|	  nrm_set
|	Else
|	  shift mantissa by Y
|	  set exponent = 0
|
|input:
|	FP_SCR1 = exponent, ms mantissa part, ls mantissa part
|output:
|	L_SCR1{4} = fpte15 or ete15 bit
|
| NOTE(review): the input/output lines above are from the original
| header; the code below addresses the operand only through a0
| (LOCAL_EX/LOCAL_HI/LOCAL_LO) and updates it in place.
| Clobbers d0 and d1; d2/d3/d5/d6 are saved and restored.
|
	.global	nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw	#64,%d0          |see if exp >= 64
	bmis	d0_less
	bsr	nrm_set		|exp >= 64 so exp won't go below 0
	rts
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3	|get the distance to the first 1
|				;in ms mant
	beqs	ms_clr		|branch if no bits were set
	cmpw	%d3,%d0		|if X>Y
	bmis	greater		|then exp will go past 0 (neg) if
|				;it is just shifted
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
greater:
|
| Shift the 64-bit mantissa d1:d2 left by the exponent count in d0.w
| and force the exponent to zero.
|
| Register shift counts are taken modulo 64, so a count of 33..63
| would shift every bit out of both longwords (and 32-count would
| wrap), zeroing the mantissa.  Such counts only arrive from ms_clr,
| where the ms mant is zero (the ms-mant path has count < X <= 31),
| so they are handled as a move of the ls mant into the ms mant
| followed by a shift of count-32.
|
	cmpw	#32,%d0		|count of 32 or more?
	blts	shft_lt32	|no: ordinary two-longword shift
	movel	%d2,%d1		|shift by 32: ls mant becomes ms mant
	subw	#32,%d0		|remaining count, 0..31
	lsll	%d0,%d1		|shift ms mant by remaining count
	clrl	%d2		|nothing is left in ls mant
	bras	st_dnrm
shft_lt32:
	movel	%d2,%d6		|save ls mant in d6
	lsll	%d0,%d2		|shift ls mant by count
	lsll	%d0,%d1		|shift ms mant by count
	movel	#32,%d5
	subl	%d0,%d5		|make op a denorm by shifting bits
	lsrl	%d5,%d6		|by the number in the exp, then
|				;set exp = 0.
	orl	%d6,%d1		|shift the ls mant bits into the ms mant
st_dnrm:
	movel	#0,%d0		|same as if decremented exp to 0
|				;while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
ms_clr:
	bfffo	%d2{#0:#32},%d3	|check if any bits set in ls mant
	beqs	all_clr		|branch if none set
	addw	#32,%d3		|X = 32 + distance within ls mant
	cmpw	%d3,%d0		|if X>Y
	bmis	greater		|then branch
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
|	NRM_SET
|
|	Normalize the operand at a0 in place: shift the mantissa left
|	until its most significant bit is set and subtract the shift
|	count from the exponent.  Clobbers d0 and d1; d6/d7 are saved
|	and restored.
|
	.global	nrm_set
nrm_set:
	movel	%d7,-(%a7)
	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7)
	beqs	lower		|branch if ms mant is all 0's

	movel	%d6,-(%a7)

	subw	%d7,LOCAL_EX(%a0)	|sub exponent by count
	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant
	movel	LOCAL_LO(%a0),%d1 |d1 has ls mant

	lsll	%d7,%d0		|shift first 1 to j bit position
	movel	%d1,%d6		|copy ls mant into d6
	lsll	%d7,%d6		|shift ls mant by count
	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory
	moveql	#32,%d6
	subl	%d7,%d6		|continue shift
	lsrl	%d6,%d1		|shift off all bits but those that will
|				;be shifted into ms mant
	orl	%d1,%d0		|shift the ls mant bits into the ms mant
	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory
	moveml	(%a7)+,%d7/%d6	|restore registers (d6 was pushed last)
	rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
|
lower:
	movew	LOCAL_EX(%a0),%d0	|d0 has exponent
	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant
	subw	#32,%d0		|account for ms mant being all zeros
	bfffo	%d1{#0:#32},%d7	|find first 1 in ls mant to d7)
	subw	%d7,%d0		|subtract shift count from exp
	lsll	%d7,%d1		|shift first 1 to integer bit in ms mant
	movew	%d0,LOCAL_EX(%a0)	|store exp
	movel	%d1,LOCAL_HI(%a0)	|store ms mant
	clrl	LOCAL_LO(%a0)	|clear ls mant
	movel	(%a7)+,%d7
	rts
|
|	denorm --- denormalize an intermediate result
|
|	Used by underflow.
|
|	Input:
|	a0	 points to the operand to be denormalized
|		 (in the internal extended format)
|
|	d0:	 rounding precision
|	Output:
|	a0	 points to the denormalized result
|		 (in the internal extended format)
|
|	d0	is guard,round,sticky
|
| d0 comes into this routine with the rounding precision. It
| is then loaded with the denormalized exponent threshold for the
| rounding precision.
|
| INEX2/AINEX are set in USER_FPSR when bits are lost.  d1 is
| clobbered.
|

	.global	denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	|check for exponents between $7fff-$4000
	beqs	no_sgn_ext
	bsetb	#7,LOCAL_EX(%a0)	|sign extend if it is so
no_sgn_ext:

	cmpib	#0,%d0		|if 0 then extended precision
	bnes	not_ext		|else branch

	clrl	%d1		|load d1 with ext threshold
	clrl	%d0		|clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check for inex
	beq	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

not_ext:
	cmpil	#1,%d0		|if 1 then single precision
	beqs	load_sgl	|else must be 2, double prec

load_dbl:
	movew	#dbl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

load_sgl:
	movew	#sgl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

chk_stky:
	tstl	LOCAL_HI(%a0)	|check for any bits set
	bnes	set_stky
	tstl	LOCAL_LO(%a0)	|check for any bits set
	bnes	set_stky
	clrl	%d0		|mantissa is zero: nothing is lost, so
|				;return g,r,s = 0 rather than the
|				;threshold-exp difference left in d0
	bras	clr_mant
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	movel	#0x20000000,%d0	|set sticky bit in return value
clr_mant:
	movew	%d1,LOCAL_EX(%a0)		|load exp with threshold
	movel	#0,LOCAL_HI(%a0)	|set ms mantissa = 0
	movel	#0,LOCAL_LO(%a0)		|set ls mantissa = 0
	rts
dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
no_inex:
	rts

|
|	dnrm_lp --- normalize exponent/mantissa to specified threshold
|
| Input:
|	a0		points to the operand to be denormalized
|	d0{31:29}	initial guard,round,sticky
|	d1{15:0}	denormalization threshold
| Output:
|	a0		points to the denormalized operand
|	d0{31:29}	final guard,round,sticky
|	d1.b		inexact flag:  all ones means inexact result
|
| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
| so that bfext can be used to extract the new low part of the mantissa.
| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
| is no LOCAL_GRS scratch word following it on the fsave frame.
|
| For an E3 exception the initial g,r,s are taken from WBTEMP_GRS
| instead of the incoming d0.
|
	.global	dnrm_lp
dnrm_lp:
	movel	%d2,-(%sp)		|save d2 for temp use
	btstb	#E3,E_BYTE(%a6)		|test for type E3 exception
	beqs	not_E3			|not type E3 exception
	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|extract guard,round, sticky bit
	movel	#29,%d0
	lsll	%d0,%d2			|shift g,r,s to their positions
	movel	%d2,%d0
not_E3:
	movel	(%sp)+,%d2		|restore d2
	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)
	movel	%d1,%d0			|copy the denorm threshold
	subw	LOCAL_EX(%a0),%d1		|d1 = threshold - uns exponent
	bles	no_lp			|d1 <= 0
	cmpw	#32,%d1
	blts	case_1			|0 < d1 < 32
	cmpw	#64,%d1
	blts	case_2			|32 <= d1 < 64
	bra	case_3			|d1 >= 64
|
| No normalization necessary
|
no_lp:
	clrb	%d1			|set no inex2 reported
	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|restore original g,r,s
	rts
|
| case (0<d1<32)
|
| The mantissa is shifted right by d1 bits; the bits shifted out of
| LOCAL_LO become the new g,r,s.
|
case_1:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	|d2 = new LOCAL_HI
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
	movel	%d2,LOCAL_HI(%a0)	|store new LOCAL_HI
	movel	%d1,LOCAL_LO(%a0)	|store new LOCAL_LO
	clrb	%d1			|assume exact until sticky is found
	bftst	%d0{#2:#30}		|any bits below g,r?
	beqs	c1nstky
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|flag inexact
c1nstky:
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2	|clear all but G,R,S
	tstl	%d2		|test if original G,R,S are clear
	beqs	grs_clear
	orl	#0x20000000,%d0	|set sticky bit in d0
grs_clear:
	andil	#0xe0000000,%d0	|clear all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| case (32<=d1<64)
|
| The old LOCAL_HI supplies the new LOCAL_LO and g,r,s; the new
| LOCAL_HI is zero.
|
case_2:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|unsigned exponent = threshold
	subw	#32,%d1			|d1 now between 0 and 32
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	|d2 = new LOCAL_LO
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
	bftst	%d1{#2:#30}
	bnes	c2_sstky		|bra if sticky bit to be set
	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}
	bnes	c2_sstky		|bra if sticky bit to be set
	movel	%d1,%d0
	clrb	%d1
	bras	end_c2
c2_sstky:
	movel	%d1,%d0
	bsetl	#rnd_stky_bit,%d0
	st	%d1
end_c2:
	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0
	movel	%d2,LOCAL_LO(%a0)	|store LOCAL_LO
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2	|clear all but G,R,S
	tstl	%d2		|test if original G,R,S are clear
	beqs	clear_grs
	orl	#0x20000000,%d0	|set sticky bit in d0
clear_grs:
	andil	#0xe0000000,%d0	|get rid of all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
case_3:
	movew	%d0,LOCAL_EX(%a0)
	tstw	LOCAL_SGN(%a0)
	bges	c3con
c3neg:
	orl	#0x80000000,LOCAL_EX(%a0)
c3con:
	cmpw	#64,%d1
	beqs	sixty_four
	cmpw	#65,%d1
	beqs	sixty_five
|
| Shift value is out of range.  Set d1 for inex2 flag and
| return a zero with the given threshold.
|
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	movel	#0x20000000,%d0
	st	%d1
	rts

sixty_four:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#2:#30},%d1		|d1 = bits that fall below g,r
	andil	#0xc0000000,%d0		|top two bits become g,r
	bras	c3com

sixty_five:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#1:#31},%d1		|d1 = bits that fall below r
	andil	#0x80000000,%d0
	lsrl	#1,%d0			|shift high bit into R bit

c3com:
	tstl	%d1			|any discarded ms mant bits?
	bnes	c3ssticky
	tstl	LOCAL_LO(%a0)		|any ls mant bits?
	bnes	c3ssticky
	tstb	FP_SCR2+LOCAL_GRS(%a6)	|any original g,r,s?
	bnes	c3ssticky
	clrb	%d1
	bras	c3end

c3ssticky:
	bsetl	#rnd_stky_bit,%d0
	st	%d1
c3end:
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	rts

	|end
| Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
| TOMOYO® is a registered trademark of NTT DATA CORPORATION.