// hexagon.zig (45809B) - Raw
1 const builtin = @import("builtin"); 2 const common = @import("./common.zig"); 3 4 fn __hexagon_divsi3() callconv(.naked) noreturn { 5 asm volatile ( 6 \\ { 7 \\ p0 = cmp.ge(r0,#0) 8 \\ p1 = cmp.ge(r1,#0) 9 \\ r1 = abs(r0) 10 \\ r2 = abs(r1) 11 \\ } 12 \\ { 13 \\ r3 = cl0(r1) 14 \\ r4 = cl0(r2) 15 \\ r5 = sub(r1,r2) 16 \\ p2 = cmp.gtu(r2,r1) 17 \\ } 18 \\ { 19 \\ r0 = #0 20 \\ p1 = xor(p0,p1) 21 \\ p0 = cmp.gtu(r2,r5) 22 \\ if (p2) jumpr r31 23 \\ } 24 \\ 25 \\ { 26 \\ r0 = mux(p1,#-1,#1) 27 \\ if (p0) jumpr r31 28 \\ r4 = sub(r4,r3) 29 \\ r3 = #1 30 \\ } 31 \\ { 32 \\ r0 = #0 33 \\ r3:2 = vlslw(r3:2,r4) 34 \\ loop0(1f,r4) 35 \\ } 36 \\ .falign 37 \\ 1: 38 \\ { 39 \\ p0 = cmp.gtu(r2,r1) 40 \\ if (!p0.new) r1 = sub(r1,r2) 41 \\ if (!p0.new) r0 = add(r0,r3) 42 \\ r3:2 = vlsrw(r3:2,#1) 43 \\ }:endloop0 44 \\ { 45 \\ p0 = cmp.gtu(r2,r1) 46 \\ if (!p0.new) r0 = add(r0,r3) 47 \\ if (!p1) jumpr r31 48 \\ } 49 \\ { 50 \\ r0 = neg(r0) 51 \\ jumpr r31 52 \\ } 53 ); 54 } 55 56 fn __hexagon_umodsi3() callconv(.naked) noreturn { 57 asm volatile ( 58 \\ { 59 \\ r2 = cl0(r0) 60 \\ r3 = cl0(r1) 61 \\ p0 = cmp.gtu(r1,r0) 62 \\ } 63 \\ { 64 \\ r2 = sub(r3,r2) 65 \\ if (p0) jumpr r31 66 \\ } 67 \\ { 68 \\ loop0(1f,r2) 69 \\ p1 = cmp.eq(r2,#0) 70 \\ r2 = lsl(r1,r2) 71 \\ } 72 \\ .falign 73 \\ 1: 74 \\ { 75 \\ p0 = cmp.gtu(r2,r0) 76 \\ if (!p0.new) r0 = sub(r0,r2) 77 \\ r2 = lsr(r2,#1) 78 \\ if (p1) r1 = #0 79 \\ }:endloop0 80 \\ { 81 \\ p0 = cmp.gtu(r2,r0) 82 \\ if (!p0.new) r0 = sub(r0,r1) 83 \\ jumpr r31 84 \\ } 85 ); 86 } 87 88 fn __hexagon_sqrtf() callconv(.naked) noreturn { 89 asm volatile ( 90 \\ { 91 \\ r3,p0 = sfinvsqrta(r0) 92 \\ r5 = sffixupr(r0) 93 \\ r4 = ##0x3f000000 94 \\ r1:0 = combine(#0,#0) 95 \\ } 96 \\ { 97 \\ r0 += sfmpy(r3,r5):lib 98 \\ r1 += sfmpy(r3,r4):lib 99 \\ r2 = r4 100 \\ r3 = r5 101 \\ } 102 \\ { 103 \\ r2 -= sfmpy(r0,r1):lib 104 \\ p1 = sfclass(r5,#1) 105 \\ 106 \\ } 107 \\ { 108 \\ r0 += sfmpy(r0,r2):lib 109 \\ r1 += sfmpy(r1,r2):lib 110 \\ r2 = r4 111 
\\ r3 = r5 112 \\ } 113 \\ { 114 \\ r2 -= sfmpy(r0,r1):lib 115 \\ r3 -= sfmpy(r0,r0):lib 116 \\ } 117 \\ { 118 \\ r0 += sfmpy(r1,r3):lib 119 \\ r1 += sfmpy(r1,r2):lib 120 \\ r2 = r4 121 \\ r3 = r5 122 \\ } 123 \\ { 124 \\ 125 \\ r3 -= sfmpy(r0,r0):lib 126 \\ if (p1) r0 = or(r0,r5) 127 \\ } 128 \\ { 129 \\ r0 += sfmpy(r1,r3,p0):scale 130 \\ jumpr r31 131 \\ } 132 ); 133 } 134 135 fn __hexagon_moddi3() callconv(.naked) noreturn { 136 asm volatile ( 137 \\ { 138 \\ p3 = tstbit(r1,#31) 139 \\ } 140 \\ { 141 \\ r1:0 = abs(r1:0) 142 \\ r3:2 = abs(r3:2) 143 \\ } 144 \\ { 145 \\ r6 = cl0(r1:0) 146 \\ r7 = cl0(r3:2) 147 \\ r5:4 = r3:2 148 \\ r3:2 = r1:0 149 \\ } 150 \\ { 151 \\ r10 = sub(r7,r6) 152 \\ r1:0 = #0 153 \\ r15:14 = #1 154 \\ } 155 \\ { 156 \\ r11 = add(r10,#1) 157 \\ r13:12 = lsl(r5:4,r10) 158 \\ r15:14 = lsl(r15:14,r10) 159 \\ } 160 \\ { 161 \\ p0 = cmp.gtu(r5:4,r3:2) 162 \\ loop0(1f,r11) 163 \\ } 164 \\ { 165 \\ if (p0) jump .hexagon_moddi3_return 166 \\ } 167 \\ .falign 168 \\ 1: 169 \\ { 170 \\ p0 = cmp.gtu(r13:12,r3:2) 171 \\ } 172 \\ { 173 \\ r7:6 = sub(r3:2, r13:12) 174 \\ r9:8 = add(r1:0, r15:14) 175 \\ } 176 \\ { 177 \\ r1:0 = vmux(p0, r1:0, r9:8) 178 \\ r3:2 = vmux(p0, r3:2, r7:6) 179 \\ } 180 \\ { 181 \\ r15:14 = lsr(r15:14, #1) 182 \\ r13:12 = lsr(r13:12, #1) 183 \\ }:endloop0 184 \\ 185 \\ .hexagon_moddi3_return: 186 \\ { 187 \\ r1:0 = neg(r3:2) 188 \\ } 189 \\ { 190 \\ r1:0 = vmux(p3,r1:0,r3:2) 191 \\ jumpr r31 192 \\ } 193 ); 194 } 195 196 fn __hexagon_divdi3() callconv(.naked) noreturn { 197 asm volatile ( 198 \\ { 199 \\ p2 = tstbit(r1,#31) 200 \\ p3 = tstbit(r3,#31) 201 \\ } 202 \\ { 203 \\ r1:0 = abs(r1:0) 204 \\ r3:2 = abs(r3:2) 205 \\ } 206 \\ { 207 \\ r6 = cl0(r1:0) 208 \\ r7 = cl0(r3:2) 209 \\ r5:4 = r3:2 210 \\ r3:2 = r1:0 211 \\ } 212 \\ { 213 \\ p3 = xor(p2,p3) 214 \\ r10 = sub(r7,r6) 215 \\ r1:0 = #0 216 \\ r15:14 = #1 217 \\ } 218 \\ { 219 \\ r11 = add(r10,#1) 220 \\ r13:12 = lsl(r5:4,r10) 221 \\ r15:14 = lsl(r15:14,r10) 222 \\ } 223 
\\ { 224 \\ p0 = cmp.gtu(r5:4,r3:2) 225 \\ loop0(1f,r11) 226 \\ } 227 \\ { 228 \\ if (p0) jump .hexagon_divdi3_return 229 \\ } 230 \\ .falign 231 \\ 1: 232 \\ { 233 \\ p0 = cmp.gtu(r13:12,r3:2) 234 \\ } 235 \\ { 236 \\ r7:6 = sub(r3:2, r13:12) 237 \\ r9:8 = add(r1:0, r15:14) 238 \\ } 239 \\ { 240 \\ r1:0 = vmux(p0, r1:0, r9:8) 241 \\ r3:2 = vmux(p0, r3:2, r7:6) 242 \\ } 243 \\ { 244 \\ r15:14 = lsr(r15:14, #1) 245 \\ r13:12 = lsr(r13:12, #1) 246 \\ }:endloop0 247 \\ 248 \\ .hexagon_divdi3_return: 249 \\ { 250 \\ r3:2 = neg(r1:0) 251 \\ } 252 \\ { 253 \\ r1:0 = vmux(p3,r3:2,r1:0) 254 \\ jumpr r31 255 \\ } 256 ); 257 } 258 259 fn __hexagon_divsf3() callconv(.naked) noreturn { 260 asm volatile ( 261 \\ { 262 \\ r2,p0 = sfrecipa(r0,r1) 263 \\ r4 = sffixupd(r0,r1) 264 \\ r3 = ##0x3f800000 265 \\ } 266 \\ { 267 \\ r5 = sffixupn(r0,r1) 268 \\ r3 -= sfmpy(r4,r2):lib 269 \\ r6 = ##0x80000000 270 \\ r7 = r3 271 \\ } 272 \\ { 273 \\ r2 += sfmpy(r3,r2):lib 274 \\ r3 = r7 275 \\ r6 = r5 276 \\ r0 = and(r6,r5) 277 \\ } 278 \\ { 279 \\ r3 -= sfmpy(r4,r2):lib 280 \\ r0 += sfmpy(r5,r2):lib 281 \\ } 282 \\ { 283 \\ r2 += sfmpy(r3,r2):lib 284 \\ r6 -= sfmpy(r0,r4):lib 285 \\ } 286 \\ { 287 \\ r0 += sfmpy(r6,r2):lib 288 \\ } 289 \\ { 290 \\ r5 -= sfmpy(r0,r4):lib 291 \\ } 292 \\ { 293 \\ r0 += sfmpy(r5,r2,p0):scale 294 \\ jumpr r31 295 \\ } 296 ); 297 } 298 299 fn __hexagon_udivdi3() callconv(.naked) noreturn { 300 asm volatile ( 301 \\ { 302 \\ r6 = cl0(r1:0) 303 \\ r7 = cl0(r3:2) 304 \\ r5:4 = r3:2 305 \\ r3:2 = r1:0 306 \\ } 307 \\ { 308 \\ r10 = sub(r7,r6) 309 \\ r1:0 = #0 310 \\ r15:14 = #1 311 \\ } 312 \\ { 313 \\ r11 = add(r10,#1) 314 \\ r13:12 = lsl(r5:4,r10) 315 \\ r15:14 = lsl(r15:14,r10) 316 \\ } 317 \\ { 318 \\ p0 = cmp.gtu(r5:4,r3:2) 319 \\ loop0(1f,r11) 320 \\ } 321 \\ { 322 \\ if (p0) jumpr r31 323 \\ } 324 \\ .falign 325 \\ 1: 326 \\ { 327 \\ p0 = cmp.gtu(r13:12,r3:2) 328 \\ } 329 \\ { 330 \\ r7:6 = sub(r3:2, r13:12) 331 \\ r9:8 = add(r1:0, r15:14) 332 \\ } 333 \\ { 
334 \\ r1:0 = vmux(p0, r1:0, r9:8) 335 \\ r3:2 = vmux(p0, r3:2, r7:6) 336 \\ } 337 \\ { 338 \\ r15:14 = lsr(r15:14, #1) 339 \\ r13:12 = lsr(r13:12, #1) 340 \\ }:endloop0 341 \\ { 342 \\ jumpr r31 343 \\ } 344 ); 345 } 346 347 fn __hexagon_umoddi3() callconv(.naked) noreturn { 348 asm volatile ( 349 \\ { 350 \\ r6 = cl0(r1:0) 351 \\ r7 = cl0(r3:2) 352 \\ r5:4 = r3:2 353 \\ r3:2 = r1:0 354 \\ } 355 \\ { 356 \\ r10 = sub(r7,r6) 357 \\ r1:0 = #0 358 \\ r15:14 = #1 359 \\ } 360 \\ { 361 \\ r11 = add(r10,#1) 362 \\ r13:12 = lsl(r5:4,r10) 363 \\ r15:14 = lsl(r15:14,r10) 364 \\ } 365 \\ { 366 \\ p0 = cmp.gtu(r5:4,r3:2) 367 \\ loop0(1f,r11) 368 \\ } 369 \\ { 370 \\ if (p0) jump .hexagon_umoddi3_return 371 \\ } 372 \\ .falign 373 \\ 1: 374 \\ { 375 \\ p0 = cmp.gtu(r13:12,r3:2) 376 \\ } 377 \\ { 378 \\ r7:6 = sub(r3:2, r13:12) 379 \\ r9:8 = add(r1:0, r15:14) 380 \\ } 381 \\ { 382 \\ r1:0 = vmux(p0, r1:0, r9:8) 383 \\ r3:2 = vmux(p0, r3:2, r7:6) 384 \\ } 385 \\ { 386 \\ r15:14 = lsr(r15:14, #1) 387 \\ r13:12 = lsr(r13:12, #1) 388 \\ }:endloop0 389 \\ 390 \\ .hexagon_umoddi3_return: 391 \\ { 392 \\ r1:0 = r3:2 393 \\ jumpr r31 394 \\ } 395 ); 396 } 397 398 fn __hexagon_modsi3() callconv(.naked) noreturn { 399 asm volatile ( 400 \\ { 401 \\ p2 = cmp.ge(r0,#0) 402 \\ r2 = abs(r0) 403 \\ r1 = abs(r1) 404 \\ } 405 \\ { 406 \\ r3 = cl0(r2) 407 \\ r4 = cl0(r1) 408 \\ p0 = cmp.gtu(r1,r2) 409 \\ } 410 \\ { 411 \\ r3 = sub(r4,r3) 412 \\ if (p0) jumpr r31 413 \\ } 414 \\ { 415 \\ p1 = cmp.eq(r3,#0) 416 \\ loop0(1f,r3) 417 \\ r0 = r2 418 \\ r2 = lsl(r1,r3) 419 \\ } 420 \\ .falign 421 \\ 1: 422 \\ { 423 \\ p0 = cmp.gtu(r2,r0) 424 \\ if (!p0.new) r0 = sub(r0,r2) 425 \\ r2 = lsr(r2,#1) 426 \\ if (p1) r1 = #0 427 \\ }:endloop0 428 \\ { 429 \\ p0 = cmp.gtu(r2,r0) 430 \\ if (!p0.new) r0 = sub(r0,r1) 431 \\ if (p2) jumpr r31 432 \\ } 433 \\ { 434 \\ r0 = neg(r0) 435 \\ jumpr r31 436 \\ } 437 ); 438 } 439 440 fn __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes() callconv(.naked) noreturn { 
441 asm volatile ( 442 \\ { 443 \\ p0 = bitsclr(r1,#7) 444 \\ p0 = bitsclr(r0,#7) 445 \\ if (p0.new) r5:4 = memd(r1) 446 \\ r3 = #-3 447 \\ } 448 \\ { 449 \\ if (!p0) jump .Lmemcpy_call 450 \\ if (p0) memd(r0++#8) = r5:4 451 \\ if (p0) r5:4 = memd(r1+#8) 452 \\ r3 += lsr(r2,#3) 453 \\ } 454 \\ { 455 \\ memd(r0++#8) = r5:4 456 \\ r5:4 = memd(r1+#16) 457 \\ r1 = add(r1,#24) 458 \\ loop0(1f,r3) 459 \\ } 460 \\ .falign 461 \\ 1: 462 \\ { 463 \\ memd(r0++#8) = r5:4 464 \\ r5:4 = memd(r1++#8) 465 \\ }:endloop0 466 \\ { 467 \\ memd(r0) = r5:4 468 \\ r0 -= add(r2,#-8) 469 \\ jumpr r31 470 \\ } 471 \\ .Lmemcpy_call: 472 \\ jump memcpy@PLT 473 ); 474 } 475 476 fn __hexagon_udivsi3() callconv(.naked) noreturn { 477 asm volatile ( 478 \\ { 479 \\ r2 = cl0(r0) 480 \\ r3 = cl0(r1) 481 \\ r5:4 = combine(#1,#0) 482 \\ p0 = cmp.gtu(r1,r0) 483 \\ } 484 \\ { 485 \\ r6 = sub(r3,r2) 486 \\ r4 = r1 487 \\ r1:0 = combine(r0,r4) 488 \\ if (p0) jumpr r31 489 \\ } 490 \\ { 491 \\ r3:2 = vlslw(r5:4,r6) 492 \\ loop0(1f,r6) 493 \\ } 494 \\ .falign 495 \\ 1: 496 \\ { 497 \\ p0 = cmp.gtu(r2,r1) 498 \\ if (!p0.new) r1 = sub(r1,r2) 499 \\ if (!p0.new) r0 = add(r0,r3) 500 \\ r3:2 = vlsrw(r3:2,#1) 501 \\ }:endloop0 502 \\ { 503 \\ p0 = cmp.gtu(r2,r1) 504 \\ if (!p0.new) r0 = add(r0,r3) 505 \\ jumpr r31 506 \\ } 507 ); 508 } 509 510 fn __hexagon_adddf3() align(32) callconv(.naked) noreturn { 511 asm volatile ( 512 \\ { 513 \\ r4 = extractu(r1,#11,#20) 514 \\ r5 = extractu(r3,#11,#20) 515 \\ r13:12 = combine(##0x20000000,#0) 516 \\ } 517 \\ { 518 \\ p3 = dfclass(r1:0,#2) 519 \\ p3 = dfclass(r3:2,#2) 520 \\ r9:8 = r13:12 521 \\ p2 = cmp.gtu(r5,r4) 522 \\ } 523 \\ { 524 \\ if (!p3) jump .Ladd_abnormal 525 \\ if (p2) r1:0 = r3:2 526 \\ if (p2) r3:2 = r1:0 527 \\ if (p2) r5:4 = combine(r4,r5) 528 \\ } 529 \\ { 530 \\ r13:12 = insert(r1:0,#52,#11 -2) 531 \\ r9:8 = insert(r3:2,#52,#11 -2) 532 \\ r15 = sub(r4,r5) 533 \\ r7:6 = combine(#62,#1) 534 \\ } 535 \\ 536 \\ 537 \\ 538 \\ 539 \\ 540 \\ .Ladd_continue: 
541 \\ { 542 \\ r15 = min(r15,r7) 543 \\ 544 \\ r11:10 = neg(r13:12) 545 \\ p2 = cmp.gt(r1,#-1) 546 \\ r14 = #0 547 \\ } 548 \\ { 549 \\ if (!p2) r13:12 = r11:10 550 \\ r11:10 = extractu(r9:8,r15:14) 551 \\ r9:8 = ASR(r9:8,r15) 552 \\ 553 \\ 554 \\ 555 \\ 556 \\ r15:14 = #0 557 \\ } 558 \\ { 559 \\ p1 = cmp.eq(r11:10,r15:14) 560 \\ if (!p1.new) r8 = or(r8,r6) 561 \\ r5 = add(r4,#-1024 -60) 562 \\ p3 = cmp.gt(r3,#-1) 563 \\ } 564 \\ { 565 \\ r13:12 = add(r13:12,r9:8) 566 \\ r11:10 = sub(r13:12,r9:8) 567 \\ r7:6 = combine(#54,##2045) 568 \\ } 569 \\ { 570 \\ p0 = cmp.gtu(r4,r7) 571 \\ p0 = !cmp.gtu(r4,r6) 572 \\ if (!p0.new) jump:nt .Ladd_ovf_unf 573 \\ if (!p3) r13:12 = r11:10 574 \\ } 575 \\ { 576 \\ r1:0 = convert_d2df(r13:12) 577 \\ p0 = cmp.eq(r13,#0) 578 \\ p0 = cmp.eq(r12,#0) 579 \\ if (p0.new) jump:nt .Ladd_zero 580 \\ } 581 \\ { 582 \\ r1 += asl(r5,#20) 583 \\ jumpr r31 584 \\ } 585 \\ 586 \\ .falign 587 \\ .Ladd_zero: 588 \\ 589 \\ 590 \\ { 591 \\ r28 = USR 592 \\ r1:0 = #0 593 \\ r3 = #1 594 \\ } 595 \\ { 596 \\ r28 = extractu(r28,#2,#22) 597 \\ r3 = asl(r3,#31) 598 \\ } 599 \\ { 600 \\ p0 = cmp.eq(r28,#2) 601 \\ if (p0.new) r1 = xor(r1,r3) 602 \\ jumpr r31 603 \\ } 604 \\ .falign 605 \\ .Ladd_ovf_unf: 606 \\ { 607 \\ r1:0 = convert_d2df(r13:12) 608 \\ p0 = cmp.eq(r13,#0) 609 \\ p0 = cmp.eq(r12,#0) 610 \\ if (p0.new) jump:nt .Ladd_zero 611 \\ } 612 \\ { 613 \\ r28 = extractu(r1,#11,#20) 614 \\ r1 += asl(r5,#20) 615 \\ } 616 \\ { 617 \\ r5 = add(r5,r28) 618 \\ r3:2 = combine(##0x00100000,#0) 619 \\ } 620 \\ { 621 \\ p0 = cmp.gt(r5,##1024 +1024 -2) 622 \\ if (p0.new) jump:nt .Ladd_ovf 623 \\ } 624 \\ { 625 \\ p0 = cmp.gt(r5,#0) 626 \\ if (p0.new) jumpr:t r31 627 \\ r28 = sub(#1,r5) 628 \\ } 629 \\ { 630 \\ r3:2 = insert(r1:0,#52,#0) 631 \\ r1:0 = r13:12 632 \\ } 633 \\ { 634 \\ r3:2 = lsr(r3:2,r28) 635 \\ } 636 \\ { 637 \\ r1:0 = insert(r3:2,#63,#0) 638 \\ jumpr r31 639 \\ } 640 \\ .falign 641 \\ .Ladd_ovf: 642 \\ 643 \\ { 644 \\ r1:0 = r13:12 645 \\ r28 = 
USR 646 \\ r13:12 = combine(##0x7fefffff,#-1) 647 \\ } 648 \\ { 649 \\ r5 = extractu(r28,#2,#22) 650 \\ r28 = or(r28,#0x28) 651 \\ r9:8 = combine(##0x7ff00000,#0) 652 \\ } 653 \\ { 654 \\ USR = r28 655 \\ r5 ^= lsr(r1,#31) 656 \\ r28 = r5 657 \\ } 658 \\ { 659 \\ p0 = !cmp.eq(r28,#1) 660 \\ p0 = !cmp.eq(r5,#2) 661 \\ if (p0.new) r13:12 = r9:8 662 \\ } 663 \\ { 664 \\ r1:0 = insert(r13:12,#63,#0) 665 \\ } 666 \\ { 667 \\ p0 = dfcmp.eq(r1:0,r1:0) 668 \\ jumpr r31 669 \\ } 670 \\ 671 \\ .Ladd_abnormal: 672 \\ { 673 \\ r13:12 = extractu(r1:0,#63,#0) 674 \\ r9:8 = extractu(r3:2,#63,#0) 675 \\ } 676 \\ { 677 \\ p3 = cmp.gtu(r13:12,r9:8) 678 \\ if (!p3.new) r1:0 = r3:2 679 \\ if (!p3.new) r3:2 = r1:0 680 \\ } 681 \\ { 682 \\ 683 \\ p0 = dfclass(r1:0,#0x0f) 684 \\ if (!p0.new) jump:nt .Linvalid_nan_add 685 \\ if (!p3) r13:12 = r9:8 686 \\ if (!p3) r9:8 = r13:12 687 \\ } 688 \\ { 689 \\ 690 \\ 691 \\ p1 = dfclass(r1:0,#0x08) 692 \\ if (p1.new) jump:nt .Linf_add 693 \\ } 694 \\ { 695 \\ p2 = dfclass(r3:2,#0x01) 696 \\ if (p2.new) jump:nt .LB_zero 697 \\ r13:12 = #0 698 \\ } 699 \\ 700 \\ { 701 \\ p0 = dfclass(r1:0,#4) 702 \\ if (p0.new) jump:nt .Ladd_two_subnormal 703 \\ r13:12 = combine(##0x20000000,#0) 704 \\ } 705 \\ { 706 \\ r4 = extractu(r1,#11,#20) 707 \\ r5 = #1 708 \\ 709 \\ r9:8 = asl(r9:8,#11 -2) 710 \\ } 711 \\ 712 \\ 713 \\ 714 \\ { 715 \\ r13:12 = insert(r1:0,#52,#11 -2) 716 \\ r15 = sub(r4,r5) 717 \\ r7:6 = combine(#62,#1) 718 \\ jump .Ladd_continue 719 \\ } 720 \\ 721 \\ .Ladd_two_subnormal: 722 \\ { 723 \\ r13:12 = extractu(r1:0,#63,#0) 724 \\ r9:8 = extractu(r3:2,#63,#0) 725 \\ } 726 \\ { 727 \\ r13:12 = neg(r13:12) 728 \\ r9:8 = neg(r9:8) 729 \\ p0 = cmp.gt(r1,#-1) 730 \\ p1 = cmp.gt(r3,#-1) 731 \\ } 732 \\ { 733 \\ if (p0) r13:12 = r1:0 734 \\ if (p1) r9:8 = r3:2 735 \\ } 736 \\ { 737 \\ r13:12 = add(r13:12,r9:8) 738 \\ } 739 \\ { 740 \\ r9:8 = neg(r13:12) 741 \\ p0 = cmp.gt(r13,#-1) 742 \\ r3:2 = #0 743 \\ } 744 \\ { 745 \\ if (!p0) r1:0 = r9:8 746 \\ if 
(p0) r1:0 = r13:12 747 \\ r3 = ##0x80000000 748 \\ } 749 \\ { 750 \\ if (!p0) r1 = or(r1,r3) 751 \\ p0 = dfcmp.eq(r1:0,r3:2) 752 \\ if (p0.new) jump:nt .Lzero_plus_zero 753 \\ } 754 \\ { 755 \\ jumpr r31 756 \\ } 757 \\ 758 \\ .Linvalid_nan_add: 759 \\ { 760 \\ r28 = convert_df2sf(r1:0) 761 \\ p0 = dfclass(r3:2,#0x0f) 762 \\ if (p0.new) r3:2 = r1:0 763 \\ } 764 \\ { 765 \\ r2 = convert_df2sf(r3:2) 766 \\ r1:0 = #-1 767 \\ jumpr r31 768 \\ } 769 \\ .falign 770 \\ .LB_zero: 771 \\ { 772 \\ p0 = dfcmp.eq(r13:12,r1:0) 773 \\ if (!p0.new) jumpr:t r31 774 \\ } 775 \\ 776 \\ 777 \\ 778 \\ 779 \\ .Lzero_plus_zero: 780 \\ { 781 \\ p0 = cmp.eq(r1:0,r3:2) 782 \\ if (p0.new) jumpr:t r31 783 \\ } 784 \\ { 785 \\ r28 = USR 786 \\ } 787 \\ { 788 \\ r28 = extractu(r28,#2,#22) 789 \\ r1:0 = #0 790 \\ } 791 \\ { 792 \\ p0 = cmp.eq(r28,#2) 793 \\ if (p0.new) r1 = ##0x80000000 794 \\ jumpr r31 795 \\ } 796 \\ .Linf_add: 797 \\ 798 \\ { 799 \\ p0 = !cmp.eq(r1,r3) 800 \\ p0 = dfclass(r3:2,#8) 801 \\ if (!p0.new) jumpr:t r31 802 \\ } 803 \\ { 804 \\ r2 = ##0x7f800001 805 \\ } 806 \\ { 807 \\ r1:0 = convert_sf2df(r2) 808 \\ jumpr r31 809 \\ } 810 ); 811 } 812 813 fn __hexagon_subdf3() align(32) callconv(.naked) noreturn { 814 asm volatile ( 815 \\ { 816 \\ r3 = togglebit(r3,#31) 817 \\ jump ##__hexagon_adddf3 818 \\ } 819 ); 820 } 821 822 fn __hexagon_divdf3() align(32) callconv(.naked) noreturn { 823 asm volatile ( 824 \\ { 825 \\ p2 = dfclass(r1:0,#0x02) 826 \\ p2 = dfclass(r3:2,#0x02) 827 \\ r13:12 = combine(r3,r1) 828 \\ r28 = xor(r1,r3) 829 \\ } 830 \\ { 831 \\ if (!p2) jump .Ldiv_abnormal 832 \\ r7:6 = extractu(r3:2,#23,#52 -23) 833 \\ r8 = ##0x3f800001 834 \\ } 835 \\ { 836 \\ r9 = or(r8,r6) 837 \\ r13 = extractu(r13,#11,#52 -32) 838 \\ r12 = extractu(r12,#11,#52 -32) 839 \\ p3 = cmp.gt(r28,#-1) 840 \\ } 841 \\ 842 \\ 843 \\ .Ldenorm_continue: 844 \\ { 845 \\ r11,p0 = sfrecipa(r8,r9) 846 \\ r10 = and(r8,#-2) 847 \\ r28 = #1 848 \\ r12 = sub(r12,r13) 849 \\ } 850 \\ 851 \\ 852 \\ { 
853 \\ r10 -= sfmpy(r11,r9):lib 854 \\ r1 = insert(r28,#11 +1,#52 -32) 855 \\ r13 = ##0x00800000 << 3 856 \\ } 857 \\ { 858 \\ r11 += sfmpy(r11,r10):lib 859 \\ r3 = insert(r28,#11 +1,#52 -32) 860 \\ r10 = and(r8,#-2) 861 \\ } 862 \\ { 863 \\ r10 -= sfmpy(r11,r9):lib 864 \\ r5 = #-0x3ff +1 865 \\ r4 = #0x3ff -1 866 \\ } 867 \\ { 868 \\ r11 += sfmpy(r11,r10):lib 869 \\ p1 = cmp.gt(r12,r5) 870 \\ p1 = !cmp.gt(r12,r4) 871 \\ } 872 \\ { 873 \\ r13 = insert(r11,#23,#3) 874 \\ r5:4 = #0 875 \\ r12 = add(r12,#-61) 876 \\ } 877 \\ 878 \\ 879 \\ 880 \\ 881 \\ { 882 \\ r13 = add(r13,#((-3) << 3)) 883 \\ } 884 \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); } 885 \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); } 886 \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); } 887 \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); } 888 \\ 889 \\ 890 \\ 891 \\ 892 \\ 893 \\ 894 \\ 895 \\ { 896 \\ 897 \\ r15:14 = sub(r1:0,r3:2) 898 \\ p0 = cmp.gtu(r3:2,r1:0) 899 \\ 900 \\ if (!p0.new) r6 = #2 901 \\ } 902 \\ { 903 \\ r5:4 = add(r5:4,r7:6) 904 \\ if (!p0) r1:0 = r15:14 905 \\ r15:14 = #0 906 \\ } 907 \\ { 908 \\ p0 = cmp.eq(r1:0,r15:14) 909 \\ if (!p0.new) r4 = or(r4,r28) 910 \\ } 911 \\ { 912 \\ r7:6 = neg(r5:4) 913 \\ } 914 \\ { 915 \\ if (!p3) r5:4 = r7:6 916 \\ } 917 \\ { 918 \\ r1:0 = convert_d2df(r5:4) 919 \\ if (!p1) jump .Ldiv_ovf_unf 920 \\ } 921 \\ { 922 \\ r1 += asl(r12,#52 -32) 923 \\ jumpr r31 924 \\ } 925 \\ 926 \\ .Ldiv_ovf_unf: 927 \\ { 928 \\ r1 += asl(r12,#52 -32) 929 \\ r13 = 
extractu(r1,#11,#52 -32) 930 \\ } 931 \\ { 932 \\ r7:6 = abs(r5:4) 933 \\ r12 = add(r12,r13) 934 \\ } 935 \\ { 936 \\ p0 = cmp.gt(r12,##0x3ff +0x3ff) 937 \\ if (p0.new) jump:nt .Ldiv_ovf 938 \\ } 939 \\ { 940 \\ p0 = cmp.gt(r12,#0) 941 \\ if (p0.new) jump:nt .Ldiv_possible_unf 942 \\ } 943 \\ { 944 \\ r13 = add(clb(r7:6),#-1) 945 \\ r12 = sub(#7,r12) 946 \\ r10 = USR 947 \\ r11 = #63 948 \\ } 949 \\ { 950 \\ r13 = min(r12,r11) 951 \\ r11 = or(r10,#0x030) 952 \\ r7:6 = asl(r7:6,r13) 953 \\ r12 = #0 954 \\ } 955 \\ { 956 \\ r15:14 = extractu(r7:6,r13:12) 957 \\ r7:6 = lsr(r7:6,r13) 958 \\ r3:2 = #1 959 \\ } 960 \\ { 961 \\ p0 = cmp.gtu(r3:2,r15:14) 962 \\ if (!p0.new) r6 = or(r2,r6) 963 \\ r7 = setbit(r7,#52 -32+4) 964 \\ } 965 \\ { 966 \\ r5:4 = neg(r7:6) 967 \\ p0 = bitsclr(r6,#(1<<4)-1) 968 \\ if (!p0.new) r10 = r11 969 \\ } 970 \\ { 971 \\ USR = r10 972 \\ if (p3) r5:4 = r7:6 973 \\ r10 = #-0x3ff -(52 +4) 974 \\ } 975 \\ { 976 \\ r1:0 = convert_d2df(r5:4) 977 \\ } 978 \\ { 979 \\ r1 += asl(r10,#52 -32) 980 \\ jumpr r31 981 \\ } 982 \\ 983 \\ 984 \\ .Ldiv_possible_unf: 985 \\ 986 \\ 987 \\ { 988 \\ r3:2 = extractu(r1:0,#63,#0) 989 \\ r15:14 = combine(##0x00100000,#0) 990 \\ r10 = #0x7FFF 991 \\ } 992 \\ { 993 \\ p0 = dfcmp.eq(r15:14,r3:2) 994 \\ p0 = bitsset(r7,r10) 995 \\ } 996 \\ 997 \\ 998 \\ 999 \\ 1000 \\ 1001 \\ 1002 \\ { 1003 \\ if (!p0) jumpr r31 1004 \\ r10 = USR 1005 \\ } 1006 \\ 1007 \\ { 1008 \\ r10 = or(r10,#0x30) 1009 \\ } 1010 \\ { 1011 \\ USR = r10 1012 \\ } 1013 \\ { 1014 \\ p0 = dfcmp.eq(r1:0,r1:0) 1015 \\ jumpr r31 1016 \\ } 1017 \\ 1018 \\ .Ldiv_ovf: 1019 \\ 1020 \\ 1021 \\ 1022 \\ { 1023 \\ r10 = USR 1024 \\ r3:2 = combine(##0x7fefffff,#-1) 1025 \\ r1 = mux(p3,#0,#-1) 1026 \\ } 1027 \\ { 1028 \\ r7:6 = combine(##0x7ff00000,#0) 1029 \\ r5 = extractu(r10,#2,#22) 1030 \\ r10 = or(r10,#0x28) 1031 \\ } 1032 \\ { 1033 \\ USR = r10 1034 \\ r5 ^= lsr(r1,#31) 1035 \\ r4 = r5 1036 \\ } 1037 \\ { 1038 \\ p0 = !cmp.eq(r4,#1) 1039 \\ p0 = !cmp.eq(r5,#2) 
1040 \\ if (p0.new) r3:2 = r7:6 1041 \\ p0 = dfcmp.eq(r3:2,r3:2) 1042 \\ } 1043 \\ { 1044 \\ r1:0 = insert(r3:2,#63,#0) 1045 \\ jumpr r31 1046 \\ } 1047 \\ 1048 \\ 1049 \\ 1050 \\ 1051 \\ 1052 \\ 1053 \\ 1054 \\ .Ldiv_abnormal: 1055 \\ { 1056 \\ p0 = dfclass(r1:0,#0x0F) 1057 \\ p0 = dfclass(r3:2,#0x0F) 1058 \\ p3 = cmp.gt(r28,#-1) 1059 \\ } 1060 \\ { 1061 \\ p1 = dfclass(r1:0,#0x08) 1062 \\ p1 = dfclass(r3:2,#0x08) 1063 \\ } 1064 \\ { 1065 \\ p2 = dfclass(r1:0,#0x01) 1066 \\ p2 = dfclass(r3:2,#0x01) 1067 \\ } 1068 \\ { 1069 \\ if (!p0) jump .Ldiv_nan 1070 \\ if (p1) jump .Ldiv_invalid 1071 \\ } 1072 \\ { 1073 \\ if (p2) jump .Ldiv_invalid 1074 \\ } 1075 \\ { 1076 \\ p2 = dfclass(r1:0,#(0x0F ^ 0x01)) 1077 \\ p2 = dfclass(r3:2,#(0x0F ^ 0x08)) 1078 \\ } 1079 \\ { 1080 \\ p1 = dfclass(r1:0,#(0x0F ^ 0x08)) 1081 \\ p1 = dfclass(r3:2,#(0x0F ^ 0x01)) 1082 \\ } 1083 \\ { 1084 \\ if (!p2) jump .Ldiv_zero_result 1085 \\ if (!p1) jump .Ldiv_inf_result 1086 \\ } 1087 \\ 1088 \\ 1089 \\ 1090 \\ 1091 \\ 1092 \\ { 1093 \\ p0 = dfclass(r1:0,#0x02) 1094 \\ p1 = dfclass(r3:2,#0x02) 1095 \\ r10 = ##0x00100000 1096 \\ } 1097 \\ { 1098 \\ r13:12 = combine(r3,r1) 1099 \\ r1 = insert(r10,#11 +1,#52 -32) 1100 \\ r3 = insert(r10,#11 +1,#52 -32) 1101 \\ } 1102 \\ { 1103 \\ if (p0) r1 = or(r1,r10) 1104 \\ if (p1) r3 = or(r3,r10) 1105 \\ } 1106 \\ { 1107 \\ r5 = add(clb(r1:0),#-11) 1108 \\ r4 = add(clb(r3:2),#-11) 1109 \\ r10 = #1 1110 \\ } 1111 \\ { 1112 \\ r12 = extractu(r12,#11,#52 -32) 1113 \\ r13 = extractu(r13,#11,#52 -32) 1114 \\ } 1115 \\ { 1116 \\ r1:0 = asl(r1:0,r5) 1117 \\ r3:2 = asl(r3:2,r4) 1118 \\ if (!p0) r12 = sub(r10,r5) 1119 \\ if (!p1) r13 = sub(r10,r4) 1120 \\ } 1121 \\ { 1122 \\ r7:6 = extractu(r3:2,#23,#52 -23) 1123 \\ } 1124 \\ { 1125 \\ r9 = or(r8,r6) 1126 \\ jump .Ldenorm_continue 1127 \\ } 1128 \\ 1129 \\ .Ldiv_zero_result: 1130 \\ { 1131 \\ r1 = xor(r1,r3) 1132 \\ r3:2 = #0 1133 \\ } 1134 \\ { 1135 \\ r1:0 = insert(r3:2,#63,#0) 1136 \\ jumpr r31 1137 \\ } 1138 \\ 
.Ldiv_inf_result: 1139 \\ { 1140 \\ p2 = dfclass(r3:2,#0x01) 1141 \\ p2 = dfclass(r1:0,#(0x0F ^ 0x08)) 1142 \\ } 1143 \\ { 1144 \\ r10 = USR 1145 \\ if (!p2) jump 1f 1146 \\ r1 = xor(r1,r3) 1147 \\ } 1148 \\ { 1149 \\ r10 = or(r10,#0x04) 1150 \\ } 1151 \\ { 1152 \\ USR = r10 1153 \\ } 1154 \\ 1: 1155 \\ { 1156 \\ r3:2 = combine(##0x7ff00000,#0) 1157 \\ p0 = dfcmp.uo(r3:2,r3:2) 1158 \\ } 1159 \\ { 1160 \\ r1:0 = insert(r3:2,#63,#0) 1161 \\ jumpr r31 1162 \\ } 1163 \\ .Ldiv_nan: 1164 \\ { 1165 \\ p0 = dfclass(r1:0,#0x10) 1166 \\ p1 = dfclass(r3:2,#0x10) 1167 \\ if (!p0.new) r1:0 = r3:2 1168 \\ if (!p1.new) r3:2 = r1:0 1169 \\ } 1170 \\ { 1171 \\ r5 = convert_df2sf(r1:0) 1172 \\ r4 = convert_df2sf(r3:2) 1173 \\ } 1174 \\ { 1175 \\ r1:0 = #-1 1176 \\ jumpr r31 1177 \\ } 1178 \\ 1179 \\ .Ldiv_invalid: 1180 \\ { 1181 \\ r10 = ##0x7f800001 1182 \\ } 1183 \\ { 1184 \\ r1:0 = convert_sf2df(r10) 1185 \\ jumpr r31 1186 \\ } 1187 ); 1188 } 1189 1190 fn __hexagon_muldf3() align(32) callconv(.naked) noreturn { 1191 asm volatile ( 1192 \\ { 1193 \\ p0 = dfclass(r1:0,#2) 1194 \\ p0 = dfclass(r3:2,#2) 1195 \\ r13:12 = combine(##0x40000000,#0) 1196 \\ } 1197 \\ { 1198 \\ r13:12 = insert(r1:0,#52,#11 -1) 1199 \\ r5:4 = asl(r3:2,#11 -1) 1200 \\ r28 = #-1024 1201 \\ r9:8 = #1 1202 \\ } 1203 \\ { 1204 \\ r7:6 = mpyu(r4,r13) 1205 \\ r5:4 = insert(r9:8,#2,#62) 1206 \\ } 1207 \\ 1208 \\ 1209 \\ 1210 \\ 1211 \\ { 1212 \\ r15:14 = mpyu(r12,r4) 1213 \\ r7:6 += mpyu(r12,r5) 1214 \\ } 1215 \\ { 1216 \\ r7:6 += lsr(r15:14,#32) 1217 \\ r11:10 = mpyu(r13,r5) 1218 \\ r5:4 = combine(##1024 +1024 -4,#0) 1219 \\ } 1220 \\ { 1221 \\ r11:10 += lsr(r7:6,#32) 1222 \\ if (!p0) jump .Lmul_abnormal 1223 \\ p1 = cmp.eq(r14,#0) 1224 \\ p1 = cmp.eq(r6,#0) 1225 \\ } 1226 \\ { 1227 \\ if (!p1) r10 = or(r10,r8) 1228 \\ r6 = extractu(r1,#11,#20) 1229 \\ r7 = extractu(r3,#11,#20) 1230 \\ } 1231 \\ { 1232 \\ r15:14 = neg(r11:10) 1233 \\ r6 += add(r28,r7) 1234 \\ r28 = xor(r1,r3) 1235 \\ } 1236 \\ { 1237 \\ if 
(!p2.new) r11:10 = r15:14 1238 \\ p2 = cmp.gt(r28,#-1) 1239 \\ p0 = !cmp.gt(r6,r5) 1240 \\ p0 = cmp.gt(r6,r4) 1241 \\ if (!p0.new) jump:nt .Lmul_ovf_unf 1242 \\ } 1243 \\ { 1244 \\ r1:0 = convert_d2df(r11:10) 1245 \\ r6 = add(r6,#-1024 -58) 1246 \\ } 1247 \\ { 1248 \\ r1 += asl(r6,#20) 1249 \\ jumpr r31 1250 \\ } 1251 \\ 1252 \\ .falign 1253 \\ .Lmul_possible_unf: 1254 \\ { 1255 \\ p0 = cmp.eq(r0,#0) 1256 \\ p0 = bitsclr(r1,r4) 1257 \\ if (!p0.new) jumpr:t r31 1258 \\ r5 = #0x7fff 1259 \\ } 1260 \\ { 1261 \\ p0 = bitsset(r13,r5) 1262 \\ r4 = USR 1263 \\ r5 = #0x030 1264 \\ } 1265 \\ { 1266 \\ if (p0) r4 = or(r4,r5) 1267 \\ } 1268 \\ { 1269 \\ USR = r4 1270 \\ } 1271 \\ { 1272 \\ p0 = dfcmp.eq(r1:0,r1:0) 1273 \\ jumpr r31 1274 \\ } 1275 \\ .falign 1276 \\ .Lmul_ovf_unf: 1277 \\ { 1278 \\ r1:0 = convert_d2df(r11:10) 1279 \\ r13:12 = abs(r11:10) 1280 \\ r7 = add(r6,#-1024 -58) 1281 \\ } 1282 \\ { 1283 \\ r1 += asl(r7,#20) 1284 \\ r7 = extractu(r1,#11,#20) 1285 \\ r4 = ##0x7FEFFFFF 1286 \\ } 1287 \\ { 1288 \\ r7 += add(r6,##-1024 -58) 1289 \\ 1290 \\ r5 = #0 1291 \\ } 1292 \\ { 1293 \\ p0 = cmp.gt(r7,##1024 +1024 -2) 1294 \\ if (p0.new) jump:nt .Lmul_ovf 1295 \\ } 1296 \\ { 1297 \\ p0 = cmp.gt(r7,#0) 1298 \\ if (p0.new) jump:nt .Lmul_possible_unf 1299 \\ r5 = sub(r6,r5) 1300 \\ r28 = #63 1301 \\ } 1302 \\ { 1303 \\ r4 = #0 1304 \\ r5 = sub(#5,r5) 1305 \\ } 1306 \\ { 1307 \\ p3 = cmp.gt(r11,#-1) 1308 \\ r5 = min(r5,r28) 1309 \\ r11:10 = r13:12 1310 \\ } 1311 \\ { 1312 \\ r28 = USR 1313 \\ r15:14 = extractu(r11:10,r5:4) 1314 \\ } 1315 \\ { 1316 \\ r11:10 = asr(r11:10,r5) 1317 \\ r4 = #0x0030 1318 \\ r1 = insert(r9,#11,#20) 1319 \\ } 1320 \\ { 1321 \\ p0 = cmp.gtu(r9:8,r15:14) 1322 \\ if (!p0.new) r10 = or(r10,r8) 1323 \\ r11 = setbit(r11,#20 +3) 1324 \\ } 1325 \\ { 1326 \\ r15:14 = neg(r11:10) 1327 \\ p1 = bitsclr(r10,#0x7) 1328 \\ if (!p1.new) r28 = or(r4,r28) 1329 \\ } 1330 \\ { 1331 \\ if (!p3) r11:10 = r15:14 1332 \\ USR = r28 1333 \\ } 1334 \\ { 1335 \\ r1:0 = 
convert_d2df(r11:10) 1336 \\ p0 = dfcmp.eq(r1:0,r1:0) 1337 \\ } 1338 \\ { 1339 \\ r1 = insert(r9,#11 -1,#20 +1) 1340 \\ jumpr r31 1341 \\ } 1342 \\ .falign 1343 \\ .Lmul_ovf: 1344 \\ 1345 \\ { 1346 \\ r28 = USR 1347 \\ r13:12 = combine(##0x7fefffff,#-1) 1348 \\ r1:0 = r11:10 1349 \\ } 1350 \\ { 1351 \\ r14 = extractu(r28,#2,#22) 1352 \\ r28 = or(r28,#0x28) 1353 \\ r5:4 = combine(##0x7ff00000,#0) 1354 \\ } 1355 \\ { 1356 \\ USR = r28 1357 \\ r14 ^= lsr(r1,#31) 1358 \\ r28 = r14 1359 \\ } 1360 \\ { 1361 \\ p0 = !cmp.eq(r28,#1) 1362 \\ p0 = !cmp.eq(r14,#2) 1363 \\ if (p0.new) r13:12 = r5:4 1364 \\ p0 = dfcmp.eq(r1:0,r1:0) 1365 \\ } 1366 \\ { 1367 \\ r1:0 = insert(r13:12,#63,#0) 1368 \\ jumpr r31 1369 \\ } 1370 \\ 1371 \\ .Lmul_abnormal: 1372 \\ { 1373 \\ r13:12 = extractu(r1:0,#63,#0) 1374 \\ r5:4 = extractu(r3:2,#63,#0) 1375 \\ } 1376 \\ { 1377 \\ p3 = cmp.gtu(r13:12,r5:4) 1378 \\ if (!p3.new) r1:0 = r3:2 1379 \\ if (!p3.new) r3:2 = r1:0 1380 \\ } 1381 \\ { 1382 \\ 1383 \\ p0 = dfclass(r1:0,#0x0f) 1384 \\ if (!p0.new) jump:nt .Linvalid_nan 1385 \\ if (!p3) r13:12 = r5:4 1386 \\ if (!p3) r5:4 = r13:12 1387 \\ } 1388 \\ { 1389 \\ 1390 \\ p1 = dfclass(r1:0,#0x08) 1391 \\ p1 = dfclass(r3:2,#0x0e) 1392 \\ } 1393 \\ { 1394 \\ 1395 \\ 1396 \\ p0 = dfclass(r1:0,#0x08) 1397 \\ p0 = dfclass(r3:2,#0x01) 1398 \\ } 1399 \\ { 1400 \\ if (p1) jump .Ltrue_inf 1401 \\ p2 = dfclass(r3:2,#0x01) 1402 \\ } 1403 \\ { 1404 \\ if (p0) jump .Linvalid_zeroinf 1405 \\ if (p2) jump .Ltrue_zero 1406 \\ r28 = ##0x7c000000 1407 \\ } 1408 \\ 1409 \\ 1410 \\ 1411 \\ 1412 \\ 1413 \\ { 1414 \\ p0 = bitsclr(r1,r28) 1415 \\ if (p0.new) jump:nt .Lmul_tiny 1416 \\ } 1417 \\ { 1418 \\ r28 = cl0(r5:4) 1419 \\ } 1420 \\ { 1421 \\ r28 = add(r28,#-11) 1422 \\ } 1423 \\ { 1424 \\ r5:4 = asl(r5:4,r28) 1425 \\ } 1426 \\ { 1427 \\ r3:2 = insert(r5:4,#63,#0) 1428 \\ r1 -= asl(r28,#20) 1429 \\ } 1430 \\ jump __hexagon_muldf3 1431 \\ .Lmul_tiny: 1432 \\ { 1433 \\ r28 = USR 1434 \\ r1:0 = xor(r1:0,r3:2) 1435 \\ } 1436 
\\ { 1437 \\ r28 = or(r28,#0x30) 1438 \\ r1:0 = insert(r9:8,#63,#0) 1439 \\ r5 = extractu(r28,#2,#22) 1440 \\ } 1441 \\ { 1442 \\ USR = r28 1443 \\ p0 = cmp.gt(r5,#1) 1444 \\ if (!p0.new) r0 = #0 1445 \\ r5 ^= lsr(r1,#31) 1446 \\ } 1447 \\ { 1448 \\ p0 = cmp.eq(r5,#3) 1449 \\ if (!p0.new) r0 = #0 1450 \\ jumpr r31 1451 \\ } 1452 \\ .Linvalid_zeroinf: 1453 \\ { 1454 \\ r28 = USR 1455 \\ } 1456 \\ { 1457 \\ r1:0 = #-1 1458 \\ r28 = or(r28,#2) 1459 \\ } 1460 \\ { 1461 \\ USR = r28 1462 \\ } 1463 \\ { 1464 \\ p0 = dfcmp.uo(r1:0,r1:0) 1465 \\ jumpr r31 1466 \\ } 1467 \\ .Linvalid_nan: 1468 \\ { 1469 \\ p0 = dfclass(r3:2,#0x0f) 1470 \\ r28 = convert_df2sf(r1:0) 1471 \\ if (p0.new) r3:2 = r1:0 1472 \\ } 1473 \\ { 1474 \\ r2 = convert_df2sf(r3:2) 1475 \\ r1:0 = #-1 1476 \\ jumpr r31 1477 \\ } 1478 \\ .falign 1479 \\ .Ltrue_zero: 1480 \\ { 1481 \\ r1:0 = r3:2 1482 \\ r3:2 = r1:0 1483 \\ } 1484 \\ .Ltrue_inf: 1485 \\ { 1486 \\ r3 = extract(r3,#1,#31) 1487 \\ } 1488 \\ { 1489 \\ r1 ^= asl(r3,#31) 1490 \\ jumpr r31 1491 \\ } 1492 ); 1493 } 1494 1495 fn __hexagon_sqrtdf2() align(32) callconv(.naked) noreturn { 1496 asm volatile ( 1497 \\ { 1498 \\ r15:14 = extractu(r1:0,#23 +1,#52 -23) 1499 \\ r28 = extractu(r1,#11,#52 -32) 1500 \\ r5:4 = combine(##0x3f000004,#1) 1501 \\ } 1502 \\ { 1503 \\ p2 = dfclass(r1:0,#0x02) 1504 \\ p2 = cmp.gt(r1,#-1) 1505 \\ if (!p2.new) jump:nt .Lsqrt_abnormal 1506 \\ r9 = or(r5,r14) 1507 \\ } 1508 \\ 1509 \\ .Ldenormal_restart: 1510 \\ { 1511 \\ r11:10 = r1:0 1512 \\ r7,p0 = sfinvsqrta(r9) 1513 \\ r5 = and(r5,#-16) 1514 \\ r3:2 = #0 1515 \\ } 1516 \\ { 1517 \\ r3 += sfmpy(r7,r9):lib 1518 \\ r2 += sfmpy(r7,r5):lib 1519 \\ r6 = r5 1520 \\ 1521 \\ 1522 \\ r9 = and(r28,#1) 1523 \\ } 1524 \\ { 1525 \\ r6 -= sfmpy(r3,r2):lib 1526 \\ r11 = insert(r4,#11 +1,#52 -32) 1527 \\ p1 = cmp.gtu(r9,#0) 1528 \\ } 1529 \\ { 1530 \\ r3 += sfmpy(r3,r6):lib 1531 \\ r2 += sfmpy(r2,r6):lib 1532 \\ r6 = r5 1533 \\ r9 = mux(p1,#8,#9) 1534 \\ } 1535 \\ { 1536 \\ r6 -= 
sfmpy(r3,r2):lib 1537 \\ r11:10 = asl(r11:10,r9) 1538 \\ r9 = mux(p1,#3,#2) 1539 \\ } 1540 \\ { 1541 \\ r2 += sfmpy(r2,r6):lib 1542 \\ 1543 \\ r15:14 = asl(r11:10,r9) 1544 \\ } 1545 \\ { 1546 \\ r2 = and(r2,##0x007fffff) 1547 \\ } 1548 \\ { 1549 \\ r2 = add(r2,##0x00800000 - 3) 1550 \\ r9 = mux(p1,#7,#8) 1551 \\ } 1552 \\ { 1553 \\ r8 = asl(r2,r9) 1554 \\ r9 = mux(p1,#15-(1+1),#15-(1+0)) 1555 \\ } 1556 \\ { 1557 \\ r13:12 = mpyu(r8,r15) 1558 \\ } 1559 \\ { 1560 \\ r1:0 = asl(r11:10,#15) 1561 \\ r15:14 = mpyu(r13,r13) 1562 \\ p1 = cmp.eq(r0,r0) 1563 \\ } 1564 \\ { 1565 \\ r1:0 -= asl(r15:14,#15) 1566 \\ r15:14 = mpyu(r13,r12) 1567 \\ p2 = cmp.eq(r0,r0) 1568 \\ } 1569 \\ { 1570 \\ r1:0 -= lsr(r15:14,#16) 1571 \\ p3 = cmp.eq(r0,r0) 1572 \\ } 1573 \\ { 1574 \\ r1:0 = mpyu(r1,r8) 1575 \\ } 1576 \\ { 1577 \\ r13:12 += lsr(r1:0,r9) 1578 \\ r9 = add(r9,#16) 1579 \\ r1:0 = asl(r11:10,#31) 1580 \\ } 1581 \\ 1582 \\ { 1583 \\ r15:14 = mpyu(r13,r13) 1584 \\ r1:0 -= mpyu(r13,r12) 1585 \\ } 1586 \\ { 1587 \\ r1:0 -= asl(r15:14,#31) 1588 \\ r15:14 = mpyu(r12,r12) 1589 \\ } 1590 \\ { 1591 \\ r1:0 -= lsr(r15:14,#33) 1592 \\ } 1593 \\ { 1594 \\ r1:0 = mpyu(r1,r8) 1595 \\ } 1596 \\ { 1597 \\ r13:12 += lsr(r1:0,r9) 1598 \\ r9 = add(r9,#16) 1599 \\ r1:0 = asl(r11:10,#47) 1600 \\ } 1601 \\ 1602 \\ { 1603 \\ r15:14 = mpyu(r13,r13) 1604 \\ } 1605 \\ { 1606 \\ r1:0 -= asl(r15:14,#47) 1607 \\ r15:14 = mpyu(r13,r12) 1608 \\ } 1609 \\ { 1610 \\ r1:0 -= asl(r15:14,#16) 1611 \\ r15:14 = mpyu(r12,r12) 1612 \\ } 1613 \\ { 1614 \\ r1:0 -= lsr(r15:14,#17) 1615 \\ } 1616 \\ { 1617 \\ r1:0 = mpyu(r1,r8) 1618 \\ } 1619 \\ { 1620 \\ r13:12 += lsr(r1:0,r9) 1621 \\ } 1622 \\ { 1623 \\ r3:2 = mpyu(r13,r12) 1624 \\ r5:4 = mpyu(r12,r12) 1625 \\ r15:14 = #0 1626 \\ r1:0 = #0 1627 \\ } 1628 \\ { 1629 \\ r3:2 += lsr(r5:4,#33) 1630 \\ r5:4 += asl(r3:2,#33) 1631 \\ p1 = cmp.eq(r0,r0) 1632 \\ } 1633 \\ { 1634 \\ r7:6 = mpyu(r13,r13) 1635 \\ r1:0 = sub(r1:0,r5:4,p1):carry 1636 \\ r9:8 = #1 1637 \\ } 1638 \\ { 1639 
\\ r7:6 += lsr(r3:2,#31) 1640 \\ r9:8 += asl(r13:12,#1) 1641 \\ } 1642 \\ 1643 \\ 1644 \\ 1645 \\ 1646 \\ 1647 \\ { 1648 \\ r15:14 = sub(r11:10,r7:6,p1):carry 1649 \\ r5:4 = sub(r1:0,r9:8,p2):carry 1650 \\ 1651 \\ 1652 \\ 1653 \\ 1654 \\ r7:6 = #1 1655 \\ r11:10 = #0 1656 \\ } 1657 \\ { 1658 \\ r3:2 = sub(r15:14,r11:10,p2):carry 1659 \\ r7:6 = add(r13:12,r7:6) 1660 \\ r28 = add(r28,#-0x3ff) 1661 \\ } 1662 \\ { 1663 \\ 1664 \\ if (p2) r13:12 = r7:6 1665 \\ if (p2) r1:0 = r5:4 1666 \\ if (p2) r15:14 = r3:2 1667 \\ } 1668 \\ { 1669 \\ r5:4 = sub(r1:0,r9:8,p3):carry 1670 \\ r7:6 = #1 1671 \\ r28 = asr(r28,#1) 1672 \\ } 1673 \\ { 1674 \\ r3:2 = sub(r15:14,r11:10,p3):carry 1675 \\ r7:6 = add(r13:12,r7:6) 1676 \\ } 1677 \\ { 1678 \\ if (p3) r13:12 = r7:6 1679 \\ if (p3) r1:0 = r5:4 1680 \\ 1681 \\ 1682 \\ 1683 \\ 1684 \\ 1685 \\ r2 = #1 1686 \\ } 1687 \\ { 1688 \\ p0 = cmp.eq(r1:0,r11:10) 1689 \\ if (!p0.new) r12 = or(r12,r2) 1690 \\ r3 = cl0(r13:12) 1691 \\ r28 = add(r28,#-63) 1692 \\ } 1693 \\ 1694 \\ 1695 \\ 1696 \\ { 1697 \\ r1:0 = convert_ud2df(r13:12) 1698 \\ r28 = add(r28,r3) 1699 \\ } 1700 \\ { 1701 \\ r1 += asl(r28,#52 -32) 1702 \\ jumpr r31 1703 \\ } 1704 \\ .Lsqrt_abnormal: 1705 \\ { 1706 \\ p0 = dfclass(r1:0,#0x01) 1707 \\ if (p0.new) jumpr:t r31 1708 \\ } 1709 \\ { 1710 \\ p0 = dfclass(r1:0,#0x10) 1711 \\ if (p0.new) jump:nt .Lsqrt_nan 1712 \\ } 1713 \\ { 1714 \\ p0 = cmp.gt(r1,#-1) 1715 \\ if (!p0.new) jump:nt .Lsqrt_invalid_neg 1716 \\ if (!p0.new) r28 = ##0x7F800001 1717 \\ } 1718 \\ { 1719 \\ p0 = dfclass(r1:0,#0x08) 1720 \\ if (p0.new) jumpr:nt r31 1721 \\ } 1722 \\ 1723 \\ 1724 \\ { 1725 \\ r1:0 = extractu(r1:0,#52,#0) 1726 \\ } 1727 \\ { 1728 \\ r28 = add(clb(r1:0),#-11) 1729 \\ } 1730 \\ { 1731 \\ r1:0 = asl(r1:0,r28) 1732 \\ r28 = sub(#1,r28) 1733 \\ } 1734 \\ { 1735 \\ r1 = insert(r28,#1,#52 -32) 1736 \\ } 1737 \\ { 1738 \\ r3:2 = extractu(r1:0,#23 +1,#52 -23) 1739 \\ r5 = ##0x3f000004 1740 \\ } 1741 \\ { 1742 \\ r9 = or(r5,r2) 1743 \\ r5 = 
and(r5,#-16) 1744 \\ jump .Ldenormal_restart 1745 \\ } 1746 \\ .Lsqrt_nan: 1747 \\ { 1748 \\ r28 = convert_df2sf(r1:0) 1749 \\ r1:0 = #-1 1750 \\ jumpr r31 1751 \\ } 1752 \\ .Lsqrt_invalid_neg: 1753 \\ { 1754 \\ r1:0 = convert_sf2df(r28) 1755 \\ jumpr r31 1756 \\ } 1757 ); 1758 } 1759

// Export table for the Hexagon runtime routines referenced below. The exports
// are only emitted when the target CPU architecture is Hexagon; for any other
// target this block contributes nothing.
comptime {
    if (builtin.cpu.arch == .hexagon) {
        // Every symbol uses the linkage and visibility supplied by common.zig.
        const linkage = common.linkage;
        const visibility = common.visibility;

        // `*df3` arithmetic routines; each is exported under its plain name
        // and under a `__hexagon_fast_` alias that points at the same code.
        @export(&__hexagon_adddf3, .{ .name = "__hexagon_adddf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_adddf3, .{ .name = "__hexagon_fast_adddf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_subdf3, .{ .name = "__hexagon_subdf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_subdf3, .{ .name = "__hexagon_fast_subdf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_divdf3, .{ .name = "__hexagon_divdf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_divdf3, .{ .name = "__hexagon_fast_divdf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_muldf3, .{ .name = "__hexagon_muldf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_muldf3, .{ .name = "__hexagon_fast_muldf3", .linkage = linkage, .visibility = visibility });

        // `__hexagon_sqrtdf2` is reachable through three names.
        @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_sqrtdf2", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_fast2_sqrtdf2", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_sqrtdf2, .{ .name = "__hexagon_sqrt", .linkage = linkage, .visibility = visibility });

        // `divsf3` and `sqrtf`, each with one additional alias.
        @export(&__hexagon_divsf3, .{ .name = "__hexagon_divsf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_divsf3, .{ .name = "__hexagon_fast_divsf3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_sqrtf, .{ .name = "__hexagon_sqrtf", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_sqrtf, .{ .name = "__hexagon_fast2_sqrtf", .linkage = linkage, .visibility = visibility });

        // `*si3` division and remainder routines.
        @export(&__hexagon_divsi3, .{ .name = "__hexagon_divsi3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_udivsi3, .{ .name = "__hexagon_udivsi3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_modsi3, .{ .name = "__hexagon_modsi3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_umodsi3, .{ .name = "__hexagon_umodsi3", .linkage = linkage, .visibility = visibility });

        // `*di3` division and remainder routines.
        @export(&__hexagon_divdi3, .{ .name = "__hexagon_divdi3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_udivdi3, .{ .name = "__hexagon_udivdi3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_moddi3, .{ .name = "__hexagon_moddi3", .linkage = linkage, .visibility = visibility });
        @export(&__hexagon_umoddi3, .{ .name = "__hexagon_umoddi3", .linkage = linkage, .visibility = visibility });

        // Specialised memcpy entry point.
        @export(&__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes, .{ .name = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes", .linkage = linkage, .visibility = visibility });
    }
}