Compare commits
654 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f073923ea0 | ||
|
|
50e25f6cec | ||
|
|
efebb6d341 | ||
|
|
c828c23f71 | ||
|
|
7ac44037db | ||
|
|
2a6ad23b52 | ||
|
|
7f7823e23c | ||
|
|
2cdd50c9b2 | ||
|
|
d6e84e325b | ||
|
|
bcce77700f | ||
|
|
5834ff0cc5 | ||
|
|
1bf2810f33 | ||
|
|
49c3922037 | ||
|
|
c18059a3dd | ||
|
|
d0621391bc | ||
|
|
5bc4f1e3f1 | ||
|
|
10fb1f2730 | ||
|
|
152b408934 | ||
|
|
e4fd3fd52b | ||
|
|
6288ad865c | ||
|
|
84e952c230 | ||
|
|
3b3649b86f | ||
|
|
60b2031831 | ||
|
|
20011a7a1c | ||
|
|
61a02d9d1e | ||
|
|
f25c1c6858 | ||
|
|
70c3008a00 | ||
|
|
7a893691c0 | ||
|
|
5a7a0e8518 | ||
|
|
6db9be8900 | ||
|
|
aaf2230ae8 | ||
|
|
028ec0f2c3 | ||
|
|
aa9902b586 | ||
|
|
2db28ea849 | ||
|
|
3200ebc2ea | ||
|
|
b57cb04afc | ||
|
|
2e010c60ae | ||
|
|
b2887620f3 | ||
|
|
689e241ff8 | ||
|
|
51b2f1b80b | ||
|
|
790aaeacae | ||
|
|
bb80daf509 | ||
|
|
d96dd5bc32 | ||
|
|
6b5cfd9d99 | ||
|
|
eff3530dfa | ||
|
|
44ae891bd7 | ||
|
|
cc0f660ad2 | ||
|
|
5d5820029d | ||
|
|
07e47c058c | ||
|
|
46e258c9f7 | ||
|
|
c3807dfb34 | ||
|
|
1d378d8f26 | ||
|
|
5ab25798e3 | ||
|
|
bf47cf418a | ||
|
|
61ecc48671 | ||
|
|
ed1386eeff | ||
|
|
d34d36619e | ||
|
|
8fd7e9115c | ||
|
|
c787837ce5 | ||
|
|
db18d38a43 | ||
|
|
73a306e2fa | ||
|
|
7ee1b88042 | ||
|
|
1c244d34b3 | ||
|
|
56645c1701 | ||
|
|
101b7745c4 | ||
|
|
a217c764db | ||
|
|
7d494b3e7b | ||
|
|
de5c0c9f40 | ||
|
|
6bade0b825 | ||
|
|
8a0e1d4c02 | ||
|
|
a7c87ae1e4 | ||
|
|
253d988e7c | ||
|
|
834e992a7c | ||
|
|
8429d4ceac | ||
|
|
c622766156 | ||
|
|
807a5e94e9 | ||
|
|
36eadb569a | ||
|
|
58dc2b719c | ||
|
|
ad2a29ccf2 | ||
|
|
026aebf2ea | ||
|
|
6568be575c | ||
|
|
556f22a751 | ||
|
|
1b8a241f6f | ||
|
|
0f449a3ec1 | ||
|
|
90598b4631 | ||
|
|
d243453862 | ||
|
|
138d6f9093 | ||
|
|
132e604aa3 | ||
|
|
6e2a67724c | ||
|
|
c2f5634fb3 | ||
|
|
439621e44a | ||
|
|
4e43bde924 | ||
|
|
4ac6c4d6bf | ||
|
|
9aa65c0e8e | ||
|
|
1eecfdaa9b | ||
|
|
3e86fb500d | ||
|
|
c60496a297 | ||
|
|
6fef7406c8 | ||
|
|
6b436146a8 | ||
|
|
6cbea99ed6 | ||
|
|
b018c64ca2 | ||
|
|
fe354ebb5c | ||
|
|
704a8acb59 | ||
|
|
83f8906449 | ||
|
|
4eac75914b | ||
|
|
d2d2ba10e9 | ||
|
|
0cf327eb17 | ||
|
|
d0f2eca106 | ||
|
|
79f1ff574b | ||
|
|
bced3fb64c | ||
|
|
93cbd4eeb9 | ||
|
|
9f6c5a20de | ||
|
|
7567448b91 | ||
|
|
05bf666eb6 | ||
|
|
08d595b472 | ||
|
|
8db7a1420f | ||
|
|
4955c4b8f9 | ||
|
|
1ba6e1641a | ||
|
|
a33b689f2c | ||
|
|
9cfd7dea19 | ||
|
|
78bc62fd34 | ||
|
|
40dbcd09da | ||
|
|
f11b948019 | ||
|
|
99985ad6fc | ||
|
|
b66547e98c | ||
|
|
0845cbe277 | ||
|
|
ca1b77b2d5 | ||
|
|
88e7b9bf80 | ||
|
|
37c07d4f3f | ||
|
|
b261da0672 | ||
|
|
884b5fb4cf | ||
|
|
623466762e | ||
|
|
236bbe1183 | ||
|
|
65a51b401c | ||
|
|
a06f3c74fd | ||
|
|
3d58d7232a | ||
|
|
af10b0fec2 | ||
|
|
2b35615ffb | ||
|
|
ab48934e9c | ||
|
|
bde15cf080 | ||
|
|
72ca2b214d | ||
|
|
cbbd6cfa1e | ||
|
|
5f5880979e | ||
|
|
cc26148ba7 | ||
|
|
1c1c0691cc | ||
|
|
ca597e2bfb | ||
|
|
9fa35adbd4 | ||
|
|
629f134d38 | ||
|
|
e8d81c5acf | ||
|
|
d790670f4c | ||
|
|
1a53c648ed | ||
|
|
e7ab2bc553 | ||
|
|
c721354b73 | ||
|
|
02f70cda8a | ||
|
|
2dcff95bd2 | ||
|
|
dfbb8254ca | ||
|
|
7903a758a4 | ||
|
|
b4e44c4e80 | ||
|
|
eaac218d59 | ||
|
|
491d818f17 | ||
|
|
ec0846a00f | ||
|
|
227ead54be | ||
|
|
4a4ea92cf3 | ||
|
|
445b03384a | ||
|
|
ef6260b3a7 | ||
|
|
f2d601661d | ||
|
|
e743b30bbf | ||
|
|
46aa416c48 | ||
|
|
8c31eaf2a8 | ||
|
|
a2bd9f8912 | ||
|
|
e7bf8f3f04 | ||
|
|
1fb308ceee | ||
|
|
3919afcad2 | ||
|
|
2c697e50db | ||
|
|
5911962842 | ||
|
|
8e554561df | ||
|
|
32c988a2d7 | ||
|
|
916d24cd21 | ||
|
|
4b16874f04 | ||
|
|
ee982ae162 | ||
|
|
0efe441dfd | ||
|
|
54c06bf715 | ||
|
|
8fc6e31567 | ||
|
|
f9be970375 | ||
|
|
57edd4dcb3 | ||
|
|
1c236b0766 | ||
|
|
fee875770c | ||
|
|
76239f2089 | ||
|
|
0d5ff6f462 | ||
|
|
68238d5678 | ||
|
|
dd20f558f0 | ||
|
|
c88e6e8aee | ||
|
|
5d9e3cb77f | ||
|
|
38aed5af8b | ||
|
|
aa043a6339 | ||
|
|
79ad1d9610 | ||
|
|
0090c2d70b | ||
|
|
f99b8b006f | ||
|
|
6940212ecb | ||
|
|
917e6fe370 | ||
|
|
40e4e42a66 | ||
|
|
44d8d654a0 | ||
|
|
ec59f76526 | ||
|
|
b7bc259093 | ||
|
|
893f1088df | ||
|
|
15075d2c3d | ||
|
|
31abef172a | ||
|
|
21ce559c9c | ||
|
|
73ee434c8c | ||
|
|
61718742f7 | ||
|
|
ef5e7bb469 | ||
|
|
abf5ae6897 | ||
|
|
b8f59e14cd | ||
|
|
39d5f44863 | ||
|
|
cfb2c67692 | ||
|
|
15eb28efaf | ||
|
|
4ec856b0f0 | ||
|
|
406496ca33 | ||
|
|
13b36d458f | ||
|
|
5f518dbeb9 | ||
|
|
02b61224b2 | ||
|
|
e6d4028a84 | ||
|
|
3a11757d57 | ||
|
|
a795e4ce32 | ||
|
|
44f38b04b0 | ||
|
|
5161d70620 | ||
|
|
40ca39d3d5 | ||
|
|
3ef6a00bb8 | ||
|
|
0995a81b8b | ||
|
|
d6b7d9090e | ||
|
|
7eea20bc50 | ||
|
|
5e9f87c3bd | ||
|
|
1c60f31450 | ||
|
|
96c9a9bdb3 | ||
|
|
2b5e0b66a2 | ||
|
|
abe6c2d585 | ||
|
|
f66ac9a5e7 | ||
|
|
ad3e2a5da0 | ||
|
|
47be64af5a | ||
|
|
f7670882af | ||
|
|
3671582c15 | ||
|
|
e5bc5873d7 | ||
|
|
b71a56c9df | ||
|
|
b3a6faf13e | ||
|
|
ad2527d47a | ||
|
|
c2838f2442 | ||
|
|
b8dcdc75c1 | ||
|
|
470ec91164 | ||
|
|
fa7072f3f2 | ||
|
|
cf39819478 | ||
|
|
cacba6f435 | ||
|
|
b52bffcf8d | ||
|
|
5b7ae86af4 | ||
|
|
517e8ea426 | ||
|
|
ddd04a7b46 | ||
|
|
ec27d3b4ba | ||
|
|
a7e10565fc | ||
|
|
890bf001db | ||
|
|
9f5c0b6e60 | ||
|
|
2eede35577 | ||
|
|
d8469e3c7c | ||
|
|
a1a69f24c8 | ||
|
|
61497893d3 | ||
|
|
613c4dbf58 | ||
|
|
8be606ec80 | ||
|
|
a76023bcd8 | ||
|
|
90714a3831 | ||
|
|
21e8ecbafa | ||
|
|
2c25c8aeed | ||
|
|
ea623f2d39 | ||
|
|
4b64c777ee | ||
|
|
0fc645ab70 | ||
|
|
0b8f19fcba | ||
|
|
0aae96b5f0 | ||
|
|
4556f44806 | ||
|
|
4aed7ea6f8 | ||
|
|
48cd808185 | ||
|
|
a4e8e55908 | ||
|
|
1d6f54cc7d | ||
|
|
fa2c3be341 | ||
|
|
db0fc32ab2 | ||
|
|
2e6125bc66 | ||
|
|
7a3fd89d25 | ||
|
|
dfd5363494 | ||
|
|
7af53d0826 | ||
|
|
1eda7e0fde | ||
|
|
5aefabe045 | ||
|
|
2774fe8a1b | ||
|
|
4bdfc8a10a | ||
|
|
24c2ff5cae | ||
|
|
c58f5a4742 | ||
|
|
b897e98d30 | ||
|
|
ee9ab15679 | ||
|
|
3974b7d31d | ||
|
|
f59dcc5546 | ||
|
|
8b280d5b31 | ||
|
|
821cbd7a1b | ||
|
|
73b4f09845 | ||
|
|
66a24c9c00 | ||
|
|
fa7b33549e | ||
|
|
6a95b88d1b | ||
|
|
84d8584c5b | ||
|
|
92fc5947fc | ||
|
|
5a4968484b | ||
|
|
6ec9933fd8 | ||
|
|
4cf86b4a94 | ||
|
|
c9ac607bd3 | ||
|
|
7b57454cc1 | ||
|
|
d973b40884 | ||
|
|
f0df2cdde9 | ||
|
|
793f031c4c | ||
|
|
fa024f8092 | ||
|
|
971a6fc531 | ||
|
|
e7e7625633 | ||
|
|
9be9f1ad20 | ||
|
|
1f3ed5cf27 | ||
|
|
2659ac01be | ||
|
|
4551489b92 | ||
|
|
a2315cfbfc | ||
|
|
51fdbf7f8c | ||
|
|
304f6f1d01 | ||
|
|
32ea6f54e5 | ||
|
|
7ec783876a | ||
|
|
eb3726c502 | ||
|
|
3268276b58 | ||
|
|
465e75bc5a | ||
|
|
899e36489d | ||
|
|
891c93c118 | ||
|
|
d4f791cf6c | ||
|
|
24cd99160c | ||
|
|
19343db593 | ||
|
|
d1d3dbc7b5 | ||
|
|
3c094116aa | ||
|
|
98a95cc698 | ||
|
|
5a8d87f504 | ||
|
|
598170756c | ||
|
|
632d143bff | ||
|
|
66717db735 | ||
|
|
de1f57926f | ||
|
|
3182857224 | ||
|
|
32ba0dcea9 | ||
|
|
e7c04b6df2 | ||
|
|
bb39e503c0 | ||
|
|
ad438cfd40 | ||
|
|
e932919e68 | ||
|
|
a9d2a7f002 | ||
|
|
e91136d61f | ||
|
|
6f85c860c6 | ||
|
|
38658a597b | ||
|
|
dde7cc52d2 | ||
|
|
17e68c4a11 | ||
|
|
2200c2de6f | ||
|
|
5d9a8cbe1a | ||
|
|
e08a4ea62d | ||
|
|
2c35e24bd9 | ||
|
|
79d50d9933 | ||
|
|
f377b1e886 | ||
|
|
7f0b12a481 | ||
|
|
25ad0b47e2 | ||
|
|
d1ef17e3cd | ||
|
|
1b120d1e49 | ||
|
|
21a552682e | ||
|
|
35dc987dc8 | ||
|
|
31d632b72e | ||
|
|
7e65fe7ac3 | ||
|
|
d008e209e7 | ||
|
|
e1c03d9e8e | ||
|
|
0cd63b28f3 | ||
|
|
477e3f64fc | ||
|
|
5c8600d790 | ||
|
|
8eae4a0967 | ||
|
|
5a800db48c | ||
|
|
a45db7e853 | ||
|
|
5b156031e9 | ||
|
|
5c988cc722 | ||
|
|
36ff26609b | ||
|
|
6281a511e1 | ||
|
|
c741d3f4b2 | ||
|
|
d9d61ed563 | ||
|
|
1d77f8db28 | ||
|
|
0ea50b3157 | ||
|
|
aafb832288 | ||
|
|
d15b02a6b6 | ||
|
|
4e3d7fc4bc | ||
|
|
192a039173 | ||
|
|
6bfaf262d5 | ||
|
|
08dd1b553b | ||
|
|
6fece14cfb | ||
|
|
2a25398c86 | ||
|
|
86397a532e | ||
|
|
f0a1753607 | ||
|
|
d6a74ed463 | ||
|
|
fb96c3e73e | ||
|
|
4183c6f1a5 | ||
|
|
9dae796fe3 | ||
|
|
79c2ceb2d5 | ||
|
|
e0a1466bd8 | ||
|
|
2031989d98 | ||
|
|
8b716f941d | ||
|
|
87ba004d46 | ||
|
|
c8302a5a0e | ||
|
|
0082989f22 | ||
|
|
3cbc244e98 | ||
|
|
74a12d818d | ||
|
|
45ab752f9a | ||
|
|
fe66046283 | ||
|
|
39c7bd24e4 | ||
|
|
760b307e8a | ||
|
|
e44a11341d | ||
|
|
0e7fb69bea | ||
|
|
ea805c5fe7 | ||
|
|
d917815d81 | ||
|
|
8bc523219c | ||
|
|
d686113bd2 | ||
|
|
1cc450e6e7 | ||
|
|
1435604b84 | ||
|
|
2a8160e80f | ||
|
|
9d9201c3b4 | ||
|
|
27ba4f0baf | ||
|
|
c627f9ea18 | ||
|
|
1fdebc1dc4 | ||
|
|
3f65887974 | ||
|
|
ab44939941 | ||
|
|
39e96d933e | ||
|
|
68f6332343 | ||
|
|
6bc0561d13 | ||
|
|
75ecfdf66d | ||
|
|
c9e01412a4 | ||
|
|
f55fdc00fc | ||
|
|
84619abe9f | ||
|
|
d295279b16 | ||
|
|
0003cc8105 | ||
|
|
24c2703dfa | ||
|
|
cdaa735b2b | ||
|
|
2b9302107f | ||
|
|
cd5fd653d7 | ||
|
|
caa6433b56 | ||
|
|
23058d8b43 | ||
|
|
ed4d94a5d5 | ||
|
|
c4e7d05ce3 | ||
|
|
d8d379faf1 | ||
|
|
a3a590a32a | ||
|
|
fd6a36a235 | ||
|
|
9a51091a5c | ||
|
|
f951bcf01b | ||
|
|
53d58684a6 | ||
|
|
f210f17d30 | ||
|
|
4b1d120f58 | ||
|
|
dc2e3465c7 | ||
|
|
22dc713a2f | ||
|
|
990db3c35a | ||
|
|
62ead3a2ee | ||
|
|
e9efa74333 | ||
|
|
f466e539ef | ||
|
|
d431b0fb99 | ||
|
|
5ead3244a2 | ||
|
|
756a218e27 | ||
|
|
18cf256817 | ||
|
|
3577a80bb6 | ||
|
|
0dd3bbf6e8 | ||
|
|
182cf5b8de | ||
|
|
dc502042d5 | ||
|
|
37fbf01755 | ||
|
|
18b8a625f5 | ||
|
|
7c91a055c1 | ||
|
|
62c25af802 | ||
|
|
04612d25d7 | ||
|
|
249cb2aa30 | ||
|
|
f464fe14f4 | ||
|
|
bb6b4f8db2 | ||
|
|
c49ee9f632 | ||
|
|
2715f6fdb8 | ||
|
|
b66fb7ceae | ||
|
|
6018dbd339 | ||
|
|
960914a073 | ||
|
|
63a2f9a8b2 | ||
|
|
74cea89fce | ||
|
|
08d531143f | ||
|
|
3976981ab3 | ||
|
|
7297baa9c6 | ||
|
|
07898cc0df | ||
|
|
798dbe487b | ||
|
|
31d9dc3539 | ||
|
|
fe39ca01bc | ||
|
|
5ebed1c9ee | ||
|
|
42004f9013 | ||
|
|
a966275e50 | ||
|
|
67e6d9bc30 | ||
|
|
fea016afc0 | ||
|
|
76f3bdfff8 | ||
|
|
dd3437d5ba | ||
|
|
54138d9e82 | ||
|
|
084911d9b3 | ||
|
|
942b250895 | ||
|
|
05d9f07541 | ||
|
|
fce435db26 | ||
|
|
5a8367e892 | ||
|
|
0ad1239522 | ||
|
|
137c8f5e8a | ||
|
|
98237f7c0b | ||
|
|
54a0db0daf | ||
|
|
67b8b00c44 | ||
|
|
921825b4c0 | ||
|
|
cf96b6f87b | ||
|
|
bdd5241615 | ||
|
|
a206ef34bb | ||
|
|
ddca67a2b9 | ||
|
|
fa45407e78 | ||
|
|
9ea23272fa | ||
|
|
77b530b50a | ||
|
|
b4120423a5 | ||
|
|
264c86853b | ||
|
|
b62e2fd870 | ||
|
|
5786df933d | ||
|
|
210d0017c4 | ||
|
|
7729f6cf4e | ||
|
|
716b0b8655 | ||
|
|
ccea8dcbf6 | ||
|
|
88a7f203f9 | ||
|
|
418b0967fc | ||
|
|
afe3aae582 | ||
|
|
d4cd4a35d5 | ||
|
|
91ef68f9b1 | ||
|
|
7066283004 | ||
|
|
26096e79d1 | ||
|
|
8d5c4a67a7 | ||
|
|
e745544dac | ||
|
|
f537c51f25 | ||
|
|
1ab84a27d3 | ||
|
|
3e8fd24547 | ||
|
|
57049b95b3 | ||
|
|
04472f57be | ||
|
|
671183fa9a | ||
|
|
93fac5f257 | ||
|
|
9a8545d590 | ||
|
|
aa2ca3f02c | ||
|
|
1b0e90f70b | ||
|
|
687e359291 | ||
|
|
df0e875856 | ||
|
|
a2afcae9ff | ||
|
|
48ebb65cc7 | ||
|
|
b390929826 | ||
|
|
bf20b260ce | ||
|
|
18eb3c5f90 | ||
|
|
cd36baf530 | ||
|
|
40480c7cdc | ||
|
|
68312afcdf | ||
|
|
741504862c | ||
|
|
5a25505668 | ||
|
|
afbbdb2c67 | ||
|
|
a44283b0b2 | ||
|
|
339d48ac15 | ||
|
|
3e835973db | ||
|
|
b50c676f76 | ||
|
|
a7d07d412c | ||
|
|
d108689382 | ||
|
|
1473eb9ae0 | ||
|
|
5d2ba056c8 | ||
|
|
e26ccd5166 | ||
|
|
f12d36641f | ||
|
|
018cbff438 | ||
|
|
3740bfa3bf | ||
|
|
a984040fae | ||
|
|
9a4da6c8d8 | ||
|
|
f276fd0f37 | ||
|
|
7a74dbadd7 | ||
|
|
371e578151 | ||
|
|
5029322aa1 | ||
|
|
6ffaf4c2e2 | ||
|
|
012ce1481e | ||
|
|
4c2cdf6f4d | ||
|
|
c1fde0e8c4 | ||
|
|
6356724057 | ||
|
|
03732860be | ||
|
|
df07361642 | ||
|
|
57cd074959 | ||
|
|
1f28fcdec5 | ||
|
|
b3b4786c24 | ||
|
|
98e3c7911c | ||
|
|
a890380b6a | ||
|
|
ca87f55a7b | ||
|
|
5ae53dacfb | ||
|
|
5895204c99 | ||
|
|
87407b54b6 | ||
|
|
1403748fd8 | ||
|
|
df89291d1c | ||
|
|
019f18058b | ||
|
|
403a46abcc | ||
|
|
6bf1547148 | ||
|
|
029d37d6a7 | ||
|
|
20c2dbdbd3 | ||
|
|
1ac46fac15 | ||
|
|
e9d7623e1f | ||
|
|
336d81894d | ||
|
|
52521d5f67 | ||
|
|
7ea669e04c | ||
|
|
4f8c26d2c6 | ||
|
|
53b18c8542 | ||
|
|
4543413491 | ||
|
|
3a600297ca | ||
|
|
634e8713c3 | ||
|
|
f0dafd3f20 | ||
|
|
52a2992862 | ||
|
|
48c8181886 | ||
|
|
bd6f8d99c5 | ||
|
|
4cc9fe90a8 | ||
|
|
f0d755153d | ||
|
|
4a6df04f75 | ||
|
|
75afe73c66 | ||
|
|
d4c1ed95ac | ||
|
|
1890760206 | ||
|
|
1ef6cb1b64 | ||
|
|
795703a39c | ||
|
|
a31b23c46b | ||
|
|
dc8b011d61 | ||
|
|
4a82c2d124 | ||
|
|
188fd47a51 | ||
|
|
9a99bd3a71 | ||
|
|
94ec2190f8 | ||
|
|
abff1b6884 | ||
|
|
f7837f445e | ||
|
|
38f05d4ac5 | ||
|
|
b35689b70d | ||
|
|
25972be45c | ||
|
|
9e234d4208 | ||
|
|
7a96aca39e | ||
|
|
1a414c7b6b | ||
|
|
540bac0928 | ||
|
|
4c306af4eb | ||
|
|
f1072d0d9f | ||
|
|
6663638195 | ||
|
|
f4ca3482f1 | ||
|
|
c7053bea20 | ||
|
|
300c83d893 | ||
|
|
5f28a9d238 | ||
|
|
6764a45223 | ||
|
|
73fe5f63c6 | ||
|
|
1e784839f1 | ||
|
|
1828f8eb8e | ||
|
|
262b7428cf | ||
|
|
4f4da3c10c | ||
|
|
d7e28f991d | ||
|
|
643ab90ace | ||
|
|
03a0dfbeca | ||
|
|
92751d5e24 | ||
|
|
c1642355f0 | ||
|
|
a1af7cbf00 | ||
|
|
175893913d | ||
|
|
9b91c76088 | ||
|
|
b3d12d2c9e | ||
|
|
3c3af4b332 | ||
|
|
a27c0dd591 | ||
|
|
78cb4ce030 | ||
|
|
79193ffed2 |
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*.zig text eol=lf
|
||||
918
CMakeLists.txt
918
CMakeLists.txt
File diff suppressed because it is too large
Load Diff
105
README.md
105
README.md
@@ -5,8 +5,6 @@ clarity.
|
||||
|
||||
[ziglang.org](http://ziglang.org)
|
||||
|
||||
[Documentation](http://ziglang.org/documentation/)
|
||||
|
||||
## Feature Highlights
|
||||
|
||||
* Small, simple language. Focus on debugging your application rather than
|
||||
@@ -26,7 +24,7 @@ clarity.
|
||||
always compiled against statically in source form. Compile units do not
|
||||
depend on libc unless explicitly linked.
|
||||
* Nullable type instead of null pointers.
|
||||
* Tagged union type instead of raw unions.
|
||||
* Safe unions, tagged unions, and C ABI compatible unions.
|
||||
* Generics so that one can write efficient data structures that work for any
|
||||
data type.
|
||||
* No header files required. Top level declarations are entirely
|
||||
@@ -35,7 +33,7 @@ clarity.
|
||||
* Partial compile-time function evaluation with eliminates the need for
|
||||
a preprocessor or macros.
|
||||
* The binaries produced by Zig have complete debugging information so you can,
|
||||
for example, use GDB to debug your software.
|
||||
for example, use GDB or MSVC to debug your software.
|
||||
* Built-in unit tests with `zig test`.
|
||||
* Friendly toward package maintainers. Reproducible build, bootstrapping
|
||||
process carefully documented. Issues filed by package maintainers are
|
||||
@@ -54,35 +52,21 @@ that counts as "freestanding" for the purposes of this table.
|
||||
|
||||
| | freestanding | linux | macosx | windows | other |
|
||||
|-------------|--------------|---------|---------|---------|---------|
|
||||
|i386 | OK | planned | OK | OK | planned |
|
||||
|i386 | OK | planned | OK | planned | planned |
|
||||
|x86_64 | OK | OK | OK | OK | planned |
|
||||
|arm | OK | planned | planned | N/A | planned |
|
||||
|aarch64 | OK | planned | planned | planned | planned |
|
||||
|avr | OK | planned | planned | N/A | planned |
|
||||
|bpf | OK | planned | planned | N/A | planned |
|
||||
|hexagon | OK | planned | planned | N/A | planned |
|
||||
|mips | OK | planned | planned | N/A | planned |
|
||||
|msp430 | OK | planned | planned | N/A | planned |
|
||||
|nios2 | OK | planned | planned | N/A | planned |
|
||||
|powerpc | OK | planned | planned | N/A | planned |
|
||||
|r600 | OK | planned | planned | N/A | planned |
|
||||
|amdgcn | OK | planned | planned | N/A | planned |
|
||||
|riscv | OK | planned | planned | N/A | planned |
|
||||
|sparc | OK | planned | planned | N/A | planned |
|
||||
|s390x | OK | planned | planned | N/A | planned |
|
||||
|tce | OK | planned | planned | N/A | planned |
|
||||
|thumb | OK | planned | planned | N/A | planned |
|
||||
|xcore | OK | planned | planned | N/A | planned |
|
||||
|nvptx | OK | planned | planned | N/A | planned |
|
||||
|le | OK | planned | planned | N/A | planned |
|
||||
|amdil | OK | planned | planned | N/A | planned |
|
||||
|hsail | OK | planned | planned | N/A | planned |
|
||||
|spir | OK | planned | planned | N/A | planned |
|
||||
|kalimba | OK | planned | planned | N/A | planned |
|
||||
|shave | OK | planned | planned | N/A | planned |
|
||||
|lanai | OK | planned | planned | N/A | planned |
|
||||
|wasm | OK | N/A | N/A | N/A | N/A |
|
||||
|renderscript | OK | N/A | N/A | N/A | N/A |
|
||||
|
||||
## Community
|
||||
|
||||
@@ -92,10 +76,10 @@ that counts as "freestanding" for the purposes of this table.
|
||||
|
||||
### Wanted: Windows Developers
|
||||
|
||||
Help get the tests passing on Windows, flesh out the standard library for
|
||||
Windows, streamline Zig installation and distribution for Windows. Work with
|
||||
LLVM and LLD teams to improve PDB/CodeView/MSVC debugging. Implement stack traces
|
||||
for Windows in the MinGW environment and the MSVC environment.
|
||||
Flesh out the standard library for Windows, streamline Zig installation and
|
||||
distribution for Windows. Work with LLVM and LLD teams to improve
|
||||
PDB/CodeView/MSVC debugging. Implement stack traces for Windows in the MinGW
|
||||
environment and the MSVC environment.
|
||||
|
||||
### Wanted: MacOS and iOS Developers
|
||||
|
||||
@@ -133,31 +117,26 @@ libc. Create demo games using Zig.
|
||||
[](https://travis-ci.org/zig-lang/zig)
|
||||
[](https://ci.appveyor.com/project/andrewrk/zig-d3l86/branch/master)
|
||||
|
||||
### Dependencies
|
||||
### Stage 1: Build Zig from C++ Source Code
|
||||
|
||||
#### Build Dependencies
|
||||
|
||||
These compile tools must be available on your system and are used to build
|
||||
the Zig compiler itself:
|
||||
#### Dependencies
|
||||
|
||||
##### POSIX
|
||||
|
||||
* gcc >= 5.0.0 or clang >= 3.6.0
|
||||
* cmake >= 2.8.5
|
||||
* gcc >= 5.0.0 or clang >= 3.6.0
|
||||
* LLVM, Clang, LLD development libraries == 6.x, compiled with the same gcc or clang version above
|
||||
- These depend on zlib and libxml2.
|
||||
|
||||
##### Windows
|
||||
|
||||
* cmake >= 2.8.5
|
||||
* Microsoft Visual Studio 2015
|
||||
* LLVM, Clang, LLD development libraries == 6.x, compiled with the same MSVC version above
|
||||
|
||||
#### Library Dependencies
|
||||
#### Instructions
|
||||
|
||||
These libraries must be installed on your system, with the development files
|
||||
available. The Zig compiler links against them. You have to use the same
|
||||
compiler for these libraries as you do to compile Zig.
|
||||
|
||||
* LLVM, Clang, and LLD libraries == 5.x
|
||||
|
||||
### Debug / Development Build
|
||||
##### POSIX
|
||||
|
||||
If you have gcc or clang installed, you can find out what `ZIG_LIBC_LIB_DIR`,
|
||||
`ZIG_LIBC_STATIC_LIB_DIR`, and `ZIG_LIBC_INCLUDE_DIR` should be set to
|
||||
@@ -172,55 +151,51 @@ make install
|
||||
./zig build --build-file ../build.zig test
|
||||
```
|
||||
|
||||
#### MacOS
|
||||
##### MacOS
|
||||
|
||||
`ZIG_LIBC_LIB_DIR` and `ZIG_LIBC_STATIC_LIB_DIR` are unused.
|
||||
|
||||
```
|
||||
brew install llvm@5
|
||||
brew outdated llvm@5 || brew upgrade llvm@5
|
||||
brew install cmake llvm@6
|
||||
brew outdated llvm@6 || brew upgrade llvm@6
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@5/ -DCMAKE_INSTALL_PREFIX=$(pwd)
|
||||
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@6/ -DCMAKE_INSTALL_PREFIX=$(pwd)
|
||||
make install
|
||||
./zig build --build-file ../build.zig test
|
||||
```
|
||||
|
||||
#### Windows
|
||||
##### Windows
|
||||
|
||||
See https://github.com/zig-lang/zig/wiki/Building-Zig-on-Windows
|
||||
|
||||
### Release / Install Build
|
||||
### Stage 2: Build Self-Hosted Zig from Zig Source Code
|
||||
|
||||
Once installed, `ZIG_LIBC_LIB_DIR` and `ZIG_LIBC_INCLUDE_DIR` can be overridden
|
||||
by the `--libc-lib-dir` and `--libc-include-dir` parameters to the zig binary.
|
||||
*Note: Stage 2 compiler is not complete. Beta users of Zig should use the
|
||||
Stage 1 compiler for now.*
|
||||
|
||||
Dependencies are the same as Stage 1, except now you have a working zig compiler.
|
||||
|
||||
```
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release -DZIG_LIBC_LIB_DIR=/some/path -DZIG_LIBC_INCLUDE_DIR=/some/path -DZIG_LIBC_STATIC_INCLUDE_DIR=/some/path
|
||||
make
|
||||
sudo make install
|
||||
bin/zig build --build-file ../build.zig --prefix $(pwd)/stage2 install
|
||||
```
|
||||
|
||||
### Test Coverage
|
||||
This produces `./stage2/bin/zig` which can be used for testing and development.
|
||||
Once it is feature complete, it will be used to build stage 3 - the final compiler
|
||||
binary.
|
||||
|
||||
To see test coverage in Zig, configure with `-DZIG_TEST_COVERAGE=ON` as an
|
||||
additional parameter to the Debug build.
|
||||
### Stage 3: Rebuild Self-Hosted Zig Using the Self-Hosted Compiler
|
||||
|
||||
You must have `lcov` installed and available.
|
||||
This is the actual compiler binary that we will install to the system.
|
||||
|
||||
Then `make coverage`.
|
||||
#### Debug / Development Build
|
||||
|
||||
With GCC you will get a nice HTML view of the coverage data. With clang,
|
||||
the last step will fail, but you can execute
|
||||
`llvm-cov gcov $(find CMakeFiles/ -name "*.gcda")` and then inspect the
|
||||
produced .gcov files.
|
||||
```
|
||||
./stage2/bin/zig build --build-file ../build.zig --prefix $(pwd)/stage3 install
|
||||
```
|
||||
|
||||
### Related Projects
|
||||
#### Release / Install Build
|
||||
|
||||
* [zig-mode](https://github.com/AndreaOrru/zig-mode) - Emacs integration
|
||||
* [zig.vim](https://github.com/zig-lang/zig.vim) - Vim configuration files
|
||||
* [vscode-zig](https://github.com/zig-lang/vscode-zig) - Visual Studio Code extension
|
||||
* [zig-compiler-completions](https://github.com/tiehuis/zig-compiler-completions) - bash and zsh completions for the zig compiler
|
||||
* [NppExtension](https://github.com/ice1000/NppExtension) - Notepad++ syntax highlighting
|
||||
```
|
||||
./stage2/bin/zig build --build-file ../build.zig install -Drelease-fast
|
||||
```
|
||||
|
||||
216
build.zig
216
build.zig
@@ -1,10 +1,102 @@
|
||||
const Builder = @import("std").build.Builder;
|
||||
const builtin = @import("builtin");
|
||||
const std = @import("std");
|
||||
const Builder = std.build.Builder;
|
||||
const tests = @import("test/tests.zig");
|
||||
const os = std.os;
|
||||
const BufMap = std.BufMap;
|
||||
const warn = std.debug.warn;
|
||||
const mem = std.mem;
|
||||
const ArrayList = std.ArrayList;
|
||||
const Buffer = std.Buffer;
|
||||
const io = std.io;
|
||||
|
||||
pub fn build(b: &Builder) !void {
|
||||
const mode = b.standardReleaseOptions();
|
||||
|
||||
var docgen_exe = b.addExecutable("docgen", "doc/docgen.zig");
|
||||
|
||||
const rel_zig_exe = try os.path.relative(b.allocator, b.build_root, b.zig_exe);
|
||||
var docgen_cmd = b.addCommand(null, b.env_map, [][]const u8 {
|
||||
docgen_exe.getOutputPath(),
|
||||
rel_zig_exe,
|
||||
"doc/langref.html.in",
|
||||
os.path.join(b.allocator, b.cache_root, "langref.html") catch unreachable,
|
||||
});
|
||||
docgen_cmd.step.dependOn(&docgen_exe.step);
|
||||
|
||||
const docs_step = b.step("docs", "Build documentation");
|
||||
docs_step.dependOn(&docgen_cmd.step);
|
||||
|
||||
const test_step = b.step("test", "Run all the tests");
|
||||
|
||||
// find the stage0 build artifacts because we're going to re-use config.h and zig_cpp library
|
||||
const build_info = try b.exec([][]const u8{b.zig_exe, "BUILD_INFO"});
|
||||
var index: usize = 0;
|
||||
const cmake_binary_dir = nextValue(&index, build_info);
|
||||
const cxx_compiler = nextValue(&index, build_info);
|
||||
const llvm_config_exe = nextValue(&index, build_info);
|
||||
const lld_include_dir = nextValue(&index, build_info);
|
||||
const lld_libraries = nextValue(&index, build_info);
|
||||
const std_files = nextValue(&index, build_info);
|
||||
const c_header_files = nextValue(&index, build_info);
|
||||
const dia_guids_lib = nextValue(&index, build_info);
|
||||
|
||||
const llvm = findLLVM(b, llvm_config_exe) catch unreachable;
|
||||
|
||||
var exe = b.addExecutable("zig", "src-self-hosted/main.zig");
|
||||
exe.setBuildMode(mode);
|
||||
exe.addIncludeDir("src");
|
||||
exe.addIncludeDir(cmake_binary_dir);
|
||||
addCppLib(b, exe, cmake_binary_dir, "zig_cpp");
|
||||
if (lld_include_dir.len != 0) {
|
||||
exe.addIncludeDir(lld_include_dir);
|
||||
var it = mem.split(lld_libraries, ";");
|
||||
while (it.next()) |lib| {
|
||||
exe.addObjectFile(lib);
|
||||
}
|
||||
} else {
|
||||
addCppLib(b, exe, cmake_binary_dir, "embedded_lld_elf");
|
||||
addCppLib(b, exe, cmake_binary_dir, "embedded_lld_coff");
|
||||
addCppLib(b, exe, cmake_binary_dir, "embedded_lld_lib");
|
||||
}
|
||||
dependOnLib(exe, llvm);
|
||||
|
||||
if (exe.target.getOs() == builtin.Os.linux) {
|
||||
const libstdcxx_path_padded = try b.exec([][]const u8{cxx_compiler, "-print-file-name=libstdc++.a"});
|
||||
const libstdcxx_path = ??mem.split(libstdcxx_path_padded, "\r\n").next();
|
||||
exe.addObjectFile(libstdcxx_path);
|
||||
|
||||
exe.linkSystemLibrary("pthread");
|
||||
} else if (exe.target.isDarwin()) {
|
||||
exe.linkSystemLibrary("c++");
|
||||
}
|
||||
|
||||
if (dia_guids_lib.len != 0) {
|
||||
exe.addObjectFile(dia_guids_lib);
|
||||
}
|
||||
|
||||
if (exe.target.getOs() != builtin.Os.windows) {
|
||||
exe.linkSystemLibrary("xml2");
|
||||
}
|
||||
exe.linkSystemLibrary("c");
|
||||
|
||||
b.default_step.dependOn(&exe.step);
|
||||
|
||||
const skip_self_hosted = b.option(bool, "skip-self-hosted", "Main test suite skips building self hosted compiler") ?? false;
|
||||
if (!skip_self_hosted) {
|
||||
test_step.dependOn(&exe.step);
|
||||
}
|
||||
const verbose_link_exe = b.option(bool, "verbose-link", "Print link command for self hosted compiler") ?? false;
|
||||
exe.setVerboseLink(verbose_link_exe);
|
||||
|
||||
b.installArtifact(exe);
|
||||
installStdLib(b, std_files);
|
||||
installCHeaders(b, c_header_files);
|
||||
|
||||
pub fn build(b: &Builder) {
|
||||
const test_filter = b.option([]const u8, "test-filter", "Skip tests that do not match filter");
|
||||
const with_lldb = b.option(bool, "with-lldb", "Run tests in LLDB to get a backtrace if one fails") ?? false;
|
||||
const test_step = b.step("test", "Run all the tests");
|
||||
|
||||
test_step.dependOn(docs_step);
|
||||
|
||||
test_step.dependOn(tests.addPkgTests(b, test_filter,
|
||||
"test/behavior.zig", "behavior", "Run the behavior tests",
|
||||
@@ -22,6 +114,120 @@ pub fn build(b: &Builder) {
|
||||
test_step.dependOn(tests.addBuildExampleTests(b, test_filter));
|
||||
test_step.dependOn(tests.addCompileErrorTests(b, test_filter));
|
||||
test_step.dependOn(tests.addAssembleAndLinkTests(b, test_filter));
|
||||
test_step.dependOn(tests.addDebugSafetyTests(b, test_filter));
|
||||
test_step.dependOn(tests.addParseCTests(b, test_filter));
|
||||
test_step.dependOn(tests.addRuntimeSafetyTests(b, test_filter));
|
||||
test_step.dependOn(tests.addTranslateCTests(b, test_filter));
|
||||
test_step.dependOn(tests.addGenHTests(b, test_filter));
|
||||
}
|
||||
|
||||
fn dependOnLib(lib_exe_obj: &std.build.LibExeObjStep, dep: &const LibraryDep) void {
|
||||
for (dep.libdirs.toSliceConst()) |lib_dir| {
|
||||
lib_exe_obj.addLibPath(lib_dir);
|
||||
}
|
||||
for (dep.system_libs.toSliceConst()) |lib| {
|
||||
lib_exe_obj.linkSystemLibrary(lib);
|
||||
}
|
||||
for (dep.libs.toSliceConst()) |lib| {
|
||||
lib_exe_obj.addObjectFile(lib);
|
||||
}
|
||||
for (dep.includes.toSliceConst()) |include_path| {
|
||||
lib_exe_obj.addIncludeDir(include_path);
|
||||
}
|
||||
}
|
||||
|
||||
fn addCppLib(b: &Builder, lib_exe_obj: &std.build.LibExeObjStep, cmake_binary_dir: []const u8, lib_name: []const u8) void {
|
||||
const lib_prefix = if (lib_exe_obj.target.isWindows()) "" else "lib";
|
||||
lib_exe_obj.addObjectFile(os.path.join(b.allocator, cmake_binary_dir, "zig_cpp",
|
||||
b.fmt("{}{}{}", lib_prefix, lib_name, lib_exe_obj.target.libFileExt())) catch unreachable);
|
||||
}
|
||||
|
||||
const LibraryDep = struct {
|
||||
libdirs: ArrayList([]const u8),
|
||||
libs: ArrayList([]const u8),
|
||||
system_libs: ArrayList([]const u8),
|
||||
includes: ArrayList([]const u8),
|
||||
};
|
||||
|
||||
fn findLLVM(b: &Builder, llvm_config_exe: []const u8) !LibraryDep {
|
||||
const libs_output = try b.exec([][]const u8{llvm_config_exe, "--libs", "--system-libs"});
|
||||
const includes_output = try b.exec([][]const u8{llvm_config_exe, "--includedir"});
|
||||
const libdir_output = try b.exec([][]const u8{llvm_config_exe, "--libdir"});
|
||||
|
||||
var result = LibraryDep {
|
||||
.libs = ArrayList([]const u8).init(b.allocator),
|
||||
.system_libs = ArrayList([]const u8).init(b.allocator),
|
||||
.includes = ArrayList([]const u8).init(b.allocator),
|
||||
.libdirs = ArrayList([]const u8).init(b.allocator),
|
||||
};
|
||||
{
|
||||
var it = mem.split(libs_output, " \r\n");
|
||||
while (it.next()) |lib_arg| {
|
||||
if (mem.startsWith(u8, lib_arg, "-l")) {
|
||||
try result.system_libs.append(lib_arg[2..]);
|
||||
} else {
|
||||
if (os.path.isAbsolute(lib_arg)) {
|
||||
try result.libs.append(lib_arg);
|
||||
} else {
|
||||
try result.system_libs.append(lib_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
var it = mem.split(includes_output, " \r\n");
|
||||
while (it.next()) |include_arg| {
|
||||
if (mem.startsWith(u8, include_arg, "-I")) {
|
||||
try result.includes.append(include_arg[2..]);
|
||||
} else {
|
||||
try result.includes.append(include_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
var it = mem.split(libdir_output, " \r\n");
|
||||
while (it.next()) |libdir| {
|
||||
if (mem.startsWith(u8, libdir, "-L")) {
|
||||
try result.libdirs.append(libdir[2..]);
|
||||
} else {
|
||||
try result.libdirs.append(libdir);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
pub fn installStdLib(b: &Builder, stdlib_files: []const u8) void {
|
||||
var it = mem.split(stdlib_files, ";");
|
||||
while (it.next()) |stdlib_file| {
|
||||
const src_path = os.path.join(b.allocator, "std", stdlib_file) catch unreachable;
|
||||
const dest_path = os.path.join(b.allocator, "lib", "zig", "std", stdlib_file) catch unreachable;
|
||||
b.installFile(src_path, dest_path);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn installCHeaders(b: &Builder, c_header_files: []const u8) void {
|
||||
var it = mem.split(c_header_files, ";");
|
||||
while (it.next()) |c_header_file| {
|
||||
const src_path = os.path.join(b.allocator, "c_headers", c_header_file) catch unreachable;
|
||||
const dest_path = os.path.join(b.allocator, "lib", "zig", "include", c_header_file) catch unreachable;
|
||||
b.installFile(src_path, dest_path);
|
||||
}
|
||||
}
|
||||
|
||||
fn nextValue(index: &usize, build_info: []const u8) []const u8 {
|
||||
const start = *index;
|
||||
while (true) : (*index += 1) {
|
||||
switch (build_info[*index]) {
|
||||
'\n' => {
|
||||
const result = build_info[start..*index];
|
||||
*index += 1;
|
||||
return result;
|
||||
},
|
||||
'\r' => {
|
||||
const result = build_info[start..*index];
|
||||
*index += 2;
|
||||
return result;
|
||||
},
|
||||
else => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -131,15 +131,6 @@ __DEVICE__ float ldexp(float __arg, int __exp) {
|
||||
__DEVICE__ float log(float __x) { return ::logf(__x); }
|
||||
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
|
||||
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
|
||||
__DEVICE__ float nexttoward(float __from, double __to) {
|
||||
return __builtin_nexttowardf(__from, __to);
|
||||
}
|
||||
__DEVICE__ double nexttoward(double __from, double __to) {
|
||||
return __builtin_nexttoward(__from, __to);
|
||||
}
|
||||
__DEVICE__ float nexttowardf(float __from, double __to) {
|
||||
return __builtin_nexttowardf(__from, __to);
|
||||
}
|
||||
__DEVICE__ float pow(float __base, float __exp) {
|
||||
return ::powf(__base, __exp);
|
||||
}
|
||||
@@ -157,6 +148,10 @@ __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
|
||||
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
|
||||
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
|
||||
|
||||
// Notably missing above is nexttoward. We omit it because
|
||||
// libdevice doesn't provide an implementation, and we don't want to be in the
|
||||
// business of implementing tricky libm functions in this header.
|
||||
|
||||
// Now we've defined everything we promised we'd define in
|
||||
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
|
||||
// fix up our math functions.
|
||||
@@ -295,13 +290,6 @@ ldexp(__T __x, int __exp) {
|
||||
return std::ldexp((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
||||
double>::type
|
||||
nexttoward(__T __from, double __to) {
|
||||
return std::nexttoward((double)__from, __to);
|
||||
}
|
||||
|
||||
template <typename __T1, typename __T2>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<
|
||||
std::numeric_limits<__T1>::is_specialized &&
|
||||
@@ -388,7 +376,6 @@ using ::lrint;
|
||||
using ::lround;
|
||||
using ::nearbyint;
|
||||
using ::nextafter;
|
||||
using ::nexttoward;
|
||||
using ::pow;
|
||||
using ::remainder;
|
||||
using ::remquo;
|
||||
@@ -456,8 +443,6 @@ using ::lroundf;
|
||||
using ::modff;
|
||||
using ::nearbyintf;
|
||||
using ::nextafterf;
|
||||
using ::nexttowardf;
|
||||
using ::nexttowardf;
|
||||
using ::powf;
|
||||
using ::remainderf;
|
||||
using ::remquof;
|
||||
|
||||
@@ -34,23 +34,24 @@
|
||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
|
||||
|
||||
#pragma push_macro("__MAKE_SHUFFLES")
|
||||
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask) \
|
||||
inline __device__ int __FnName(int __val, int __offset, \
|
||||
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \
|
||||
__Type) \
|
||||
inline __device__ int __FnName(int __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return __IntIntrinsic(__val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ float __FnName(float __val, int __offset, \
|
||||
inline __device__ float __FnName(float __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return __FloatIntrinsic(__val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ unsigned int __FnName(unsigned int __val, int __offset, \
|
||||
inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned int>( \
|
||||
::__FnName(static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ long long __FnName(long long __val, int __offset, \
|
||||
inline __device__ long long __FnName(long long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
struct __Bits { \
|
||||
int __a, __b; \
|
||||
@@ -65,12 +66,29 @@
|
||||
memcpy(&__ret, &__tmp, sizeof(__tmp)); \
|
||||
return __ret; \
|
||||
} \
|
||||
inline __device__ long __FnName(long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
_Static_assert(sizeof(long) == sizeof(long long) || \
|
||||
sizeof(long) == sizeof(int)); \
|
||||
if (sizeof(long) == sizeof(long long)) { \
|
||||
return static_cast<long>( \
|
||||
::__FnName(static_cast<long long>(__val), __offset, __width)); \
|
||||
} else if (sizeof(long) == sizeof(int)) { \
|
||||
return static_cast<long>( \
|
||||
::__FnName(static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
} \
|
||||
inline __device__ unsigned long __FnName( \
|
||||
unsigned long __val, __Type __offset, int __width = warpSize) { \
|
||||
return static_cast<unsigned long>( \
|
||||
::__FnName(static_cast<long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ unsigned long long __FnName( \
|
||||
unsigned long long __val, int __offset, int __width = warpSize) { \
|
||||
unsigned long long __val, __Type __offset, int __width = warpSize) { \
|
||||
return static_cast<unsigned long long>(::__FnName( \
|
||||
static_cast<unsigned long long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ double __FnName(double __val, int __offset, \
|
||||
inline __device__ double __FnName(double __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
long long __tmp; \
|
||||
_Static_assert(sizeof(__tmp) == sizeof(__val)); \
|
||||
@@ -81,17 +99,166 @@
|
||||
return __ret; \
|
||||
}
|
||||
|
||||
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f);
|
||||
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);
|
||||
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
|
||||
// maxLane.
|
||||
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0);
|
||||
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f);
|
||||
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
|
||||
|
||||
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,
|
||||
unsigned int);
|
||||
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,
|
||||
unsigned int);
|
||||
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
|
||||
int);
|
||||
#pragma pop_macro("__MAKE_SHUFFLES")
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
|
||||
|
||||
#if CUDA_VERSION >= 9000
|
||||
#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
|
||||
// __shfl_sync_* variants available in CUDA-9
|
||||
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
|
||||
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \
|
||||
__Mask, __Type) \
|
||||
inline __device__ int __FnName(unsigned int __mask, int __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
return __IntIntrinsic(__mask, __val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ float __FnName(unsigned int __mask, float __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
return __FloatIntrinsic(__mask, __val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ unsigned int __FnName(unsigned int __mask, \
|
||||
unsigned int __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned int>( \
|
||||
::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ long long __FnName(unsigned int __mask, long long __val, \
|
||||
__Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
struct __Bits { \
|
||||
int __a, __b; \
|
||||
}; \
|
||||
_Static_assert(sizeof(__val) == sizeof(__Bits)); \
|
||||
_Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
|
||||
__Bits __tmp; \
|
||||
memcpy(&__val, &__tmp, sizeof(__val)); \
|
||||
__tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \
|
||||
__tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \
|
||||
long long __ret; \
|
||||
memcpy(&__ret, &__tmp, sizeof(__tmp)); \
|
||||
return __ret; \
|
||||
} \
|
||||
inline __device__ unsigned long long __FnName( \
|
||||
unsigned int __mask, unsigned long long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned long long>(::__FnName( \
|
||||
__mask, static_cast<unsigned long long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ long __FnName(unsigned int __mask, long __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
_Static_assert(sizeof(long) == sizeof(long long) || \
|
||||
sizeof(long) == sizeof(int)); \
|
||||
if (sizeof(long) == sizeof(long long)) { \
|
||||
return static_cast<long>(::__FnName( \
|
||||
__mask, static_cast<long long>(__val), __offset, __width)); \
|
||||
} else if (sizeof(long) == sizeof(int)) { \
|
||||
return static_cast<long>( \
|
||||
::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
} \
|
||||
inline __device__ unsigned long __FnName( \
|
||||
unsigned int __mask, unsigned long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned long>( \
|
||||
::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ double __FnName(unsigned int __mask, double __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
long long __tmp; \
|
||||
_Static_assert(sizeof(__tmp) == sizeof(__val)); \
|
||||
memcpy(&__tmp, &__val, sizeof(__val)); \
|
||||
__tmp = ::__FnName(__mask, __tmp, __offset, __width); \
|
||||
double __ret; \
|
||||
memcpy(&__ret, &__tmp, sizeof(__ret)); \
|
||||
return __ret; \
|
||||
}
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,
|
||||
__nvvm_shfl_sync_idx_f32, 0x1f, int);
|
||||
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
|
||||
// maxLane.
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,
|
||||
__nvvm_shfl_sync_up_f32, 0, unsigned int);
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
|
||||
__nvvm_shfl_sync_down_f32, 0x1f, unsigned int);
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,
|
||||
__nvvm_shfl_sync_bfly_f32, 0x1f, int);
|
||||
#pragma pop_macro("__MAKE_SYNC_SHUFFLES")
|
||||
|
||||
inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
|
||||
return __nvvm_bar_warp_sync(mask);
|
||||
}
|
||||
|
||||
inline __device__ void __barrier_sync(unsigned int id) {
|
||||
__nvvm_barrier_sync(id);
|
||||
}
|
||||
|
||||
inline __device__ void __barrier_sync_count(unsigned int id,
|
||||
unsigned int count) {
|
||||
__nvvm_barrier_sync_cnt(id, count);
|
||||
}
|
||||
|
||||
inline __device__ int __all_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_all_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ int __any_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_any_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ int __uni_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_uni_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_ballot_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }
|
||||
|
||||
inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
|
||||
return __nvvm_fns(mask, base, offset);
|
||||
}
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
|
||||
|
||||
// Define __match* builtins CUDA-9 headers expect to see.
|
||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
|
||||
inline __device__ unsigned int __match32_any_sync(unsigned int mask,
|
||||
unsigned int value) {
|
||||
return __nvvm_match_any_sync_i32(mask, value);
|
||||
}
|
||||
|
||||
inline __device__ unsigned long long
|
||||
__match64_any_sync(unsigned int mask, unsigned long long value) {
|
||||
return __nvvm_match_any_sync_i64(mask, value);
|
||||
}
|
||||
|
||||
inline __device__ unsigned int
|
||||
__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
|
||||
return __nvvm_match_all_sync_i32p(mask, value, pred);
|
||||
}
|
||||
|
||||
inline __device__ unsigned long long
|
||||
__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
|
||||
return __nvvm_match_all_sync_i64p(mask, value, pred);
|
||||
}
|
||||
#include "crt/sm_70_rt.hpp"
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
|
||||
#endif // __CUDA_VERSION >= 9000
|
||||
|
||||
// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
|
||||
|
||||
// Prevent the vanilla sm_32 intrinsics header from being included.
|
||||
|
||||
@@ -149,9 +149,6 @@ __DEVICE__ double nearbyint(double);
|
||||
__DEVICE__ float nearbyint(float);
|
||||
__DEVICE__ double nextafter(double, double);
|
||||
__DEVICE__ float nextafter(float, float);
|
||||
__DEVICE__ double nexttoward(double, double);
|
||||
__DEVICE__ float nexttoward(float, double);
|
||||
__DEVICE__ float nexttowardf(float, double);
|
||||
__DEVICE__ double pow(double, double);
|
||||
__DEVICE__ double pow(double, int);
|
||||
__DEVICE__ float pow(float, float);
|
||||
@@ -185,6 +182,10 @@ __DEVICE__ float tgamma(float);
|
||||
__DEVICE__ double trunc(double);
|
||||
__DEVICE__ float trunc(float);
|
||||
|
||||
// Notably missing above is nexttoward, which we don't define on
|
||||
// the device side because libdevice doesn't give us an implementation, and we
|
||||
// don't want to be in the business of writing one ourselves.
|
||||
|
||||
// We need to define these overloads in exactly the namespace our standard
|
||||
// library uses (including the right inline namespace), otherwise they won't be
|
||||
// picked up by other functions in the standard library (e.g. functions in
|
||||
@@ -255,7 +256,6 @@ using ::nan;
|
||||
using ::nanf;
|
||||
using ::nearbyint;
|
||||
using ::nextafter;
|
||||
using ::nexttoward;
|
||||
using ::pow;
|
||||
using ::remainder;
|
||||
using ::remquo;
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#include "cuda.h"
|
||||
#if !defined(CUDA_VERSION)
|
||||
#error "cuda.h did not define CUDA_VERSION"
|
||||
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
|
||||
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9000
|
||||
#error "Unsupported CUDA version!"
|
||||
#endif
|
||||
|
||||
@@ -86,7 +86,11 @@
|
||||
#define __COMMON_FUNCTIONS_H__
|
||||
|
||||
#undef __CUDACC__
|
||||
#if CUDA_VERSION < 9000
|
||||
#define __CUDABE__
|
||||
#else
|
||||
#define __CUDA_LIBDEVICE__
|
||||
#endif
|
||||
// Disables definitions of device-side runtime support stubs in
|
||||
// cuda_device_runtime_api.h
|
||||
#include "driver_types.h"
|
||||
@@ -94,6 +98,7 @@
|
||||
#include "host_defines.h"
|
||||
|
||||
#undef __CUDABE__
|
||||
#undef __CUDA_LIBDEVICE__
|
||||
#define __CUDACC__
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
@@ -105,7 +110,9 @@
|
||||
#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
|
||||
#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)
|
||||
|
||||
#if CUDA_VERSION < 9000
|
||||
#include "crt/device_runtime.h"
|
||||
#endif
|
||||
#include "crt/host_runtime.h"
|
||||
// device_runtime.h defines __cxa_* macros that will conflict with
|
||||
// cxxabi.h.
|
||||
@@ -166,7 +173,18 @@ inline __host__ double __signbitd(double x) {
|
||||
// __device__.
|
||||
#pragma push_macro("__forceinline__")
|
||||
#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
|
||||
|
||||
#pragma push_macro("__float2half_rn")
|
||||
#if CUDA_VERSION >= 9000
|
||||
// CUDA-9 has conflicting prototypes for __float2half_rn(float f) in
|
||||
// cuda_fp16.h[pp] and device_functions.hpp. We need to get the one in
|
||||
// device_functions.hpp out of the way.
|
||||
#define __float2half_rn __float2half_rn_disabled
|
||||
#endif
|
||||
|
||||
#include "device_functions.hpp"
|
||||
#pragma pop_macro("__float2half_rn")
|
||||
|
||||
|
||||
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
|
||||
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
|
||||
@@ -247,7 +265,23 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
|
||||
#pragma push_macro("__GNUC__")
|
||||
#undef __GNUC__
|
||||
#define signbit __ignored_cuda_signbit
|
||||
|
||||
// CUDA-9 omits device-side definitions of some math functions if it sees
|
||||
// include guard from math.h wrapper from libstdc++. We have to undo the header
|
||||
// guard temporarily to get the definitions we need.
|
||||
#pragma push_macro("_GLIBCXX_MATH_H")
|
||||
#pragma push_macro("_LIBCPP_VERSION")
|
||||
#if CUDA_VERSION >= 9000
|
||||
#undef _GLIBCXX_MATH_H
|
||||
// We also need to undo another guard that checks for libc++ 3.8+
|
||||
#ifdef _LIBCPP_VERSION
|
||||
#define _LIBCPP_VERSION 3700
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "math_functions.hpp"
|
||||
#pragma pop_macro("_GLIBCXX_MATH_H")
|
||||
#pragma pop_macro("_LIBCPP_VERSION")
|
||||
#pragma pop_macro("__GNUC__")
|
||||
#pragma pop_macro("signbit")
|
||||
|
||||
|
||||
49
c_headers/arm64intr.h
Normal file
49
c_headers/arm64intr.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Only include this if we're compiling for the windows platform. */
|
||||
#ifndef _MSC_VER
|
||||
#include_next <arm64intr.h>
|
||||
#else
|
||||
|
||||
#ifndef __ARM64INTR_H
|
||||
#define __ARM64INTR_H
|
||||
|
||||
typedef enum
|
||||
{
|
||||
_ARM64_BARRIER_SY = 0xF,
|
||||
_ARM64_BARRIER_ST = 0xE,
|
||||
_ARM64_BARRIER_LD = 0xD,
|
||||
_ARM64_BARRIER_ISH = 0xB,
|
||||
_ARM64_BARRIER_ISHST = 0xA,
|
||||
_ARM64_BARRIER_ISHLD = 0x9,
|
||||
_ARM64_BARRIER_NSH = 0x7,
|
||||
_ARM64_BARRIER_NSHST = 0x6,
|
||||
_ARM64_BARRIER_NSHLD = 0x5,
|
||||
_ARM64_BARRIER_OSH = 0x3,
|
||||
_ARM64_BARRIER_OSHST = 0x2,
|
||||
_ARM64_BARRIER_OSHLD = 0x1
|
||||
} _ARM64INTR_BARRIER_TYPE;
|
||||
|
||||
#endif /* __ARM64INTR_H */
|
||||
#endif /* _MSC_VER */
|
||||
8464
c_headers/arm_neon.h
8464
c_headers/arm_neon.h
File diff suppressed because it is too large
Load Diff
@@ -145,13 +145,21 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_avg_epu8(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
|
||||
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
|
||||
return (__m256i)__builtin_convertvector(
|
||||
((__builtin_convertvector((__v32qu)__a, __v32hu) +
|
||||
__builtin_convertvector((__v32qu)__b, __v32hu)) + 1)
|
||||
>> 1, __v32qu);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_avg_epu16(__m256i __a, __m256i __b)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
|
||||
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
|
||||
return (__m256i)__builtin_convertvector(
|
||||
((__builtin_convertvector((__v16hu)__a, __v16su) +
|
||||
__builtin_convertvector((__v16hu)__b, __v16su)) + 1)
|
||||
>> 1, __v16hu);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
|
||||
97
c_headers/avx512bitalgintrin.h
Normal file
97
c_headers/avx512bitalgintrin.h
Normal file
@@ -0,0 +1,97 @@
|
||||
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512BITALGINTRIN_H
|
||||
#define __AVX512BITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi16(__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
|
||||
(__v32hi) _mm512_popcnt_epi16(__B),
|
||||
(__v32hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi8(__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
|
||||
(__v64qi) _mm512_popcnt_epi8(__B),
|
||||
(__v64qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
@@ -56,293 +56,145 @@ _mm512_setzero_hi(void) {
|
||||
|
||||
/* Integer compare */
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)-1); })
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)(m)); })
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)-1); })
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)(m)); })
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)-1); })
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)(m)); })
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)-1); })
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)(m)); })
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
#define _mm512_cmpeq_epi8_mask(A, B) \
|
||||
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epi8_mask(A, B) \
|
||||
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epi8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epi8_mask(A, B) \
|
||||
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epi8_mask(A, B) \
|
||||
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epi8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epi8_mask(A, B) \
|
||||
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epi8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epi8_mask(A, B) \
|
||||
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_cmpeq_epu8_mask(A, B) \
|
||||
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epu8_mask(A, B) \
|
||||
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epu8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epu8_mask(A, B) \
|
||||
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epu8_mask(A, B) \
|
||||
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epu8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epu8_mask(A, B) \
|
||||
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epu8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epu8_mask(A, B) \
|
||||
_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
#define _mm512_cmpeq_epi16_mask(A, B) \
|
||||
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epi16_mask(A, B) \
|
||||
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epi16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epi16_mask(A, B) \
|
||||
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epi16_mask(A, B) \
|
||||
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epi16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epi16_mask(A, B) \
|
||||
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epi16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epi16_mask(A, B) \
|
||||
_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
|
||||
(__mmask64)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
|
||||
(__mmask32)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_cmpeq_epu16_mask(A, B) \
|
||||
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epu16_mask(A, B) \
|
||||
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epu16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epu16_mask(A, B) \
|
||||
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epu16_mask(A, B) \
|
||||
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epu16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epu16_mask(A, B) \
|
||||
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epu16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epu16_mask(A, B) \
|
||||
_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_add_epi8 (__m512i __A, __m512i __B) {
|
||||
@@ -706,57 +558,55 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_avg_epu8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__v64qi) _mm512_setzero_qi(),
|
||||
(__mmask64) -1);
|
||||
typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
|
||||
return (__m512i)__builtin_convertvector(
|
||||
((__builtin_convertvector((__v64qu) __A, __v64hu) +
|
||||
__builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
|
||||
>> 1, __v64qu);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __U);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
|
||||
(__v64qi)_mm512_avg_epu8(__A, __B),
|
||||
(__v64qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__v64qi) _mm512_setzero_qi(),
|
||||
(__mmask64) __U);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
|
||||
(__v64qi)_mm512_avg_epu8(__A, __B),
|
||||
(__v64qi)_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_avg_epu16 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__v32hi) _mm512_setzero_hi(),
|
||||
(__mmask32) -1);
|
||||
typedef unsigned int __v32su __attribute__((__vector_size__(128)));
|
||||
return (__m512i)__builtin_convertvector(
|
||||
((__builtin_convertvector((__v32hu) __A, __v32su) +
|
||||
__builtin_convertvector((__v32hu) __B, __v32su)) + 1)
|
||||
>> 1, __v32hu);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__v32hi) __W,
|
||||
(__mmask32) __U);
|
||||
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
|
||||
(__v32hi)_mm512_avg_epu16(__A, __B),
|
||||
(__v32hi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__v32hi) _mm512_setzero_hi(),
|
||||
(__mmask32) __U);
|
||||
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
|
||||
(__v32hi)_mm512_avg_epu16(__A, __B),
|
||||
(__v32hi) _mm512_setzero_hi());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
@@ -1543,46 +1393,6 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
|
||||
}
|
||||
|
||||
|
||||
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)-1); })
|
||||
|
||||
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)(m)); })
|
||||
|
||||
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)-1); })
|
||||
|
||||
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
|
||||
(__v64qi)(__m512i)(b), (int)(p), \
|
||||
(__mmask64)(m)); })
|
||||
|
||||
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)-1); })
|
||||
|
||||
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)(m)); })
|
||||
|
||||
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)-1); })
|
||||
|
||||
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
|
||||
(__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
|
||||
(__v32hi)(__m512i)(b), (int)(p), \
|
||||
(__mmask32)(m)); })
|
||||
|
||||
#define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \
|
||||
(__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
|
||||
(__v32hi)_mm512_undefined_epi32(), \
|
||||
@@ -2028,32 +1838,29 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
|
||||
(__v64qi) __O,
|
||||
__M);
|
||||
return (__m512i) __builtin_ia32_selectb_512(__M,
|
||||
(__v64qi)_mm512_set1_epi8(__A),
|
||||
(__v64qi) __O);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
|
||||
(__v64qi)
|
||||
_mm512_setzero_qi(),
|
||||
__M);
|
||||
return (__m512i) __builtin_ia32_selectb_512(__M,
|
||||
(__v64qi) _mm512_set1_epi8(__A),
|
||||
(__v64qi) _mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
|
||||
(__mmask64) __B);
|
||||
return (__mmask64) (( __A & 0xFFFFFFFF) | ( __B << 32));
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
|
||||
(__mmask32) __B);
|
||||
return (__mmask32) (( __A & 0xFFFF) | ( __B << 16));
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
@@ -2108,61 +1915,56 @@ _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_test_epi8_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
|
||||
(__v64qi) __B, __U);
|
||||
return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_test_epi16_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
|
||||
(__v32hi) __B, __U);
|
||||
return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
|
||||
(__v64qi) __B, __U);
|
||||
return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
|
||||
(__v32hi) __B, __U);
|
||||
return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_qi());
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
@@ -2219,17 +2021,17 @@ _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
|
||||
(__v32hi) __O,
|
||||
__M);
|
||||
return (__m512i) __builtin_ia32_selectw_512(__M,
|
||||
(__v32hi) _mm512_set1_epi16(__A),
|
||||
(__v32hi) __O);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
|
||||
(__v32hi) _mm512_setzero_hi(),
|
||||
__M);
|
||||
return (__m512i) __builtin_ia32_selectw_512(__M,
|
||||
(__v32hi) _mm512_set1_epi16(__A),
|
||||
(__v32hi) _mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
|
||||
@@ -130,13 +130,14 @@ _mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmb512 (__A);
|
||||
return (__m512i) _mm512_set1_epi64((long long) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmw512 (__A);
|
||||
return (__m512i) _mm512_set1_epi32((int) __A);
|
||||
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
@@ -973,25 +973,26 @@ _mm512_movepi64_mask (__m512i __A)
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
_mm512_broadcast_f32x2 (__m128 __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
|
||||
(__v16sf)_mm512_undefined_ps(),
|
||||
(__mmask16) -1);
|
||||
return (__m512)__builtin_shufflevector((__v4sf)__A,
|
||||
(__v4sf)_mm_undefined_ps(),
|
||||
0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
|
||||
(__v16sf)
|
||||
__O, __M);
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
|
||||
(__v16sf)_mm512_broadcast_f32x2(__A),
|
||||
(__v16sf)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
|
||||
{
|
||||
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
|
||||
(__v16sf)_mm512_setzero_ps (),
|
||||
__M);
|
||||
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
|
||||
(__v16sf)_mm512_broadcast_f32x2(__A),
|
||||
(__v16sf)_mm512_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
@@ -1044,25 +1045,26 @@ _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_broadcast_i32x2 (__m128i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
|
||||
(__v16si)_mm512_setzero_si512(),
|
||||
(__mmask16) -1);
|
||||
return (__m512i)__builtin_shufflevector((__v4si)__A,
|
||||
(__v4si)_mm_undefined_si128(),
|
||||
0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
|
||||
(__v16si)
|
||||
__O, __M);
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
|
||||
(__v16si)_mm512_broadcast_i32x2(__A),
|
||||
(__v16si)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
|
||||
(__v16si)_mm512_setzero_si512 (),
|
||||
__M);
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
|
||||
(__v16si)_mm512_broadcast_i32x2(__A),
|
||||
(__v16si)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
|
||||
@@ -258,30 +258,6 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
|
||||
(__v8di) _mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
#else
|
||||
return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
#endif
|
||||
}
|
||||
|
||||
static __inline __m512 __DEFAULT_FN_ATTRS
|
||||
_mm512_setzero_ps(void)
|
||||
@@ -340,12 +316,30 @@ _mm512_set1_epi32(int __s)
|
||||
__s, __s, __s, __s, __s, __s, __s, __s };
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectd_512(__M,
|
||||
(__v16si)_mm512_set1_epi32(__A),
|
||||
(__v16si)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_set1_epi64(long long __d)
|
||||
{
|
||||
return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_selectq_512(__M,
|
||||
(__v8di)_mm512_set1_epi64(__A),
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
#endif
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
_mm512_broadcastss_ps(__m128 __A)
|
||||
{
|
||||
@@ -4549,37 +4543,6 @@ _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
/* Bit Test */
|
||||
|
||||
static __inline __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
|
||||
(__v16si) __B, __U);
|
||||
}
|
||||
|
||||
static __inline __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
|
||||
}
|
||||
|
||||
|
||||
/* SIMD load ops */
|
||||
|
||||
@@ -4850,293 +4813,105 @@ _mm512_knot(__mmask16 __M)
|
||||
|
||||
/* Integer compare */
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
#define _mm512_cmpeq_epi32_mask(A, B) \
|
||||
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epi32_mask(A, B) \
|
||||
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epi32_mask(A, B) \
|
||||
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epi32_mask(A, B) \
|
||||
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epi32_mask(A, B) \
|
||||
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epi32_mask(A, B) \
|
||||
_mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_cmpeq_epu32_mask(A, B) \
|
||||
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epu32_mask(A, B) \
|
||||
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epu32_mask(A, B) \
|
||||
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epu32_mask(A, B) \
|
||||
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epu32_mask(A, B) \
|
||||
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epu32_mask(A, B) \
|
||||
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
#define _mm512_cmpeq_epi64_mask(A, B) \
|
||||
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epi64_mask(A, B) \
|
||||
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epi64_mask(A, B) \
|
||||
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epi64_mask(A, B) \
|
||||
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epi64_mask(A, B) \
|
||||
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epi64_mask(A, B) \
|
||||
_mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
||||
(__mmask16)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
||||
__u);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
||||
(__mmask8)-1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
|
||||
return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
|
||||
__u);
|
||||
}
|
||||
#define _mm512_cmpeq_epu64_mask(A, B) \
|
||||
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
|
||||
#define _mm512_cmpge_epu64_mask(A, B) \
|
||||
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
|
||||
#define _mm512_cmpgt_epu64_mask(A, B) \
|
||||
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
|
||||
#define _mm512_cmple_epu64_mask(A, B) \
|
||||
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
|
||||
#define _mm512_cmplt_epu64_mask(A, B) \
|
||||
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
|
||||
#define _mm512_cmpneq_epu64_mask(A, B) \
|
||||
_mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
|
||||
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
|
||||
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_cvtepi8_epi32(__m128i __A)
|
||||
@@ -6803,35 +6578,6 @@ _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
|
||||
(__v16si) __B, __U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
|
||||
(__v8di) __B, __U);
|
||||
}
|
||||
|
||||
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
|
||||
@@ -7200,76 +6946,100 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
|
||||
}
|
||||
|
||||
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
|
||||
(__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(B), (int)(imm), \
|
||||
(__v16sf)_mm512_undefined_ps(), \
|
||||
(__mmask16)-1); })
|
||||
(__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(B), \
|
||||
0 + ((((imm) >> 0) & 0x3) * 4), \
|
||||
1 + ((((imm) >> 0) & 0x3) * 4), \
|
||||
2 + ((((imm) >> 0) & 0x3) * 4), \
|
||||
3 + ((((imm) >> 0) & 0x3) * 4), \
|
||||
0 + ((((imm) >> 2) & 0x3) * 4), \
|
||||
1 + ((((imm) >> 2) & 0x3) * 4), \
|
||||
2 + ((((imm) >> 2) & 0x3) * 4), \
|
||||
3 + ((((imm) >> 2) & 0x3) * 4), \
|
||||
16 + ((((imm) >> 4) & 0x3) * 4), \
|
||||
17 + ((((imm) >> 4) & 0x3) * 4), \
|
||||
18 + ((((imm) >> 4) & 0x3) * 4), \
|
||||
19 + ((((imm) >> 4) & 0x3) * 4), \
|
||||
16 + ((((imm) >> 6) & 0x3) * 4), \
|
||||
17 + ((((imm) >> 6) & 0x3) * 4), \
|
||||
18 + ((((imm) >> 6) & 0x3) * 4), \
|
||||
19 + ((((imm) >> 6) & 0x3) * 4)); })
|
||||
|
||||
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
|
||||
(__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(B), (int)(imm), \
|
||||
(__v16sf)(__m512)(W), \
|
||||
(__mmask16)(U)); })
|
||||
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
|
||||
(__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
|
||||
(__v16sf)(__m512)(W)); })
|
||||
|
||||
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
|
||||
(__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(B), (int)(imm), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)(U)); })
|
||||
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
|
||||
(__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
|
||||
(__v16sf)_mm512_setzero_ps()); })
|
||||
|
||||
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
|
||||
(__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(B), (int)(imm), \
|
||||
(__v8df)_mm512_undefined_pd(), \
|
||||
(__mmask8)-1); })
|
||||
(__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(B), \
|
||||
0 + ((((imm) >> 0) & 0x3) * 2), \
|
||||
1 + ((((imm) >> 0) & 0x3) * 2), \
|
||||
0 + ((((imm) >> 2) & 0x3) * 2), \
|
||||
1 + ((((imm) >> 2) & 0x3) * 2), \
|
||||
8 + ((((imm) >> 4) & 0x3) * 2), \
|
||||
9 + ((((imm) >> 4) & 0x3) * 2), \
|
||||
8 + ((((imm) >> 6) & 0x3) * 2), \
|
||||
9 + ((((imm) >> 6) & 0x3) * 2)); })
|
||||
|
||||
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
|
||||
(__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(B), (int)(imm), \
|
||||
(__v8df)(__m512d)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
|
||||
(__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
|
||||
(__v8df)(__m512d)(W)); })
|
||||
|
||||
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
|
||||
(__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(B), (int)(imm), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
|
||||
(__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
|
||||
(__v8df)_mm512_setzero_pd()); })
|
||||
|
||||
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
|
||||
(__v16si)(__m512i)(B), (int)(imm), \
|
||||
(__v16si)_mm512_setzero_si512(), \
|
||||
(__mmask16)-1); })
|
||||
(__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B), \
|
||||
0 + ((((imm) >> 0) & 0x3) * 2), \
|
||||
1 + ((((imm) >> 0) & 0x3) * 2), \
|
||||
0 + ((((imm) >> 2) & 0x3) * 2), \
|
||||
1 + ((((imm) >> 2) & 0x3) * 2), \
|
||||
8 + ((((imm) >> 4) & 0x3) * 2), \
|
||||
9 + ((((imm) >> 4) & 0x3) * 2), \
|
||||
8 + ((((imm) >> 6) & 0x3) * 2), \
|
||||
9 + ((((imm) >> 6) & 0x3) * 2)); })
|
||||
|
||||
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
|
||||
(__v16si)(__m512i)(B), (int)(imm), \
|
||||
(__v16si)(__m512i)(W), \
|
||||
(__mmask16)(U)); })
|
||||
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
|
||||
(__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
|
||||
(__v16si)(__m512i)(W)); })
|
||||
|
||||
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
|
||||
(__v16si)(__m512i)(B), (int)(imm), \
|
||||
(__v16si)_mm512_setzero_si512(), \
|
||||
(__mmask16)(U)); })
|
||||
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
|
||||
(__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
|
||||
(__v16si)_mm512_setzero_si512()); })
|
||||
|
||||
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B), (int)(imm), \
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)-1); })
|
||||
(__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B), \
|
||||
0 + ((((imm) >> 0) & 0x3) * 2), \
|
||||
1 + ((((imm) >> 0) & 0x3) * 2), \
|
||||
0 + ((((imm) >> 2) & 0x3) * 2), \
|
||||
1 + ((((imm) >> 2) & 0x3) * 2), \
|
||||
8 + ((((imm) >> 4) & 0x3) * 2), \
|
||||
9 + ((((imm) >> 4) & 0x3) * 2), \
|
||||
8 + ((((imm) >> 6) & 0x3) * 2), \
|
||||
9 + ((((imm) >> 6) & 0x3) * 2)); })
|
||||
|
||||
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B), (int)(imm), \
|
||||
(__v8di)(__m512i)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
|
||||
(__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
|
||||
(__v8di)(__m512i)(W)); })
|
||||
|
||||
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B), (int)(imm), \
|
||||
(__v8di)_mm512_setzero_si512(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
|
||||
(__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
|
||||
(__v8di)_mm512_setzero_si512()); })
|
||||
|
||||
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
|
||||
(__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
|
||||
@@ -9017,7 +8787,7 @@ _mm512_kortestz (__mmask16 __A, __mmask16 __B)
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
|
||||
return (__mmask16) (( __A & 0xFF) | ( __B << 8));
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
@@ -9040,7 +8810,7 @@ _mm512_stream_si512 (__m512i * __P, __m512i __A)
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_stream_load_si512 (void *__P)
|
||||
_mm512_stream_load_si512 (void const *__P)
|
||||
{
|
||||
typedef __v8di __v8di_aligned __attribute__((aligned(64)));
|
||||
return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
|
||||
@@ -9172,6 +8942,64 @@ _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
|
||||
(__mmask8)(M), \
|
||||
_MM_FROUND_CUR_DIRECTION); })
|
||||
|
||||
/* Bit Test */
|
||||
|
||||
static __inline __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_test_epi32_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_test_epi64_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
|
||||
static __inline__ __m512 __DEFAULT_FN_ATTRS
|
||||
_mm512_movehdup_ps (__m512 __A)
|
||||
{
|
||||
@@ -9742,16 +9570,18 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
|
||||
__M);
|
||||
return (__m512i) __builtin_ia32_selectd_512(__M,
|
||||
(__v16si) _mm512_set1_epi32(__A),
|
||||
(__v16si) __O);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
|
||||
__M);
|
||||
return (__m512i) __builtin_ia32_selectq_512(__M,
|
||||
(__v8di) _mm512_set1_epi64(__A),
|
||||
(__v8di) __O);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
391
c_headers/avx512vbmi2intrin.h
Normal file
391
c_headers/avx512vbmi2intrin.h
Normal file
@@ -0,0 +1,391 @@
|
||||
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VBMI2INTRIN_H
|
||||
#define __AVX512VBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2")))
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
|
||||
(__v32hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
|
||||
(__v32hi) _mm512_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
|
||||
(__v64qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
|
||||
(__v64qi) _mm512_setzero_qi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
|
||||
{
|
||||
__builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
|
||||
{
|
||||
__builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
|
||||
(__v32hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
|
||||
(__v32hi) _mm512_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
|
||||
(__v64qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
|
||||
(__v64qi) _mm512_setzero_qi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
|
||||
(__v32hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
|
||||
(__v32hi) _mm512_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
|
||||
(__v64qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
|
||||
(__v64qi) _mm512_setzero_qi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \
|
||||
(__v8di)(B), \
|
||||
(int)(I), \
|
||||
(__v8di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
|
||||
_mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm512_shldi_epi64(A, B, I) \
|
||||
_mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \
|
||||
(__v16si)(B), \
|
||||
(int)(I), \
|
||||
(__v16si)(S), \
|
||||
(__mmask16)(U)); })
|
||||
|
||||
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
|
||||
_mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm512_shldi_epi32(A, B, I) \
|
||||
_mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \
|
||||
(__v32hi)(B), \
|
||||
(int)(I), \
|
||||
(__v32hi)(S), \
|
||||
(__mmask32)(U)); })
|
||||
|
||||
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
|
||||
_mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm512_shldi_epi16(A, B, I) \
|
||||
_mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \
|
||||
(__v8di)(B), \
|
||||
(int)(I), \
|
||||
(__v8di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
|
||||
_mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm512_shrdi_epi64(A, B, I) \
|
||||
_mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \
|
||||
(__v16si)(B), \
|
||||
(int)(I), \
|
||||
(__v16si)(S), \
|
||||
(__mmask16)(U)); })
|
||||
|
||||
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
|
||||
_mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm512_shrdi_epi32(A, B, I) \
|
||||
_mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \
|
||||
(__v32hi)(B), \
|
||||
(int)(I), \
|
||||
(__v32hi)(S), \
|
||||
(__mmask32)(U)); })
|
||||
|
||||
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
|
||||
_mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm512_shrdi_epi16(A, B, I) \
|
||||
_mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
|
||||
157
c_headers/avx512vlbitalgintrin.h
Normal file
157
c_headers/avx512vlbitalgintrin.h
Normal file
@@ -0,0 +1,157 @@
|
||||
/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VLBITALGINTRIN_H
|
||||
#define __AVX512VLBITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_popcnt_epi16(__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
|
||||
(__v16hi) _mm256_popcnt_epi16(__B),
|
||||
(__v16hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_popcnt_epi16(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
|
||||
(__v8hi) _mm128_popcnt_epi16(__B),
|
||||
(__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_popcnt_epi8(__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
|
||||
(__v32qi) _mm256_popcnt_epi8(__B),
|
||||
(__v32qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_popcnt_epi8(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
|
||||
(__v16qi) _mm128_popcnt_epi8(__B),
|
||||
(__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -33,26 +33,26 @@
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_broadcastmb128 (__A);
|
||||
{
|
||||
return (__m128i) _mm_set1_epi64x((long long) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_broadcastmb256 (__A);
|
||||
return (__m256i) _mm256_set1_epi64x((long long)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_broadcastmw128 (__A);
|
||||
return (__m128i) _mm_set1_epi32((int)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_broadcastmw256 (__A);
|
||||
return (__m256i) _mm256_set1_epi32((int)__A);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -978,25 +978,25 @@ _mm256_movepi64_mask (__m256i __A)
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_broadcast_f32x2 (__m128 __A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
|
||||
(__v8sf)_mm256_undefined_ps(),
|
||||
(__mmask8) -1);
|
||||
return (__m256)__builtin_shufflevector((__v4sf)__A,
|
||||
(__v4sf)_mm_undefined_ps(),
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
|
||||
(__v8sf) __O,
|
||||
__M);
|
||||
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
|
||||
(__v8sf)_mm256_broadcast_f32x2(__A),
|
||||
(__v8sf)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
|
||||
(__v8sf) _mm256_setzero_ps (),
|
||||
__M);
|
||||
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
|
||||
(__v8sf)_mm256_broadcast_f32x2(__A),
|
||||
(__v8sf)_mm256_setzero_ps());
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
@@ -1025,49 +1025,49 @@ _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_broadcast_i32x2 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
|
||||
(__v4si)_mm_undefined_si128(),
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_shufflevector((__v4si)__A,
|
||||
(__v4si)_mm_undefined_si128(),
|
||||
0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
|
||||
(__v4si) __O,
|
||||
__M);
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
|
||||
(__v4si)_mm_broadcast_i32x2(__A),
|
||||
(__v4si)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
|
||||
(__v4si) _mm_setzero_si128 (),
|
||||
__M);
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
|
||||
(__v4si)_mm_broadcast_i32x2(__A),
|
||||
(__v4si)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_broadcast_i32x2 (__m128i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
|
||||
(__v8si)_mm256_undefined_si256(),
|
||||
(__mmask8) -1);
|
||||
return (__m256i)__builtin_shufflevector((__v4si)__A,
|
||||
(__v4si)_mm_undefined_si128(),
|
||||
0, 1, 0, 1, 0, 1, 0, 1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
|
||||
(__v8si) __O,
|
||||
__M);
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
|
||||
(__v8si)_mm256_broadcast_i32x2(__A),
|
||||
(__v8si)__O);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
|
||||
(__v8si) _mm256_setzero_si256 (),
|
||||
__M);
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
|
||||
(__v8si)_mm256_broadcast_i32x2(__A),
|
||||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
748
c_headers/avx512vlvbmi2intrin.h
Normal file
748
c_headers/avx512vlvbmi2intrin.h
Normal file
@@ -0,0 +1,748 @@
|
||||
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VLVBMI2INTRIN_H
|
||||
#define __AVX512VLVBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2")))
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_setzero_hi(void) {
|
||||
return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
|
||||
(__v8hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
|
||||
(__v8hi) _mm128_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) _mm128_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
__builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
__builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
|
||||
(__v8hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
|
||||
(__v8hi) _mm128_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) _mm128_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
|
||||
(__v8hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
|
||||
(__v8hi) _mm128_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
|
||||
(__v16qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
|
||||
(__v16qi) _mm128_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_setzero_hi(void) {
|
||||
return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
|
||||
(__v16hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
|
||||
(__v16hi) _mm256_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) _mm256_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
|
||||
{
|
||||
__builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
|
||||
{
|
||||
__builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
|
||||
(__v16hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
|
||||
(__v16hi) _mm256_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) _mm256_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
|
||||
(__v16hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
|
||||
(__v16hi) _mm256_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
|
||||
(__v32qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
|
||||
(__v32qi) _mm256_setzero_hi(),
|
||||
__U);
|
||||
}
|
||||
|
||||
#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \
|
||||
(__v4di)(B), \
|
||||
(int)(I), \
|
||||
(__v4di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
|
||||
_mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shldi_epi64(A, B, I) \
|
||||
_mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \
|
||||
(__v2di)(B), \
|
||||
(int)(I), \
|
||||
(__v2di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shldi_epi64(U, A, B, I) \
|
||||
_mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shldi_epi64(A, B, I) \
|
||||
_mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \
|
||||
(__v8si)(B), \
|
||||
(int)(I), \
|
||||
(__v8si)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
|
||||
_mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shldi_epi32(A, B, I) \
|
||||
_mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \
|
||||
(__v4si)(B), \
|
||||
(int)(I), \
|
||||
(__v4si)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shldi_epi32(U, A, B, I) \
|
||||
_mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shldi_epi32(A, B, I) \
|
||||
_mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
|
||||
(__v16hi)(B), \
|
||||
(int)(I), \
|
||||
(__v16hi)(S), \
|
||||
(__mmask16)(U)); })
|
||||
|
||||
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
|
||||
_mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shldi_epi16(A, B, I) \
|
||||
_mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \
|
||||
(__v8hi)(B), \
|
||||
(int)(I), \
|
||||
(__v8hi)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shldi_epi16(U, A, B, I) \
|
||||
_mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shldi_epi16(A, B, I) \
|
||||
_mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \
|
||||
(__v4di)(B), \
|
||||
(int)(I), \
|
||||
(__v4di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
|
||||
_mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shrdi_epi64(A, B, I) \
|
||||
_mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \
|
||||
(__v2di)(B), \
|
||||
(int)(I), \
|
||||
(__v2di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shrdi_epi64(U, A, B, I) \
|
||||
_mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shrdi_epi64(A, B, I) \
|
||||
_mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \
|
||||
(__v8si)(B), \
|
||||
(int)(I), \
|
||||
(__v8si)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
|
||||
_mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shrdi_epi32(A, B, I) \
|
||||
_mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \
|
||||
(__v4si)(B), \
|
||||
(int)(I), \
|
||||
(__v4si)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shrdi_epi32(U, A, B, I) \
|
||||
_mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shrdi_epi32(A, B, I) \
|
||||
_mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
|
||||
(__v16hi)(B), \
|
||||
(int)(I), \
|
||||
(__v16hi)(S), \
|
||||
(__mmask16)(U)); })
|
||||
|
||||
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
|
||||
_mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shrdi_epi16(A, B, I) \
|
||||
_mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
/* 128-bit funnel shift right by immediate, 16-bit lanes (AVX512VBMI2 + VL,
 * VPSHRDW).  Merge-masking form: result lanes with a 0 bit in U keep S. */
#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \
                                          (__v8hi)(B), \
                                          (int)(I), \
                                          (__v8hi)(S), \
                                          (__mmask8)(U)); })

/* Zero-masking form.  NOTE(review): the original passed the nonexistent
 * _mm128_setzero_hi(); _mm_setzero_si128() supplies the intended zero
 * vector. */
#define _mm128_maskz_shrdi_epi16(U, A, B, I) \
  _mm128_mask_shrdi_epi16(_mm_setzero_si128(), (U), (A), (B), (I))

/* Unmasked form: all eight 16-bit lanes selected via an all-ones mask. */
#define _mm128_shrdi_epi16(A, B, I) \
  _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
|
||||
(__v4di) __A,
|
||||
(__v4di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
(__v2di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
|
||||
(__v16hi) __A,
|
||||
(__v16hi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
(__v8hi) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
254
c_headers/avx512vlvnniintrin.h
Normal file
254
c_headers/avx512vlvnniintrin.h
Normal file
@@ -0,0 +1,254 @@
|
||||
/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VLVNNIINTRIN_H
|
||||
#define __AVX512VLVNNIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni")))
|
||||
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
|
||||
(__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
146
c_headers/avx512vnniintrin.h
Normal file
146
c_headers/avx512vnniintrin.h
Normal file
@@ -0,0 +1,146 @@
|
||||
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VNNIINTRIN_H
|
||||
#define __AVX512VNNIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni")))
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
99
c_headers/avx512vpopcntdqvlintrin.h
Normal file
99
c_headers/avx512vpopcntdqvlintrin.h
Normal file
@@ -0,0 +1,99 @@
|
||||
/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
|
||||
*------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error \
|
||||
"Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VPOPCNTDQVLINTRIN_H
|
||||
#define __AVX512VPOPCNTDQVLINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl")))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_selectq_128(
|
||||
(__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
|
||||
return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_selectd_128(
|
||||
(__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
|
||||
return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_selectq_256(
|
||||
(__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
|
||||
return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_selectd_256(
|
||||
(__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
|
||||
return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
93
c_headers/cetintrin.h
Normal file
93
c_headers/cetintrin.h
Normal file
@@ -0,0 +1,93 @@
|
||||
/*===---- cetintrin.h - CET intrinsic ------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <cetintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __CETINTRIN_H
|
||||
#define __CETINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("shstk")))
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {
|
||||
__builtin_ia32_incsspd(__a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {
|
||||
__builtin_ia32_incsspq(__a);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {
|
||||
return __builtin_ia32_rdsspd(__a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {
|
||||
return __builtin_ia32_rdsspq(__a);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {
|
||||
__builtin_ia32_saveprevssp();
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {
|
||||
__builtin_ia32_rstorssp(__p);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {
|
||||
__builtin_ia32_wrssd(__a, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {
|
||||
__builtin_ia32_wrssq(__a, __p);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {
|
||||
__builtin_ia32_wrussd(__a, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {
|
||||
__builtin_ia32_wrussq(__a, __p);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {
|
||||
__builtin_ia32_setssbsy();
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {
|
||||
__builtin_ia32_clrssbsy(__p);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __CETINTRIN_H */
|
||||
@@ -32,7 +32,7 @@
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt")))
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_clflushopt(char * __m) {
|
||||
_mm_clflushopt(void const * __m) {
|
||||
__builtin_ia32_clflushopt(__m);
|
||||
}
|
||||
|
||||
|
||||
52
c_headers/clwbintrin.h
Normal file
52
c_headers/clwbintrin.h
Normal file
@@ -0,0 +1,52 @@
|
||||
/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __CLWBINTRIN_H
|
||||
#define __CLWBINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb")))
|
||||
|
||||
/// \brief Writes back to memory the cache line (if modified) that contains the
|
||||
/// linear address specified in \a __p from any level of the cache hierarchy in
|
||||
/// the cache coherence domain
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CLWB </c> instruction.
|
||||
///
|
||||
/// \param __p
|
||||
/// A pointer to the memory location used to identify the cache line to be
|
||||
/// written back.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_clwb(void const *__p) {
|
||||
__builtin_ia32_clwb(__p);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
@@ -173,16 +173,24 @@
|
||||
#define bit_AVX512VL 0x80000000
|
||||
|
||||
/* Features in %ecx for leaf 7 sub-leaf 0 */
|
||||
#define bit_PREFTCHWT1 0x00000001
|
||||
#define bit_AVX512VBMI 0x00000002
|
||||
#define bit_PKU 0x00000004
|
||||
#define bit_OSPKE 0x00000010
|
||||
#define bit_PREFTCHWT1 0x00000001
|
||||
#define bit_AVX512VBMI 0x00000002
|
||||
#define bit_PKU 0x00000004
|
||||
#define bit_OSPKE 0x00000010
|
||||
#define bit_AVX512VBMI2 0x00000040
|
||||
#define bit_SHSTK 0x00000080
|
||||
#define bit_GFNI 0x00000100
|
||||
#define bit_VAES 0x00000200
|
||||
#define bit_VPCLMULQDQ 0x00000400
|
||||
#define bit_AVX512VNNI 0x00000800
|
||||
#define bit_AVX512BITALG 0x00001000
|
||||
#define bit_AVX512VPOPCNTDQ 0x00004000
|
||||
#define bit_RDPID 0x00400000
|
||||
#define bit_RDPID 0x00400000
|
||||
|
||||
/* Features in %edx for leaf 7 sub-leaf 0 */
|
||||
#define bit_AVX5124VNNIW 0x00000004
|
||||
#define bit_AVX5124FMAPS 0x00000008
|
||||
#define bit_IBT 0x00100000
|
||||
|
||||
/* Features in %eax for leaf 13 sub-leaf 1 */
|
||||
#define bit_XSAVEOPT 0x00000001
|
||||
@@ -192,6 +200,7 @@
|
||||
/* Features in %ecx for leaf 0x80000001 */
|
||||
#define bit_LAHF_LM 0x00000001
|
||||
#define bit_ABM 0x00000020
|
||||
#define bit_LZCNT bit_ABM /* for gcc compat */
|
||||
#define bit_SSE4a 0x00000040
|
||||
#define bit_PRFCHW 0x00000100
|
||||
#define bit_XOP 0x00000800
|
||||
|
||||
@@ -80,7 +80,7 @@ min(const __T &__a, const __T &__b, __Cmp __cmp) {
|
||||
template <class __T>
|
||||
inline __device__ const __T &
|
||||
min(const __T &__a, const __T &__b) {
|
||||
return __a < __b ? __b : __a;
|
||||
return __a < __b ? __a : __b;
|
||||
}
|
||||
|
||||
#ifdef _LIBCPP_END_NAMESPACE_STD
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
|
||||
#include_next <new>
|
||||
|
||||
// Device overrides for placement new and delete.
|
||||
#pragma push_macro("CUDA_NOEXCEPT")
|
||||
#if __cplusplus >= 201103L
|
||||
#define CUDA_NOEXCEPT noexcept
|
||||
@@ -34,6 +33,55 @@
|
||||
#define CUDA_NOEXCEPT
|
||||
#endif
|
||||
|
||||
// Device overrides for non-placement new and delete.
|
||||
__device__ inline void *operator new(__SIZE_TYPE__ size) {
|
||||
if (size == 0) {
|
||||
size = 1;
|
||||
}
|
||||
return ::malloc(size);
|
||||
}
|
||||
__device__ inline void *operator new(__SIZE_TYPE__ size,
|
||||
const std::nothrow_t &) CUDA_NOEXCEPT {
|
||||
return ::operator new(size);
|
||||
}
|
||||
|
||||
__device__ inline void *operator new[](__SIZE_TYPE__ size) {
|
||||
return ::operator new(size);
|
||||
}
|
||||
__device__ inline void *operator new[](__SIZE_TYPE__ size,
|
||||
const std::nothrow_t &) {
|
||||
return ::operator new(size);
|
||||
}
|
||||
|
||||
__device__ inline void operator delete(void* ptr) CUDA_NOEXCEPT {
|
||||
if (ptr) {
|
||||
::free(ptr);
|
||||
}
|
||||
}
|
||||
__device__ inline void operator delete(void *ptr,
|
||||
const std::nothrow_t &) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
|
||||
__device__ inline void operator delete[](void* ptr) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
__device__ inline void operator delete[](void *ptr,
|
||||
const std::nothrow_t &) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
|
||||
// Sized delete, C++14 only.
|
||||
#if __cplusplus >= 201402L
|
||||
__device__ void operator delete(void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
__device__ void operator delete[](void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Device overrides for placement new and delete.
|
||||
__device__ inline void *operator new(__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT {
|
||||
return __ptr;
|
||||
}
|
||||
@@ -42,6 +90,7 @@ __device__ inline void *operator new[](__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT
|
||||
}
|
||||
__device__ inline void operator delete(void *, void *) CUDA_NOEXCEPT {}
|
||||
__device__ inline void operator delete[](void *, void *) CUDA_NOEXCEPT {}
|
||||
|
||||
#pragma pop_macro("CUDA_NOEXCEPT")
|
||||
|
||||
#endif // include guard
|
||||
|
||||
@@ -217,8 +217,8 @@ _mm_div_pd(__m128d __a, __m128d __b)
|
||||
|
||||
/// \brief Calculates the square root of the lower double-precision value of
|
||||
/// the second operand and returns it in the lower 64 bits of the result.
|
||||
/// The upper 64 bits of the result are copied from the upper double-
|
||||
/// precision value of the first operand.
|
||||
/// The upper 64 bits of the result are copied from the upper
|
||||
/// double-precision value of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -260,8 +260,8 @@ _mm_sqrt_pd(__m128d __a)
|
||||
|
||||
/// \brief Compares lower 64-bit double-precision values of both operands, and
|
||||
/// returns the lesser of the pair of values in the lower 64-bits of the
|
||||
/// result. The upper 64 bits of the result are copied from the upper double-
|
||||
/// precision value of the first operand.
|
||||
/// result. The upper 64 bits of the result are copied from the upper
|
||||
/// double-precision value of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -304,8 +304,8 @@ _mm_min_pd(__m128d __a, __m128d __b)
|
||||
|
||||
/// \brief Compares lower 64-bit double-precision values of both operands, and
|
||||
/// returns the greater of the pair of values in the lower 64-bits of the
|
||||
/// result. The upper 64 bits of the result are copied from the upper double-
|
||||
/// precision value of the first operand.
|
||||
/// result. The upper 64 bits of the result are copied from the upper
|
||||
/// double-precision value of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -983,8 +983,10 @@ _mm_cmpnge_sd(__m128d __a, __m128d __b)
|
||||
}
|
||||
|
||||
/// \brief Compares the lower double-precision floating-point values in each of
|
||||
/// the two 128-bit floating-point vectors of [2 x double] for equality. The
|
||||
/// comparison yields 0 for false, 1 for true.
|
||||
/// the two 128-bit floating-point vectors of [2 x double] for equality.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -996,7 +998,8 @@ _mm_cmpnge_sd(__m128d __a, __m128d __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comieq_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1008,7 +1011,8 @@ _mm_comieq_sd(__m128d __a, __m128d __b)
|
||||
/// the value in the first parameter is less than the corresponding value in
|
||||
/// the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true.
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1020,7 +1024,8 @@ _mm_comieq_sd(__m128d __a, __m128d __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comilt_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1032,7 +1037,8 @@ _mm_comilt_sd(__m128d __a, __m128d __b)
|
||||
/// the value in the first parameter is less than or equal to the
|
||||
/// corresponding value in the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true.
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1044,7 +1050,8 @@ _mm_comilt_sd(__m128d __a, __m128d __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comile_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1056,7 +1063,8 @@ _mm_comile_sd(__m128d __a, __m128d __b)
|
||||
/// the value in the first parameter is greater than the corresponding value
|
||||
/// in the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true.
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1068,7 +1076,8 @@ _mm_comile_sd(__m128d __a, __m128d __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comigt_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1080,7 +1089,8 @@ _mm_comigt_sd(__m128d __a, __m128d __b)
|
||||
/// the value in the first parameter is greater than or equal to the
|
||||
/// corresponding value in the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true.
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1092,7 +1102,8 @@ _mm_comigt_sd(__m128d __a, __m128d __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comige_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1104,7 +1115,8 @@ _mm_comige_sd(__m128d __a, __m128d __b)
|
||||
/// the value in the first parameter is unequal to the corresponding value in
|
||||
/// the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true.
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two
|
||||
/// lower double-precision values is NaN, 1 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1116,7 +1128,8 @@ _mm_comige_sd(__m128d __a, __m128d __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 1 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comineq_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1127,7 +1140,7 @@ _mm_comineq_sd(__m128d __a, __m128d __b)
|
||||
/// the two 128-bit floating-point vectors of [2 x double] for equality. The
|
||||
/// comparison yields 0 for false, 1 for true.
|
||||
///
|
||||
/// If either of the two lower double-precision values is NaN, 1 is returned.
|
||||
/// If either of the two lower double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1140,7 +1153,7 @@ _mm_comineq_sd(__m128d __a, __m128d __b)
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 1 is returned.
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomieq_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1153,7 +1166,7 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
|
||||
/// the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two lower
|
||||
/// double-precision values is NaN, 1 is returned.
|
||||
/// double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1166,7 +1179,7 @@ _mm_ucomieq_sd(__m128d __a, __m128d __b)
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 1 is returned.
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomilt_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1179,7 +1192,7 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b)
|
||||
/// corresponding value in the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two lower
|
||||
/// double-precision values is NaN, 1 is returned.
|
||||
/// double-precision values is NaN, 0 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1192,7 +1205,7 @@ _mm_ucomilt_sd(__m128d __a, __m128d __b)
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower double-precision values is NaN, 1 is returned.
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomile_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1257,7 +1270,7 @@ _mm_ucomige_sd(__m128d __a, __m128d __b)
|
||||
/// the second parameter.
|
||||
///
|
||||
/// The comparison yields 0 for false, 1 for true. If either of the two lower
|
||||
/// double-precision values is NaN, 0 is returned.
|
||||
/// double-precision values is NaN, 1 is returned.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1270,7 +1283,7 @@ _mm_ucomige_sd(__m128d __a, __m128d __b)
|
||||
/// A 128-bit vector of [2 x double]. The lower double-precision value is
|
||||
/// compared to the lower double-precision value of \a __a.
|
||||
/// \returns An integer containing the comparison result. If either of the two
|
||||
/// lower double-precision values is NaN, 0 is returned.
|
||||
/// lower double-precision values is NaN, 1 is returned.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomineq_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
@@ -1935,14 +1948,15 @@ _mm_store_pd(double *__dp, __m128d __a)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c>VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
|
||||
/// This intrinsic corresponds to the
|
||||
/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
|
||||
///
|
||||
/// \param __dp
|
||||
/// A pointer to a memory location that can store two double-precision
|
||||
/// values.
|
||||
/// \param __a
|
||||
/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
|
||||
/// of the values in \a dp.
|
||||
/// of the values in \a __dp.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store1_pd(double *__dp, __m128d __a)
|
||||
{
|
||||
@@ -1950,18 +1964,20 @@ _mm_store1_pd(double *__dp, __m128d __a)
|
||||
_mm_store_pd(__dp, __a);
|
||||
}
|
||||
|
||||
/// \brief Stores a 128-bit vector of [2 x double] into an aligned memory
|
||||
/// location.
|
||||
/// \brief Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to
|
||||
/// the upper and lower 64 bits of a memory location.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.
|
||||
/// This intrinsic corresponds to the
|
||||
/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.
|
||||
///
|
||||
/// \param __dp
|
||||
/// A pointer to a 128-bit memory location. The address of the memory
|
||||
/// location has to be 16-byte aligned.
|
||||
/// A pointer to a memory location that can store two double-precision
|
||||
/// values.
|
||||
/// \param __a
|
||||
/// A 128-bit vector of [2 x double] containing the values to be stored.
|
||||
/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each
|
||||
/// of the values in \a __dp.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_store_pd1(double *__dp, __m128d __a)
|
||||
{
|
||||
@@ -2258,7 +2274,11 @@ _mm_adds_epu16(__m128i __a, __m128i __b)
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_avg_epu8(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
|
||||
typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
|
||||
return (__m128i)__builtin_convertvector(
|
||||
((__builtin_convertvector((__v16qu)__a, __v16hu) +
|
||||
__builtin_convertvector((__v16qu)__b, __v16hu)) + 1)
|
||||
>> 1, __v16qu);
|
||||
}
|
||||
|
||||
/// \brief Computes the rounded avarages of corresponding elements of two
|
||||
@@ -2278,7 +2298,11 @@ _mm_avg_epu8(__m128i __a, __m128i __b)
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_avg_epu16(__m128i __a, __m128i __b)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
|
||||
typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
|
||||
return (__m128i)__builtin_convertvector(
|
||||
((__builtin_convertvector((__v8hu)__a, __v8su) +
|
||||
__builtin_convertvector((__v8hu)__b, __v8su)) + 1)
|
||||
>> 1, __v8hu);
|
||||
}
|
||||
|
||||
/// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16]
|
||||
@@ -3838,8 +3862,7 @@ _mm_set1_epi8(char __b)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>
|
||||
/// instruction.
|
||||
/// This intrinsic does not correspond to a specific instruction.
|
||||
///
|
||||
/// \param __q0
|
||||
/// A 64-bit integral value used to initialize the lower 64 bits of the
|
||||
@@ -4010,7 +4033,7 @@ _mm_storeu_si128(__m128i *__p, __m128i __b)
|
||||
/// specified unaligned memory location. When a mask bit is 1, the
|
||||
/// corresponding byte is written, otherwise it is not written.
|
||||
///
|
||||
/// To minimize caching, the date is flagged as non-temporal (unlikely to be
|
||||
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
|
||||
/// used again soon). Exception and trap behavior for elements not selected
|
||||
/// for storage to memory are implementation dependent.
|
||||
///
|
||||
@@ -4524,8 +4547,8 @@ _mm_unpackhi_epi32(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
|
||||
/// of [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
|
||||
/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
|
||||
/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -4657,7 +4680,7 @@ _mm_unpacklo_epi64(__m128i __a, __m128i __b)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic has no corresponding instruction.
|
||||
/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 128-bit integer vector operand. The lower 64 bits are moved to the
|
||||
@@ -4674,7 +4697,7 @@ _mm_movepi64_pi64(__m128i __a)
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVQ / MOVQ / MOVD </c> instruction.
|
||||
/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 64-bit value.
|
||||
@@ -4704,8 +4727,8 @@ _mm_move_epi64(__m128i __a)
|
||||
return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the high-order (odd-indexed) values from two 128-bit vectors
|
||||
/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
|
||||
/// \brief Unpacks the high-order 64-bit elements from two 128-bit vectors of
|
||||
/// [2 x double] and interleaves them into a 128-bit vector of [2 x
|
||||
/// double].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -4725,7 +4748,7 @@ _mm_unpackhi_pd(__m128d __a, __m128d __b)
|
||||
return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the low-order (even-indexed) values from two 128-bit vectors
|
||||
/// \brief Unpacks the low-order 64-bit elements from two 128-bit vectors
|
||||
/// of [2 x double] and interleaves them into a 128-bit vector of [2 x
|
||||
/// double].
|
||||
///
|
||||
@@ -4784,9 +4807,9 @@ _mm_movemask_pd(__m128d __a)
|
||||
/// A 128-bit vector of [2 x double].
|
||||
/// \param i
|
||||
/// An 8-bit immediate value. The least significant two bits specify which
|
||||
/// elements to copy from a and b: \n
|
||||
/// Bit[0] = 0: lower element of a copied to lower element of result. \n
|
||||
/// Bit[0] = 1: upper element of a copied to lower element of result. \n
|
||||
/// elements to copy from \a a and \a b: \n
|
||||
/// Bit[0] = 0: lower element of \a a copied to lower element of result. \n
|
||||
/// Bit[0] = 1: upper element of \a a copied to lower element of result. \n
|
||||
/// Bit[1] = 0: lower element of \a b copied to upper element of result. \n
|
||||
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
|
||||
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
|
||||
|
||||
@@ -143,4 +143,18 @@
|
||||
# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
|
||||
#endif
|
||||
|
||||
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
# define FLT16_MANT_DIG __FLT16_MANT_DIG__
|
||||
# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
|
||||
# define FLT16_DIG __FLT16_DIG__
|
||||
# define FLT16_MIN_EXP __FLT16_MIN_EXP__
|
||||
# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__
|
||||
# define FLT16_MAX_EXP __FLT16_MAX_EXP__
|
||||
# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__
|
||||
# define FLT16_MAX __FLT16_MAX__
|
||||
# define FLT16_EPSILON __FLT16_EPSILON__
|
||||
# define FLT16_MIN __FLT16_MIN__
|
||||
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
|
||||
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
|
||||
|
||||
#endif /* __FLOAT_H */
|
||||
|
||||
@@ -60,73 +60,73 @@ _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
@@ -144,13 +144,13 @@ _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
@@ -168,37 +168,37 @@ _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
@@ -216,13 +216,13 @@ _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
@@ -46,85 +46,85 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
@@ -142,13 +142,13 @@ _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
@@ -166,37 +166,37 @@ _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
@@ -214,13 +214,13 @@ _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
202
c_headers/gfniintrin.h
Normal file
202
c_headers/gfniintrin.h
Normal file
@@ -0,0 +1,202 @@
|
||||
/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __GFNIINTRIN_H
|
||||
#define __GFNIINTRIN_H
|
||||
|
||||
|
||||
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
|
||||
(__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
|
||||
(__v16qi)(__m128i)(S)); })
|
||||
|
||||
|
||||
#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
|
||||
(__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
|
||||
U, A, B, I); })
|
||||
|
||||
|
||||
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
|
||||
(__v32qi)(__m256i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
|
||||
(__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
|
||||
(__v32qi)(__m256i)(S)); })
|
||||
|
||||
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
|
||||
(__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
|
||||
U, A, B, I); })
|
||||
|
||||
|
||||
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
|
||||
(__v64qi)(__m512i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
|
||||
(__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
|
||||
(__v64qi)(__m512i)(S)); })
|
||||
|
||||
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
|
||||
(__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \
|
||||
U, A, B, I); })
|
||||
|
||||
#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
|
||||
(__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
|
||||
(__v16qi)(__m128i)(S)); })
|
||||
|
||||
|
||||
#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
|
||||
(__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
|
||||
U, A, B, I); })
|
||||
|
||||
|
||||
#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
|
||||
(__v32qi)(__m256i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
|
||||
(__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
|
||||
(__v32qi)(__m256i)(S)); })
|
||||
|
||||
#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
|
||||
(__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
|
||||
U, A, B, I); })
|
||||
|
||||
|
||||
#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
|
||||
(__v64qi)(__m512i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
|
||||
(__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
|
||||
(__v64qi)(__m512i)(S)); })
|
||||
|
||||
#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
|
||||
(__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \
|
||||
U, A, B, I); })
|
||||
|
||||
/* Default attributes for simple form (no masking). */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni")))
|
||||
|
||||
/* Default attributes for ZMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni")))
|
||||
|
||||
/* Default attributes for VLX forms. */
|
||||
#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni")))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
|
||||
(__v16qi) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
|
||||
_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectb_128(__U,
|
||||
(__v16qi) _mm_gf2p8mul_epi8(__A, __B),
|
||||
(__v16qi) __S);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
|
||||
_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
|
||||
(__v32qi) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
|
||||
_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectb_256(__U,
|
||||
(__v32qi) _mm256_gf2p8mul_epi8(__A, __B),
|
||||
(__v32qi) __S);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
|
||||
_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
|
||||
(__v64qi) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectb_512(__U,
|
||||
(__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
|
||||
(__v64qi) __S);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(),
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS_F
|
||||
#undef __DEFAULT_FN_ATTRS_VL
|
||||
|
||||
#endif // __GFNIINTRIN_H
|
||||
|
||||
@@ -58,6 +58,10 @@
|
||||
#include <clflushoptintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
|
||||
#include <clwbintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
|
||||
#include <avxintrin.h>
|
||||
#endif
|
||||
@@ -114,6 +118,10 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
}
|
||||
#endif /* __AVX2__ */
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
|
||||
#include <vpclmulqdqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
|
||||
#include <bmiintrin.h>
|
||||
#endif
|
||||
@@ -142,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <avx512bwintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
|
||||
#include <avx512bitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
|
||||
#include <avx512cdintrin.h>
|
||||
#endif
|
||||
@@ -150,10 +162,29 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <avx512vpopcntdqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
|
||||
#include <avx512vpopcntdqvlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
|
||||
#include <avx512vnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
|
||||
#include <avx512vlvnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
|
||||
#include <avx512dqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
|
||||
#include <avx512vlbitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BW__))
|
||||
#include <avx512vlbwintrin.h>
|
||||
@@ -191,6 +222,15 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <avx512vbmivlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
|
||||
#include <avx512vbmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
|
||||
#include <avx512vlvbmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
|
||||
#include <avx512pfintrin.h>
|
||||
#endif
|
||||
@@ -199,6 +239,14 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <pkuintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
|
||||
#include <vaesintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
|
||||
#include <gfniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
|
||||
_rdrand16_step(unsigned short *__p)
|
||||
@@ -315,6 +363,10 @@ _writegsbase_u64(unsigned long long __V)
|
||||
#include <xsavesintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
|
||||
#include <cetintrin.h>
|
||||
#endif
|
||||
|
||||
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
|
||||
* whereas others are also available at all times. */
|
||||
#include <adxintrin.h>
|
||||
|
||||
@@ -38,6 +38,10 @@
|
||||
#include <armintr.h>
|
||||
#endif
|
||||
|
||||
#if defined(_M_ARM64)
|
||||
#include <arm64intr.h>
|
||||
#endif
|
||||
|
||||
/* For the definition of jmp_buf. */
|
||||
#if __STDC_HOSTED__
|
||||
#include <setjmp.h>
|
||||
@@ -828,7 +832,7 @@ _InterlockedCompareExchange_nf(long volatile *_Destination,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange_rel(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
|
||||
@@ -11381,6 +11381,8 @@ half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);
|
||||
* For each component of a vector type,
|
||||
* result[i] = if MSB of c[i] is set ? b[i] : a[i].
|
||||
* For a scalar type, result = c ? b : a.
|
||||
* b and a must have the same type.
|
||||
* c must have the same number of elements and bits as a.
|
||||
*/
|
||||
char __ovld __cnfn select(char a, char b, char c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, char c);
|
||||
@@ -11394,60 +11396,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, char8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, char16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);
|
||||
short __ovld __cnfn select(short a, short b, char c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, char c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, char2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, char2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, char3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, char3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, char4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, char4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, char8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, char8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, char16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, char16 c);
|
||||
int __ovld __cnfn select(int a, int b, char c);
|
||||
uint __ovld __cnfn select(uint a, uint b, char c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, char2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, char2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, char3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, char3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, char4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, char4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, char8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, char8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, char16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, char16 c);
|
||||
long __ovld __cnfn select(long a, long b, char c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, char c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, char2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, char2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, char3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, char3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, char4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, char4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, char8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, char8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, char16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, char16 c);
|
||||
float __ovld __cnfn select(float a, float b, char c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, char2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, char3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, char4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, char8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, char16 c);
|
||||
char __ovld __cnfn select(char a, char b, short c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, short c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, short2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, short2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, short3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, short3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, short4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, short4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, short8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, short8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, short16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, short16 c);
|
||||
|
||||
short __ovld __cnfn select(short a, short b, short c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, short c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, short2 c);
|
||||
@@ -11460,60 +11409,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, short8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, short16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);
|
||||
int __ovld __cnfn select(int a, int b, short c);
|
||||
uint __ovld __cnfn select(uint a, uint b, short c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, short2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, short2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, short3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, short3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, short4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, short4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, short8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, short8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, short16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, short16 c);
|
||||
long __ovld __cnfn select(long a, long b, short c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, short c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, short2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, short2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, short3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, short3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, short4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, short4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, short8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, short8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, short16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, short16 c);
|
||||
float __ovld __cnfn select(float a, float b, short c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, short2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, short3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, short4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, short8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, short16 c);
|
||||
char __ovld __cnfn select(char a, char b, int c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, int c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, int2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, int2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, int3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, int3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, int4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, int4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, int8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, int8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, int16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, int16 c);
|
||||
short __ovld __cnfn select(short a, short b, int c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, int c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, int2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, int2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, int3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, int3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, int4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, int4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, int8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, int8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, int16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, int16 c);
|
||||
|
||||
int __ovld __cnfn select(int a, int b, int c);
|
||||
uint __ovld __cnfn select(uint a, uint b, int c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, int2 c);
|
||||
@@ -11526,60 +11422,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, int8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, int16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);
|
||||
long __ovld __cnfn select(long a, long b, int c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, int c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, int2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, int2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, int3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, int3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, int4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, int4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, int8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, int8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, int16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, int16 c);
|
||||
float __ovld __cnfn select(float a, float b, int c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, int2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, int3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, int4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, int8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, int16 c);
|
||||
char __ovld __cnfn select(char a, char b, long c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, long c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, long2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, long2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, long3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, long3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, long4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, long4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, long8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, long8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, long16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, long16 c);
|
||||
short __ovld __cnfn select(short a, short b, long c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, long c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, long2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, long2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, long3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, long3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, long4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, long4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, long8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, long8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, long16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, long16 c);
|
||||
int __ovld __cnfn select(int a, int b, long c);
|
||||
uint __ovld __cnfn select(uint a, uint b, long c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, long2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, long2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, long3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, long3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, long4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, long4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, long8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, long8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, long16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, long16 c);
|
||||
|
||||
long __ovld __cnfn select(long a, long b, long c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, long c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, long2 c);
|
||||
@@ -11592,12 +11441,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, long8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, long16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);
|
||||
float __ovld __cnfn select(float a, float b, long c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, long2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, long3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, long4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, long8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, long16 c);
|
||||
|
||||
char __ovld __cnfn select(char a, char b, uchar c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, uchar c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);
|
||||
@@ -11610,60 +11454,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);
|
||||
short __ovld __cnfn select(short a, short b, uchar c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, uchar c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, uchar2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uchar2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, uchar3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uchar3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, uchar4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uchar4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, uchar8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uchar8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, uchar16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uchar16 c);
|
||||
int __ovld __cnfn select(int a, int b, uchar c);
|
||||
uint __ovld __cnfn select(uint a, uint b, uchar c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, uchar2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, uchar2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, uchar3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, uchar3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, uchar4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, uchar4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, uchar8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, uchar8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, uchar16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, uchar16 c);
|
||||
long __ovld __cnfn select(long a, long b, uchar c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, uchar c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, uchar2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uchar2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, uchar3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uchar3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, uchar4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uchar4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, uchar8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uchar8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, uchar16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uchar16 c);
|
||||
float __ovld __cnfn select(float a, float b, uchar c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, uchar2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, uchar3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, uchar4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, uchar8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, uchar16 c);
|
||||
char __ovld __cnfn select(char a, char b, ushort c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, ushort c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, ushort2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ushort2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, ushort3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ushort3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, ushort4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ushort4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, ushort8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ushort8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, ushort16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ushort16 c);
|
||||
|
||||
short __ovld __cnfn select(short a, short b, ushort c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, ushort c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);
|
||||
@@ -11676,60 +11467,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);
|
||||
int __ovld __cnfn select(int a, int b, ushort c);
|
||||
uint __ovld __cnfn select(uint a, uint b, ushort c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, ushort2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, ushort2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, ushort3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, ushort3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, ushort4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, ushort4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, ushort8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, ushort8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, ushort16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, ushort16 c);
|
||||
long __ovld __cnfn select(long a, long b, ushort c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, ushort c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, ushort2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ushort2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, ushort3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ushort3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, ushort4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ushort4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, ushort8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ushort8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, ushort16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ushort16 c);
|
||||
float __ovld __cnfn select(float a, float b, ushort c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, ushort2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, ushort3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, ushort4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, ushort8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, ushort16 c);
|
||||
char __ovld __cnfn select(char a, char b, uint c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, uint c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, uint2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uint2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, uint3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uint3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, uint4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uint4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, uint8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uint8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, uint16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uint16 c);
|
||||
short __ovld __cnfn select(short a, short b, uint c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, uint c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, uint2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uint2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, uint3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uint3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, uint4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uint4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, uint8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uint8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, uint16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uint16 c);
|
||||
|
||||
int __ovld __cnfn select(int a, int b, uint c);
|
||||
uint __ovld __cnfn select(uint a, uint b, uint c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);
|
||||
@@ -11742,60 +11480,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);
|
||||
long __ovld __cnfn select(long a, long b, uint c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, uint c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, uint2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uint2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, uint3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uint3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, uint4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uint4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, uint8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uint8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, uint16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uint16 c);
|
||||
float __ovld __cnfn select(float a, float b, uint c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);
|
||||
char __ovld __cnfn select(char a, char b, ulong c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, ulong c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, ulong2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ulong2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, ulong3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ulong3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, ulong4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ulong4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, ulong8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ulong8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, ulong16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ulong16 c);
|
||||
short __ovld __cnfn select(short a, short b, ulong c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, ulong c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, ulong2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ulong2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, ulong3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ulong3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, ulong4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ulong4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, ulong8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ulong8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, ulong16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ulong16 c);
|
||||
int __ovld __cnfn select(int a, int b, ulong c);
|
||||
uint __ovld __cnfn select(uint a, uint b, ulong c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, ulong2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, ulong2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, ulong3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, ulong3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, ulong4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, ulong4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, ulong8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, ulong8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, ulong16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, ulong16 c);
|
||||
|
||||
long __ovld __cnfn select(long a, long b, ulong c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, ulong c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);
|
||||
@@ -11808,12 +11499,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);
|
||||
float __ovld __cnfn select(float a, float b, ulong c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, ulong2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, ulong3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, ulong4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, ulong8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, ulong16 c);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
double __ovld __cnfn select(double a, double b, long c);
|
||||
double2 __ovld __cnfn select(double2 a, double2 b, long2 c);
|
||||
@@ -13141,13 +12827,14 @@ void __ovld __conv barrier(cl_mem_fence_flags flags);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
|
||||
typedef enum memory_scope
|
||||
{
|
||||
memory_scope_work_item,
|
||||
memory_scope_work_group,
|
||||
memory_scope_device,
|
||||
memory_scope_all_svm_devices,
|
||||
memory_scope_sub_group
|
||||
typedef enum memory_scope {
|
||||
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
|
||||
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
|
||||
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
|
||||
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
|
||||
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
|
||||
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
|
||||
#endif
|
||||
} memory_scope;
|
||||
|
||||
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
|
||||
@@ -13952,11 +13639,11 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
|
||||
// enum values aligned with what clang uses in EmitAtomicExpr()
|
||||
typedef enum memory_order
|
||||
{
|
||||
memory_order_relaxed,
|
||||
memory_order_acquire,
|
||||
memory_order_release,
|
||||
memory_order_acq_rel,
|
||||
memory_order_seq_cst
|
||||
memory_order_relaxed = __ATOMIC_RELAXED,
|
||||
memory_order_acquire = __ATOMIC_ACQUIRE,
|
||||
memory_order_release = __ATOMIC_RELEASE,
|
||||
memory_order_acq_rel = __ATOMIC_ACQ_REL,
|
||||
memory_order_seq_cst = __ATOMIC_SEQ_CST
|
||||
} memory_order;
|
||||
|
||||
// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
|
||||
@@ -16199,6 +15886,313 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
|
||||
|
||||
#endif //cl_khr_subgroups cl_intel_subgroups
|
||||
|
||||
#if defined(cl_intel_subgroups)
|
||||
// Intel-Specific Sub Group Functions
|
||||
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle( int x, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle( long x, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );
|
||||
|
||||
float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );
|
||||
|
||||
float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );
|
||||
|
||||
float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
|
||||
uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
|
||||
uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
|
||||
uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
|
||||
uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
|
||||
uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
|
||||
uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read( const __global uint* p );
|
||||
uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );
|
||||
uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );
|
||||
uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
|
||||
void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
|
||||
void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
|
||||
void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
|
||||
void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
|
||||
void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
|
||||
void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
|
||||
void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );
|
||||
void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
half __ovld __conv intel_sub_group_shuffle( half x, uint c );
|
||||
half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );
|
||||
half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );
|
||||
half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );
|
||||
#endif
|
||||
|
||||
#if defined(cl_khr_fp64)
|
||||
double __ovld __conv intel_sub_group_shuffle( double x, uint c );
|
||||
double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );
|
||||
double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );
|
||||
double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );
|
||||
#endif
|
||||
|
||||
#endif //cl_intel_subgroups
|
||||
|
||||
#if defined(cl_intel_subgroups_short)
|
||||
short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );
|
||||
short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );
|
||||
short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );
|
||||
short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );
|
||||
short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );
|
||||
ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );
|
||||
ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );
|
||||
ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );
|
||||
ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle( short x, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_reduce_add( short x );
|
||||
ushort __ovld __conv intel_sub_group_reduce_add( ushort x );
|
||||
short __ovld __conv intel_sub_group_reduce_min( short x );
|
||||
ushort __ovld __conv intel_sub_group_reduce_min( ushort x );
|
||||
short __ovld __conv intel_sub_group_reduce_max( short x );
|
||||
ushort __ovld __conv intel_sub_group_reduce_max( ushort x );
|
||||
|
||||
short __ovld __conv intel_sub_group_scan_exclusive_add( short x );
|
||||
ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );
|
||||
short __ovld __conv intel_sub_group_scan_exclusive_min( short x );
|
||||
ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );
|
||||
short __ovld __conv intel_sub_group_scan_exclusive_max( short x );
|
||||
ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );
|
||||
|
||||
short __ovld __conv intel_sub_group_scan_inclusive_add( short x );
|
||||
ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );
|
||||
short __ovld __conv intel_sub_group_scan_inclusive_min( short x );
|
||||
ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );
|
||||
short __ovld __conv intel_sub_group_scan_inclusive_max( short x );
|
||||
ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
|
||||
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
|
||||
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
|
||||
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
|
||||
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
|
||||
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
|
||||
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
|
||||
uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
|
||||
uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
|
||||
uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
|
||||
ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
|
||||
ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
|
||||
ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
|
||||
ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
|
||||
ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
|
||||
ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );
|
||||
ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
|
||||
ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
|
||||
ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);
|
||||
void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
|
||||
void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
|
||||
void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);
|
||||
void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
|
||||
void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
|
||||
void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );
|
||||
void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
|
||||
void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
|
||||
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
|
||||
#endif // cl_intel_subgroups_short
|
||||
|
||||
#ifdef cl_amd_media_ops
|
||||
uint __ovld amd_bitalign(uint a, uint b, uint c);
|
||||
uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);
|
||||
|
||||
@@ -115,8 +115,8 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
|
||||
return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
|
||||
}
|
||||
|
||||
/// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
|
||||
/// vector of [4 x float] to float values stored in a 128-bit vector of
|
||||
/// \brief Moves and duplicates odd-indexed values from a 128-bit vector
|
||||
/// of [4 x float] to float values stored in a 128-bit vector of
|
||||
/// [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -137,7 +137,7 @@ _mm_movehdup_ps(__m128 __a)
|
||||
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
|
||||
}
|
||||
|
||||
/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
|
||||
/// \brief Duplicates even-indexed values from a 128-bit vector of
|
||||
/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
|
||||
@@ -648,7 +648,7 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
|
||||
/// input vectors are used as an input for dot product; otherwise that input
|
||||
/// is treated as zero. Bits [1:0] determine which elements of the result
|
||||
/// will receive a copy of the final dot product, with bit [0] corresponding
|
||||
/// to the lowest element and bit [3] corresponding to the highest element of
|
||||
/// to the lowest element and bit [1] corresponding to the highest element of
|
||||
/// each [2 x double] vector. If a bit is set, the dot product is returned in
|
||||
/// the corresponding element; otherwise that element is set to zero.
|
||||
#define _mm_dp_pd(X, Y, M) __extension__ ({\
|
||||
@@ -866,8 +866,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 11: Copies the selected bits from \a Y to result bits [127:96]. \n
|
||||
/// Bits[3:0]: If any of these bits are set, the corresponding result
|
||||
/// element is cleared.
|
||||
/// \returns A 128-bit vector of [4 x float] containing the copied single-
|
||||
/// precision floating point elements from the operands.
|
||||
/// \returns A 128-bit vector of [4 x float] containing the copied
|
||||
/// single-precision floating point elements from the operands.
|
||||
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
|
||||
|
||||
/// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
|
||||
|
||||
@@ -26,10 +26,14 @@
|
||||
#ifndef __STDARG_H
|
||||
#define __STDARG_H
|
||||
|
||||
/* zig: added because macos _va_list.h was duplicately defining va_list
|
||||
*/
|
||||
#ifndef _VA_LIST
|
||||
#ifndef _VA_LIST_T
|
||||
typedef __builtin_va_list va_list;
|
||||
#define _VA_LIST
|
||||
#endif
|
||||
#endif
|
||||
#define va_start(ap, param) __builtin_va_start(ap, param)
|
||||
#define va_end(ap) __builtin_va_end(ap)
|
||||
#define va_arg(ap, type) __builtin_va_arg(ap, type)
|
||||
|
||||
@@ -32,12 +32,15 @@
|
||||
#define true 1
|
||||
#define false 0
|
||||
#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
/* Define _Bool, bool, false, true as a GNU extension. */
|
||||
/* Define _Bool as a GNU extension. */
|
||||
#define _Bool bool
|
||||
#if __cplusplus < 201103L
|
||||
/* For C++98, define bool, false, true as a GNU extension. */
|
||||
#define bool bool
|
||||
#define false false
|
||||
#define true true
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define __bool_true_false_are_defined 1
|
||||
|
||||
|
||||
@@ -76,7 +76,13 @@ typedef intptr_t _sleb128_t;
|
||||
typedef uintptr_t _uleb128_t;
|
||||
|
||||
struct _Unwind_Context;
|
||||
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
|
||||
struct _Unwind_Control_Block;
|
||||
typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */
|
||||
#else
|
||||
struct _Unwind_Exception;
|
||||
typedef struct _Unwind_Exception _Unwind_Exception;
|
||||
#endif
|
||||
typedef enum {
|
||||
_URC_NO_REASON = 0,
|
||||
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
|
||||
@@ -109,8 +115,42 @@ typedef enum {
|
||||
} _Unwind_Action;
|
||||
|
||||
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
|
||||
struct _Unwind_Exception *);
|
||||
_Unwind_Exception *);
|
||||
|
||||
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
|
||||
typedef struct _Unwind_Control_Block _Unwind_Control_Block;
|
||||
typedef uint32_t _Unwind_EHT_Header;
|
||||
|
||||
struct _Unwind_Control_Block {
|
||||
uint64_t exception_class;
|
||||
void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
|
||||
/* unwinder cache (private fields for the unwinder's use) */
|
||||
struct {
|
||||
uint32_t reserved1; /* forced unwind stop function, 0 if not forced */
|
||||
uint32_t reserved2; /* personality routine */
|
||||
uint32_t reserved3; /* callsite */
|
||||
uint32_t reserved4; /* forced unwind stop argument */
|
||||
uint32_t reserved5;
|
||||
} unwinder_cache;
|
||||
/* propagation barrier cache (valid after phase 1) */
|
||||
struct {
|
||||
uint32_t sp;
|
||||
uint32_t bitpattern[5];
|
||||
} barrier_cache;
|
||||
/* cleanup cache (preserved over cleanup) */
|
||||
struct {
|
||||
uint32_t bitpattern[4];
|
||||
} cleanup_cache;
|
||||
/* personality cache (for personality's benefit) */
|
||||
struct {
|
||||
uint32_t fnstart; /* function start address */
|
||||
_Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */
|
||||
uint32_t additional; /* additional data */
|
||||
uint32_t reserved1;
|
||||
} pr_cache;
|
||||
long long int : 0; /* force alignment of next item to 8-byte boundary */
|
||||
} __attribute__((__aligned__(8)));
|
||||
#else
|
||||
struct _Unwind_Exception {
|
||||
_Unwind_Exception_Class exception_class;
|
||||
_Unwind_Exception_Cleanup_Fn exception_cleanup;
|
||||
@@ -120,23 +160,24 @@ struct _Unwind_Exception {
|
||||
* aligned". GCC has interpreted this to mean "use the maximum useful
|
||||
* alignment for the target"; so do we. */
|
||||
} __attribute__((__aligned__));
|
||||
#endif
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
|
||||
_Unwind_Exception_Class,
|
||||
struct _Unwind_Exception *,
|
||||
_Unwind_Exception *,
|
||||
struct _Unwind_Context *,
|
||||
void *);
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
|
||||
int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
|
||||
struct _Unwind_Context *);
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,
|
||||
_Unwind_Exception_Class,
|
||||
_Unwind_Exception *,
|
||||
struct _Unwind_Context *);
|
||||
typedef _Unwind_Personality_Fn __personality_routine;
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
|
||||
void *);
|
||||
|
||||
#if defined(__arm__) && !defined(__APPLE__)
|
||||
|
||||
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
|
||||
typedef enum {
|
||||
_UVRSC_CORE = 0, /* integer register */
|
||||
_UVRSC_VFP = 1, /* vfp */
|
||||
@@ -158,14 +199,12 @@ typedef enum {
|
||||
_UVRSR_FAILED = 2
|
||||
} _Unwind_VRS_Result;
|
||||
|
||||
#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__)
|
||||
typedef uint32_t _Unwind_State;
|
||||
#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)
|
||||
#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
|
||||
#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)
|
||||
#define _US_ACTION_MASK ((_Unwind_State)3)
|
||||
#define _US_FORCE_UNWIND ((_Unwind_State)8)
|
||||
#endif
|
||||
|
||||
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
|
||||
_Unwind_VRS_RegClass __regclass,
|
||||
@@ -224,13 +263,12 @@ _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
|
||||
|
||||
/* DWARF EH functions; currently not available on Darwin/ARM */
|
||||
#if !defined(__APPLE__) || !defined(__arm__)
|
||||
|
||||
_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
|
||||
_Unwind_Stop_Fn, void *);
|
||||
void _Unwind_DeleteException(struct _Unwind_Exception *);
|
||||
void _Unwind_Resume(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,
|
||||
void *);
|
||||
void _Unwind_DeleteException(_Unwind_Exception *);
|
||||
void _Unwind_Resume(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -241,11 +279,11 @@ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
|
||||
|
||||
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
|
||||
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
|
||||
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,
|
||||
_Unwind_Stop_Fn, void *);
|
||||
void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
|
||||
void _Unwind_SjLj_Resume(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);
|
||||
|
||||
void *_Unwind_FindEnclosingFunction(void *);
|
||||
|
||||
|
||||
98
c_headers/vaesintrin.h
Normal file
98
c_headers/vaesintrin.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __VAESINTRIN_H
|
||||
#define __VAESINTRIN_H
|
||||
|
||||
/* Default attributes for YMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes")))
|
||||
|
||||
/* Default attributes for ZMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes")))
|
||||
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesenc_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesdec_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesdec_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesenclast_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesenclast_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesdeclast_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesdeclast_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS_F
|
||||
|
||||
#endif
|
||||
42
c_headers/vpclmulqdqintrin.h
Normal file
42
c_headers/vpclmulqdqintrin.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __VPCLMULQDQINTRIN_H
|
||||
#define __VPCLMULQDQINTRIN_H
|
||||
|
||||
#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
|
||||
(__v4di)(__m256i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \
|
||||
(__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B), \
|
||||
(char)(I)); })
|
||||
|
||||
#endif // __VPCLMULQDQINTRIN_H
|
||||
|
||||
@@ -2035,9 +2035,11 @@ _mm_storer_ps(float *__p, __m128 __a)
|
||||
_mm_store_ps(__p, __a);
|
||||
}
|
||||
|
||||
#define _MM_HINT_T0 3
|
||||
#define _MM_HINT_T1 2
|
||||
#define _MM_HINT_T2 1
|
||||
#define _MM_HINT_ET0 7
|
||||
#define _MM_HINT_ET1 6
|
||||
#define _MM_HINT_T0 3
|
||||
#define _MM_HINT_T1 2
|
||||
#define _MM_HINT_T2 1
|
||||
#define _MM_HINT_NTA 0
|
||||
|
||||
#ifndef _MSC_VER
|
||||
@@ -2068,7 +2070,8 @@ _mm_storer_ps(float *__p, __m128 __a)
|
||||
/// be generated. \n
|
||||
/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
|
||||
/// be generated.
|
||||
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
|
||||
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \
|
||||
((sel) >> 2) & 1, (sel) & 0x3))
|
||||
#endif
|
||||
|
||||
/// \brief Stores a 64-bit integer in the specified aligned memory location. To
|
||||
|
||||
@@ -8,6 +8,7 @@ SET "RELEASEDIR=zig-%ZIGVERSION%"
|
||||
mkdir "%RELEASEDIR%"
|
||||
move build-msvc-release\bin\zig.exe "%RELEASEDIR%"
|
||||
move build-msvc-release\lib "%RELEASEDIR%"
|
||||
move zig-cache\langref.html "%RELEASEDIR%"
|
||||
|
||||
SET "RELEASEZIP=zig-%ZIGVERSION%.zip"
|
||||
|
||||
|
||||
@@ -6,4 +6,5 @@ build_script:
|
||||
after_build:
|
||||
- '%APPVEYOR_BUILD_FOLDER%\ci\appveyor\after_build.bat'
|
||||
cache:
|
||||
- 'llvm+clang-5.0.0-win64-msvc-release.tar.xz'
|
||||
- 'llvm+clang-5.0.1-win64-msvc-release.tar.xz'
|
||||
- 'llvm+clang-6.0.0-win64-msvc-release.tar.xz'
|
||||
|
||||
@@ -7,13 +7,16 @@ SET "PATH=C:\msys64\mingw64\bin;C:\msys64\usr\bin;%PATH%"
|
||||
SET "MSYSTEM=MINGW64"
|
||||
SET "APPVEYOR_CACHE_ENTRY_ZIP_ARGS=-m0=Copy"
|
||||
|
||||
bash -lc "cd ${APPVEYOR_BUILD_FOLDER} && if [ -s ""llvm+clang-5.0.0-win64-msvc-release.tar.xz"" ]; then echo 'skipping LLVM download'; else wget 'https://s3.amazonaws.com/superjoe/temp/llvm%%2bclang-5.0.0-win64-msvc-release.tar.xz'; fi && tar xf llvm+clang-5.0.0-win64-msvc-release.tar.xz" || exit /b
|
||||
bash -lc "cd ${APPVEYOR_BUILD_FOLDER} && if [ -s ""llvm+clang-6.0.0-win64-msvc-release.tar.xz"" ]; then echo 'skipping LLVM download'; else wget 'https://s3.amazonaws.com/ziglang.org/deps/llvm%%2bclang-6.0.0-win64-msvc-release.tar.xz'; fi && tar xf llvm+clang-6.0.0-win64-msvc-release.tar.xz" || exit /b
|
||||
|
||||
|
||||
SET "PATH=%PREVPATH%"
|
||||
SET "MSYSTEM=%PREVMSYSTEM%"
|
||||
SET "ZIGBUILDDIR=%APPVEYOR_BUILD_FOLDER%\build-msvc-release"
|
||||
SET "ZIGPREFIXPATH=%APPVEYOR_BUILD_FOLDER%\llvm+clang-5.0.0-win64-msvc-release"
|
||||
SET "ZIGPREFIXPATH=%APPVEYOR_BUILD_FOLDER%\llvm+clang-6.0.0-win64-msvc-release"
|
||||
|
||||
call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64
|
||||
|
||||
mkdir %ZIGBUILDDIR%
|
||||
cd %ZIGBUILDDIR%
|
||||
@@ -22,16 +25,4 @@ msbuild /p:Configuration=Release INSTALL.vcxproj || exit /b
|
||||
|
||||
bin\zig.exe build --build-file ..\build.zig test || exit /b
|
||||
|
||||
@echo "MSVC build succeeded, proceeding with MinGW build"
|
||||
cd %APPVEYOR_BUILD_FOLDER%
|
||||
SET "PATH=C:\msys64\mingw64\bin;C:\msys64\usr\bin;%PATH%"
|
||||
SET "MSYSTEM=MINGW64"
|
||||
|
||||
bash -lc "pacman -Syu --needed --noconfirm"
|
||||
bash -lc "pacman -Su --needed --noconfirm"
|
||||
|
||||
bash -lc "pacman -S --needed --noconfirm make mingw64/mingw-w64-x86_64-make mingw64/mingw-w64-x86_64-cmake mingw64/mingw-w64-x86_64-clang mingw64/mingw-w64-x86_64-llvm mingw64/mingw-w64-x86_64-lld mingw64/mingw-w64-x86_64-gcc"
|
||||
|
||||
bash -lc "cd ${APPVEYOR_BUILD_FOLDER} && mkdir build && cd build && cmake .. -G""MSYS Makefiles"" -DCMAKE_INSTALL_PREFIX=$(pwd) -DZIG_LIBC_LIB_DIR=$(dirname $(cc -print-file-name=crt1.o)) -DZIG_LIBC_INCLUDE_DIR=$(echo -n | cc -E -x c - -v 2>&1 | grep -B1 ""End of search list."" | head -n1 | cut -c 2- | sed ""s/ .*//"") -DZIG_LIBC_STATIC_LIB_DIR=$(dirname $(cc -print-file-name=crtbegin.o)) && make && make install"
|
||||
|
||||
@echo "MinGW build successful"
|
||||
@echo "MSVC build succeeded"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
set -x
|
||||
|
||||
sudo sh -c 'echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-5.0 main" >> /etc/apt/sources.list'
|
||||
sudo sh -c 'echo "deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-6.0 main" >> /etc/apt/sources.list'
|
||||
wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
sudo apt-get update -q
|
||||
|
||||
@@ -4,4 +4,4 @@ set -x
|
||||
|
||||
sudo apt-get remove -y llvm-*
|
||||
sudo rm -rf /usr/local/*
|
||||
sudo apt-get install -y clang-5.0 libclang-5.0 libclang-5.0-dev llvm-5.0 llvm-5.0-dev liblld-5.0 liblld-5.0-dev cmake wine1.6-amd64
|
||||
sudo apt-get install -y clang-6.0 libclang-6.0 libclang-6.0-dev llvm-6.0 llvm-6.0-dev liblld-6.0 liblld-6.0-dev cmake wine1.6-amd64
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
set -x
|
||||
set -e
|
||||
|
||||
export CC=clang-5.0
|
||||
export CXX=clang++-5.0
|
||||
export CC=clang-6.0
|
||||
export CXX=clang++-6.0
|
||||
echo $PATH
|
||||
mkdir build
|
||||
cd build
|
||||
@@ -14,16 +14,16 @@ make install
|
||||
./zig build --build-file ../build.zig test
|
||||
|
||||
./zig test ../test/behavior.zig --target-os windows --target-arch i386 --target-environ msvc
|
||||
wine test.exe
|
||||
wine zig-cache/test.exe
|
||||
|
||||
./zig test ../test/behavior.zig --target-os windows --target-arch i386 --target-environ msvc --release-fast
|
||||
wine test.exe
|
||||
wine zig-cache/test.exe
|
||||
|
||||
./zig test ../test/behavior.zig --target-os windows --target-arch i386 --target-environ msvc --release-safe
|
||||
wine test.exe
|
||||
wine zig-cache/test.exe
|
||||
|
||||
./zig test ../test/behavior.zig --target-os windows --target-arch x86_64 --target-environ msvc
|
||||
wine64 test.exe
|
||||
wine64 zig-cache/test.exe
|
||||
|
||||
#./zig test ../test/behavior.zig --target-os windows --target-arch x86_64 --target-environ msvc --release-fast
|
||||
#wine64 test.exe
|
||||
|
||||
@@ -2,6 +2,6 @@
|
||||
|
||||
set -x
|
||||
|
||||
brew install llvm@5
|
||||
brew outdated llvm@5 || brew upgrade llvm@5
|
||||
brew install llvm@6
|
||||
brew outdated llvm@6 || brew upgrade llvm@6
|
||||
|
||||
|
||||
@@ -5,21 +5,8 @@ set -e
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@5/ -DCMAKE_INSTALL_PREFIX=$(pwd)
|
||||
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@6/ -DCMAKE_INSTALL_PREFIX=$(pwd)
|
||||
make VERBOSE=1
|
||||
make install
|
||||
|
||||
# TODO: we run the tests separately because when run all together there is some
|
||||
# mysterious issue where after N child process spawns it crashes. I've been
|
||||
# unable to reproduce the issue on my macbook - it only happens on Travis.
|
||||
# ./zig build --build-file ../build.zig test
|
||||
|
||||
./zig build --build-file ../build.zig test-behavior --verbose
|
||||
./zig build --build-file ../build.zig test-std --verbose
|
||||
./zig build --build-file ../build.zig test-compiler-rt --verbose
|
||||
./zig build --build-file ../build.zig test-compare-output --verbose
|
||||
./zig build --build-file ../build.zig test-build-examples --verbose
|
||||
./zig build --build-file ../build.zig test-compile-errors --verbose
|
||||
./zig build --build-file ../build.zig test-asm-link --verbose
|
||||
./zig build --build-file ../build.zig test-debug-safety --verbose
|
||||
./zig build --build-file ../build.zig test-parsec --verbose
|
||||
./zig build --build-file ../build.zig test
|
||||
|
||||
@@ -26,16 +26,16 @@ if(MSVC)
|
||||
else()
|
||||
find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
|
||||
PATHS
|
||||
/usr/lib/llvm/5/include
|
||||
/usr/lib/llvm-5.0/include
|
||||
/usr/lib/llvm/6/include
|
||||
/usr/lib/llvm-6.0/include
|
||||
/mingw64/include)
|
||||
|
||||
macro(FIND_AND_ADD_CLANG_LIB _libname_)
|
||||
string(TOUPPER ${_libname_} _prettylibname_)
|
||||
find_library(CLANG_${_prettylibname_}_LIB NAMES ${_libname_}
|
||||
PATHS
|
||||
/usr/lib/llvm/5/lib
|
||||
/usr/lib/llvm-5.0/lib
|
||||
/usr/lib/llvm/6/lib
|
||||
/usr/lib/llvm-6.0/lib
|
||||
/mingw64/lib
|
||||
/c/msys64/mingw64/lib
|
||||
c:\\msys64\\mingw64\\lib)
|
||||
|
||||
@@ -6,12 +6,12 @@
|
||||
# LLD_INCLUDE_DIRS
|
||||
# LLD_LIBRARIES
|
||||
|
||||
find_path(LLD_INCLUDE_DIRS NAMES lld/Driver/Driver.h
|
||||
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
|
||||
PATHS
|
||||
/usr/lib/llvm-5.0/include
|
||||
/usr/lib/llvm-6.0/include
|
||||
/mingw64/include)
|
||||
|
||||
find_library(LLD_LIBRARY NAMES lld-5.0 lld PATHS /usr/lib/llvm-5.0/lib)
|
||||
find_library(LLD_LIBRARY NAMES lld-6.0 lld PATHS /usr/lib/llvm-6.0/lib)
|
||||
if(EXISTS ${LLD_LIBRARY})
|
||||
set(LLD_LIBRARIES ${LLD_LIBRARY})
|
||||
else()
|
||||
@@ -19,7 +19,7 @@ else()
|
||||
string(TOUPPER ${_libname_} _prettylibname_)
|
||||
find_library(LLD_${_prettylibname_}_LIB NAMES ${_libname_}
|
||||
PATHS
|
||||
/usr/lib/llvm-5.0/lib
|
||||
/usr/lib/llvm-6.0/lib
|
||||
/mingw64/lib
|
||||
/c/msys64/mingw64/lib
|
||||
c:/msys64/mingw64/lib)
|
||||
@@ -29,13 +29,14 @@ else()
|
||||
endmacro(FIND_AND_ADD_LLD_LIB)
|
||||
|
||||
FIND_AND_ADD_LLD_LIB(lldDriver)
|
||||
FIND_AND_ADD_LLD_LIB(lldMinGW)
|
||||
FIND_AND_ADD_LLD_LIB(lldELF)
|
||||
FIND_AND_ADD_LLD_LIB(lldCOFF)
|
||||
FIND_AND_ADD_LLD_LIB(lldMachO)
|
||||
FIND_AND_ADD_LLD_LIB(lldReaderWriter)
|
||||
FIND_AND_ADD_LLD_LIB(lldCore)
|
||||
FIND_AND_ADD_LLD_LIB(lldYAML)
|
||||
FIND_AND_ADD_LLD_LIB(lldConfig)
|
||||
FIND_AND_ADD_LLD_LIB(lldCommon)
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
@@ -8,12 +8,12 @@
|
||||
# LLVM_LIBDIRS
|
||||
|
||||
find_program(LLVM_CONFIG_EXE
|
||||
NAMES llvm-config-5.0 llvm-config
|
||||
NAMES llvm-config-6.0 llvm-config
|
||||
PATHS
|
||||
"/mingw64/bin"
|
||||
"/c/msys64/mingw64/bin"
|
||||
"c:/msys64/mingw64/bin"
|
||||
"C:/Libraries/llvm-5.0.0/bin")
|
||||
"C:/Libraries/llvm-6.0.0/bin")
|
||||
|
||||
if(NOT(CMAKE_BUILD_TYPE STREQUAL "Debug"))
|
||||
execute_process(
|
||||
@@ -62,7 +62,7 @@ execute_process(
|
||||
set(LLVM_LIBRARIES ${LLVM_LIBRARIES} ${LLVM_SYSTEM_LIBS})
|
||||
|
||||
if(NOT LLVM_LIBRARIES)
|
||||
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-5.0 LLVM-5)
|
||||
find_library(LLVM_LIBRARIES NAMES LLVM LLVM-6.0 LLVM-6)
|
||||
endif()
|
||||
|
||||
link_directories("${CMAKE_PREFIX_PATH}/lib")
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
|
||||
License for Berkeley SoftFloat Release 3d
|
||||
License for Berkeley SoftFloat Release 3e
|
||||
|
||||
John R. Hauser
|
||||
2017 August 10
|
||||
2018 January 20
|
||||
|
||||
The following applies to the whole of SoftFloat Release 3d as well as to
|
||||
The following applies to the whole of SoftFloat Release 3e as well as to
|
||||
each source file individually.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
|
||||
University of California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -7,11 +7,11 @@
|
||||
|
||||
<BODY>
|
||||
|
||||
<H1>Package Overview for Berkeley SoftFloat Release 3d</H1>
|
||||
<H1>Package Overview for Berkeley SoftFloat Release 3e</H1>
|
||||
|
||||
<P>
|
||||
John R. Hauser<BR>
|
||||
2017 August 10<BR>
|
||||
2018 January 20<BR>
|
||||
</P>
|
||||
|
||||
<P>
|
||||
@@ -1,8 +1,8 @@
|
||||
|
||||
Package Overview for Berkeley SoftFloat Release 3d
|
||||
Package Overview for Berkeley SoftFloat Release 3e
|
||||
|
||||
John R. Hauser
|
||||
2017 August 10
|
||||
2018 January 20
|
||||
|
||||
Berkeley SoftFloat is a software implementation of binary floating-point
|
||||
that conforms to the IEEE Standard for Floating-Point Arithmetic. SoftFloat
|
||||
@@ -7,14 +7,57 @@
|
||||
|
||||
<BODY>
|
||||
|
||||
<H1>History of Berkeley SoftFloat, to Release 3d</H1>
|
||||
<H1>History of Berkeley SoftFloat, to Release 3e</H1>
|
||||
|
||||
<P>
|
||||
John R. Hauser<BR>
|
||||
2017 August 10<BR>
|
||||
2018 January 20<BR>
|
||||
</P>
|
||||
|
||||
|
||||
<H3>Release 3e (2018 January)</H3>
|
||||
|
||||
<UL>
|
||||
|
||||
<LI>
|
||||
Changed the default numeric code for optional rounding mode <CODE>odd</CODE>
|
||||
(round to odd, also known as <EM>jamming</EM>) from 5 to 6.
|
||||
|
||||
<LI>
|
||||
Modified the behavior of rounding mode <CODE>odd</CODE> when rounding to an
|
||||
integer value (either conversion to an integer format or a
|
||||
‘<CODE>roundToInt</CODE>’ function).
|
||||
Previously, for those cases only, rounding mode <CODE>odd</CODE> acted the same
|
||||
as rounding to minimum magnitude.
|
||||
Now all operations are rounded consistently.
|
||||
|
||||
<LI>
|
||||
Fixed some errors in the specialization code modeling Intel x86 floating-point,
|
||||
specifically the integers returned on invalid operations and the propagation of
|
||||
NaN payloads in a few rare cases.
|
||||
|
||||
<LI>
|
||||
Added specialization code modeling ARM floating-point, conforming to VFPv2 or
|
||||
later.
|
||||
|
||||
<LI>
|
||||
Added an example target for ARM processors.
|
||||
|
||||
<LI>
|
||||
Fixed a minor bug whereby function <CODE>f16_to_ui64</CODE> might return a
|
||||
different integer than expected in the case that the floating-point operand is
|
||||
negative.
|
||||
|
||||
<LI>
|
||||
Added example target-specific optimization for GCC, employing GCC instrinsics
|
||||
and support for <NOBR>128-bit</NOBR> integer arithmetic.
|
||||
|
||||
<LI>
|
||||
Made other minor improvements.
|
||||
|
||||
</UL>
|
||||
|
||||
|
||||
<H3>Release 3d (2017 August)</H3>
|
||||
|
||||
<UL>
|
||||
@@ -7,11 +7,11 @@
|
||||
|
||||
<BODY>
|
||||
|
||||
<H1>Berkeley SoftFloat Release 3d: Source Documentation</H1>
|
||||
<H1>Berkeley SoftFloat Release 3e: Source Documentation</H1>
|
||||
|
||||
<P>
|
||||
John R. Hauser<BR>
|
||||
2017 August 10<BR>
|
||||
2018 January 20<BR>
|
||||
</P>
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ SoftFloat has been successfully compiled with the GNU C Compiler
|
||||
<NOBR>Release 2</NOBR> or earlier.
|
||||
Changes to the interface of SoftFloat functions are documented in
|
||||
<A HREF="SoftFloat.html"><NOBR><CODE>SoftFloat.html</CODE></NOBR></A>.
|
||||
The current version of SoftFloat is <NOBR>Release 3d</NOBR>.
|
||||
The current version of SoftFloat is <NOBR>Release 3e</NOBR>.
|
||||
</P>
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ and <CODE><stdint.h></CODE></I>.
|
||||
The SoftFloat package was written by me, <NOBR>John R.</NOBR> Hauser.
|
||||
<NOBR>Release 3</NOBR> of SoftFloat was a completely new implementation
|
||||
supplanting earlier releases.
|
||||
The project to create <NOBR>Release 3</NOBR> (now <NOBR>through 3d</NOBR>) was
|
||||
The project to create <NOBR>Release 3</NOBR> (now <NOBR>through 3e</NOBR>) was
|
||||
done in the employ of the University of California, Berkeley, within the
|
||||
Department of Electrical Engineering and Computer Sciences, first for the
|
||||
Parallel Computing Laboratory (Par Lab) and then for the ASPIRE Lab.
|
||||
@@ -148,12 +148,12 @@ Oracle, and Samsung.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
The following applies to the whole of SoftFloat <NOBR>Release 3d</NOBR> as well
|
||||
The following applies to the whole of SoftFloat <NOBR>Release 3e</NOBR> as well
|
||||
as to each source file individually.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
|
||||
University of California.
|
||||
All rights reserved.
|
||||
</P>
|
||||
@@ -215,12 +215,15 @@ source
|
||||
include
|
||||
8086
|
||||
8086-SSE
|
||||
ARM-VFPv2
|
||||
ARM-VFPv2-defaultNaN
|
||||
build
|
||||
template-FAST_INT64
|
||||
template-not-FAST_INT64
|
||||
Linux-386-GCC
|
||||
Linux-386-SSE2-GCC
|
||||
Linux-x86_64-GCC
|
||||
Linux-ARM-VFPv2-GCC
|
||||
Win32-MinGW
|
||||
Win32-SSE2-MinGW
|
||||
Win64-MinGW-w64
|
||||
@@ -228,20 +231,37 @@ build
|
||||
</BLOCKQUOTE>
|
||||
The majority of the SoftFloat sources are provided in the <CODE>source</CODE>
|
||||
directory.
|
||||
The <CODE>include</CODE> subdirectory of <CODE>source</CODE> contains several
|
||||
header files (unsurprisingly), while the <CODE>8086</CODE> and
|
||||
<NOBR><CODE>8086-SSE</CODE></NOBR> subdirectories contain source files that
|
||||
specialize the floating-point behavior to match the Intel x86 line of
|
||||
processors.
|
||||
The files in directory <CODE>8086</CODE> give floating-point behavior
|
||||
consistent solely with Intel’s older, 8087-derived floating-point, while
|
||||
those in <NOBR><CODE>8086-SSE</CODE></NOBR> update the behavior of the
|
||||
non-extended formats (<CODE>float16_t</CODE>, <CODE>float32_t</CODE>,
|
||||
<CODE>float64_t</CODE>, and <CODE>float128_t</CODE>) to mirror Intel’s
|
||||
more recent Streaming SIMD Extensions (SSE) and other compatible extensions.
|
||||
The <CODE>include</CODE> subdirectory contains several header files
|
||||
(unsurprisingly), while the other subdirectories of <CODE>source</CODE> contain
|
||||
source files that specialize the floating-point behavior to match particular
|
||||
processor families:
|
||||
<BLOCKQUOTE>
|
||||
<DL>
|
||||
<DT><CODE>8086</CODE></DT>
|
||||
<DD>
|
||||
Intel’s older, 8087-derived floating-point, extended to all supported
|
||||
floating-point types
|
||||
</DD>
|
||||
<DT><CODE>8086-SSE</CODE></DT>
|
||||
<DD>
|
||||
Intel’s x86 processors with Streaming SIMD Extensions (SSE) and later
|
||||
compatible extensions, having 8087 behavior for <NOBR>80-bit</NOBR>
|
||||
double-extended-precision (<CODE>extFloat80_t</CODE>) and SSE behavior for
|
||||
other floating-point types
|
||||
</DD>
|
||||
<DT><CODE>ARM-VFPv2</CODE></DT>
|
||||
<DD>
|
||||
ARM’s VFPv2 or later floating-point, with NaN payload propagation
|
||||
</DD>
|
||||
<DT><CODE>ARM-VFPv2-defaultNaN</CODE></DT>
|
||||
<DD>
|
||||
ARM’s VFPv2 or later floating-point, with the “default NaN”
|
||||
option
|
||||
</DD>
|
||||
</DL>
|
||||
</BLOCKQUOTE>
|
||||
If other specializations are attempted, these would be expected to be other
|
||||
subdirectories of <CODE>source</CODE> alongside <CODE>8086</CODE> and
|
||||
<NOBR><CODE>8086-SSE</CODE></NOBR>.
|
||||
subdirectories of <CODE>source</CODE> alongside the ones listed above.
|
||||
Specialization is covered later, in <NOBR>section 5.2</NOBR>, <I>Specializing
|
||||
Floating-Point Behavior</I>.
|
||||
</P>
|
||||
@@ -264,19 +284,20 @@ are intended to follow a naming system of
|
||||
For the example targets,
|
||||
<NOBR><CODE><<I>execution-environment</I>></CODE></NOBR> is
|
||||
<NOBR><CODE>Linux-386</CODE></NOBR>, <NOBR><CODE>Linux-386-SSE2</CODE></NOBR>,
|
||||
<NOBR><CODE>Linux-x86_64</CODE></NOBR>, <CODE>Win32</CODE>,
|
||||
<NOBR><CODE>Linux-x86_64</CODE></NOBR>,
|
||||
<NOBR><CODE>Linux-ARM-VFPv2</CODE></NOBR>, <CODE>Win32</CODE>,
|
||||
<NOBR><CODE>Win32-SSE2</CODE></NOBR>, or <CODE>Win64</CODE>, and
|
||||
<NOBR><CODE><<I>compiler</I>></CODE></NOBR> is <CODE>GCC</CODE>,
|
||||
<CODE>MinGW</CODE>, or <NOBR><CODE>MinGW-w64</CODE></NOBR>.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
At the current time, all of the supplied target directories are merely examples
|
||||
that may or may not be correct for compiling on any particular system.
|
||||
All of the supplied target directories are merely examples that may or may not
|
||||
be correct for compiling on any particular system.
|
||||
Despite requests, there are currently no plans to include and maintain in the
|
||||
SoftFloat package the build files needed for a great many users’
|
||||
compilation environments, which after all can span a broad range of operating
|
||||
systems, compilers, and other tools.
|
||||
compilation environments, which can span a huge range of operating systems,
|
||||
compilers, and other tools.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
@@ -402,8 +423,8 @@ A new build target may use an existing specialization, such as the ones
|
||||
provided by the <CODE>8086</CODE> and <NOBR><CODE>8086-SSE</CODE></NOBR>
|
||||
subdirectories.
|
||||
If a build target needs a new specialization, different from any existing ones,
|
||||
it is recommended that a new specialization subdirectory be created in the
|
||||
<CODE>source</CODE> directory for this purpose.
|
||||
it is recommended that a new specialization directory be created for this
|
||||
purpose.
|
||||
The <CODE>specialize.h</CODE> header file from any of the provided
|
||||
specialization subdirectories can be used as a model for what definitions are
|
||||
needed.
|
||||
@@ -577,8 +598,40 @@ function.
|
||||
This technically defines <NOBR><CODE><<I>function-name</I>></CODE></NOBR>
|
||||
as a macro, but one that resolves to the same name, which may then be a
|
||||
function.
|
||||
(A preprocessor that conforms to the C Standard must limit recursive macro
|
||||
expansion from being applied more than once.)
|
||||
(A preprocessor that conforms to the C Standard is required to limit recursive
|
||||
macro expansion from being applied more than once.)
|
||||
</P>
|
||||
|
||||
<P>
|
||||
The supplied header file <CODE>opts-GCC.h</CODE> (in directory
|
||||
<CODE>source/include</CODE>) provides an example of target-specific
|
||||
optimization for the GCC compiler.
|
||||
Each GCC target example in the <CODE>build</CODE> directory has
|
||||
<BLOCKQUOTE>
|
||||
<CODE>#include "opts-GCC.h"</CODE>
|
||||
</BLOCKQUOTE>
|
||||
in its <CODE>platform.h</CODE> header file.
|
||||
Before <CODE>opts-GCC.h</CODE> is included, the following macros must be
|
||||
defined (or not) to control which features are invoked:
|
||||
<BLOCKQUOTE>
|
||||
<DL>
|
||||
<DT><CODE>SOFTFLOAT_BUILTIN_CLZ</CODE></DT>
|
||||
<DD>
|
||||
If defined, SoftFloat’s internal
|
||||
‘<CODE>countLeadingZeros</CODE>’ functions use intrinsics
|
||||
<CODE>__builtin_clz</CODE> and <CODE>__builtin_clzll</CODE>.
|
||||
</DD>
|
||||
<DT><CODE>SOFTFLOAT_INTRINSIC_INT128</CODE></DT>
|
||||
<DD>
|
||||
If defined, SoftFloat makes use of GCC’s nonstandard <NOBR>128-bit</NOBR>
|
||||
integer type <CODE>__int128</CODE>.
|
||||
</DD>
|
||||
</DL>
|
||||
</BLOCKQUOTE>
|
||||
On some machines, these improvements are observed to increase the speeds of
|
||||
<CODE>f64_mul</CODE> and <CODE>f128_mul</CODE> by around 20 to 25%, although
|
||||
other functions receive less dramatic boosts, or none at all.
|
||||
Results can vary greatly across different platforms.
|
||||
</P>
|
||||
|
||||
|
||||
@@ -7,11 +7,11 @@
|
||||
|
||||
<BODY>
|
||||
|
||||
<H1>Berkeley SoftFloat Release 3d: Library Interface</H1>
|
||||
<H1>Berkeley SoftFloat Release 3e: Library Interface</H1>
|
||||
|
||||
<P>
|
||||
John R. Hauser<BR>
|
||||
2017 August 10<BR>
|
||||
2018 January 20<BR>
|
||||
</P>
|
||||
|
||||
|
||||
@@ -106,13 +106,20 @@ Information about the standard is available elsewhere.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
The current version of SoftFloat is <NOBR>Release 3d</NOBR>.
|
||||
This release fixes bugs that were found in the square root functions for the
|
||||
<NOBR>64-bit</NOBR>, <NOBR>80-bit</NOBR>, and <NOBR>128-bit</NOBR>
|
||||
floating-point formats.
|
||||
The current version of SoftFloat is <NOBR>Release 3e</NOBR>.
|
||||
This release modifies the behavior of the rarely used <I>odd</I> rounding mode
|
||||
(<I>round to odd</I>, also known as <I>jamming</I>), and also adds some new
|
||||
specialization and optimization examples for those compiling SoftFloat.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
The previous <NOBR>Release 3d</NOBR> fixed bugs that were found in the square
|
||||
root functions for the <NOBR>64-bit</NOBR>, <NOBR>80-bit</NOBR>, and
|
||||
<NOBR>128-bit</NOBR> floating-point formats.
|
||||
(Thanks to Alexei Sibidanov at the University of Victoria for reporting an
|
||||
incorrect result.)
|
||||
The bugs affected all prior <NOBR>Release-3</NOBR> versions of SoftFloat.
|
||||
The bugs affected all prior <NOBR>Release-3</NOBR> versions of SoftFloat
|
||||
<NOBR>through 3c</NOBR>.
|
||||
The flaw in the <NOBR>64-bit</NOBR> floating-point square root function was of
|
||||
very minor impact, causing a <NOBR>1-ulp</NOBR> error (<NOBR>1 unit</NOBR> in
|
||||
the last place) a few times out of a billion.
|
||||
@@ -124,13 +131,8 @@ wrong.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
<NOBR>Release 3d</NOBR> makes no changes to the SoftFloat library interface
|
||||
compared to the previous <NOBR>Release 3c</NOBR>.
|
||||
Since the original <NOBR>Release 3</NOBR>, the main changes to the interface
|
||||
have been that <NOBR>Release 3b</NOBR> added support for the
|
||||
<NOBR>16-bit</NOBR> half-precision format, and <NOBR>Release 3c</NOBR> added
|
||||
optional support for a rarely used rounding mode, <I>round to odd</I>, also
|
||||
known as <I>jamming</I>.
|
||||
Among earlier releases, 3b was notable for adding support for the
|
||||
<NOBR>16-bit</NOBR> half-precision format.
|
||||
For more about the evolution of SoftFloat releases, see
|
||||
<A HREF="SoftFloat-history.html"><NOBR><CODE>SoftFloat-history.html</CODE></NOBR></A>.
|
||||
</P>
|
||||
@@ -169,7 +171,7 @@ strictly required.
|
||||
<P>
|
||||
Most operations not required by the original 1985 version of the IEEE
|
||||
Floating-Point Standard but added in the 2008 version are not yet supported in
|
||||
SoftFloat <NOBR>Release 3d</NOBR>.
|
||||
SoftFloat <NOBR>Release 3e</NOBR>.
|
||||
</P>
|
||||
|
||||
|
||||
@@ -179,7 +181,7 @@ SoftFloat <NOBR>Release 3d</NOBR>.
|
||||
The SoftFloat package was written by me, <NOBR>John R.</NOBR> Hauser.
|
||||
<NOBR>Release 3</NOBR> of SoftFloat was a completely new implementation
|
||||
supplanting earlier releases.
|
||||
The project to create <NOBR>Release 3</NOBR> (now <NOBR>through 3d</NOBR>) was
|
||||
The project to create <NOBR>Release 3</NOBR> (now <NOBR>through 3e</NOBR>) was
|
||||
done in the employ of the University of California, Berkeley, within the
|
||||
Department of Electrical Engineering and Computer Sciences, first for the
|
||||
Parallel Computing Laboratory (Par Lab) and then for the ASPIRE Lab.
|
||||
@@ -213,12 +215,12 @@ Oracle, and Samsung.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
The following applies to the whole of SoftFloat <NOBR>Release 3d</NOBR> as well
|
||||
The following applies to the whole of SoftFloat <NOBR>Release 3e</NOBR> as well
|
||||
as to each source file individually.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
|
||||
University of California.
|
||||
All rights reserved.
|
||||
</P>
|
||||
@@ -395,7 +397,7 @@ comparisons between two values in the same floating-point format.
|
||||
|
||||
<P>
|
||||
The following operations required by the 2008 IEEE Floating-Point Standard are
|
||||
not supported in SoftFloat <NOBR>Release 3d</NOBR>:
|
||||
not supported in SoftFloat <NOBR>Release 3e</NOBR>:
|
||||
<UL>
|
||||
<LI>
|
||||
<B>nextUp</B>, <B>nextDown</B>, <B>minNum</B>, <B>maxNum</B>, <B>minNumMag</B>,
|
||||
@@ -445,8 +447,8 @@ exponent must both be zero.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
SoftFloat's functions are not guaranteed to operate as expected when inputs of
|
||||
type <CODE>extFloat80_t</CODE> are non-canonical.
|
||||
SoftFloat’s functions are not guaranteed to operate as expected when
|
||||
inputs of type <CODE>extFloat80_t</CODE> are non-canonical.
|
||||
Assuming all of a function’s <CODE>extFloat80_t</CODE> inputs (if any)
|
||||
are canonical, function outputs of type <CODE>extFloat80_t</CODE> will always
|
||||
be canonical.
|
||||
@@ -591,16 +593,15 @@ Variable <CODE>softfloat_roundingMode</CODE> is initialized to
|
||||
</P>
|
||||
|
||||
<P>
|
||||
If supported, mode <CODE>softfloat_round_odd</CODE> first rounds a
|
||||
floating-point result to minimum magnitude, the same as
|
||||
When <CODE>softfloat_round_odd</CODE> is the rounding mode for a function that
|
||||
rounds to an integer value (either conversion to an integer format or a
|
||||
‘<CODE>roundToInt</CODE>’ function), if the input is not already an
|
||||
integer, the rounded result is the closest <EM>odd</EM> integer.
|
||||
For other operations, this rounding mode acts as though the floating-point
|
||||
result is first rounded to minimum magnitude, the same as
|
||||
<CODE>softfloat_round_minMag</CODE>, and then, if the result is inexact, the
|
||||
least-significant bit of the result is set <NOBR>to 1</NOBR>.
|
||||
This rounding mode is also known as <EM>jamming</EM>.
|
||||
As a special case, when <CODE>softfloat_round_odd</CODE> is the rounding mode
|
||||
for a function that rounds to an integer value (either conversion to an integer
|
||||
format or a ‘<CODE>roundToInt</CODE>’ function), rounding is the
|
||||
same as <CODE>softfloat_round_minMag</CODE>, without any change to the
|
||||
least-significant integer bit.
|
||||
Rounding to odd is also known as <EM>jamming</EM>.
|
||||
</P>
|
||||
|
||||
<H3>6.2. Underflow Detection</H3>
|
||||
@@ -820,12 +821,6 @@ The <CODE><I>roundingMode</I></CODE> argument specifies the rounding mode for
|
||||
the conversion.
|
||||
The variable that usually indicates rounding mode,
|
||||
<CODE>softfloat_roundingMode</CODE>, is ignored.
|
||||
If <CODE><I>roundingMode</I></CODE> is <CODE>softfloat_round_odd</CODE>,
|
||||
rounding is to minimum magnitude, the same as
|
||||
<CODE>softfloat_round_minMag</CODE>, rather than to an odd integer.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
Argument <CODE><I>exact</I></CODE> determines whether the <I>inexact</I>
|
||||
exception flag is raised if the conversion is not exact.
|
||||
If <CODE><I>exact</I></CODE> is <CODE>true</CODE>, the <I>inexact</I> flag may
|
||||
@@ -1087,12 +1082,6 @@ The <CODE><I>roundingMode</I></CODE> argument specifies the rounding mode to
|
||||
apply.
|
||||
The variable that usually indicates rounding mode,
|
||||
<CODE>softfloat_roundingMode</CODE>, is ignored.
|
||||
If <CODE><I>roundingMode</I></CODE> is <CODE>softfloat_round_odd</CODE>,
|
||||
rounding is to minimum magnitude, the same as
|
||||
<CODE>softfloat_round_minMag</CODE>, rather than to an odd integer value.
|
||||
</P>
|
||||
|
||||
<P>
|
||||
Argument <CODE><I>exact</I></CODE> determines whether the <I>inexact</I>
|
||||
exception flag is raised if the conversion is not exact.
|
||||
If <CODE><I>exact</I></CODE> is <CODE>true</CODE>, the <I>inexact</I> flag may
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,10 +2,10 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
Copyright 2011, 2012, 2013, 2014, 2018 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
@@ -42,9 +42,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "softfloat.h"
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Interpreting the unsigned integer formed from concatenating `uiA64' and
|
||||
| `uiA0' as an 80-bit extended floating-point value, and likewise interpreting
|
||||
| the unsigned integer formed from concatenating `uiB64' and `uiB0' as another
|
||||
| Interpreting the unsigned integer formed from concatenating 'uiA64' and
|
||||
| 'uiA0' as an 80-bit extended floating-point value, and likewise interpreting
|
||||
| the unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
|
||||
| 80-bit extended floating-point value, and assuming at least on of these
|
||||
| floating-point values is a NaN, returns the bit pattern of the combined NaN
|
||||
| result. If either original floating-point value is a signaling NaN, the
|
||||
@@ -90,8 +90,8 @@ struct uint128
|
||||
uiMagB64 = uiB64 & 0x7FFF;
|
||||
if ( uiMagA64 < uiMagB64 ) goto returnB;
|
||||
if ( uiMagB64 < uiMagA64 ) goto returnA;
|
||||
if ( uiNonsigA0 < uiNonsigB0 ) goto returnB;
|
||||
if ( uiNonsigB0 < uiNonsigA0 ) goto returnA;
|
||||
if ( uiA0 < uiB0 ) goto returnB;
|
||||
if ( uiB0 < uiA0 ) goto returnA;
|
||||
if ( uiA64 < uiB64 ) goto returnA;
|
||||
returnB:
|
||||
uiZ.v64 = uiB64;
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,10 +2,10 @@
|
||||
/*============================================================================
|
||||
|
||||
This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2018 The Regents of the
|
||||
University of California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
@@ -39,10 +39,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "softfloat_types.h"
|
||||
#include "primitiveTypes.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Default value for `softfloat_detectTininess'.
|
||||
| Default value for 'softfloat_detectTininess'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define init_detectTininess softfloat_tininess_afterRounding
|
||||
|
||||
@@ -51,22 +52,22 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
| invalid exception.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define ui32_fromPosOverflow 0xFFFFFFFF
|
||||
#define ui32_fromNegOverflow 0
|
||||
#define ui32_fromNegOverflow 0xFFFFFFFF
|
||||
#define ui32_fromNaN 0xFFFFFFFF
|
||||
#define i32_fromPosOverflow 0x7FFFFFFF
|
||||
#define i32_fromPosOverflow (-0x7FFFFFFF - 1)
|
||||
#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
|
||||
#define i32_fromNaN 0x7FFFFFFF
|
||||
#define i32_fromNaN (-0x7FFFFFFF - 1)
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The values to return on conversions to 64-bit integer formats that raise an
|
||||
| invalid exception.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
|
||||
#define ui64_fromNegOverflow 0
|
||||
#define ui64_fromNegOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
|
||||
#define ui64_fromNaN UINT64_C( 0xFFFFFFFFFFFFFFFF )
|
||||
#define i64_fromPosOverflow UINT64_C( 0x7FFFFFFFFFFFFFFF )
|
||||
#define i64_fromNegOverflow (-UINT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
|
||||
#define i64_fromNaN UINT64_C( 0x7FFFFFFFFFFFFFFF )
|
||||
#define i64_fromPosOverflow (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
|
||||
#define i64_fromNegOverflow (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
|
||||
#define i64_fromNaN (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| "Common NaN" structure, used to transfer NaN representations from one format
|
||||
@@ -87,30 +88,30 @@ struct commonNaN {
|
||||
#define defaultNaNF16UI 0xFE00
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a
|
||||
| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
|
||||
| 16-bit floating-point signaling NaN.
|
||||
| Note: This macro evaluates its argument more than once.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming `uiA' has the bit pattern of a 16-bit floating-point NaN, converts
|
||||
| Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into a 16-bit floating-point
|
||||
| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
|
||||
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Interpreting `uiA' and `uiB' as the bit patterns of two 16-bit floating-
|
||||
| Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
|
||||
| point values, at least one of which is a NaN, returns the bit pattern of
|
||||
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
|
||||
| the combined NaN result. If either 'uiA' or 'uiB' has the pattern of a
|
||||
| signaling NaN, the invalid exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast16_t
|
||||
@@ -122,30 +123,30 @@ uint_fast16_t
|
||||
#define defaultNaNF32UI 0xFFC00000
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns true when 32-bit unsigned integer `uiA' has the bit pattern of a
|
||||
| Returns true when 32-bit unsigned integer 'uiA' has the bit pattern of a
|
||||
| 32-bit floating-point signaling NaN.
|
||||
| Note: This macro evaluates its argument more than once.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
|
||||
| Assuming 'uiA' has the bit pattern of a 32-bit floating-point NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
|
||||
| Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
|
||||
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
|
||||
| Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
|
||||
| point values, at least one of which is a NaN, returns the bit pattern of
|
||||
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
|
||||
| the combined NaN result. If either 'uiA' or 'uiB' has the pattern of a
|
||||
| signaling NaN, the invalid exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast32_t
|
||||
@@ -157,30 +158,30 @@ uint_fast32_t
|
||||
#define defaultNaNF64UI UINT64_C( 0xFFF8000000000000 )
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns true when 64-bit unsigned integer `uiA' has the bit pattern of a
|
||||
| Returns true when 64-bit unsigned integer 'uiA' has the bit pattern of a
|
||||
| 64-bit floating-point signaling NaN.
|
||||
| Note: This macro evaluates its argument more than once.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define softfloat_isSigNaNF64UI( uiA ) ((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
|
||||
| Assuming 'uiA' has the bit pattern of a 64-bit floating-point NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
|
||||
| Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
|
||||
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
|
||||
| Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
|
||||
| point values, at least one of which is a NaN, returns the bit pattern of
|
||||
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
|
||||
| the combined NaN result. If either 'uiA' or 'uiB' has the pattern of a
|
||||
| signaling NaN, the invalid exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast64_t
|
||||
@@ -194,7 +195,7 @@ uint_fast64_t
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns true when the 80-bit unsigned integer formed from concatenating
|
||||
| 16-bit `uiA64' and 64-bit `uiA0' has the bit pattern of an 80-bit extended
|
||||
| 16-bit 'uiA64' and 64-bit 'uiA0' has the bit pattern of an 80-bit extended
|
||||
| floating-point signaling NaN.
|
||||
| Note: This macro evaluates its arguments more than once.
|
||||
*----------------------------------------------------------------------------*/
|
||||
@@ -203,15 +204,15 @@ uint_fast64_t
|
||||
#ifdef SOFTFLOAT_FAST_INT64
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The following functions are needed only when `SOFTFLOAT_FAST_INT64' is
|
||||
| The following functions are needed only when 'SOFTFLOAT_FAST_INT64' is
|
||||
| defined.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming the unsigned integer formed from concatenating `uiA64' and `uiA0'
|
||||
| Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
|
||||
| has the bit pattern of an 80-bit extended floating-point NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void
|
||||
@@ -219,16 +220,16 @@ void
|
||||
uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into an 80-bit extended
|
||||
| Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
|
||||
| floating-point NaN, and returns the bit pattern of this value as an unsigned
|
||||
| integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Interpreting the unsigned integer formed from concatenating `uiA64' and
|
||||
| `uiA0' as an 80-bit extended floating-point value, and likewise interpreting
|
||||
| the unsigned integer formed from concatenating `uiB64' and `uiB0' as another
|
||||
| Interpreting the unsigned integer formed from concatenating 'uiA64' and
|
||||
| 'uiA0' as an 80-bit extended floating-point value, and likewise interpreting
|
||||
| the unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
|
||||
| 80-bit extended floating-point value, and assuming at least on of these
|
||||
| floating-point values is a NaN, returns the bit pattern of the combined NaN
|
||||
| result. If either original floating-point value is a signaling NaN, the
|
||||
@@ -250,17 +251,17 @@ struct uint128
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns true when the 128-bit unsigned integer formed from concatenating
|
||||
| 64-bit `uiA64' and 64-bit `uiA0' has the bit pattern of a 128-bit floating-
|
||||
| 64-bit 'uiA64' and 64-bit 'uiA0' has the bit pattern of a 128-bit floating-
|
||||
| point signaling NaN.
|
||||
| Note: This macro evaluates its arguments more than once.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming the unsigned integer formed from concatenating `uiA64' and `uiA0'
|
||||
| Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
|
||||
| has the bit pattern of a 128-bit floating-point NaN, converts this NaN to
|
||||
| the common NaN form, and stores the resulting common NaN at the location
|
||||
| pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid exception
|
||||
| pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid exception
|
||||
| is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void
|
||||
@@ -268,15 +269,15 @@ void
|
||||
uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into a 128-bit floating-point
|
||||
| Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
|
||||
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Interpreting the unsigned integer formed from concatenating `uiA64' and
|
||||
| `uiA0' as a 128-bit floating-point value, and likewise interpreting the
|
||||
| unsigned integer formed from concatenating `uiB64' and `uiB0' as another
|
||||
| Interpreting the unsigned integer formed from concatenating 'uiA64' and
|
||||
| 'uiA0' as a 128-bit floating-point value, and likewise interpreting the
|
||||
| unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
|
||||
| 128-bit floating-point value, and assuming at least on of these floating-
|
||||
| point values is a NaN, returns the bit pattern of the combined NaN result.
|
||||
| If either original floating-point value is a signaling NaN, the invalid
|
||||
@@ -293,14 +294,14 @@ struct uint128
|
||||
#else
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The following functions are needed only when `SOFTFLOAT_FAST_INT64' is not
|
||||
| The following functions are needed only when 'SOFTFLOAT_FAST_INT64' is not
|
||||
| defined.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming the 80-bit extended floating-point value pointed to by `aSPtr' is
|
||||
| Assuming the 80-bit extended floating-point value pointed to by 'aSPtr' is
|
||||
| a NaN, converts this NaN to the common NaN form, and stores the resulting
|
||||
| common NaN at the location pointed to by `zPtr'. If the NaN is a signaling
|
||||
| common NaN at the location pointed to by 'zPtr'. If the NaN is a signaling
|
||||
| NaN, the invalid exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void
|
||||
@@ -308,9 +309,9 @@ void
|
||||
const struct extFloat80M *aSPtr, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into an 80-bit extended
|
||||
| Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
|
||||
| floating-point NaN, and stores this NaN at the location pointed to by
|
||||
| `zSPtr'.
|
||||
| 'zSPtr'.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void
|
||||
softfloat_commonNaNToExtF80M(
|
||||
@@ -318,8 +319,8 @@ void
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming at least one of the two 80-bit extended floating-point values
|
||||
| pointed to by `aSPtr' and `bSPtr' is a NaN, stores the combined NaN result
|
||||
| at the location pointed to by `zSPtr'. If either original floating-point
|
||||
| pointed to by 'aSPtr' and 'bSPtr' is a NaN, stores the combined NaN result
|
||||
| at the location pointed to by 'zSPtr'. If either original floating-point
|
||||
| value is a signaling NaN, the invalid exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void
|
||||
@@ -338,10 +339,10 @@ void
|
||||
#define defaultNaNF128UI0 0
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming the 128-bit floating-point value pointed to by `aWPtr' is a NaN,
|
||||
| Assuming the 128-bit floating-point value pointed to by 'aWPtr' is a NaN,
|
||||
| converts this NaN to the common NaN form, and stores the resulting common
|
||||
| NaN at the location pointed to by `zPtr'. If the NaN is a signaling NaN,
|
||||
| the invalid exception is raised. Argument `aWPtr' points to an array of
|
||||
| NaN at the location pointed to by 'zPtr'. If the NaN is a signaling NaN,
|
||||
| the invalid exception is raised. Argument 'aWPtr' points to an array of
|
||||
| four 32-bit elements that concatenate in the platform's normal endian order
|
||||
| to form a 128-bit floating-point value.
|
||||
*----------------------------------------------------------------------------*/
|
||||
@@ -349,9 +350,9 @@ void
|
||||
softfloat_f128MToCommonNaN( const uint32_t *aWPtr, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into a 128-bit floating-point
|
||||
| NaN, and stores this NaN at the location pointed to by `zWPtr'. Argument
|
||||
| `zWPtr' points to an array of four 32-bit elements that concatenate in the
|
||||
| Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
|
||||
| NaN, and stores this NaN at the location pointed to by 'zWPtr'. Argument
|
||||
| 'zWPtr' points to an array of four 32-bit elements that concatenate in the
|
||||
| platform's normal endian order to form a 128-bit floating-point value.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void
|
||||
@@ -359,10 +360,10 @@ void
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming at least one of the two 128-bit floating-point values pointed to by
|
||||
| `aWPtr' and `bWPtr' is a NaN, stores the combined NaN result at the location
|
||||
| pointed to by `zWPtr'. If either original floating-point value is a
|
||||
| signaling NaN, the invalid exception is raised. Each of `aWPtr', `bWPtr',
|
||||
| and `zWPtr' points to an array of four 32-bit elements that concatenate in
|
||||
| 'aWPtr' and 'bWPtr' is a NaN, stores the combined NaN result at the location
|
||||
| pointed to by 'zWPtr'. If either original floating-point value is a
|
||||
| signaling NaN, the invalid exception is raised. Each of 'aWPtr', 'bWPtr',
|
||||
| and 'zWPtr' points to an array of four 32-bit elements that concatenate in
|
||||
| the platform's normal endian order to form a 128-bit floating-point value.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
@@ -2,7 +2,7 @@
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3d, by John R. Hauser.
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user