Compare commits
2000 Commits
.gitattributes (vendored, new file, 2 changed lines)
@@ -0,0 +1,2 @@
*.zig text eol=lf
langref.html.in text eol=lf
.gitignore (vendored, 12 changed lines)
@@ -1,3 +1,15 @@
# This file is for zig-specific build artifacts.
# If you have OS-specific or editor-specific files to ignore,
# such as *.swp or .DS_Store, put those in your global
# ~/.gitignore and put this in your ~/.gitconfig:
#
# [core]
# excludesfile = ~/.gitignore
#
# Cheers!
# -andrewrk

zig-cache/
build/
build-*/
docgen_tmp/
.travis.yml (24 changed lines)
@@ -1,16 +1,22 @@
sudo: required
services:
- docker
os:
- linux
- osx
- linux
- osx
dist: trusty
osx_image: xcode8.3
sudo: required
language: cpp
before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_before_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_before_install; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_before_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_before_install; fi
install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_install; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_install; fi
script:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_script; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_script; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_script; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_script; fi
env:
  global:
    - secure: QmJ+eLOxj3Irl5SHxt6lQvrj7++1AIz8bYri6RScAQGHQPIztkmbpBjAkpFgYaWPkZ04ROtamFXdS7oHtJHSECesgPoqM/CHIychQkgpDq30+TsFyYbBpDGHY+N6r2WnQTvg+9EuAp6P365us6qFS0D5zQ3P40c56uMbazFu3J4W1HZP+pLWlLjEXaN88ePhHWqNZyvwGMkLpYl3ghcrE9H4vGZQ7jenRW4UmskLEkuhUPJbQiow3Td8arJiRmLVISzWqneqNraLUpGyUVr4F3Rbjzacfoo3r9ZZynhY0mFsEye82x6TMGgH2xsNGkd91zpQuckWUT+pQv/G6FXpnEnjIJSO2Z5WAxXrx6xB1k2HZ17/4NWLF3fJVhdQJm3mS6odeGzUjgGrl1A42evxU+7VbcofEJq1aMiLgU1jUT2pt+pefCwmKJYLpEsSzuyrVxgvskQz0QpC053TAYSNf2Jj6Qhg9YDWyOeemYmDgffTqErF7AYhc6NKH0s0XKkIiNFSxorkEsfG/Ck1o+15slHNmWZXlmXToxDqFkLDoPvfGKg7koU5YTGvci/F9ZKb1juhGLxZbwap/18zN40BqA+Ip2yDBJAKxsIiwSjSIguy6g/Z1I50s0xNGOr36urfRRQX5H+rqr/xCZ63B6WSe6qBcZboWAQMDn8HLS9Xiwc=
    - secure: dnb7r5guUeMOX9e7XlPUSZzmga8VW3G9Q1aa7LxEKiTjSnWhu5KpPDe8o1X3Rj6nc5iXDqmBH/C/7eNXPDyXJJWPvpE2YRpGymyUkRaakul0QBKJEaMvwy2SuAfS69CWC+TSzfGRvtSYkdpBhhLvs0h5S819S5jYbCNSCmOKfFucaP5NsHNIZ/I19oIeTPTa0/UnVm7DLFZXZjvbS+czkdyH1DhbT85sLj+XqNTzLePImE68efrjaHnlSy/CzBVJzj55UgD5i9fxNCQWzGWim/SD5xZ0zKtLycSOf6wQN2lCo0lkjw9rDlYz69mM5L9ikfYL9oHDPZnh84oXKglQ5miOHCgqs/qs4439I05lIu8i/EfbFA55YG4NyO3rL9YVOOt5gwiwvJYhDcnkVVzSl0o5bsoZgQfYvPWaIQKNkl3C53zfDQjgqS54CeDzlZpFrQTDQ1RrH8oeVC1gfYAeMabMDadox5rfZmLIN5JTf/F8iD/QdxGcoUvkEENcQgfP9PnubExtexgHGsEmqbm6ORSZ1MkEh2m3fo0f8KE6TbN1UigmcQ8nTkWBHsSmfHnB8HwJQp8mwQmDamXA+Hl3e3w4LOdYkJVlNW1/TTyJJOOvjMQCjF8SJmPHuh+QpqKbSaT9XM/vBhxbIZEufH8kawJKCBBcCNspGMNjhXfNjM0=
CMakeLists.txt (1106 changed lines)
File diff suppressed because it is too large.
README.md (190 changed lines)
@@ -1,11 +1,9 @@

A programming language designed for robustness, optimality, and
clarity.

[ziglang.org](http://ziglang.org)

[Documentation](http://ziglang.org/documentation/)
[ziglang.org](https://ziglang.org)

## Feature Highlights

@@ -23,19 +21,19 @@ clarity.
* Compatible with C libraries with no wrapper necessary. Directly include
C .h files and get access to the functions and symbols therein.
* Provides standard library which competes with the C standard library and is
always compiled against statically in source form. Compile units do not
always compiled against statically in source form. Zig binaries do not
depend on libc unless explicitly linked.
* Nullable type instead of null pointers.
* Tagged union type instead of raw unions.
* Optional type instead of null pointers.
* Safe unions, tagged unions, and C ABI compatible unions.
* Generics so that one can write efficient data structures that work for any
data type.
* No header files required. Top level declarations are entirely
order-independent.
* Compile-time code execution. Compile-time reflection.
* Partial compile-time function evaluation with eliminates the need for
* Partial compile-time function evaluation which eliminates the need for
a preprocessor or macros.
* The binaries produced by Zig have complete debugging information so you can,
for example, use GDB to debug your software.
for example, use GDB, MSVC, or LLDB to debug your software.
* Built-in unit tests with `zig test`.
* Friendly toward package maintainers. Reproducible build, bootstrapping
process carefully documented. Issues filed by package maintainers are
@@ -54,173 +52,109 @@ that counts as "freestanding" for the purposes of this table.

| | freestanding | linux | macosx | windows | other |
|-------------|--------------|---------|---------|---------|---------|
|i386 | OK | planned | OK | OK | planned |
|i386 | OK | planned | OK | planned | planned |
|x86_64 | OK | OK | OK | OK | planned |
|arm | OK | planned | planned | N/A | planned |
|aarch64 | OK | planned | planned | planned | planned |
|avr | OK | planned | planned | N/A | planned |
|bpf | OK | planned | planned | N/A | planned |
|hexagon | OK | planned | planned | N/A | planned |
|mips | OK | planned | planned | N/A | planned |
|msp430 | OK | planned | planned | N/A | planned |
|nios2 | OK | planned | planned | N/A | planned |
|powerpc | OK | planned | planned | N/A | planned |
|r600 | OK | planned | planned | N/A | planned |
|amdgcn | OK | planned | planned | N/A | planned |
|riscv | OK | planned | planned | N/A | planned |
|sparc | OK | planned | planned | N/A | planned |
|s390x | OK | planned | planned | N/A | planned |
|tce | OK | planned | planned | N/A | planned |
|thumb | OK | planned | planned | N/A | planned |
|xcore | OK | planned | planned | N/A | planned |
|nvptx | OK | planned | planned | N/A | planned |
|le | OK | planned | planned | N/A | planned |
|amdil | OK | planned | planned | N/A | planned |
|hsail | OK | planned | planned | N/A | planned |
|spir | OK | planned | planned | N/A | planned |
|kalimba | OK | planned | planned | N/A | planned |
|shave | OK | planned | planned | N/A | planned |
|lanai | OK | planned | planned | N/A | planned |
|wasm | OK | N/A | N/A | N/A | N/A |
|renderscript | OK | N/A | N/A | N/A | N/A |
|aarch64 | OK | planned | N/A | planned | planned |
|bpf | OK | planned | N/A | N/A | planned |
|hexagon | OK | planned | N/A | N/A | planned |
|mips | OK | planned | N/A | N/A | planned |
|powerpc | OK | planned | N/A | N/A | planned |
|r600 | OK | planned | N/A | N/A | planned |
|amdgcn | OK | planned | N/A | N/A | planned |
|sparc | OK | planned | N/A | N/A | planned |
|s390x | OK | planned | N/A | N/A | planned |
|thumb | OK | planned | N/A | N/A | planned |
|spir | OK | planned | N/A | N/A | planned |
|lanai | OK | planned | N/A | N/A | planned |

## Community

* IRC: `#zig` on Freenode.
* IRC: `#zig` on Freenode ([Channel Logs](https://irclog.whitequark.org/zig/)).
* Reddit: [/r/zig](https://www.reddit.com/r/zig)
* Email list: [ziglang@googlegroups.com](https://groups.google.com/forum/#!forum/ziglang)

### Wanted: Windows Developers

Help get the tests passing on Windows, flesh out the standard library for
Windows, streamline Zig installation and distribution for Windows. Work with
LLVM and LLD teams to improve PDB/CodeView/MSVC debugging. Implement stack traces
for Windows in the MinGW environment and the MSVC environment.

### Wanted: MacOS and iOS Developers

Flesh out the standard library for MacOS. Improve the MACH-O linker. Implement
stack traces for MacOS. Streamline the process of using Zig to build for
iOS.

### Wanted: Android Developers

Flesh out the standard library for Android. Streamline the process of using
Zig to build for Android and for depending on Zig code on Android.

### Wanted: Web Developers

Figure out what are the use cases for compiling Zig to WebAssembly. Create demo
projects with it and streamline experience for users trying to output
WebAssembly. Work on the documentation generator outputting useful searchable html
documentation. Create Zig modules for common web tasks such as WebSockets and gzip.

### Wanted: Embedded Developers

Flesh out the standard library for uncommon CPU architectures and OS targets.
Drive issue discussion for cross compiling and using Zig in constrained
or unusual environments.

### Wanted: Game Developers

Create cross platform Zig modules to compete with SDL and GLFW. Create an
OpenGL library that does not depend on libc. Drive the usability of Zig
for video games. Create a general purpose allocator that does not depend on
libc. Create demo games using Zig.

## Building

[](https://travis-ci.org/zig-lang/zig)
[](https://travis-ci.org/ziglang/zig)
[](https://ci.appveyor.com/project/andrewrk/zig-d3l86/branch/master)

### Dependencies
### Stage 1: Build Zig from C++ Source Code

#### Build Dependencies

These compile tools must be available on your system and are used to build
the Zig compiler itself:
#### Dependencies

##### POSIX

* gcc >= 5.0.0 or clang >= 3.6.0
* cmake >= 2.8.5
* gcc >= 5.0.0 or clang >= 3.6.0
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same gcc or clang version above
- These depend on zlib and libxml2.

##### Windows

* cmake >= 2.8.5
* Microsoft Visual Studio 2015
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same MSVC version above

#### Library Dependencies
#### Instructions

These libraries must be installed on your system, with the development files
available. The Zig compiler links against them. You have to use the same
compiler for these libraries as you do to compile Zig.

* LLVM, Clang, and LLD libraries == 5.x

### Debug / Development Build

If you have gcc or clang installed, you can find out what `ZIG_LIBC_LIB_DIR`,
`ZIG_LIBC_STATIC_LIB_DIR`, and `ZIG_LIBC_INCLUDE_DIR` should be set to
(example below).
##### POSIX

```
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=$(pwd) -DZIG_LIBC_LIB_DIR=$(dirname $(cc -print-file-name=crt1.o)) -DZIG_LIBC_INCLUDE_DIR=$(echo -n | cc -E -x c - -v 2>&1 | grep -B1 "End of search list." | head -n1 | cut -c 2- | sed "s/ .*//") -DZIG_LIBC_STATIC_LIB_DIR=$(dirname $(cc -print-file-name=crtbegin.o))
cmake ..
make
make install
./zig build --build-file ../build.zig test
bin/zig build --build-file ../build.zig test
```

#### MacOS

`ZIG_LIBC_LIB_DIR` and `ZIG_LIBC_STATIC_LIB_DIR` are unused.
##### MacOS

```
brew install llvm@5
brew outdated llvm@5 || brew upgrade llvm@5
brew install cmake llvm@7
brew outdated llvm@7 || brew upgrade llvm@7
mkdir build
cd build
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@5/ -DCMAKE_INSTALL_PREFIX=$(pwd)
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@7/
make install
./zig build --build-file ../build.zig test
bin/zig build --build-file ../build.zig test
```

#### Windows
##### Windows

See https://github.com/zig-lang/zig/wiki/Building-Zig-on-Windows
See https://github.com/ziglang/zig/wiki/Building-Zig-on-Windows

### Release / Install Build
### Stage 2: Build Self-Hosted Zig from Zig Source Code

Once installed, `ZIG_LIBC_LIB_DIR` and `ZIG_LIBC_INCLUDE_DIR` can be overridden
by the `--libc-lib-dir` and `--libc-include-dir` parameters to the zig binary.
*Note: Stage 2 compiler is not complete. Beta users of Zig should use the
Stage 1 compiler for now.*

Dependencies are the same as Stage 1, except now you have a working zig compiler.

```
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DZIG_LIBC_LIB_DIR=/some/path -DZIG_LIBC_INCLUDE_DIR=/some/path -DZIG_LIBC_STATIC_INCLUDE_DIR=/some/path
make
sudo make install
bin/zig build --build-file ../build.zig --prefix $(pwd)/stage2 install
```

### Test Coverage
This produces `./stage2/bin/zig` which can be used for testing and development.
Once it is feature complete, it will be used to build stage 3 - the final compiler
binary.

To see test coverage in Zig, configure with `-DZIG_TEST_COVERAGE=ON` as an
additional parameter to the Debug build.
### Stage 3: Rebuild Self-Hosted Zig Using the Self-Hosted Compiler

You must have `lcov` installed and available.
This is the actual compiler binary that we will install to the system.

Then `make coverage`.
*Note: Stage 2 compiler is not yet able to build Stage 3. Building Stage 3 is
not yet supported.*

With GCC you will get a nice HTML view of the coverage data. With clang,
the last step will fail, but you can execute
`llvm-cov gcov $(find CMakeFiles/ -name "*.gcda")` and then inspect the
produced .gcov files.
#### Debug / Development Build

### Related Projects
```
./stage2/bin/zig build --build-file ../build.zig --prefix $(pwd)/stage3 install
```

* [zig-mode](https://github.com/AndreaOrru/zig-mode) - Emacs integration
* [zig.vim](https://github.com/zig-lang/zig.vim) - Vim configuration files
* [vscode-zig](https://github.com/zig-lang/vscode-zig) - Visual Studio Code extension
* [zig-compiler-completions](https://github.com/tiehuis/zig-compiler-completions) - bash and zsh completions for the zig compiler
* [NppExtension](https://github.com/ice1000/NppExtension) - Notepad++ syntax highlighting
#### Release / Install Build

```
./stage2/bin/zig build --build-file ../build.zig install -Drelease-fast
```

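The README hunks above mention optional types and compile-time generics among the feature highlights. For orientation only, here is a minimal sketch of what those two features look like in practice. It is not part of the diff: the helper names `max` and `firstPositive` are made up for this illustration, and the syntax follows a recent Zig release, which may differ from the 2018-era compiler this comparison targets. It should run under a current `zig test`.

```zig
const std = @import("std");

// Illustrative helpers invented for this sketch; they are not part of the
// Zig repository or of the diff above.
fn max(comptime T: type, a: T, b: T) T {
    // `comptime T: type` is the generics mechanism from the feature list:
    // the function is instantiated for each concrete T at compile time.
    return if (a > b) a else b;
}

fn firstPositive(items: []const i32) ?i32 {
    // `?i32` is an optional: "no value" is explicit in the type instead of
    // being encoded as a null pointer.
    for (items) |x| {
        if (x > 0) return x;
    }
    return null;
}

test "optionals and comptime generics" {
    const xs = [_]i32{ -3, 0, 7 };
    const v = firstPositive(&xs) orelse unreachable;
    try std.testing.expectEqual(@as(i32, 7), v);
    try std.testing.expectEqual(@as(i32, 2), max(i32, 1, 2));
}
```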
build.zig (323 changed lines)
@@ -1,27 +1,312 @@
const Builder = @import("std").build.Builder;
const builtin = @import("builtin");
const std = @import("std");
const Builder = std.build.Builder;
const tests = @import("test/tests.zig");
const os = std.os;
const BufMap = std.BufMap;
const warn = std.debug.warn;
const mem = std.mem;
const ArrayList = std.ArrayList;
const Buffer = std.Buffer;
const io = std.io;

pub fn build(b: *Builder) !void {
const mode = b.standardReleaseOptions();

var docgen_exe = b.addExecutable("docgen", "doc/docgen.zig");

const rel_zig_exe = try os.path.relative(b.allocator, b.build_root, b.zig_exe);
const langref_out_path = os.path.join(b.allocator, b.cache_root, "langref.html") catch unreachable;
var docgen_cmd = b.addCommand(null, b.env_map, [][]const u8{
docgen_exe.getOutputPath(),
rel_zig_exe,
"doc" ++ os.path.sep_str ++ "langref.html.in",
langref_out_path,
});
docgen_cmd.step.dependOn(&docgen_exe.step);

const docs_step = b.step("docs", "Build documentation");
docs_step.dependOn(&docgen_cmd.step);

pub fn build(b: &Builder) {
const test_filter = b.option([]const u8, "test-filter", "Skip tests that do not match filter");
const with_lldb = b.option(bool, "with-lldb", "Run tests in LLDB to get a backtrace if one fails") ?? false;
const test_step = b.step("test", "Run all the tests");

test_step.dependOn(tests.addPkgTests(b, test_filter,
"test/behavior.zig", "behavior", "Run the behavior tests",
with_lldb));
// find the stage0 build artifacts because we're going to re-use config.h and zig_cpp library
const build_info = try b.exec([][]const u8{
b.zig_exe,
"BUILD_INFO",
});
var index: usize = 0;
var ctx = Context{
.cmake_binary_dir = nextValue(&index, build_info),
.cxx_compiler = nextValue(&index, build_info),
.llvm_config_exe = nextValue(&index, build_info),
.lld_include_dir = nextValue(&index, build_info),
.lld_libraries = nextValue(&index, build_info),
.std_files = nextValue(&index, build_info),
.c_header_files = nextValue(&index, build_info),
.dia_guids_lib = nextValue(&index, build_info),
.llvm = undefined,
.no_rosegment = b.option(bool, "no-rosegment", "Workaround to enable valgrind builds") orelse false,
};
ctx.llvm = try findLLVM(b, ctx.llvm_config_exe);

test_step.dependOn(tests.addPkgTests(b, test_filter,
"std/index.zig", "std", "Run the standard library tests",
with_lldb));
var test_stage2 = b.addTest("src-self-hosted/test.zig");
test_stage2.setBuildMode(builtin.Mode.Debug);

test_step.dependOn(tests.addPkgTests(b, test_filter,
"std/special/compiler_rt/index.zig", "compiler-rt", "Run the compiler_rt tests",
with_lldb));
var exe = b.addExecutable("zig", "src-self-hosted/main.zig");
exe.setBuildMode(mode);

test_step.dependOn(tests.addCompareOutputTests(b, test_filter));
test_step.dependOn(tests.addBuildExampleTests(b, test_filter));
test_step.dependOn(tests.addCompileErrorTests(b, test_filter));
test_step.dependOn(tests.addAssembleAndLinkTests(b, test_filter));
test_step.dependOn(tests.addDebugSafetyTests(b, test_filter));
test_step.dependOn(tests.addParseCTests(b, test_filter));
try configureStage2(b, test_stage2, ctx);
try configureStage2(b, exe, ctx);

b.default_step.dependOn(&exe.step);

const skip_release = b.option(bool, "skip-release", "Main test suite skips release builds") orelse false;
const skip_release_small = b.option(bool, "skip-release-small", "Main test suite skips release-small builds") orelse skip_release;
const skip_release_fast = b.option(bool, "skip-release-fast", "Main test suite skips release-fast builds") orelse skip_release;
const skip_release_safe = b.option(bool, "skip-release-safe", "Main test suite skips release-safe builds") orelse skip_release;
const skip_self_hosted = b.option(bool, "skip-self-hosted", "Main test suite skips building self hosted compiler") orelse false;
if (!skip_self_hosted) {
test_step.dependOn(&exe.step);
}
const verbose_link_exe = b.option(bool, "verbose-link", "Print link command for self hosted compiler") orelse false;
exe.setVerboseLink(verbose_link_exe);

b.installArtifact(exe);
installStdLib(b, ctx.std_files);
installCHeaders(b, ctx.c_header_files);

const test_filter = b.option([]const u8, "test-filter", "Skip tests that do not match filter");
|
||||
|
||||
const test_stage2_step = b.step("test-stage2", "Run the stage2 compiler tests");
|
||||
test_stage2_step.dependOn(&test_stage2.step);
|
||||
|
||||
// TODO see https://github.com/ziglang/zig/issues/1364
|
||||
if (false) {
|
||||
test_step.dependOn(test_stage2_step);
|
||||
}
|
||||
|
||||
var chosen_modes: [4]builtin.Mode = undefined;
|
||||
var chosen_mode_index: usize = 0;
|
||||
chosen_modes[chosen_mode_index] = builtin.Mode.Debug;
|
||||
chosen_mode_index += 1;
|
||||
if (!skip_release_safe) {
|
||||
chosen_modes[chosen_mode_index] = builtin.Mode.ReleaseSafe;
|
||||
chosen_mode_index += 1;
|
||||
}
|
||||
if (!skip_release_fast) {
|
||||
chosen_modes[chosen_mode_index] = builtin.Mode.ReleaseFast;
|
||||
chosen_mode_index += 1;
|
||||
}
|
||||
if (!skip_release_small) {
|
||||
chosen_modes[chosen_mode_index] = builtin.Mode.ReleaseSmall;
|
||||
chosen_mode_index += 1;
|
||||
}
|
||||
const modes = chosen_modes[0..chosen_mode_index];
|
||||
|
||||
test_step.dependOn(tests.addPkgTests(b, test_filter, "test/behavior.zig", "behavior", "Run the behavior tests", modes));
|
||||
|
||||
test_step.dependOn(tests.addPkgTests(b, test_filter, "std/index.zig", "std", "Run the standard library tests", modes));
|
||||
|
||||
test_step.dependOn(tests.addPkgTests(b, test_filter, "std/special/compiler_rt/index.zig", "compiler-rt", "Run the compiler_rt tests", modes));
|
||||
|
||||
test_step.dependOn(tests.addCompareOutputTests(b, test_filter, modes));
|
||||
test_step.dependOn(tests.addBuildExampleTests(b, test_filter, modes));
|
||||
test_step.dependOn(tests.addCliTests(b, test_filter, modes));
|
||||
test_step.dependOn(tests.addCompileErrorTests(b, test_filter, modes));
|
||||
test_step.dependOn(tests.addAssembleAndLinkTests(b, test_filter, modes));
|
||||
test_step.dependOn(tests.addRuntimeSafetyTests(b, test_filter, modes));
|
||||
test_step.dependOn(tests.addTranslateCTests(b, test_filter));
|
||||
test_step.dependOn(tests.addGenHTests(b, test_filter));
|
||||
test_step.dependOn(docs_step);
|
||||
}
|
||||
|
||||
fn dependOnLib(lib_exe_obj: var, dep: *const LibraryDep) void {
|
||||
for (dep.libdirs.toSliceConst()) |lib_dir| {
|
||||
lib_exe_obj.addLibPath(lib_dir);
|
||||
}
|
||||
for (dep.system_libs.toSliceConst()) |lib| {
|
||||
lib_exe_obj.linkSystemLibrary(lib);
|
||||
}
|
||||
for (dep.libs.toSliceConst()) |lib| {
|
||||
lib_exe_obj.addObjectFile(lib);
|
||||
}
|
||||
for (dep.includes.toSliceConst()) |include_path| {
|
||||
lib_exe_obj.addIncludeDir(include_path);
|
||||
}
|
||||
}
|
||||
|
||||
fn addCppLib(b: *Builder, lib_exe_obj: var, cmake_binary_dir: []const u8, lib_name: []const u8) void {
|
||||
const lib_prefix = if (lib_exe_obj.target.isWindows()) "" else "lib";
|
||||
lib_exe_obj.addObjectFile(os.path.join(b.allocator, cmake_binary_dir, "zig_cpp", b.fmt("{}{}{}", lib_prefix, lib_name, lib_exe_obj.target.libFileExt())) catch unreachable);
|
||||
}
|
||||
|
||||
const LibraryDep = struct {
|
||||
libdirs: ArrayList([]const u8),
|
||||
libs: ArrayList([]const u8),
|
||||
system_libs: ArrayList([]const u8),
|
||||
includes: ArrayList([]const u8),
|
||||
};
|
||||
|
||||
fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
|
||||
const libs_output = try b.exec([][]const u8{
|
||||
llvm_config_exe,
|
||||
"--libs",
|
||||
"--system-libs",
|
||||
});
|
||||
const includes_output = try b.exec([][]const u8{
|
||||
llvm_config_exe,
|
||||
"--includedir",
|
||||
});
|
||||
const libdir_output = try b.exec([][]const u8{
|
||||
llvm_config_exe,
|
||||
"--libdir",
|
||||
});
|
||||
|
||||
var result = LibraryDep{
|
||||
.libs = ArrayList([]const u8).init(b.allocator),
|
||||
.system_libs = ArrayList([]const u8).init(b.allocator),
|
||||
.includes = ArrayList([]const u8).init(b.allocator),
|
||||
.libdirs = ArrayList([]const u8).init(b.allocator),
|
||||
};
|
||||
{
|
||||
var it = mem.split(libs_output, " \r\n");
|
||||
while (it.next()) |lib_arg| {
|
||||
if (mem.startsWith(u8, lib_arg, "-l")) {
|
||||
try result.system_libs.append(lib_arg[2..]);
|
||||
} else {
|
||||
if (os.path.isAbsolute(lib_arg)) {
|
||||
try result.libs.append(lib_arg);
|
||||
} else {
|
||||
try result.system_libs.append(lib_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
var it = mem.split(includes_output, " \r\n");
|
||||
while (it.next()) |include_arg| {
|
||||
if (mem.startsWith(u8, include_arg, "-I")) {
|
||||
try result.includes.append(include_arg[2..]);
|
||||
} else {
|
||||
try result.includes.append(include_arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
var it = mem.split(libdir_output, " \r\n");
|
||||
while (it.next()) |libdir| {
|
||||
if (mem.startsWith(u8, libdir, "-L")) {
|
||||
try result.libdirs.append(libdir[2..]);
|
||||
} else {
|
||||
try result.libdirs.append(libdir);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
pub fn installStdLib(b: *Builder, stdlib_files: []const u8) void {
|
||||
var it = mem.split(stdlib_files, ";");
|
||||
while (it.next()) |stdlib_file| {
|
||||
const src_path = os.path.join(b.allocator, "std", stdlib_file) catch unreachable;
|
||||
const dest_path = os.path.join(b.allocator, "lib", "zig", "std", stdlib_file) catch unreachable;
|
||||
b.installFile(src_path, dest_path);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn installCHeaders(b: *Builder, c_header_files: []const u8) void {
|
||||
var it = mem.split(c_header_files, ";");
|
||||
while (it.next()) |c_header_file| {
|
||||
const src_path = os.path.join(b.allocator, "c_headers", c_header_file) catch unreachable;
|
||||
const dest_path = os.path.join(b.allocator, "lib", "zig", "include", c_header_file) catch unreachable;
|
||||
b.installFile(src_path, dest_path);
|
||||
}
|
||||
}
|
||||
|
||||
fn nextValue(index: *usize, build_info: []const u8) []const u8 {
|
||||
const start = index.*;
|
||||
while (true) : (index.* += 1) {
|
||||
switch (build_info[index.*]) {
|
||||
'\n' => {
|
||||
const result = build_info[start..index.*];
|
||||
index.* += 1;
|
||||
return result;
|
||||
},
|
||||
'\r' => {
|
||||
const result = build_info[start..index.*];
|
||||
index.* += 2;
|
||||
return result;
|
||||
},
|
||||
else => continue,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn configureStage2(b: *Builder, exe: var, ctx: Context) !void {
|
||||
// This is for finding /lib/libz.a on alpine linux.
|
||||
// TODO turn this into -Dextra-lib-path=/lib option
|
||||
exe.addLibPath("/lib");
|
||||
|
||||
exe.setNoRoSegment(ctx.no_rosegment);
|
||||
|
||||
exe.addIncludeDir("src");
|
||||
exe.addIncludeDir(ctx.cmake_binary_dir);
|
||||
addCppLib(b, exe, ctx.cmake_binary_dir, "zig_cpp");
|
||||
if (ctx.lld_include_dir.len != 0) {
|
||||
exe.addIncludeDir(ctx.lld_include_dir);
|
||||
var it = mem.split(ctx.lld_libraries, ";");
|
||||
while (it.next()) |lib| {
|
||||
exe.addObjectFile(lib);
|
||||
}
|
||||
} else {
|
||||
addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_wasm");
|
||||
addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_elf");
|
||||
addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_coff");
|
||||
addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_lib");
|
||||
}
|
||||
dependOnLib(exe, ctx.llvm);
|
||||
|
||||
if (exe.target.getOs() == builtin.Os.linux) {
|
||||
const libstdcxx_path_padded = try b.exec([][]const u8{
|
||||
ctx.cxx_compiler,
|
||||
"-print-file-name=libstdc++.a",
|
||||
});
|
||||
const libstdcxx_path = mem.split(libstdcxx_path_padded, "\r\n").next().?;
|
||||
if (mem.eql(u8, libstdcxx_path, "libstdc++.a")) {
|
||||
warn(
|
||||
\\Unable to determine path to libstdc++.a
|
||||
\\On Fedora, install libstdc++-static and try again.
|
||||
\\
|
||||
);
|
||||
return error.RequiredLibraryNotFound;
|
||||
}
|
||||
exe.addObjectFile(libstdcxx_path);
|
||||
|
||||
exe.linkSystemLibrary("pthread");
|
||||
} else if (exe.target.isDarwin()) {
|
||||
exe.linkSystemLibrary("c++");
|
||||
}
|
||||
|
||||
if (ctx.dia_guids_lib.len != 0) {
|
||||
exe.addObjectFile(ctx.dia_guids_lib);
|
||||
}
|
||||
|
||||
if (exe.target.getOs() != builtin.Os.windows) {
|
||||
exe.linkSystemLibrary("xml2");
|
||||
}
|
||||
exe.linkSystemLibrary("c");
|
||||
}
|
||||
|
||||
const Context = struct {
|
||||
cmake_binary_dir: []const u8,
|
||||
cxx_compiler: []const u8,
|
||||
llvm_config_exe: []const u8,
|
||||
lld_include_dir: []const u8,
|
||||
lld_libraries: []const u8,
|
||||
std_files: []const u8,
|
||||
c_header_files: []const u8,
|
||||
dia_guids_lib: []const u8,
|
||||
llvm: LibraryDep,
|
||||
no_rosegment: bool,
|
||||
};
|
||||
|
||||
@@ -54,7 +54,7 @@ struct dim3;
|
||||
#define __DELETE
|
||||
#endif
|
||||
|
||||
// Make sure nobody can create instances of the special varible types. nvcc
|
||||
// Make sure nobody can create instances of the special variable types. nvcc
|
||||
// also disallows taking address of special variables, so we disable address-of
|
||||
// operator as well.
|
||||
#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \
|
||||
|
||||
@@ -131,15 +131,6 @@ __DEVICE__ float ldexp(float __arg, int __exp) {
|
||||
__DEVICE__ float log(float __x) { return ::logf(__x); }
|
||||
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
|
||||
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
|
||||
__DEVICE__ float nexttoward(float __from, double __to) {
|
||||
return __builtin_nexttowardf(__from, __to);
|
||||
}
|
||||
__DEVICE__ double nexttoward(double __from, double __to) {
|
||||
return __builtin_nexttoward(__from, __to);
|
||||
}
|
||||
__DEVICE__ float nexttowardf(float __from, double __to) {
|
||||
return __builtin_nexttowardf(__from, __to);
|
||||
}
|
||||
__DEVICE__ float pow(float __base, float __exp) {
|
||||
return ::powf(__base, __exp);
|
||||
}
|
||||
@@ -157,6 +148,10 @@ __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
|
||||
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
|
||||
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
|
||||
|
||||
// Notably missing above is nexttoward. We omit it because
|
||||
// libdevice doesn't provide an implementation, and we don't want to be in the
|
||||
// business of implementing tricky libm functions in this header.
|
||||
|
||||
// Now we've defined everything we promised we'd define in
|
||||
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
|
||||
// fix up our math functions.
|
||||
@@ -295,13 +290,6 @@ ldexp(__T __x, int __exp) {
|
||||
return std::ldexp((double)__x, __exp);
|
||||
}
|
||||
|
||||
template <typename __T>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
|
||||
double>::type
|
||||
nexttoward(__T __from, double __to) {
|
||||
return std::nexttoward((double)__from, __to);
|
||||
}
|
||||
|
||||
template <typename __T1, typename __T2>
|
||||
__DEVICE__ typename __clang_cuda_enable_if<
|
||||
std::numeric_limits<__T1>::is_specialized &&
|
||||
@@ -388,7 +376,6 @@ using ::lrint;
|
||||
using ::lround;
|
||||
using ::nearbyint;
|
||||
using ::nextafter;
|
||||
using ::nexttoward;
|
||||
using ::pow;
|
||||
using ::remainder;
|
||||
using ::remquo;
|
||||
@@ -456,8 +443,6 @@ using ::lroundf;
|
||||
using ::modff;
|
||||
using ::nearbyintf;
|
||||
using ::nextafterf;
|
||||
using ::nexttowardf;
|
||||
using ::nexttowardf;
|
||||
using ::powf;
|
||||
using ::remainderf;
|
||||
using ::remquof;
|
||||
|
||||
1768
c_headers/__clang_cuda_device_functions.h
Normal file
File diff suppressed because it is too large
@@ -34,23 +34,24 @@
|
||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
|
||||
|
||||
#pragma push_macro("__MAKE_SHUFFLES")
|
||||
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask) \
|
||||
inline __device__ int __FnName(int __val, int __offset, \
|
||||
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \
|
||||
__Type) \
|
||||
inline __device__ int __FnName(int __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return __IntIntrinsic(__val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ float __FnName(float __val, int __offset, \
|
||||
inline __device__ float __FnName(float __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return __FloatIntrinsic(__val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ unsigned int __FnName(unsigned int __val, int __offset, \
|
||||
inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned int>( \
|
||||
::__FnName(static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ long long __FnName(long long __val, int __offset, \
|
||||
inline __device__ long long __FnName(long long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
struct __Bits { \
|
||||
int __a, __b; \
|
||||
@@ -65,12 +66,29 @@
|
||||
memcpy(&__ret, &__tmp, sizeof(__tmp)); \
|
||||
return __ret; \
|
||||
} \
|
||||
inline __device__ long __FnName(long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
_Static_assert(sizeof(long) == sizeof(long long) || \
|
||||
sizeof(long) == sizeof(int)); \
|
||||
if (sizeof(long) == sizeof(long long)) { \
|
||||
return static_cast<long>( \
|
||||
::__FnName(static_cast<long long>(__val), __offset, __width)); \
|
||||
} else if (sizeof(long) == sizeof(int)) { \
|
||||
return static_cast<long>( \
|
||||
::__FnName(static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
} \
|
||||
inline __device__ unsigned long __FnName( \
|
||||
unsigned long __val, __Type __offset, int __width = warpSize) { \
|
||||
return static_cast<unsigned long>( \
|
||||
::__FnName(static_cast<long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ unsigned long long __FnName( \
|
||||
unsigned long long __val, int __offset, int __width = warpSize) { \
|
||||
unsigned long long __val, __Type __offset, int __width = warpSize) { \
|
||||
return static_cast<unsigned long long>(::__FnName( \
|
||||
static_cast<unsigned long long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ double __FnName(double __val, int __offset, \
|
||||
inline __device__ double __FnName(double __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
long long __tmp; \
|
||||
_Static_assert(sizeof(__tmp) == sizeof(__val)); \
|
||||
@@ -81,17 +99,166 @@
|
||||
return __ret; \
|
||||
}
|
||||
|
||||
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f);
|
||||
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);
|
||||
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
|
||||
// maxLane.
|
||||
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0);
|
||||
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f);
|
||||
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
|
||||
|
||||
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,
|
||||
unsigned int);
|
||||
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,
|
||||
unsigned int);
|
||||
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
|
||||
int);
|
||||
#pragma pop_macro("__MAKE_SHUFFLES")
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
|
||||
|
||||
#if CUDA_VERSION >= 9000
|
||||
#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
|
||||
// __shfl_sync_* variants available in CUDA-9
|
||||
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
|
||||
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \
|
||||
__Mask, __Type) \
|
||||
inline __device__ int __FnName(unsigned int __mask, int __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
return __IntIntrinsic(__mask, __val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ float __FnName(unsigned int __mask, float __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
return __FloatIntrinsic(__mask, __val, __offset, \
|
||||
((warpSize - __width) << 8) | (__Mask)); \
|
||||
} \
|
||||
inline __device__ unsigned int __FnName(unsigned int __mask, \
|
||||
unsigned int __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned int>( \
|
||||
::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ long long __FnName(unsigned int __mask, long long __val, \
|
||||
__Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
struct __Bits { \
|
||||
int __a, __b; \
|
||||
}; \
|
||||
_Static_assert(sizeof(__val) == sizeof(__Bits)); \
|
||||
_Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
|
||||
__Bits __tmp; \
|
||||
memcpy(&__val, &__tmp, sizeof(__val)); \
|
||||
__tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \
|
||||
__tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \
|
||||
long long __ret; \
|
||||
memcpy(&__ret, &__tmp, sizeof(__tmp)); \
|
||||
return __ret; \
|
||||
} \
|
||||
inline __device__ unsigned long long __FnName( \
|
||||
unsigned int __mask, unsigned long long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned long long>(::__FnName( \
|
||||
__mask, static_cast<unsigned long long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ long __FnName(unsigned int __mask, long __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
_Static_assert(sizeof(long) == sizeof(long long) || \
|
||||
sizeof(long) == sizeof(int)); \
|
||||
if (sizeof(long) == sizeof(long long)) { \
|
||||
return static_cast<long>(::__FnName( \
|
||||
__mask, static_cast<long long>(__val), __offset, __width)); \
|
||||
} else if (sizeof(long) == sizeof(int)) { \
|
||||
return static_cast<long>( \
|
||||
::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
|
||||
} \
|
||||
} \
|
||||
inline __device__ unsigned long __FnName( \
|
||||
unsigned int __mask, unsigned long __val, __Type __offset, \
|
||||
int __width = warpSize) { \
|
||||
return static_cast<unsigned long>( \
|
||||
::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \
|
||||
} \
|
||||
inline __device__ double __FnName(unsigned int __mask, double __val, \
|
||||
__Type __offset, int __width = warpSize) { \
|
||||
long long __tmp; \
|
||||
_Static_assert(sizeof(__tmp) == sizeof(__val)); \
|
||||
memcpy(&__tmp, &__val, sizeof(__val)); \
|
||||
__tmp = ::__FnName(__mask, __tmp, __offset, __width); \
|
||||
double __ret; \
|
||||
memcpy(&__ret, &__tmp, sizeof(__ret)); \
|
||||
return __ret; \
|
||||
}
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,
|
||||
__nvvm_shfl_sync_idx_f32, 0x1f, int);
|
||||
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
|
||||
// maxLane.
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,
|
||||
__nvvm_shfl_sync_up_f32, 0, unsigned int);
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
|
||||
__nvvm_shfl_sync_down_f32, 0x1f, unsigned int);
|
||||
__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,
|
||||
__nvvm_shfl_sync_bfly_f32, 0x1f, int);
|
||||
#pragma pop_macro("__MAKE_SYNC_SHUFFLES")
|
||||
|
||||
inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
|
||||
return __nvvm_bar_warp_sync(mask);
|
||||
}
|
||||
|
||||
inline __device__ void __barrier_sync(unsigned int id) {
|
||||
__nvvm_barrier_sync(id);
|
||||
}
|
||||
|
||||
inline __device__ void __barrier_sync_count(unsigned int id,
|
||||
unsigned int count) {
|
||||
__nvvm_barrier_sync_cnt(id, count);
|
||||
}
|
||||
|
||||
inline __device__ int __all_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_all_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ int __any_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_any_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ int __uni_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_uni_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
|
||||
return __nvvm_vote_ballot_sync(mask, pred);
|
||||
}
|
||||
|
||||
inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }
|
||||
|
||||
inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
|
||||
return __nvvm_fns(mask, base, offset);
|
||||
}
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
|
||||
|
||||
// Define __match* builtins CUDA-9 headers expect to see.
|
||||
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
|
||||
inline __device__ unsigned int __match32_any_sync(unsigned int mask,
|
||||
unsigned int value) {
|
||||
return __nvvm_match_any_sync_i32(mask, value);
|
||||
}
|
||||
|
||||
inline __device__ unsigned long long
|
||||
__match64_any_sync(unsigned int mask, unsigned long long value) {
|
||||
return __nvvm_match_any_sync_i64(mask, value);
|
||||
}
|
||||
|
||||
inline __device__ unsigned int
|
||||
__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
|
||||
return __nvvm_match_all_sync_i32p(mask, value, pred);
|
||||
}
|
||||
|
||||
inline __device__ unsigned long long
|
||||
__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
|
||||
return __nvvm_match_all_sync_i64p(mask, value, pred);
|
||||
}
|
||||
#include "crt/sm_70_rt.hpp"
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
|
||||
#endif // __CUDA_VERSION >= 9000
|
||||
|
||||
// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
|
||||
|
||||
// Prevent the vanilla sm_32 intrinsics header from being included.
|
||||
@@ -110,6 +277,9 @@ inline __device__ long long __ldg(const long long *ptr) {
|
||||
inline __device__ unsigned char __ldg(const unsigned char *ptr) {
|
||||
return __nvvm_ldg_uc(ptr);
|
||||
}
|
||||
inline __device__ signed char __ldg(const signed char *ptr) {
|
||||
return __nvvm_ldg_uc((const unsigned char *)ptr);
|
||||
}
|
||||
inline __device__ unsigned short __ldg(const unsigned short *ptr) {
|
||||
return __nvvm_ldg_us(ptr);
|
||||
}
|
||||
|
||||
466
c_headers/__clang_cuda_libdevice_declares.h
Normal file
@@ -0,0 +1,466 @@
|
||||
/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__
|
||||
#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__
|
||||
|
||||
extern "C" {
|
||||
|
||||
__device__ int __nv_abs(int __a);
|
||||
__device__ double __nv_acos(double __a);
|
||||
__device__ float __nv_acosf(float __a);
|
||||
__device__ double __nv_acosh(double __a);
|
||||
__device__ float __nv_acoshf(float __a);
|
||||
__device__ double __nv_asin(double __a);
|
||||
__device__ float __nv_asinf(float __a);
|
||||
__device__ double __nv_asinh(double __a);
|
||||
__device__ float __nv_asinhf(float __a);
|
||||
__device__ double __nv_atan2(double __a, double __b);
|
||||
__device__ float __nv_atan2f(float __a, float __b);
|
||||
__device__ double __nv_atan(double __a);
|
||||
__device__ float __nv_atanf(float __a);
|
||||
__device__ double __nv_atanh(double __a);
|
||||
__device__ float __nv_atanhf(float __a);
|
||||
__device__ int __nv_brev(int __a);
|
||||
__device__ long long __nv_brevll(long long __a);
|
||||
__device__ int __nv_byte_perm(int __a, int __b, int __c);
|
||||
__device__ double __nv_cbrt(double __a);
|
||||
__device__ float __nv_cbrtf(float __a);
|
||||
__device__ double __nv_ceil(double __a);
|
||||
__device__ float __nv_ceilf(float __a);
|
||||
__device__ int __nv_clz(int __a);
|
||||
__device__ int __nv_clzll(long long __a);
|
||||
__device__ double __nv_copysign(double __a, double __b);
|
||||
__device__ float __nv_copysignf(float __a, float __b);
|
||||
__device__ double __nv_cos(double __a);
|
||||
__device__ float __nv_cosf(float __a);
|
||||
__device__ double __nv_cosh(double __a);
|
||||
__device__ float __nv_coshf(float __a);
|
||||
__device__ double __nv_cospi(double __a);
|
||||
__device__ float __nv_cospif(float __a);
|
||||
__device__ double __nv_cyl_bessel_i0(double __a);
|
||||
__device__ float __nv_cyl_bessel_i0f(float __a);
|
||||
__device__ double __nv_cyl_bessel_i1(double __a);
|
||||
__device__ float __nv_cyl_bessel_i1f(float __a);
|
||||
__device__ double __nv_dadd_rd(double __a, double __b);
|
||||
__device__ double __nv_dadd_rn(double __a, double __b);
|
||||
__device__ double __nv_dadd_ru(double __a, double __b);
|
||||
__device__ double __nv_dadd_rz(double __a, double __b);
|
||||
__device__ double __nv_ddiv_rd(double __a, double __b);
|
||||
__device__ double __nv_ddiv_rn(double __a, double __b);
|
||||
__device__ double __nv_ddiv_ru(double __a, double __b);
|
||||
__device__ double __nv_ddiv_rz(double __a, double __b);
|
||||
__device__ double __nv_dmul_rd(double __a, double __b);
|
||||
__device__ double __nv_dmul_rn(double __a, double __b);
|
||||
__device__ double __nv_dmul_ru(double __a, double __b);
|
||||
__device__ double __nv_dmul_rz(double __a, double __b);
|
||||
__device__ float __nv_double2float_rd(double __a);
|
||||
__device__ float __nv_double2float_rn(double __a);
|
||||
__device__ float __nv_double2float_ru(double __a);
|
||||
__device__ float __nv_double2float_rz(double __a);
|
||||
__device__ int __nv_double2hiint(double __a);
|
||||
__device__ int __nv_double2int_rd(double __a);
|
||||
__device__ int __nv_double2int_rn(double __a);
|
||||
__device__ int __nv_double2int_ru(double __a);
|
||||
__device__ int __nv_double2int_rz(double __a);
|
||||
__device__ long long __nv_double2ll_rd(double __a);
|
||||
__device__ long long __nv_double2ll_rn(double __a);
|
||||
__device__ long long __nv_double2ll_ru(double __a);
|
||||
__device__ long long __nv_double2ll_rz(double __a);
|
||||
__device__ int __nv_double2loint(double __a);
|
||||
__device__ unsigned int __nv_double2uint_rd(double __a);
|
||||
__device__ unsigned int __nv_double2uint_rn(double __a);
|
||||
__device__ unsigned int __nv_double2uint_ru(double __a);
|
||||
__device__ unsigned int __nv_double2uint_rz(double __a);
|
||||
__device__ unsigned long long __nv_double2ull_rd(double __a);
|
||||
__device__ unsigned long long __nv_double2ull_rn(double __a);
|
||||
__device__ unsigned long long __nv_double2ull_ru(double __a);
|
||||
__device__ unsigned long long __nv_double2ull_rz(double __a);
|
||||
__device__ unsigned long long __nv_double_as_longlong(double __a);
|
||||
__device__ double __nv_drcp_rd(double __a);
|
||||
__device__ double __nv_drcp_rn(double __a);
|
||||
__device__ double __nv_drcp_ru(double __a);
|
||||
__device__ double __nv_drcp_rz(double __a);
|
||||
__device__ double __nv_dsqrt_rd(double __a);
|
||||
__device__ double __nv_dsqrt_rn(double __a);
|
||||
__device__ double __nv_dsqrt_ru(double __a);
|
||||
__device__ double __nv_dsqrt_rz(double __a);
|
||||
__device__ double __nv_dsub_rd(double __a, double __b);
|
||||
__device__ double __nv_dsub_rn(double __a, double __b);
|
||||
__device__ double __nv_dsub_ru(double __a, double __b);
|
||||
__device__ double __nv_dsub_rz(double __a, double __b);
|
||||
__device__ double __nv_erfc(double __a);
|
||||
__device__ float __nv_erfcf(float __a);
|
||||
__device__ double __nv_erfcinv(double __a);
|
||||
__device__ float __nv_erfcinvf(float __a);
|
||||
__device__ double __nv_erfcx(double __a);
|
||||
__device__ float __nv_erfcxf(float __a);
|
||||
__device__ double __nv_erf(double __a);
|
||||
__device__ float __nv_erff(float __a);
|
||||
__device__ double __nv_erfinv(double __a);
|
||||
__device__ float __nv_erfinvf(float __a);
|
||||
__device__ double __nv_exp10(double __a);
|
||||
__device__ float __nv_exp10f(float __a);
|
||||
__device__ double __nv_exp2(double __a);
|
||||
__device__ float __nv_exp2f(float __a);
|
||||
__device__ double __nv_exp(double __a);
|
||||
__device__ float __nv_expf(float __a);
|
||||
__device__ double __nv_expm1(double __a);
|
||||
__device__ float __nv_expm1f(float __a);
|
||||
__device__ double __nv_fabs(double __a);
|
||||
__device__ float __nv_fabsf(float __a);
|
||||
__device__ float __nv_fadd_rd(float __a, float __b);
|
||||
__device__ float __nv_fadd_rn(float __a, float __b);
|
||||
__device__ float __nv_fadd_ru(float __a, float __b);
|
||||
__device__ float __nv_fadd_rz(float __a, float __b);
|
||||
__device__ float __nv_fast_cosf(float __a);
|
||||
__device__ float __nv_fast_exp10f(float __a);
|
||||
__device__ float __nv_fast_expf(float __a);
|
||||
__device__ float __nv_fast_fdividef(float __a, float __b);
|
||||
__device__ float __nv_fast_log10f(float __a);
|
||||
__device__ float __nv_fast_log2f(float __a);
|
||||
__device__ float __nv_fast_logf(float __a);
|
||||
__device__ float __nv_fast_powf(float __a, float __b);
|
||||
__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);
|
||||
__device__ float __nv_fast_sinf(float __a);
|
||||
__device__ float __nv_fast_tanf(float __a);
|
||||
__device__ double __nv_fdim(double __a, double __b);
|
||||
__device__ float __nv_fdimf(float __a, float __b);
|
||||
__device__ float __nv_fdiv_rd(float __a, float __b);
|
||||
__device__ float __nv_fdiv_rn(float __a, float __b);
|
||||
__device__ float __nv_fdiv_ru(float __a, float __b);
|
||||
__device__ float __nv_fdiv_rz(float __a, float __b);
|
||||
__device__ int __nv_ffs(int __a);
|
||||
__device__ int __nv_ffsll(long long __a);
|
||||
__device__ int __nv_finitef(float __a);
|
||||
__device__ unsigned short __nv_float2half_rn(float __a);
|
||||
__device__ int __nv_float2int_rd(float __a);
|
||||
__device__ int __nv_float2int_rn(float __a);
|
||||
__device__ int __nv_float2int_ru(float __a);
|
||||
__device__ int __nv_float2int_rz(float __a);
|
||||
__device__ long long __nv_float2ll_rd(float __a);
|
||||
__device__ long long __nv_float2ll_rn(float __a);
|
||||
__device__ long long __nv_float2ll_ru(float __a);
|
||||
__device__ long long __nv_float2ll_rz(float __a);
|
||||
__device__ unsigned int __nv_float2uint_rd(float __a);
|
||||
__device__ unsigned int __nv_float2uint_rn(float __a);
|
||||
__device__ unsigned int __nv_float2uint_ru(float __a);
|
||||
__device__ unsigned int __nv_float2uint_rz(float __a);
|
||||
__device__ unsigned long long __nv_float2ull_rd(float __a);
|
||||
__device__ unsigned long long __nv_float2ull_rn(float __a);
|
||||
__device__ unsigned long long __nv_float2ull_ru(float __a);
|
||||
__device__ unsigned long long __nv_float2ull_rz(float __a);
|
||||
__device__ int __nv_float_as_int(float __a);
|
||||
__device__ unsigned int __nv_float_as_uint(float __a);
|
||||
__device__ double __nv_floor(double __a);
|
||||
__device__ float __nv_floorf(float __a);
|
||||
__device__ double __nv_fma(double __a, double __b, double __c);
|
||||
__device__ float __nv_fmaf(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_rd(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_rn(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_ru(float __a, float __b, float __c);
|
||||
__device__ float __nv_fmaf_rz(float __a, float __b, float __c);
|
||||
__device__ double __nv_fma_rd(double __a, double __b, double __c);
|
||||
__device__ double __nv_fma_rn(double __a, double __b, double __c);
|
||||
__device__ double __nv_fma_ru(double __a, double __b, double __c);
|
||||
__device__ double __nv_fma_rz(double __a, double __b, double __c);
|
||||
__device__ double __nv_fmax(double __a, double __b);
|
||||
__device__ float __nv_fmaxf(float __a, float __b);
|
||||
__device__ double __nv_fmin(double __a, double __b);
|
||||
__device__ float __nv_fminf(float __a, float __b);
|
||||
__device__ double __nv_fmod(double __a, double __b);
|
||||
__device__ float __nv_fmodf(float __a, float __b);
|
||||
__device__ float __nv_fmul_rd(float __a, float __b);
|
||||
__device__ float __nv_fmul_rn(float __a, float __b);
|
||||
__device__ float __nv_fmul_ru(float __a, float __b);
|
||||
__device__ float __nv_fmul_rz(float __a, float __b);
|
||||
__device__ float __nv_frcp_rd(float __a);
|
||||
__device__ float __nv_frcp_rn(float __a);
|
||||
__device__ float __nv_frcp_ru(float __a);
|
||||
__device__ float __nv_frcp_rz(float __a);
|
||||
__device__ double __nv_frexp(double __a, int *__b);
|
||||
__device__ float __nv_frexpf(float __a, int *__b);
|
||||
__device__ float __nv_frsqrt_rn(float __a);
|
||||
__device__ float __nv_fsqrt_rd(float __a);
|
||||
__device__ float __nv_fsqrt_rn(float __a);
|
||||
__device__ float __nv_fsqrt_ru(float __a);
|
||||
__device__ float __nv_fsqrt_rz(float __a);
|
||||
__device__ float __nv_fsub_rd(float __a, float __b);
|
||||
__device__ float __nv_fsub_rn(float __a, float __b);
|
||||
__device__ float __nv_fsub_ru(float __a, float __b);
|
||||
__device__ float __nv_fsub_rz(float __a, float __b);
|
||||
__device__ int __nv_hadd(int __a, int __b);
|
||||
__device__ float __nv_half2float(unsigned short __h);
|
||||
__device__ double __nv_hiloint2double(int __a, int __b);
|
||||
__device__ double __nv_hypot(double __a, double __b);
|
||||
__device__ float __nv_hypotf(float __a, float __b);
|
||||
__device__ int __nv_ilogb(double __a);
|
||||
__device__ int __nv_ilogbf(float __a);
|
||||
__device__ double __nv_int2double_rn(int __a);
|
||||
__device__ float __nv_int2float_rd(int __a);
|
||||
__device__ float __nv_int2float_rn(int __a);
|
||||
__device__ float __nv_int2float_ru(int __a);
|
||||
__device__ float __nv_int2float_rz(int __a);
|
||||
__device__ float __nv_int_as_float(int __a);
|
||||
__device__ int __nv_isfinited(double __a);
|
||||
__device__ int __nv_isinfd(double __a);
|
||||
__device__ int __nv_isinff(float __a);
|
||||
__device__ int __nv_isnand(double __a);
|
||||
__device__ int __nv_isnanf(float __a);
|
||||
__device__ double __nv_j0(double __a);
|
||||
__device__ float __nv_j0f(float __a);
|
||||
__device__ double __nv_j1(double __a);
|
||||
__device__ float __nv_j1f(float __a);
|
||||
__device__ float __nv_jnf(int __a, float __b);
|
||||
__device__ double __nv_jn(int __a, double __b);
|
||||
__device__ double __nv_ldexp(double __a, int __b);
|
||||
__device__ float __nv_ldexpf(float __a, int __b);
|
||||
__device__ double __nv_lgamma(double __a);
|
||||
__device__ float __nv_lgammaf(float __a);
|
||||
__device__ double __nv_ll2double_rd(long long __a);
|
||||
__device__ double __nv_ll2double_rn(long long __a);
|
||||
__device__ double __nv_ll2double_ru(long long __a);
|
||||
__device__ double __nv_ll2double_rz(long long __a);
|
||||
__device__ float __nv_ll2float_rd(long long __a);
|
||||
__device__ float __nv_ll2float_rn(long long __a);
|
||||
__device__ float __nv_ll2float_ru(long long __a);
|
||||
__device__ float __nv_ll2float_rz(long long __a);
|
||||
__device__ long long __nv_llabs(long long __a);
|
||||
__device__ long long __nv_llmax(long long __a, long long __b);
|
||||
__device__ long long __nv_llmin(long long __a, long long __b);
|
||||
__device__ long long __nv_llrint(double __a);
|
||||
__device__ long long __nv_llrintf(float __a);
|
||||
__device__ long long __nv_llround(double __a);
|
||||
__device__ long long __nv_llroundf(float __a);
|
||||
__device__ double __nv_log10(double __a);
|
||||
__device__ float __nv_log10f(float __a);
|
||||
__device__ double __nv_log1p(double __a);
|
||||
__device__ float __nv_log1pf(float __a);
|
||||
__device__ double __nv_log2(double __a);
|
||||
__device__ float __nv_log2f(float __a);
|
||||
__device__ double __nv_logb(double __a);
|
||||
__device__ float __nv_logbf(float __a);
|
||||
__device__ double __nv_log(double __a);
|
||||
__device__ float __nv_logf(float __a);
|
||||
__device__ double __nv_longlong_as_double(long long __a);
|
||||
__device__ int __nv_max(int __a, int __b);
|
||||
__device__ int __nv_min(int __a, int __b);
|
||||
__device__ double __nv_modf(double __a, double *__b);
|
||||
__device__ float __nv_modff(float __a, float *__b);
|
||||
__device__ int __nv_mul24(int __a, int __b);
|
||||
__device__ long long __nv_mul64hi(long long __a, long long __b);
|
||||
__device__ int __nv_mulhi(int __a, int __b);
|
||||
__device__ double __nv_nan(const signed char *__a);
|
||||
__device__ float __nv_nanf(const signed char *__a);
|
||||
__device__ double __nv_nearbyint(double __a);
|
||||
__device__ float __nv_nearbyintf(float __a);
|
||||
__device__ double __nv_nextafter(double __a, double __b);
|
||||
__device__ float __nv_nextafterf(float __a, float __b);
|
||||
__device__ double __nv_norm3d(double __a, double __b, double __c);
|
||||
__device__ float __nv_norm3df(float __a, float __b, float __c);
|
||||
__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);
|
||||
__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);
|
||||
__device__ double __nv_normcdf(double __a);
|
||||
__device__ float __nv_normcdff(float __a);
|
||||
__device__ double __nv_normcdfinv(double __a);
|
||||
__device__ float __nv_normcdfinvf(float __a);
|
||||
__device__ float __nv_normf(int __a, const float *__b);
|
||||
__device__ double __nv_norm(int __a, const double *__b);
|
||||
__device__ int __nv_popc(int __a);
|
||||
__device__ int __nv_popcll(long long __a);
|
||||
__device__ double __nv_pow(double __a, double __b);
|
||||
__device__ float __nv_powf(float __a, float __b);
|
||||
__device__ double __nv_powi(double __a, int __b);
|
||||
__device__ float __nv_powif(float __a, int __b);
|
||||
__device__ double __nv_rcbrt(double __a);
|
||||
__device__ float __nv_rcbrtf(float __a);
|
||||
__device__ double __nv_rcp64h(double __a);
|
||||
__device__ double __nv_remainder(double __a, double __b);
|
||||
__device__ float __nv_remainderf(float __a, float __b);
|
||||
__device__ double __nv_remquo(double __a, double __b, int *__c);
|
||||
__device__ float __nv_remquof(float __a, float __b, int *__c);
|
||||
__device__ int __nv_rhadd(int __a, int __b);
|
||||
__device__ double __nv_rhypot(double __a, double __b);
|
||||
__device__ float __nv_rhypotf(float __a, float __b);
|
||||
__device__ double __nv_rint(double __a);
|
||||
__device__ float __nv_rintf(float __a);
|
||||
__device__ double __nv_rnorm3d(double __a, double __b, double __c);
|
||||
__device__ float __nv_rnorm3df(float __a, float __b, float __c);
|
||||
__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);
|
||||
__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);
|
||||
__device__ float __nv_rnormf(int __a, const float *__b);
|
||||
__device__ double __nv_rnorm(int __a, const double *__b);
|
||||
__device__ double __nv_round(double __a);
|
||||
__device__ float __nv_roundf(float __a);
|
||||
__device__ double __nv_rsqrt(double __a);
|
||||
__device__ float __nv_rsqrtf(float __a);
|
||||
__device__ int __nv_sad(int __a, int __b, int __c);
|
||||
__device__ float __nv_saturatef(float __a);
|
||||
__device__ double __nv_scalbn(double __a, int __b);
|
||||
__device__ float __nv_scalbnf(float __a, int __b);
|
||||
__device__ int __nv_signbitd(double __a);
|
||||
__device__ int __nv_signbitf(float __a);
|
||||
__device__ void __nv_sincos(double __a, double *__b, double *__c);
|
||||
__device__ void __nv_sincosf(float __a, float *__b, float *__c);
|
||||
__device__ void __nv_sincospi(double __a, double *__b, double *__c);
|
||||
__device__ void __nv_sincospif(float __a, float *__b, float *__c);
|
||||
__device__ double __nv_sin(double __a);
|
||||
__device__ float __nv_sinf(float __a);
|
||||
__device__ double __nv_sinh(double __a);
|
||||
__device__ float __nv_sinhf(float __a);
|
||||
__device__ double __nv_sinpi(double __a);
|
||||
__device__ float __nv_sinpif(float __a);
|
||||
__device__ double __nv_sqrt(double __a);
|
||||
__device__ float __nv_sqrtf(float __a);
|
||||
__device__ double __nv_tan(double __a);
|
||||
__device__ float __nv_tanf(float __a);
|
||||
__device__ double __nv_tanh(double __a);
|
||||
__device__ float __nv_tanhf(float __a);
|
||||
__device__ double __nv_tgamma(double __a);
|
||||
__device__ float __nv_tgammaf(float __a);
|
||||
__device__ double __nv_trunc(double __a);
|
||||
__device__ float __nv_truncf(float __a);
|
||||
__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);
|
||||
__device__ double __nv_uint2double_rn(unsigned int __i);
|
||||
__device__ float __nv_uint2float_rd(unsigned int __a);
|
||||
__device__ float __nv_uint2float_rn(unsigned int __a);
|
||||
__device__ float __nv_uint2float_ru(unsigned int __a);
|
||||
__device__ float __nv_uint2float_rz(unsigned int __a);
|
||||
__device__ float __nv_uint_as_float(unsigned int __a);
|
||||
__device__ double __nv_ull2double_rd(unsigned long long __a);
|
||||
__device__ double __nv_ull2double_rn(unsigned long long __a);
|
||||
__device__ double __nv_ull2double_ru(unsigned long long __a);
|
||||
__device__ double __nv_ull2double_rz(unsigned long long __a);
|
||||
__device__ float __nv_ull2float_rd(unsigned long long __a);
|
||||
__device__ float __nv_ull2float_rn(unsigned long long __a);
|
||||
__device__ float __nv_ull2float_ru(unsigned long long __a);
|
||||
__device__ float __nv_ull2float_rz(unsigned long long __a);
|
||||
__device__ unsigned long long __nv_ullmax(unsigned long long __a,
|
||||
unsigned long long __b);
|
||||
__device__ unsigned long long __nv_ullmin(unsigned long long __a,
|
||||
unsigned long long __b);
|
||||
__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);
|
||||
__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);
|
||||
__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);
|
||||
__device__ unsigned long long __nv_umul64hi(unsigned long long __a,
|
||||
unsigned long long __b);
|
||||
__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);
|
||||
__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);
|
||||
__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,
|
||||
unsigned int __c);
|
||||
#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020
|
||||
__device__ int __nv_vabs2(int __a);
|
||||
__device__ int __nv_vabs4(int __a);
|
||||
__device__ int __nv_vabsdiffs2(int __a, int __b);
|
||||
__device__ int __nv_vabsdiffs4(int __a, int __b);
|
||||
__device__ int __nv_vabsdiffu2(int __a, int __b);
|
||||
__device__ int __nv_vabsdiffu4(int __a, int __b);
|
||||
__device__ int __nv_vabsss2(int __a);
|
||||
__device__ int __nv_vabsss4(int __a);
|
||||
__device__ int __nv_vadd2(int __a, int __b);
|
||||
__device__ int __nv_vadd4(int __a, int __b);
|
||||
__device__ int __nv_vaddss2(int __a, int __b);
|
||||
__device__ int __nv_vaddss4(int __a, int __b);
|
||||
__device__ int __nv_vaddus2(int __a, int __b);
|
||||
__device__ int __nv_vaddus4(int __a, int __b);
|
||||
__device__ int __nv_vavgs2(int __a, int __b);
|
||||
__device__ int __nv_vavgs4(int __a, int __b);
|
||||
__device__ int __nv_vavgu2(int __a, int __b);
|
||||
__device__ int __nv_vavgu4(int __a, int __b);
|
||||
__device__ int __nv_vcmpeq2(int __a, int __b);
|
||||
__device__ int __nv_vcmpeq4(int __a, int __b);
|
||||
__device__ int __nv_vcmpges2(int __a, int __b);
|
||||
__device__ int __nv_vcmpges4(int __a, int __b);
|
||||
__device__ int __nv_vcmpgeu2(int __a, int __b);
|
||||
__device__ int __nv_vcmpgeu4(int __a, int __b);
|
||||
__device__ int __nv_vcmpgts2(int __a, int __b);
|
||||
__device__ int __nv_vcmpgts4(int __a, int __b);
|
||||
__device__ int __nv_vcmpgtu2(int __a, int __b);
|
||||
__device__ int __nv_vcmpgtu4(int __a, int __b);
|
||||
__device__ int __nv_vcmples2(int __a, int __b);
|
||||
__device__ int __nv_vcmples4(int __a, int __b);
|
||||
__device__ int __nv_vcmpleu2(int __a, int __b);
|
||||
__device__ int __nv_vcmpleu4(int __a, int __b);
|
||||
__device__ int __nv_vcmplts2(int __a, int __b);
|
||||
__device__ int __nv_vcmplts4(int __a, int __b);
|
||||
__device__ int __nv_vcmpltu2(int __a, int __b);
|
||||
__device__ int __nv_vcmpltu4(int __a, int __b);
|
||||
__device__ int __nv_vcmpne2(int __a, int __b);
|
||||
__device__ int __nv_vcmpne4(int __a, int __b);
|
||||
__device__ int __nv_vhaddu2(int __a, int __b);
|
||||
__device__ int __nv_vhaddu4(int __a, int __b);
|
||||
__device__ int __nv_vmaxs2(int __a, int __b);
|
||||
__device__ int __nv_vmaxs4(int __a, int __b);
|
||||
__device__ int __nv_vmaxu2(int __a, int __b);
|
||||
__device__ int __nv_vmaxu4(int __a, int __b);
|
||||
__device__ int __nv_vmins2(int __a, int __b);
|
||||
__device__ int __nv_vmins4(int __a, int __b);
|
||||
__device__ int __nv_vminu2(int __a, int __b);
|
||||
__device__ int __nv_vminu4(int __a, int __b);
|
||||
__device__ int __nv_vneg2(int __a);
|
||||
__device__ int __nv_vneg4(int __a);
|
||||
__device__ int __nv_vnegss2(int __a);
|
||||
__device__ int __nv_vnegss4(int __a);
|
||||
__device__ int __nv_vsads2(int __a, int __b);
|
||||
__device__ int __nv_vsads4(int __a, int __b);
|
||||
__device__ int __nv_vsadu2(int __a, int __b);
|
||||
__device__ int __nv_vsadu4(int __a, int __b);
|
||||
__device__ int __nv_vseteq2(int __a, int __b);
|
||||
__device__ int __nv_vseteq4(int __a, int __b);
|
||||
__device__ int __nv_vsetges2(int __a, int __b);
|
||||
__device__ int __nv_vsetges4(int __a, int __b);
|
||||
__device__ int __nv_vsetgeu2(int __a, int __b);
|
||||
__device__ int __nv_vsetgeu4(int __a, int __b);
|
||||
__device__ int __nv_vsetgts2(int __a, int __b);
|
||||
__device__ int __nv_vsetgts4(int __a, int __b);
|
||||
__device__ int __nv_vsetgtu2(int __a, int __b);
|
||||
__device__ int __nv_vsetgtu4(int __a, int __b);
|
||||
__device__ int __nv_vsetles2(int __a, int __b);
|
||||
__device__ int __nv_vsetles4(int __a, int __b);
|
||||
__device__ int __nv_vsetleu2(int __a, int __b);
|
||||
__device__ int __nv_vsetleu4(int __a, int __b);
|
||||
__device__ int __nv_vsetlts2(int __a, int __b);
|
||||
__device__ int __nv_vsetlts4(int __a, int __b);
|
||||
__device__ int __nv_vsetltu2(int __a, int __b);
|
||||
__device__ int __nv_vsetltu4(int __a, int __b);
|
||||
__device__ int __nv_vsetne2(int __a, int __b);
|
||||
__device__ int __nv_vsetne4(int __a, int __b);
|
||||
__device__ int __nv_vsub2(int __a, int __b);
|
||||
__device__ int __nv_vsub4(int __a, int __b);
|
||||
__device__ int __nv_vsubss2(int __a, int __b);
|
||||
__device__ int __nv_vsubss4(int __a, int __b);
|
||||
__device__ int __nv_vsubus2(int __a, int __b);
|
||||
__device__ int __nv_vsubus4(int __a, int __b);
|
||||
#endif // CUDA_VERSION
|
||||
__device__ double __nv_y0(double __a);
|
||||
__device__ float __nv_y0f(float __a);
|
||||
__device__ double __nv_y1(double __a);
|
||||
__device__ float __nv_y1f(float __a);
|
||||
__device__ float __nv_ynf(int __a, float __b);
|
||||
__device__ double __nv_yn(int __a, double __b);
|
||||
} // extern "C"
|
||||
#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__
|
||||
@@ -149,9 +149,6 @@ __DEVICE__ double nearbyint(double);
|
||||
__DEVICE__ float nearbyint(float);
|
||||
__DEVICE__ double nextafter(double, double);
|
||||
__DEVICE__ float nextafter(float, float);
|
||||
__DEVICE__ double nexttoward(double, double);
|
||||
__DEVICE__ float nexttoward(float, double);
|
||||
__DEVICE__ float nexttowardf(float, double);
|
||||
__DEVICE__ double pow(double, double);
|
||||
__DEVICE__ double pow(double, int);
|
||||
__DEVICE__ float pow(float, float);
|
||||
@@ -185,6 +182,10 @@ __DEVICE__ float tgamma(float);
|
||||
__DEVICE__ double trunc(double);
|
||||
__DEVICE__ float trunc(float);
|
||||
|
||||
// Notably missing above is nexttoward, which we don't define on
|
||||
// the device side because libdevice doesn't give us an implementation, and we
|
||||
// don't want to be in the business of writing one ourselves.
|
||||
|
||||
// We need to define these overloads in exactly the namespace our standard
|
||||
// library uses (including the right inline namespace), otherwise they won't be
|
||||
// picked up by other functions in the standard library (e.g. functions in
|
||||
@@ -255,7 +256,6 @@ using ::nan;
|
||||
using ::nanf;
|
||||
using ::nearbyint;
|
||||
using ::nextafter;
|
||||
using ::nexttoward;
|
||||
using ::pow;
|
||||
using ::remainder;
|
||||
using ::remquo;
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#include "cuda.h"
|
||||
#if !defined(CUDA_VERSION)
|
||||
#error "cuda.h did not define CUDA_VERSION"
|
||||
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
|
||||
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020
|
||||
#error "Unsupported CUDA version!"
|
||||
#endif
|
||||
|
||||
@@ -84,19 +84,33 @@
|
||||
#define __DEVICE_FUNCTIONS_H__
|
||||
#define __MATH_FUNCTIONS_H__
|
||||
#define __COMMON_FUNCTIONS_H__
|
||||
// device_functions_decls is replaced by __clang_cuda_device_functions.h
|
||||
// included below.
|
||||
#define __DEVICE_FUNCTIONS_DECLS_H__
|
||||
|
||||
#undef __CUDACC__
|
||||
#if CUDA_VERSION < 9000
|
||||
#define __CUDABE__
|
||||
#else
|
||||
#define __CUDA_LIBDEVICE__
|
||||
#endif
|
||||
// Disables definitions of device-side runtime support stubs in
|
||||
// cuda_device_runtime_api.h
|
||||
#include "driver_types.h"
|
||||
#include "host_config.h"
|
||||
#include "host_defines.h"
|
||||
|
||||
// Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in
|
||||
// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the
|
||||
// functional equivalent of what we need.
|
||||
#pragma push_macro("nv_weak")
|
||||
#define nv_weak weak
|
||||
#undef __CUDABE__
|
||||
#undef __CUDA_LIBDEVICE__
|
||||
#define __CUDACC__
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
#pragma pop_macro("nv_weak")
|
||||
#undef __CUDACC__
|
||||
#define __CUDABE__
|
||||
|
||||
@@ -105,7 +119,9 @@
|
||||
#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
|
||||
#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)
|
||||
|
||||
#if CUDA_VERSION < 9000
|
||||
#include "crt/device_runtime.h"
|
||||
#endif
|
||||
#include "crt/host_runtime.h"
|
||||
// device_runtime.h defines __cxa_* macros that will conflict with
|
||||
// cxxabi.h.
|
||||
@@ -130,20 +146,22 @@ inline __host__ double __signbitd(double x) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// We need decls for functions in CUDA's libdevice with __device__
|
||||
// attribute only. Alas they come either as __host__ __device__ or
|
||||
// with no attributes at all. To work around that, define __CUDA_RTC__
|
||||
// which produces HD variant and undef __host__ which gives us desided
|
||||
// decls with __device__ attribute.
|
||||
#pragma push_macro("__host__")
|
||||
#define __host__
|
||||
#define __CUDACC_RTC__
|
||||
#include "device_functions_decls.h"
|
||||
#undef __CUDACC_RTC__
|
||||
// CUDA 9.1 no longer provides declarations for libdevice functions, so we need
|
||||
// to provide our own.
|
||||
#include <__clang_cuda_libdevice_declares.h>
|
||||
|
||||
// Temporarily poison __host__ macro to ensure it's not used by any of
|
||||
// the headers we're about to include.
|
||||
#define __host__ UNEXPECTED_HOST_ATTRIBUTE
|
||||
// Wrappers for many device-side standard library functions became compiler
|
||||
// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now
|
||||
// provides its own implementation of the wrappers.
|
||||
#if CUDA_VERSION >= 9000
|
||||
#include <__clang_cuda_device_functions.h>
|
||||
#endif
|
||||
|
||||
// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's
|
||||
// counterpart does not do it, so we need to make it empty here to keep
|
||||
// following CUDA includes happy.
|
||||
#undef __THROW
|
||||
#define __THROW
|
||||
|
||||
// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.
|
||||
// Previous versions used to check whether they are defined or not.
|
||||
@@ -160,13 +178,20 @@ inline __host__ double __signbitd(double x) {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Temporarily poison __host__ macro to ensure it's not used by any of
|
||||
// the headers we're about to include.
|
||||
#pragma push_macro("__host__")
|
||||
#define __host__ UNEXPECTED_HOST_ATTRIBUTE
|
||||
|
||||
// device_functions.hpp and math_functions*.hpp use 'static
|
||||
// __forceinline__' (with no __device__) for definitions of device
|
||||
// functions. Temporarily redefine __forceinline__ to include
|
||||
// __device__.
|
||||
#pragma push_macro("__forceinline__")
|
||||
#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
|
||||
#if CUDA_VERSION < 9000
|
||||
#include "device_functions.hpp"
|
||||
#endif
|
||||
|
||||
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
|
||||
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
|
||||
@@ -178,17 +203,32 @@ inline __host__ double __signbitd(double x) {
|
||||
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
|
||||
#define __USE_FAST_MATH__ 1
|
||||
#endif
|
||||
|
||||
#if CUDA_VERSION >= 9000
|
||||
// CUDA-9.2 needs host-side memcpy for some host functions in
|
||||
// device_functions.hpp
|
||||
#if CUDA_VERSION >= 9020
|
||||
#include <string.h>
|
||||
#endif
|
||||
#include "crt/math_functions.hpp"
|
||||
#else
|
||||
#include "math_functions.hpp"
|
||||
#endif
|
||||
|
||||
#pragma pop_macro("__USE_FAST_MATH__")
|
||||
|
||||
#if CUDA_VERSION < 9000
|
||||
#include "math_functions_dbl_ptx3.hpp"
|
||||
#endif
|
||||
#pragma pop_macro("__forceinline__")
|
||||
|
||||
// Pull in host-only functions that are only available when neither
|
||||
// __CUDACC__ nor __CUDABE__ are defined.
|
||||
#undef __MATH_FUNCTIONS_HPP__
|
||||
#undef __CUDABE__
|
||||
#if CUDA_VERSION < 9000
|
||||
#include "math_functions.hpp"
|
||||
#endif
|
||||
// Alas, additional overloads for these functions are hard to get to.
|
||||
// Considering that we only need these overloads for a few functions,
|
||||
// we can provide them here.
|
||||
@@ -204,22 +244,36 @@ static inline float normcdfinv(float __a) { return normcdfinvf(__a); }
|
||||
static inline float normcdf(float __a) { return normcdff(__a); }
|
||||
static inline float erfcx(float __a) { return erfcxf(__a); }
|
||||
|
||||
#if CUDA_VERSION < 9000
|
||||
// For some reason single-argument variant is not always declared by
|
||||
// CUDA headers. Alas, device_functions.hpp included below needs it.
|
||||
static inline __device__ void __brkpt(int __c) { __brkpt(); }
|
||||
#endif
|
||||
|
||||
// Now include *.hpp with definitions of various GPU functions. Alas,
|
||||
// a lot of things get declared/defined with __host__ attribute which
|
||||
// we don't want and we have to define it out. We also have to include
|
||||
// {device,math}_functions.hpp again in order to extract the other
|
||||
// branch of #if/else inside.
|
||||
|
||||
#define __host__
|
||||
#undef __CUDABE__
|
||||
#define __CUDACC__
|
||||
#if CUDA_VERSION >= 9000
|
||||
// Some atomic functions became compiler builtins in CUDA-9, so we need their
|
||||
// declarations.
|
||||
#include "device_atomic_functions.h"
|
||||
#endif
|
||||
#undef __DEVICE_FUNCTIONS_HPP__
|
||||
#include "device_atomic_functions.hpp"
|
||||
#if CUDA_VERSION >= 9000
|
||||
#include "crt/device_functions.hpp"
|
||||
#include "crt/device_double_functions.hpp"
|
||||
#else
|
||||
#include "device_functions.hpp"
|
||||
#define __CUDABE__
|
||||
#include "device_double_functions.h"
|
||||
#undef __CUDABE__
|
||||
#endif
|
||||
#include "sm_20_atomic_functions.hpp"
|
||||
#include "sm_20_intrinsics.hpp"
|
||||
#include "sm_32_atomic_functions.hpp"
|
||||
@@ -233,8 +287,11 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
|
||||
// reason about our code.
|
||||
|
||||
#if CUDA_VERSION >= 8000
|
||||
#pragma push_macro("__CUDA_ARCH__")
|
||||
#undef __CUDA_ARCH__
|
||||
#include "sm_60_atomic_functions.hpp"
|
||||
#include "sm_61_intrinsics.hpp"
|
||||
#pragma pop_macro("__CUDA_ARCH__")
|
||||
#endif
|
||||
|
||||
#undef __MATH_FUNCTIONS_HPP__
|
||||
@@ -247,7 +304,27 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
|
||||
#pragma push_macro("__GNUC__")
|
||||
#undef __GNUC__
|
||||
#define signbit __ignored_cuda_signbit
|
||||
|
||||
// CUDA-9 omits device-side definitions of some math functions if it sees
|
||||
// include guard from math.h wrapper from libstdc++. We have to undo the header
|
||||
// guard temporarily to get the definitions we need.
|
||||
#pragma push_macro("_GLIBCXX_MATH_H")
|
||||
#pragma push_macro("_LIBCPP_VERSION")
|
||||
#if CUDA_VERSION >= 9000
|
||||
#undef _GLIBCXX_MATH_H
|
||||
// We also need to undo another guard that checks for libc++ 3.8+
|
||||
#ifdef _LIBCPP_VERSION
|
||||
#define _LIBCPP_VERSION 3700
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CUDA_VERSION >= 9000
|
||||
#include "crt/math_functions.hpp"
|
||||
#else
|
||||
#include "math_functions.hpp"
|
||||
#endif
|
||||
#pragma pop_macro("_GLIBCXX_MATH_H")
|
||||
#pragma pop_macro("_LIBCPP_VERSION")
|
||||
#pragma pop_macro("__GNUC__")
|
||||
#pragma pop_macro("signbit")
|
||||
|
||||
|
||||
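Note (not part of the upstream diff): the wrapper above leans heavily on #pragma push_macro/pop_macro to temporarily override macros such as __host__, __forceinline__ and signbit and then restore them. A minimal, self-contained C sketch of that mechanism, using a made-up macro FOO:

#include <stdio.h>

#define FOO 1

int main(void) {
#pragma push_macro("FOO")        /* save the current definition of FOO */
#undef FOO
#define FOO 2                    /* temporary override, as the wrapper does for __host__ */
  printf("inside:  %d\n", FOO);  /* prints 2 */
#pragma pop_macro("FOO")         /* restore the saved definition */
  printf("outside: %d\n", FOO);  /* prints 1 */
  return 0;
}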
@@ -20,15 +20,18 @@
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef _WMMINTRIN_AES_H
|
||||
#define _WMMINTRIN_AES_H
|
||||
|
||||
#include <emmintrin.h>
|
||||
#ifndef __WMMINTRIN_H
|
||||
#error "Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __WMMINTRIN_AES_H
|
||||
#define __WMMINTRIN_AES_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128)))
|
||||
|
||||
/// \brief Performs a single round of AES encryption using the Equivalent
|
||||
/// Performs a single round of AES encryption using the Equivalent
|
||||
/// Inverse Cipher, transforming the state value from the first source
|
||||
/// operand using a 128-bit round key value contained in the second source
|
||||
/// operand, and writes the result to the destination.
|
||||
@@ -48,7 +51,7 @@ _mm_aesenc_si128(__m128i __V, __m128i __R)
|
||||
return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);
|
||||
}
|
||||
|
||||
/// \brief Performs the final round of AES encryption using the Equivalent
|
||||
/// Performs the final round of AES encryption using the Equivalent
|
||||
/// Inverse Cipher, transforming the state value from the first source
|
||||
/// operand using a 128-bit round key value contained in the second source
|
||||
/// operand, and writes the result to the destination.
|
||||
@@ -68,7 +71,7 @@ _mm_aesenclast_si128(__m128i __V, __m128i __R)
|
||||
return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);
|
||||
}
|
||||
|
||||
/// \brief Performs a single round of AES decryption using the Equivalent
|
||||
/// Performs a single round of AES decryption using the Equivalent
|
||||
/// Inverse Cipher, transforming the state value from the first source
|
||||
/// operand using a 128-bit round key value contained in the second source
|
||||
/// operand, and writes the result to the destination.
|
||||
@@ -88,7 +91,7 @@ _mm_aesdec_si128(__m128i __V, __m128i __R)
|
||||
return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);
|
||||
}
|
||||
|
||||
/// \brief Performs the final round of AES decryption using the Equivalent
|
||||
/// Performs the final round of AES decryption using the Equivalent
|
||||
/// Inverse Cipher, transforming the state value from the first source
|
||||
/// operand using a 128-bit round key value contained in the second source
|
||||
/// operand, and writes the result to the destination.
|
||||
@@ -108,7 +111,7 @@ _mm_aesdeclast_si128(__m128i __V, __m128i __R)
|
||||
return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);
|
||||
}
|
||||
|
||||
/// \brief Applies the AES InvMixColumns() transformation to an expanded key
|
||||
/// Applies the AES InvMixColumns() transformation to an expanded key
|
||||
/// contained in the source operand, and writes the result to the
|
||||
/// destination.
|
||||
///
|
||||
@@ -125,7 +128,7 @@ _mm_aesimc_si128(__m128i __V)
|
||||
return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);
|
||||
}
|
||||
|
||||
/// \brief Generates a round key for AES encyption, operating on 128-bit data
|
||||
/// Generates a round key for AES encryption, operating on 128-bit data
|
||||
/// specified in the first source operand and using an 8-bit round constant
|
||||
/// specified by the second source operand, and writes the result to the
|
||||
/// destination.
|
||||
@@ -148,4 +151,4 @@ _mm_aesimc_si128(__m128i __V)
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* _WMMINTRIN_AES_H */
|
||||
#endif /* __WMMINTRIN_AES_H */
|
||||
|
||||
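As a usage note (not from the diff): the intrinsics declared in this header map one AES round to a single instruction. A hedged sketch, assuming an AES-NI capable target (compile with something like clang -maes); the state and key values are made up:

#include <wmmintrin.h>
#include <stdio.h>

int main(void) {
  __m128i state = _mm_set1_epi32(0x11223344);        /* made-up 128-bit state block */
  __m128i round_key = _mm_set1_epi32(0x55667788);    /* made-up round key */
  __m128i out = _mm_aesenc_si128(state, round_key);  /* one encryption round */
  printf("lane0 = 0x%08x\n", (unsigned)_mm_cvtsi128_si32(out));
  return 0;
}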
@@ -20,10 +20,15 @@
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef _WMMINTRIN_PCLMUL_H
|
||||
#define _WMMINTRIN_PCLMUL_H
|
||||
|
||||
/// \brief Multiplies two 64-bit integer values, which are selected from source
|
||||
#ifndef __WMMINTRIN_H
|
||||
#error "Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __WMMINTRIN_PCLMUL_H
|
||||
#define __WMMINTRIN_PCLMUL_H
|
||||
|
||||
/// Multiplies two 64-bit integer values, which are selected from source
|
||||
/// operands using the immediate-value operand. The multiplication is a
|
||||
/// carry-less multiplication, and the 128-bit integer product is stored in
|
||||
/// the destination.
|
||||
@@ -50,8 +55,8 @@
|
||||
/// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
|
||||
/// \returns The 128-bit integer vector containing the result of the carry-less
|
||||
/// multiplication of the selected 64-bit values.
|
||||
#define _mm_clmulepi64_si128(__X, __Y, __I) \
|
||||
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
|
||||
(__v2di)(__m128i)(__Y), (char)(__I)))
|
||||
#define _mm_clmulepi64_si128(X, Y, I) \
|
||||
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
|
||||
(__v2di)(__m128i)(Y), (char)(I)))
|
||||
|
||||
#endif /* _WMMINTRIN_PCLMUL_H */
|
||||
#endif /* __WMMINTRIN_PCLMUL_H */
|
||||
|
||||
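A short usage sketch (not from the diff) for the carry-less multiply macro above; immediate 0x00 selects the low 64-bit halves of both operands. Assumes a 64-bit target with PCLMUL enabled (something like clang -mpclmul):

#include <wmmintrin.h>
#include <stdio.h>

int main(void) {
  __m128i a = _mm_set_epi64x(0, 0x3);   /* low qword = 0b11  */
  __m128i b = _mm_set_epi64x(0, 0x5);   /* low qword = 0b101 */
  __m128i p = _mm_clmulepi64_si128(a, b, 0x00);
  /* Carry-less 0b11 * 0b101 = 0b1111 = 0xF. */
  printf("low 64 bits: 0x%llx\n", (unsigned long long)_mm_cvtsi128_si64(p));
  return 0;
}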
@@ -27,9 +27,9 @@
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))
|
||||
|
||||
/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
|
||||
/// Extracts the specified bits from the lower 64 bits of the 128-bit
|
||||
/// integer vector operand at the index \a idx and of the length \a len.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -57,7 +57,7 @@
|
||||
((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
|
||||
(char)(len), (char)(idx)))
|
||||
|
||||
/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
|
||||
/// Extracts the specified bits from the lower 64 bits of the 128-bit
|
||||
/// integer vector operand at the index and of the length specified by
|
||||
/// \a __y.
|
||||
///
|
||||
@@ -82,7 +82,7 @@ _mm_extract_si64(__m128i __x, __m128i __y)
|
||||
return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
|
||||
}
|
||||
|
||||
/// \brief Inserts bits of a specified length from the source integer vector
|
||||
/// Inserts bits of a specified length from the source integer vector
|
||||
/// \a y into the lower 64 bits of the destination integer vector \a x at
|
||||
/// the index \a idx and of the length \a len.
|
||||
///
|
||||
@@ -120,7 +120,7 @@ _mm_extract_si64(__m128i __x, __m128i __y)
|
||||
(__v2di)(__m128i)(y), \
|
||||
(char)(len), (char)(idx)))
|
||||
|
||||
/// \brief Inserts bits of a specified length from the source integer vector
|
||||
/// Inserts bits of a specified length from the source integer vector
|
||||
/// \a __y into the lower 64 bits of the destination integer vector \a __x
|
||||
/// at the index and of the length specified by \a __y.
|
||||
///
|
||||
@@ -152,7 +152,7 @@ _mm_insert_si64(__m128i __x, __m128i __y)
|
||||
return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
|
||||
}
|
||||
|
||||
/// \brief Stores a 64-bit double-precision value in a 64-bit memory location.
|
||||
/// Stores a 64-bit double-precision value in a 64-bit memory location.
|
||||
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
|
||||
/// used again soon).
|
||||
///
|
||||
@@ -170,7 +170,7 @@ _mm_stream_sd(double *__p, __m128d __a)
|
||||
__builtin_ia32_movntsd(__p, (__v2df)__a);
|
||||
}
|
||||
|
||||
/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit
|
||||
/// Stores a 32-bit single-precision floating-point value in a 32-bit
|
||||
/// memory location. To minimize caching, the data is flagged as
|
||||
/// non-temporal (unlikely to be used again soon).
|
||||
///
|
||||
|
||||
c_headers/arm64intr.h (new file, 49 lines)
@@ -0,0 +1,49 @@
|
||||
/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Only include this if we're compiling for the windows platform. */
|
||||
#ifndef _MSC_VER
|
||||
#include_next <arm64intr.h>
|
||||
#else
|
||||
|
||||
#ifndef __ARM64INTR_H
|
||||
#define __ARM64INTR_H
|
||||
|
||||
typedef enum
|
||||
{
|
||||
_ARM64_BARRIER_SY = 0xF,
|
||||
_ARM64_BARRIER_ST = 0xE,
|
||||
_ARM64_BARRIER_LD = 0xD,
|
||||
_ARM64_BARRIER_ISH = 0xB,
|
||||
_ARM64_BARRIER_ISHST = 0xA,
|
||||
_ARM64_BARRIER_ISHLD = 0x9,
|
||||
_ARM64_BARRIER_NSH = 0x7,
|
||||
_ARM64_BARRIER_NSHST = 0x6,
|
||||
_ARM64_BARRIER_NSHLD = 0x5,
|
||||
_ARM64_BARRIER_OSH = 0x3,
|
||||
_ARM64_BARRIER_OSHST = 0x2,
|
||||
_ARM64_BARRIER_OSHLD = 0x1
|
||||
} _ARM64INTR_BARRIER_TYPE;
|
||||
|
||||
#endif /* __ARM64INTR_H */
|
||||
#endif /* _MSC_VER */
|
||||
c_headers/arm_fp16.h (new file, 1499 lines): diff suppressed because it is too large
c_headers/arm_neon.h (15550 lines): diff suppressed because it is too large
c_headers/avx512bitalgintrin.h (new file, 97 lines)
@@ -0,0 +1,97 @@
|
||||
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512BITALGINTRIN_H
|
||||
#define __AVX512BITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi16(__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
|
||||
(__v32hi) _mm512_popcnt_epi16(__B),
|
||||
(__v32hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_popcnt_epi8(__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
|
||||
(__v64qi) _mm512_popcnt_epi8(__B),
|
||||
(__v64qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
|
||||
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
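A hedged usage sketch for the new BITALG intrinsics above: per-byte population count across a 512-bit vector. Assumes a CPU and compiler with AVX512BITALG support (something like clang -mavx512f -mavx512bitalg):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512i v = _mm512_set1_epi32(0x0F0F0F0F);   /* every byte is 0x0F */
  __m512i counts = _mm512_popcnt_epi8(v);      /* each byte becomes 4 */
  unsigned char out[64];
  _mm512_storeu_si512((__m512i *)out, counts);
  printf("popcount of first byte: %u\n", out[0]);
  return 0;
}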
File diff suppressed because it is too large
@@ -29,7 +29,7 @@
|
||||
#define __AVX512CDINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_conflict_epi64 (__m512i __A)
|
||||
@@ -82,61 +82,58 @@ _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_lzcnt_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
|
||||
(__v16si)_mm512_lzcnt_epi32(__A),
|
||||
(__v16si)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
|
||||
(__v16si)_mm512_lzcnt_epi32(__A),
|
||||
(__v16si)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_lzcnt_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
|
||||
(__v8di)_mm512_lzcnt_epi64(__A),
|
||||
(__v8di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
|
||||
(__v8di)_mm512_lzcnt_epi64(__A),
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmb512 (__A);
|
||||
return (__m512i) _mm512_set1_epi64((long long) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmw512 (__A);
|
||||
return (__m512i) _mm512_set1_epi32((int) __A);
|
||||
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
File diff suppressed because it is too large
@@ -27,21 +27,21 @@
|
||||
#ifndef __AVX512ERINTRIN_H
|
||||
#define __AVX512ERINTRIN_H
|
||||
|
||||
// exp2a23
|
||||
#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
|
||||
/* exp2a23 */
|
||||
#define _mm512_exp2a23_round_pd(A, R) \
|
||||
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)); })
|
||||
(__mmask8)-1, (int)(R))
|
||||
|
||||
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \
|
||||
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
|
||||
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(S), (__mmask8)(M), \
|
||||
(int)(R)); })
|
||||
(int)(R))
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \
|
||||
#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
|
||||
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm512_exp2a23_pd(A) \
|
||||
_mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -52,20 +52,20 @@
|
||||
#define _mm512_maskz_exp2a23_pd(M, A) \
|
||||
_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \
|
||||
#define _mm512_exp2a23_round_ps(A, R) \
|
||||
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)-1, (int)(R)); })
|
||||
(__mmask16)-1, (int)(R))
|
||||
|
||||
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \
|
||||
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
|
||||
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(S), (__mmask16)(M), \
|
||||
(int)(R)); })
|
||||
(int)(R))
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \
|
||||
#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
|
||||
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)(M), (int)(R)); })
|
||||
(__mmask16)(M), (int)(R))
|
||||
|
||||
#define _mm512_exp2a23_ps(A) \
|
||||
_mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -76,21 +76,21 @@
|
||||
#define _mm512_maskz_exp2a23_ps(M, A) \
|
||||
_mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
// rsqrt28
|
||||
#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \
|
||||
/* rsqrt28 */
|
||||
#define _mm512_rsqrt28_round_pd(A, R) \
|
||||
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)); })
|
||||
(__mmask8)-1, (int)(R))
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \
|
||||
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
|
||||
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(S), (__mmask8)(M), \
|
||||
(int)(R)); })
|
||||
(int)(R))
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \
|
||||
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
|
||||
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm512_rsqrt28_pd(A) \
|
||||
_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -101,20 +101,20 @@
|
||||
#define _mm512_maskz_rsqrt28_pd(M, A) \
|
||||
_mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \
|
||||
#define _mm512_rsqrt28_round_ps(A, R) \
|
||||
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)-1, (int)(R)); })
|
||||
(__mmask16)-1, (int)(R))
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \
|
||||
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
|
||||
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(S), (__mmask16)(M), \
|
||||
(int)(R)); })
|
||||
(int)(R))
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \
|
||||
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
|
||||
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)(M), (int)(R)); })
|
||||
(__mmask16)(M), (int)(R))
|
||||
|
||||
#define _mm512_rsqrt28_ps(A) \
|
||||
_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -125,23 +125,23 @@
|
||||
#define _mm512_maskz_rsqrt28_ps(M, A) \
|
||||
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \
|
||||
#define _mm_rsqrt28_round_ss(A, B, R) \
|
||||
(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)-1, (int)(R)); })
|
||||
(__mmask8)-1, (int)(R))
|
||||
|
||||
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \
|
||||
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
|
||||
(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)(__m128)(S), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \
|
||||
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
|
||||
(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_rsqrt28_ss(A, B) \
|
||||
_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -152,23 +152,23 @@
|
||||
#define _mm_maskz_rsqrt28_ss(M, A, B) \
|
||||
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \
|
||||
#define _mm_rsqrt28_round_sd(A, B, R) \
|
||||
(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)); })
|
||||
(__mmask8)-1, (int)(R))
|
||||
|
||||
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \
|
||||
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
|
||||
(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)(__m128d)(S), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \
|
||||
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
|
||||
(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_rsqrt28_sd(A, B) \
|
||||
_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -179,21 +179,21 @@
|
||||
#define _mm_maskz_rsqrt28_sd(M, A, B) \
|
||||
_mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
// rcp28
|
||||
#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \
|
||||
/* rcp28 */
|
||||
#define _mm512_rcp28_round_pd(A, R) \
|
||||
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)); })
|
||||
(__mmask8)-1, (int)(R))
|
||||
|
||||
#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \
|
||||
#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
|
||||
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(S), (__mmask8)(M), \
|
||||
(int)(R)); })
|
||||
(int)(R))
|
||||
|
||||
#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \
|
||||
#define _mm512_maskz_rcp28_round_pd(M, A, R) \
|
||||
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm512_rcp28_pd(A) \
|
||||
_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -204,20 +204,20 @@
|
||||
#define _mm512_maskz_rcp28_pd(M, A) \
|
||||
_mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \
|
||||
#define _mm512_rcp28_round_ps(A, R) \
|
||||
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)-1, (int)(R)); })
|
||||
(__mmask16)-1, (int)(R))
|
||||
|
||||
#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \
|
||||
#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
|
||||
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(S), (__mmask16)(M), \
|
||||
(int)(R)); })
|
||||
(int)(R))
|
||||
|
||||
#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \
|
||||
#define _mm512_maskz_rcp28_round_ps(M, A, R) \
|
||||
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)(M), (int)(R)); })
|
||||
(__mmask16)(M), (int)(R))
|
||||
|
||||
#define _mm512_rcp28_ps(A) \
|
||||
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -228,23 +228,23 @@
|
||||
#define _mm512_maskz_rcp28_ps(M, A) \
|
||||
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \
|
||||
#define _mm_rcp28_round_ss(A, B, R) \
|
||||
(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)-1, (int)(R)); })
|
||||
(__mmask8)-1, (int)(R))
|
||||
|
||||
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \
|
||||
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
|
||||
(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)(__m128)(S), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \
|
||||
#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
|
||||
(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_rcp28_ss(A, B) \
|
||||
_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -255,23 +255,23 @@
|
||||
#define _mm_maskz_rcp28_ss(M, A, B) \
|
||||
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \
|
||||
#define _mm_rcp28_round_sd(A, B, R) \
|
||||
(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)); })
|
||||
(__mmask8)-1, (int)(R))
|
||||
|
||||
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \
|
||||
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
|
||||
(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)(__m128d)(S), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \
|
||||
#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
|
||||
(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)); })
|
||||
(__mmask8)(M), (int)(R))
|
||||
|
||||
#define _mm_rcp28_sd(A, B) \
|
||||
_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
@@ -282,4 +282,4 @@
|
||||
#define _mm_maskz_rcp28_sd(M, A, B) \
|
||||
_mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#endif // __AVX512ERINTRIN_H
|
||||
#endif /* __AVX512ERINTRIN_H */
|
||||
|
||||
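The recurring change in this header (and in avx512pfintrin.h below) is that the macros drop the GNU __extension__ ({ ... }) statement-expression wrapper and become plain parenthesized expressions. A generic C illustration of the two styles (the SQUARE_* names are made up; this is not code from the headers):

#include <stdio.h>

/* Old style: relies on the GNU statement-expression extension. */
#define SQUARE_STMT_EXPR(x) __extension__ ({ int _t = (x); _t * _t; })

/* New style: a plain expression, also valid in strict ISO C and in more C++ contexts. */
#define SQUARE_EXPR(x) ((x) * (x))

int main(void) {
  printf("%d %d\n", SQUARE_STMT_EXPR(3), SQUARE_EXPR(4)); /* prints: 9 16 */
  return 0;
}

Note that the plain-expression form evaluates its argument more than once, which is why the intrinsic macros cast every argument rather than binding it to a temporary.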
File diff suppressed because it is too large
@@ -29,62 +29,52 @@
|
||||
#define __IFMAINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y,
|
||||
(__v8di) __Z);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__M,
|
||||
(__v8di)_mm512_madd52hi_epu64(__W, __X, __Y),
|
||||
(__v8di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__M,
|
||||
(__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y,
|
||||
(__v8di) __Z);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__M,
|
||||
(__v8di)_mm512_madd52lo_epu64(__W, __X, __Y),
|
||||
(__v8di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
return (__m512i)__builtin_ia32_selectq_512(__M,
|
||||
(__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
|
||||
(__v8di)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
@@ -29,121 +29,105 @@
|
||||
#define __IFMAVLINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl")))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
|
||||
|
||||
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y,
|
||||
(__v2di) __Z);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__M,
|
||||
(__v2di)_mm_madd52hi_epu64(__W, __X, __Y),
|
||||
(__v2di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__M,
|
||||
(__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
|
||||
(__v2di)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) -1);
|
||||
return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
|
||||
(__v4di)__Z);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__M,
|
||||
(__v4di)_mm256_madd52hi_epu64(__W, __X, __Y),
|
||||
(__v4di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__M,
|
||||
(__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
|
||||
(__v4di)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) -1);
|
||||
return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
|
||||
(__v2di)__Z);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__M,
|
||||
(__v2di)_mm_madd52lo_epu64(__W, __X, __Y),
|
||||
(__v2di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
return (__m128i)__builtin_ia32_selectq_128(__M,
|
||||
(__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
|
||||
(__v2di)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) -1);
|
||||
return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
|
||||
(__v4di)__Z);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__M,
|
||||
(__v4di)_mm256_madd52lo_epu64(__W, __X, __Y),
|
||||
(__v4di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
return (__m256i)__builtin_ia32_selectq_256(__M,
|
||||
(__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
|
||||
(__v4di)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS256
|
||||
|
||||
#endif
|
||||
|
||||
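A hedged usage sketch for the AVX512IFMA intrinsics rewritten above: each 64-bit lane accumulates the low 52 bits of a 52x52-bit product (assuming I read the IFMA semantics correctly; needs IFMA support, something like clang -mavx512ifma):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512i acc = _mm512_set1_epi64(1);
  __m512i a   = _mm512_set1_epi64(3);
  __m512i b   = _mm512_set1_epi64(5);
  /* Each lane: acc + low 52 bits of (a * b) -> 1 + 15 = 16. */
  __m512i r = _mm512_madd52lo_epu64(acc, a, b);
  long long out[8];
  _mm512_storeu_si512((__m512i *)out, r);
  printf("lane0 = %lld\n", out[0]);
  return 0;
}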
@@ -1,4 +1,4 @@
|
||||
/*===------------- avx512pfintrin.h - PF intrinsics ------------------===
|
||||
/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
@@ -31,80 +31,80 @@
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) __extension__ ({\
|
||||
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
|
||||
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) __extension__ ({\
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) ({\
|
||||
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), (int const *)(addr), \
|
||||
(int)(scale), (int)(hint)); })
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) ({\
|
||||
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16) -1, \
|
||||
(__v16si)(__m512i)(index), (int const *)(addr), \
|
||||
(int)(scale), (int)(hint)); })
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) __extension__ ({\
|
||||
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) __extension__ ({\
|
||||
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
|
||||
(long long const *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) ({\
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(int const *)(addr), (int)(scale), (int)(hint)); })
|
||||
(int const *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) ({\
|
||||
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
|
||||
(int const *)(addr), (int)(scale), (int)(hint)); })
|
||||
(int const *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
|
||||
(int *)(addr), (int)(scale), (int)(hint)); })
|
||||
(int *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), (int *)(addr), \
|
||||
(int)(scale), (int)(hint)); })
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(long long *)(addr), (int)(scale), \
|
||||
(int)(hint)); })
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
|
||||
(int *)(addr), (int)(scale), (int)(hint)); })
|
||||
(int *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(int *)(addr), (int)(scale), (int)(hint)); })
|
||||
(int *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
|
||||
c_headers/avx512vbmi2intrin.h (new file, 397 lines)
@@ -0,0 +1,397 @@
|
||||
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VBMI2INTRIN_H
|
||||
#define __AVX512VBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
|
||||
(__v32hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
|
||||
(__v32hi) _mm512_setzero_si512(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
|
||||
(__v64qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
|
||||
(__v64qi) _mm512_setzero_si512(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
|
||||
{
|
||||
__builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
|
||||
{
|
||||
__builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
{
  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
              (__v32hi) __S,
              __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
{
  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
              (__v32hi) _mm512_setzero_si512(),
              __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
{
  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
              (__v64qi) __S,
              __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
{
  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
              (__v64qi) _mm512_setzero_si512(),
              __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
              (__v32hi) __S,
              __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
              (__v32hi) _mm512_setzero_si512(),
              __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
              (__v64qi) __S,
              __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
              (__v64qi) _mm512_setzero_si512(),
              __U);
}

#define _mm512_shldi_epi64(A, B, I) \
  (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I))

#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                      (__v8di)(__m512i)(S))

#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                      (__v8di)_mm512_setzero_si512())

#define _mm512_shldi_epi32(A, B, I) \
  (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I))

#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                      (__v16si)(__m512i)(S))

#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_shldi_epi16(A, B, I) \
  (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
                                     (__v32hi)(__m512i)(B), (int)(I))

#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                      (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                      (__v32hi)(__m512i)(S))

#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                      (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                      (__v32hi)_mm512_setzero_si512())

#define _mm512_shrdi_epi64(A, B, I) \
  (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I))

#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                      (__v8di)(__m512i)(S))

#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                      (__v8di)_mm512_setzero_si512())

#define _mm512_shrdi_epi32(A, B, I) \
  (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I))

#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                      (__v16si)(__m512i)(S))

#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_shrdi_epi16(A, B, I) \
  (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
                                     (__v32hi)(__m512i)(B), (int)(I))

#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                      (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                      (__v32hi)(__m512i)(S))

#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                      (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                      (__v32hi)_mm512_setzero_si512())

static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
|
||||
(__v8di) __A,
|
||||
(__v8di) __B,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
|
||||
(__v16si) __A,
|
||||
(__v16si) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
|
||||
(__v32hi) __A,
|
||||
(__v32hi) __B,
|
||||
(__mmask32) -1);
|
||||
}


#undef __DEFAULT_FN_ATTRS

#endif

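For readers skimming this header, the compress/expand pair above is easiest to see in use together: compressstoreu packs the mask-selected lanes contiguously into memory, and expandloadu scatters them back to their masked positions. The sketch below is illustrative only and not part of the header; it assumes a toolchain and CPU with AVX512BW and AVX512VBMI2 support (e.g. compiled with -mavx512bw -mavx512vbmi2).

/* Usage sketch: pack the even-indexed 16-bit lanes, then re-expand them. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    short src[32], packed[32] = {0};
    for (int i = 0; i < 32; ++i)
        src[i] = (short)i;

    __m512i v = _mm512_loadu_si512(src);
    __mmask32 even = 0x55555555;               /* select even-indexed lanes */

    /* Store only the selected lanes, packed to the front of `packed`. */
    _mm512_mask_compressstoreu_epi16(packed, even, v);

    /* Read them back and spread them out to their original positions;
       unselected lanes are zeroed by the maskz form. */
    __m512i expanded = _mm512_maskz_expandloadu_epi16(even, packed);

    short out[32];
    _mm512_storeu_si512(out, expanded);
    for (int i = 0; i < 32; ++i)
        printf("%d ", out[i]);
    printf("\n");
    return 0;
}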
@@ -29,79 +29,65 @@
|
||||
#define __VBMIINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
|
||||
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
|
||||
__mmask64 __U, __m512i __B)
|
||||
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
|
||||
(__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) __U);
|
||||
return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
|
||||
(__v64qi) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
|
||||
_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
return (__m512i)__builtin_ia32_selectb_512(__U,
|
||||
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
|
||||
(__v64qi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
|
||||
__m512i __I, __m512i __B)
|
||||
_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) __U);
|
||||
return (__m512i)__builtin_ia32_selectb_512(__U,
|
||||
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
|
||||
(__v64qi)__I);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
|
||||
__m512i __I, __m512i __B)
|
||||
_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) __U);
|
||||
return (__m512i)__builtin_ia32_selectb_512(__U,
|
||||
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
|
||||
(__v64qi)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) _mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) _mm512_setzero_si512(),
|
||||
(__mmask64) __M);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
|
||||
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
|
||||
(__v64qi)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
|
||||
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
|
||||
(__v64qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
|
||||
@@ -29,161 +29,127 @@
|
||||
#define __VBMIVLINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl")))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
|
||||
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
|
||||
__m128i __B)
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
|
||||
(__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
|
||||
(__v16qi)__I,
|
||||
(__v16qi)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
|
||||
__mmask32 __U, __m256i __B)
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
|
||||
(__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectb_128(__U,
|
||||
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
|
||||
(__v16qi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) -
|
||||
1);
|
||||
return (__m128i)__builtin_ia32_selectb_128(__U,
|
||||
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
|
||||
(__v16qi)__I);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
|
||||
__m128i __B)
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
return (__m128i)__builtin_ia32_selectb_128(__U,
|
||||
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
|
||||
(__v16qi)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
|
||||
__m128i __B)
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
|
||||
(__v32qi)__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) -
|
||||
1);
|
||||
return (__m256i)__builtin_ia32_selectb_256(__U,
|
||||
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
|
||||
(__v32qi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
|
||||
__m256i __I, __m256i __B)
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectb_256(__U,
|
||||
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
|
||||
(__v32qi)__I);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
|
||||
__m256i __I, __m256i __B)
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
return (__m256i)__builtin_ia32_selectb_256(__U,
|
||||
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
|
||||
(__v32qi)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) _mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) _mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
|
||||
(__v16qi)_mm_permutexvar_epi8(__A, __B),
|
||||
(__v16qi)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
|
||||
(__v16qi)_mm_permutexvar_epi8(__A, __B),
|
||||
(__v16qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) _mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) _mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
|
||||
(__v32qi)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
|
||||
(__v32qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
@@ -192,7 +158,7 @@ _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
@@ -202,7 +168,7 @@ _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
@@ -212,7 +178,7 @@ _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
@@ -221,7 +187,7 @@ _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m2
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
@@ -231,7 +197,7 @@ _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
@@ -242,6 +208,7 @@ _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS256
|
||||
|
||||
#endif
|
||||
|
||||
c_headers/avx512vlbitalgintrin.h | 159 | Normal file
@@ -0,0 +1,159 @@
|
||||
/*===---- avx512vlbitalgintrin.h - BITALG intrinsics -----------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VLBITALGINTRIN_H
|
||||
#define __AVX512VLBITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_popcnt_epi16(__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
|
||||
(__v16hi) _mm256_popcnt_epi16(__B),
|
||||
(__v16hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_popcnt_epi16(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
|
||||
(__v8hi) _mm_popcnt_epi16(__B),
|
||||
(__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
|
||||
{
|
||||
return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_popcnt_epi8(__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
|
||||
(__v32qi) _mm256_popcnt_epi8(__B),
|
||||
(__v32qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_popcnt_epi8(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
|
||||
(__v16qi) _mm_popcnt_epi8(__B),
|
||||
(__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
|
||||
{
|
||||
return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
|
||||
_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
|
||||
_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1,
|
||||
__A,
|
||||
__B);
|
||||
}


#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif
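A short usage sketch of the BITALG byte population-count intrinsic declared above (illustrative only, not part of the diff; assumes AVX512VL and AVX512BITALG support, e.g. -mavx512vl -mavx512bitalg):

/* Usage sketch: per-byte popcount over a 256-bit vector. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned char bytes[32];
    for (int i = 0; i < 32; ++i)
        bytes[i] = (unsigned char)i;

    __m256i v = _mm256_loadu_si256((const __m256i *)bytes);
    __m256i counts = _mm256_popcnt_epi8(v);    /* bit count of every byte */

    unsigned char out[32];
    _mm256_storeu_si256((__m256i *)out, counts);
    for (int i = 0; i < 32; ++i)
        printf("%d ", (int)out[i]);
    printf("\n");
    return 0;
}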
File diff suppressed because it is too large
@@ -1,4 +1,4 @@
|
||||
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ---------------------------===
|
||||
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -28,35 +28,36 @@
|
||||
#define __AVX512VLCDINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd")))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
|
||||
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_broadcastmb128 (__A);
|
||||
return (__m128i) _mm_set1_epi64x((long long) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_broadcastmb256 (__A);
|
||||
return (__m256i) _mm256_set1_epi64x((long long)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_broadcastmw128 (__A);
|
||||
return (__m128i) _mm_set1_epi32((int)__A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_broadcastmw256 (__A);
|
||||
return (__m256i) _mm256_set1_epi32((int)__A);
|
||||
}
|
||||
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_conflict_epi64 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
|
||||
@@ -64,7 +65,7 @@ _mm_conflict_epi64 (__m128i __A)
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
|
||||
@@ -72,16 +73,16 @@ _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
|
||||
(__v2di)
|
||||
_mm_setzero_di (),
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_conflict_epi64 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
|
||||
@@ -89,7 +90,7 @@ _mm256_conflict_epi64 (__m256i __A)
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
|
||||
@@ -97,7 +98,7 @@ _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
|
||||
@@ -105,7 +106,7 @@ _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_conflict_epi32 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
|
||||
@@ -113,7 +114,7 @@ _mm_conflict_epi32 (__m128i __A)
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
|
||||
@@ -121,7 +122,7 @@ _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
|
||||
@@ -129,7 +130,7 @@ _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_conflict_epi32 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
|
||||
@@ -137,7 +138,7 @@ _mm256_conflict_epi32 (__m256i __A)
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
|
||||
@@ -145,7 +146,7 @@ _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
|
||||
@@ -154,110 +155,95 @@ _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_lzcnt_epi32 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
|
||||
(__v4si)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) -1);
|
||||
return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
|
||||
(__v4si) __W,
|
||||
(__mmask8) __U);
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
|
||||
(__v4si)_mm_lzcnt_epi32(__A),
|
||||
(__v4si)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
|
||||
(__v4si)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
|
||||
(__v4si)_mm_lzcnt_epi32(__A),
|
||||
(__v4si)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_lzcnt_epi32 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) -1);
|
||||
return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
|
||||
(__v8si) __W,
|
||||
(__mmask8) __U);
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
|
||||
(__v8si)_mm256_lzcnt_epi32(__A),
|
||||
(__v8si)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) __U);
|
||||
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
|
||||
(__v8si)_mm256_lzcnt_epi32(__A),
|
||||
(__v8si)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_lzcnt_epi64 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
|
||||
(__v2di)
|
||||
_mm_setzero_di (),
|
||||
(__mmask8) -1);
|
||||
return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
|
||||
(__v2di) __W,
|
||||
(__mmask8) __U);
|
||||
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
|
||||
(__v2di)_mm_lzcnt_epi64(__A),
|
||||
(__v2di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
|
||||
(__v2di)
|
||||
_mm_setzero_di (),
|
||||
(__mmask8) __U);
|
||||
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
|
||||
(__v2di)_mm_lzcnt_epi64(__A),
|
||||
(__v2di)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_lzcnt_epi64 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
|
||||
(__v4di)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) -1);
|
||||
return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
|
||||
(__v4di) __W,
|
||||
(__mmask8) __U);
|
||||
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
|
||||
(__v4di)_mm256_lzcnt_epi64(__A),
|
||||
(__v4di)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
|
||||
(__v4di)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) __U);
|
||||
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
|
||||
(__v4di)_mm256_lzcnt_epi64(__A),
|
||||
(__v4di)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS256
|
||||
|
||||
#endif /* __AVX512VLCDINTRIN_H */
|
||||
|
||||
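As with the other headers, a brief usage sketch of the AVX512VL/AVX512CD leading-zero-count intrinsic shown in this hunk (illustrative only, not part of the diff; assumes -mavx512vl -mavx512cd):

/* Usage sketch: per-element lzcnt; a zero element reports 32. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned int src[8] = {1, 2, 4, 8, 0x100, 0x10000, 0, 0xFFFFFFFFu};
    __m256i v = _mm256_loadu_si256((const __m256i *)src);
    __m256i lz = _mm256_lzcnt_epi32(v);

    int out[8];
    _mm256_storeu_si256((__m256i *)out, lz);
    for (int i = 0; i < 8; ++i)
        printf("lzcnt(%#x) = %d\n", src[i], out[i]);
    return 0;
}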
File diff suppressed because it is too large
File diff suppressed because it is too large
c_headers/avx512vlvbmi2intrin.h | 751 | Normal file
@@ -0,0 +1,751 @@
|
||||
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VLVBMI2INTRIN_H
|
||||
#define __AVX512VLVBMI2INTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
|
||||
(__v8hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
|
||||
(__v8hi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
__builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
__builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
|
||||
(__v8hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
|
||||
(__v8hi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
|
||||
(__v8hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
|
||||
(__v8hi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
|
||||
(__v16qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
|
||||
(__v16qi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
|
||||
(__v16hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
|
||||
(__v16hi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
|
||||
{
|
||||
__builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
|
||||
{
|
||||
__builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
|
||||
(__v16hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
|
||||
(__v16hi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
|
||||
(__v16hi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
|
||||
(__v16hi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
|
||||
(__v32qi) __S,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
|
||||
(__v32qi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
#define _mm256_shldi_epi64(A, B, I) \
|
||||
(__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \
|
||||
(__v4di)(__m256i)(B), (int)(I))
|
||||
|
||||
#define _mm256_mask_shldi_epi64(S, U, A, B, I) \
|
||||
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
|
||||
(__v4di)_mm256_shldi_epi64((A), (B), (I)), \
|
||||
(__v4di)(__m256i)(S))
|
||||
|
||||
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
|
||||
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
|
||||
(__v4di)_mm256_shldi_epi64((A), (B), (I)), \
|
||||
(__v4di)_mm256_setzero_si256())
|
||||
|
||||
#define _mm_shldi_epi64(A, B, I) \
|
||||
(__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \
|
||||
(__v2di)(__m128i)(B), (int)(I))
|
||||
|
||||
#define _mm_mask_shldi_epi64(S, U, A, B, I) \
|
||||
(__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
|
||||
(__v2di)_mm_shldi_epi64((A), (B), (I)), \
|
||||
(__v2di)(__m128i)(S))
|
||||
|
||||
#define _mm_maskz_shldi_epi64(U, A, B, I) \
|
||||
(__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
|
||||
(__v2di)_mm_shldi_epi64((A), (B), (I)), \
|
||||
(__v2di)_mm_setzero_si128())
|
||||
|
||||
#define _mm256_shldi_epi32(A, B, I) \
|
||||
(__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \
|
||||
(__v8si)(__m256i)(B), (int)(I))
|
||||
|
||||
#define _mm256_mask_shldi_epi32(S, U, A, B, I) \
|
||||
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
|
||||
                                      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                      (__v8si)(__m256i)(S))

#define _mm256_maskz_shldi_epi32(U, A, B, I) \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                      (__v8si)_mm256_setzero_si256())

#define _mm_shldi_epi32(A, B, I) \
  (__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(I))

#define _mm_mask_shldi_epi32(S, U, A, B, I) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                      (__v4si)(__m128i)(S))

#define _mm_maskz_shldi_epi32(U, A, B, I) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                      (__v4si)_mm_setzero_si128())

#define _mm256_shldi_epi16(A, B, I) \
  (__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \
                                     (__v16hi)(__m256i)(B), (int)(I))

#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                      (__v16hi)(__m256i)(S))

#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                      (__v16hi)_mm256_setzero_si256())

#define _mm_shldi_epi16(A, B, I) \
  (__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
                                     (__v8hi)(__m128i)(B), (int)(I))

#define _mm_mask_shldi_epi16(S, U, A, B, I) \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)(__m128i)(S))

#define _mm_maskz_shldi_epi16(U, A, B, I) \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)_mm_setzero_si128())

#define _mm256_shrdi_epi64(A, B, I) \
  (__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(I))

#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                      (__v4di)(__m256i)(S))

#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                      (__v4di)_mm256_setzero_si256())

#define _mm_shrdi_epi64(A, B, I) \
  (__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(I))

#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)(__m128i)(S))

#define _mm_maskz_shrdi_epi64(U, A, B, I) \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)_mm_setzero_si128())

#define _mm256_shrdi_epi32(A, B, I) \
  (__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(I))

#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                      (__v8si)(__m256i)(S))

#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                      (__v8si)_mm256_setzero_si256())

#define _mm_shrdi_epi32(A, B, I) \
  (__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(I))

#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)(__m128i)(S))

#define _mm_maskz_shrdi_epi32(U, A, B, I) \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)_mm_setzero_si128())

#define _mm256_shrdi_epi16(A, B, I) \
  (__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
                                     (__v16hi)(__m256i)(B), (int)(I))

#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                      (__v16hi)(__m256i)(S))

#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                      (__v16hi)_mm256_setzero_si256())

#define _mm_shrdi_epi16(A, B, I) \
  (__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
                                     (__v8hi)(__m128i)(B), (int)(I))

#define _mm_mask_shrdi_epi16(S, U, A, B, I) \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                      (__v8hi)(__m128i)(S))

#define _mm_maskz_shrdi_epi16(U, A, B, I) \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                      (__v8hi)_mm_setzero_si128())
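/* Usage sketch (not part of the diff): the immediate funnel-shift macros above
 * concatenate two lanes and shift by a constant, so passing the same vector as
 * both operands gives a per-lane rotate. Assumes a build with
 * -mavx512vl -mavx512vbmi2; the helper name rotl32_by8_256 is illustrative. */
#include <immintrin.h>
static inline __m256i rotl32_by8_256(__m256i x) {
  /* Each 32-bit lane becomes (x << 8) | (x >> 24). */
  return _mm256_shldi_epi32(x, x, 8);
}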
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvq256_mask((__v4di)__S, (__v4di)__A,
                                                  (__v4di)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvq256_maskz((__v4di)__S, (__v4di)__A,
                                                   (__v4di)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvq256_mask((__v4di)__S, (__v4di)__A,
                                                  (__v4di)__B, (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvq128_mask((__v2di)__S, (__v2di)__A,
                                                  (__v2di)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvq128_maskz((__v2di)__S, (__v2di)__A,
                                                   (__v2di)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvq128_mask((__v2di)__S, (__v2di)__A,
                                                  (__v2di)__B, (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvd256_maskz((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvd128_mask((__v4si)__S, (__v4si)__A,
                                                  (__v4si)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvd128_maskz((__v4si)__S, (__v4si)__A,
                                                   (__v4si)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvd128_mask((__v4si)__S, (__v4si)__A,
                                                  (__v4si)__B, (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvw256_mask((__v16hi)__S, (__v16hi)__A,
                                                  (__v16hi)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvw256_maskz((__v16hi)__S, (__v16hi)__A,
                                                   (__v16hi)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshldvw256_mask((__v16hi)__S, (__v16hi)__A,
                                                  (__v16hi)__B, (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvw128_mask((__v8hi)__S, (__v8hi)__A,
                                                  (__v8hi)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvw128_maskz((__v8hi)__S, (__v8hi)__A,
                                                   (__v8hi)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshldvw128_mask((__v8hi)__S, (__v8hi)__A,
                                                  (__v8hi)__B, (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvq256_mask((__v4di)__S, (__v4di)__A,
                                                  (__v4di)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvq256_maskz((__v4di)__S, (__v4di)__A,
                                                   (__v4di)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvq256_mask((__v4di)__S, (__v4di)__A,
                                                  (__v4di)__B, (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvq128_mask((__v2di)__S, (__v2di)__A,
                                                  (__v2di)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvq128_maskz((__v2di)__S, (__v2di)__A,
                                                   (__v2di)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvq128_mask((__v2di)__S, (__v2di)__A,
                                                  (__v2di)__B, (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvd256_maskz((__v8si)__S, (__v8si)__A,
                                                   (__v8si)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvd256_mask((__v8si)__S, (__v8si)__A,
                                                  (__v8si)__B, (__mmask8)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvd128_mask((__v4si)__S, (__v4si)__A,
                                                  (__v4si)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvd128_maskz((__v4si)__S, (__v4si)__A,
                                                   (__v4si)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvd128_mask((__v4si)__S, (__v4si)__A,
                                                  (__v4si)__B, (__mmask8)-1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvw256_mask((__v16hi)__S, (__v16hi)__A,
                                                  (__v16hi)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvw256_maskz((__v16hi)__S, (__v16hi)__A,
                                                   (__v16hi)__B, __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpshrdvw256_mask((__v16hi)__S, (__v16hi)__A,
                                                  (__v16hi)__B, (__mmask16)-1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvw128_mask((__v8hi)__S, (__v8hi)__A,
                                                  (__v8hi)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvw128_maskz((__v8hi)__S, (__v8hi)__A,
                                                   (__v8hi)__B, __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpshrdvw128_mask((__v8hi)__S, (__v8hi)__A,
                                                  (__v8hi)__B, (__mmask8)-1);
}

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif
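/* Usage sketch (not part of the diff): the variable-count forms above take the
 * shift amount per lane from the third operand, so a per-lane variable rotate
 * is a funnel shift of a vector with itself. Assumes -mavx512vl -mavx512vbmi2;
 * the helper name rotlv32_128 is illustrative. */
#include <immintrin.h>
static inline __m128i rotlv32_128(__m128i x, __m128i counts) {
  /* Lane i becomes x[i] rotated left by (counts[i] & 31). */
  return _mm_shldv_epi32(x, x, counts);
}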
223  c_headers/avx512vlvnniintrin.h  Normal file
@@ -0,0 +1,223 @@
/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLVNNIINTRIN_H
#define __AVX512VLVNNIINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))


static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A,
                                             (__v8si)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)__S);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A,
                                              (__v8si)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)__S);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A,
                                             (__v8si)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)__S);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A,
                                              (__v8si)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)__S);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i)__builtin_ia32_selectd_256(__U,
      (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A,
                                             (__v4si)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)__S);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A,
                                              (__v4si)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)__S);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A,
                                             (__v4si)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpwssd_epi32(__S, __A, __B), (__v4si)__S);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpwssd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A,
                                              (__v4si)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpwssds_epi32(__S, __A, __B), (__v4si)__S);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_selectd_128(__U,
      (__v4si)_mm_dpwssds_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128());
}

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif
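/* Usage sketch (not part of the diff): VPDPBUSD accumulates unsigned-byte x
 * signed-byte products in groups of four into 32-bit sums, the core step of
 * many int8 dot products. Assumes -mavx512vl -mavx512vnni; names are
 * illustrative. */
#include <immintrin.h>
static inline __m256i dot_u8s8_step(__m256i acc, __m256i u8_vals, __m256i s8_vals) {
  /* acc[i] += sum over k<4 of (unsigned)u8_vals[4*i+k] * (signed)s8_vals[4*i+k] */
  return _mm256_dpbusd_epi32(acc, u8_vals, s8_vals);
}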
129  c_headers/avx512vnniintrin.h  Normal file
@@ -0,0 +1,129 @@
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VNNIINTRIN_H
#define __AVX512VNNIINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))


static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A,
                                             (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), (__v16si)__S);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A,
                                              (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), (__v16si)__S);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A,
                                             (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), (__v16si)__S);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A,
                                              (__v16si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), (__v16si)__S);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
      (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512());
}

#undef __DEFAULT_FN_ATTRS

#endif
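/* Usage sketch (not part of the diff): the 512-bit forms follow the same
 * pattern, and the masked variants let the accumulation be suppressed per
 * 32-bit lane. Assumes -mavx512vnni; names are illustrative. */
#include <immintrin.h>
static inline __m512i masked_dot_step(__m512i acc, __mmask16 live,
                                      __m512i u8_vals, __m512i s8_vals) {
  /* Lanes whose mask bit is 0 keep their previous accumulator value. */
  return _mm512_mask_dpbusd_epi32(acc, live, u8_vals, s8_vals);
}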
@@ -1,5 +1,4 @@
/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
 *------------------===
/*===----- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics-------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -32,8 +31,7 @@

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \
                                                            "q")))
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
  return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
105  c_headers/avx512vpopcntdqvlintrin.h  Normal file
@@ -0,0 +1,105 @@
/*===---- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics -------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error \
    "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VPOPCNTDQVLINTRIN_H
#define __AVX512VPOPCNTDQVLINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi64(__m128i __A) {
  return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
  return (__m128i)__builtin_ia32_selectq_128(
      (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
  return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi32(__m128i __A) {
  return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
  return (__m128i)__builtin_ia32_selectd_128(
      (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
  return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi64(__m256i __A) {
  return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
  return (__m256i)__builtin_ia32_selectq_256(
      (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
  return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi32(__m256i __A) {
  return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
  return (__m256i)__builtin_ia32_selectd_256(
      (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
  return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A);
}

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif
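/* Usage sketch (not part of the diff): with AVX512VPOPCNTDQ+VL the per-element
 * population count replaces a scalar __builtin_popcountll loop. Assumes
 * -mavx512vpopcntdq -mavx512vl; the helper name is illustrative. */
#include <immintrin.h>
static inline __m256i hamming_weights_64(__m256i v) {
  /* Each 64-bit lane is replaced by the number of set bits it contained. */
  return _mm256_popcnt_epi64(v);
}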
File diff suppressed because it is too large
@@ -49,7 +49,7 @@
|
||||
to use it as a potentially faster version of BSF. */
|
||||
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
/// \brief Counts the number of trailing zero bits in the operand.
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -65,7 +65,7 @@ __tzcnt_u16(unsigned short __X)
|
||||
return __X ? __builtin_ctzs(__X) : 16;
|
||||
}
|
||||
|
||||
/// \brief Performs a bitwise AND of the second operand with the one's
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -85,7 +85,7 @@ __andn_u32(unsigned int __X, unsigned int __Y)
|
||||
}
|
||||
|
||||
/* AMD-specified, double-leading-underscore version of BEXTR */
|
||||
/// \brief Extracts the specified bits from the first operand and returns them
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -100,6 +100,7 @@ __andn_u32(unsigned int __X, unsigned int __Y)
|
||||
/// number of bits to be extracted.
|
||||
/// \returns An unsigned integer whose least significant bits contain the
|
||||
/// extracted bits.
|
||||
/// \see _bextr_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__bextr_u32(unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
@@ -107,7 +108,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
|
||||
}
|
||||
|
||||
/* Intel-specified, single-leading-underscore version of BEXTR */
|
||||
/// \brief Extracts the specified bits from the first operand and returns them
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -124,13 +125,14 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
|
||||
/// Bits [7:0] specify the number of bits.
|
||||
/// \returns An unsigned integer whose least significant bits contain the
|
||||
/// extracted bits.
|
||||
/// \see __bextr_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
|
||||
{
|
||||
return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
}
|
||||
|
||||
/// \brief Clears all bits in the source except for the least significant bit
|
||||
/// Clears all bits in the source except for the least significant bit
|
||||
/// containing a value of 1 and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -147,7 +149,7 @@ __blsi_u32(unsigned int __X)
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
@@ -164,7 +166,7 @@ __blsmsk_u32(unsigned int __X)
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
/// \brief Clears the least significant bit that is set to 1 in the source
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -181,7 +183,7 @@ __blsr_u32(unsigned int __X)
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
/// \brief Counts the number of trailing zero bits in the operand.
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -197,7 +199,7 @@ __tzcnt_u32(unsigned int __X)
|
||||
return __X ? __builtin_ctz(__X) : 32;
|
||||
}
|
||||
|
||||
/// \brief Counts the number of trailing zero bits in the operand.
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -226,7 +228,7 @@ _mm_tzcnt_32(unsigned int __X)
|
||||
|
||||
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
|
||||
|
||||
/// \brief Performs a bitwise AND of the second operand with the one's
|
||||
/// Performs a bitwise AND of the second operand with the one's
|
||||
/// complement of the first operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -246,7 +248,7 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
}
|
||||
|
||||
/* AMD-specified, double-leading-underscore version of BEXTR */
|
||||
/// \brief Extracts the specified bits from the first operand and returns them
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -261,6 +263,7 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
/// the number of bits to be extracted.
|
||||
/// \returns An unsigned 64-bit integer whose least significant bits contain the
|
||||
/// extracted bits.
|
||||
/// \see _bextr_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__bextr_u64(unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
@@ -268,7 +271,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
|
||||
}
|
||||
|
||||
/* Intel-specified, single-leading-underscore version of BEXTR */
|
||||
/// \brief Extracts the specified bits from the first operand and returns them
|
||||
/// Extracts the specified bits from the first operand and returns them
|
||||
/// in the least significant bits of the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -285,13 +288,14 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
|
||||
/// Bits [7:0] specify the number of bits.
|
||||
/// \returns An unsigned 64-bit integer whose least significant bits contain the
|
||||
/// extracted bits.
|
||||
/// \see __bextr_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
|
||||
{
|
||||
return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
}
|
||||
|
||||
/// \brief Clears all bits in the source except for the least significant bit
|
||||
/// Clears all bits in the source except for the least significant bit
|
||||
/// containing a value of 1 and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -308,7 +312,7 @@ __blsi_u64(unsigned long long __X)
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// Creates a mask whose bits are set to 1, using bit 0 up to and
|
||||
/// including the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
@@ -325,7 +329,7 @@ __blsmsk_u64(unsigned long long __X)
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
/// \brief Clears the least significant bit that is set to 1 in the source
|
||||
/// Clears the least significant bit that is set to 1 in the source
|
||||
/// operand and returns the result.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -342,7 +346,7 @@ __blsr_u64(unsigned long long __X)
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
/// \brief Counts the number of trailing zero bits in the operand.
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -358,7 +362,7 @@ __tzcnt_u64(unsigned long long __X)
|
||||
return __X ? __builtin_ctzll(__X) : 64;
|
||||
}
|
||||
|
||||
/// \brief Counts the number of trailing zero bits in the operand.
|
||||
/// Counts the number of trailing zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
|
||||
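/* Usage sketch (not part of the diff): the BMI intrinsics documented above
 * combine into the usual set-bit iteration idiom. Compile with -mbmi; the
 * callback name is illustrative. */
#include <x86intrin.h>
static inline void for_each_set_bit(unsigned int mask, void (*fn)(unsigned int)) {
  while (mask) {
    fn(__tzcnt_u32(mask));   /* index of the lowest set bit */
    mask = __blsr_u32(mask); /* clear that bit */
  }
}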
113  c_headers/cetintrin.h  Normal file
@@ -0,0 +1,113 @@
|
||||
/*===---- cetintrin.h - CET intrinsic --------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <cetintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __CETINTRIN_H
|
||||
#define __CETINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("shstk")))
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {
|
||||
__builtin_ia32_incsspd(__a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {
|
||||
__builtin_ia32_incsspq(__a);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {
|
||||
__builtin_ia32_incsspq(__a);
|
||||
}
|
||||
#else /* __x86_64__ */
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {
|
||||
__builtin_ia32_incsspd((int)__a);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {
|
||||
return __builtin_ia32_rdsspd(__a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {
|
||||
return __builtin_ia32_rdsspq(__a);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) {
|
||||
return __builtin_ia32_rdsspq(0);
|
||||
}
|
||||
#else /* __x86_64__ */
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) {
|
||||
return __builtin_ia32_rdsspd(0);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {
|
||||
__builtin_ia32_saveprevssp();
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {
|
||||
__builtin_ia32_rstorssp(__p);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {
|
||||
__builtin_ia32_wrssd(__a, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {
|
||||
__builtin_ia32_wrssq(__a, __p);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {
|
||||
__builtin_ia32_wrussd(__a, __p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {
|
||||
__builtin_ia32_wrussq(__a, __p);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {
|
||||
__builtin_ia32_setssbsy();
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {
|
||||
__builtin_ia32_clrssbsy(__p);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __CETINTRIN_H */
|
||||
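/* Usage sketch (not part of the diff): reading the current shadow-stack
 * pointer with the intrinsics above. Only meaningful on CET-enabled hardware
 * in a -mshstk build; the function name is illustrative. */
#include <immintrin.h>
static inline unsigned long long current_shadow_stack(void) {
  /* RDSSP leaves the input unchanged when shadow stacks are disabled,
     so _get_ssp() returns 0 in that case. */
  return (unsigned long long)_get_ssp();
}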
42  c_headers/cldemoteintrin.h  Normal file
@@ -0,0 +1,42 @@
|
||||
/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
|
||||
#error "Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __CLDEMOTEINTRIN_H
|
||||
#define __CLDEMOTEINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("cldemote")))
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_cldemote(const void * __P) {
|
||||
__builtin_ia32_cldemote(__P);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
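/* Usage sketch (not part of the diff): _cldemote hints that a just-written
 * cache line will next be read by another core, so it can be pushed toward a
 * more distant, shared cache level. Compile with -mcldemote; names are
 * illustrative. */
#include <x86intrin.h>
static inline void publish_value(int *slot, int value) {
  *slot = value;
  _cldemote(slot); /* the demotion is only a hint and may be ignored */
}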
@@ -1,4 +1,4 @@
|
||||
/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------------------===
|
||||
/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -32,7 +32,7 @@
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt")))
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_clflushopt(char * __m) {
|
||||
_mm_clflushopt(void const * __m) {
|
||||
__builtin_ia32_clflushopt(__m);
|
||||
}
|
||||
|
||||
|
||||
52  c_headers/clwbintrin.h  Normal file
@@ -0,0 +1,52 @@
|
||||
/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __CLWBINTRIN_H
|
||||
#define __CLWBINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb")))
|
||||
|
||||
/// Writes back to memory the cache line (if modified) that contains the
|
||||
/// linear address specified in \a __p from any level of the cache hierarchy in
|
||||
/// the cache coherence domain
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CLWB </c> instruction.
|
||||
///
|
||||
/// \param __p
|
||||
/// A pointer to the memory location used to identify the cache line to be
|
||||
/// written back.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_clwb(void const *__p) {
|
||||
__builtin_ia32_clwb(__p);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
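/* Usage sketch (not part of the diff): a persistent-memory style write-back
 * loop built on _mm_clwb. Assumes -mclwb, a 64-byte line size, and a
 * line-aligned buffer; names are illustrative. */
#include <immintrin.h>
#include <stddef.h>
static inline void writeback_range(const void *buf, size_t len) {
  const char *p = (const char *)buf;
  for (size_t off = 0; off < len; off += 64)
    _mm_clwb(p + off);       /* write back each dirty line without evicting it */
  _mm_sfence();              /* order the write-backs before later stores */
}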
@@ -20,18 +20,18 @@
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __X86INTRIN_H
|
||||
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
|
||||
#error "Never use <clzerointrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLZEROINTRIN_H
|
||||
#define _CLZEROINTRIN_H
|
||||
#ifndef __CLZEROINTRIN_H
|
||||
#define __CLZEROINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("clzero")))
|
||||
|
||||
/// \brief Loads the cache line address and zero's out the cacheline
|
||||
/// Loads the cache line address and zero's out the cacheline
|
||||
///
|
||||
/// \headerfile <clzerointrin.h>
|
||||
///
|
||||
@@ -45,6 +45,6 @@ _mm_clzero (void * __line)
|
||||
__builtin_ia32_clzero ((void *)__line);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* _CLZEROINTRIN_H */
|
||||
#endif /* __CLZEROINTRIN_H */
|
||||
|
||||
@@ -156,6 +156,7 @@
|
||||
#define bit_SMEP 0x00000080
|
||||
#define bit_BMI2 0x00000100
|
||||
#define bit_ENH_MOVSB 0x00000200
|
||||
#define bit_INVPCID 0x00000400
|
||||
#define bit_RTM 0x00000800
|
||||
#define bit_MPX 0x00004000
|
||||
#define bit_AVX512F 0x00010000
|
||||
@@ -166,32 +167,49 @@
|
||||
#define bit_CLFLUSHOPT 0x00800000
|
||||
#define bit_CLWB 0x01000000
|
||||
#define bit_AVX512PF 0x04000000
|
||||
#define bit_AVX51SER 0x08000000
|
||||
#define bit_AVX512ER 0x08000000
|
||||
#define bit_AVX512CD 0x10000000
|
||||
#define bit_SHA 0x20000000
|
||||
#define bit_AVX512BW 0x40000000
|
||||
#define bit_AVX512VL 0x80000000
|
||||
|
||||
/* Features in %ecx for leaf 7 sub-leaf 0 */
|
||||
#define bit_PREFTCHWT1 0x00000001
|
||||
#define bit_AVX512VBMI 0x00000002
|
||||
#define bit_PKU 0x00000004
|
||||
#define bit_OSPKE 0x00000010
|
||||
#define bit_PREFTCHWT1 0x00000001
|
||||
#define bit_AVX512VBMI 0x00000002
|
||||
#define bit_PKU 0x00000004
|
||||
#define bit_OSPKE 0x00000010
|
||||
#define bit_WAITPKG 0x00000020
|
||||
#define bit_AVX512VBMI2 0x00000040
|
||||
#define bit_SHSTK 0x00000080
|
||||
#define bit_GFNI 0x00000100
|
||||
#define bit_VAES 0x00000200
|
||||
#define bit_VPCLMULQDQ 0x00000400
|
||||
#define bit_AVX512VNNI 0x00000800
|
||||
#define bit_AVX512BITALG 0x00001000
|
||||
#define bit_AVX512VPOPCNTDQ 0x00004000
|
||||
#define bit_RDPID 0x00400000
|
||||
#define bit_RDPID 0x00400000
|
||||
#define bit_CLDEMOTE 0x02000000
|
||||
#define bit_MOVDIRI 0x08000000
|
||||
#define bit_MOVDIR64B 0x10000000
|
||||
|
||||
/* Features in %edx for leaf 7 sub-leaf 0 */
|
||||
#define bit_AVX5124VNNIW 0x00000004
|
||||
#define bit_AVX5124FMAPS 0x00000008
|
||||
#define bit_PCONFIG 0x00040000
|
||||
#define bit_IBT 0x00100000
|
||||
|
||||
/* Features in %eax for leaf 13 sub-leaf 1 */
|
||||
#define bit_XSAVEOPT 0x00000001
|
||||
#define bit_XSAVEC 0x00000002
|
||||
#define bit_XSAVES 0x00000008
|
||||
|
||||
/* Features in %eax for leaf 0x14 sub-leaf 0 */
|
||||
#define bit_PTWRITE 0x00000010
|
||||
|
||||
/* Features in %ecx for leaf 0x80000001 */
|
||||
#define bit_LAHF_LM 0x00000001
|
||||
#define bit_ABM 0x00000020
|
||||
#define bit_LZCNT bit_ABM /* for gcc compat */
|
||||
#define bit_SSE4a 0x00000040
|
||||
#define bit_PRFCHW 0x00000100
|
||||
#define bit_XOP 0x00000800
|
||||
@@ -206,8 +224,9 @@
|
||||
#define bit_3DNOWP 0x40000000
|
||||
#define bit_3DNOW 0x80000000
|
||||
|
||||
/* Features in %ebx for leaf 0x80000001 */
|
||||
/* Features in %ebx for leaf 0x80000008 */
|
||||
#define bit_CLZERO 0x00000001
|
||||
#define bit_WBNOINVD 0x00000200
|
||||
|
||||
|
||||
#if __i386__
|
||||
|
||||
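/* Usage sketch (not part of the diff): the new leaf-7 %ecx bits above can be
 * tested at run time. This assumes __get_cpuid_count is available from this
 * header (it is in recent versions); the function name is illustrative. */
#include <cpuid.h>
static inline int cpu_has_avx512vnni(void) {
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return 0;                       /* leaf 7 not supported */
  return (ecx & bit_AVX512VNNI) != 0;
}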
@@ -24,28 +24,36 @@
|
||||
#ifndef __CLANG_CUDA_WRAPPERS_ALGORITHM
|
||||
#define __CLANG_CUDA_WRAPPERS_ALGORITHM
|
||||
|
||||
// This header defines __device__ overloads of std::min/max, but only if we're
|
||||
// <= C++11. In C++14, these functions are constexpr, and so are implicitly
|
||||
// __host__ __device__.
|
||||
// This header defines __device__ overloads of std::min/max.
|
||||
//
|
||||
// We don't support the initializer_list overloads because
|
||||
// initializer_list::begin() and end() are not __host__ __device__ functions.
|
||||
// Ideally we'd declare these functions only if we're <= C++11. In C++14,
|
||||
// these functions are constexpr, and so are implicitly __host__ __device__.
|
||||
//
|
||||
// When compiling in C++14 mode, we could force std::min/max to have different
|
||||
// implementations for host and device, by declaring the device overloads
|
||||
// before the constexpr overloads appear. We choose not to do this because
|
||||
|
||||
// a) why write our own implementation when we can use one from the standard
|
||||
// library? and
|
||||
// b) libstdc++ is evil and declares min/max inside a header that is included
|
||||
// *before* we include <algorithm>. So we'd have to unconditionally
|
||||
// declare our __device__ overloads of min/max, but that would pollute
|
||||
// things for people who choose not to include <algorithm>.
|
||||
// However, the compiler being in C++14 mode does not imply that the standard
|
||||
// library supports C++14. There is no macro we can test to check that the
|
||||
// stdlib has constexpr std::min/max. Thus we have to unconditionally define
|
||||
// our device overloads.
|
||||
//
|
||||
// A host+device function cannot be overloaded, and a constexpr function
|
||||
// implicitly become host device if there's no explicitly host or device
|
||||
// overload preceding it. So the simple thing to do would be to declare our
|
||||
// device min/max overloads, and then #include_next <algorithm>. This way our
|
||||
// device overloads would come first, and so if we have a C++14 stdlib, its
|
||||
// min/max won't become host+device and conflict with our device overloads.
|
||||
//
|
||||
// But that also doesn't work. libstdc++ is evil and declares std::min/max in
|
||||
// an internal header that is included *before* <algorithm>. Thus by the time
|
||||
// we're inside of this file, std::min/max may already have been declared, and
|
||||
// thus we can't prevent them from becoming host+device if they're constexpr.
|
||||
//
|
||||
// Therefore we perpetrate the following hack: We mark our __device__ overloads
|
||||
// with __attribute__((enable_if(true, ""))). This causes the signature of the
|
||||
// function to change without changing anything else about it. (Except that
|
||||
// overload resolution will prefer it over the __host__ __device__ version
|
||||
// rather than considering them equally good).
|
||||
|
||||
#include_next <algorithm>
|
||||
|
||||
#if __cplusplus <= 201103L
|
||||
|
||||
// We need to define these overloads in exactly the namespace our standard
|
||||
// library uses (including the right inline namespace), otherwise they won't be
|
||||
// picked up by other functions in the standard library (e.g. functions in
|
||||
@@ -59,30 +67,43 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#pragma push_macro("_CPP14_CONSTEXPR")
|
||||
#if __cplusplus >= 201402L
|
||||
#define _CPP14_CONSTEXPR constexpr
|
||||
#else
|
||||
#define _CPP14_CONSTEXPR
|
||||
#endif
|
||||
|
||||
template <class __T, class __Cmp>
|
||||
inline __device__ const __T &
|
||||
__attribute__((enable_if(true, "")))
|
||||
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
|
||||
max(const __T &__a, const __T &__b, __Cmp __cmp) {
|
||||
return __cmp(__a, __b) ? __b : __a;
|
||||
}
|
||||
|
||||
template <class __T>
|
||||
inline __device__ const __T &
|
||||
__attribute__((enable_if(true, "")))
|
||||
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
|
||||
max(const __T &__a, const __T &__b) {
|
||||
return __a < __b ? __b : __a;
|
||||
}
|
||||
|
||||
template <class __T, class __Cmp>
|
||||
inline __device__ const __T &
|
||||
__attribute__((enable_if(true, "")))
|
||||
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
|
||||
min(const __T &__a, const __T &__b, __Cmp __cmp) {
|
||||
return __cmp(__b, __a) ? __b : __a;
|
||||
}
|
||||
|
||||
template <class __T>
|
||||
inline __device__ const __T &
|
||||
__attribute__((enable_if(true, "")))
|
||||
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
|
||||
min(const __T &__a, const __T &__b) {
|
||||
return __a < __b ? __b : __a;
|
||||
return __a < __b ? __a : __b;
|
||||
}
|
||||
|
||||
#pragma pop_macro("_CPP14_CONSTEXPR")
|
||||
|
||||
#ifdef _LIBCPP_END_NAMESPACE_STD
|
||||
_LIBCPP_END_NAMESPACE_STD
|
||||
#else
|
||||
@@ -92,5 +113,4 @@ _GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace std
|
||||
#endif
|
||||
|
||||
#endif // __cplusplus <= 201103L
|
||||
#endif // __CLANG_CUDA_WRAPPERS_ALGORITHM
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
|
||||
#include_next <new>
|
||||
|
||||
// Device overrides for placement new and delete.
|
||||
#pragma push_macro("CUDA_NOEXCEPT")
|
||||
#if __cplusplus >= 201103L
|
||||
#define CUDA_NOEXCEPT noexcept
|
||||
@@ -34,6 +33,55 @@
|
||||
#define CUDA_NOEXCEPT
|
||||
#endif
|
||||
|
||||
// Device overrides for non-placement new and delete.
|
||||
__device__ inline void *operator new(__SIZE_TYPE__ size) {
|
||||
if (size == 0) {
|
||||
size = 1;
|
||||
}
|
||||
return ::malloc(size);
|
||||
}
|
||||
__device__ inline void *operator new(__SIZE_TYPE__ size,
|
||||
const std::nothrow_t &) CUDA_NOEXCEPT {
|
||||
return ::operator new(size);
|
||||
}
|
||||
|
||||
__device__ inline void *operator new[](__SIZE_TYPE__ size) {
|
||||
return ::operator new(size);
|
||||
}
|
||||
__device__ inline void *operator new[](__SIZE_TYPE__ size,
|
||||
const std::nothrow_t &) {
|
||||
return ::operator new(size);
|
||||
}
|
||||
|
||||
__device__ inline void operator delete(void* ptr) CUDA_NOEXCEPT {
|
||||
if (ptr) {
|
||||
::free(ptr);
|
||||
}
|
||||
}
|
||||
__device__ inline void operator delete(void *ptr,
|
||||
const std::nothrow_t &) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
|
||||
__device__ inline void operator delete[](void* ptr) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
__device__ inline void operator delete[](void *ptr,
|
||||
const std::nothrow_t &) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
|
||||
// Sized delete, C++14 only.
|
||||
#if __cplusplus >= 201402L
|
||||
__device__ void operator delete(void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
__device__ void operator delete[](void *ptr, __SIZE_TYPE__ size) CUDA_NOEXCEPT {
|
||||
::operator delete(ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Device overrides for placement new and delete.
|
||||
__device__ inline void *operator new(__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT {
|
||||
return __ptr;
|
||||
}
|
||||
@@ -42,6 +90,7 @@ __device__ inline void *operator new[](__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT
|
||||
}
|
||||
__device__ inline void operator delete(void *, void *) CUDA_NOEXCEPT {}
|
||||
__device__ inline void operator delete[](void *, void *) CUDA_NOEXCEPT {}
|
||||
|
||||
#pragma pop_macro("CUDA_NOEXCEPT")
|
||||
|
||||
#endif // include guard
|
||||
|
||||
File diff suppressed because it is too large
@@ -21,18 +21,25 @@
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H
|
||||
#error "Never use <f16cintrin.h> directly; include <emmintrin.h> instead."
|
||||
#if !defined __IMMINTRIN_H
|
||||
#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __F16CINTRIN_H
|
||||
#define __F16CINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("f16c")))
|
||||
#define __DEFAULT_FN_ATTRS128 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
|
||||
|
||||
/// \brief Converts a 16-bit half-precision float value into a 32-bit float
|
||||
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
|
||||
* but that's because icc can emulate these without f16c using a library call.
|
||||
* Since we don't do that let's leave these in f16cintrin.h.
|
||||
*/
|
||||
|
||||
/// Converts a 16-bit half-precision float value into a 32-bit float
|
||||
/// value.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -42,7 +49,7 @@
|
||||
/// \param __a
|
||||
/// A 16-bit half-precision float value.
|
||||
/// \returns The converted 32-bit float value.
|
||||
static __inline float __DEFAULT_FN_ATTRS
|
||||
static __inline float __DEFAULT_FN_ATTRS128
|
||||
_cvtsh_ss(unsigned short __a)
|
||||
{
|
||||
__v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
|
||||
@@ -50,7 +57,7 @@ _cvtsh_ss(unsigned short __a)
|
||||
return r[0];
|
||||
}
|
||||
|
||||
/// \brief Converts a 32-bit single-precision float value to a 16-bit
|
||||
/// Converts a 32-bit single-precision float value to a 16-bit
|
||||
/// half-precision float value.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -72,11 +79,11 @@ _cvtsh_ss(unsigned short __a)
|
||||
/// 011: Truncate \n
|
||||
/// 1XX: Use MXCSR.RC for rounding
|
||||
/// \returns The converted 16-bit half-precision float value.
|
||||
#define _cvtss_sh(a, imm) __extension__ ({ \
|
||||
#define _cvtss_sh(a, imm) \
|
||||
(unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
|
||||
(imm)))[0]); })
|
||||
(imm)))[0])
|
||||
|
||||
/// \brief Converts a 128-bit vector containing 32-bit float values into a
|
||||
/// Converts a 128-bit vector containing 32-bit float values into a
|
||||
/// 128-bit vector containing 16-bit half-precision float values.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -99,10 +106,10 @@ _cvtsh_ss(unsigned short __a)
|
||||
/// \returns A 128-bit vector containing converted 16-bit half-precision float
|
||||
/// values. The lower 64 bits are used to store the converted 16-bit
|
||||
/// half-precision floating-point values.
|
||||
#define _mm_cvtps_ph(a, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); })
|
||||
#define _mm_cvtps_ph(a, imm) \
|
||||
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))
|
||||
|
||||
/// \brief Converts a 128-bit vector containing 16-bit half-precision float
|
||||
/// Converts a 128-bit vector containing 16-bit half-precision float
|
||||
/// values into a 128-bit vector containing 32-bit float values.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -113,12 +120,57 @@ _cvtsh_ss(unsigned short __a)
|
||||
/// A 128-bit vector containing 16-bit half-precision float values. The lower
|
||||
/// 64 bits are used in the conversion.
|
||||
/// \returns A 128-bit vector of [4 x float] containing converted float values.
|
||||
static __inline __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_cvtph_ps(__m128i __a)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
|
||||
/// containing 16-bit half-precision float values.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
|
||||
///
|
||||
/// \param a
|
||||
/// A 256-bit vector containing 32-bit single-precision float values to be
|
||||
/// converted to 16-bit half-precision float values.
|
||||
/// \param imm
|
||||
/// An immediate value controlling rounding using bits [2:0]: \n
|
||||
/// 000: Nearest \n
|
||||
/// 001: Down \n
|
||||
/// 010: Up \n
|
||||
/// 011: Truncate \n
|
||||
/// 1XX: Use MXCSR.RC for rounding
|
||||
/// \returns A 128-bit vector containing the converted 16-bit half-precision
|
||||
/// float values.
|
||||
#define _mm256_cvtps_ph(a, imm) \
|
||||
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))
|
||||
|
||||
/// Converts a 128-bit vector containing 16-bit half-precision float
|
||||
/// values into a 256-bit vector of [8 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 128-bit vector containing 16-bit half-precision float values to be
|
||||
/// converted to 32-bit single-precision float values.
|
||||
/// \returns A vector of [8 x float] containing the converted 32-bit
|
||||
/// single-precision float values.
|
||||
static __inline __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_cvtph_ps(__m128i __a)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS256
|
||||
|
||||
#endif /* __F16CINTRIN_H */
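A brief sketch, not part of the diff above, showing the f16cintrin.h conversions in use, including the 256-bit forms that this change moves into the header. The immediate value 0 selects round-to-nearest per the rounding table documented above; the -mf16c/-mavx flags and the main() wrapper are assumptions.

/* Illustrative sketch (not part of this commit): round-tripping floats
 * through half precision.  Assumes -mf16c -mavx and an F16C-capable CPU. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    /* Scalar round trip: float -> half -> float, imm 0 = round to nearest. */
    unsigned short h = _cvtss_sh(3.14159f, 0);
    float back = _cvtsh_ss(h);

    /* Packed round trip through the 256-bit forms added to this header. */
    __m256 v = _mm256_set1_ps(1.5f);
    __m128i packed = _mm256_cvtps_ph(v, 0);
    __m256 restored = _mm256_cvtph_ps(packed);

    float out[8];
    _mm256_storeu_ps(out, restored);
    printf("scalar: %f  packed lane 0: %f\n", back, out[0]);
    return 0;
}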
|
||||
|
||||
@@ -143,4 +143,18 @@
|
||||
# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
|
||||
#endif
|
||||
|
||||
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
# define FLT16_MANT_DIG __FLT16_MANT_DIG__
|
||||
# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
|
||||
# define FLT16_DIG __FLT16_DIG__
|
||||
# define FLT16_MIN_EXP __FLT16_MIN_EXP__
|
||||
# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__
|
||||
# define FLT16_MAX_EXP __FLT16_MAX_EXP__
|
||||
# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__
|
||||
# define FLT16_MAX __FLT16_MAX__
|
||||
# define FLT16_EPSILON __FLT16_EPSILON__
|
||||
# define FLT16_MIN __FLT16_MIN__
|
||||
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
|
||||
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
|
||||
|
||||
#endif /* __FLOAT_H */
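A hedged sketch of how a translation unit opts into the new FLT16_* macros: the guard macro must be defined before including <float.h>. It assumes a target on which Clang provides _Float16 (so the __FLT16_* builtins exist); the program body is illustrative only.

/* Illustrative sketch (not part of this commit): the FLT16_* macros are
 * exposed only when the TU requests the IEC 60559 extended types before
 * including <float.h>.  Assumes a target with _Float16 support. */
#define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
#include <float.h>
#include <stdio.h>

int main(void) {
    printf("FLT16_MANT_DIG = %d\n", FLT16_MANT_DIG);
    printf("FLT16_MAX_EXP  = %d\n", FLT16_MAX_EXP);
    printf("FLT16_MAX      = %f\n", (double)FLT16_MAX);
    printf("FLT16_EPSILON  = %g\n", (double)FLT16_EPSILON);
    return 0;
}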
|
||||
|
||||
@@ -31,200 +31,202 @@
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS256
|
||||
|
||||
#endif /* __FMA4INTRIN_H */
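A small usage sketch, not part of the diff: after this change _mm_msub_ps is expressed as the vfmaddps builtin with a negated addend, but it still computes a*b - c in one fused step. The -mfma4 flag, the FMA4-capable (AMD Bulldozer-era) CPU, and the main() wrapper are assumptions.

/* Illustrative sketch (not part of this commit): _mm_msub_ps computes
 * a*b - c in a single fused operation.  Assumes -mfma4. */
#include <x86intrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set1_ps(2.0f);
    __m128 b = _mm_set1_ps(3.0f);
    __m128 c = _mm_set1_ps(1.0f);

    __m128 r = _mm_msub_ps(a, b, c);   /* 2*3 - 1 = 5 in every lane */

    float out[4];
    _mm_storeu_ps(out, r);
    printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
    return 0;
}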
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
|
||||
/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -29,200 +29,202 @@
|
||||
#define __FMAINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
|
||||
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128
|
||||
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m128d __DEFAULT_FN_ATTRS128
|
||||
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256 __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m256d __DEFAULT_FN_ATTRS256
|
||||
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS128
|
||||
#undef __DEFAULT_FN_ATTRS256
|
||||
|
||||
#endif /* __FMAINTRIN_H */
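A companion sketch for the FMA3 header, again not part of the diff: it contrasts the plain fused multiply-add with the negated-multiply form, which is what the rewritten bodies above express through operand negation. The -mfma flag and the main() wrapper are assumptions.

/* Illustrative sketch (not part of this commit): FMA3 forms.
 * _mm_fnmadd_ps computes -(a*b) + c.  Assumes -mfma. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set1_ps(2.0f);
    __m128 b = _mm_set1_ps(3.0f);
    __m128 c = _mm_set1_ps(10.0f);

    __m128 fma  = _mm_fmadd_ps(a, b, c);   /*  a*b + c = 16 */
    __m128 fnma = _mm_fnmadd_ps(a, b, c);  /* -a*b + c =  4 */

    float o1[4], o2[4];
    _mm_storeu_ps(o1, fma);
    _mm_storeu_ps(o2, fnma);
    printf("fmadd=%f fnmadd=%f\n", o1[0], o2[0]);
    return 0;
}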
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fxsr")))
|
||||
|
||||
/// \brief Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
|
||||
/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
|
||||
/// memory region pointed to by the input parameter \a __p.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -43,10 +43,10 @@
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_fxsave(void *__p)
|
||||
{
|
||||
return __builtin_ia32_fxsave(__p);
|
||||
__builtin_ia32_fxsave(__p);
|
||||
}
|
||||
|
||||
/// \brief Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
|
||||
/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
|
||||
/// memory region pointed to by the input parameter \a __p. The contents of
|
||||
/// this memory region should have been written to by a previous \c _fxsave
|
||||
/// or \c _fxsave64 intrinsic.
|
||||
@@ -61,11 +61,11 @@ _fxsave(void *__p)
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_fxrstor(void *__p)
|
||||
{
|
||||
return __builtin_ia32_fxrstor(__p);
|
||||
__builtin_ia32_fxrstor(__p);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// \brief Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
|
||||
/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
|
||||
/// memory region pointed to by the input parameter \a __p.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -78,10 +78,10 @@ _fxrstor(void *__p)
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_fxsave64(void *__p)
|
||||
{
|
||||
return __builtin_ia32_fxsave64(__p);
|
||||
__builtin_ia32_fxsave64(__p);
|
||||
}
|
||||
|
||||
/// \brief Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
|
||||
/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
|
||||
/// memory region pointed to by the input parameter \a __p. The contents of
|
||||
/// this memory region should have been written to by a previous \c _fxsave
|
||||
/// or \c _fxsave64 intrinsic.
|
||||
@@ -96,7 +96,7 @@ _fxsave64(void *__p)
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_fxrstor64(void *__p)
|
||||
{
|
||||
return __builtin_ia32_fxrstor64(__p);
|
||||
__builtin_ia32_fxrstor64(__p);
|
||||
}
|
||||
#endif
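A brief sketch of the fxsr intrinsics whose bodies were cleaned up above (the void-returning builtins are now called as statements). The 512-byte, 16-byte-aligned buffer is the documented requirement; the -mfxsr flag and helper names are assumptions.

/* Illustrative sketch (not part of this commit): saving and restoring
 * the x87/MMX/XMM/MXCSR state.  Assumes -mfxsr. */
#include <immintrin.h>

static _Alignas(16) unsigned char fpu_state[512];  /* 512-byte, 16B-aligned */

void save_fp_state(void)    { _fxsave(fpu_state); }
void restore_fp_state(void) { _fxrstor(fpu_state); }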
|
||||
|
||||
|
||||
c_headers/gfniintrin.h (new file, 208 lines)
@@ -0,0 +1,208 @@
|
||||
/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __GFNIINTRIN_H
|
||||
#define __GFNIINTRIN_H
|
||||
|
||||
|
||||
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
|
||||
(__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), \
|
||||
(char)(I))
|
||||
|
||||
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
|
||||
(__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
|
||||
(__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
|
||||
(__v16qi)(__m128i)(S))
|
||||
|
||||
|
||||
#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
|
||||
(__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
|
||||
U, A, B, I)
|
||||
|
||||
|
||||
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
|
||||
(__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
|
||||
(__v32qi)(__m256i)(B), \
|
||||
(char)(I))
|
||||
|
||||
#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
|
||||
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
|
||||
(__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
|
||||
(__v32qi)(__m256i)(S))
|
||||
|
||||
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
|
||||
(__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
|
||||
U, A, B, I)
|
||||
|
||||
|
||||
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
|
||||
(__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
|
||||
(__v64qi)(__m512i)(B), \
|
||||
(char)(I))
|
||||
|
||||
#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
|
||||
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
|
||||
(__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
|
||||
(__v64qi)(__m512i)(S))
|
||||
|
||||
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
|
||||
(__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \
|
||||
U, A, B, I)
|
||||
|
||||
#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
|
||||
(__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), \
|
||||
(char)(I))
|
||||
|
||||
#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
|
||||
(__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
|
||||
(__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
|
||||
(__v16qi)(__m128i)(S))
|
||||
|
||||
|
||||
#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
|
||||
(__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
|
||||
U, A, B, I)
|
||||
|
||||
|
||||
#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
|
||||
(__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
|
||||
(__v32qi)(__m256i)(B), \
|
||||
(char)(I))
|
||||
|
||||
#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
|
||||
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
|
||||
(__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
|
||||
(__v32qi)(__m256i)(S))
|
||||
|
||||
#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
|
||||
(__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
|
||||
U, A, B, I)
|
||||
|
||||
|
||||
#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
|
||||
(__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
|
||||
(__v64qi)(__m512i)(B), \
|
||||
(char)(I))
|
||||
|
||||
#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
|
||||
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
|
||||
(__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
|
||||
(__v64qi)(__m512i)(S))
|
||||
|
||||
#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
|
||||
(__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \
|
||||
U, A, B, I)
|
||||
|
||||
/* Default attributes for simple form (no masking). */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
|
||||
|
||||
/* Default attributes for YMM unmasked form. */
|
||||
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
|
||||
|
||||
/* Default attributes for ZMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
|
||||
|
||||
/* Default attributes for VLX forms. */
|
||||
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
|
||||
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
|
||||
(__v16qi) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
|
||||
_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectb_128(__U,
|
||||
(__v16qi) _mm_gf2p8mul_epi8(__A, __B),
|
||||
(__v16qi) __S);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
|
||||
_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
|
||||
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
|
||||
(__v32qi) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
|
||||
_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_selectb_256(__U,
|
||||
(__v32qi) _mm256_gf2p8mul_epi8(__A, __B),
|
||||
(__v32qi) __S);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
|
||||
_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
|
||||
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
|
||||
(__v64qi) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
|
||||
_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_selectb_512(__U,
|
||||
(__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
|
||||
(__v64qi) __S);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
|
||||
_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
|
||||
{
|
||||
return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),
|
||||
__U, __A, __B);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS_Y
|
||||
#undef __DEFAULT_FN_ATTRS_Z
|
||||
#undef __DEFAULT_FN_ATTRS_VL128
|
||||
#undef __DEFAULT_FN_ATTRS_VL256
|
||||
|
||||
#endif /* __GFNIINTRIN_H */
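A short sketch of the unmasked GF(2^8) multiply added by this new header, not part of the diff itself. The operands use the classic AES textbook example (0x57 * 0x83 = 0xC1 modulo x^8+x^4+x^3+x+1); the -mgfni flag and the main() wrapper are assumptions.

/* Illustrative sketch (not part of this commit): byte-wise GF(2^8)
 * multiplication.  The SSE form needs only GFNI, not AVX-512.
 * Assumes -mgfni. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_set1_epi8((char)0x57);
    __m128i b = _mm_set1_epi8((char)0x83);

    __m128i r = _mm_gf2p8mul_epi8(a, b);   /* each lane: 0x57*0x83 = 0xC1 */

    unsigned char out[16];
    _mm_storeu_si128((__m128i *)out, r);
    printf("0x%02X\n", out[0]);
    return 0;
}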
|
||||
|
||||
@@ -214,7 +214,7 @@ __TM_failure_code(void* const __TM_buff)
|
||||
|
||||
/* These intrinsics are being made available for compatibility with
|
||||
the IBM XL compiler. For documentation please see the "z/OS XL
|
||||
C/C++ Programming Guide" publically available on the web. */
|
||||
C/C++ Programming Guide" publicly available on the web. */
|
||||
|
||||
static __inline long __attribute__((__always_inline__, __nodebug__))
|
||||
__TM_simple_begin ()
|
||||
|
||||
@@ -70,4 +70,9 @@ __rdtscp(unsigned int *__A) {
|
||||
|
||||
#define _rdpmc(A) __rdpmc(A)
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_wbinvd(void) {
|
||||
__builtin_ia32_wbinvd();
|
||||
}
|
||||
|
||||
#endif /* __IA32INTRIN_H */
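A hedged sketch around this header: __rdtscp (visible in the hunk context above) is usable from user space, while the newly added _wbinvd wraps a privileged instruction and will fault outside ring 0, so it is only shown as a comment. The loop body and main() wrapper are illustrative assumptions.

/* Illustrative sketch (not part of this commit): serialized timestamp
 * reads with __rdtscp.  _wbinvd() would require kernel-mode code. */
#include <x86intrin.h>
#include <stdio.h>

int main(void) {
    unsigned int aux;
    unsigned long long start = __rdtscp(&aux);

    volatile double x = 1.0;
    for (int i = 0; i < 1000; ++i)
        x = x * 1.000001 + 0.5;

    unsigned long long end = __rdtscp(&aux);
    printf("elapsed cycles: %llu (x=%f)\n", end - start, (double)x);
    return 0;
}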
|
||||
|
||||
@@ -58,61 +58,25 @@
|
||||
#include <clflushoptintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
|
||||
#include <clwbintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
|
||||
#include <avxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
|
||||
/* The 256-bit versions of functions in f16cintrin.h.
|
||||
Intel documents these as being in immintrin.h, and
|
||||
they depend on typedefs from avxintrin.h. */
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
|
||||
#include <f16cintrin.h>
|
||||
#endif
|
||||
|
||||
/// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector
|
||||
/// containing 16-bit half-precision float values.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
|
||||
///
|
||||
/// \param a
|
||||
/// A 256-bit vector containing 32-bit single-precision float values to be
|
||||
/// converted to 16-bit half-precision float values.
|
||||
/// \param imm
|
||||
/// An immediate value controlling rounding using bits [2:0]: \n
|
||||
/// 000: Nearest \n
|
||||
/// 001: Down \n
|
||||
/// 010: Up \n
|
||||
/// 011: Truncate \n
|
||||
/// 1XX: Use MXCSR.RC for rounding
|
||||
/// \returns A 128-bit vector containing the converted 16-bit half-precision
|
||||
/// float values.
|
||||
#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
|
||||
|
||||
/// \brief Converts a 128-bit vector containing 16-bit half-precision float
|
||||
/// values into a 256-bit vector of [8 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
|
||||
///
|
||||
/// \param __a
|
||||
/// A 128-bit vector containing 16-bit half-precision float values to be
|
||||
/// converted to 32-bit single-precision float values.
|
||||
/// \returns A vector of [8 x float] containing the converted 32-bit
|
||||
/// single-precision float values.
|
||||
static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
|
||||
_mm256_cvtph_ps(__m128i __a)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
|
||||
}
|
||||
#endif /* __AVX2__ */
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
|
||||
#include <vpclmulqdqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
|
||||
#include <bmiintrin.h>
|
||||
@@ -126,6 +90,10 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <lzcntintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
|
||||
#include <popcntintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
|
||||
#include <fmaintrin.h>
|
||||
#endif
|
||||
@@ -142,6 +110,10 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <avx512bwintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
|
||||
#include <avx512bitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
|
||||
#include <avx512cdintrin.h>
|
||||
#endif
|
||||
@@ -150,10 +122,29 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <avx512vpopcntdqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
|
||||
#include <avx512vpopcntdqvlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
|
||||
#include <avx512vnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
|
||||
#include <avx512vlvnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
|
||||
#include <avx512dqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
|
||||
#include <avx512vlbitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BW__))
|
||||
#include <avx512vlbwintrin.h>
|
||||
@@ -191,6 +182,15 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <avx512vbmivlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
|
||||
#include <avx512vbmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || \
|
||||
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
|
||||
#include <avx512vlvbmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
|
||||
#include <avx512pfintrin.h>
|
||||
#endif
|
||||
@@ -199,6 +199,26 @@ _mm256_cvtph_ps(__m128i __a)
|
||||
#include <pkuintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
|
||||
#include <vaesintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
|
||||
#include <gfniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
|
||||
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
|
||||
_rdpid_u32(void) {
|
||||
return __builtin_ia32_rdpid();
|
||||
}
|
||||
#endif // __RDPID__
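A minimal sketch of the new _rdpid_u32 intrinsic, not part of the diff. It assumes -mrdpid, a CPU with RDPID, and an OS that programs IA32_TSC_AUX; the convention that the low 12 bits hold the logical CPU number is a Linux-specific assumption.

/* Illustrative sketch (not part of this commit): reading IA32_TSC_AUX
 * via RDPID.  Assumes -mrdpid and OS support. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    unsigned int id = _rdpid_u32();
    printf("IA32_TSC_AUX = %u (low 12 bits: %u)\n", id, id & 0xfffu);
    return 0;
}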
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
|
||||
_rdrand16_step(unsigned short *__p)
|
||||
@@ -262,25 +282,25 @@ _readgsbase_u64(void)
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
|
||||
_writefsbase_u32(unsigned int __V)
|
||||
{
|
||||
return __builtin_ia32_wrfsbase32(__V);
|
||||
__builtin_ia32_wrfsbase32(__V);
|
||||
}
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
|
||||
_writefsbase_u64(unsigned long long __V)
|
||||
{
|
||||
return __builtin_ia32_wrfsbase64(__V);
|
||||
__builtin_ia32_wrfsbase64(__V);
|
||||
}
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
|
||||
_writegsbase_u32(unsigned int __V)
|
||||
{
|
||||
return __builtin_ia32_wrgsbase32(__V);
|
||||
__builtin_ia32_wrgsbase32(__V);
|
||||
}
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
|
||||
_writegsbase_u64(unsigned long long __V)
|
||||
{
|
||||
return __builtin_ia32_wrgsbase64(__V);
|
||||
__builtin_ia32_wrgsbase64(__V);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -315,8 +335,133 @@ _writegsbase_u64(unsigned long long __V)
#include <xsavesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif

/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
 * whereas others are also available at all times. */
#include <adxintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif

#ifdef _MSC_VER
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#ifdef __cplusplus
}
#endif

#undef __DEFAULT_FN_ATTRS

#endif /* _MSC_VER */

#endif /* __IMMINTRIN_H */
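For context, a sketch of how the MSVC-compatible HLE exchange intrinsics defined above are typically used; this is not part of the diff, assumes MSVC-mode compilation (_MSC_VER) where the block is active, and the lock word and helper names are invented:

#include <immintrin.h>

static volatile long g_hle_lock_word = 0; /* hypothetical lock word */

static void hle_lock(void)
{
  /* XACQUIRE-prefixed lock xchg: begins a (possibly elided) critical section.
   * The extra 0xF2 byte is only a hint, so this also runs on CPUs without TSX. */
  while (_InterlockedExchange_HLEAcquire(&g_hle_lock_word, 1) != 0)
    ; /* spin until the previous owner stores 0 */
}

static void hle_unlock(void)
{
  /* XRELEASE-prefixed store of 0 ends the elided region. */
  _InterlockedExchange_HLERelease(&g_hle_lock_word, 0);
}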
@@ -38,6 +38,10 @@
|
||||
#include <armintr.h>
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#include <arm64intr.h>
|
||||
#endif
|
||||
|
||||
/* For the definition of jmp_buf. */
|
||||
#if __STDC_HOSTED__
|
||||
#include <setjmp.h>
|
||||
@@ -79,6 +83,7 @@ void __incfsdword(unsigned long);
|
||||
void __incfsword(unsigned long);
|
||||
unsigned long __indword(unsigned short);
|
||||
void __indwordstring(unsigned short, unsigned long *, unsigned long);
|
||||
void __int2c(void);
|
||||
void __invlpg(void *);
|
||||
unsigned short __inword(unsigned short);
|
||||
void __inwordstring(unsigned short, unsigned short *, unsigned long);
|
||||
@@ -136,6 +141,7 @@ void __svm_stgi(void);
|
||||
void __svm_vmload(size_t);
|
||||
void __svm_vmrun(size_t);
|
||||
void __svm_vmsave(size_t);
|
||||
void __ud2(void);
|
||||
unsigned __int64 __ull_rshift(unsigned __int64, int);
|
||||
void __vmx_off(void);
|
||||
void __vmx_vmptrst(unsigned __int64 *);
|
||||
@@ -157,25 +163,15 @@ static __inline__
|
||||
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
|
||||
static __inline__
|
||||
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
|
||||
static __inline__
|
||||
unsigned char _bittest(long const *, long);
|
||||
static __inline__
|
||||
unsigned char _bittestandcomplement(long *, long);
|
||||
static __inline__
|
||||
unsigned char _bittestandreset(long *, long);
|
||||
static __inline__
|
||||
unsigned char _bittestandset(long *, long);
|
||||
void __cdecl _disable(void);
|
||||
void __cdecl _enable(void);
|
||||
long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);
|
||||
unsigned char _interlockedbittestandreset(long volatile *, long);
|
||||
unsigned char _interlockedbittestandset(long volatile *, long);
|
||||
long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long);
|
||||
long _InterlockedCompareExchange_HLERelease(long volatile *, long, long);
|
||||
__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64,
|
||||
__int64);
|
||||
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
|
||||
__int64);
|
||||
void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,
|
||||
void *);
|
||||
void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,
|
||||
@@ -252,24 +248,15 @@ void __writegsbyte(unsigned long, unsigned char);
|
||||
void __writegsdword(unsigned long, unsigned long);
|
||||
void __writegsqword(unsigned long, unsigned __int64);
|
||||
void __writegsword(unsigned long, unsigned short);
|
||||
static __inline__
|
||||
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
static __inline__
|
||||
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
static __inline__
|
||||
unsigned char _bittest64(__int64 const *, __int64);
|
||||
static __inline__
|
||||
unsigned char _bittestandcomplement64(__int64 *, __int64);
|
||||
static __inline__
|
||||
unsigned char _bittestandreset64(__int64 *, __int64);
|
||||
static __inline__
|
||||
unsigned char _bittestandset64(__int64 *, __int64);
|
||||
long _InterlockedAnd_np(long volatile *_Value, long _Mask);
|
||||
short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
|
||||
__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
|
||||
char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
|
||||
unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
|
||||
static __inline__
|
||||
unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
|
||||
long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
|
||||
long _Comparand);
|
||||
@@ -283,10 +270,6 @@ unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
|
||||
__int64 *_ComparandResult);
|
||||
short _InterlockedCompareExchange16_np(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64,
|
||||
__int64);
|
||||
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
|
||||
__int64);
|
||||
__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,
|
||||
@@ -316,7 +299,12 @@ unsigned __int64 _umul128(unsigned __int64,
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#if defined(__x86_64__) || defined(__arm__)
|
||||
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
|
||||
|
||||
static __inline__
|
||||
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
static __inline__
|
||||
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
|
||||
static __inline__
|
||||
__int64 _InterlockedDecrement64(__int64 volatile *_Addend);
|
||||
@@ -337,78 +325,6 @@ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
|
||||
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Bit Counting and Testing
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittest(long const *_BitBase, long _BitPos) {
|
||||
return (*_BitBase >> _BitPos) & 1;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittestandcomplement(long *_BitBase, long _BitPos) {
|
||||
unsigned char _Res = (*_BitBase >> _BitPos) & 1;
|
||||
*_BitBase = *_BitBase ^ (1 << _BitPos);
|
||||
return _Res;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittestandreset(long *_BitBase, long _BitPos) {
|
||||
unsigned char _Res = (*_BitBase >> _BitPos) & 1;
|
||||
*_BitBase = *_BitBase & ~(1 << _BitPos);
|
||||
return _Res;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittestandset(long *_BitBase, long _BitPos) {
|
||||
unsigned char _Res = (*_BitBase >> _BitPos) & 1;
|
||||
*_BitBase = *_BitBase | (1 << _BitPos);
|
||||
return _Res;
|
||||
}
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_interlockedbittestandset_acq(long volatile *_BitBase, long _BitPos) {
|
||||
long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_ACQUIRE);
|
||||
return (_PrevVal >> _BitPos) & 1;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_interlockedbittestandset_nf(long volatile *_BitBase, long _BitPos) {
|
||||
long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELAXED);
|
||||
return (_PrevVal >> _BitPos) & 1;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_interlockedbittestandset_rel(long volatile *_BitBase, long _BitPos) {
|
||||
long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELEASE);
|
||||
return (_PrevVal >> _BitPos) & 1;
|
||||
}
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittest64(__int64 const *_BitBase, __int64 _BitPos) {
|
||||
return (*_BitBase >> _BitPos) & 1;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) {
|
||||
unsigned char _Res = (*_BitBase >> _BitPos) & 1;
|
||||
*_BitBase = *_BitBase ^ (1ll << _BitPos);
|
||||
return _Res;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) {
|
||||
unsigned char _Res = (*_BitBase >> _BitPos) & 1;
|
||||
*_BitBase = *_BitBase & ~(1ll << _BitPos);
|
||||
return _Res;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_bittestandset64(__int64 *_BitBase, __int64 _BitPos) {
|
||||
unsigned char _Res = (*_BitBase >> _BitPos) & 1;
|
||||
*_BitBase = *_BitBase | (1ll << _BitPos);
|
||||
return _Res;
|
||||
}
|
||||
static __inline__ unsigned char __DEFAULT_FN_ATTRS
|
||||
_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) {
|
||||
long long _PrevVal =
|
||||
__atomic_fetch_or(_BitBase, 1ll << _BitPos, __ATOMIC_SEQ_CST);
|
||||
return (_PrevVal >> _BitPos) & 1;
|
||||
}
|
||||
#endif
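The plain _bittest* helpers above are ordinary read-modify-write sequences, while the _interlockedbittestandset* family is the atomic form. A small sketch, not part of the diff, assuming MSVC-mode compilation where intrin.h provides these; the helper names are invented:

#include <intrin.h>

/* Both return the previous state of the bit: 1 if the slot was already taken. */
static unsigned char claim_slot(long *bitmap, long slot)
{
  return _bittestandset(bitmap, slot);            /* non-atomic bts */
}

static unsigned char claim_slot_atomic(long volatile *bitmap, long slot)
{
  return _interlockedbittestandset(bitmap, slot); /* lock bts */
}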
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange Add
|
||||
\*----------------------------------------------------------------------------*/
|
||||
@@ -598,6 +514,23 @@ _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) {
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Bit Counting and Testing
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Or
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
@@ -828,7 +761,7 @@ _InterlockedCompareExchange_nf(long volatile *_Destination,
|
||||
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ short __DEFAULT_FN_ATTRS
|
||||
static __inline__ long __DEFAULT_FN_ATTRS
|
||||
_InterlockedCompareExchange_rel(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
|
||||
@@ -864,33 +797,40 @@ _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {
|
||||
__asm__("rep movsb" : : "D"(__dst), "S"(__src), "c"(__n));
|
||||
__asm__ __volatile__("rep movsb" : "+D"(__dst), "+S"(__src), "+c"(__n)
|
||||
: : "memory");
|
||||
}
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {
|
||||
__asm__("rep movsl" : : "D"(__dst), "S"(__src), "c"(__n));
|
||||
__asm__ __volatile__("rep movsl" : "+D"(__dst), "+S"(__src), "+c"(__n)
|
||||
: : "memory");
|
||||
}
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {
|
||||
__asm__("rep movsw" : : "D"(__dst), "S"(__src), "c"(__n));
|
||||
__asm__ __volatile__("rep movsw" : "+D"(__dst), "+S"(__src), "+c"(__n)
|
||||
: : "memory");
|
||||
}
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
|
||||
__asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n));
|
||||
__asm__ __volatile__("rep stosl" : "+D"(__dst), "+c"(__n) : "a"(__x)
|
||||
: "memory");
|
||||
}
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {
|
||||
__asm__("rep stosw" : : "D"(__dst), "a"(__x), "c"(__n));
|
||||
__asm__ __volatile__("rep stosw" : "+D"(__dst), "+c"(__n) : "a"(__x)
|
||||
: "memory");
|
||||
}
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {
|
||||
__asm__("rep movsq" : : "D"(__dst), "S"(__src), "c"(__n));
|
||||
__asm__ __volatile__("rep movsq" : "+D"(__dst), "+S"(__src), "+c"(__n)
|
||||
: : "memory");
|
||||
}
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
|
||||
__asm__("rep stosq" : : "D"(__dst), "a"(__x), "c"(__n));
|
||||
__asm__ __volatile__("rep stosq" : "+D"(__dst), "+c"(__n) : "a"(__x)
|
||||
: "memory");
|
||||
}
|
||||
#endif
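The hunk above corrects the rep-string helpers' constraints: the pointer and count registers are now read-write operands and memory is marked clobbered, so the compiler can no longer cache values across the call. A usage sketch, not part of the diff, assuming MSVC-mode compilation where intrin.h defines these; the helper names are invented:

#include <stddef.h>
#include <intrin.h>

static void copy_dwords(unsigned long *dst, const unsigned long *src, size_t n)
{
  __movsd(dst, src, n);   /* rep movsl: copies n 32-bit values */
}

static void fill_words(unsigned short *dst, unsigned short value, size_t n)
{
  __stosw(dst, value, n); /* rep stosw: stores value n times */
}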
|
||||
|
||||
|
||||
37
c_headers/invpcidintrin.h
Normal file
@@ -0,0 +1,37 @@
/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <invpcidintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __INVPCIDINTRIN_H
#define __INVPCIDINTRIN_H

static __inline__ void
  __attribute__((__always_inline__, __nodebug__, __target__("invpcid")))
_invpcid(unsigned int __type, void *__descriptor) {
  __builtin_ia32_invpcid(__type, __descriptor);
}

#endif /* __INVPCIDINTRIN_H */
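A sketch of how the new _invpcid intrinsic is meant to be called, not part of the diff. INVPCID is a privileged instruction, so this only makes sense in ring-0 code built with -minvpcid; the descriptor layout (PCID in the low bits, then a linear address) and type 1 = single-context invalidation follow the Intel SDM, and the helper name is invented:

#include <stdint.h>
#include <immintrin.h>

/* Drop all non-global TLB entries tagged with one PCID. */
static void flush_pcid(uint16_t pcid)
{
  struct { uint64_t pcid; uint64_t linear_address; } descriptor = { pcid, 0 };
  _invpcid(1 /* single-context invalidation */, &descriptor);
}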
@@ -31,7 +31,7 @@
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp")))
|
||||
|
||||
/// \brief Parses the LWPCB at the specified address and enables
|
||||
/// Parses the LWPCB at the specified address and enables
|
||||
/// profiling if valid.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -48,7 +48,7 @@ __llwpcb (void *__addr)
|
||||
__builtin_ia32_llwpcb(__addr);
|
||||
}
|
||||
|
||||
/// \brief Flushes the LWP state to memory and returns the address of the LWPCB.
|
||||
/// Flushes the LWP state to memory and returns the address of the LWPCB.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -58,12 +58,12 @@ __llwpcb (void *__addr)
|
||||
/// Address to the current Lightweight Profiling Control Block (LWPCB).
|
||||
/// If LWP is not currently enabled, returns NULL.
|
||||
static __inline__ void* __DEFAULT_FN_ATTRS
|
||||
__slwpcb ()
|
||||
__slwpcb (void)
|
||||
{
|
||||
return __builtin_ia32_slwpcb();
|
||||
}
|
||||
|
||||
/// \brief Inserts programmed event record into the LWP event ring buffer
|
||||
/// Inserts programmed event record into the LWP event ring buffer
|
||||
/// and advances the ring buffer pointer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -84,7 +84,7 @@ __slwpcb ()
|
||||
(__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \
|
||||
(unsigned int) (FLAGS)))
|
||||
|
||||
/// \brief Decrements the LWP programmed value sample event counter. If the result is
|
||||
/// Decrements the LWP programmed value sample event counter. If the result is
|
||||
/// negative, inserts an event record into the LWP event ring buffer in memory
|
||||
/// and advances the ring buffer pointer.
|
||||
///
|
||||
@@ -104,7 +104,7 @@ __slwpcb ()
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
/// \brief Inserts programmed event record into the LWP event ring buffer
|
||||
/// Inserts programmed event record into the LWP event ring buffer
|
||||
/// and advances the ring buffer pointer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -125,7 +125,7 @@ __slwpcb ()
|
||||
(__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \
|
||||
(unsigned int) (FLAGS)))
|
||||
|
||||
/// \brief Decrements the LWP programmed value sample event counter. If the result is
|
||||
/// Decrements the LWP programmed value sample event counter. If the result is
|
||||
/// negative, inserts an event record into the LWP event ring buffer in memory
|
||||
/// and advances the ring buffer pointer.
|
||||
///
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
|
||||
|
||||
/// \brief Counts the number of leading zero bits in the operand.
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -47,7 +47,7 @@ __lzcnt16(unsigned short __X)
|
||||
return __X ? __builtin_clzs(__X) : 16;
|
||||
}
|
||||
|
||||
/// \brief Counts the number of leading zero bits in the operand.
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -57,13 +57,14 @@ __lzcnt16(unsigned short __X)
|
||||
/// An unsigned 32-bit integer whose leading zeros are to be counted.
|
||||
/// \returns An unsigned 32-bit integer containing the number of leading zero
|
||||
/// bits in the operand.
|
||||
/// \see _lzcnt_u32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
__lzcnt32(unsigned int __X)
|
||||
{
|
||||
return __X ? __builtin_clz(__X) : 32;
|
||||
}
|
||||
|
||||
/// \brief Counts the number of leading zero bits in the operand.
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -73,6 +74,7 @@ __lzcnt32(unsigned int __X)
|
||||
/// An unsigned 32-bit integer whose leading zeros are to be counted.
|
||||
/// \returns An unsigned 32-bit integer containing the number of leading zero
|
||||
/// bits in the operand.
|
||||
/// \see __lzcnt32
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS
|
||||
_lzcnt_u32(unsigned int __X)
|
||||
{
|
||||
@@ -80,7 +82,7 @@ _lzcnt_u32(unsigned int __X)
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// \brief Counts the number of leading zero bits in the operand.
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -90,13 +92,14 @@ _lzcnt_u32(unsigned int __X)
|
||||
/// An unsigned 64-bit integer whose leading zeros are to be counted.
|
||||
/// \returns An unsigned 64-bit integer containing the number of leading zero
|
||||
/// bits in the operand.
|
||||
/// \see _lzcnt_u64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__lzcnt64(unsigned long long __X)
|
||||
{
|
||||
return __X ? __builtin_clzll(__X) : 64;
|
||||
}
|
||||
|
||||
/// \brief Counts the number of leading zero bits in the operand.
|
||||
/// Counts the number of leading zero bits in the operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -106,6 +109,7 @@ __lzcnt64(unsigned long long __X)
|
||||
/// An unsigned 64-bit integer whose leading zeros are to be counted.
|
||||
/// \returns An unsigned 64-bit integer containing the number of leading zero
|
||||
/// bits in the operand.
|
||||
/// \see __lzcnt64
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
_lzcnt_u64(unsigned long long __X)
|
||||
{
|
||||
|
||||
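The lzcntintrin.h hunks above only add \see cross-references: __lzcnt32/_lzcnt_u32 and __lzcnt64/_lzcnt_u64 are the same operations under two names, returning the operand width for a zero input. A small sketch, not part of the diff, assuming -mlzcnt and an invented helper name:

#include <x86intrin.h>

/* Index of the highest set bit of a nonzero value. */
static unsigned int highest_set_bit(unsigned int x)
{
  return 31u - _lzcnt_u32(x); /* same result as 31u - __lzcnt32(x) */
}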
@@ -30,9 +30,9 @@
typedef float __v2sf __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))

static __inline__ void __DEFAULT_FN_ATTRS
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
_m_femms(void) {
  __builtin_ia32_femms();
}
@@ -134,7 +134,7 @@ _m_pmulhrw(__m64 __m1, __m64 __m2) {

/* Handle the 3dnowa instructions here. */
#undef __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))

static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pf2iw(__m64 __m) {
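A short usage sketch of one of the 3dNow! intrinsics whose default attributes now carry __min_vector_width__(64); this is not part of the diff, assumes compilation with -m3dnow, and uses an invented helper name:

#include <mm3dnow.h>

static __m64 add_packed_floats(__m64 a, __m64 b)
{
  return _m_pfadd(a, b); /* packed single-precision add on a 64-bit vector */
}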
@@ -32,27 +32,27 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
|
||||
typedef char __v8qi __attribute__((__vector_size__(8)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
|
||||
|
||||
/// \brief Clears the MMX state by setting the state of the x87 stack registers
|
||||
/// Clears the MMX state by setting the state of the x87 stack registers
|
||||
/// to empty.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
|
||||
///
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
|
||||
_mm_empty(void)
|
||||
{
|
||||
__builtin_ia32_emms();
|
||||
}
|
||||
|
||||
/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
|
||||
/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
|
||||
/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
|
||||
/// This intrinsic corresponds to the <c> MOVD </c> instruction.
|
||||
///
|
||||
/// \param __i
|
||||
/// A 32-bit integer value.
|
||||
@@ -64,12 +64,12 @@ _mm_cvtsi32_si64(int __i)
|
||||
return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
|
||||
}
|
||||
|
||||
/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
|
||||
/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
|
||||
/// signed integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
|
||||
/// This intrinsic corresponds to the <c> MOVD </c> instruction.
|
||||
///
|
||||
/// \param __m
|
||||
/// A 64-bit integer vector.
|
||||
@@ -81,11 +81,11 @@ _mm_cvtsi64_si32(__m64 __m)
|
||||
return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
|
||||
}
|
||||
|
||||
/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
|
||||
/// Casts a 64-bit signed integer value into a 64-bit integer vector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
|
||||
/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
|
||||
///
|
||||
/// \param __i
|
||||
/// A 64-bit signed integer.
|
||||
@@ -97,11 +97,11 @@ _mm_cvtsi64_m64(long long __i)
|
||||
return (__m64)__i;
|
||||
}
|
||||
|
||||
/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
|
||||
/// Casts a 64-bit integer vector into a 64-bit signed integer value.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
|
||||
/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
|
||||
///
|
||||
/// \param __m
|
||||
/// A 64-bit integer vector.
|
||||
@@ -113,7 +113,7 @@ _mm_cvtm64_si64(__m64 __m)
|
||||
return (long long)__m;
|
||||
}
|
||||
|
||||
/// \brief Converts 16-bit signed integers from both 64-bit integer vector
|
||||
/// Converts 16-bit signed integers from both 64-bit integer vector
|
||||
/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
|
||||
/// a 64-bit integer vector of [8 x i8] as the result. Positive values
|
||||
/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
|
||||
@@ -143,7 +143,7 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Converts 32-bit signed integers from both 64-bit integer vector
|
||||
/// Converts 32-bit signed integers from both 64-bit integer vector
|
||||
/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
|
||||
/// a 64-bit integer vector of [4 x i16] as the result. Positive values
|
||||
/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
|
||||
@@ -173,7 +173,7 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// \brief Converts 16-bit signed integers from both 64-bit integer vector
|
||||
/// Converts 16-bit signed integers from both 64-bit integer vector
|
||||
/// parameters of [4 x i16] into 8-bit unsigned integer values, and
|
||||
/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
|
||||
/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
|
||||
@@ -203,7 +203,7 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
|
||||
/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
|
||||
/// and interleaves them into a 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -230,7 +230,7 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
|
||||
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
|
||||
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -253,7 +253,7 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
|
||||
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
|
||||
/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -274,7 +274,7 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
|
||||
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
|
||||
/// and interleaves them into a 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -301,7 +301,7 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
|
||||
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
|
||||
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -324,7 +324,7 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
|
||||
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
|
||||
/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -345,7 +345,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// \brief Adds each 8-bit integer element of the first 64-bit integer vector
|
||||
/// Adds each 8-bit integer element of the first 64-bit integer vector
|
||||
/// of [8 x i8] to the corresponding 8-bit integer element of the second
|
||||
/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
|
||||
/// packed into a 64-bit integer vector of [8 x i8].
|
||||
@@ -366,7 +366,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Adds each 16-bit integer element of the first 64-bit integer vector
|
||||
/// Adds each 16-bit integer element of the first 64-bit integer vector
|
||||
/// of [4 x i16] to the corresponding 16-bit integer element of the second
|
||||
/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
|
||||
/// packed into a 64-bit integer vector of [4 x i16].
|
||||
@@ -387,7 +387,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Adds each 32-bit integer element of the first 64-bit integer vector
|
||||
/// Adds each 32-bit integer element of the first 64-bit integer vector
|
||||
/// of [2 x i32] to the corresponding 32-bit integer element of the second
|
||||
/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
|
||||
/// packed into a 64-bit integer vector of [2 x i32].
|
||||
@@ -408,7 +408,7 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// \brief Adds each 8-bit signed integer element of the first 64-bit integer
|
||||
/// Adds each 8-bit signed integer element of the first 64-bit integer
|
||||
/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
|
||||
/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
|
||||
/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
|
||||
@@ -430,7 +430,7 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Adds each 16-bit signed integer element of the first 64-bit integer
|
||||
/// Adds each 16-bit signed integer element of the first 64-bit integer
|
||||
/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
|
||||
/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
|
||||
/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
|
||||
@@ -453,7 +453,7 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
|
||||
/// Adds each 8-bit unsigned integer element of the first 64-bit integer
|
||||
/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
|
||||
/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
|
||||
/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
|
||||
@@ -475,7 +475,7 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
|
||||
/// Adds each 16-bit unsigned integer element of the first 64-bit integer
|
||||
/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
|
||||
/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
|
||||
/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
|
||||
@@ -497,7 +497,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Subtracts each 8-bit integer element of the second 64-bit integer
|
||||
/// Subtracts each 8-bit integer element of the second 64-bit integer
|
||||
/// vector of [8 x i8] from the corresponding 8-bit integer element of the
|
||||
/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
|
||||
/// are packed into a 64-bit integer vector of [8 x i8].
|
||||
@@ -518,7 +518,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Subtracts each 16-bit integer element of the second 64-bit integer
|
||||
/// Subtracts each 16-bit integer element of the second 64-bit integer
|
||||
/// vector of [4 x i16] from the corresponding 16-bit integer element of the
|
||||
/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
|
||||
/// results are packed into a 64-bit integer vector of [4 x i16].
|
||||
@@ -539,7 +539,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Subtracts each 32-bit integer element of the second 64-bit integer
|
||||
/// Subtracts each 32-bit integer element of the second 64-bit integer
|
||||
/// vector of [2 x i32] from the corresponding 32-bit integer element of the
|
||||
/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
|
||||
/// results are packed into a 64-bit integer vector of [2 x i32].
|
||||
@@ -560,7 +560,7 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// \brief Subtracts each 8-bit signed integer element of the second 64-bit
|
||||
/// Subtracts each 8-bit signed integer element of the second 64-bit
|
||||
/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
|
||||
/// element of the first 64-bit integer vector of [8 x i8]. Positive results
|
||||
/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
|
||||
@@ -583,7 +583,7 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Subtracts each 16-bit signed integer element of the second 64-bit
|
||||
/// Subtracts each 16-bit signed integer element of the second 64-bit
|
||||
/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
|
||||
/// element of the first 64-bit integer vector of [4 x i16]. Positive results
|
||||
/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
|
||||
@@ -606,7 +606,7 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
|
||||
/// Subtracts each 8-bit unsigned integer element of the second 64-bit
|
||||
/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
|
||||
/// element of the first 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
@@ -630,7 +630,7 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
|
||||
/// Subtracts each 16-bit unsigned integer element of the second 64-bit
|
||||
/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
|
||||
/// integer element of the first 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
@@ -654,7 +654,7 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
|
||||
/// Multiplies each 16-bit signed integer element of the first 64-bit
|
||||
/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
|
||||
/// element of the second 64-bit integer vector of [4 x i16] and get four
|
||||
/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
|
||||
@@ -681,7 +681,7 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
|
||||
/// Multiplies each 16-bit signed integer element of the first 64-bit
|
||||
/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
|
||||
/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
|
||||
/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
|
||||
@@ -702,7 +702,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
|
||||
/// Multiplies each 16-bit signed integer element of the first 64-bit
|
||||
/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
|
||||
/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
|
||||
/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
|
||||
@@ -723,7 +723,7 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Left-shifts each 16-bit signed integer element of the first
|
||||
/// Left-shifts each 16-bit signed integer element of the first
|
||||
/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
|
||||
/// of bits specified by the second parameter, which is a 64-bit integer. The
|
||||
/// lower 16 bits of the results are packed into a 64-bit integer vector of
|
||||
@@ -746,7 +746,7 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
|
||||
/// Left-shifts each 16-bit signed integer element of a 64-bit integer
|
||||
/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
|
||||
/// The lower 16 bits of the results are packed into a 64-bit integer vector
|
||||
/// of [4 x i16].
|
||||
@@ -768,7 +768,7 @@ _mm_slli_pi16(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Left-shifts each 32-bit signed integer element of the first
|
||||
/// Left-shifts each 32-bit signed integer element of the first
|
||||
/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
|
||||
/// of bits specified by the second parameter, which is a 64-bit integer. The
|
||||
/// lower 32 bits of the results are packed into a 64-bit integer vector of
|
||||
@@ -791,7 +791,7 @@ _mm_sll_pi32(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
|
||||
/// Left-shifts each 32-bit signed integer element of a 64-bit integer
|
||||
/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
|
||||
/// The lower 32 bits of the results are packed into a 64-bit integer vector
|
||||
/// of [2 x i32].
|
||||
@@ -813,7 +813,7 @@ _mm_slli_pi32(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Left-shifts the first 64-bit integer parameter by the number of bits
|
||||
/// Left-shifts the first 64-bit integer parameter by the number of bits
|
||||
/// specified by the second 64-bit integer parameter. The lower 64 bits of
|
||||
/// result are returned.
|
||||
///
|
||||
@@ -833,7 +833,7 @@ _mm_sll_si64(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
|
||||
/// Left-shifts the first parameter, which is a 64-bit integer, by the
|
||||
/// number of bits specified by the second parameter, which is a 32-bit
|
||||
/// integer. The lower 64 bits of result are returned.
|
||||
///
|
||||
@@ -853,7 +853,7 @@ _mm_slli_si64(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 16-bit integer element of the first parameter,
|
||||
/// Right-shifts each 16-bit integer element of the first parameter,
|
||||
/// which is a 64-bit integer vector of [4 x i16], by the number of bits
|
||||
/// specified by the second parameter, which is a 64-bit integer.
|
||||
///
|
||||
@@ -877,7 +877,7 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
|
||||
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
|
||||
/// of [4 x i16] by the number of bits specified by a 32-bit integer.
|
||||
///
|
||||
/// High-order bits are filled with the sign bit of the initial value of each
|
||||
@@ -900,7 +900,7 @@ _mm_srai_pi16(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 32-bit integer element of the first parameter,
|
||||
/// Right-shifts each 32-bit integer element of the first parameter,
|
||||
/// which is a 64-bit integer vector of [2 x i32], by the number of bits
|
||||
/// specified by the second parameter, which is a 64-bit integer.
|
||||
///
|
||||
@@ -924,7 +924,7 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
|
||||
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
|
||||
/// of [2 x i32] by the number of bits specified by a 32-bit integer.
|
||||
///
|
||||
/// High-order bits are filled with the sign bit of the initial value of each
|
||||
@@ -947,7 +947,7 @@ _mm_srai_pi32(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 16-bit integer element of the first parameter,
|
||||
/// Right-shifts each 16-bit integer element of the first parameter,
|
||||
/// which is a 64-bit integer vector of [4 x i16], by the number of bits
|
||||
/// specified by the second parameter, which is a 64-bit integer.
|
||||
///
|
||||
@@ -970,7 +970,7 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
|
||||
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
|
||||
/// of [4 x i16] by the number of bits specified by a 32-bit integer.
|
||||
///
|
||||
/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
|
||||
@@ -992,7 +992,7 @@ _mm_srli_pi16(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 32-bit integer element of the first parameter,
|
||||
/// Right-shifts each 32-bit integer element of the first parameter,
|
||||
/// which is a 64-bit integer vector of [2 x i32], by the number of bits
|
||||
/// specified by the second parameter, which is a 64-bit integer.
|
||||
///
|
||||
@@ -1015,7 +1015,7 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
|
||||
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
|
||||
/// of [2 x i32] by the number of bits specified by a 32-bit integer.
|
||||
///
|
||||
/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
|
||||
@@ -1037,7 +1037,7 @@ _mm_srli_pi32(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts the first 64-bit integer parameter by the number of bits
|
||||
/// Right-shifts the first 64-bit integer parameter by the number of bits
|
||||
/// specified by the second 64-bit integer parameter.
|
||||
///
|
||||
/// High-order bits are cleared.
|
||||
@@ -1057,7 +1057,7 @@ _mm_srl_si64(__m64 __m, __m64 __count)
|
||||
return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
|
||||
/// Right-shifts the first parameter, which is a 64-bit integer, by the
|
||||
/// number of bits specified by the second parameter, which is a 32-bit
|
||||
/// integer.
|
||||
///
|
||||
@@ -1078,7 +1078,7 @@ _mm_srli_si64(__m64 __m, int __count)
|
||||
return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
|
||||
}
|
||||
|
||||
/// \brief Performs a bitwise AND of two 64-bit integer vectors.
|
||||
/// Performs a bitwise AND of two 64-bit integer vectors.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1096,7 +1096,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
|
||||
return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
|
||||
}
|
||||
|
||||
/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
|
||||
/// Performs a bitwise NOT of the first 64-bit integer vector, and then
|
||||
/// performs a bitwise AND of the intermediate result and the second 64-bit
|
||||
/// integer vector.
|
||||
///
|
||||
@@ -1117,7 +1117,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
|
||||
return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
|
||||
}
|
||||
|
||||
/// \brief Performs a bitwise OR of two 64-bit integer vectors.
|
||||
/// Performs a bitwise OR of two 64-bit integer vectors.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1135,7 +1135,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
|
||||
return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
|
||||
}
|
||||
|
||||
/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
|
||||
/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -1153,7 +1153,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
|
||||
return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
|
||||
}
|
||||
|
||||
/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
|
||||
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
|
||||
/// [8 x i8] to determine if the element of the first vector is equal to the
|
||||
/// corresponding element of the second vector.
|
||||
///
|
||||
@@ -1175,7 +1175,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
|
||||
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
|
||||
/// [4 x i16] to determine if the element of the first vector is equal to the
|
||||
/// corresponding element of the second vector.
|
||||
///
|
||||
@@ -1197,7 +1197,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
|
||||
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
|
||||
/// [2 x i32] to determine if the element of the first vector is equal to the
|
||||
/// corresponding element of the second vector.
|
||||
///
|
||||
@@ -1219,7 +1219,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
|
||||
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
|
||||
/// [8 x i8] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
@@ -1241,7 +1241,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
|
||||
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
|
||||
/// [4 x i16] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
@@ -1263,7 +1263,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
|
||||
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
|
||||
/// [2 x i32] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
@@ -1285,20 +1285,20 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// \brief Constructs a 64-bit integer vector initialized to zero.
/// Constructs a 64-bit integer vector initialized to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
/// This intrinsic corresponds to the <c> PXOR </c> instruction.
///
/// \returns An initialized 64-bit integer vector with all elements set to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setzero_si64(void)
{
  return (__m64){ 0LL };
  return __extension__ (__m64){ 0LL };
}

/// \brief Constructs a 64-bit integer vector initialized with the specified
/// Constructs a 64-bit integer vector initialized with the specified
///    32-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1319,7 +1319,7 @@ _mm_set_pi32(int __i1, int __i0)
  return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
}

/// \brief Constructs a 64-bit integer vector initialized with the specified
/// Constructs a 64-bit integer vector initialized with the specified
///    16-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1342,7 +1342,7 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
  return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
}

/// \brief Constructs a 64-bit integer vector initialized with the specified
/// Constructs a 64-bit integer vector initialized with the specified
///    8-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1375,13 +1375,14 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
                                               __b4, __b5, __b6, __b7);
}

/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
/// Constructs a 64-bit integer vector of [2 x i32], with each of the
///    32-bit integer vector elements set to the specified 32-bit integer
///    value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
/// This intrinsic is a utility function and does not correspond to a specific
///    instruction.
///
/// \param __i
///    A 32-bit integer value used to initialize each vector element of the
@@ -1393,13 +1394,14 @@ _mm_set1_pi32(int __i)
  return _mm_set_pi32(__i, __i);
}

/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
/// Constructs a 64-bit integer vector of [4 x i16], with each of the
///    16-bit integer vector elements set to the specified 16-bit integer
///    value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
/// This intrinsic is a utility function and does not correspond to a specific
///    instruction.
///
/// \param __w
///    A 16-bit integer value used to initialize each vector element of the
@@ -1411,13 +1413,13 @@ _mm_set1_pi16(short __w)
  return _mm_set_pi16(__w, __w, __w, __w);
}

/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
/// Constructs a 64-bit integer vector of [8 x i8], with each of the
///    8-bit integer vector elements set to the specified 8-bit integer value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPUNPCKLBW + VPSHUFLW / PUNPCKLBW +
///    PSHUFLW </c> instruction.
/// This intrinsic is a utility function and does not correspond to a specific
///    instruction.
///
/// \param __b
///    An 8-bit integer value used to initialize each vector element of the
@@ -1429,7 +1431,7 @@ _mm_set1_pi8(char __b)
  return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}

/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
/// Constructs a 64-bit integer vector, initialized in reverse order with
///    the specified 32-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1450,7 +1452,7 @@ _mm_setr_pi32(int __i0, int __i1)
  return _mm_set_pi32(__i1, __i0);
}

/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
/// Constructs a 64-bit integer vector, initialized in reverse order with
///    the specified 16-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1473,7 +1475,7 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
  return _mm_set_pi16(__w3, __w2, __w1, __w0);
}

/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
/// Constructs a 64-bit integer vector, initialized in reverse order with
///    the specified 8-bit integer values.
///
/// \headerfile <x86intrin.h>
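The hunks above only reword the documentation, but the argument-order distinction they describe is easy to get wrong. The following sketch is not part of the diff; the helper name is hypothetical and it only illustrates how the set/setr/set1 initializers relate.

#include <mmintrin.h>

static __m64 example_mmx_init(void)
{
  __m64 a = _mm_set_pi32(1, 0);     /* arguments high-to-low: element 0 = 0, element 1 = 1 */
  __m64 b = _mm_setr_pi32(0, 1);    /* arguments in memory order: same layout as a */
  __m64 c = _mm_set1_pi16(7);       /* all four 16-bit elements set to 7 */
  __m64 zero = _mm_setzero_si64();  /* per the doc change above, lowered to PXOR */
  return _mm_xor_si64(_mm_xor_si64(a, b), _mm_xor_si64(c, zero));
}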
@@ -38,6 +38,7 @@ module _Builtin_intrinsics [system] [extern_c] {
explicit module neon {
requires neon
header "arm_neon.h"
header "arm_fp16.h"
export *
}
}
@@ -62,6 +63,17 @@ module _Builtin_intrinsics [system] [extern_c] {
textual header "fma4intrin.h"
textual header "mwaitxintrin.h"
textual header "clzerointrin.h"
textual header "wbnoinvdintrin.h"
textual header "cldemoteintrin.h"
textual header "waitpkgintrin.h"
textual header "movdirintrin.h"
textual header "pconfigintrin.h"
textual header "sgxintrin.h"
textual header "ptwriteintrin.h"
textual header "invpcidintrin.h"

textual header "__wmmintrin_aes.h"
textual header "__wmmintrin_pclmul.h"

explicit module mm_malloc {
requires !freestanding
@@ -128,14 +140,6 @@ module _Builtin_intrinsics [system] [extern_c] {
export aes
export pclmul
}

explicit module aes {
header "__wmmintrin_aes.h"
}

explicit module pclmul {
header "__wmmintrin_pclmul.h"
}
}

explicit module systemz {
63 c_headers/movdirintrin.h Normal file
@@ -0,0 +1,63 @@
/*===------------------------- movdirintrin.h ------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <movdirintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef _MOVDIRINTRIN_H
#define _MOVDIRINTRIN_H

/* Move doubleword as direct store */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movdiri")))
_directstoreu_u32 (void *__dst, unsigned int __value)
{
  __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value);
}

#ifdef __x86_64__

/* Move quadword as direct store */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movdiri")))
_directstoreu_u64 (void *__dst, unsigned long __value)
{
  __builtin_ia32_directstore_u64((unsigned long *)__dst, __value);
}

#endif /* __x86_64__ */

/*
 * movdir64b - Move 64 bytes as direct store.
 * The destination must be 64 byte aligned, and the store is atomic.
 * The source address has no alignment requirement, and the load from
 * the source address is not atomic.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movdir64b")))
_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src)
{
  __builtin_ia32_movdir64b(__dst, __src);
}

#endif /* _MOVDIRINTRIN_H */
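Since movdirintrin.h is a new header, a brief usage sketch may help. It is not part of the new file: the buffer and function names are illustrative, and it assumes a translation unit built with -mmovdiri -mmovdir64b.

#include <x86intrin.h>

/* Hypothetical device-doorbell example. The destination of _movdir64b must be
   64-byte aligned, as the comment in the header requires. */
static _Alignas(64) unsigned char device_portal[64];

void post_descriptor(const void *desc, volatile unsigned int *doorbell)
{
  /* 64-byte atomic store from an arbitrarily aligned source descriptor. */
  _movdir64b(device_portal, desc);
  /* 4-byte direct store (MOVDIRI) to notify the device. */
  _directstoreu_u32((void *)doorbell, 1u);
}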
@@ -25,8 +25,8 @@
#error "Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef _MWAITXINTRIN_H
#define _MWAITXINTRIN_H
#ifndef __MWAITXINTRIN_H
#define __MWAITXINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx")))
@@ -44,4 +44,4 @@ _mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)

#undef __DEFAULT_FN_ATTRS

#endif /* _MWAITXINTRIN_H */
#endif /* __MWAITXINTRIN_H */
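The hunk above only renames the include guard. As context, here is a hedged sketch of how the MONITORX/MWAITX pair guarded by __DEFAULT_FN_ATTRS is typically used; the intrinsic signatures are assumed to match clang's mwaitxintrin.h, and the function name is hypothetical.

#include <x86intrin.h>

/* Hypothetical wait loop; needs a CPU with MONITORX/MWAITX and -mmwaitx. */
void wait_for_flag(volatile int *flag)
{
  while (*flag == 0) {
    _mm_monitorx((void *)flag, 0, 0);   /* arm the monitor on the flag's cache line */
    if (*flag == 0)
      _mm_mwaitx(0, 0, 0);              /* sleep until the monitored line is written */
  }
}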
@@ -21,10 +21,10 @@
 *===-----------------------------------------------------------------------===
 */

#ifndef _NMMINTRIN_H
#define _NMMINTRIN_H
#ifndef __NMMINTRIN_H
#define __NMMINTRIN_H

/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,
   just include it now then. */
#include <smmintrin.h>
#endif /* _NMMINTRIN_H */
#endif /* __NMMINTRIN_H */
@@ -11381,6 +11381,8 @@ half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);
 * For each component of a vector type,
 * result[i] = if MSB of c[i] is set ? b[i] : a[i].
 * For a scalar type, result = c ? b : a.
 * b and a must have the same type.
 * c must have the same number of elements and bits as a.
 */
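Because vector relational operators in OpenCL C return -1 (all bits set) in true lanes, they compose directly with the MSB rule described above. A small, hypothetical kernel (not part of the header) illustrates this:

/* Clamp negative lanes to zero using the MSB-based select described above. */
__kernel void clamp_negatives(__global int4 *buf)
{
  int4 v = buf[get_global_id(0)];
  int4 mask = v < (int4)(0);                           /* -1 where v[i] < 0, else 0 */
  buf[get_global_id(0)] = select(v, (int4)(0), mask);  /* negative lanes -> 0 */
}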
char __ovld __cnfn select(char a, char b, char c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, char c);
|
||||
@@ -11394,60 +11396,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, char8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, char16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);
|
||||
short __ovld __cnfn select(short a, short b, char c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, char c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, char2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, char2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, char3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, char3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, char4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, char4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, char8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, char8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, char16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, char16 c);
|
||||
int __ovld __cnfn select(int a, int b, char c);
|
||||
uint __ovld __cnfn select(uint a, uint b, char c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, char2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, char2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, char3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, char3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, char4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, char4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, char8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, char8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, char16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, char16 c);
|
||||
long __ovld __cnfn select(long a, long b, char c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, char c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, char2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, char2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, char3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, char3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, char4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, char4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, char8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, char8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, char16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, char16 c);
|
||||
float __ovld __cnfn select(float a, float b, char c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, char2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, char3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, char4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, char8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, char16 c);
|
||||
char __ovld __cnfn select(char a, char b, short c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, short c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, short2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, short2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, short3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, short3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, short4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, short4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, short8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, short8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, short16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, short16 c);
|
||||
|
||||
short __ovld __cnfn select(short a, short b, short c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, short c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, short2 c);
|
||||
@@ -11460,60 +11409,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, short8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, short16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);
|
||||
int __ovld __cnfn select(int a, int b, short c);
|
||||
uint __ovld __cnfn select(uint a, uint b, short c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, short2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, short2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, short3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, short3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, short4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, short4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, short8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, short8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, short16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, short16 c);
|
||||
long __ovld __cnfn select(long a, long b, short c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, short c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, short2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, short2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, short3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, short3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, short4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, short4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, short8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, short8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, short16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, short16 c);
|
||||
float __ovld __cnfn select(float a, float b, short c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, short2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, short3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, short4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, short8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, short16 c);
|
||||
char __ovld __cnfn select(char a, char b, int c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, int c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, int2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, int2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, int3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, int3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, int4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, int4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, int8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, int8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, int16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, int16 c);
|
||||
short __ovld __cnfn select(short a, short b, int c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, int c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, int2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, int2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, int3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, int3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, int4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, int4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, int8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, int8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, int16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, int16 c);
|
||||
|
||||
int __ovld __cnfn select(int a, int b, int c);
|
||||
uint __ovld __cnfn select(uint a, uint b, int c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, int2 c);
|
||||
@@ -11526,60 +11422,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, int8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, int16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);
|
||||
long __ovld __cnfn select(long a, long b, int c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, int c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, int2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, int2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, int3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, int3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, int4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, int4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, int8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, int8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, int16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, int16 c);
|
||||
float __ovld __cnfn select(float a, float b, int c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, int2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, int3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, int4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, int8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, int16 c);
|
||||
char __ovld __cnfn select(char a, char b, long c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, long c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, long2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, long2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, long3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, long3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, long4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, long4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, long8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, long8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, long16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, long16 c);
|
||||
short __ovld __cnfn select(short a, short b, long c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, long c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, long2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, long2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, long3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, long3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, long4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, long4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, long8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, long8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, long16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, long16 c);
|
||||
int __ovld __cnfn select(int a, int b, long c);
|
||||
uint __ovld __cnfn select(uint a, uint b, long c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, long2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, long2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, long3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, long3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, long4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, long4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, long8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, long8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, long16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, long16 c);
|
||||
|
||||
long __ovld __cnfn select(long a, long b, long c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, long c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, long2 c);
|
||||
@@ -11592,12 +11441,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, long8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, long16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);
|
||||
float __ovld __cnfn select(float a, float b, long c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, long2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, long3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, long4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, long8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, long16 c);
|
||||
|
||||
char __ovld __cnfn select(char a, char b, uchar c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, uchar c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);
|
||||
@@ -11610,60 +11454,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);
|
||||
short __ovld __cnfn select(short a, short b, uchar c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, uchar c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, uchar2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uchar2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, uchar3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uchar3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, uchar4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uchar4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, uchar8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uchar8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, uchar16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uchar16 c);
|
||||
int __ovld __cnfn select(int a, int b, uchar c);
|
||||
uint __ovld __cnfn select(uint a, uint b, uchar c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, uchar2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, uchar2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, uchar3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, uchar3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, uchar4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, uchar4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, uchar8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, uchar8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, uchar16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, uchar16 c);
|
||||
long __ovld __cnfn select(long a, long b, uchar c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, uchar c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, uchar2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uchar2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, uchar3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uchar3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, uchar4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uchar4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, uchar8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uchar8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, uchar16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uchar16 c);
|
||||
float __ovld __cnfn select(float a, float b, uchar c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, uchar2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, uchar3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, uchar4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, uchar8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, uchar16 c);
|
||||
char __ovld __cnfn select(char a, char b, ushort c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, ushort c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, ushort2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ushort2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, ushort3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ushort3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, ushort4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ushort4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, ushort8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ushort8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, ushort16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ushort16 c);
|
||||
|
||||
short __ovld __cnfn select(short a, short b, ushort c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, ushort c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);
|
||||
@@ -11676,60 +11467,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);
|
||||
int __ovld __cnfn select(int a, int b, ushort c);
|
||||
uint __ovld __cnfn select(uint a, uint b, ushort c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, ushort2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, ushort2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, ushort3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, ushort3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, ushort4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, ushort4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, ushort8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, ushort8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, ushort16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, ushort16 c);
|
||||
long __ovld __cnfn select(long a, long b, ushort c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, ushort c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, ushort2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ushort2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, ushort3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ushort3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, ushort4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ushort4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, ushort8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ushort8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, ushort16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ushort16 c);
|
||||
float __ovld __cnfn select(float a, float b, ushort c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, ushort2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, ushort3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, ushort4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, ushort8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, ushort16 c);
|
||||
char __ovld __cnfn select(char a, char b, uint c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, uint c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, uint2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uint2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, uint3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uint3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, uint4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uint4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, uint8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uint8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, uint16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uint16 c);
|
||||
short __ovld __cnfn select(short a, short b, uint c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, uint c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, uint2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uint2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, uint3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uint3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, uint4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uint4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, uint8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uint8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, uint16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uint16 c);
|
||||
|
||||
int __ovld __cnfn select(int a, int b, uint c);
|
||||
uint __ovld __cnfn select(uint a, uint b, uint c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);
|
||||
@@ -11742,60 +11480,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);
|
||||
long __ovld __cnfn select(long a, long b, uint c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, uint c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, uint2 c);
|
||||
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uint2 c);
|
||||
long3 __ovld __cnfn select(long3 a, long3 b, uint3 c);
|
||||
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uint3 c);
|
||||
long4 __ovld __cnfn select(long4 a, long4 b, uint4 c);
|
||||
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uint4 c);
|
||||
long8 __ovld __cnfn select(long8 a, long8 b, uint8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uint8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, uint16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uint16 c);
|
||||
float __ovld __cnfn select(float a, float b, uint c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);
|
||||
char __ovld __cnfn select(char a, char b, ulong c);
|
||||
uchar __ovld __cnfn select(uchar a, uchar b, ulong c);
|
||||
char2 __ovld __cnfn select(char2 a, char2 b, ulong2 c);
|
||||
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ulong2 c);
|
||||
char3 __ovld __cnfn select(char3 a, char3 b, ulong3 c);
|
||||
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ulong3 c);
|
||||
char4 __ovld __cnfn select(char4 a, char4 b, ulong4 c);
|
||||
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ulong4 c);
|
||||
char8 __ovld __cnfn select(char8 a, char8 b, ulong8 c);
|
||||
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ulong8 c);
|
||||
char16 __ovld __cnfn select(char16 a, char16 b, ulong16 c);
|
||||
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ulong16 c);
|
||||
short __ovld __cnfn select(short a, short b, ulong c);
|
||||
ushort __ovld __cnfn select(ushort a, ushort b, ulong c);
|
||||
short2 __ovld __cnfn select(short2 a, short2 b, ulong2 c);
|
||||
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ulong2 c);
|
||||
short3 __ovld __cnfn select(short3 a, short3 b, ulong3 c);
|
||||
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ulong3 c);
|
||||
short4 __ovld __cnfn select(short4 a, short4 b, ulong4 c);
|
||||
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ulong4 c);
|
||||
short8 __ovld __cnfn select(short8 a, short8 b, ulong8 c);
|
||||
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ulong8 c);
|
||||
short16 __ovld __cnfn select(short16 a, short16 b, ulong16 c);
|
||||
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ulong16 c);
|
||||
int __ovld __cnfn select(int a, int b, ulong c);
|
||||
uint __ovld __cnfn select(uint a, uint b, ulong c);
|
||||
int2 __ovld __cnfn select(int2 a, int2 b, ulong2 c);
|
||||
uint2 __ovld __cnfn select(uint2 a, uint2 b, ulong2 c);
|
||||
int3 __ovld __cnfn select(int3 a, int3 b, ulong3 c);
|
||||
uint3 __ovld __cnfn select(uint3 a, uint3 b, ulong3 c);
|
||||
int4 __ovld __cnfn select(int4 a, int4 b, ulong4 c);
|
||||
uint4 __ovld __cnfn select(uint4 a, uint4 b, ulong4 c);
|
||||
int8 __ovld __cnfn select(int8 a, int8 b, ulong8 c);
|
||||
uint8 __ovld __cnfn select(uint8 a, uint8 b, ulong8 c);
|
||||
int16 __ovld __cnfn select(int16 a, int16 b, ulong16 c);
|
||||
uint16 __ovld __cnfn select(uint16 a, uint16 b, ulong16 c);
|
||||
|
||||
long __ovld __cnfn select(long a, long b, ulong c);
|
||||
ulong __ovld __cnfn select(ulong a, ulong b, ulong c);
|
||||
long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);
|
||||
@@ -11808,12 +11499,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);
|
||||
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);
|
||||
long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);
|
||||
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);
|
||||
float __ovld __cnfn select(float a, float b, ulong c);
|
||||
float2 __ovld __cnfn select(float2 a, float2 b, ulong2 c);
|
||||
float3 __ovld __cnfn select(float3 a, float3 b, ulong3 c);
|
||||
float4 __ovld __cnfn select(float4 a, float4 b, ulong4 c);
|
||||
float8 __ovld __cnfn select(float8 a, float8 b, ulong8 c);
|
||||
float16 __ovld __cnfn select(float16 a, float16 b, ulong16 c);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
double __ovld __cnfn select(double a, double b, long c);
|
||||
double2 __ovld __cnfn select(double2 a, double2 b, long2 c);
|
||||
@@ -11854,7 +11540,7 @@ half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);
 *
 * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).
 *
 * The address computed as (p + (offset * n)) must be
 * The address computed as (p + (offset * n)) must be
 * 8-bit aligned if gentype is char, uchar;
 * 16-bit aligned if gentype is short, ushort, half;
 * 32-bit aligned if gentype is int, uint, float;
@@ -13141,13 +12827,14 @@ void __ovld __conv barrier(cl_mem_fence_flags flags);

#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0

typedef enum memory_scope
{
  memory_scope_work_item,
  memory_scope_work_group,
  memory_scope_device,
  memory_scope_all_svm_devices,
  memory_scope_sub_group
typedef enum memory_scope {
  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;

void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
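For context, a minimal sketch of how the scoped barrier declared above is called under OpenCL C 2.0; the kernel name and data layout are assumptions, not from the header.

/* Stage values in local memory, then synchronize the work-group before reading
   them back in reversed order. */
__kernel void stage_and_sync(__global int *out, __local int *tile)
{
  tile[get_local_id(0)] = (int)get_global_id(0);
  work_group_barrier(CLK_LOCAL_MEM_FENCE, memory_scope_work_group);
  out[get_global_id(0)] = tile[(get_local_size(0) - 1) - get_local_id(0)];
}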
@@ -13175,7 +12862,7 @@ void __ovld mem_fence(cl_mem_fence_flags flags);
 * Read memory barrier that orders only
 * loads.
 * The flags argument specifies the memory
 * address space and can be set to to a
 * address space and can be set to a
 * combination of the following literal
 * values:
 * CLK_LOCAL_MEM_FENCE
@@ -13187,7 +12874,7 @@ void __ovld read_mem_fence(cl_mem_fence_flags flags);
 * Write memory barrier that orders only
 * stores.
 * The flags argument specifies the memory
 * address space and can be set to to a
 * address space and can be set to a
 * combination of the following literal
 * values:
 * CLK_LOCAL_MEM_FENCE
@@ -13201,7 +12888,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags);
cl_mem_fence_flags __ovld get_fence(const void *ptr);
cl_mem_fence_flags __ovld get_fence(void *ptr);

/**
/**
 * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions
 * and checked in Sema since they should be declared as
 *   addr gentype* to_addr (gentype*);
@@ -13952,11 +13639,11 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum memory_order
{
  memory_order_relaxed,
  memory_order_acquire,
  memory_order_release,
  memory_order_acq_rel,
  memory_order_seq_cst
  memory_order_relaxed = __ATOMIC_RELAXED,
  memory_order_acquire = __ATOMIC_ACQUIRE,
  memory_order_release = __ATOMIC_RELEASE,
  memory_order_acq_rel = __ATOMIC_ACQ_REL,
  memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
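A short, hypothetical OpenCL 2.0 fragment showing how these memory_order values (together with the memory_scope enum earlier) are passed to the _explicit atomics; the kernel name and arguments are illustrative only.

/* Relaxed, device-scoped counter increment for work-items that see a hit. */
__kernel void count_hits(__global atomic_int *counter, __global const int *data)
{
  if (data[get_global_id(0)] > 0)
    atomic_fetch_add_explicit(counter, 1,
                              memory_order_relaxed,
                              memory_scope_device);
}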

// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
@@ -14086,7 +13773,7 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera
// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.
// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.

#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);
uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
@@ -14884,7 +14571,7 @@ int printf(__constant const char* st, ...);
 * only. The filter_mode specified in sampler
 * must be set to CLK_FILTER_NEAREST; otherwise
 * the values returned are undefined.

 * The read_image{f|i|ui} calls that take
 * integer coordinates must use a sampler with
 * normalized coordinates set to
@@ -15734,8 +15421,8 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_dept
#define CLK_DEPTH_STENCIL 0x10BE
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CLK_sRGB 0x10BF
#define CLK_sRGBA 0x10C1
#define CLK_sRGBx 0x10C0
#define CLK_sRGBA 0x10C1
#define CLK_sBGRA 0x10C2
#define CLK_ABGR 0x10C3
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@@ -16199,6 +15886,313 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
|
||||
|
||||
#endif //cl_khr_subgroups cl_intel_subgroups
|
||||
|
||||
#if defined(cl_intel_subgroups)
|
||||
// Intel-Specific Sub Group Functions
|
||||
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle( int x, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle( long x, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );
|
||||
|
||||
float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );
|
||||
|
||||
float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );
|
||||
|
||||
float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );
|
||||
float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );
|
||||
float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );
|
||||
float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );
|
||||
float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );
|
||||
float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );
|
||||
|
||||
int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );
|
||||
int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );
|
||||
int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );
|
||||
int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );
|
||||
int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );
|
||||
int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );
|
||||
uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );
|
||||
uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );
|
||||
uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );
|
||||
uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );
|
||||
uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
|
||||
|
||||
long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
|
||||
ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
|
||||
uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
|
||||
uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
|
||||
uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
|
||||
uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
|
||||
uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
|
||||
uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read( const __global uint* p );
|
||||
uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );
|
||||
uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );
|
||||
uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
|
||||
void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
|
||||
void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
|
||||
void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
|
||||
void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
|
||||
void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
|
||||
void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
|
||||
void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );
|
||||
void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
half __ovld __conv intel_sub_group_shuffle( half x, uint c );
|
||||
half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );
|
||||
half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );
|
||||
half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );
|
||||
#endif
|
||||
|
||||
#if defined(cl_khr_fp64)
|
||||
double __ovld __conv intel_sub_group_shuffle( double x, uint c );
|
||||
double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );
|
||||
double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );
|
||||
double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );
|
||||
#endif
|
||||
|
||||
#endif //cl_intel_subgroups
|
||||
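As a usage note, the shuffle_xor overloads above admit the usual butterfly reduction. The sketch below is hypothetical: it assumes the cl_intel_subgroups extension, a power-of-two subgroup size, and a single subgroup per work-group, and it is not part of the header.

#pragma OPENCL EXTENSION cl_intel_subgroups : enable

/* Sum one float per work-item across the subgroup via shuffle_xor. */
__kernel void subgroup_sum(__global const float *data, __global float *sums)
{
  float v = data[get_global_id(0)];
  for (uint mask = get_max_sub_group_size() / 2; mask > 0; mask /= 2)
    v += intel_sub_group_shuffle_xor(v, mask);
  if (get_sub_group_local_id() == 0)
    sums[get_group_id(0)] = v;   /* one partial sum per work-group (illustrative) */
}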
|
||||
#if defined(cl_intel_subgroups_short)
|
||||
short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );
|
||||
short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );
|
||||
short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );
|
||||
short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );
|
||||
short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );
|
||||
ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );
|
||||
ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );
|
||||
ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );
|
||||
ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle( short x, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );
|
||||
short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );
|
||||
short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );
|
||||
short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );
|
||||
short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );
|
||||
short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );
|
||||
|
||||
ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );
|
||||
ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );
|
||||
ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );
|
||||
ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );
|
||||
ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );
|
||||
ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );
|
||||
|
||||
short __ovld __conv intel_sub_group_reduce_add( short x );
|
||||
ushort __ovld __conv intel_sub_group_reduce_add( ushort x );
|
||||
short __ovld __conv intel_sub_group_reduce_min( short x );
|
||||
ushort __ovld __conv intel_sub_group_reduce_min( ushort x );
short __ovld __conv intel_sub_group_reduce_max( short x );
ushort __ovld __conv intel_sub_group_reduce_max( ushort x );

short __ovld __conv intel_sub_group_scan_exclusive_add( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_min( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_max( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );

short __ovld __conv intel_sub_group_scan_inclusive_add( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_min( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_max( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );

uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );

void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );

ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );
ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );

void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);
void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);

#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);
void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );
void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
#endif // cl_intel_subgroups_short

#ifdef cl_amd_media_ops
uint __ovld amd_bitalign(uint a, uint b, uint c);
uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);

50  c_headers/pconfigintrin.h  Normal file
@@ -0,0 +1,50 @@
/*===---- pconfigintrin.h - X86 platform configuration ---------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <pconfigintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __PCONFIGINTRIN_H
#define __PCONFIGINTRIN_H

#define __PCONFIG_KEY_PROGRAM 0x00000001

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("pconfig")))

static __inline unsigned int __DEFAULT_FN_ATTRS
_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  __asm__ ("pconfig"
           : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
           : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
           : "cc");
  return __result;
}

#undef __DEFAULT_FN_ATTRS

#endif
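As a usage sketch (not part of this diff): _pconfig_u32 passes the leaf in EAX and the three data registers through the __d array, so a caller fills __d with the leaf-specific payload before the call. The key-programming structure below is a hypothetical placeholder; its layout comes from the PCONFIG leaf ABI, not from this header.

#include <x86intrin.h>   /* exposes _pconfig_u32 when built with -mpconfig */

int program_mktme_key(void *key_program_struct /* hypothetical, leaf-defined layout */)
{
  __SIZE_TYPE__ regs[3] = { (__SIZE_TYPE__)key_program_struct, 0, 0 };
  /* EAX selects the leaf; RBX/RCX/RDX are loaded from and written back to regs[]. */
  unsigned int status = _pconfig_u32(__PCONFIG_KEY_PROGRAM, regs);
  return status == 0;   /* EAX == 0 indicates success for this leaf */
}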
@@ -1,4 +1,4 @@
/*===------------- pkuintrin.h - PKU intrinsics ------------------===
/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -40,7 +40,7 @@ _rdpkru_u32(void)
static __inline__ void __DEFAULT_FN_ATTRS
_wrpkru(unsigned int __val)
{
  return __builtin_ia32_wrpkru(__val);
  __builtin_ia32_wrpkru(__val);
}

#undef __DEFAULT_FN_ATTRS

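The hunk above drops the stray return from the void _wrpkru. A minimal usage sketch (illustrative only, not part of the diff; it assumes the usual PKRU layout of two bits per protection key, access-disable then write-disable):

#include <x86intrin.h>

/* Make memory tagged with protection key 1 read-only for the calling thread. */
static void make_pkey1_read_only(void)
{
  unsigned int pkru = _rdpkru_u32();
  pkru |= 1u << (2 * 1 + 1);   /* set the write-disable bit for key 1 */
  _wrpkru(pkru);
}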
@@ -28,9 +28,9 @@
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse3")))
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
|
||||
|
||||
/// \brief Loads data from an unaligned memory location to elements in a 128-bit
|
||||
/// Loads data from an unaligned memory location to elements in a 128-bit
|
||||
/// vector.
|
||||
///
|
||||
/// If the address of the data is not 16-byte aligned, the instruction may
|
||||
@@ -50,7 +50,7 @@ _mm_lddqu_si128(__m128i const *__p)
|
||||
return (__m128i)__builtin_ia32_lddqu((char const *)__p);
|
||||
}
|
||||
|
||||
/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
|
||||
/// Adds the even-indexed values and subtracts the odd-indexed values of
|
||||
/// two 128-bit vectors of [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -69,7 +69,7 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
|
||||
return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally adds the adjacent pairs of values contained in two
|
||||
/// Horizontally adds the adjacent pairs of values contained in two
|
||||
/// 128-bit vectors of [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -92,7 +92,7 @@ _mm_hadd_ps(__m128 __a, __m128 __b)
|
||||
return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the adjacent pairs of values contained in two
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in two
|
||||
/// 128-bit vectors of [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -115,8 +115,8 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
|
||||
return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
|
||||
}
|
||||
|
||||
/// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
|
||||
/// vector of [4 x float] to float values stored in a 128-bit vector of
|
||||
/// Moves and duplicates odd-indexed values from a 128-bit vector
|
||||
/// of [4 x float] to float values stored in a 128-bit vector of
|
||||
/// [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -137,7 +137,7 @@ _mm_movehdup_ps(__m128 __a)
|
||||
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
|
||||
}
|
||||
|
||||
/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
|
||||
/// Duplicates even-indexed values from a 128-bit vector of
|
||||
/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -158,7 +158,7 @@ _mm_moveldup_ps(__m128 __a)
|
||||
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
|
||||
}
|
||||
|
||||
/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
|
||||
/// Adds the even-indexed values and subtracts the odd-indexed values of
|
||||
/// two 128-bit vectors of [2 x double].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -177,7 +177,7 @@ _mm_addsub_pd(__m128d __a, __m128d __b)
|
||||
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally adds the pairs of values contained in two 128-bit
|
||||
/// Horizontally adds the pairs of values contained in two 128-bit
|
||||
/// vectors of [2 x double].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -200,7 +200,7 @@ _mm_hadd_pd(__m128d __a, __m128d __b)
|
||||
return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the pairs of values contained in two 128-bit
|
||||
/// Horizontally subtracts the pairs of values contained in two 128-bit
|
||||
/// vectors of [2 x double].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -223,13 +223,13 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
|
||||
return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
|
||||
}
|
||||
|
||||
/// \brief Moves and duplicates one double-precision value to double-precision
|
||||
/// Moves and duplicates one double-precision value to double-precision
|
||||
/// values stored in a 128-bit vector of [2 x double].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m128d _mm_loaddup_pd(double const * dp);
|
||||
/// __m128d _mm_loaddup_pd(double const *dp);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
|
||||
@@ -240,7 +240,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
|
||||
/// duplicated values.
|
||||
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
|
||||
|
||||
/// \brief Moves and duplicates the double-precision value in the lower bits of
|
||||
/// Moves and duplicates the double-precision value in the lower bits of
|
||||
/// a 128-bit vector of [2 x double] to double-precision values stored in a
|
||||
/// 128-bit vector of [2 x double].
|
||||
///
|
||||
@@ -259,7 +259,7 @@ _mm_movedup_pd(__m128d __a)
|
||||
return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
|
||||
}
|
||||
|
||||
/// \brief Establishes a linear address memory range to be monitored and puts
|
||||
/// Establishes a linear address memory range to be monitored and puts
|
||||
/// the processor in the monitor event pending state. Data stored in the
|
||||
/// monitored address range causes the processor to exit the pending state.
|
||||
///
|
||||
@@ -280,7 +280,7 @@ _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
|
||||
__builtin_ia32_monitor((void *)__p, __extensions, __hints);
|
||||
}
|
||||
|
||||
/// \brief Used with the MONITOR instruction to wait while the processor is in
|
||||
/// Used with the MONITOR instruction to wait while the processor is in
|
||||
/// the monitor event pending state. Data stored in the monitored address
|
||||
/// range causes the processor to exit the pending state.
|
||||
///
|
||||
|
||||
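The SSE3 (pmmintrin.h) hunks above mostly reflow the Doxygen comments and add __min_vector_width__; behaviour is unchanged. A short sketch of the horizontal-add and duplicate operations those comments describe (illustrative only):

#include <x86intrin.h>

void sse3_demo(void)
{
  __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_setr_ps(10.0f, 20.0f, 30.0f, 40.0f);
  __m128 sums = _mm_hadd_ps(a, b);    /* {1+2, 3+4, 10+20, 30+40} */
  __m128 odd  = _mm_movehdup_ps(a);   /* {2, 2, 4, 4}: duplicates odd-indexed lanes */
  (void)sums; (void)odd;
}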
@@ -21,13 +21,13 @@
 *===-----------------------------------------------------------------------===
 */

#ifndef _POPCNTINTRIN_H
#define _POPCNTINTRIN_H
#ifndef __POPCNTINTRIN_H
#define __POPCNTINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt")))

/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -43,7 +43,7 @@ _mm_popcnt_u32(unsigned int __A)
  return __builtin_popcount(__A);
}

/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -60,7 +60,7 @@ _popcnt32(int __A)
}

#ifdef __x86_64__
/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -76,7 +76,7 @@ _mm_popcnt_u64(unsigned long long __A)
  return __builtin_popcountll(__A);
}

/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -95,4 +95,4 @@ _popcnt64(long long __A)

#undef __DEFAULT_FN_ATTRS

#endif /* _POPCNTINTRIN_H */
#endif /* __POPCNTINTRIN_H */

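A one-liner showing the two popcount entry points the hunks above touch (illustrative only, not part of the diff):

#include <x86intrin.h>

int count_bits(unsigned int lo, unsigned long long wide)
{
  return _mm_popcnt_u32(lo) + (int)_mm_popcnt_u64(wide);   /* the u64 form needs x86-64 */
}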
@@ -28,8 +28,7 @@
#ifndef __PRFCHWINTRIN_H
#define __PRFCHWINTRIN_H

#if defined(__PRFCHW__) || defined(__3dNOW__)
/// \brief Loads a memory sequence containing the specified memory address into
/// Loads a memory sequence containing the specified memory address into
/// all data cache levels. The cache-coherency state is set to exclusive.
/// Data can be read from and written to the cache line without additional
/// delay.
@@ -46,7 +45,7 @@ _m_prefetch(void *__P)
  __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}

/// \brief Loads a memory sequence containing the specified memory address into
/// Loads a memory sequence containing the specified memory address into
/// the L1 data cache and sets the cache-coherency to modified. This
/// provides a hint to the processor that the cache line will be modified.
/// It is intended for use when the cache line will be written to shortly
@@ -66,6 +65,5 @@ _m_prefetchw(void *__P)
{
  __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
}
#endif

#endif /* __PRFCHWINTRIN_H */

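The prfchwintrin.h change removes the __PRFCHW__/__3dNOW__ guard so the prefetch helpers are always declared. Usage sketch (hints only; they never affect correctness):

#include <x86intrin.h>

void warm_cache_for_write(void *line)
{
  _m_prefetch(line);    /* bring the line into all cache levels (exclusive state) */
  _m_prefetchw(line);   /* same, but primed for an upcoming store */
}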
51  c_headers/ptwriteintrin.h  Normal file
@@ -0,0 +1,51 @@
/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __PTWRITEINTRIN_H
#define __PTWRITEINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("ptwrite")))

static __inline__ void __DEFAULT_FN_ATTRS
_ptwrite32(unsigned int __value) {
  __builtin_ia32_ptwrite32(__value);
}

#ifdef __x86_64__

static __inline__ void __DEFAULT_FN_ATTRS
_ptwrite64(unsigned long long __value) {
  __builtin_ia32_ptwrite64(__value);
}

#endif /* __x86_64__ */

#undef __DEFAULT_FN_ATTRS

#endif /* __PTWRITEINTRIN_H */
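Sketch of the new PTWRITE wrappers (illustrative only; the value only reaches the trace when Intel PT is enabled and configured to accept PTWRITE packets):

#include <x86intrin.h>

void trace_progress(unsigned int step, unsigned long long cookie)
{
  _ptwrite32(step);     /* emits a 4-byte PTW packet into the Intel PT trace */
#ifdef __x86_64__
  _ptwrite64(cookie);   /* 8-byte variant, x86-64 only */
#endif
}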
@@ -21,7 +21,7 @@
 *===-----------------------------------------------------------------------===
 */

#ifndef __X86INTRIN_H
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
#endif


70  c_headers/sgxintrin.h  Normal file
@@ -0,0 +1,70 @@
/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <sgxintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __SGXINTRIN_H
#define __SGXINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sgx")))

static __inline unsigned int __DEFAULT_FN_ATTRS
_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  __asm__ ("enclu"
           : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
           : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
           : "cc");
  return __result;
}

static __inline unsigned int __DEFAULT_FN_ATTRS
_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  __asm__ ("encls"
           : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
           : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
           : "cc");
  return __result;
}

static __inline unsigned int __DEFAULT_FN_ATTRS
_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  __asm__ ("enclv"
           : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
           : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
           : "cc");
  return __result;
}

#undef __DEFAULT_FN_ATTRS

#endif
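Sketch of the new SGX wrappers (illustrative only; the leaf numbers and the meaning of the __d slots come from the Intel SDM, not from this header):

#include <x86intrin.h>

unsigned int run_encls_leaf(unsigned int leaf, void *rbx, void *rcx, void *rdx)
{
  __SIZE_TYPE__ regs[3] = {
    (__SIZE_TYPE__)rbx, (__SIZE_TYPE__)rcx, (__SIZE_TYPE__)rdx };
  /* EAX selects the supervisor leaf (ECREATE, EADD, EINIT, ...);
     the updated RBX/RCX/RDX values are written back into regs[]. */
  return _encls_u32(leaf, regs);
}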
@@ -29,10 +29,10 @@
#define __SHAINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128)))

#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
  __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)); })
#define _mm_sha1rnds4_epu32(V1, V2, M) \
  __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)

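The shaintrin.h hunk only changes how _mm_sha1rnds4_epu32 is spelled (the GNU statement expression is dropped and __min_vector_width__ added); call sites are unchanged. Illustrative sketch, not part of the diff:

#include <x86intrin.h>

__m128i sha1_four_rounds(__m128i abcd, __m128i msg)
{
  /* The immediate 0 selects the round function used for SHA-1 rounds 0..19;
     msg carries the pre-added message words for this group of rounds. */
  return _mm_sha1rnds4_epu32(abcd, msg, 0);
}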
@@ -21,13 +21,13 @@
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef _SMMINTRIN_H
|
||||
#define _SMMINTRIN_H
|
||||
#ifndef __SMMINTRIN_H
|
||||
#define __SMMINTRIN_H
|
||||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128)))
|
||||
|
||||
/* SSE4 Rounding macros. */
|
||||
#define _MM_FROUND_TO_NEAREST_INT 0x00
|
||||
@@ -46,7 +46,7 @@
|
||||
#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
|
||||
#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
/// \brief Rounds up each element of the 128-bit vector of [4 x float] to an
|
||||
/// Rounds up each element of the 128-bit vector of [4 x float] to an
|
||||
/// integer and returns the rounded values in a 128-bit vector of
|
||||
/// [4 x float].
|
||||
///
|
||||
@@ -63,7 +63,7 @@
|
||||
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
|
||||
#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)
|
||||
|
||||
/// \brief Rounds up each element of the 128-bit vector of [2 x double] to an
|
||||
/// Rounds up each element of the 128-bit vector of [2 x double] to an
|
||||
/// integer and returns the rounded values in a 128-bit vector of
|
||||
/// [2 x double].
|
||||
///
|
||||
@@ -80,7 +80,7 @@
|
||||
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
|
||||
#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)
|
||||
|
||||
/// \brief Copies three upper elements of the first 128-bit vector operand to
|
||||
/// Copies three upper elements of the first 128-bit vector operand to
|
||||
/// the corresponding three upper elements of the 128-bit result vector of
|
||||
/// [4 x float]. Rounds up the lowest element of the second 128-bit vector
|
||||
/// operand to an integer and copies it to the lowest element of the 128-bit
|
||||
@@ -105,7 +105,7 @@
|
||||
/// values.
|
||||
#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
|
||||
|
||||
/// \brief Copies the upper element of the first 128-bit vector operand to the
|
||||
/// Copies the upper element of the first 128-bit vector operand to the
|
||||
/// corresponding upper element of the 128-bit result vector of [2 x double].
|
||||
/// Rounds up the lower element of the second 128-bit vector operand to an
|
||||
/// integer and copies it to the lower element of the 128-bit result vector
|
||||
@@ -130,7 +130,7 @@
|
||||
/// values.
|
||||
#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)
|
||||
|
||||
/// \brief Rounds down each element of the 128-bit vector of [4 x float] to an
|
||||
/// Rounds down each element of the 128-bit vector of [4 x float] to an
|
||||
/// an integer and returns the rounded values in a 128-bit vector of
|
||||
/// [4 x float].
|
||||
///
|
||||
@@ -147,7 +147,7 @@
|
||||
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
|
||||
#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)
|
||||
|
||||
/// \brief Rounds down each element of the 128-bit vector of [2 x double] to an
|
||||
/// Rounds down each element of the 128-bit vector of [2 x double] to an
|
||||
/// integer and returns the rounded values in a 128-bit vector of
|
||||
/// [2 x double].
|
||||
///
|
||||
@@ -164,7 +164,7 @@
|
||||
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
|
||||
#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)
|
||||
|
||||
/// \brief Copies three upper elements of the first 128-bit vector operand to
|
||||
/// Copies three upper elements of the first 128-bit vector operand to
|
||||
/// the corresponding three upper elements of the 128-bit result vector of
|
||||
/// [4 x float]. Rounds down the lowest element of the second 128-bit vector
|
||||
/// operand to an integer and copies it to the lowest element of the 128-bit
|
||||
@@ -189,7 +189,7 @@
|
||||
/// values.
|
||||
#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
|
||||
|
||||
/// \brief Copies the upper element of the first 128-bit vector operand to the
|
||||
/// Copies the upper element of the first 128-bit vector operand to the
|
||||
/// corresponding upper element of the 128-bit result vector of [2 x double].
|
||||
/// Rounds down the lower element of the second 128-bit vector operand to an
|
||||
/// integer and copies it to the lower element of the 128-bit result vector
|
||||
@@ -214,7 +214,7 @@
|
||||
/// values.
|
||||
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
|
||||
|
||||
/// \brief Rounds each element of the 128-bit vector of [4 x float] to an
|
||||
/// Rounds each element of the 128-bit vector of [4 x float] to an
|
||||
/// integer value according to the rounding control specified by the second
|
||||
/// argument and returns the rounded values in a 128-bit vector of
|
||||
/// [4 x float].
|
||||
@@ -244,10 +244,10 @@
|
||||
/// 10: Upward (toward positive infinity) \n
|
||||
/// 11: Truncated
|
||||
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
|
||||
#define _mm_round_ps(X, M) __extension__ ({ \
|
||||
(__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })
|
||||
#define _mm_round_ps(X, M) \
|
||||
(__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
|
||||
|
||||
/// \brief Copies three upper elements of the first 128-bit vector operand to
|
||||
/// Copies three upper elements of the first 128-bit vector operand to
|
||||
/// the corresponding three upper elements of the 128-bit result vector of
|
||||
/// [4 x float]. Rounds the lowest element of the second 128-bit vector
|
||||
/// operand to an integer value according to the rounding control specified
|
||||
@@ -285,11 +285,11 @@
|
||||
/// 11: Truncated
|
||||
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
|
||||
/// values.
|
||||
#define _mm_round_ss(X, Y, M) __extension__ ({ \
|
||||
#define _mm_round_ss(X, Y, M) \
|
||||
(__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
|
||||
(__v4sf)(__m128)(Y), (M)); })
|
||||
(__v4sf)(__m128)(Y), (M))
|
||||
|
||||
/// \brief Rounds each element of the 128-bit vector of [2 x double] to an
|
||||
/// Rounds each element of the 128-bit vector of [2 x double] to an
|
||||
/// integer value according to the rounding control specified by the second
|
||||
/// argument and returns the rounded values in a 128-bit vector of
|
||||
/// [2 x double].
|
||||
@@ -319,10 +319,10 @@
|
||||
/// 10: Upward (toward positive infinity) \n
|
||||
/// 11: Truncated
|
||||
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
|
||||
#define _mm_round_pd(X, M) __extension__ ({ \
|
||||
(__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
|
||||
#define _mm_round_pd(X, M) \
|
||||
(__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))
|
||||
|
||||
/// \brief Copies the upper element of the first 128-bit vector operand to the
|
||||
/// Copies the upper element of the first 128-bit vector operand to the
|
||||
/// corresponding upper element of the 128-bit result vector of [2 x double].
|
||||
/// Rounds the lower element of the second 128-bit vector operand to an
|
||||
/// integer value according to the rounding control specified by the third
|
||||
@@ -360,12 +360,12 @@
|
||||
/// 11: Truncated
|
||||
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
|
||||
/// values.
|
||||
#define _mm_round_sd(X, Y, M) __extension__ ({ \
|
||||
#define _mm_round_sd(X, Y, M) \
|
||||
(__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
|
||||
(__v2df)(__m128d)(Y), (M)); })
|
||||
(__v2df)(__m128d)(Y), (M))
|
||||
|
||||
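The rounding macros above lose their statement-expression wrappers but keep the same arguments, so existing call sites compile unchanged. Illustrative sketch (not part of the diff):

#include <x86intrin.h>

__m128 round_demo(__m128 v)
{
  __m128 down = _mm_floor_ps(v);   /* toward negative infinity */
  __m128 up   = _mm_ceil_ps(v);    /* toward positive infinity */
  __m128 near = _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  return _mm_add_ps(down, _mm_add_ps(up, near));
}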
/* SSE4 Packed Blending Intrinsics. */
|
||||
/// \brief Returns a 128-bit vector of [2 x double] where the values are
|
||||
/// Returns a 128-bit vector of [2 x double] where the values are
|
||||
/// selected from either the first or second operand as specified by the
|
||||
/// third operand, the control mask.
|
||||
///
|
||||
@@ -389,13 +389,11 @@
|
||||
/// When a mask bit is 1, the corresponding 64-bit element in operand \a V2
|
||||
/// is copied to the same position in the result.
|
||||
/// \returns A 128-bit vector of [2 x double] containing the copied values.
|
||||
#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
|
||||
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
|
||||
(__v2df)(__m128d)(V2), \
|
||||
(((M) & 0x01) ? 2 : 0), \
|
||||
(((M) & 0x02) ? 3 : 1)); })
|
||||
#define _mm_blend_pd(V1, V2, M) \
|
||||
(__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
|
||||
(__v2df)(__m128d)(V2), (int)(M))
|
||||
|
||||
/// \brief Returns a 128-bit vector of [4 x float] where the values are selected
|
||||
/// Returns a 128-bit vector of [4 x float] where the values are selected
|
||||
/// from either the first or second operand as specified by the third
|
||||
/// operand, the control mask.
|
||||
///
|
||||
@@ -419,14 +417,11 @@
|
||||
/// When a mask bit is 1, the corresponding 32-bit element in operand \a V2
|
||||
/// is copied to the same position in the result.
|
||||
/// \returns A 128-bit vector of [4 x float] containing the copied values.
|
||||
#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
|
||||
(__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
|
||||
(((M) & 0x01) ? 4 : 0), \
|
||||
(((M) & 0x02) ? 5 : 1), \
|
||||
(((M) & 0x04) ? 6 : 2), \
|
||||
(((M) & 0x08) ? 7 : 3)); })
|
||||
#define _mm_blend_ps(V1, V2, M) \
|
||||
(__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
|
||||
(__v4sf)(__m128)(V2), (int)(M))
|
||||
|
||||
/// \brief Returns a 128-bit vector of [2 x double] where the values are
|
||||
/// Returns a 128-bit vector of [2 x double] where the values are
|
||||
/// selected from either the first or second operand as specified by the
|
||||
/// third operand, the control mask.
|
||||
///
|
||||
@@ -453,7 +448,7 @@ _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
|
||||
(__v2df)__M);
|
||||
}
|
||||
|
||||
/// \brief Returns a 128-bit vector of [4 x float] where the values are
|
||||
/// Returns a 128-bit vector of [4 x float] where the values are
|
||||
/// selected from either the first or second operand as specified by the
|
||||
/// third operand, the control mask.
|
||||
///
|
||||
@@ -480,7 +475,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
|
||||
(__v4sf)__M);
|
||||
}
|
||||
|
||||
/// \brief Returns a 128-bit vector of [16 x i8] where the values are selected
|
||||
/// Returns a 128-bit vector of [16 x i8] where the values are selected
|
||||
/// from either of the first or second operand as specified by the third
|
||||
/// operand, the control mask.
|
||||
///
|
||||
@@ -493,7 +488,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
|
||||
/// \param __V2
|
||||
/// A 128-bit vector of [16 x i8].
|
||||
/// \param __M
|
||||
/// A 128-bit vector operand, with mask bits 127, 119, 111 ... 7 specifying
|
||||
/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying
|
||||
/// how the values are to be copied. The position of the mask bit corresponds
|
||||
/// to the most significant bit of a copied value. When a mask bit is 0, the
|
||||
/// corresponding 8-bit element in operand \a __V1 is copied to the same
|
||||
@@ -507,7 +502,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
|
||||
(__v16qi)__M);
|
||||
}
|
||||
|
||||
/// \brief Returns a 128-bit vector of [8 x i16] where the values are selected
|
||||
/// Returns a 128-bit vector of [8 x i16] where the values are selected
|
||||
/// from either of the first or second operand as specified by the third
|
||||
/// operand, the control mask.
|
||||
///
|
||||
@@ -531,20 +526,12 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
|
||||
/// When a mask bit is 1, the corresponding 16-bit element in operand \a V2
|
||||
/// is copied to the same position in the result.
|
||||
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
|
||||
#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
|
||||
(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
|
||||
(__v8hi)(__m128i)(V2), \
|
||||
(((M) & 0x01) ? 8 : 0), \
|
||||
(((M) & 0x02) ? 9 : 1), \
|
||||
(((M) & 0x04) ? 10 : 2), \
|
||||
(((M) & 0x08) ? 11 : 3), \
|
||||
(((M) & 0x10) ? 12 : 4), \
|
||||
(((M) & 0x20) ? 13 : 5), \
|
||||
(((M) & 0x40) ? 14 : 6), \
|
||||
(((M) & 0x80) ? 15 : 7)); })
|
||||
#define _mm_blend_epi16(V1, V2, M) \
|
||||
(__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
|
||||
(__v8hi)(__m128i)(V2), (int)(M))
|
||||
|
||||
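The blend macros above now map directly onto the blendpd/blendps/pblendw builtins instead of __builtin_shufflevector; the immediate mask keeps its meaning (bit i set selects element i from the second operand). Illustrative call, not part of the diff:

#include <x86intrin.h>

__m128i interleave_words(__m128i a, __m128i b)
{
  /* 0xAA = 0b10101010: take odd-indexed 16-bit lanes from b, even-indexed from a. */
  return _mm_blend_epi16(a, b, 0xAA);
}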
/* SSE4 Dword Multiply Instructions. */
|
||||
/// \brief Multiples corresponding elements of two 128-bit vectors of [4 x i32]
|
||||
/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]
|
||||
/// and returns the lower 32 bits of the each product in a 128-bit vector of
|
||||
/// [4 x i32].
|
||||
///
|
||||
@@ -563,7 +550,7 @@ _mm_mullo_epi32 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) ((__v4su)__V1 * (__v4su)__V2);
|
||||
}
|
||||
|
||||
/// \brief Multiplies corresponding even-indexed elements of two 128-bit
|
||||
/// Multiplies corresponding even-indexed elements of two 128-bit
|
||||
/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]
|
||||
/// containing the products.
|
||||
///
|
||||
@@ -584,7 +571,7 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
|
||||
}
|
||||
|
||||
/* SSE4 Floating Point Dot Product Instructions. */
|
||||
/// \brief Computes the dot product of the two 128-bit vectors of [4 x float]
|
||||
/// Computes the dot product of the two 128-bit vectors of [4 x float]
|
||||
/// and returns it in the elements of the 128-bit result vector of
|
||||
/// [4 x float].
|
||||
///
|
||||
@@ -616,11 +603,11 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
|
||||
/// each [4 x float] subvector. If a bit is set, the dot product is returned
|
||||
/// in the corresponding element; otherwise that element is set to zero.
|
||||
/// \returns A 128-bit vector of [4 x float] containing the dot product.
|
||||
#define _mm_dp_ps(X, Y, M) __extension__ ({ \
|
||||
#define _mm_dp_ps(X, Y, M) \
|
||||
(__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
|
||||
(__v4sf)(__m128)(Y), (M)); })
|
||||
(__v4sf)(__m128)(Y), (M))
|
||||
|
||||
/// \brief Computes the dot product of the two 128-bit vectors of [2 x double]
|
||||
/// Computes the dot product of the two 128-bit vectors of [2 x double]
|
||||
/// and returns it in the elements of the 128-bit result vector of
|
||||
/// [2 x double].
|
||||
///
|
||||
@@ -648,15 +635,15 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
|
||||
/// input vectors are used as an input for dot product; otherwise that input
|
||||
/// is treated as zero. Bits [1:0] determine which elements of the result
|
||||
/// will receive a copy of the final dot product, with bit [0] corresponding
|
||||
/// to the lowest element and bit [3] corresponding to the highest element of
|
||||
/// to the lowest element and bit [1] corresponding to the highest element of
|
||||
/// each [2 x double] vector. If a bit is set, the dot product is returned in
|
||||
/// the corresponding element; otherwise that element is set to zero.
|
||||
#define _mm_dp_pd(X, Y, M) __extension__ ({\
|
||||
#define _mm_dp_pd(X, Y, M) \
|
||||
(__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
|
||||
(__v2df)(__m128d)(Y), (M)); })
|
||||
(__v2df)(__m128d)(Y), (M))
|
||||
|
||||
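Sketch of the dot-product macro documented above (illustrative only, not part of the diff):

#include <x86intrin.h>

float dot4(__m128 a, __m128 b)
{
  /* High nibble 0xF: multiply all four lanes; low nibble 0x1: place the sum in lane 0. */
  __m128 d = _mm_dp_ps(a, b, 0xF1);
  return _mm_cvtss_f32(d);
}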
/* SSE4 Streaming Load Hint Instruction. */
|
||||
/// \brief Loads integer values from a 128-bit aligned memory location to a
|
||||
/// Loads integer values from a 128-bit aligned memory location to a
|
||||
/// 128-bit integer vector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -675,7 +662,7 @@ _mm_stream_load_si128 (__m128i const *__V)
|
||||
}
|
||||
|
||||
/* SSE4 Packed Integer Min/Max Instructions. */
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser
|
||||
/// of the two values.
|
||||
///
|
||||
@@ -694,7 +681,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
|
||||
}
|
||||
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the
|
||||
/// greater value of the two.
|
||||
///
|
||||
@@ -713,7 +700,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
|
||||
}
|
||||
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser
|
||||
/// value of the two.
|
||||
///
|
||||
@@ -732,7 +719,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
|
||||
}
|
||||
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the
|
||||
/// greater value of the two.
|
||||
///
|
||||
@@ -751,7 +738,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
|
||||
}
|
||||
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser
|
||||
/// value of the two.
|
||||
///
|
||||
@@ -770,7 +757,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
|
||||
}
|
||||
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the
|
||||
/// greater value of the two.
|
||||
///
|
||||
@@ -789,7 +776,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
|
||||
}
|
||||
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser
|
||||
/// value of the two.
|
||||
///
|
||||
@@ -808,7 +795,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2)
|
||||
return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
|
||||
}
|
||||
|
||||
/// \brief Compares the corresponding elements of two 128-bit vectors of
|
||||
/// Compares the corresponding elements of two 128-bit vectors of
|
||||
/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the
|
||||
/// greater value of the two.
|
||||
///
|
||||
@@ -828,7 +815,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
}
|
||||
|
||||
/* SSE4 Insertion and Extraction from XMM Register Instructions. */
|
||||
/// \brief Takes the first argument \a X and inserts an element from the second
|
||||
/// Takes the first argument \a X and inserts an element from the second
|
||||
/// argument \a Y as selected by the third argument \a N. That result then
|
||||
/// has elements zeroed out also as selected by the third argument \a N. The
|
||||
/// resulting 128-bit vector of [4 x float] is then returned.
|
||||
@@ -866,11 +853,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 11: Copies the selected bits from \a Y to result bits [127:96]. \n
|
||||
/// Bits[3:0]: If any of these bits are set, the corresponding result
|
||||
/// element is cleared.
|
||||
/// \returns A 128-bit vector of [4 x float] containing the copied single-
|
||||
/// precision floating point elements from the operands.
|
||||
/// \returns A 128-bit vector of [4 x float] containing the copied
|
||||
/// single-precision floating point elements from the operands.
|
||||
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
|
||||
|
||||
/// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
|
||||
/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
|
||||
/// returns it, using the immediate value parameter \a N as a selector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -893,15 +880,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 11: Bits [127:96] of parameter \a X are returned.
|
||||
/// \returns A 32-bit integer containing the extracted 32 bits of float data.
|
||||
#define _mm_extract_ps(X, N) (__extension__ \
|
||||
({ union { int __i; float __f; } __t; \
|
||||
__v4sf __a = (__v4sf)(__m128)(X); \
|
||||
__t.__f = __a[(N) & 3]; \
|
||||
__t.__i;}))
|
||||
({ union { int __i; float __f; } __t; \
|
||||
__t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \
|
||||
__t.__i;}))
|
||||
|
||||
/* Miscellaneous insert and extract macros. */
|
||||
/* Extract a single-precision float from X at index N into D. */
|
||||
#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \
|
||||
(D) = __a[N]; }))
|
||||
#define _MM_EXTRACT_FLOAT(D, X, N) \
|
||||
{ (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }
|
||||
|
||||
/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
|
||||
an index suitable for _mm_insert_ps. */
|
||||
@@ -912,7 +898,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
_MM_MK_INSERTPS_NDX((N), 0, 0x0e))
|
||||
|
||||
/* Insert int into packed integer array at index. */
|
||||
/// \brief Constructs a 128-bit vector of [16 x i8] by first making a copy of
|
||||
/// Constructs a 128-bit vector of [16 x i8] by first making a copy of
|
||||
/// the 128-bit integer vector parameter, and then inserting the lower 8 bits
|
||||
/// of an integer parameter \a I into an offset specified by the immediate
|
||||
/// value parameter \a N.
|
||||
@@ -952,12 +938,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 1110: Bits [119:112] of the result are used for insertion. \n
|
||||
/// 1111: Bits [127:120] of the result are used for insertion.
|
||||
/// \returns A 128-bit integer vector containing the constructed values.
|
||||
#define _mm_insert_epi8(X, I, N) (__extension__ \
|
||||
({ __v16qi __a = (__v16qi)(__m128i)(X); \
|
||||
__a[(N) & 15] = (I); \
|
||||
(__m128i)__a;}))
|
||||
#define _mm_insert_epi8(X, I, N) \
|
||||
(__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
|
||||
(int)(I), (int)(N))
|
||||
|
||||
/// \brief Constructs a 128-bit vector of [4 x i32] by first making a copy of
|
||||
/// Constructs a 128-bit vector of [4 x i32] by first making a copy of
|
||||
/// the 128-bit integer vector parameter, and then inserting the 32-bit
|
||||
/// integer parameter \a I at the offset specified by the immediate value
|
||||
/// parameter \a N.
|
||||
@@ -985,13 +970,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 10: Bits [95:64] of the result are used for insertion. \n
|
||||
/// 11: Bits [127:96] of the result are used for insertion.
|
||||
/// \returns A 128-bit integer vector containing the constructed values.
|
||||
#define _mm_insert_epi32(X, I, N) (__extension__ \
|
||||
({ __v4si __a = (__v4si)(__m128i)(X); \
|
||||
__a[(N) & 3] = (I); \
|
||||
(__m128i)__a;}))
|
||||
#define _mm_insert_epi32(X, I, N) \
|
||||
(__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
|
||||
(int)(I), (int)(N))
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// \brief Constructs a 128-bit vector of [2 x i64] by first making a copy of
|
||||
/// Constructs a 128-bit vector of [2 x i64] by first making a copy of
|
||||
/// the 128-bit integer vector parameter, and then inserting the 64-bit
|
||||
/// integer parameter \a I, using the immediate value parameter \a N as an
|
||||
/// insertion location selector.
|
||||
@@ -1017,16 +1001,15 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 0: Bits [63:0] of the result are used for insertion. \n
|
||||
/// 1: Bits [127:64] of the result are used for insertion. \n
|
||||
/// \returns A 128-bit integer vector containing the constructed values.
|
||||
#define _mm_insert_epi64(X, I, N) (__extension__ \
|
||||
({ __v2di __a = (__v2di)(__m128i)(X); \
|
||||
__a[(N) & 1] = (I); \
|
||||
(__m128i)__a;}))
|
||||
#define _mm_insert_epi64(X, I, N) \
|
||||
(__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
|
||||
(long long)(I), (int)(N))
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
/* Extract int from packed integer array at index. This returns the element
|
||||
* as a zero extended value, so it is unsigned.
|
||||
*/
|
||||
/// \brief Extracts an 8-bit element from the 128-bit integer vector of
|
||||
/// Extracts an 8-bit element from the 128-bit integer vector of
|
||||
/// [16 x i8], using the immediate value parameter \a N as a selector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1061,11 +1044,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// \returns An unsigned integer, whose lower 8 bits are selected from the
|
||||
/// 128-bit integer vector parameter and the remaining bits are assigned
|
||||
/// zeros.
|
||||
#define _mm_extract_epi8(X, N) (__extension__ \
|
||||
({ __v16qi __a = (__v16qi)(__m128i)(X); \
|
||||
(int)(unsigned char) __a[(N) & 15];}))
|
||||
#define _mm_extract_epi8(X, N) \
|
||||
(int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
|
||||
(int)(N))
|
||||
|
||||
/// \brief Extracts a 32-bit element from the 128-bit integer vector of
|
||||
/// Extracts a 32-bit element from the 128-bit integer vector of
|
||||
/// [4 x i32], using the immediate value parameter \a N as a selector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1087,12 +1070,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 11: Bits [127:96] of the parameter \a X are exracted.
|
||||
/// \returns An integer, whose lower 32 bits are selected from the 128-bit
|
||||
/// integer vector parameter and the remaining bits are assigned zeros.
|
||||
#define _mm_extract_epi32(X, N) (__extension__ \
|
||||
({ __v4si __a = (__v4si)(__m128i)(X); \
|
||||
(int)__a[(N) & 3];}))
|
||||
#define _mm_extract_epi32(X, N) \
|
||||
(int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// \brief Extracts a 64-bit element from the 128-bit integer vector of
|
||||
/// Extracts a 64-bit element from the 128-bit integer vector of
|
||||
/// [2 x i64], using the immediate value parameter \a N as a selector.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1111,13 +1093,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
/// 0: Bits [63:0] are returned. \n
|
||||
/// 1: Bits [127:64] are returned. \n
|
||||
/// \returns A 64-bit integer.
|
||||
#define _mm_extract_epi64(X, N) (__extension__ \
|
||||
({ __v2di __a = (__v2di)(__m128i)(X); \
|
||||
(long long)__a[(N) & 1];}))
|
||||
#define _mm_extract_epi64(X, N) \
|
||||
(long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))
|
||||
#endif /* __x86_64 */
|
||||
|
||||
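Sketch of the insert/extract pair after the switch to the vec_set/vec_ext builtins; behaviour is unchanged (illustrative only, not part of the diff):

#include <x86intrin.h>

int replace_and_read_lane2(__m128i v, int x)
{
  __m128i w = _mm_insert_epi32(v, x, 2);   /* write x into 32-bit lane 2 */
  return _mm_extract_epi32(w, 2);          /* read it back: returns x */
}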
/* SSE4 128-bit Packed Integer Comparisons. */
|
||||
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// zeros.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1135,7 +1116,7 @@ _mm_testz_si128(__m128i __M, __m128i __V)
|
||||
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
|
||||
}
|
||||
|
||||
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// ones.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1153,7 +1134,7 @@ _mm_testc_si128(__m128i __M, __m128i __V)
|
||||
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
|
||||
}
|
||||
|
||||
/// \brief Tests whether the specified bits in a 128-bit integer vector are
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are
|
||||
/// neither all zeros nor all ones.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1172,7 +1153,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
|
||||
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
|
||||
}
|
||||
|
||||
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// ones.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1189,7 +1170,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
|
||||
/// otherwise.
|
||||
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))
|
||||
|
||||
/// \brief Tests whether the specified bits in a 128-bit integer vector are
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are
|
||||
/// neither all zeros nor all ones.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1208,7 +1189,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
|
||||
/// FALSE otherwise.
|
||||
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
|
||||
|
||||
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
/// zeros.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1227,7 +1208,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
|
||||
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
|
||||
|
||||
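Sketch of the PTEST helpers documented above (illustrative only, not part of the diff):

#include <x86intrin.h>

int vectors_disjoint(__m128i a, __m128i b)
{
  /* _mm_testz_si128 returns 1 when (a & b) is all zeros. */
  return _mm_testz_si128(a, b);
}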
/* SSE4 64-bit Packed Integer Comparisons. */
|
||||
/// \brief Compares each of the corresponding 64-bit values of the 128-bit
|
||||
/// Compares each of the corresponding 64-bit values of the 128-bit
|
||||
/// integer vectors for equality.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1246,7 +1227,7 @@ _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
|
||||
}
|
||||
|
||||
/* SSE4 Packed Integer Sign-Extension. */
|
||||
/// \brief Sign-extends each of the lower eight 8-bit integer elements of a
|
||||
/// Sign-extends each of the lower eight 8-bit integer elements of a
|
||||
/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
|
||||
/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
|
||||
/// are unused.
|
||||
@@ -1267,7 +1248,7 @@ _mm_cvtepi8_epi16(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
|
||||
}
|
||||
|
||||
/// \brief Sign-extends each of the lower four 8-bit integer elements of a
|
||||
/// Sign-extends each of the lower four 8-bit integer elements of a
|
||||
/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
|
||||
/// 128-bit vector of [4 x i32]. The upper twelve elements of the input
|
||||
/// vector are unused.
|
||||
@@ -1277,8 +1258,8 @@ _mm_cvtepi8_epi16(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are sign-
|
||||
/// extended to 32-bit values.
|
||||
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
|
||||
/// sign-extended to 32-bit values.
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepi8_epi32(__m128i __V)
|
||||
@@ -1288,7 +1269,7 @@ _mm_cvtepi8_epi32(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
|
||||
}
|
||||
|
||||
/// \brief Sign-extends each of the lower two 8-bit integer elements of a
|
||||
/// Sign-extends each of the lower two 8-bit integer elements of a
|
||||
/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
|
||||
/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
|
||||
/// vector are unused.
|
||||
@@ -1298,8 +1279,8 @@ _mm_cvtepi8_epi32(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are sign-
|
||||
/// extended to 64-bit values.
|
||||
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
|
||||
/// sign-extended to 64-bit values.
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepi8_epi64(__m128i __V)
|
||||
@@ -1309,7 +1290,7 @@ _mm_cvtepi8_epi64(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
|
||||
}
|
||||
|
||||
/// \brief Sign-extends each of the lower four 16-bit integer elements of a
|
||||
/// Sign-extends each of the lower four 16-bit integer elements of a
|
||||
/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
|
||||
/// a 128-bit vector of [4 x i32]. The upper four elements of the input
|
||||
/// vector are unused.
|
||||
@@ -1319,8 +1300,8 @@ _mm_cvtepi8_epi64(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are sign-
|
||||
/// extended to 32-bit values.
|
||||
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
|
||||
/// sign-extended to 32-bit values.
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepi16_epi32(__m128i __V)
|
||||
@@ -1328,7 +1309,7 @@ _mm_cvtepi16_epi32(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
|
||||
}
|
||||
|
||||
/// \brief Sign-extends each of the lower two 16-bit integer elements of a
|
||||
/// Sign-extends each of the lower two 16-bit integer elements of a
|
||||
/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
|
||||
/// a 128-bit vector of [2 x i64]. The upper six elements of the input
|
||||
/// vector are unused.
|
||||
@@ -1338,8 +1319,8 @@ _mm_cvtepi16_epi32(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are sign-
|
||||
/// extended to 64-bit values.
|
||||
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
|
||||
/// sign-extended to 64-bit values.
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepi16_epi64(__m128i __V)
|
||||
@@ -1347,7 +1328,7 @@ _mm_cvtepi16_epi64(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
|
||||
}
|
||||
|
||||
/// \brief Sign-extends each of the lower two 32-bit integer elements of a
|
||||
/// Sign-extends each of the lower two 32-bit integer elements of a
|
||||
/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
|
||||
/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
|
||||
/// are unused.
|
||||
@@ -1357,8 +1338,8 @@ _mm_cvtepi16_epi64(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are sign-
|
||||
/// extended to 64-bit values.
|
||||
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
|
||||
/// sign-extended to 64-bit values.
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepi32_epi64(__m128i __V)
|
||||
@@ -1367,7 +1348,7 @@ _mm_cvtepi32_epi64(__m128i __V)
|
||||
}
|
||||
|
||||
/* SSE4 Packed Integer Zero-Extension. */
|
||||
/// \brief Zero-extends each of the lower eight 8-bit integer elements of a
|
||||
/// Zero-extends each of the lower eight 8-bit integer elements of a
|
||||
/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
|
||||
/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
|
||||
/// are unused.
|
||||
@@ -1377,8 +1358,8 @@ _mm_cvtepi32_epi64(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are zero-
|
||||
/// extended to 16-bit values.
|
||||
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
|
||||
/// zero-extended to 16-bit values.
|
||||
/// \returns A 128-bit vector of [8 x i16] containing the zero-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepu8_epi16(__m128i __V)
|
||||
@@ -1386,7 +1367,7 @@ _mm_cvtepu8_epi16(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
|
||||
}
|
||||
|
||||
/// \brief Zero-extends each of the lower four 8-bit integer elements of a
|
||||
/// Zero-extends each of the lower four 8-bit integer elements of a
|
||||
/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
|
||||
/// 128-bit vector of [4 x i32]. The upper twelve elements of the input
|
||||
/// vector are unused.
|
||||
@@ -1396,8 +1377,8 @@ _mm_cvtepu8_epi16(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are zero-
|
||||
/// extended to 32-bit values.
|
||||
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
|
||||
/// zero-extended to 32-bit values.
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepu8_epi32(__m128i __V)
|
||||
@@ -1405,7 +1386,7 @@ _mm_cvtepu8_epi32(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
|
||||
}
|
||||
|
||||
/// \brief Zero-extends each of the lower two 8-bit integer elements of a
|
||||
/// Zero-extends each of the lower two 8-bit integer elements of a
|
||||
/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
|
||||
/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
|
||||
/// vector are unused.
|
||||
@@ -1415,8 +1396,8 @@ _mm_cvtepu8_epi32(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are zero-
|
||||
/// extended to 64-bit values.
|
||||
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
|
||||
/// zero-extended to 64-bit values.
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepu8_epi64(__m128i __V)
|
||||
@@ -1424,7 +1405,7 @@ _mm_cvtepu8_epi64(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
|
||||
}
|
||||
|
||||
/// \brief Zero-extends each of the lower four 16-bit integer elements of a
|
||||
/// Zero-extends each of the lower four 16-bit integer elements of a
|
||||
/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
|
||||
/// a 128-bit vector of [4 x i32]. The upper four elements of the input
|
||||
/// vector are unused.
|
||||
@@ -1434,8 +1415,8 @@ _mm_cvtepu8_epi64(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are zero-
|
||||
/// extended to 32-bit values.
|
||||
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
|
||||
/// zero-extended to 32-bit values.
|
||||
/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepu16_epi32(__m128i __V)
|
||||
@@ -1443,7 +1424,7 @@ _mm_cvtepu16_epi32(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
|
||||
}
|
||||
|
||||
/// \brief Zero-extends each of the lower two 16-bit integer elements of a
|
||||
/// Zero-extends each of the lower two 16-bit integer elements of a
|
||||
/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
|
||||
/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector
|
||||
/// are unused.
|
||||
@@ -1453,8 +1434,8 @@ _mm_cvtepu16_epi32(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are zero-
|
||||
/// extended to 64-bit values.
|
||||
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
|
||||
/// zero-extended to 64-bit values.
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepu16_epi64(__m128i __V)
|
||||
@@ -1462,7 +1443,7 @@ _mm_cvtepu16_epi64(__m128i __V)
|
||||
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
|
||||
}
|
||||
|
||||
/// \brief Zero-extends each of the lower two 32-bit integer elements of a
|
||||
/// Zero-extends each of the lower two 32-bit integer elements of a
|
||||
/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
|
||||
/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
|
||||
/// are unused.
|
||||
@@ -1472,8 +1453,8 @@ _mm_cvtepu16_epi64(__m128i __V)
|
||||
/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.
|
||||
///
|
||||
/// \param __V
|
||||
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are zero-
|
||||
/// extended to 64-bit values.
|
||||
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
|
||||
/// zero-extended to 64-bit values.
|
||||
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm_cvtepu32_epi64(__m128i __V)
|
||||
@@ -1482,7 +1463,7 @@ _mm_cvtepu32_epi64(__m128i __V)
|
||||
}
|
||||
|
||||
/* SSE4 Pack with Unsigned Saturation. */
|
||||
/// \brief Converts 32-bit signed integers from both 128-bit integer vector
|
||||
/// Converts 32-bit signed integers from both 128-bit integer vector
|
||||
/// operands into 16-bit unsigned integers, and returns the packed result.
|
||||
/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
|
||||
/// 0x0000 are saturated to 0x0000.
|
||||
@@ -1511,7 +1492,7 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
|
||||
}
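For reference, a hedged sketch of how the packing intrinsic documented above is typically used (helper name is illustrative, not part of the diff):

    #include <smmintrin.h>

    /* Pack eight signed 32-bit values (four from a, four from b) into eight
       unsigned 16-bit values, clamping each to the range [0, 0xFFFF]. */
    static __m128i pack_to_u16(__m128i a, __m128i b)
    {
        return _mm_packus_epi32(a, b);   /* result lanes: a0..a3 then b0..b3, saturated */
    }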
/* SSE4 Multiple Packed Sums of Absolute Difference. */
|
||||
/// \brief Subtracts 8-bit unsigned integer values and computes the absolute
|
||||
/// Subtracts 8-bit unsigned integer values and computes the absolute
|
||||
/// values of the differences to the corresponding bits in the destination.
|
||||
/// Then sums of the absolute differences are returned according to the bit
|
||||
/// fields in the immediate operand.
|
||||
@@ -1534,23 +1515,23 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
|
||||
/// \code
|
||||
/// // M2 represents bit 2 of the immediate operand
|
||||
/// // M10 represents bits [1:0] of the immediate operand
|
||||
/// i = M2 * 4
|
||||
/// j = M10 * 4
|
||||
/// i = M2 * 4;
|
||||
/// j = M10 * 4;
|
||||
/// for (k = 0; k < 8; k = k + 1) {
|
||||
/// d0 = abs(X[i + k + 0] - Y[j + 0])
|
||||
/// d1 = abs(X[i + k + 1] - Y[j + 1])
|
||||
/// d2 = abs(X[i + k + 2] - Y[j + 2])
|
||||
/// d3 = abs(X[i + k + 3] - Y[j + 3])
|
||||
/// r[k] = d0 + d1 + d2 + d3
|
||||
/// d0 = abs(X[i + k + 0] - Y[j + 0]);
|
||||
/// d1 = abs(X[i + k + 1] - Y[j + 1]);
|
||||
/// d2 = abs(X[i + k + 2] - Y[j + 2]);
|
||||
/// d3 = abs(X[i + k + 3] - Y[j + 3]);
|
||||
/// r[k] = d0 + d1 + d2 + d3;
|
||||
/// }
|
||||
/// \endcode
|
||||
/// \returns A 128-bit integer vector containing the sums of the sets of
|
||||
/// absolute differences between both operands.
|
||||
#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \
|
||||
#define _mm_mpsadbw_epu8(X, Y, M) \
|
||||
(__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
|
||||
(__v16qi)(__m128i)(Y), (M)); })
|
||||
(__v16qi)(__m128i)(Y), (M))
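A short, hedged example of the macro above (not part of the diff); the immediate 0 selects block offset 0 in both operands, matching the M2/M10 pseudocode:

    #include <smmintrin.h>

    /* Eight sums of absolute differences between 4-byte windows of x
       (starting at offsets 0..7) and the 4-byte block of y selected by
       the immediate. */
    static __m128i sad_windows(__m128i x, __m128i y)
    {
        return _mm_mpsadbw_epu8(x, y, 0);   /* immediate must be a compile-time constant */
    }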
/// \brief Finds the minimum unsigned 16-bit element in the input 128-bit

|
||||
/// Finds the minimum unsigned 16-bit element in the input 128-bit
|
||||
/// vector of [8 x u16] and returns it along with its index.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -1604,7 +1585,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
#define _SIDD_UNIT_MASK 0x40
|
||||
|
||||
/* SSE4.2 Packed Comparison Intrinsics. */
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with implicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns a 128-bit integer vector representing the result
|
||||
/// mask of the comparison.
|
||||
@@ -1660,7 +1641,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), (int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with implicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns an integer representing the result index of the
|
||||
/// comparison.
|
||||
@@ -1714,7 +1695,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), (int)(M))
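A hedged usage sketch for the index-returning string-compare macro above (not part of the diff); the flag combination and operand roles shown are one common choice:

    #include <smmintrin.h>

    /* Index of the first byte of haystack that matches any byte of needle,
       or 16 if no match is found within the implicit (NUL-terminated) lengths. */
    static int find_any(__m128i needle, __m128i haystack)
    {
        return _mm_cmpistri(needle, haystack,
                            _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                            _SIDD_LEAST_SIGNIFICANT);
    }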
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with explicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns a 128-bit integer vector representing the result
|
||||
/// mask of the comparison.
|
||||
@@ -1775,7 +1756,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(__v16qi)(__m128i)(B), (int)(LB), \
|
||||
(int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with explicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns an integer representing the result index of the
|
||||
/// comparison.
|
||||
@@ -1835,7 +1816,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)(M))
|
||||
|
||||
/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with implicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
|
||||
/// string in \a B is the maximum, otherwise, returns 0.
|
||||
@@ -1885,7 +1866,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), (int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with implicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns
|
||||
/// 0.
|
||||
@@ -1934,7 +1915,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), (int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with implicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns bit 0 of the resulting bit mask.
|
||||
///
|
||||
@@ -1982,7 +1963,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), (int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with implicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the length of the string in \a A is less than
|
||||
/// the maximum, otherwise, returns 0.
|
||||
@@ -2032,7 +2013,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), (int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with implicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the length of the string in \a B is less than
|
||||
/// the maximum, otherwise, returns 0.
|
||||
@@ -2082,7 +2063,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
|
||||
(__v16qi)(__m128i)(B), (int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with explicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
|
||||
/// string in \a B is the maximum, otherwise, returns 0.
|
||||
@@ -2137,7 +2118,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(__v16qi)(__m128i)(B), (int)(LB), \
|
||||
(int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with explicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise,
|
||||
/// returns 0.
|
||||
@@ -2191,7 +2172,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(__v16qi)(__m128i)(B), (int)(LB), \
|
||||
(int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with explicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns bit 0 of the resulting bit mask.
|
||||
///
|
||||
@@ -2244,7 +2225,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(__v16qi)(__m128i)(B), (int)(LB), \
|
||||
(int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with explicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the length of the string in \a A is less than
|
||||
/// the maximum, otherwise, returns 0.
|
||||
@@ -2299,7 +2280,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(__v16qi)(__m128i)(B), (int)(LB), \
|
||||
(int)(M))
|
||||
|
||||
/// \brief Uses the immediate operand \a M to perform a comparison of string
|
||||
/// Uses the immediate operand \a M to perform a comparison of string
|
||||
/// data with explicitly defined lengths that is contained in source operands
|
||||
/// \a A and \a B. Returns 1 if the length of the string in \a B is less than
|
||||
/// the maximum, otherwise, returns 0.
|
||||
@@ -2354,7 +2335,7 @@ _mm_minpos_epu16(__m128i __V)
|
||||
(int)(M))
|
||||
|
||||
/* SSE4.2 Compare Packed Data -- Greater Than. */
|
||||
/// \brief Compares each of the corresponding 64-bit values of the 128-bit
|
||||
/// Compares each of the corresponding 64-bit values of the 128-bit
|
||||
/// integer vectors to determine if the values in the first operand are
|
||||
/// greater than those in the second operand.
|
||||
///
|
||||
@@ -2374,7 +2355,7 @@ _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
|
||||
}
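A minimal sketch of the 64-bit signed comparison described above (helper name is illustrative, not part of the diff):

    #include <smmintrin.h>

    /* Lane-wise mask: all-ones in each 64-bit lane where a > b (signed), else zero. */
    static __m128i greater_mask_epi64(__m128i a, __m128i b)
    {
        return _mm_cmpgt_epi64(a, b);
    }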
/* SSE4.2 Accumulate CRC32. */
|
||||
/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned char operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -2394,7 +2375,7 @@ _mm_crc32_u8(unsigned int __C, unsigned char __D)
|
||||
return __builtin_ia32_crc32qi(__C, __D);
|
||||
}
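A hedged sketch of accumulating the byte-wise CRC-32C intrinsic over a buffer (not part of the diff); the initial seed and final inversion follow the common CRC-32C convention and are the caller's choice:

    #include <stddef.h>
    #include <smmintrin.h>

    static unsigned int crc32c_bytes(const unsigned char *p, size_t n)
    {
        unsigned int crc = 0xFFFFFFFFu;          /* conventional seed */
        for (size_t i = 0; i < n; ++i)
            crc = _mm_crc32_u8(crc, p[i]);       /* fold in one byte at a time */
        return crc ^ 0xFFFFFFFFu;                /* conventional final inversion */
    }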
/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned short operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -2414,7 +2395,7 @@ _mm_crc32_u16(unsigned int __C, unsigned short __D)
|
||||
return __builtin_ia32_crc32hi(__C, __D);
|
||||
}
|
||||
|
||||
/// \brief Adds the first unsigned integer operand to the CRC-32C checksum of
|
||||
/// Adds the first unsigned integer operand to the CRC-32C checksum of
|
||||
/// the second unsigned integer operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -2435,7 +2416,7 @@ _mm_crc32_u32(unsigned int __C, unsigned int __D)
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// Adds the unsigned integer operand to the CRC-32C checksum of the
|
||||
/// unsigned 64-bit integer operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -2458,8 +2439,6 @@ _mm_crc32_u64(unsigned long long __C, unsigned long long __D)
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#ifdef __POPCNT__
|
||||
#include <popcntintrin.h>
|
||||
#endif
|
||||
|
||||
#endif /* _SMMINTRIN_H */
|
||||
#endif /* __SMMINTRIN_H */
|
||||
|
||||
@@ -46,9 +46,6 @@ typedef __builtin_va_list va_list;
|
||||
#ifndef __GNUC_VA_LIST
|
||||
#define __GNUC_VA_LIST 1
|
||||
typedef __builtin_va_list __gnuc_va_list;
|
||||
/* zig: added because glibc stdio.h was duplicately defining va_list
|
||||
*/
|
||||
#define _VA_LIST_DEFINED
|
||||
#endif
|
||||
|
||||
#endif /* __STDARG_H */
|
||||
|
||||
@@ -32,12 +32,15 @@
|
||||
#define true 1
|
||||
#define false 0
|
||||
#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
/* Define _Bool, bool, false, true as a GNU extension. */
|
||||
/* Define _Bool as a GNU extension. */
|
||||
#define _Bool bool
|
||||
#if __cplusplus < 201103L
|
||||
/* For C++98, define bool, false, true as a GNU extension. */
|
||||
#define bool bool
|
||||
#define false false
|
||||
#define true true
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define __bool_true_false_are_defined 1
|
||||
|
||||
|
||||
@@ -48,13 +48,7 @@
|
||||
#if !__has_feature(modules)
|
||||
#define _PTRDIFF_T
|
||||
#endif
|
||||
|
||||
/* Zig: wrap in _PTRDIFF_T_DEFINED to protect against mingw defining it twice */
|
||||
#if !defined(_PTRDIFF_T_DEFINED)
|
||||
typedef __PTRDIFF_TYPE__ ptrdiff_t;
|
||||
#define _PTRDIFF_T_DEFINED
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#undef __need_ptrdiff_t
|
||||
#endif /* defined(__need_ptrdiff_t) */
|
||||
@@ -65,24 +59,7 @@ typedef __PTRDIFF_TYPE__ ptrdiff_t;
|
||||
#if !__has_feature(modules)
|
||||
#define _SIZE_T
|
||||
#endif
|
||||
|
||||
/* Zig: added to avoid collisions with mingw */
|
||||
#if !defined(_SIZE_T_DEFINED_)
|
||||
#if !defined(_SIZE_T_DEFINED)
|
||||
#if !defined(_BSD_SIZE_T_DEFINED_)
|
||||
#if !defined(_SIZE_T_DECLARED)
|
||||
typedef __SIZE_TYPE__ size_t;
|
||||
#define _SIZE_T_DEFINED_
|
||||
#define _SIZE_T_DEFINED
|
||||
#define _BSD_SIZE_T_DEFINED_
|
||||
#define _SIZE_T_DECLARED
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#undef __need_size_t
|
||||
#endif /*defined(__need_size_t) */
|
||||
@@ -110,22 +87,7 @@ typedef __SIZE_TYPE__ rsize_t;
|
||||
#define _WCHAR_T_DEFINED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* zig added to prevent duplicate definition with mingw */
|
||||
#if !defined(__INT_WCHAR_T_H)
|
||||
#if !defined(_GCC_WCHAR_T)
|
||||
#if !defined(_WCHAR_T_DECLARED)
|
||||
#if !defined(_WCHAR_T_DEFINED)
|
||||
#define __INT_WCHAR_T_H
|
||||
#define _GCC_WCHAR_T
|
||||
#define _WCHAR_T_DECLARED
|
||||
#define _WCHAR_T_DEFINED
|
||||
typedef __WCHAR_TYPE__ wchar_t;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#undef __need_wchar_t
|
||||
|
||||
@@ -88,7 +88,7 @@
|
||||
*
|
||||
* To accommodate targets that are missing types that are exactly 8, 16, 32, or
|
||||
* 64 bits wide, this implementation takes an approach of cascading
|
||||
* redefintions, redefining __int_leastN_t to successively smaller exact-width
|
||||
* redefinitions, redefining __int_leastN_t to successively smaller exact-width
|
||||
* types. It is therefore important that the types are defined in order of
|
||||
* descending widths.
|
||||
*
|
||||
@@ -461,7 +461,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
* As in the type definitions, this section takes an approach of
|
||||
* successive-shrinking to determine which limits to use for the standard (8,
|
||||
* 16, 32, 64) bit widths when they don't have exact representations. It is
|
||||
* therefore important that the defintions be kept in order of decending
|
||||
 * therefore important that the definitions be kept in order of descending
|
||||
* widths.
|
||||
*
|
||||
* Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the
|
||||
|
||||
@@ -27,9 +27,10 @@
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
|
||||
#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
|
||||
|
||||
/// \brief Computes the absolute value of each of the packed 8-bit signed
|
||||
/// Computes the absolute value of each of the packed 8-bit signed
|
||||
/// integers in the source operand and stores the 8-bit unsigned integer
|
||||
/// results in the destination.
|
||||
///
|
||||
@@ -41,13 +42,13 @@
|
||||
/// A 64-bit vector of [8 x i8].
|
||||
/// \returns A 64-bit integer vector containing the absolute values of the
|
||||
/// elements in the operand.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_abs_pi8(__m64 __a)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
|
||||
}
|
||||
|
||||
/// \brief Computes the absolute value of each of the packed 8-bit signed
|
||||
/// Computes the absolute value of each of the packed 8-bit signed
|
||||
/// integers in the source operand and stores the 8-bit unsigned integer
|
||||
/// results in the destination.
|
||||
///
|
||||
@@ -65,7 +66,7 @@ _mm_abs_epi8(__m128i __a)
|
||||
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
|
||||
}
|
||||
|
||||
/// \brief Computes the absolute value of each of the packed 16-bit signed
|
||||
/// Computes the absolute value of each of the packed 16-bit signed
|
||||
/// integers in the source operand and stores the 16-bit unsigned integer
|
||||
/// results in the destination.
|
||||
///
|
||||
@@ -77,13 +78,13 @@ _mm_abs_epi8(__m128i __a)
|
||||
/// A 64-bit vector of [4 x i16].
|
||||
/// \returns A 64-bit integer vector containing the absolute values of the
|
||||
/// elements in the operand.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_abs_pi16(__m64 __a)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
|
||||
}
|
||||
|
||||
/// \brief Computes the absolute value of each of the packed 16-bit signed
|
||||
/// Computes the absolute value of each of the packed 16-bit signed
|
||||
/// integers in the source operand and stores the 16-bit unsigned integer
|
||||
/// results in the destination.
|
||||
///
|
||||
@@ -101,7 +102,7 @@ _mm_abs_epi16(__m128i __a)
|
||||
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
|
||||
}
|
||||
|
||||
/// \brief Computes the absolute value of each of the packed 32-bit signed
|
||||
/// Computes the absolute value of each of the packed 32-bit signed
|
||||
/// integers in the source operand and stores the 32-bit unsigned integer
|
||||
/// results in the destination.
|
||||
///
|
||||
@@ -113,13 +114,13 @@ _mm_abs_epi16(__m128i __a)
|
||||
/// A 64-bit vector of [2 x i32].
|
||||
/// \returns A 64-bit integer vector containing the absolute values of the
|
||||
/// elements in the operand.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_abs_pi32(__m64 __a)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pabsd((__v2si)__a);
|
||||
}
|
||||
|
||||
/// \brief Computes the absolute value of each of the packed 32-bit signed
|
||||
/// Computes the absolute value of each of the packed 32-bit signed
|
||||
/// integers in the source operand and stores the 32-bit unsigned integer
|
||||
/// results in the destination.
|
||||
///
|
||||
@@ -137,7 +138,7 @@ _mm_abs_epi32(__m128i __a)
|
||||
return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
|
||||
}
|
||||
|
||||
/// \brief Concatenates the two 128-bit integer vector operands, and
|
||||
/// Concatenates the two 128-bit integer vector operands, and
|
||||
/// right-shifts the result by the number of bytes specified in the immediate
|
||||
/// operand.
|
||||
///
|
||||
@@ -157,11 +158,11 @@ _mm_abs_epi32(__m128i __a)
|
||||
/// An immediate operand specifying how many bytes to right-shift the result.
|
||||
/// \returns A 128-bit integer vector containing the concatenated right-shifted
|
||||
/// value.
|
||||
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
|
||||
#define _mm_alignr_epi8(a, b, n) \
|
||||
(__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
|
||||
(__v16qi)(__m128i)(b), (n)); })
|
||||
(__v16qi)(__m128i)(b), (n))
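A hedged sketch of the byte-alignment macro above (not part of the diff); the shift count must be a compile-time constant and the helper name is illustrative:

    #include <tmmintrin.h>

    /* Bytes 5..15 of lo followed by bytes 0..4 of hi: the 16-byte window that
       starts 5 bytes into the 32-byte concatenation hi:lo. */
    static __m128i window_at_5(__m128i hi, __m128i lo)
    {
        return _mm_alignr_epi8(hi, lo, 5);
    }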
/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
|
||||
/// Concatenates the two 64-bit integer vector operands, and right-shifts
|
||||
/// the result by the number of bytes specified in the immediate operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -180,10 +181,10 @@ _mm_abs_epi32(__m128i __a)
|
||||
/// An immediate operand specifying how many bytes to right-shift the result.
|
||||
/// \returns A 64-bit integer vector containing the concatenated right-shifted
|
||||
/// value.
|
||||
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
|
||||
(__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
|
||||
#define _mm_alignr_pi8(a, b, n) \
|
||||
(__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
|
||||
|
||||
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -206,7 +207,7 @@ _mm_hadd_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
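As a hedged illustration of the horizontal-add intrinsic above (not part of the diff), three applications reduce all eight 16-bit lanes to a single sum; overflow wraps and the helper name is illustrative:

    #include <tmmintrin.h>

    static short sum8_epi16(__m128i v)
    {
        v = _mm_hadd_epi16(v, v);               /* 8 lanes -> 4 pairwise sums (duplicated) */
        v = _mm_hadd_epi16(v, v);               /* 4 -> 2 */
        v = _mm_hadd_epi16(v, v);               /* 2 -> 1 */
        return (short)_mm_extract_epi16(v, 0);  /* total now in lane 0 */
    }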
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [4 x i32].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -229,7 +230,7 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of [4 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -246,13 +247,13 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
|
||||
/// destination.
|
||||
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
|
||||
/// operands.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_hadd_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of [2 x i32].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -269,15 +270,16 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
|
||||
/// destination.
|
||||
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
|
||||
/// operands.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_hadd_pi32(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
|
||||
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
|
||||
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
|
||||
/// 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -299,9 +301,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
|
||||
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
|
||||
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
|
||||
/// 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -317,13 +320,13 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
|
||||
/// destination.
|
||||
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
|
||||
/// sums of both operands.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_hadds_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [8 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -346,7 +349,7 @@ _mm_hsub_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [4 x i32].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -369,7 +372,7 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of [4 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -386,13 +389,13 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
|
||||
/// the destination.
|
||||
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
|
||||
/// of both operands.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_hsub_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of [2 x i32].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@@ -409,16 +412,16 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
|
||||
/// the destination.
|
||||
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
|
||||
/// of both operands.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_hsub_pi32(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
|
||||
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
|
||||
/// saturated to 8000h.
|
||||
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
|
||||
/// saturated to 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -440,10 +443,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
|
||||
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
|
||||
/// saturated to 8000h.
|
||||
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
|
||||
/// saturated to 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@@ -459,13 +462,13 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
|
||||
/// the destination.
|
||||
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
|
||||
/// differences of both operands.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_hsubs_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
|
||||
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
|
||||
/// values contained in the first source operand and packed 8-bit signed
|
||||
/// integer values contained in the second source operand, adds pairs of
|
||||
/// contiguous products with signed saturation, and writes the 16-bit sums to
|
||||
@@ -499,7 +502,7 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
|
||||
}
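A hedged sketch of the multiply-add intrinsic above, as used in small fixed-point dot products (names are illustrative, not part of the diff):

    #include <tmmintrin.h>

    /* For each of the 8 output lanes: u[2k]*w[2k] + u[2k+1]*w[2k+1], with
       unsigned bytes u and signed bytes w, saturated to a signed 16-bit result. */
    static __m128i dot2_u8s8(__m128i u, __m128i w)
    {
        return _mm_maddubs_epi16(u, w);
    }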
/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
|
||||
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
|
||||
/// values contained in the first source operand and packed 8-bit signed
|
||||
/// integer values contained in the second source operand, adds pairs of
|
||||
/// contiguous products with signed saturation, and writes the 16-bit sums to
|
||||
@@ -523,13 +526,13 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
|
||||
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
|
||||
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
|
||||
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_maddubs_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
|
||||
}
|
||||
|
||||
/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||
/// products to the 18 most significant bits by right-shifting, rounds the
|
||||
/// truncated value by adding 1, and writes bits [16:1] to the destination.
|
||||
///
|
||||
@@ -549,7 +552,7 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||
/// products to the 18 most significant bits by right-shifting, rounds the
|
||||
/// truncated value by adding 1, and writes bits [16:1] to the destination.
|
||||
///
|
||||
@@ -563,13 +566,13 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
|
||||
/// A 64-bit vector of [4 x i16] containing one of the source operands.
|
||||
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
|
||||
/// products of both operands.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
/// \brief Copies the 8-bit integers from a 128-bit integer vector to the
|
||||
/// Copies the 8-bit integers from a 128-bit integer vector to the
|
||||
/// destination or clears 8-bit values in the destination, as specified by
|
||||
/// the second source operand.
|
||||
///
|
||||
@@ -595,7 +598,7 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
|
||||
}
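A hedged byte-reversal sketch using the shuffle intrinsic above (not part of the diff); a control byte with its high bit set would zero the corresponding destination byte instead:

    #include <tmmintrin.h>

    static __m128i reverse_bytes(__m128i v)
    {
        const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                         8, 9, 10, 11, 12, 13, 14, 15);
        return _mm_shuffle_epi8(v, rev);   /* dst[i] = v[control[i] & 0x0F] */
    }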
/// \brief Copies the 8-bit integers from a 64-bit integer vector to the
|
||||
/// Copies the 8-bit integers from a 64-bit integer vector to the
|
||||
/// destination or clears 8-bit values in the destination, as specified by
|
||||
/// the second source operand.
|
||||
///
|
||||
@@ -614,13 +617,13 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
|
||||
/// destination. \n
|
||||
/// Bits [3:0] select the source byte to be copied.
|
||||
/// \returns A 64-bit integer vector containing the copied or cleared values.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_shuffle_pi8(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
|
||||
}
|
||||
|
||||
/// \brief For each 8-bit integer in the first source operand, perform one of
|
||||
/// For each 8-bit integer in the first source operand, perform one of
|
||||
/// the following actions as specified by the second source operand.
|
||||
///
|
||||
/// If the byte in the second source is negative, calculate the two's
|
||||
@@ -646,7 +649,7 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
|
||||
}
|
||||
|
||||
/// \brief For each 16-bit integer in the first source operand, perform one of
|
||||
/// For each 16-bit integer in the first source operand, perform one of
|
||||
/// the following actions as specified by the second source operand.
|
||||
///
|
||||
/// If the word in the second source is negative, calculate the two's
|
||||
@@ -672,7 +675,7 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
/// \brief For each 32-bit integer in the first source operand, perform one of
|
||||
/// For each 32-bit integer in the first source operand, perform one of
|
||||
/// the following actions as specified by the second source operand.
|
||||
///
|
||||
/// If the doubleword in the second source is negative, calculate the two's
|
||||
@@ -698,7 +701,7 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
|
||||
}
|
||||
|
||||
/// \brief For each 8-bit integer in the first source operand, perform one of
|
||||
/// For each 8-bit integer in the first source operand, perform one of
|
||||
/// the following actions as specified by the second source operand.
|
||||
///
|
||||
/// If the byte in the second source is negative, calculate the two's
|
||||
@@ -718,13 +721,13 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
|
||||
/// A 64-bit integer vector containing control bytes corresponding to
|
||||
/// positions in the destination.
|
||||
/// \returns A 64-bit integer vector containing the resultant values.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_sign_pi8(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
|
||||
}
|
||||
|
||||
/// \brief For each 16-bit integer in the first source operand, perform one of
|
||||
/// For each 16-bit integer in the first source operand, perform one of
|
||||
/// the following actions as specified by the second source operand.
|
||||
///
|
||||
/// If the word in the second source is negative, calculate the two's
|
||||
@@ -744,13 +747,13 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
|
||||
/// A 64-bit integer vector containing control words corresponding to
|
||||
/// positions in the destination.
|
||||
/// \returns A 64-bit integer vector containing the resultant values.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_sign_pi16(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
|
||||
}
|
||||
|
||||
/// \brief For each 32-bit integer in the first source operand, perform one of
|
||||
/// For each 32-bit integer in the first source operand, perform one of
|
||||
/// the following actions as specified by the second source operand.
|
||||
///
|
||||
/// If the doubleword in the second source is negative, calculate the two's
|
||||
@@ -770,12 +773,13 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
|
||||
/// A 64-bit integer vector containing two control doublewords corresponding
|
||||
/// to positions in the destination.
|
||||
/// \returns A 64-bit integer vector containing the resultant values.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
|
||||
_mm_sign_pi32(__m64 __a, __m64 __b)
|
||||
{
|
||||
return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS_MMX
|
||||
|
||||
#endif /* __TMMINTRIN_H */
|
||||
|
||||
@@ -76,7 +76,13 @@ typedef intptr_t _sleb128_t;
|
||||
typedef uintptr_t _uleb128_t;
|
||||
|
||||
struct _Unwind_Context;
|
||||
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
|
||||
struct _Unwind_Control_Block;
|
||||
typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */
|
||||
#else
|
||||
struct _Unwind_Exception;
|
||||
typedef struct _Unwind_Exception _Unwind_Exception;
|
||||
#endif
|
||||
typedef enum {
|
||||
_URC_NO_REASON = 0,
|
||||
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
|
||||
@@ -109,34 +115,73 @@ typedef enum {
|
||||
} _Unwind_Action;
|
||||
|
||||
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
|
||||
struct _Unwind_Exception *);
|
||||
_Unwind_Exception *);
|
||||
|
||||
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
|
||||
typedef struct _Unwind_Control_Block _Unwind_Control_Block;
|
||||
typedef uint32_t _Unwind_EHT_Header;
|
||||
|
||||
struct _Unwind_Control_Block {
|
||||
uint64_t exception_class;
|
||||
void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
|
||||
/* unwinder cache (private fields for the unwinder's use) */
|
||||
struct {
|
||||
uint32_t reserved1; /* forced unwind stop function, 0 if not forced */
|
||||
uint32_t reserved2; /* personality routine */
|
||||
uint32_t reserved3; /* callsite */
|
||||
uint32_t reserved4; /* forced unwind stop argument */
|
||||
uint32_t reserved5;
|
||||
} unwinder_cache;
|
||||
/* propagation barrier cache (valid after phase 1) */
|
||||
struct {
|
||||
uint32_t sp;
|
||||
uint32_t bitpattern[5];
|
||||
} barrier_cache;
|
||||
/* cleanup cache (preserved over cleanup) */
|
||||
struct {
|
||||
uint32_t bitpattern[4];
|
||||
} cleanup_cache;
|
||||
/* personality cache (for personality's benefit) */
|
||||
struct {
|
||||
uint32_t fnstart; /* function start address */
|
||||
_Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */
|
||||
uint32_t additional; /* additional data */
|
||||
uint32_t reserved1;
|
||||
} pr_cache;
|
||||
long long int : 0; /* force alignment of next item to 8-byte boundary */
|
||||
} __attribute__((__aligned__(8)));
|
||||
#else
|
||||
struct _Unwind_Exception {
|
||||
_Unwind_Exception_Class exception_class;
|
||||
_Unwind_Exception_Cleanup_Fn exception_cleanup;
|
||||
#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)
|
||||
_Unwind_Word private_[6];
|
||||
#else
|
||||
_Unwind_Word private_1;
|
||||
_Unwind_Word private_2;
|
||||
#endif
|
||||
/* The Itanium ABI requires that _Unwind_Exception objects are "double-word
|
||||
* aligned". GCC has interpreted this to mean "use the maximum useful
|
||||
* alignment for the target"; so do we. */
|
||||
} __attribute__((__aligned__));
|
||||
#endif
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
|
||||
_Unwind_Exception_Class,
|
||||
struct _Unwind_Exception *,
|
||||
_Unwind_Exception *,
|
||||
struct _Unwind_Context *,
|
||||
void *);
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
|
||||
int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
|
||||
struct _Unwind_Context *);
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,
|
||||
_Unwind_Exception_Class,
|
||||
_Unwind_Exception *,
|
||||
struct _Unwind_Context *);
|
||||
typedef _Unwind_Personality_Fn __personality_routine;
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
|
||||
void *);
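A hedged backtrace sketch built on the trace-callback type above (not part of the diff); it assumes _Unwind_Backtrace and _Unwind_GetIP, which this header declares on most targets:

    #include <stdio.h>
    #include <unwind.h>

    static _Unwind_Reason_Code print_frame(struct _Unwind_Context *ctx, void *arg)
    {
        (void)arg;
        printf("pc = %p\n", (void *)_Unwind_GetIP(ctx));
        return _URC_NO_REASON;              /* keep walking the stack */
    }

    /* somewhere in the program: _Unwind_Backtrace(print_frame, NULL); */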
#if defined(__arm__) && !defined(__APPLE__)
|
||||
|
||||
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
|
||||
typedef enum {
|
||||
_UVRSC_CORE = 0, /* integer register */
|
||||
_UVRSC_VFP = 1, /* vfp */
|
||||
@@ -158,14 +203,12 @@ typedef enum {
|
||||
_UVRSR_FAILED = 2
|
||||
} _Unwind_VRS_Result;
|
||||
|
||||
#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__)
|
||||
typedef uint32_t _Unwind_State;
|
||||
#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)
|
||||
#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
|
||||
#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)
|
||||
#define _US_ACTION_MASK ((_Unwind_State)3)
|
||||
#define _US_FORCE_UNWIND ((_Unwind_State)8)
|
||||
#endif
|
||||
|
||||
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
|
||||
_Unwind_VRS_RegClass __regclass,
|
||||
@@ -224,13 +267,12 @@ _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
|
||||
|
||||
/* DWARF EH functions; currently not available on Darwin/ARM */
|
||||
#if !defined(__APPLE__) || !defined(__arm__)
|
||||
|
||||
_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
|
||||
_Unwind_Stop_Fn, void *);
|
||||
void _Unwind_DeleteException(struct _Unwind_Exception *);
|
||||
void _Unwind_Resume(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,
|
||||
void *);
|
||||
void _Unwind_DeleteException(_Unwind_Exception *);
|
||||
void _Unwind_Resume(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -241,11 +283,11 @@ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
|
||||
|
||||
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
|
||||
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
|
||||
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,
|
||||
_Unwind_Stop_Fn, void *);
|
||||
void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
|
||||
void _Unwind_SjLj_Resume(_Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);
|
||||
|
||||
void *_Unwind_FindEnclosingFunction(void *);
|
||||
|
||||
|
||||
98
c_headers/vaesintrin.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __VAESINTRIN_H
|
||||
#define __VAESINTRIN_H
|
||||
|
||||
/* Default attributes for YMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256)))
|
||||
|
||||
/* Default attributes for ZMM forms. */
|
||||
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
|
||||
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesenc_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesdec_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesdec_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesenclast_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesenclast_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_aesdeclast_epi128(__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,
|
||||
(__v4di) __B);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
|
||||
_mm512_aesdeclast_epi128(__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,
|
||||
(__v8di) __B);
|
||||
}
|
||||
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#undef __DEFAULT_FN_ATTRS_F
|
||||
|
||||
#endif
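A hedged usage sketch for the new VAES intrinsics (not part of the diff; compile with VAES enabled, e.g. -mvaes, and the helper name is illustrative):

    #include <immintrin.h>

    /* One AES encryption round applied to two independent 128-bit states
       packed in a 256-bit register, each with its own 128-bit round key. */
    static __m256i aes_round_x2(__m256i states, __m256i round_keys)
    {
        return _mm256_aesenc_epi128(states, round_keys);
    }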
42
c_headers/vpclmulqdqintrin.h
Normal file
@@ -0,0 +1,42 @@
/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __VPCLMULQDQINTRIN_H
#define __VPCLMULQDQINTRIN_H

#define _mm256_clmulepi64_epi128(A, B, I) \
  (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
                                       (__v4di)(__m256i)(B), \
                                       (char)(I))

#define _mm512_clmulepi64_epi128(A, B, I) \
  (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
                                       (__v8di)(__m512i)(B), \
                                       (char)(I))

#endif /* __VPCLMULQDQINTRIN_H */
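Again as a non-authoritative sketch (not part of the diff): the 256-bit macro above performs two independent 64x64 -> 128-bit carry-less multiplies, one per 128-bit lane; the immediate picks which quadword of each operand participates (bit 0 for A, bit 4 for B). Assumes a build with -mvpclmulqdq -mavx2.

#include <immintrin.h>

/* Hypothetical sketch: low-half x low-half carry-less product in each lane,
 * a typical GHASH/CRC building block. */
static __m256i clmul_lo_lo(__m256i a, __m256i b)
{
    return _mm256_clmulepi64_epi128(a, b, 0x00);
}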
56 c_headers/waitpkgintrin.h Normal file
@@ -0,0 +1,56 @@
/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __WAITPKGINTRIN_H
#define __WAITPKGINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("waitpkg")))

static __inline__ void __DEFAULT_FN_ATTRS
_umonitor (void * __address)
{
  __builtin_ia32_umonitor (__address);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_umwait (unsigned int __control, unsigned long long __counter)
{
  return __builtin_ia32_umwait (__control,
    (unsigned int)(__counter >> 32), (unsigned int)__counter);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_tpause (unsigned int __control, unsigned long long __counter)
{
  return __builtin_ia32_tpause (__control,
    (unsigned int)(__counter >> 32), (unsigned int)__counter);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __WAITPKGINTRIN_H */
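As a rough usage sketch (mine, not part of the diff): _umonitor arms address monitoring on a cache line and _umwait sleeps lightly until that line is written or the TSC deadline passes. It assumes WAITPKG hardware, a -mwaitpkg build, __rdtsc() from these same headers, and a control value of 0 to request the deeper C0.2 state.

#include <x86intrin.h>

/* Hypothetical sketch: wait on a flag with a bounded light sleep. */
static void wait_for_flag(volatile unsigned int *flag,
                          unsigned long long timeout_cycles)
{
    while (*flag == 0) {
        _umonitor((void *)flag);        /* arm monitoring on the flag's line */
        if (*flag != 0)                 /* re-check to avoid a lost wakeup */
            break;
        _umwait(0, __rdtsc() + timeout_cycles); /* sleep until write or deadline */
    }
}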
38 c_headers/wbnoinvdintrin.h Normal file
@@ -0,0 +1,38 @@
/*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __WBNOINVDINTRIN_H
#define __WBNOINVDINTRIN_H

static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("wbnoinvd")))
_wbnoinvd (void)
{
  __builtin_ia32_wbnoinvd ();
}

#endif /* __WBNOINVDINTRIN_H */
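One caveat worth noting (my comment, not part of the diff): WBNOINVD writes back all modified cache lines without invalidating them, and it is a privileged instruction, so a wrapper like the sketch below only makes sense in kernel or firmware code built with -mwbnoinvd.

#include <x86intrin.h>

/* Hypothetical sketch: flush caches to memory while keeping lines valid.
 * Executing this at user level would fault. */
static void writeback_all_caches(void)
{
    _wbnoinvd();
}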
c_headers/wmmintrin.h
@@ -21,8 +21,8 @@
 *===-----------------------------------------------------------------------===
 */

#ifndef _WMMINTRIN_H
#define _WMMINTRIN_H
#ifndef __WMMINTRIN_H
#define __WMMINTRIN_H

#include <emmintrin.h>

@@ -30,4 +30,4 @@

#include <__wmmintrin_pclmul.h>

#endif /* _WMMINTRIN_H */
#endif /* __WMMINTRIN_H */
c_headers/x86intrin.h
@@ -32,26 +32,6 @@
#include <mm3dnow.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)
#include <prfchwintrin.h>
#endif
@@ -76,10 +56,6 @@
#include <lwpintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)
#include <mwaitxintrin.h>
#endif
@@ -88,4 +64,5 @@
#include <clzerointrin.h>
#endif


#endif /* __X86INTRIN_H */
File diff suppressed because it is too large
c_headers/xopintrin.h
@@ -31,7 +31,8 @@
#include <fma4intrin.h>

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256)))

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
@@ -201,7 +202,7 @@ _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
  return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
@@ -237,17 +238,17 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
  return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
}

#define _mm_roti_epi8(A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); })
#define _mm_roti_epi8(A, N) \
  (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))

#define _mm_roti_epi16(A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); })
#define _mm_roti_epi16(A, N) \
  (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))

#define _mm_roti_epi32(A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); })
#define _mm_roti_epi32(A, N) \
  (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))

#define _mm_roti_epi64(A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
#define _mm_roti_epi64(A, N) \
  (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A, __m128i __B)
@@ -297,37 +298,37 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
  return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
}

#define _mm_com_epu8(A, B, N) __extension__ ({ \
#define _mm_com_epu8(A, B, N) \
  (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)); })
                                  (__v16qi)(__m128i)(B), (N))

#define _mm_com_epu16(A, B, N) __extension__ ({ \
#define _mm_com_epu16(A, B, N) \
  (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)); })
                                  (__v8hi)(__m128i)(B), (N))

#define _mm_com_epu32(A, B, N) __extension__ ({ \
#define _mm_com_epu32(A, B, N) \
  (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)); })
                                  (__v4si)(__m128i)(B), (N))

#define _mm_com_epu64(A, B, N) __extension__ ({ \
#define _mm_com_epu64(A, B, N) \
  (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)); })
                                  (__v2di)(__m128i)(B), (N))

#define _mm_com_epi8(A, B, N) __extension__ ({ \
#define _mm_com_epi8(A, B, N) \
  (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                 (__v16qi)(__m128i)(B), (N)); })
                                 (__v16qi)(__m128i)(B), (N))

#define _mm_com_epi16(A, B, N) __extension__ ({ \
#define _mm_com_epi16(A, B, N) \
  (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                 (__v8hi)(__m128i)(B), (N)); })
                                 (__v8hi)(__m128i)(B), (N))

#define _mm_com_epi32(A, B, N) __extension__ ({ \
#define _mm_com_epi32(A, B, N) \
  (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                 (__v4si)(__m128i)(B), (N)); })
                                 (__v4si)(__m128i)(B), (N))

#define _mm_com_epi64(A, B, N) __extension__ ({ \
#define _mm_com_epi64(A, B, N) \
  (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                 (__v2di)(__m128i)(B), (N)); })
                                 (__v2di)(__m128i)(B), (N))

#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
@@ -722,24 +723,24 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B)
  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
}

#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
#define _mm_permute2_pd(X, Y, C, I) \
  (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                     (__v2df)(__m128d)(Y), \
                                     (__v2di)(__m128i)(C), (I)); })
                                     (__v2di)(__m128i)(C), (I))

#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
#define _mm256_permute2_pd(X, Y, C, I) \
  (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                        (__v4df)(__m256d)(Y), \
                                        (__v4di)(__m256i)(C), (I)); })
                                        (__v4di)(__m256i)(C), (I))

#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
#define _mm_permute2_ps(X, Y, C, I) \
  (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
                                    (__v4si)(__m128i)(C), (I)); })
                                    (__v4si)(__m128i)(C), (I))

#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
#define _mm256_permute2_ps(X, Y, C, I) \
  (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                       (__v8sf)(__m256)(Y), \
                                       (__v8si)(__m256i)(C), (I)); })
                                       (__v8si)(__m256i)(C), (I))

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)
@@ -765,18 +766,19 @@ _mm_frcz_pd(__m128d __A)
  return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_frcz_ps(__m256 __A)
{
  return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_frcz_pd(__m256d __A)
{
  return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
}

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS256

#endif /* __XOPINTRIN_H */
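A side note plus a sketch (mine, not part of the diff): the macro rewrite above only drops the GNU statement-expression wrappers, so call sites are unchanged. For illustration, assuming an AMD CPU with XOP and a build with -mxop:

#include <x86intrin.h>

/* Hypothetical sketch: rotate every 32-bit lane left by 7 bits using the
 * immediate-rotate macro touched in this diff. */
static __m128i rotl32_by7(__m128i v)
{
    return _mm_roti_epi32(v, 7);
}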
c_headers/xsavecintrin.h
@@ -1,4 +1,4 @@
/*===---- xsavecintrin.h - XSAVEC intrinsic ------------------------------------===
/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
c_headers/xsaveintrin.h
@@ -1,4 +1,4 @@
/*===---- xsaveintrin.h - XSAVE intrinsic ------------------------------------===
/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -33,23 +33,23 @@

static __inline__ void __DEFAULT_FN_ATTRS
_xsave(void *__p, unsigned long long __m) {
  return __builtin_ia32_xsave(__p, __m);
  __builtin_ia32_xsave(__p, __m);
}

static __inline__ void __DEFAULT_FN_ATTRS
_xrstor(void *__p, unsigned long long __m) {
  return __builtin_ia32_xrstor(__p, __m);
  __builtin_ia32_xrstor(__p, __m);
}

#ifdef __x86_64__
static __inline__ void __DEFAULT_FN_ATTRS
_xsave64(void *__p, unsigned long long __m) {
  return __builtin_ia32_xsave64(__p, __m);
  __builtin_ia32_xsave64(__p, __m);
}

static __inline__ void __DEFAULT_FN_ATTRS
_xrstor64(void *__p, unsigned long long __m) {
  return __builtin_ia32_xrstor64(__p, __m);
  __builtin_ia32_xrstor64(__p, __m);
}
#endif
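For orientation (my sketch, not part of the diff; the change above merely removes a stray return from the void wrappers): _xsave and _xrstor save and restore the processor state components selected by a mask into a 64-byte-aligned area. The 1024-byte buffer below is only an illustrative guess for x87+SSE state; real code should size it from CPUID leaf 0xD. Assumes a -mxsave build.

#include <immintrin.h>

/* Hypothetical sketch: checkpoint and restore x87 (bit 0) and SSE (bit 1)
 * state.  The save area must be 64-byte aligned; a static array is
 * zero-initialized, which is what XSAVE expects before first use. */
static void checkpoint_fpu_sse(void)
{
    static unsigned char area[1024] __attribute__((aligned(64)));
    const unsigned long long mask = 0x3;

    _xsave(area, mask);      /* returns void after this change */
    /* ... code that clobbers FP/SSE state ... */
    _xrstor(area, mask);
}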
c_headers/xsaveoptintrin.h
@@ -1,4 +1,4 @@
/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ------------------------------------===
/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -33,13 +33,13 @@

static __inline__ void __DEFAULT_FN_ATTRS
_xsaveopt(void *__p, unsigned long long __m) {
  return __builtin_ia32_xsaveopt(__p, __m);
  __builtin_ia32_xsaveopt(__p, __m);
}

#ifdef __x86_64__
static __inline__ void __DEFAULT_FN_ATTRS
_xsaveopt64(void *__p, unsigned long long __m) {
  return __builtin_ia32_xsaveopt64(__p, __m);
  __builtin_ia32_xsaveopt64(__p, __m);
}
#endif
Some files were not shown because too many files have changed in this diff