2000 Commits
0.1.0 ... 0.3.0

Author SHA1 Message Date
Andrew Kelley
e242f6a609 Release 0.3.0 2018-09-28 09:03:40 -04:00
Andrew Kelley
f4a0658585 rely on gcc for static builds on macos 2018-09-27 22:27:52 -04:00
Andrew Kelley
fe524a1fa5 build: add support for ZIG_STATIC on MacOS 2018-09-27 15:09:14 -04:00
Andrew Kelley
cc490f0026 update @typeInfo docs 2018-09-27 12:34:41 -04:00
Andrew Kelley
af3263d7a8 add dll export storage class where appropriate
closes #1443
2018-09-27 10:22:16 -04:00
Josh Wolfe
e7d9d00ac8 overhaul api for getting random integers (#1578)
* rand api overhaul
* no retry limits. instead documented a recommendation
  to call int(T) % len directly.
2018-09-27 00:35:38 -04:00
Andrew Kelley
1c26c2f4d5 fix crash when compile error evaluating return...
...type of inferred error set. closes #1591
2018-09-26 16:59:08 -04:00
Andrew Kelley
589201b104 fix variables which are pointers to packed struct fields
closes #1121
2018-09-26 14:54:53 -04:00
Andrew Kelley
dcfd15a7f0 the last number in a packed ptr is host int bytes
See #1121
2018-09-26 14:54:52 -04:00
Wink Saville
631851f8b5 Tweak SYMBOL_CHAR define in tokenizer.cpp
Make it a little clearer what a SYMBOL_CHAR is, use ALPHA instead of
ALPHA_EXCEPT_C and case 'c', which is ALPHA's definition.
2018-09-26 11:29:01 -04:00
Andrew Kelley
9485043b3c fix implicit casting to *c_void
closes #1588

also some small std lib changes regarding posix sockets
and one doc typo fix
2018-09-26 11:06:09 -04:00
Jay Weisskopf
7b204649e3 stage1: Added zig help to show usage on stdout
This will make it easier to do things like `zig help | grep something`.

Invalid arguments will now display a short notice for `zig help`
instead of showing the full usage information. This will make it easier
to see the actual error.
2018-09-25 22:05:44 -04:00
Andrew Kelley
2e562a5f36 fix crash on runtime index into slice of comptime type
closes #1435
2018-09-25 12:03:39 -04:00
Andrew Kelley
839492d0e8 fix not syntax highlighting builtin module 2018-09-25 12:03:09 -04:00
Andrew Kelley
eafb8e8572 fix self reference through fn ptr field crash
closes #1208
2018-09-25 10:45:11 -04:00
Wink Saville
0e6c18c820 Remove StrLitKind enum
I was looking at the tokenizer, specifically fn testTokenize, and
this statement looked odd:

  if (@TagType(Token.Id)(token.id) != @TagType(Token.Id)(expected_token_id)) {

I then saw the TODO and thought I'd remove StrLitKind figuring that
would make testTokenize simpler. It did so I thought I'd prepare this PR.

The tests are still working and stage2 zig seems to work, it compiles and
I was able to use the fmt command.
2018-09-24 19:28:46 -04:00
Andrew Kelley
4241cd666d fix more bigint code paths and add tests 2018-09-24 16:31:22 -04:00
Andrew Kelley
877036e7ef fix translate-c test expecting incorrect C ABI on windows 2018-09-24 15:14:20 -04:00
Andrew Kelley
32c91ad892 fix comptime bitwise operations with negative values
closes #1387
closes #1529
2018-09-24 14:38:51 -04:00
Andrew Kelley
422269ea6e minor langref improvements 2018-09-24 14:38:51 -04:00
Wink Saville
c5509a07ca Ignore class-memaccess error for gcc 8 and above
On Arch Linux the current default compiler is gcc 8.2.1 and this change
is needed to ignore the following errors:

  In file included from /home/wink/local/include/llvm/ADT/STLExtras.h:21,
                   from /home/wink/local/include/llvm/ADT/StringRef.h:13,
                   from /home/wink/local/include/llvm/ADT/StringMap.h:17,
                   from /home/wink/local/include/llvm/Support/Host.h:17,
                   from /home/wink/local/include/llvm/ADT/Hashing.h:49,
                   from /home/wink/local/include/llvm/ADT/ArrayRef.h:13,
                   from /home/wink/local/include/llvm/ADT/APFloat.h:21,
                   from /home/wink/local/include/clang/AST/APValue.h:18,
                   from /home/wink/local/include/clang/AST/Decl.h:17,
                   from /home/wink/local/include/clang/AST/ASTTypeTraits.h:20,
                   from /home/wink/local/include/clang/AST/ASTContext.h:18,
                   from /home/wink/local/include/clang/Frontend/ASTUnit.h:18,
                   from /home/wink/prgs/ziglang/zig/src/translate_c.cpp:18:
  /home/wink/local/include/llvm/ADT/SmallVector.h: In instantiation of ‘void llvm::SmallVectorTemplateBase<T, true>::push_back(const T&) [with T = std::pair<void*, long unsigned int>]’:
  /home/wink/local/include/llvm/Support/Allocator.h:249:33:   required from ‘void* llvm::BumpPtrAllocatorImpl<AllocatorT, SlabSize, SizeThreshold>::Allocate(size_t, size_t) [with AllocatorT = llvm::MallocAllocator; long unsigned int SlabSize = 4096; long unsigned int SizeThreshold = 4096; size_t = long unsigned int]’
  /home/wink/local/include/clang/AST/ASTContext.h:659:42:   required from here
  /home/wink/local/include/llvm/ADT/SmallVector.h:313:11: error: ‘void* memcpy(void*, const void*, size_t)’ writing to an object of type ‘struct std::pair<void*, long unsigned int>’ with no trivial copy-assignment; use copy-assignment or copy-initialization instead [-Werror=class-memaccess]
       memcpy(this->end(), &Elt, sizeof(T));
       ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  In file included from /usr/include/c++/8.2.1/utility:70,
                   from /home/wink/local/include/llvm/Support/type_traits.h:19,
                   from /home/wink/local/include/llvm/Support/Casting.h:19,
                   from /home/wink/local/include/clang/Basic/LLVM.h:22,
                   from /home/wink/local/include/clang/AST/APValue.h:17,
                   from /home/wink/local/include/clang/AST/Decl.h:17,
                   from /home/wink/local/include/clang/AST/ASTTypeTraits.h:20,
                   from /home/wink/local/include/clang/AST/ASTContext.h:18,
                   from /home/wink/local/include/clang/Frontend/ASTUnit.h:18,
                   from /home/wink/prgs/ziglang/zig/src/translate_c.cpp:18:
  /usr/include/c++/8.2.1/bits/stl_pair.h:198:12: note: ‘struct std::pair<void*, long unsigned int>’ declared here
       struct pair
              ^~~~
2018-09-24 14:01:59 -04:00
Andrew Kelley
004c383292 fix translate-c incorrectly translating negative enum init values
closes #1360
2018-09-24 12:19:16 -04:00
Andrew Kelley
8a7737eef4 fix godbolt cli test on non-linux-x86_64 hosts 2018-09-24 12:01:51 -04:00
Andrew Kelley
6d048aa3bf add panic function to godbolt CLI API test 2018-09-24 11:14:39 -04:00
Andrew Kelley
a155f2973b add test to cover the CLI API that godbolt is using
closes #1399
2018-09-24 11:12:21 -04:00
Wink Saville
a170a64776 Fix typo in argsAlloc comment
Changed freeArgs to argsFree.
2018-09-23 13:12:57 -04:00
Andrew Kelley
af01c6e2bf include LICENSE in windows static builds 2018-09-23 12:29:38 -04:00
Jeff Fowler
f955844435 rm extraneous macro 2018-09-22 10:50:35 -04:00
Andrew Kelley
c84548e71d fix @compileLog having unintended side effects
closes #1459
2018-09-22 10:46:22 -04:00
Andrew Kelley
3c1f9baff1 doc: add note about reproducible builds 2018-09-22 10:22:01 -04:00
Andrew Kelley
5c15c5fc48 add compile error for slice of undefined slice
closes #1293
2018-09-22 10:04:47 -04:00
Andrew Kelley
2e27407161 stage1: unify 2 implementations of pointer deref
I found out there were accidentally two code paths
in zig ir for pointer dereference. So this should
fix a few bugs.

closes #1486
2018-09-21 18:47:12 -04:00
Andrew Kelley
9e5cd43e6d fix comptime string concatenation ignoring slice bounds
closes #1362
2018-09-21 15:45:13 -04:00
Andrew Kelley
0ca12ded2f Merge branch 'raulgrell-BitByteOffsetOfs' 2018-09-21 14:16:14 -04:00
Andrew Kelley
7c5e3e1f8e fixups 2018-09-21 14:15:58 -04:00
Andrew Kelley
5a21d3dce0 Merge branch 'BitByteOffsetOfs' of https://github.com/raulgrell/zig into raulgrell-BitByteOffsetOfs 2018-09-21 13:10:21 -04:00
Andrew Kelley
44f2ee101f fix comptime slice of pointer to array
closes #1565
2018-09-21 10:31:11 -04:00
Andrew Kelley
073f7ebb0e fix formatInt to handle upcasting to base int size 2018-09-20 13:46:20 -04:00
Andrew Kelley
9c8dfadbb1 add compile error for casting const array to mutable slice
See #1565
2018-09-20 12:24:51 -04:00
Andrew Kelley
f8fe517d12 better string literal caching implementation
We were caching the ConstExprValue of string literals,
which works if you can never modify ConstExprValues.
This premise is broken with `comptime var ...`.

So I implemented an optimization in ConstExprValue
arrays, where it stores a `Buf *` directly rather
than an array of ConstExprValues for the elements,
and then similar to array of undefined, it is
expanded into the canonical form when necessary.
However many operations can happen directly on the
`Buf *`, which is faster.

Furthermore, before a ConstExprValue array is expanded
into canonical form, it removes itself from the string
literal cache. This fixes the issue, because before an
array element is modified it would have to be expanded.

closes #1076
2018-09-20 11:04:31 -04:00
Andrew Kelley
492821781d add workaround for llvm-config --system-libs
not handling static libs correctly
2018-09-19 18:28:50 -04:00
Andrew Kelley
f9bf04c38b travis: build zig with gcc to match what llvm was built with
from #llvm IRC:

<andrewrk> does llvm 7 as a .so have some kind of new initialization
requirement? I'm getting a segfault in llvm::DIBuilder::createFile
(with valid non-null parameters), when linking my frontend against
llvm-7.so but not when linking against llvm .a libraries

<d0k> we have an ABI bug in LLVM 7 when the .so is built with gcc
but your program is built with clang. I'm sorry for that.
2018-09-19 17:15:33 -04:00
Andrew Kelley
287f5cce50 build: fix finding llvm and clang 7 2018-09-19 15:38:36 -04:00
Andrew Kelley
e3d8cae35a travis: fix llvm apt package names 2018-09-19 14:54:32 -04:00
Andrew Kelley
6d23619cce fix typo from previous commit 2018-09-19 13:53:47 -04:00
Andrew Kelley
f418a943cc travis: fix package names 2018-09-19 13:46:20 -04:00
Andrew Kelley
d4cb373024 travis: update apt repo url 2018-09-19 13:25:11 -04:00
Andrew Kelley
69c876cc87 appveyor: remove old cache file 2018-09-19 12:58:58 -04:00
Andrew Kelley
15301504e2 Merge remote-tracking branch 'origin/llvm7' 2018-09-19 12:57:58 -04:00
Christian Wesselhoeft
5eeef1f5ed std/index.zig: Fix import
BufferOutStream is defined in io.zig
2018-09-18 19:01:35 -04:00
Andrew Kelley
6dd93ee5d9 fix regression from previous commit 2018-09-18 18:56:29 -04:00
Andrew Kelley
21328e0036 zig fmt: handle shebang lines
closes #1546
2018-09-18 18:36:39 -04:00
Andrew Kelley
345f8db1c4 fix optional pointer to empty struct incorrectly being non-null
closes #1178
2018-09-18 17:51:50 -04:00
Andrew Kelley
0ab7cfa023 appveyor: use MSVC 2017 2018-09-18 16:36:31 -04:00
Andrew Kelley
c1af360532 add compile error for slice.*.len
closes #1372
2018-09-18 16:32:40 -04:00
Andrew Kelley
84b963cb1b Merge remote-tracking branch 'origin/master' into llvm7 2018-09-18 15:39:05 -04:00
Andrew Kelley
148fe2e999 stage1 caching: don't write manifest until cache release
this prevents the situation where we determine the cache
manifest and write it, but then crash or otherwise error out
before putting the artifacts in the proper place.

now the artifacts will be in place because cache_release
happens after that step is done.
2018-09-18 15:35:03 -04:00
Andrew Kelley
275b4100c0 remove unnecessary setFloatMode calls
Now that Strict is the default, these calls only add noise.
2018-09-18 15:15:03 -04:00
Andrew Kelley
4b2719b51d Merge remote-tracking branch 'origin/master' into llvm7 2018-09-18 15:05:47 -04:00
Andrew Kelley
8c77c5705f implementation for bitcasting extern enum type to c_int
closes #1036
2018-09-18 15:00:14 -04:00
Andrew Kelley
5fd3af9dc6 fix implicit cast of packed struct field to const ptr
closes #966
2018-09-18 14:34:30 -04:00
Andrew Kelley
1fc2019031 fix @embedFile reading garbage memory
closes #1547
2018-09-18 10:25:57 -04:00
Andrew Kelley
93ff5024a4 minor cleanups from 68c1d05917 2018-09-18 10:17:48 -04:00
Andrew Kelley
d353d5aef8 fix @bytesToSlice on a packed struct
closes #1551
2018-09-18 09:49:57 -04:00
emekoi
68c1d05917 compiling on mingw is now supported (#1542)
* compiles on mingw-w64
* fixed error in os_file_overwrite on windows
* fixed windows hello_world example
2018-09-18 00:13:17 -04:00
Josh Wolfe
13645585fe link to #1544 2018-09-17 21:25:37 -04:00
Josh Wolfe
8f0618a5b1 Merge pull request #1543 from ziglang/bit_shifting
somewhat realistic usecase test for shifting strange integer sizes
2018-09-17 21:07:01 -04:00
Josh Wolfe
d7492b2c22 somewhat realistic usecase test for shifting strange integer sizes 2018-09-17 20:49:23 -04:00
Andrew Kelley
bfcfaaf5bd fix codegen for @intCast to u0 2018-09-17 20:33:42 -04:00
Andrew Kelley
15e59eb142 remove deprecated, unused windows functions
* `CryptAcquireContextA`
* `CryptReleaseContext`
* `CryptGenRandom`

See https://github.com/ziglang/zig/issues/534#issuecomment-422208368
2018-09-17 20:07:48 -04:00
Andrew Kelley
b16229da1d add compile error for @ptrCast 0 bit type to non-0 bit type 2018-09-17 19:41:11 -04:00
Andrew Kelley
78a9a465a3 add compile error for non-optional types compared against null
closes #1539
2018-09-17 18:58:50 -04:00
Andrew Kelley
6c71e9a54d fix crash when bit shifting a u1 2018-09-17 18:44:45 -04:00
Andrew Kelley
cf9200b815 dereferencing a *u0 is comptime-known to be 0 2018-09-17 18:13:38 -04:00
Andrew Kelley
4c6f1e614a remove zig build --init. add zig init-lib and zig init-exe
init-lib creates a working static library with tests, and
init-exe creates a working hello world with a `run` target.

both now have test coverage with the new "cli tests" file.

closes #1035
2018-09-17 17:11:18 -04:00
Andrew Kelley
9c9eefc841 allow extern structs to have stdcallcc function pointers
closes #1536
2018-09-17 11:22:30 -04:00
Andrew Kelley
dd5b2d1b04 fix crash when pointer casting a runtime extern function 2018-09-16 11:23:38 -04:00
Andrew Kelley
a2abdb185f Merge remote-tracking branch 'origin/master' into llvm7 2018-09-16 10:51:58 -04:00
Andrew Kelley
780e567446 add docs for @This() 2018-09-15 10:14:50 -04:00
Andrew Kelley
3f776af3fa fix alignment of structs
closes #1248
closes #1052
closes #1154
2018-09-14 19:08:59 -04:00
Andrew Kelley
639c381128 fix coroutine alignment
zig returned the wrong alignment for coroutine promises
in some cases
2018-09-14 13:55:45 -04:00
Wink Saville
82af31ce36 Fix additional regressions calling FileOutStream/FileInStream init()
This is caused by change 686663239a and not
fixed in 832caefc2a.
2018-09-14 12:07:21 -04:00
Andrew Kelley
5e39328542 docs: more syntax highlighting 2018-09-14 10:35:03 -04:00
Andrew Kelley
3d38feded9 fix tagged union with all void payloads but meaningful tag
closes #1322
2018-09-14 00:38:22 -04:00
Andrew Kelley
1e03cf1739 fix assertion failure on compile-time @intToPtr of function 2018-09-13 19:12:25 -04:00
Andrew Kelley
c06a61e9bf remove this. add @This().
closes #1283
2018-09-13 16:34:33 -04:00
Andrew Kelley
7c3636aaa3 remove the scope parameter of setFloatMode
also document that scopes inherit this value. See #367
See #1283
2018-09-13 15:46:34 -04:00
Andrew Kelley
9ac9633b10 stage1: put test output artifact back in zig-cache folder
close #1508
2018-09-13 14:30:15 -04:00
Andrew Kelley
ac0cda8df8 add compile error for merging non-error sets
closes #1509
2018-09-13 13:48:41 -04:00
Andrew Kelley
22e39e1e5a fix tagged union with only 1 field tripping assertion
closes #1495

now the tag type of an enum with only 1 item is comptime_int.
2018-09-13 13:33:11 -04:00
Andrew Kelley
77fd147b26 appveyor: skip all release tests to save time
appveyor is taking longer than 1 hour to run the tests
2018-09-13 11:26:13 -04:00
Andrew Kelley
d332311e53 Merge branch 'kristate-cache-invalidformat-issue1510' 2018-09-13 11:25:31 -04:00
Andrew Kelley
e3f0ba4984 alternate fix using the rest() function 2018-09-13 11:24:57 -04:00
kristopher tate
6d0a122816 src/cache_hash.cpp: support file paths that contain spaces;
ref: #1510
2018-09-13 23:34:01 +09:00
Marc Tiehuis
e70c543bc4 math/complex: cexp test correction and ldexp usage fix 2018-09-13 20:33:05 +12:00
Andrew Kelley
afe6316d32 appveyor: skip release-safe to save time
appveyor is taking longer than 1 hour to run the tests
2018-09-12 17:36:24 -04:00
Andrew Kelley
0dbbc91bc9 docs: langref is now javascript-free 2018-09-12 17:27:10 -04:00
Andrew Kelley
869475c110 ci: skip release-small tests to save time
we keep hitting the limit on how long CI tests take to run.
2018-09-12 14:50:26 -04:00
Andrew Kelley
a757533386 fix zig fmt on windows
closes #1069
2018-09-12 14:26:21 -04:00
Andrew Kelley
178d69191b windows: std.fs functions support concurrent ops
when reading and writing the same file descriptors
2018-09-12 13:55:35 -04:00
Andrew Kelley
0cfd019377 Merge pull request #1494 from ziglang/stage1-caching
stage1 caching
2018-09-12 12:40:16 -04:00
Andrew Kelley
3a49d115cf fix zig build cache dir path 2018-09-12 11:49:46 -04:00
Andrew Kelley
1caa48c2df windows os.cpp implementations 2018-09-12 11:33:26 -04:00
Andrew Kelley
7bd8a2695b Merge pull request #1506 from emekoi/master
fixed WriteFile segfault
2018-09-12 09:03:14 -04:00
emekoi
54f7d58722 fixed WriteFile segfault 2018-09-12 07:01:48 -05:00
Andrew Kelley
ff0b7fe29a error messages for attempted cache when zig cannot perfectly do it 2018-09-11 22:59:40 -04:00
Andrew Kelley
014cc60a72 rename --enable-timing-info to -ftime-report to match clang
and have it print llvm's internal timing info
2018-09-11 22:46:22 -04:00
Andrew Kelley
ee263a15cc bring back zig-cache
we need somewhere to put .o files and leave them while the user
executes their program, so that stack traces on MacOS can find
the .o files and get at the DWARF info.

if we try to clean up old global tmp dir files, first of all that's
a hard and complicated problem, and secondly it's not clear how
that is better than dumping the .o file inside zig-cache locally.
2018-09-11 22:25:52 -04:00
Andrew Kelley
25466ffb71 Merge remote-tracking branch 'origin/master' into stage1-caching 2018-09-11 20:54:55 -04:00
Andrew Kelley
7e9f25dd18 stage1: clean up timing report in test mode 2018-09-11 20:54:39 -04:00
Andrew Kelley
6b7f3d01ae ci: build zig in release mode
It makes sense to test release mode, plus we're up against the
time limits of CI, so this should make room.
2018-09-11 20:53:28 -04:00
Andrew Kelley
04dc5cdaca zig build: make the cache root dir before building 2018-09-11 18:15:08 -04:00
Andrew Kelley
1a4dcf10fe darwin fixups 2018-09-11 17:42:03 -04:00
Andrew Kelley
a1132ffe0f stage1: build blake code with -std=c99 2018-09-11 17:29:18 -04:00
Andrew Kelley
9227315bf2 zig build: better placement of test exe artifact 2018-09-11 17:23:36 -04:00
Andrew Kelley
15c67d2d50 fix docgen tests 2018-09-11 16:52:50 -04:00
Andrew Kelley
4af844732a Merge remote-tracking branch 'origin/master' into stage1-caching 2018-09-11 15:56:04 -04:00
Andrew Kelley
7dd3c3814d fix incorrect error union const value generation
closes #1442

zig needed to insert explicit padding into this structure before
it got bitcasted.
2018-09-11 15:16:50 -04:00
Andrew Kelley
dd1338b0e6 fix incorrect union const value generation
closes #1381

The union was generated as a 3 byte struct when it needed to be
4 bytes so that the packed struct bitcast could work correctly.

Now it recognizes this situation and adds padding bytes to become
the correct size so that it can fit into an array.
2018-09-11 12:59:39 -04:00
Andrew Kelley
c4f96ea745 disable stage2 tests on all targets
See #1364
2018-09-11 11:52:16 -04:00
Andrew Kelley
67735c6f15 ability to disable cache. off by default except for...
...zig run, zig build, compiler_rt.a, and builtin.a
2018-09-11 00:32:40 -04:00
Andrew Kelley
a6bf37f8ca Merge remote-tracking branch 'origin/master' into llvm7 2018-09-10 22:45:20 -04:00
Andrew Kelley
52f4e934a9 fix llvm assertion and missing compile error 2018-09-10 22:44:27 -04:00
Andrew Kelley
5ee5933ade stage1 caching: zig no longer uses zig-cache 2018-09-10 17:30:45 -04:00
Andrew Kelley
c7f7089392 drop patches on top of clang's C headers
We now match clang 7.0.0rc3 exactly.

See https://reviews.llvm.org/D51265 for more details.
2018-09-10 15:55:39 -04:00
Andrew Kelley
e077d765fe LLD patch: workaround for buggy MACH-O code
This reapplies 1a1414fc42
to the embedded LLD.
2018-09-10 15:54:51 -04:00
Andrew Kelley
54e470f936 update embedded LLD to 7.0.0rc3 2018-09-10 15:54:16 -04:00
Andrew Kelley
32be6e9b2a caching is working
* add almost all the input parameter state to the hash
  - missing items are the detected MSVC installation on Windows
    and detected libc installation on POSIX
  - also missing are C files and .h files that libclang finds
* artifacts are created in global cache directory instead of
  zig-cache.
  - exception: builtin.zig is still in zig-cache
* zig run uses the new cache correctly
* zig run uses execv on posix systems
2018-09-10 13:46:23 -04:00
Andrew Kelley
c9474faa4e Merge remote-tracking branch 'origin/master' into llvm7 2018-09-10 12:30:57 -04:00
Andrew Kelley
fbe5737c84 stage1: always optimize blake and softfloat even in debug mode 2018-09-10 09:46:15 -04:00
Andrew Kelley
c0bdcc7417 zig id command 2018-09-09 23:58:52 -04:00
Andrew Kelley
173fc842c4 basic compiler id hash working 2018-09-09 18:38:41 -04:00
Bas van den Berg
7c9f7b72c5 Add capacity and appendAssumeCapacity to ArrayList 2018-09-09 10:28:07 -04:00
Andrew Kelley
859b0aee1e Merge pull request #1488 from Sahnvour/windows-resolve-path
Fixes a path corruption when compiling on windows.
2018-09-08 16:26:46 -04:00
Sahnvour
d80a5c9a79 Fixes a path corruption when compiling on windows. 2018-09-08 18:43:18 +02:00
Andrew Kelley
2005898689 improve panic message from previous commit 2018-09-07 20:19:55 -04:00
Andrew Kelley
9c169f3cf7 C ABI: support returning large structs on x86_64
also panic instead of emitting bad code for returning small structs

See #1481
2018-09-07 20:09:33 -04:00
Andrew Kelley
9017efee22 C ABI: support medium size structs & unions for x86_64 params
See #1481
2018-09-07 18:51:34 -04:00
Andrew Kelley
85534a26c6 stage1: function to classify x86_64 abi types 2018-09-07 18:51:34 -04:00
Raul Leal
f2186e5fa7 Update langref.html.in 2018-09-07 22:58:31 +01:00
raulgrell
09a1162af5 builtin functions: @byteOffsetOf and @bitOffsetOf 2018-09-07 22:49:19 +01:00
Andrew Kelley
b18af37c57 fix crash when var init has compile error
and then the var is referenced

closes #1483
2018-09-07 15:17:24 -04:00
Andrew Kelley
7505529e44 Merge branch 'c-abi'
closes #1411
closes #1264
2018-09-07 13:56:54 -04:00
Andrew Kelley
743b2e4afc add C ABI test for big unions 2018-09-07 13:51:11 -04:00
Andrew Kelley
421ca1523f stage1: refactor variable inits to use c abi fn walk 2018-09-07 13:24:41 -04:00
Andrew Kelley
c528c00900 stage1: refactor param vars for C ABI 2018-09-07 12:59:59 -04:00
Andrew Kelley
04d7b565f7 stage1: refactor fn type analysis to use C ABI walk fn 2018-09-07 12:23:50 -04:00
Andrew Kelley
be6cccb3a5 stage1: c abi for big struct works 2018-09-07 11:52:57 -04:00
Andrew Kelley
a9a925e500 add C ABI tests 2018-09-06 16:29:35 -04:00
Andrew Kelley
a375bd0d9f stage1: compile error instead of incorrect code
for unimplemented C ABI

See #1411
See #1481
2018-09-06 11:58:58 -04:00
Marc Tiehuis
29923efb95 Merge pull request #1480 from kristate/x25519-pubkey-fix
X25519: Fix createPublicKey signature and add test
2018-09-06 17:23:38 +12:00
Andrew Kelley
b4d5d4d174 assume evenly divided base64 2018-09-05 23:39:14 -04:00
kristopher tate
15d30b967a std/crypto/x25519.zig: add test for createPublicKey; 2018-09-06 12:24:53 +09:00
Andrew Kelley
97c9f61db4 start creating a hash of input parameters
See #1416
2018-09-05 23:24:40 -04:00
kristopher tate
d1855a0e93 std/crypto/x25519.zig: fix signature for createPublicKey; 2018-09-06 12:24:12 +09:00
Andrew Kelley
2d4b95900e stage1: import blake2b implementation
from master branch of blake2 reference implementation
320c325437539ae91091ce62efec1913cd8093c2
2018-09-05 23:23:11 -04:00
Andrew Kelley
6632d85e5f stage1: improve handling of generic fn proto type expr
closes #902
2018-09-05 21:21:59 -04:00
Andrew Kelley
1d8b8ad687 add compile error for using outer scoped runtime variables
from a fn defined inside it. closes #876
2018-09-05 20:32:06 -04:00
Andrew Kelley
8400163e02 stage1: rename more TypeTableEntry types to ZigType 2018-09-05 18:42:56 -04:00
Andrew Kelley
1f5c7ff4d7 stage1: rename VariableTableEntry to ZigVar 2018-09-05 18:35:57 -04:00
Andrew Kelley
3500d32db5 stage1: rename FnTableEntry to ZigFn 2018-09-05 18:34:33 -04:00
Andrew Kelley
db882e5d63 stage1: rename TypeTableEntry to ZigType 2018-09-05 18:33:07 -04:00
Andrew Kelley
a3d384e593 add test case for #726 2018-09-05 18:20:04 -04:00
Andrew Kelley
cc17b662e4 Merge branch 'hcff-floatToIntError' 2018-09-05 18:02:13 -04:00
Andrew Kelley
b517bea734 allow comptime_int to @floatToInt 2018-09-05 18:01:48 -04:00
hfcc
768d1fc539 Added compilation error when a non-float is given to @floatToInt() 2018-09-05 23:31:25 +02:00
Andrew Kelley
ffb3b1576b stage1: fix tagged union with no payloads
closes #1478
2018-09-05 16:19:58 -04:00
Andrew Kelley
c87a576cb5 stage1 compile error instead of crashing for unsupported comptime ptr cast
See #955
2018-09-05 15:53:36 -04:00
Andrew Kelley
ba7836ea48 stage1: fix build on macos 2018-09-05 12:10:53 -04:00
Andrew Kelley
a76a72469b stage1: fix crash when invalid type used in array type
closes #1186
2018-09-05 10:43:35 -04:00
Andrew Kelley
3e94650ef7 stage1: fix emit asm with explicit output file
closes #1473
2018-09-05 10:28:08 -04:00
Andrew Kelley
9a123697e3 fix compile error on gcc 7.3.0
Only set -Werror for debug builds, and only for zig itself, not for
embedded LLD.

See #1474
2018-09-05 10:18:12 -04:00
Andrew Kelley
ac3cf0775f Merge pull request #1474 from ziglang/issue-1357
Downgrade new g++-8.0 error to warning
2018-09-05 09:05:01 -04:00
Marc Tiehuis
ef2b8d4574 Downgrade new g++-8.0 error to warning
Allows building in Debug mode. Closes #1357.
2018-09-05 20:43:14 +12:00
Andrew Kelley
b35c74ea4c stage1: use os_path_resolve instead of os_path_real
to canonicalize imports.

This means that softlinks can represent different files,
but referencing the same absolute path different ways
still references the same import.
2018-09-04 23:17:38 -04:00
Andrew Kelley
2bf1b6840d port std.os.path.resolve to stage1 2018-09-04 22:45:20 -04:00
Andrew Kelley
869167fc6d compile error for @noInlineCall on an inline fn
closes #1133
2018-09-04 17:38:48 -04:00
Andrew Kelley
cbb3f1d76c ir: consistent error checking for br and cond_br instructions 2018-09-04 16:58:19 -04:00
Andrew Kelley
b00007056d update throughput test to new File API
closes #1468
2018-09-04 15:33:44 -04:00
Andrew Kelley
68db9d5074 add compile error for comptime control flow inside runtime block
closes #834
2018-09-04 15:28:35 -04:00
Andrew Kelley
f27d82fe90 Merge remote-tracking branch 'origin/master' into llvm7 2018-09-04 12:51:50 -04:00
Andrew Kelley
36828a2e6a fix incorrect variable ref count
regression introduced by e82cd53df4
2018-09-04 12:50:02 -04:00
Andrew Kelley
ff4591f0e6 fix llvm assertion when adding callsite sret attr 2018-09-04 12:15:15 -04:00
Andrew Kelley
dbde8254d0 Merge remote-tracking branch 'origin/master' into llvm7 2018-09-04 11:58:31 -04:00
Andrew Kelley
2bd2a8ea34 Merge pull request #1441 from ziglang/poly1305-x25519
Add poly1305 and x25519 crypto primitives
2018-09-04 10:34:46 -04:00
Marc Tiehuis
8b50d10a84 std/crypto: Clean up poly1305/x25519 2018-09-04 20:16:12 +12:00
Andrew Kelley
bc88ef2dc3 compile errors for unimplemented minValue/maxValue builtins 2018-09-03 22:47:23 -04:00
Andrew Kelley
e82cd53df4 fix incorrect value for inline loop
09cc1dc660 failed to handle mem_slot_index correctly

closes #1436
2018-09-03 21:24:20 -04:00
Andrew Kelley
a11e73bee2 compile error instead of segfault for unimplemented feature
closes #1103
2018-09-03 12:38:24 -04:00
Andrew Kelley
3f273479f8 clarify const variables in docs
closes #1200
2018-09-03 12:18:12 -04:00
Andrew Kelley
2a9329c998 better anonymous struct naming
this makes anonymous structs inherit the name of the function they are in
only when they are the return expression.

also document the behavior and provide examples.

closes #1243
2018-09-03 11:32:39 -04:00
Andrew Kelley
95636c7e5f ability to @ptrCast to *void
fixes #960
2018-09-03 00:04:12 -04:00
Andrew Kelley
92f7474359 switch most windows calls to use W versions instead of A
See #534
2018-09-02 23:25:04 -04:00
Andrew Kelley
d5968086fe use the sret attribute at the callsite when appropriate
Thanks to Shawn Landden for the original pull request.

closes #1450
2018-09-02 21:08:59 -04:00
Andrew Kelley
8558caecb5 Merge branch 'kristate-std-fmt-hexToBytes' 2018-09-02 19:25:07 -04:00
Andrew Kelley
3eb89ee4db fixups
* zig fmt
* use canonical parameter order. memcpy has dest first and
  the base64 code follows the pattern.
* pass correct radix to charToDigit
2018-09-02 19:23:30 -04:00
Andrew Kelley
0d8412d9f0 Merge branch 'std-fmt-hexToBytes' of https://github.com/kristate/zig into kristate-std-fmt-hexToBytes 2018-09-02 19:08:54 -04:00
Andrew Kelley
ab387bb4c7 Merge pull request #1460 from ziglang/Sahnvour-windows-coff-issue721
Stack traces for Windows
2018-09-02 18:47:48 -04:00
Andrew Kelley
832caefc2a fix regressions 2018-09-02 18:35:32 -04:00
Andrew Kelley
4cd50865bf fix source file lookup 2018-09-02 17:58:50 -04:00
Andrew Kelley
98dc943c07 rework code to avoid duplicate operations 2018-09-02 15:58:08 -04:00
kristopher tate
fbd9bac5e7 std/fmt/index.zig: add hexToBytes function under std.fmt;
Depends on #1454 being implemented;
2018-09-03 01:12:52 +09:00
Andrew Kelley
86e55567b4 Merge pull request #1454 from kristate/str-hexbytes-issue1453
std.fmt: print zeroed high-order bytes correctly in hex
2018-09-02 11:31:05 -04:00
Andrew Kelley
78a110cda5 Merge pull request #1452 from shawnl/patch-1
std/rb.zig: fix comment
2018-09-02 11:17:54 -04:00
Andrew Kelley
67cb299f7d Merge pull request #1456 from shawnl/inaccurate-comments
these all use futex() (inaccurate comments)
2018-09-02 11:14:17 -04:00
Shawn Landden
528e3b43a6 these all use futex() (inaccurate comments) 2018-09-01 23:52:52 -07:00
kristopher tate
d1752fbdc0 std/fmt/index.zig: test for printing double width hex bytes with zeros;
Co-Authored-By: Shawn Landden <shawn@git.icu>
2018-09-02 15:04:57 +09:00
kristopher tate
48d3fbef5c std/fmt/index.zig: set width from 0 to 2;
\x00 was printed as 0 and \x0E was printed as E;
\x00 now correctly prints 00 and \x0E correctly prints 0E;
2018-09-02 15:04:20 +09:00
Shawn Landden
4bf54f3010 std/rb.zig: fix comment 2018-09-01 22:23:34 -07:00
Andrew Kelley
1a5c3e4501 Merge pull request #1451 from kristate/fmt-hexbytes-issue1358
allow bytes to be printed-out as hex (#1358)
2018-09-01 10:44:54 -04:00
Andrew Kelley
19004cd5db Merge pull request #1444 from kristate/winsdk-vercheck-issue1438
correct version comparison for detecting msvc (fixes #1438)
2018-09-01 10:39:19 -04:00
kristopher tate
7a633f472d std/fmt/index.zig: #1358: test bytes printed-out as hex; 2018-09-01 19:53:11 +09:00
kristopher tate
454b2362ee std/fmt/index.zig: #1358 allow bytes to be printed-out as hex;
Supports {x} for lowercase and {X} for uppercase;
2018-09-01 19:40:05 +09:00
kristopher tate
9e6f53dd58 i#1438: src/windows_sdk.cpp: fix version guard in find_81_version; 2018-09-01 12:02:21 +09:00
kristopher tate
d4474e195e i#1438: src/windows_sdk.cpp: fix version guard in find_10_version; 2018-09-01 12:02:04 +09:00
Andrew Kelley
6ddbd345aa figuring out where /names stream is 2018-08-31 19:50:03 -04:00
Jimmi Holst Christensen
e036f65ac0 Translate-c: Check for error before working on while loop body (#1445) 2018-08-31 23:17:17 +02:00
Andrew Kelley
b36b93fb3e awareness of debug subsections 2018-08-31 15:02:41 -04:00
Marc Tiehuis
763845f95c std/crypto: zig fmt 2018-08-31 18:45:45 +12:00
Marc Tiehuis
38399941d4 std/crypto: Update throughput_test.zig to include all hash functions
This avoids the need to recompile to test specific hash functions. This
also adds mac/key exchange performance tests as well.
2018-08-31 18:45:07 +12:00
Marc Tiehuis
a7527389cc Make poly1305 and x25519 more idiomatic zig
This also adjusts the current hash/hmac functions to have a consistent
interface allowing easier switching/testing.
2018-08-31 18:40:09 +12:00
Andrew Kelley
99170aa13d finding source file, line, and column info 2018-08-31 01:01:37 -04:00
Andrew Kelley
72185e7dd3 finding the function that an address is in 2018-08-30 16:57:55 -04:00
Andrew Kelley
44f908d2e6 figuring out which module an address belongs in 2018-08-30 15:33:50 -04:00
Andrew Kelley
96117e20cc reading the module information substream 2018-08-30 03:44:34 -04:00
Marc Tiehuis
65b89f598c Add poly1305 and x25519 crypto primitives
These are translated from [monocypher](https://monocypher.org/) which
has fairly competitive performance while remaining quite simple.

Initial performance comparison:

Zig:
 Poly1305: 1423 MiB/s
 X25519:   8671 exchanges per second

Monocypher:
 Poly1305: 1567 MiB/s
 X25519:   10539 exchanges per second

There is room for improvement and no real effort has been made at all in
optimization beyond a direct translation.
2018-08-30 18:02:19 +12:00
Andrew Kelley
686663239a printing info from the ModuleInfo substream of DebugInfo 2018-08-29 19:00:24 -04:00
Andrew Kelley
f1b71053de use RtlCaptureStackBackTrace on windows 2018-08-29 16:35:51 -04:00
Andrew Kelley
833477abf5 fix unresolved path preventing PDB loading 2018-08-28 18:55:51 -04:00
Andrew Kelley
41723f842c Merge branch 'windows-coff-issue721' of https://github.com/Sahnvour/zig into Sahnvour-windows-coff-issue721 2018-08-28 17:32:32 -04:00
Andrew Kelley
9de0f900e1 Merge pull request #1369 from shawnl/crypto
std/crypto: add chacha20
2018-08-28 16:07:58 -04:00
Andrew Kelley
b65cca37ce add test coverage for invalid switch expression parameter
closes #604
2018-08-28 15:48:39 -04:00
Andrew Kelley
901b5c1566 add compile error for function prototype with no body
closes #1231
2018-08-28 15:39:32 -04:00
Andrew Kelley
09cc1dc660 fix crash when var in inline loop has different types
closes #917
closes #845
closes #741
closes #740
2018-08-28 15:24:28 -04:00
Marc Tiehuis
87eb95f816 speed up chacha20
The main changes are:

    Unrolling the inner rounds of salsa20_wordtobyte which doubles the speed.
    Passing the slice explicitly instead of returning the array saves a copy (can optimize out in future with copy elision) and gives ~10% improvement.
    Inlining the outer loop gives ~15-20% improvement but it costs an extra 4Kb of code space. I think the tradeoff is worthwhile here.
    The other inline loops are small and can be done by the compiler if it is worthwhile.
    The rotate function replacement doesn't alter the performance from the former.

The modified throughput test I've used to benchmark is as follows. Interestingly we need to allocate memory instead of using a fixed buffer else Zig optimizes the whole thing out.

https://github.com/ziglang/zig/pull/1369#issuecomment-416456628
2018-08-27 22:55:53 -07:00
Shawn Landden
444edd9aed std.crypto: add chaCha20
v3
2018-08-27 19:44:11 -07:00
Andrew Kelley
048f506aa6 langref: document labeled blocks, labeled for, labeled while
closes #1327
2018-08-27 20:59:28 -04:00
Andrew Kelley
fb6d3859e8 zig fmt 2018-08-27 19:25:40 -04:00
Andrew Kelley
4f2d49fd13 std.zig.parse: fix parsing of doc comments after fields
closes #1404
2018-08-27 19:21:38 -04:00
Andrew Kelley
b92fac329e Merge branch 'raulgrell-CastToCVoid' 2018-08-27 18:31:41 -04:00
Andrew Kelley
45d9d9f953 minor fixups 2018-08-27 18:31:28 -04:00
raulgrell
e2a9f2ef98 Allow implicit cast from *T and [*]T to ?*c_void 2018-08-27 23:13:57 +01:00
Andrew Kelley
c48be3a742 langref: document exporting a library
closes #1431
2018-08-27 17:44:58 -04:00
tgschultz
ecc5464024 Handle unions differently in std.fmt (#1432)
* Handle unions differently in std.fmt

Print the active tag's value in tagged unions. Untagged unions are considered unsafe to print and are treated like a pointer or an array.
2018-08-27 17:25:33 -04:00
Andrew Kelley
009e90f446 fix @typeInfo unable to distinguish compile error vs no-payload
closes #1421
closes #1426
2018-08-27 17:13:34 -04:00
Andrew Kelley
2f2215c9f4 this was intended to be included in the previous commit 2018-08-27 16:26:36 -04:00
Andrew Kelley
526d8425ab fix false negative determining if function is generic
This solves the smaller test case of #1421 but the
other test case is still an assertion failure.
2018-08-27 16:14:48 -04:00
Andrew Kelley
68e2794e15 ir: const_ptr_pointee asserts that its return value is non-null 2018-08-26 13:13:26 -04:00
Andrew Kelley
6a3fad1d59 Revert "src/ir.cpp: check return value of const_ptr_pointee to protect against dereferencing null pointers;"
This reverts commit 0839ed1f94.

I realized too late there is a better fix. See PR #1419
2018-08-26 13:04:58 -04:00
kristopher tate
0839ed1f94 src/ir.cpp: check return value of const_ptr_pointee to protect against dereferencing null pointers; 2018-08-26 13:02:09 -04:00
Andrew Kelley
f7f11e237c Merge remote-tracking branch 'origin/master' into llvm7 2018-08-26 02:39:26 -04:00
Andrew Kelley
8047f0eae2 fix llvm assertion failure when building std lib tests for macos
closes #1417
2018-08-26 02:36:18 -04:00
Andrew Kelley
cce14f92fc update clang headers to 7.0.0rc2 2018-08-25 22:14:41 -04:00
Andrew Kelley
20810e0a79 LLD patch: workaround for buggy MACH-O code
This reapplies 1a1414fc42
to the embedded LLD.
2018-08-25 22:11:32 -04:00
Andrew Kelley
95dbba046d update LLD fork to 7.0.0rc2 2018-08-25 22:10:32 -04:00
Andrew Kelley
7109035b78 Merge remote-tracking branch 'origin/master' into llvm7 2018-08-25 21:57:28 -04:00
Andrew Kelley
526338b00f document fixed-width integer types
closes #1280
2018-08-25 17:39:43 -04:00
Andrew Kelley
56a53021a1 Merge branch 'tgschultz-patch-3' 2018-08-25 17:28:40 -04:00
Andrew Kelley
2cce171448 add test for previous commit 2018-08-25 17:28:30 -04:00
tgschultz
61c0c6d502 Fixed compile error when passing enum to fmt
Caused by struct printing behavior. Enums are different enough from structs and unions that the field iteration behavior doesn't do what we want even if @memberName didn't error on enums.
2018-08-25 10:51:49 -05:00
Andrew Kelley
4003cd4747 Merge pull request #1406 from ziglang/macos-stack-traces
MacOS stack traces
closes #1365
2018-08-25 04:50:51 -04:00
Andrew Kelley
815950996d Merge remote-tracking branch 'origin/master' into macos-stack-traces 2018-08-25 04:48:58 -04:00
Andrew Kelley
8aacfc8465 add workaround on macos for shared libraries 2018-08-25 04:37:55 -04:00
Andrew Kelley
02f5a9fa62 fix handling multiple extern vars with the same name 2018-08-25 03:55:59 -04:00
Andrew Kelley
b95ff12f2f fix regressions 2018-08-25 03:40:47 -04:00
Andrew Kelley
ac36f98e72 fix stack traces on linux 2018-08-25 03:07:37 -04:00
Andrew Kelley
32901926f0 compilation unit cwd dir appears to be unnecessary on macos 2018-08-24 15:43:48 -04:00
Andrew Kelley
4e7c255e4d macos stack traces have address-to-line translation 2018-08-24 14:55:55 -04:00
Andrew Kelley
bf1f91595d std.debug: remove workaround for fixed bug 2018-08-24 13:00:28 -04:00
Andrew Kelley
6b31b178a6 fix regression from 2f7f7d815d 2018-08-24 12:59:31 -04:00
Andrew Kelley
95e197667e macos stack traces have the compilation unit in them 2018-08-24 11:30:36 -04:00
Andrew Kelley
0a918aaa14 Merge pull request #1407 from ziglang/builtin-alignment-fix
Fix builtin alignment type
2018-08-24 10:42:43 -04:00
Marc Tiehuis
05f9b14fc2 Fix builtin alignment type
Closes #1235.
2018-08-24 17:57:17 +12:00
Andrew Kelley
3a02ba9b82 fix error message for incorrect panic handler fn signature
closes #1353
2018-08-23 23:22:48 -04:00
Andrew Kelley
3173c90f14 macos stack traces: read debug info sections from .o files 2018-08-23 23:08:34 -04:00
Andrew Kelley
6c064cfd88 Merge pull request #1405 from shawnl/path-max
missing PATH_MAX change
2018-08-23 20:16:13 -04:00
Shawn Landden
2f7f7d815d missing PATH_MAX change 2018-08-23 17:00:50 -07:00
Andrew Kelley
5c1ec20c9a MacOS stack traces use the already mmapped executable
...rather than trying to find the executable on the file system.

Also use a more robust PIE offset calculation based on the
available metadata.

And for the last function, use the data that tells the end
rather than assuming 4K.

Also they print in a consistent way with Linux stack traces.
2018-08-23 16:23:33 -04:00
Andrew Kelley
327482c3a4 Merge pull request #1402 from ziglang/default-fp-ieee-strict
Default to strict IEEE floating point
2018-08-23 08:26:09 -04:00
Andrew Kelley
68dcdf1c86 Merge pull request #1401 from kristate/mem-testWriteIntImpl-u64
std/mem.zig: test writing u64 integers;
2018-08-23 08:25:26 -04:00
Marc Tiehuis
353419f82d Default to strict IEEE floating point
Closes #1227.
2018-08-23 22:54:46 +12:00
Andrew Kelley
8f96553be8 rename std.debug.ElfStackTrace to std.debug.DebugInfo 2018-08-22 21:35:49 -04:00
kristopher tate
e95345b3dc std/mem.zig: test writing u64 integers; 2018-08-23 09:03:02 +09:00
Andrew Kelley
4b68ef45af fix incorrectly generating an unused const fn global
closes #1277
2018-08-22 14:31:30 -04:00
Andrew Kelley
5aeb3217ee fixup for previous commit 2018-08-22 14:24:48 -04:00
Raul Leal
87b10400c2 allow implicit cast from *[N]T to ?[*]T (#1398)
* allow implicit cast from *[N]T to ?[*]T
2018-08-22 13:12:08 -04:00
Andrew Kelley
3d780cf2ef Merge branch 'shawnl-path_max'
This does a proof of concept of changing most file system APIs to not
require an allocator and remove the possibility of failure via
OutOfMemory.

This also does most of the work of #534.
2018-08-21 21:02:01 -04:00
Andrew Kelley
3dd1026c8b fix docs on windows 2018-08-21 21:01:37 -04:00
Andrew Kelley
02ba4b1678 Merge branch 'master' into shawnl-path_max 2018-08-21 20:56:28 -04:00
Andrew Kelley
478db39866 fix selfExePath on macosx 2018-08-21 20:52:21 -04:00
Andrew Kelley
b2917e6be0 Revert "Merge branch 'mtn-translate-c-enum-vals'"
This reverts commit 937b822fa9, reversing
changes made to dd4b13ac03.

Tests failing on Windows.

Re-opens #1360
2018-08-21 20:50:03 -04:00
Andrew Kelley
ea1b21dbdb fix linux
* error.BadFd is not a valid error code. it would always be a bug to
   get this error code.
 * merge error.Io with existing error.InputOutput
 * merge error.PathNotFound with existing error.FileNotFound.
   Not all OS's support both.
 * add os.File.openReadC
 * add error.BadPathName for windows file operations with invalid
   characters
 * add os.toPosixPath to help stack allocate a null terminating byte
 * add some TODOs for other functions to investigate removing the
   allocator requirement
 * optimize some implementations to use the alternate functions when
   a null byte is already available
 * add a missing error.SkipZigTest
 * os.selfExePath uses a non-allocating API
 * os.selfExeDirPath uses a non-allocating API
 * os.path.real uses a non-allocating API
 * add os.path.realAlloc and os.path.realC
 * convert many windows syscalls to use the W versions (See #534)
2018-08-21 20:31:50 -04:00
Andrew Kelley
51852d2587 fix windows 2018-08-21 16:07:28 -04:00
Andrew Kelley
bda5539e9d *WIP* std.os assumes comptime-known max path size
this allows us to remove the requirement of allocators for a lot
of functions

See #1392
2018-08-21 00:46:42 -04:00
Andrew Kelley
937b822fa9 Merge branch 'mtn-translate-c-enum-vals'
This reintroduces b8ce8f219c.
(reverting dd4b13ac03)

Now with correct author information. Apologies to kristopher tate
and Michael Noronha.
2018-08-20 22:46:50 -04:00
kristopher tate
b023db2e82 src/translate_c.cpp: correctly bridge llvm::APSInt with Zig BigInt;
ACHTUNG: llvm::APSInt stores an int's sign inside of its getRawData; Internally to Zig we store an integer's sign outside of getRawData! (~aps_int) calls .flip() internally on the raw data to match Zig.

test/translate_c.zig: enum: add wider range of values (u64) to try;
2018-08-20 22:46:11 -04:00
Michael Noronha
7e7e59d881 translate-c: Correctly translate enum init values, addressing #1360 2018-08-20 22:45:19 -04:00
Andrew Kelley
dd4b13ac03 Revert "translate-c: Correctly translate enum init values, addressing #1360 (#1377)"
This reverts commit b8ce8f219c.

Squashing the commits from the pull request resulted in kristopher tate
being omitted from the authors. A future commit will merge
the code correctly.
2018-08-20 22:39:39 -04:00
Andrew Kelley
302936309a Merge branch 'path_max' of https://github.com/shawnl/zig into shawnl-path_max 2018-08-20 17:57:49 -04:00
Andrew Kelley
9e9dce76ff refactor std.os.makePath to use a switch instead of if 2018-08-20 17:57:03 -04:00
Andrew Kelley
820bf054ea std.fmt.format: handle non-pointer struct/union/enum
Also adds support for printing structs via reflection.
The case when structs have pointers to themselves is not
handled yet.

closes #1380
2018-08-20 16:04:03 -04:00
Michael Noronha
b8ce8f219c translate-c: Correctly translate enum init values, addressing #1360 (#1377)
* translate-c: Correctly translate enum init values

* translate-c: Test enum initialization

* translate-c: Flip to positive using APSInt builtins

* src/translate_c.cpp: correctly bridge llvm::APSInt with Zig BigInt;

ACHTUNG: llvm::APSInt stores an int's sign inside of its getRawData; Internally to Zig we store an integer's sign outside of getRawData! (~aps_int) calls .flip() internally on the raw data to match Zig.

* test/translate_c.zig: enum: add wider range of values (u64) to try;

closes #1360
2018-08-20 14:29:26 -04:00
Andrew Kelley
3ee1b60edf langref: add docs for peer type resolution
closes #1367
2018-08-20 14:22:16 -04:00
Shawn Landden
bb93886791 do not use an allocator when we don't need to because of the existence of PATH_MAX 2018-08-19 21:42:48 -07:00
Marc Tiehuis
53b18b0791 Add secureZero function
This is identical to `mem.set(u8, slice, 0)` except that it will never
be optimized out by the compiler. Intended usage is for clearing
secret data.

The resulting assembly has been manually verified in --release-* modes.

It would be valuable to test the 'never be optimized out' claim in tests
but this is harder than initially expected due to how much Zig appears
to know locally. May be doable with @intToPtr, @ptrToInt to get around
known data dependencies but I could not work it out right now.
2018-08-18 12:15:39 +12:00
Marc Tiehuis
1da93caced docs: correct @memcpy, @memset function signatures 2018-08-18 12:06:25 +12:00
Andrew Kelley
4c95b2f9d1 Merge pull request #1379 from tgschultz/patch-1
fixed handling of [*]u8 when no format specifier is set
2018-08-14 14:38:20 -04:00
tgschultz
fa955f0024 fixed handling of [*]u8 when no format specifier is set
If fmt was called with a [*]u8 or [*]const u8 argument, but the fmt string did not specify 's' to treat it as a string, it produced a compile error due to accessing index 1 of a 0 length slice.
2018-08-14 12:56:41 -05:00
Andrew Kelley
52471f6221 Merge pull request #1378 from prazzb/cmake-fix
Find local llvm-config first
2018-08-14 12:53:54 -04:00
prazzb
6e55f61581 Find local llvm-config first
A distro's llvm usually has a 6.0 suffix. Any custom llvm build
names the binary as llvm-config. Keeping the 6.0 variant first causes
the distro's llvm to be compiled in place of a custom one even if
given using CMAKE_PREFIX_PATH.
2018-08-13 22:55:19 +05:30
Andrew Kelley
65497121f4 Merge pull request #1370 from shawnl/master
rb: some style fixes
2018-08-12 12:58:11 -04:00
Shawn Landden
64a71be5c3 rb: some style fixes
avoid @import("std") as is the custom

compare function name
2018-08-10 21:46:30 -07:00
Andrew Kelley
c4b9466da7 Merge pull request #1294 from ziglang/async-fs
introduce std.event.fs for async file system functions
2018-08-10 15:51:17 -04:00
Andrew Kelley
598e80957e windows: call CancelIo when canceling an fs watch 2018-08-10 13:19:07 -04:00
Andrew Kelley
0df485d4dc self-hosted: reorganize creation and destruction of Compilation 2018-08-10 12:28:20 -04:00
Andrew Kelley
d40f3fac74 docgen: fix usage of std.HashMap 2018-08-10 00:03:16 -04:00
Andrew Kelley
23af36c54f windows fs watching: fix not initializing table value 2018-08-09 21:48:25 -04:00
Andrew Kelley
26a842c264 windows: only create io completion port once 2018-08-09 20:12:46 -04:00
Andrew Kelley
b219feb3f1 initial windows implementation of std.event.fs.Watch 2018-08-09 16:48:44 -04:00
Andrew Kelley
c63ec9886a std.event.fs.preadv windows implementation 2018-08-08 16:55:19 -04:00
Andrew Kelley
8b456927be std.event.fs.pwritev windows implementation
also fix 2 bugs where the function didn't call allocator.shrink:
 * std.mem.join
 * std.os.path.resolve
2018-08-08 15:06:32 -04:00
Wink Saville
d927f347de Fix ir_analyze_instruction_atomic_rmw (#1351)
There were two tests of type_is_valid(casted_ptr->value.type); change the
second one to type_is_valid(casted_operand->value.type).
2018-08-07 23:18:26 -04:00
Andrew Kelley
ac12f0df71 fix linux regressions 2018-08-07 22:23:26 -04:00
Andrew Kelley
60955feab8 std.event.fs.Watch distinguishes between Delete and CloseWrite on darwin
TODO: after 1 event emitted for a deleted file, the file is no longer
watched
2018-08-07 22:14:30 -04:00
Andrew Kelley
5cbfe392be implement std.event.fs.Watch for macos 2018-08-07 21:06:21 -04:00
Shawn Landden
a583beb76c mem: use pub on Compare (#1352)
fixes rb

/home/shawn/git/zig/std/rb.zig:133:37: error: 'Compare' is private
    compare_fn: fn(*Node, *Node) mem.Compare,
2018-08-07 19:15:11 -04:00
Andrew Kelley
034363a86c Merge pull request #1338 from shawnl/master
std: add red-black tree implementation
2018-08-07 12:47:28 -04:00
Shawn Landden
bbbb26f4d3 mem: add mem.compare(), and use it for mem.lessThan() 2018-08-07 05:30:54 -07:00
Shawn Landden
86b512c5cd mem: move enum Compare from rb to mem 2018-08-07 04:57:41 -07:00
Andrew Kelley
dcf3869acd Merge pull request #1346 from shawnl/doc
doc: @addWithOverflow also returns if overflow occurred
2018-08-07 01:28:55 -04:00
Shawn Landden
5d2abf4402 std: add red-black tree implementation
This is to be used with @fieldParentPtr();

Example:

const rb = @import("std").rb;

const Number = struct {
    node: rb.Node,
    value: i32,
};

fn number(node: *rb.Node) *Number {
    @fieldParentPtr(Number, "node", node);
}

fn compare(l: *rb.Node, r: *rb.Node) rb.Compare {
    var left = number(l);
    var right = number(r);

    if (left.value < right.value) {
        return rb.Compare.LessThan;
    } else if (left.value == right.value) {
        return rb.Compare.Equal;
    } else if (left.value > right.value) {
        return rb.Compare.GreaterThan;
    }
    unreachable;
}
--

A version that caches rb.Tree.first() could be added in the future.
2018-08-06 22:18:44 -07:00
Shawn Landden
cb0ef3ad4c doc: @addWithOverflow also returns if overflow occurred 2018-08-06 22:12:14 -07:00
Andrew Kelley
1a28f09684 fix hash map test 2018-08-07 00:54:19 -04:00
Andrew Kelley
fd50a6896b std.event.fs support for macos
The file I/O stuff is working, but the fs watching
stuff is not yet.
2018-08-07 00:49:09 -04:00
Andrew Kelley
2c9ed664dd merge @kristate's std lib changes to darwin 2018-08-06 19:36:31 -04:00
Andrew Kelley
97be8debab std.HashMap.autoHash: use xor instead of wrapping mult 2018-08-06 19:09:22 -04:00
Andrew Kelley
c02ed80512 Merge branch 'mdsteele-threadid' 2018-08-06 17:32:55 -04:00
Andrew Kelley
24d74cbf44 fix Thread impl on Linux and add docs 2018-08-06 17:31:52 -04:00
Andrew Kelley
d2dd29e80c separate os.Thread.Id and os.Thread.Handle because of windows 2018-08-06 17:25:24 -04:00
Andrew Kelley
0a3ae9dc6e fix std.os.Thread.getCurrentId for linux 2018-08-06 16:48:49 -04:00
Andrew Kelley
647fd0f4f1 Merge branch 'threadid' of https://github.com/mdsteele/zig into mdsteele-threadid 2018-08-06 16:12:37 -04:00
Andrea Orru
72bac72338 Merge pull request #1339 from ziglang/zen_stdlib
Updates and fixes for the Zen stdlib
2018-08-06 03:05:22 -04:00
Andrea Orru
79d77faebf More type cast fixes 2018-08-06 02:42:12 -04:00
Andrea Orru
641066d82e Fix casts 2018-08-06 02:29:11 -04:00
Andrea Orru
d2f5e57b68 Merge branch 'master' into zen_stdlib 2018-08-06 01:43:19 -04:00
Andrew Kelley
63a23e848a translate-c: fix for loops with var init and empty body 2018-08-05 18:40:14 -04:00
Andrew Kelley
387fab60a6 translate-c: fix do while with empty body 2018-08-05 18:32:38 -04:00
Andrew Kelley
c420b234cc translate-c: handle for loop with empty body 2018-08-05 18:18:24 -04:00
Andrew Kelley
aa232089f2 translate-c: fix while loop with no body 2018-08-05 18:06:39 -04:00
Andrew Kelley
6cf248ec08 update c_headers/* to LLVM 7.0.0rc1 2018-08-05 02:20:05 -04:00
Matthew D. Steele
7a2401ef1e Don't compare ?Thread.Id == Thread.Id in the test
It doesn't work, because of issue #1332.
2018-08-04 21:47:13 -04:00
Andrew Kelley
ee68f28bba LLD patch: workaround for buggy MACH-O code
This reapplies 1a1414fc42
to the embedded LLD.
2018-08-04 18:19:26 -04:00
Andrew Kelley
c4d31c8323 build: update embedded LLD build files 2018-08-04 18:09:19 -04:00
Andrew Kelley
cfb29f18e4 update embedded LLD to 7.0.0rc1 2018-08-04 17:47:16 -04:00
Andrew Kelley
b48948d6e8 Merge branch 'master' into llvm7 2018-08-04 15:19:03 -04:00
kristopher tate
a25824e033 zig/std/os/index.zig: clean-up thread id; (#1)
Ref #1316 #1330
2018-08-04 14:38:51 -04:00
Matthew D. Steele
86d1cc8e2f Add thread ID support to std.os.Thread (fixes #1316) 2018-08-03 21:36:04 -04:00
Andrew Kelley
2680f9ab48 Merge remote-tracking branch 'origin/master' into async-fs 2018-08-03 18:47:30 -04:00
Andrew Kelley
c5f1925bc8 when decls don't change, don't regenerate them 2018-08-03 17:59:11 -04:00
Andrew Kelley
5dfcd09e49 self-hosted: watch files and trigger a rebuild 2018-08-03 17:22:17 -04:00
Andrew Kelley
9bd8b01650 fix tagged union initialization with a runtime void
closes #1328
2018-08-03 15:21:08 -04:00
Andrew Kelley
c66c6304f9 add a friendly note in .gitignore 2018-08-03 15:20:19 -04:00
Matthew D. Steele
dcaaa241df Fix a type error in std.os.linux.getpid() (#1326)
syscall0() returns usize, but we were trying to @bitCast to i32.
2018-08-03 11:45:23 -04:00
Matthew D. Steele
c2a08d7c51 Fix the start-less-than-end assertion in std.rand.Random.range (#1325)
The function returns a value in [start, end), but was asserting
start <= end instead of start < end.  With this fix, range(1, 1)
will now trigger an assertion error instead of dividing by zero.
2018-08-03 11:44:39 -04:00
kristopher tate
298abbcff8 better support for _ identifier
* disallow variable declaration of `_`
 * prevent `_` from shadowing itself
 * prevent read access of `_`

closes #1204
closes #1320
2018-08-03 02:57:17 -04:00
Andrew Kelley
7f6e97cb26 fixups from the merge 2018-08-02 17:36:08 -04:00
Andrew Kelley
65140b2fba Merge remote-tracking branch 'origin/master' into async-fs 2018-08-02 17:29:31 -04:00
Andrew Kelley
951124e177 evented I/O zig fmt 2018-08-02 17:24:15 -04:00
Andrew Kelley
821805aa92 WIP: Channel.getOrNull 2018-08-02 17:04:17 -04:00
Andrew Kelley
fb05b96492 Merge branch 'kristate-handle-builtin-issue1296' 2018-08-02 14:16:46 -04:00
Andrew Kelley
895f262a55 pull request fixups
* clean up parser code
 * fix stage2 parse and render code
 * remove redundant test
 * make stage1 compile tests leaner
2018-08-02 14:15:31 -04:00
Andrew Kelley
44fd3045ce Merge branch 'handle-builtin-issue1296' of https://github.com/kristate/zig into kristate-handle-builtin-issue1296 2018-08-02 13:37:24 -04:00
Andrew Kelley
9ecbabfc4c Merge branch 'pr-1319' 2018-08-02 13:35:06 -04:00
Andrew Kelley
729f2aceb0 fix API of RtlGenRandom 2018-08-02 13:34:31 -04:00
Andrew Kelley
cbca434cf0 Merge branch 'windows-RtlGenRandom-issue1318' of https://github.com/kristate/zig into pr-1319 2018-08-02 13:26:02 -04:00
kristopher tate
782043e2e6 std/os/windows/util.zig: SKIP instead of PASS on non-windows systems;
Tracking Issue #1318 ;
2018-08-03 02:16:49 +09:00
kristopher tate
dde7eb45c5 std/os/index.zig: call getRandomBytes() twice and compare;
Tracking Issue #1318 ;
2018-08-03 02:16:19 +09:00
kristopher tate
c44653f40f std/os/index.zig: swap CryptGetRandom() with RtlGenRandom();
Tracking Issue #1318 ;
2018-08-03 02:14:52 +09:00
kristopher tate
22fd359e2c std/os/windows/advapi32.zig: add SystemFunction036;
Tracking Issue #1318 ;
2018-08-03 02:14:06 +09:00
kristopher tate
432b7685bf std/os/index.zig: use "hw.logicalcpu" instead of "hw.ncpu" in macOS; (#1317)
Tracking Issue #1252 ;

hw.ncpu was deprecated in macOS. Among 4 new options available (hw.{physicalcpu, physicalcpu_max, logicalcpu, logicalcpu_max}), hw.logicalcpu was chosen because it actually reflects the number of logical cores the OS sees.
2018-08-02 12:59:59 -04:00
kristopher tate
96a94e7da9 std/event: directly return @handle();
Tracking Issue #1296 ;
2018-08-02 17:52:40 +09:00
kristopher tate
ac0a87d58d doc/langref.html.in: add builtin @handle() to docs;
Tracking Issue #1296 ;
2018-08-02 17:47:39 +09:00
kristopher tate
9b890d7067 test/cases/cancel.zig: update suspend to use @handle();
Tracking Issue #1296 ;
2018-08-02 17:47:03 +09:00
kristopher tate
9bed23f8b7 test/cases/coroutines.zig: update suspend to use @handle();
Tracking Issue #1296 ;
2018-08-02 17:46:41 +09:00
kristopher tate
915e321a23 doc/langref.html.in: update suspend example with @handle();
Tracking Issue #1296 ;
2018-08-02 17:45:35 +09:00
kristopher tate
5de92425d5 src/parser.cpp: fix typo from rebase; 2018-08-02 17:11:37 +09:00
kristopher tate
ff4a03f351 doc/langref.html.in: update docs to reflect that the promise symbol is no longer in scope with suspend;
Tracking Issue #1296 ;
2018-08-02 17:03:39 +09:00
kristopher tate
51955a5ca2 test/compile_errors.zig: update test to reflect that the promise symbol is no longer in scope with suspend;
Tracking Issue #1296 ;
2018-08-02 17:02:14 +09:00
kristopher tate
9b3cebcdb9 test/cases/coroutines.zig: test for immediate resume inside of suspend with @handle();
Tracking Issue #1296 ;
2018-08-02 17:02:14 +09:00
kristopher tate
3241ada468 test/cases/coroutines.zig: update test to reflect that the promise symbol is no longer in scope with suspend;
Tracking Issue #1296 ;
2018-08-02 17:02:14 +09:00
kristopher tate
79792a32e1 test/cases/coroutine_await_struct.zig: update test to reflect that the promise symbol is no longer in scope with suspend;
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
bc032a89cc std/zig/parser_test.zig: update test to reflect that the promise symbol is no longer in scope with suspend;
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
9fe140abad std/event/tcp.zig: remove promise_symbol from suspend and use @handle();
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
a3705b4251 std/event/loop.zig: remove promise_symbol from suspend and use @handle();
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
efec3a0e34 std/event/lock.zig: remove promise_symbol from suspend and use @handle();
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
b4ff464d39 std/event/group.zig: remove promise_symbol from suspend and use @handle();
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
244a7fdafb std/event/future.zig: remove promise_symbol from suspend and use @handle();
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
29057e5511 std/event/channel.zig: remove promise_symbol from suspend and use @handle();
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
d3f628907a src/parser.cpp: remove promise_symbol from suspend;
Tracking Issue #1296 ;
2018-08-02 16:59:11 +09:00
kristopher tate
b3cd65d56e src/ir.cpp: remove promise_symbol from suspend;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
5e5685c117 src/ast_render.cpp: remove promise_symbol from suspend;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
ca1b356337 src/all_types.hpp: remove promise_symbol from suspend;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
92cb330e16 src/codegen.cpp: @handle(): replace hacky ref chain with llvm intrinsic;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
13ec5db234 test/compile_errors.zig: @handle() in non-async function
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
c546f750f1 test/compile_errors.zig: @handle() called outside of function definition;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
104bdb03d6 src/codegen.cpp: return promise instead of null promise;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
a8ea236095 src/ir.cpp: don't allow @handle() outside of a function;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
fcf53b31fc src/ir.cpp: return promise->T instead of promise;
Tracking Issue #1296 ;
Thanks @andrewrk ;
2018-08-02 16:50:08 +09:00
kristopher tate
1f0040dd92 test/cases/coroutines.zig: remove dummy assert used for testing; 2018-08-02 16:50:08 +09:00
kristopher tate
c1a3b0cb0a src/ir.cpp: add/throw error for @handle() in a non async context;
Tracking Issue #1296 ;
Thanks @andrewrk ;
2018-08-02 16:50:08 +09:00
kristopher tate
db362bec18 src/codegen.cpp: reassert that there are no generated errors in codegen;
Tracking Issue #1296 ;
Thanks @andrewrk ;
2018-08-02 16:50:08 +09:00
kristopher tate
0ee6502562 src/codegen.cpp: remove add_node_error from ir_render_handle;
Tracking Issue #1296 ;
Thanks @andrewrk ;
2018-08-02 16:50:08 +09:00
kristopher tate
81f463626a src/codegen.cpp: add/throw error for @handle() in a non async context;
Tracking Issue #1296 ;

I removed/commented-out the assert checking for no errors since we now have some errors rendered.
2018-08-02 16:50:08 +09:00
kristopher tate
a2e5691228 src/codegen.cpp: return null if calling convention is not async;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
9366a58bdd test/cases/coroutines.zig: test @handle();
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
da5f3d5c4c src/ir_print.cpp: support @handle();
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
cd18186715 src/codegen.cpp: base handle builtin on @frameAddress();
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
a9ea22d4f9 src/ir.cpp: wire-up IR for handle builtin;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
kristopher tate
e79c913cbc src/all_types.hpp: add enums for Handle Builtin;
Tracking Issue #1296 ;
2018-08-02 16:50:08 +09:00
Andrew Kelley
e3ae2cfb52 add std.event.RwLock and a few more std changes
* add std.event.RwLock and std.event.RwLocked
 * std.debug.warn does its printing locked
 * add std.Mutex, however it's currently implemented as a spinlock
 * rename std.event.Group.cancelAll to std.event.Group.deinit and change
   the docs and assumptions.
 * add std.HashMap.clone
2018-08-01 16:26:37 -04:00
Marc Tiehuis
e66f538972 Add integer binary output format (#1313) 2018-08-01 11:38:04 -04:00
Andrew Kelley
de949b72c7 simpler std.event.Lock implementation 2018-07-31 19:57:46 -04:00
Andrew Kelley
f804310d9f Merge remote-tracking branch 'origin/master' into llvm7 2018-07-31 14:36:27 -04:00
Andrew Kelley
058bfb254c std.fmt.format: add '*' for formatting things as pointers
closes #1285
2018-07-31 11:36:57 -04:00
Matthew D. Steele
0db33e9c86 Add "Comments" section to language reference (#1309)
The contents of this section come from the discussion on issue #1305.
2018-07-30 22:27:07 -04:00
Andrew Kelley
3c8d4e04ea std: file system watching for linux 2018-07-30 13:46:09 -04:00
Andrew Kelley
a870228ab4 self-hosted: use std.event.fs.readFile 2018-07-30 13:44:36 -04:00
Andrew Kelley
cc45527333 introduce std.event.fs for async file system functions
only works on linux so far
2018-07-30 13:44:36 -04:00
Andrew Kelley
5d4a02c350 Merge pull request #1307 from ziglang/cancel-semantics
improved coroutine cancel semantics
2018-07-30 13:42:26 -04:00
Andrew Kelley
cfe03c764d fix docs for break from suspend 2018-07-30 13:07:04 -04:00
Andrew Kelley
c91c781952 add behavior tests for cancel semantics 2018-07-30 12:49:53 -04:00
Andrew Kelley
6fd6bc94f5 await sets suspend bit; return clears suspend bit 2018-07-30 12:22:54 -04:00
Andrew Kelley
09304aab77 fix cancel and await semantics 2018-07-29 23:25:40 -04:00
dbandstra
608ff52dc3 add SliceOutStream, rename SliceStream to SliceInStream (#1301) 2018-07-29 14:52:10 -04:00
Andrew Kelley
f884381c60 Merge pull request #1300 from dbandstra/more-stream-functions
A few new functions in InStream/OutStream
2018-07-29 14:51:25 -04:00
dbandstra
f36faa32c4 add skipBytes function to InStream
this reads N bytes, discarding their values
2018-07-28 17:34:28 -07:00
dbandstra
3ce0ea884f add int writing functions to OutStream
added: writeInt, writeIntLe, and writeIntBe
2018-07-28 17:30:05 -07:00
Andrew Kelley
0d79e03816 canceling an await also cancels things awaiting it 2018-07-28 13:52:48 -04:00
Andrew Kelley
05456eb275 make some functions in std.event.Loop public 2018-07-28 12:53:33 -04:00
Andrew Kelley
dd272d1316 await cancels the await target when it is canceled 2018-07-28 12:36:02 -04:00
Andrew Kelley
0ba2bc38d7 await checks the cancel bit 2018-07-28 12:23:47 -04:00
Andrew Kelley
60cda3713f suspend cancels awaiter when it gets canceled 2018-07-28 12:11:39 -04:00
Andrew Kelley
c6f9a4c044 cancel detects suspend bit 2018-07-28 01:26:11 -04:00
Andrew Kelley
6fed777637 cancel detects if the target handle has already returned 2018-07-28 01:22:51 -04:00
Andrew Kelley
f0c049d02b detect double await 2018-07-27 18:37:30 -04:00
Andrew Kelley
e5beca886d suspend checks the cancel bit 2018-07-27 18:07:30 -04:00
Andrew Kelley
e491c38189 resume detects resuming when not suspended 2018-07-27 18:01:39 -04:00
Andrew Kelley
341bd0dfa4 await sets the await bit 2018-07-27 17:47:27 -04:00
Andrew Kelley
0b7a9c0722 cancel sets the cancel bit 2018-07-27 17:42:09 -04:00
Andrew Kelley
02c5bda704 remove ability to break from suspend blocks
closes #803
2018-07-27 17:27:03 -04:00
Andrew Kelley
442e244b4d suspend sets suspend bit 2018-07-27 17:16:00 -04:00
Andrew Kelley
10764ee0e6 resume clears suspend bit 2018-07-27 17:00:41 -04:00
Andrew Kelley
7113f109a4 update coroutine return codegen with new status bits 2018-07-27 15:50:26 -04:00
Andrew Kelley
b3f4182ca1 coroutines have 3 more bits of atomic state 2018-07-26 22:26:00 -04:00
Andrew Kelley
2cbad364c1 add compile error for ignoring return value of while loop bodies
closes #1049
2018-07-26 18:29:07 -04:00
Andrew Kelley
fd575fe1f3 add compile error for missing parameter name of generic function 2018-07-25 18:15:55 -04:00
Andrew Kelley
84195467ad add compile error for non-inline for loop on comptime type 2018-07-25 17:08:55 -04:00
Andrew Kelley
2257660916 fix assertion failure when some compile errors happen
I don't actually know of a test case to trigger this

self-hosted won't have this problem because get_pointer_to_type
will return error.SemanticAnalysisFailed
2018-07-25 13:12:03 -04:00
Andrew Kelley
95f45cfc34 patch LLD to fix COFF crashing when linking twice in same process
closes #1289
2018-07-25 02:36:29 -04:00
Andrew Kelley
02713e8d8a fix race conditions in self-hosted compiler; add test
* fix race condition in std.event.Channel deinit
 * add support to zig build for --no-rosegment
 * add passing self-hosted compare-output test for calling a function
 * put a global lock on LLD linking because it's not thread safe
2018-07-24 21:28:54 -04:00
Andrew Kelley
adefd1a52b self-hosted: function calling another function 2018-07-24 20:24:05 -04:00
Andrew Kelley
2ea08561cf self-hosted: function types use table lookup 2018-07-24 14:20:49 -04:00
Andrew Kelley
1d4a94b635 remove old section from readme
we still want all these people but I think there are better ways
to communicate this than the readme file
2018-07-24 11:04:01 -04:00
Andrew Kelley
29e19ace36 fix logic for determining whether param requires comptime
closes #778
closes #1213
2018-07-24 10:21:33 -04:00
Andrew Kelley
74c80d2c7f Merge pull request #1282 from nwsharp/master
std.io: PeekStream and SliceStream
2018-07-24 09:26:08 -04:00
Nathan Sharp
0046551852 std.io: PeekStream and SliceStream
SliceStream is a read-only stream wrapper around a slice of bytes. It
allows adapting algorithms which work on InStreams to in-memory data.

PeekStream is a stream wrapper which allows "putting back" bytes into
the stream so that they can be read again. This will help make
look-ahead parsers easier to write.
2018-07-23 23:30:40 -07:00
Andrew Kelley
dd9728c5a0 Merge remote-tracking branch 'origin/master' into llvm7 2018-07-24 00:43:12 -04:00
Andrew Kelley
10bdf73a02 Merge pull request #1266 from ziglang/self-hosted-libc-hello-world
Self hosted libc hello world
2018-07-24 00:31:33 -04:00
Andrew Kelley
72599d420b self-hosted: find all libc paths; windows linker code 2018-07-24 00:06:34 -04:00
Andrew Kelley
2614ef056a self-hosted: basic linker code for macos 2018-07-23 17:38:03 -04:00
Andrew Kelley
7dbbddf2a6 macho backtraces - use std.sort.sort instead of insertion sort
it's way faster
2018-07-23 15:36:45 -04:00
Andrew Kelley
5a919dd82d Merge remote-tracking branch 'origin/master' into self-hosted-libc-hello-world 2018-07-23 14:32:13 -04:00
Andrew Kelley
10d2f08d37 self-hosted: fix error messages not cleaning up correctly 2018-07-23 14:28:14 -04:00
Andrew Kelley
d767fae47e self-hosted: add first compare-output test 2018-07-23 00:35:53 -04:00
Andrew Kelley
93e78ee722 self-hosted can compile libc hello world 2018-07-22 23:28:53 -04:00
Andrew Kelley
99153ac0aa add std.math.big.Int.fitsInTwosComp
so that we can pass runtime-known values
2018-07-22 10:58:45 -04:00
Marc Tiehuis
d53fae3551 Add big int fits function (#1279)
Returns whether the current value in an Int fits in the requested type.
2018-07-22 10:11:27 -04:00
Marc Tiehuis
07b6a3d335 Tighten Int.to bounds and add twos-complement bitcount 2018-07-22 17:47:57 +12:00
Andrew Kelley
bbd293355b Merge branch 'kristate-posix-darwin-issue1271' 2018-07-22 00:08:30 -04:00
Andrew Kelley
20f286f22a re-organize std lib darwin files 2018-07-22 00:04:24 -04:00
Andrew Kelley
f72f46e912 Merge branch 'posix-darwin-issue1271' of https://github.com/kristate/zig into kristate-posix-darwin-issue1271 2018-07-21 23:59:35 -04:00
Andrew Kelley
0a32f80d9a Merge branch 'kristate-skippable-tests-issue1274' 2018-07-21 23:44:17 -04:00
Andrew Kelley
4d9964a457 rename error.skip to error.SkipZigTest
also print stats at the end of test runner
2018-07-21 23:43:43 -04:00
Andrew Kelley
44292721bf Merge branch 'skippable-tests-issue1274' of https://github.com/kristate/zig into kristate-skippable-tests-issue1274 2018-07-21 23:32:12 -04:00
kristopher tate
bb1b796711 README: include link to channel logs (#1278) 2018-07-21 23:26:52 -04:00
Sahnvour
2ec9a11646 Very much WIP base implementation for #721.
Currently does:
- read COFF executable file
- locate and load corresponding .pdb file
- expose .pdb content as streams (PDB format)
2018-07-21 20:30:11 +02:00
kristopher tate
c5c053b6fd std.event.tcp: add switch statement in preparation for building-out abstractions;
depends on issue #1274 ;
2018-07-22 03:11:55 +09:00
kristopher tate
bc411af4ff std.event.tcp: SKIP test instead of OKing test;
tracking issue #1274 ;
2018-07-22 02:21:52 +09:00
kristopher tate
df574ccf86 std.special.test_runner.zig: make tests skippable;
tracking issue #1274;

tests can be skipped by returning `error.skip` :
2018-07-22 02:20:03 +09:00
kristopher tate
501dd5f284 CMakeLists.txt: add darwin_socket.zig;
Tracking issue #1271;
thanks @Hejsil;
2018-07-22 01:47:53 +09:00
kristopher tate
460c266216 std.os.posix: Add SOCK_* for darwin;
Tracking issue #1271;
2018-07-21 19:15:03 +09:00
kristopher tate
8062afcb31 std.os.posix: Add SYSPROTO_* for darwin;
Tracking issue #1271;
2018-07-21 19:14:40 +09:00
kristopher tate
7ef110b484 std.os.posix: Add AF_* for darwin;
Tracking issue #1271;
2018-07-21 19:14:14 +09:00
Andrew Kelley
58c5f94a99 self-hosted: share C++ code for finding libc on windows 2018-07-20 23:38:13 -04:00
Jimmi HC
1f4c7d5ebf Fixed windows getPos 2018-07-20 23:05:53 +02:00
Andrew Kelley
f5a67dba08 self-hosted: implicit cast comptime ints to other ints
we now have successful exit codes from main linking
against libc
2018-07-20 01:46:49 -04:00
Andrew Kelley
33fbd8c1d3 self-hosted: convert some stuff to async/await 2018-07-20 00:13:48 -04:00
Andrew Kelley
d9fc149752 relative path to cwd in compile errors 2018-07-19 23:52:44 -04:00
Andrew Kelley
0a880d5e60 fix generation of error defers for fns inside fns
closes #878
2018-07-19 18:06:41 -04:00
Andrew Kelley
3908b4fdee self-hosted: refactor ParsedFile out of existence
also we are successfully analyzing the return type of main
2018-07-19 15:11:39 -04:00
Andrew Kelley
0736e6aa34 std.os.File: add missing pub modifiers 2018-07-19 13:06:13 -04:00
Andrew Kelley
a9f0681f85 prevent non-export symbols from clobbering builtins
closes #1263
2018-07-19 10:47:17 -04:00
Andrew Kelley
1d85b588ea self-hosted: progress on IR for supporting libc hello world
* add c int types
 * some more ir stubs
2018-07-19 00:08:47 -04:00
Andrew Kelley
7f1a550760 std.zig.parse: fix treating integer literals as string literals 2018-07-18 17:56:34 -04:00
Andrew Kelley
bd1c55d2c2 self-hosted: compile errors for return in wrong place
* outside fn definition
 * inside defer expression
2018-07-18 17:43:36 -04:00
Andrew Kelley
aa3b41247f self-hosted: linking against libc
also introduce `zig libc` command to display paths
`zig libc file.txt` will parse equivalent text and use that for libc
paths.
2018-07-18 17:43:36 -04:00
Andrew Kelley
3e4a3fa5b7 self-hosted: find libc on linux 2018-07-18 17:43:36 -04:00
Jimmi Holst Christensen
fd3a41dadc Allow pointers to anything in extern/exported declarations (#1258)
* type_allowed_in_extern accepts all ptr not size 0

* Generate correct headers for non-extern structs/unions/enums
2018-07-18 11:00:42 -04:00
Andrew Kelley
c393a399fb fix invalid character test on windows 2018-07-18 10:51:42 -04:00
Andrew Kelley
cd488c9da5 fix std.os.getAppDataDir test on linux 2018-07-18 10:45:17 -04:00
Andrew Kelley
a8a1b5af07 fix build on windows
* move getAppDataDir and utf16leToUtf8 from self-hosted to std lib
 * fix std.event.Loop on windows
2018-07-18 10:07:22 -04:00
Jimmi HC
b7be082bd9 -Dskip-release now also skips build example tests 2018-07-18 10:28:14 +02:00
Josh Wolfe
843529d234 implement proper utf16leToUtf8 2018-07-18 03:01:01 -04:00
Andrew Kelley
cbfe9a4077 fix @setEvalBranchQuota not respected in generic fn calls
closes #1257
2018-07-17 23:37:17 -04:00
Jay Weisskopf
6394f7e9a3 Fixed minor documentation errors (#1256)
Changed:
- "retuns" to "returns"
- "null-terminated pointers" to "pointers to null-terminated arrays"
2018-07-17 21:18:41 -04:00
Andrew Kelley
a9ab528e34 std.event.Loop.onNextTick dispatches work to waiting threads 2018-07-17 15:17:44 -04:00
Andrew Kelley
ecf8da00c5 self-hosted: linking 2018-07-17 13:18:13 -04:00
Andrew Kelley
1a7cf4cbce port 69e3b4e to self-hosted compiler
See #1249
2018-07-17 10:42:44 -04:00
Wink Saville
3cbf59b4c1 Add swapRemoveOrError (#1254)
* Add swapRemoveOrError, this mirrors setOrError.
2018-07-17 10:29:42 -04:00
Wink Saville
d1a60243c9 Give ArrayList tests consistent names (#1253)
The recent change that added swapRemove used std.ArrayList as the test
name prefix. Change the other tests to use the same prefix for consistency
and making it easier to use --test-filter.
2018-07-17 10:28:08 -04:00
kristopher tate
69e3b4e7dc revert commit 860d3da915 ; please see #1249 for more information; (#1255) 2018-07-17 10:27:18 -04:00
Andrew Kelley
3bb00eac37 self-hosted: implement getAppDataDir for windows 2018-07-17 00:01:36 -04:00
Andrew Kelley
97bfeac13f self-hosted: create tmp dir for .o files and emit .o file for fn 2018-07-16 20:52:50 -04:00
Andrew Kelley
0fa24b6b75 allow implicit cast of undefined to optional 2018-07-16 19:26:15 -04:00
Andrew Kelley
9b56efc957 remove std.ArrayList.removeOrError function 2018-07-16 17:45:18 -04:00
Andrew Kelley
558b0b8791 Merge remote-tracking branch 'origin/master' into llvm7 2018-07-16 13:37:16 -04:00
Andrew Kelley
d3ce9d0643 codegen: remove unused variable 2018-07-16 11:59:37 -04:00
Andrew Kelley
e9a03cccf3 all integer sizes are available as primitives
* fix wrong implicit cast for `@IntType` bit_count parameter.
 * fix incorrect docs for `@IntType` bit_count parameter.

closes #1242
closes #745
closes #1240
2018-07-16 10:53:15 -04:00
Andrew Kelley
363f4facea self-hosted: generate LLVM IR for simple function 2018-07-15 00:07:33 -04:00
Andrew Kelley
2255f275a0 update for latest clang API 2018-07-15 00:03:33 -04:00
Andrew Kelley
4d920cee6e Merge remote-tracking branch 'origin/master' into llvm7 2018-07-14 18:27:51 -04:00
Andrew Kelley
28c3d4809b rename Module to Compilation
and CompilationUnit to ObjectFile
2018-07-14 16:31:17 -04:00
Bas van den Berg
69e50ad2f5 Improve realloc on fixed buffer allocator (#1238)
* Add test to check re-use of memory

* Check if realloc has to reallocate the last allocated memory block.
If so extend that block instead of allocating a new one.

* Also check if the realloc actually preserves the data.
2018-07-14 16:31:11 -04:00
Andrew Kelley
278829fc2c self-hosted: adding a fn to an llvm module 2018-07-14 16:03:22 -04:00
Andrew Kelley
91636f1e8c Merge pull request #1237 from BarabasGitHub/fix-reallocating-from-0
Fix aligned reallocation
2018-07-14 12:09:54 -04:00
Bas van den Berg
c021a44567 Fix aligned reallocation from zero size. 2018-07-14 18:05:05 +02:00
Bas van den Berg
8be6c98ca6 Create unit test that tests aligned reallocation. 2018-07-14 18:04:23 +02:00
Andrew Kelley
29c756abba docs: correct some misinformation 2018-07-14 11:53:51 -04:00
Marc Tiehuis
bf441ed244 Add --stdin option to zig fmt 2018-07-14 11:43:35 -04:00
Andrew Kelley
ed3181f029 Merge branch 'eduardosm-extern-return-small-struct' 2018-07-14 11:33:13 -04:00
Andrew Kelley
f78d4ed30c add an assertion to the test 2018-07-14 11:33:01 -04:00
Eduardo Sánchez Muñoz
722b9b9e59 codegen: Store returned value if type is 'handle_is_ptr' and function is not 'first_arg_ret'.
Seems to fix #1230, includes test.
2018-07-14 11:33:01 -04:00
Andrew Kelley
2a719ee6c5 Merge branch 'tgschultz-arraylist-remove' 2018-07-14 10:02:13 -04:00
Andrew Kelley
b44332f5a6 std.ArrayList - rename remove to swapRemove 2018-07-14 10:01:45 -04:00
tgschultz
a0c1498e65 Added remove to ArrayList 2018-07-14 09:55:47 -04:00
Andrew Kelley
317ed57cb1 docs: clarify mem.Allocator.reallocFn 2018-07-14 09:55:10 -04:00
Andrew Kelley
5f1aa3505d Merge pull request #1232 from BarabasGitHub/fix-array-list-insert
Fix array list insert
2018-07-14 09:35:50 -04:00
Andrew Kelley
e78b1b810f self-hosted: basic IR pass2 2018-07-13 21:56:38 -04:00
Bas van den Berg
fe98a2da70 Add a copyBackwards to fix the broken insert methods for ArrayList. 2018-07-13 23:01:21 +02:00
Bas van den Berg
a1cafa650d Improve ArrayList insert unit tests. 2018-07-13 22:35:34 +02:00
Andrew Kelley
c87102c304 ir_get_ref: delete unnecessary and probably buggy code 2018-07-13 14:53:54 -04:00
Andrew Kelley
171f33b961 ir: remove unnecessary and probably buggy code 2018-07-13 14:18:37 -04:00
Andrew Kelley
860d3da915 ir: remove dead code 2018-07-13 13:37:01 -04:00
Andrew Kelley
5354d1f5fc allow == for comparing optional pointers
closes #658
2018-07-13 12:34:42 -04:00
Andrew Kelley
ac096c2949 zig fmt 2018-07-12 19:24:32 -04:00
Andrew Kelley
69e60e351b self-hosted: better IR for empty fn
avoids a void
2018-07-12 15:22:23 -04:00
Andrew Kelley
687bd92f9c self-hosted: generate zig IR for simple function
no tests for this yet. I think the quickest path to testing will be
creating the .o files and linking with libc, executing, and then
comparing output.
2018-07-12 15:12:44 -04:00
Andrew Kelley
ce11d6d16c ir: refactor lvalues 2018-07-11 21:37:47 -04:00
Andrew Kelley
30c4add85a std.event.Future: workaround in tests for llvm coro memory
See #1194
2018-07-11 20:17:47 -04:00
Andrew Kelley
9751a0ae04 std.atomic: use spinlocks
the lock-free data structures all had ABA problems and
std.atomic.Stack had a possibility to load an unmapped memory address.
2018-07-11 19:38:01 -04:00
Andrew Kelley
9bdcd2a495 add std.event.Future
This is like a promise, but it's for multiple getters, and
uses an event loop.
2018-07-11 16:00:06 -04:00
Andrew Kelley
5954c94d20 build system: add -Dskip-release option to test faster 2018-07-11 14:09:05 -04:00
Andrew Kelley
3f30897fdc add compile error for disallowed types in extern structs
closes #1218
2018-07-11 14:08:56 -04:00
Andrew Kelley
3aaf814b9d Merge pull request #1216 from ziglang/sort-improvements
Add generic comparator generator functions for sorting
2018-07-11 11:56:14 -04:00
Marc Tiehuis
9b054e73f6 Add generic comparator generator functions for sorting
- Copy-by-value instead of pointer where appropriate
 - Clean up old zig fmt issues
2018-07-11 18:44:30 +12:00
Andrew Kelley
c6c49389eb self-hosted: add compile error test for missing fn name 2018-07-11 01:26:46 -04:00
Andrew Kelley
c620a1fe3d Merge pull request #1215 from ziglang/self-hosted-first-test
self-hosted: first passing test
2018-07-11 00:50:17 -04:00
Andrew Kelley
da3acacc14 update vendor list for newest llvm version 2018-07-11 00:46:48 -04:00
Andrew Kelley
8197a14ceb self-hosted test: use C allocator since we depend on libc 2018-07-10 20:27:15 -04:00
Andrew Kelley
574e31f0a0 self-hosted: first passing test
* introduce std.atomic.Int
 * add src-self-hosted/test.zig which is tested by the main test suite
   - it fully utilizes the multithreaded async/await event loop so the
     tests should Go Fast
 * `stage2/bin/zig build-obj test.zig` is able to spit out an error if 2 exported
   functions collide
 * ability for `zig test` to accept `--object` and `--assembly`
   arguments
 * std.build: TestStep supports addLibPath and addObjectFile
2018-07-10 20:18:43 -04:00
Andrew Kelley
8fba0a6ae8 introduce std.event.Group for making parallel async calls 2018-07-10 15:17:01 -04:00
Andrew Kelley
cfaebb20d8 Merge remote-tracking branch 'origin/master' into llvm7 2018-07-10 14:03:03 -04:00
Andrew Kelley
0ce6934e26 allow var args calls to async functions 2018-07-10 11:44:47 -04:00
Andrew Kelley
696ef0bc03 langref: docs for union safety 2018-07-10 10:37:58 -04:00
Andrew Kelley
28f9230b40 fix crash when calling comptime-known undefined function ptr
closes #880
closes #1212
2018-07-10 10:12:08 -04:00
Andrew Kelley
b5cfbfd84e fix regression from b6eb4048 2018-07-09 23:41:28 -04:00
Andrew Kelley
1b82a9defc enable basic event loop test 2018-07-09 22:41:16 -04:00
Andrew Kelley
b6eb404831 organize std.event into directories 2018-07-09 22:22:44 -04:00
Andrew Kelley
ccef60a640 Merge pull request #1198 from ziglang/m-n-threading
M:N threading
2018-07-09 22:06:47 -04:00
Andrew Kelley
10cc49db1c define c macros before importing llvm h files
Seems to matter on Ubuntu 16.04.

closes #1196
2018-07-09 22:05:22 -04:00
Andrew Kelley
c89aac85c4 better workaround for guaranteeing memory in coroutine frame
See #1194
2018-07-09 21:21:59 -04:00
wilsonk
a2834d48b9 Update throughput_test.zig. (#1211) 2018-07-09 17:21:20 -04:00
Andrew Kelley
1a1534ecb5 fix regression on macos 2018-07-09 17:16:06 -04:00
Andrew Kelley
3f4d0ecd7e Merge remote-tracking branch 'origin/master' into m-n-threading 2018-07-09 17:14:42 -04:00
Andrew Kelley
0ac1b83885 fix non-portable format specifier 2018-07-09 17:13:31 -04:00
Andrew Kelley
9462852433 std.event.Loop multithreading for windows using IOCP 2018-07-09 16:49:46 -04:00
Andrew Kelley
caa0085057 implement std.os.cpuCount for windows 2018-07-09 13:19:11 -04:00
Andrew Kelley
05f1ea33d2 ZIG_DEBUG_COLOR=1 overrides tty detection for runtime stack traces 2018-07-09 12:12:37 -04:00
Andrew Kelley
2ee67b7642 langref: docs for invalid error set cast and incorrect pointer alignment
also add detection of incorrect pointer alignment at compile-time
of pointers that were constructed with `@intToPtr`.
2018-07-09 11:13:29 -04:00
Andrew Kelley
9eb51e20ed fix crash on @ptrToInt of a *void
closes #1192
2018-07-09 10:44:06 -04:00
Andrew Kelley
42ba06133a std.Hashmap - don't use catch unreachable in tests 2018-07-09 10:44:06 -04:00
Andrew Kelley
a0c564d762 zig fmt 2018-07-09 01:23:47 -04:00
Andrew Kelley
3ba451778f fix regressions on linux 2018-07-09 01:22:36 -04:00
Marc Tiehuis
82e9190d09 Update zig.parser benchmark program 2018-07-09 17:14:04 +12:00
Andrew Kelley
04d3da4bd1 std.os.cpuCount implementation for macos 2018-07-09 01:08:33 -04:00
Andrew Kelley
50d70d5f49 tests passing with kqueue on macos 2018-07-08 02:46:10 -04:00
Josh Wolfe
410b4d9bdf builder.addBuildOption 2018-07-08 00:00:05 -04:00
Andrew Kelley
ced3aae3b2 cleaner output from zig build when there are compile errors 2018-07-07 20:31:50 -04:00
Andrew Kelley
c15a6fa9d0 add std.os.cpuCount and have std.event.Loop use it for thread pool size 2018-07-07 01:23:18 -04:00
Andrew Kelley
57f36c4201 std.event.Loop: use EPOLLONESHOT to save 1 syscall
when a thread pool worker accepts a coroutine to resume
2018-07-07 00:32:19 -04:00
Andrew Kelley
eb326e1553 M:N threading
* add std.atomic.QueueMpsc.isEmpty
 * make std.debug.global_allocator thread-safe
 * std.event.Loop: now you have to choose between
   - initSingleThreaded
   - initMultiThreaded
 * std.event.Loop multiplexes coroutines onto kernel threads
 * Remove std.event.Loop.stop. Instead the event loop run() function
   returns once there are no pending coroutines.
 * fix crash in ir.cpp for calling methods under some conditions
 * small progress self-hosted compiler, analyzing top level declarations
 * Introduce std.event.Lock for synchronizing coroutines
 * introduce std.event.Locked(T) for data that only 1 coroutine should
   modify at once.
 * make the self hosted compiler use multi threaded event loop
 * make std.heap.DirectAllocator thread-safe

See #174

TODO:
 * call sched_getaffinity instead of hard coding thread pool size 4
 * support for Windows and MacOS
 * #1194
 * #1197
2018-07-07 00:32:19 -04:00
Andrew Kelley
d8295c1889 add @popCount intrinsic 2018-07-07 00:25:32 -04:00
Andrew Kelley
e19f0b5d9c remove outdated semantic analysis documentation 2018-07-06 18:24:09 -04:00
Andrew Kelley
4ad4cd2654 fix iterating over a void slice
closes #1203
2018-07-06 17:27:44 -04:00
Andrew Kelley
1cf7511dc9 add compile error notes for where struct definitions are
closes #1202
2018-07-06 16:20:46 -04:00
Andrew Kelley
6d793c0ea3 langref: add more internal links 2018-07-06 16:20:31 -04:00
Andrew Kelley
0e9fef78dd Merge branch 'isaachier-switch-enum-fix' 2018-07-06 12:07:57 -04:00
Andrew Kelley
1a5bd88881 alternate implementation of previous commit
This strategy adds another field to the SwitchBr instruction,
which is the result of the CheckSwitchProngs instruction. The
type of the result is void, and is unused, except that the SwitchBr
instruction will not perform analysis if the CheckSwitchProngs
instruction did not pass analysis. This allows the CheckSwitchProngs
instruction to do implicit casting for its type checking, while
preventing duplicate compile error messages.
2018-07-06 12:03:07 -04:00
Andrew Kelley
b5d07297de Merge remote-tracking branch 'origin/master' into llvm7 2018-07-04 20:43:49 -04:00
Isaac Hier
9cff23dbf9 Fix assertion crash on enum switch values 2018-07-04 13:27:10 -04:00
Isaac Hier
9395162a7c Debug enum issue 2018-07-04 12:47:35 -04:00
Andrew Kelley
8c39cdc89f fix await on early return when return type is struct
previously, await on an early return would try to access the
destroyed coroutine frame; now it copies the result into a
temporary variable before destroying the coroutine frame
2018-07-04 11:51:02 -04:00
Marc Tiehuis
1d18688628 Do not normalize langref.html.in line endings
See #1191.
2018-07-04 23:47:15 +12:00
Jimmi HC
28821b5f31 Fixed last commit compiler error 2018-07-04 11:35:29 +02:00
Jimmi HC
4f32b86142 Allow allocation of any 0 sized type (not just void) 2018-07-04 11:29:02 +02:00
Andrew Kelley
291afcf75a fix runtime libc detection depending on locale
closes #1165
2018-07-03 14:20:26 -04:00
Andrew Kelley
27fc49f72c langref: improve docs for while and undefined
closes #1190
2018-07-03 14:03:27 -04:00
Andrew Kelley
9665cfe027 update for latest llvm 2018-07-03 03:48:47 -04:00
Marc Tiehuis
1eda86e1ad Clean up outstanding compiler_rt todos 2018-07-03 13:22:12 +12:00
Andrew Kelley
6e1425e312 Merge remote-tracking branch 'origin/master' into llvm7 2018-07-02 20:00:13 -04:00
Andrew Kelley
06e8c2e519 fix stage2 macos build
See #1173
2018-07-02 17:55:59 -04:00
Andrew Kelley
bd282d6cca Merge pull request #1176 from bnoordhuis/f16-std
improve std.math f16 support
2018-07-02 16:03:25 -04:00
Andrew Kelley
22b7312460 Merge pull request #1173 from ziglang/event-loop-channel
add event loop Channel abstraction
2018-07-02 16:01:38 -04:00
Andrew Kelley
35463526cc add runtime safety for @intToEnum; add docs for runtime safety
See #367
2018-07-02 15:50:28 -04:00
Andrew Kelley
2da999372a add another BuildError code 2018-07-02 15:25:23 -04:00
Andrew Kelley
96a6bc57d2 modify std.event.Loop to work for windows and macos 2018-07-02 14:38:11 -04:00
Andrew Kelley
a3f55aaf34 add event loop Channel abstraction
This is akin to channels in Go, except:
 * implemented in userland
 * they are lock-free and thread-safe
 * they integrate with the userland event loop

The self hosted compiler is changed to use a channel for events,
and made to stay alive, watching files and performing builds when
things change, however the main.zig file exits after 1 build.

Note that nothing is actually built yet, it just parses the input
and then declares that the build succeeded.

Next items to do:
 * add windows and macos support for std.event.Loop
 * improve the event loop stop() operation
 * make the event loop multiplex coroutines onto kernel threads
 * watch source file for updates, and provide AST diffs
   (at least list the top level declaration changes)
 * top level declaration analysis
2018-07-02 14:38:11 -04:00
Andrew Kelley
2759c7951d always link against compiler_rt.o even when linking libc
sometimes libgcc is missing things we need, so we always link
compiler_rt and rely on weak linkage to allow libgcc to override.
2018-07-02 14:10:27 -04:00
Josh Wolfe
0206b76351 syntax in build.zig example doc 2018-07-01 22:03:51 -04:00
Josh Wolfe
e833a5a24c gitignore docgen test artifacts 2018-07-01 13:47:29 -04:00
Andrew Kelley
4c0e280d6d Merge pull request #1185 from ziglang/undefined-at-comptime-improvements
Operators now throw a compiler error when operating on undefined values
2018-07-01 01:33:23 -04:00
Jimmi Holst Christensen
b182151de5 Fixed line numbers for tests 2018-06-30 21:59:14 +02:00
Jimmi Holst Christensen
055e0fef4e Avoid resolve_const in cmp when instr are not comptime 2018-06-30 21:22:26 +02:00
Jimmi Holst Christensen
ecd5e60be9 Expanded the list of operators that catch undefined values at comptime 2018-06-30 20:50:09 +02:00
Jimmi Holst Christensen
42033ea3ca Merge pull request #1167 from ziglang/comptime-array-by-value
Implement const_values_equal for arrays
2018-06-30 18:58:31 +02:00
Andrew Kelley
3f4b77f561 Merge pull request #1177 from jayschwa/fix-out-of-src-builds
Fix version detection for out-of-source builds
2018-06-30 12:01:47 -04:00
Jimmi Holst Christensen
01bd5c46e1 Revert "ir_resolve_const now checks recursively for undef values"
This reverts commit 4c3f27ce1e.
2018-06-30 17:35:06 +02:00
Jimmi Holst Christensen
616fe798c8 Revert "contains_comptime_undefined_value should not follow pointers"
This reverts commit 58b1692182.
2018-06-30 17:35:05 +02:00
Marc Tiehuis
887c97742f Alignment fix and allow rudimentary f128 float printing 2018-06-30 21:58:59 +12:00
Marc Tiehuis
951512f5ae compiler_rt: Add CMake entries 2018-06-30 21:58:59 +12:00
Marc Tiehuis
9f48b2ab48 compiler_rt: Remove wrapping add/sub operators where unneeded
Closes #495.
2018-06-30 21:58:59 +12:00
Marc Tiehuis
814a34f263 compiler_rt: Add floattitf/floattidf/floattisf 2018-06-30 21:58:59 +12:00
Marc Tiehuis
53fef94b9f compiler_rt: Add missing install targets 2018-06-30 21:58:59 +12:00
Marc Tiehuis
e19fc4a0a3 compiler_rt: Add missing exports 2018-06-30 21:58:59 +12:00
Marc Tiehuis
cb7bdc2da1 compiler_rt: Add floatuntitf 2018-06-30 21:58:59 +12:00
Marc Tiehuis
61ebfe6603 compiler_rt: Add floatunditf and floatunsitf 2018-06-30 21:58:59 +12:00
Marc Tiehuis
c32b2e45ef compiler_rt: Add floatuntisf 2018-06-30 21:58:59 +12:00
Marc Tiehuis
379950f81d compiler_rt: Add trunc f128 narrowing functions 2018-06-30 21:58:59 +12:00
Jay Weisskopf
25bbb1a8ff Fix version detection for out-of-source builds
Git was called in the build directory and not the source directory.
This works fine when the build directory resides within the source
repository, but doesn't work for out-of-source builds. Example:

```
~/zigbuild$ cmake ../zig
fatal: not a git repository (or any of the parent directories): .git
Configuring zig version 0.2.0+
```

Use Git's `-C <path>` flag to always point to the source directory so
that it doesn't matter where the build directory lives.
2018-06-29 22:22:04 -04:00
Ben Noordhuis
30cfc0ab2c test std.math f16 sqrt support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
be36179064 add std.math f16 signbit support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
ca444e6191 add std.math f16 copysign support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
d293f1a0ed add std.math f16 floor support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
1abc925292 add std.math f16 fabs support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
f36b095b5f add std.math f16 isnormal support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
30b75ae353 add std.math f16 isfinite support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
a36d7b6131 add std.math f16 inf support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
27b02413dc add std.math f16 nan support
refs #1122
2018-06-30 01:58:17 +02:00
Ben Noordhuis
61df5bc142 add std.math f16 constants
refs #1122
2018-06-30 01:58:17 +02:00
Andrew Kelley
a3ab4325fd Merge pull request #1175 from bnoordhuis/zig-test-emit-switch
support --emit in 'test' command
2018-06-29 18:39:19 -04:00
Ben Noordhuis
03f66825d6 support --emit in 'test' command
Support the `--emit` switch in `zig --emit asm test file.zig`.

The command fails because no tests run (no executable is created) but
it emits the requested file.  That seems like a good tradeoff.
2018-06-29 23:32:10 +02:00
isaachier
f1c56f7f22 Clarify reason implicit cast does not work for large RHS (#1168)
* Clarify reason implicit cast does not work for large RHS
2018-06-29 14:52:25 -04:00
Andrew Kelley
0874a5ba77 std.atomic.queue - document limitation and add MPSC queue 2018-06-29 14:45:42 -04:00
Jimmi HC
58b1692182 contains_comptime_undefined_value should not follow pointers 2018-06-29 11:34:38 +02:00
Jimmi HC
4c3f27ce1e ir_resolve_const now checks recursively for undef values 2018-06-29 10:21:43 +02:00
Jimmi HC
b1128b18d5 Assert that array is not ConstArraySpecialUndef in const_values_equal 2018-06-29 08:41:16 +02:00
Jimmi HC
3ec38b2494 Implement const_values_equal for array type
* This allows arrays to be passed by value at comptime
2018-06-28 10:34:37 +02:00
Marc Tiehuis
4a35d7eeeb Correct hex-float parsing
Unblocks #495.
2018-06-28 20:12:03 +12:00
Andrew Kelley
2fa588e81d fix coroutine accessing freed memory
closes #1164
2018-06-27 18:45:21 -04:00
Andrew Kelley
19961c50e4 fix comptime @tagName crashing sometimes
closes #1118
2018-06-27 13:15:55 -04:00
Andrew Kelley
6f88ecc9b6 add f16 to langref 2018-06-27 12:59:12 -04:00
tgschultz
3e94347e61 Fix up some std.rand syntax #1161 (#1162)
* Fix old syntax in rand

Ziggurat somehow did not get updated to latest syntax

* Fix broken float casts

f32 float casts somehow not updated to latest syntax
2018-06-27 12:30:15 -04:00
Andrew Kelley
1b4bae6d69 Merge pull request #1159 from bnoordhuis/f16
add f16 type
2018-06-27 12:29:05 -04:00
Ben Noordhuis
440c1d52b4 simplify comptime floating-point @divTrunc
Replace a conditional ceil/floor call with an unconditional trunc call.
2018-06-27 16:20:04 +02:00
Ben Noordhuis
fd75e73ee9 add f16 type
Add support for half-precision floating point operations.

Introduce `__extendhfsf2` and `__truncsfhf2` in std/special/compiler_rt.

Add `__gnu_h2f_ieee` and `__gnu_f2h_ieee` as aliases that are used in
Windows builds.

The logic in std/special/compiler_rt/extendXfYf2.zig has been reworked
and can now operate on 16 bits floating point types.

`extendXfYf2()` and `truncXfYf2()` are marked `inline` to work around
a not entirely understood stack alignment issue on Windows when calling
the f16 versions of the builtins.

closes #1122
2018-06-27 16:20:04 +02:00
Ben Noordhuis
1f45075a0e dry floating-point type definitions 2018-06-27 16:20:04 +02:00
Ben Noordhuis
0ebc7b66e6 scope variables in floating point cast tests
Fixes a bug where the result of a @floatCast wasn't actually checked; it
was checking the result from the previous @floatCast.
2018-06-27 16:20:04 +02:00
Andrew Kelley
4de60dde6e langref: explicit cast section 2018-06-26 15:48:42 -04:00
Andrew Kelley
11ca38a4e9 fix crash for optional pointer to empty struct
closes #1153
2018-06-26 15:27:41 -04:00
Andrew Kelley
af95e15572 rename get_maybe_type to get_optional_type 2018-06-26 15:11:05 -04:00
Isaac Hier
8e714289ca Fix os_path_join for case where dirname is empty 2018-06-26 13:31:32 -04:00
Andrew Kelley
3290e72833 std.zig.ast: fix incorrect impl of FnProto.firstToken
closes #1151
2018-06-25 11:54:10 -04:00
Andrew Kelley
8866bef92c clean up self hosted main. delete unsupported commands 2018-06-22 01:54:38 -04:00
Andrew Kelley
be2a4c42bd Merge pull request #1149 from ziglang/issue346
fix compiler crash for invalid enum
2018-06-21 21:21:05 -04:00
Andrew Kelley
459d72f873 fix compiler crash for invalid enum
closes #1079
closes #1147
2018-06-21 17:41:49 -04:00
Andrew Kelley
5f38d6e2e9 add casting docs, __extenddftf2, and __extendsftf2 2018-06-21 14:44:35 -04:00
Andrew Kelley
47dd1049c8 Merge pull request #1145 from isaachier/bigint-neg-one-incr-fix
Fix bigint -1 increment operation
2018-06-21 13:40:37 -04:00
Isaac Hier
f1207a8e74 Add test case 2018-06-21 08:32:05 -04:00
Isaac Hier
eeda1a1396 Fix logic 2018-06-21 08:17:08 -04:00
Isaac Hier
0ab4afbf42 Fix increment operation for bigint -1 2018-06-21 08:14:26 -04:00
Marc Tiehuis
f50c0c664f Add float repr bit extraction functions 2018-06-21 01:45:12 -04:00
Ben Noordhuis
eb6a8e6a3b fix f128 remainder division bug
The modulo operation computed rem(b+rem(a,b), b) which produces -1
for a=1 and b=2.

Switch to a - b * trunc(a/b) which produces the expected result, 1.

closes #1137
2018-06-20 17:37:38 -04:00
Andrew Kelley
4eca75c53b Merge branch 'kristate-stdmem-replace-create-with-construct' 2018-06-20 17:33:49 -04:00
Andrew Kelley
85f928f8bf remove std.mem.Allocator.construct and other fixups 2018-06-20 17:33:29 -04:00
Andrew Kelley
e891f9cd9d zig fmt 2018-06-20 17:16:27 -04:00
kristopher tate
6bd8610063 std.mem.Allocator.construct: improve formatting; 2018-06-21 01:40:25 +09:00
kristopher tate
4b46af4810 std.mem.Allocator.construct: remove deprecation warning; 2018-06-21 01:39:48 +09:00
kristopher tate
71db8df548 std: update stdlib to match updated allocator create signature; ref #733 2018-06-21 00:40:21 +09:00
kristopher tate
457c0f0a7e std.mem: remove allocator create in favor of construct; ref #733 2018-06-21 00:39:19 +09:00
Andrew Kelley
55193cb13b fix runtime fn ptr equality codegen
closes #1140
2018-06-20 06:46:53 -04:00
Andrew Kelley
7c99c30bf4 fix calling method with comptime pass-by-non-copying-value self arg
closes #1124
2018-06-19 19:35:59 -04:00
Andrew Kelley
42db807f37 remove redundant implicit casting code
and introduce better type mismatch errors

closes #1061
2018-06-19 18:51:46 -04:00
Andrew Kelley
ee525c92a4 langref: organize docs for inline loops and add note about when to use it 2018-06-19 17:21:08 -04:00
Andrew Kelley
c7804277bf @floatToInt now has safety-checked undefined behavior
when the integer part does not fit in the destination integer type

 * Also fix incorrect safety triggered for integer casting an
   `i32` to a `u7`. closes #1138
 * adds compiler-rt function: `__floatuntidf`
2018-06-19 16:06:10 -04:00
Andrew Kelley
0b92d689d0 update langref 2018-06-19 12:16:59 -04:00
Andrew Kelley
85422d7aea Merge pull request #1136 from alexnask/typeinfo_improvements
@typeInfo now uses optional types instead of @typeOf(undefined)
2018-06-19 11:46:32 -04:00
Andrew Kelley
9f2324389d Merge pull request #1134 from ziglang/no-explicit-casting
remove "cast harder" casting syntax; add new casting builtins
2018-06-19 11:40:21 -04:00
Alexandros Naskos
811539f8ee Added missing ?type in docs. 2018-06-19 17:49:48 +03:00
Alexandros Naskos
1392313236 @typeInfo now uses optional types instead of @typeOf(undefined) 2018-06-19 17:45:19 +03:00
Andrew Kelley
a3ddd0826b remove enum to/from int casting syntax; add @enumToInt/@intToEnum
see #1061
2018-06-19 03:50:38 -04:00
Andrew Kelley
dd62737266 translate-c: add new libclang c types 2018-06-19 03:43:40 -04:00
Andrew Kelley
626b73e8be remove error to/from int casting syntax; add @errorToInt/@intToError
See #1061
2018-06-18 18:48:29 -04:00
Andrew Kelley
a430853a48 standard library fixes 2018-06-18 17:43:01 -04:00
Andrew Kelley
1aafbae5be remove []u8 casting syntax. add @bytesToSlice and @sliceToBytes
See #1061
2018-06-18 17:25:29 -04:00
Andrew Kelley
5d705fc6e3 remove error set casting syntax. add @errSetCast
See #1061
2018-06-18 15:01:42 -04:00
Andrew Kelley
c757984879 Merge remote-tracking branch 'origin/master' into llvm7 2018-06-18 14:51:23 -04:00
Andrew Kelley
1ca90b5856 zig fmt: support directories
zig fmt accepts any number of file paths. For each one, if it
is a file, then it formats the file. If it is a directory, then
zig recursively scans the directory, formatting all files that
end in `.zig`.

it maintains a map of paths that have been seen already, to avoid
softlink loops.

closes #1068
2018-06-18 13:58:08 -04:00
Andrew Kelley
cd4676a233 stage1: update darwin code to workaround old libc bug
See #1128
2018-06-18 12:54:31 -04:00
Andrew Kelley
1aa93808b1 Merge branch 'binary132-fix-1117-macos-realpath' 2018-06-18 12:19:16 -04:00
Andrew Kelley
4ce36a6475 adjust logic for finding the path to zig executable on darwin 2018-06-18 12:18:39 -04:00
Andrew Kelley
c09c3902c4 Merge branch 'fix-1117-macos-realpath' of https://github.com/binary132/zig into binary132-fix-1117-macos-realpath 2018-06-18 12:16:47 -04:00
Andrew Kelley
48985a7e68 langref: add docs for void
see #367
2018-06-18 12:02:30 -04:00
Andrew Kelley
8fd7cc11e1 disallow opaque as a return type of fn type syntax
closes #1115
2018-06-18 11:12:15 -04:00
Andrew Kelley
d49d6f0cde fix compiler crash when using @intToFloat with float literal
closes #1132
2018-06-18 11:04:18 -04:00
Andrew Kelley
7151d72532 Merge pull request #1130 from ziglang/remove-bool-casting-syntax
remove bool to int syntax. add @boolToInt
2018-06-18 10:52:31 -04:00
Bodie Solomon
c7057bd25b Fix 1117: Revise realpath scratch logic 2018-06-18 07:40:31 -04:00
Bodie Solomon
0456822892 Fix 1117: Tweak realpath logic to use out_path as scratch space 2018-06-18 07:40:31 -04:00
Bodie Solomon
e6b69151c0 Fix 1117: Use realpath in stage1 Darwin os_self_exe_path
Issue: https://github.com/ziglang/zig/issues/1117

The macOS stage1 Zig compiler should look in Zig's real absolute path
for the Zig stdlib, but os_self_exe_path looks in its path as returned
by _NSGetExecutablePath, which may be a symlink.  This means that a
symlinked Zig cannot find the Zig stdlib.

This patch fixes the issue by resolving the _NSGetExecutablePath result
to the real path using realpath() before copying the result to the
output path.
2018-06-18 07:40:31 -04:00
Andrew Kelley
4210f1f6a0 remove bool to int syntax. add @boolToInt
add missing docs

See #1061
2018-06-18 03:07:16 -04:00
Andrew Kelley
d52ef95f77 disable failing macos test. see #1126
I'm unable to reproduce the failure on my mac laptop
more investigation required
2018-06-18 01:09:51 -04:00
Andrew Kelley
92a36040b1 msp430 target: c_long is always 32 bits
closes #1125
2018-06-18 01:03:45 -04:00
Andrew Kelley
906ed059ce update std.DynLib to use @intCast 2018-06-18 00:52:55 -04:00
Andrew Kelley
431fda4141 Merge pull request #1123 from ziglang/remove-number-casting-syntax
Remove number casting syntax
2018-06-17 14:40:07 -04:00
Andrew Kelley
e5956f23ca add target C int type information for msp430 target
closes #1125
2018-06-17 12:47:27 -04:00
Andrew Kelley
74ccf56a4b update more tests 2018-06-17 12:33:24 -04:00
Andrew Kelley
3c12ba7180 update test cases 2018-06-17 04:32:57 -04:00
Andrew Kelley
7912061226 remove integer and float casting syntax
* add `@intCast`
 * add `@floatCast`
 * add `@floatToInt`
 * add `@intToFloat`

See #1061
2018-06-17 02:57:07 -04:00
Andrew Kelley
06a26f0965 std.Complex: use better arg passing convention and fix a TODO 2018-06-16 21:32:53 -04:00
Andrew Kelley
751518787a Merge pull request #1109 from ziglang/pass-by-non-copying-value
allow passing by non-copying value
2018-06-16 21:13:10 -04:00
Andrew Kelley
3ee4d23ebd posix read can return error.IsDir 2018-06-16 19:54:16 -04:00
Andrew Kelley
eae9634ac9 langref: be clear that float types are always IEEE 754 2018-06-16 19:53:52 -04:00
Andrew Kelley
472b7ef7e6 disable byval 2018-06-16 19:37:00 -04:00
Andrew Kelley
e311cd562b don't automatically take pointer when passing by non-copying value
this commit does not have all tests passing
2018-06-16 19:37:00 -04:00
Andrew Kelley
59b3dc8907 allow passing by non-copying value
closes #733
2018-06-16 19:36:33 -04:00
Andrew Kelley
a7d59086b4 disable load dynamic library test
it's failing on CI. I will troubleshoot it and then re-enable
2018-06-16 19:36:06 -04:00
Andrew Kelley
c529b814ee load_dynamic_library test: no need to link libc 2018-06-16 18:54:41 -04:00
Sahnvour
f47655eb6d pointer reform: missed change in windows specific code. 2018-06-16 23:47:51 +02:00
Andrew Kelley
65d04cbeb4 std.DynLib: open the fd with CLOEXEC 2018-06-16 17:27:45 -04:00
Andrew Kelley
48de57d824 add basic std lib code for loading dynamic libraries
this is going to only work for very basic libraries;
I plan to slowly add more features over time to support more
complicated libraries
2018-06-16 17:01:23 -04:00
Jay Weisskopf
b3a3e2094e Make zig version compliant with SemVer (#1113)
The git revision is build metadata and should be appended with a plus sign.

https://semver.org/#spec-item-10
2018-06-15 14:06:56 -04:00
Andrew Kelley
84a700f972 llvm7: find external liblldWasm and update for newest lld macho API 2018-06-14 18:28:42 -04:00
Andrew Kelley
32dd98b19f Merge remote-tracking branch 'origin/master' into llvm7 2018-06-14 18:27:59 -04:00
Andrew Kelley
f0697c28f8 langref: docs for error return traces
See #367
2018-06-14 18:12:31 -04:00
Andrew Kelley
cdf1e366f9 fix build on windows, broken by previous commit 2018-06-14 16:36:07 -04:00
Andrew Kelley
6943cefebf std.os.path.dirname: return null instead of empty slice
for when there is no directory component. Makes it harder
to write bugs.

closes #1017
2018-06-14 16:15:32 -04:00
Andrew Kelley
2219cc0612 Merge pull request #1105 from ziglang/i128-compiler-rt
Add i128 compiler-rt div/mul support
2018-06-14 10:59:38 -04:00
Alexandros Naskos
4ec09ac243 Enabled optional types of zero bit types with no LLVM DI type. (#1110)
* Zero bit optional types do not need a LLVM DI type
2018-06-14 10:57:28 -04:00
Marc Tiehuis
a369d69c51 Add windows x86_64 i128 abi workaround 2018-06-14 21:18:36 +12:00
Andrew Kelley
fc87f6e417 fix race condition bug in test harness of std.atomic 2018-06-13 11:57:57 -04:00
Andrew Kelley
e1f56c9af6 std.zig.ast: add test for iterate
closes #1101
2018-06-13 11:48:06 -04:00
Andrew Kelley
41e6c664d8 langref: add merge error sets operator to operator table 2018-06-13 11:09:41 -04:00
Andrew Kelley
8dd24796c4 disallow implicit casts that break rules for optionals
closes #1102
2018-06-13 11:04:09 -04:00
Marc Tiehuis
9110140514 Add i128 compiler-rt div/mul support 2018-06-13 22:25:04 +12:00
Andrew Kelley
86adc1ef39 add docs and missing test case for merging error sets
See #367
2018-06-12 19:38:59 -04:00
Andrew Kelley
13d3255e2a docgen: don't leave garbage .h files lying around
closes #1100
2018-06-12 15:21:14 -04:00
Andrew Kelley
fdd9cf0928 better debugging for CI failures of std.atomic 2018-06-12 15:14:32 -04:00
Andrew Kelley
259413251d fix ability to call mutating methods on zero size structs
closes #838
2018-06-12 15:06:02 -04:00
Andrew Kelley
7580e39b38 zig fmt 2018-06-12 02:18:11 -04:00
Andrew Kelley
3dd9af9948 implement std.os.Dir for windows
improve std.os.File.access so that it does not depend on shlwapi.dll

closes #1084
2018-06-12 01:57:09 -04:00
Andrew Kelley
0a18d53c3d langref: add orelse keyword to syntax highlighting 2018-06-11 17:38:24 -04:00
Andrew Kelley
5252566137 langref: add coroutines documentation
See #367
2018-06-11 17:34:45 -04:00
Andrew Kelley
03c16c6c54 implement @tagName as a switch instead of table lookup
closes #976
closes #1080
2018-06-11 14:58:42 -04:00
tgschultz
854f90aa30 Added C string support to fmt by using "{s}". The format string requirement is for safety. (#1092) 2018-06-10 12:57:21 -04:00
Marc Tiehuis
dc8bda7e02 Add arbitrary-precision integer to std
A few notes on the implementation:

 - Any unsigned power of two integer type less than 64 bits in size is supported
 as a Limb type.
 - The algorithms used are kept simple for the moment. More complicated
 algorithms are generally only more useful as integer sizes increase a
 lot and I don't expect our current usage to be used for this purpose
 just yet.
 - All branches (practically) have been covered by tests.

See 986a2b3243/bench
for rough performance comparison numbers.

Closes #364.
2018-06-10 18:24:34 +12:00
Andrew Kelley
0a95b0f1ff std.zig: update syntax for orelse keyword 2018-06-10 01:18:31 -04:00
Andrew Kelley
77678b2cbc breaking syntax change: orelse keyword instead of ?? (#1096)
use the `zig-fmt-optional-default` branch to have zig fmt
automatically do the changes.

closes #1023
2018-06-10 01:13:51 -04:00
Andrew Kelley
ec1b6f6673 breaking syntax change: ??x to x.? (#1095)
See #1023

This also renames Nullable/Maybe to Optional
2018-06-09 23:42:14 -04:00
Ben Noordhuis
d464b25322 support --target-arch wasm32 (#1094)
Add wasm32 support to the build-obj, build-exe and build-lib commands
of the stage 1 compiler.  Wasm64 should work transparently once it's
supported in upstream LLVM.

To export a function:

    // lib.zig - for exposition, not necessary for this example
    pub use @import("add.zig");

    // add.zig
    export fn add(a: i32, b: i32) i32 {
        return a + b;
    }

To import a function:

    // cube.zig
    extern fn square(x: i32) i32;

    export fn cube(x: i32) i32 {
        return x * square(x);
    }
2018-06-09 22:39:22 -04:00
marleck55
7a9635555b std/fmt: Use lowercase k for kilo in base 1000 (#1090) 2018-06-09 12:05:58 -04:00
Andrew Kelley
670a0a3eed Merge branch 'clownpriest-arraylist_set' 2018-06-09 12:04:48 -04:00
Andrew Kelley
fc6446702e clean up std.ArrayList
* add `std.debug.assertError`
 * `std.ArrayList` update everything to follow `self` convention
 * rename `std.ArrayList.set` to `std.ArrayList.setOrError`
 * add `std.ArrayList.set` which asserts

Before 1.0.0 we might remove some of this API, because you can use
`toSlice()` for everything, but it's ok to add these functions as
an experiment before then.
2018-06-09 12:03:11 -04:00
Arthur Elliott
e0092ee4a5 add set function to arraylist
so you can set a value without growing the underlying buffer,
with range safety checks
2018-06-09 11:42:41 -04:00
Andrew Kelley
9046b5eac0 fix assertion failure when debug printing comptime values 2018-06-09 11:41:59 -04:00
Andrew Kelley
6edd81109d nullable pointers follow const-casting rules
any *T -> ?*T cast is allowed implicitly, even
when it occurs deep inside the type, and the cast
is a no-op at runtime.

in order to add this I had to make the comptime value
representation of nullable pointers the same as the
comptime value representation of normal pointers,
so that we don't have to do any recursive transformation
of values when doing this kind of cast.
2018-06-09 00:26:26 -04:00
isaachier
1a9d2f3aae Fix error handling in Buffer::fromOwnedSlice (#1082) 2018-06-08 19:24:48 -04:00
Andrew Kelley
39fa313ad8 disable some implicit casts for unknown length pointers
closes #770
2018-06-08 14:57:16 -04:00
Jimmi HC
bf3d1c1aab Allow access of array.len through a pointer 2018-06-08 09:21:31 +02:00
Marc Tiehuis
ffb089a9f5 Fix json parser comma after empty object case 2018-06-08 17:43:13 +12:00
Andrew Kelley
f0b6dac1f2 add implicit casts from *[N]T
* to `[]T`
 * to `[*]T`

See #770
2018-06-07 22:41:58 -04:00
Andrew Kelley
b65203f573 remove @canImplicitCast builtin
nobody will miss it
2018-06-07 19:50:25 -04:00
Andrew Kelley
688ff2830d langref: automatic update of builtin.zig
now the docs can't get out of date for this

See #367
2018-06-07 19:10:45 -04:00
Andrew Kelley
31aefa6a21 fix structs that contain types which require comptime
Now, if a struct has any fields which require comptime,
such as `type`, then the struct is marked as requiring
comptime as well. Same goes for unions.

This means that a function will implicitly be called
at comptime if the return type is a struct which contains
a field of type `type`.

closes #586
2018-06-07 18:07:30 -04:00
Andrew Kelley
b11c5d8f82 fix std.os.windows.PathFileExists specified in the wrong DLL (#1066)
closes #1054
2018-06-06 15:36:47 -04:00
isaachier
4fc601895b Fix const-ness of buffer in replaceContents method (#1065) 2018-06-06 14:09:47 -04:00
Marc Tiehuis
e7f141b376 Add json.TokenStream (#1062)
This hides some of the low-level parsing details from the
StreamingParser. These don't need to be known when parsing a complete
slice at once (which we can usually do).

Also, remove `Json` from Parser names. The namespace `json` is sufficient.
2018-06-06 11:24:36 -04:00
Braedon
f389e53735 Add newline to zig fmt error (#1064) 2018-06-06 10:45:19 -04:00
Marc Tiehuis
212449bc23 Fix Log2Int type construction
The following case for example, would previously fail:

    const a = u24(1) << Log2Int(u24)(22);
2018-06-06 22:41:55 +12:00
Andrew Kelley
d3693dca73 Pointer Reform: update @typeInfo
* add assertion for trying to do @typeInfo on global error set
 * remove TypeInfo.Slice
 * add TypeInfo.Pointer.Size with possible values
   - One
   - Many
   - Slice

See #770
2018-06-06 00:39:39 -04:00
Andrew Kelley
76c8efd56c add test for not allowing implicit cast from T to [*]const T
See #770
2018-06-05 23:54:14 -04:00
Andrew Kelley
bd13e757e7 disable deref syntax for unknown length pointers
See #770
2018-06-05 23:26:43 -04:00
Andrew Kelley
0ccc186869 disable field access for unknown length pointers
See #770
2018-06-05 23:26:43 -04:00
Andrew Kelley
bbb565a21e README: update support table
macosx does not run on some of these architectures
2018-06-05 21:56:19 -04:00
Andrew Kelley
652f4bdf62 disallow unknown-length pointer to opaque
This also means that translate-c has to detect when a pointer to
opaque is happening, and use `*` instead of `[*]`.

See #1059
2018-06-05 18:03:21 -04:00
Andrew Kelley
ef7f69d14a update to latest libclang 2018-06-05 15:02:48 -04:00
Andrew Kelley
ba5eeea1e5 Merge remote-tracking branch 'origin/master' into llvm7 2018-06-05 11:28:30 -04:00
Andrew Kelley
7a09482536 fix crash when evaluating return type has compile error
closes #1058
2018-06-05 10:48:53 -04:00
Andrew Kelley
7dd18294b7 Merge remote-tracking branch 'origin/master' into llvm7 2018-06-05 10:29:54 -04:00
Andrew Kelley
677eaf29b1 Merge pull request #1057 from ziglang/comptime-int
Rename integer literal type and float literal type
2018-06-05 10:16:51 -04:00
Jimmi HC
a8146ade2a Renamed UndefinedLiteral to Undefined 2018-06-05 11:54:11 +02:00
Jimmi HC
236c680f6b Renamed NullLiteral to Null 2018-06-05 11:30:01 +02:00
Jimmi HC
02cb220faf Renamed "(int/float literal)" to "comptime_int/float" 2018-06-05 11:14:43 +02:00
Andrew Kelley
e53b683bd3 Pointer Reform: proper slicing and indexing (#1053)
* enable slicing for single-item ptr to arrays
 * disable slicing for other single-item pointers
 * enable indexing for single-item ptr to arrays
 * disable indexing for other single-item pointers

see #770
closes #386
2018-06-04 22:11:14 -04:00
Andrew Kelley
32e0dfd4f0 never call malloc with size 0
instead we return nullptr. this makes the behavior consistent
across all platforms.

closes #1044
closes #1045
2018-06-04 14:09:31 -04:00
Andrew Kelley
d21a1922eb support zig fmt: off and zig fmt: on between top level decls
closes #1030
closes #1033
2018-06-04 12:15:02 -04:00
Andrew Kelley
8dfa66fee3 Merge branch 'tgschultz-zig-custom-format' 2018-06-04 11:07:47 -04:00
Andrew Kelley
11e7e03139 Merge branch 'zig-custom-format' of https://github.com/tgschultz/zig into tgschultz-zig-custom-format
I removed the code that checks for type signature and type.
A function named `format` is enough for zig to give it a try.
2018-06-04 11:06:55 -04:00
Andrew Kelley
58ce79f935 Merge remote-tracking branch 'origin/master' into llvm7 2018-06-04 02:58:55 -04:00
Andrew Kelley
96164ce613 disallow single-item pointer indexing
add pointer arithmetic for unknown length pointer
2018-06-04 01:39:57 -04:00
Marc Tiehuis
4c273126df Add context to zig_unreachable calls (#1039)
This greatly aids debugging on platforms with no stack-traces.
2018-06-03 02:30:43 -04:00
Andrew Kelley
b85b68a7fd better compile error for error sets behind nullable 2018-06-02 15:20:51 -04:00
Marc Tiehuis
e514454c0e Make zig fmt exit with error on any parse errors
This is required for proper detection in editor plugins. Other files may
have been formatted correctly, this only indicates that some failed.
2018-06-02 20:49:35 +12:00
Andrew Kelley
f06bce5dda introduce [*] for unknown length pointers
See #770

Currently it does not have any different behavior than `*`
but it is now recommended to use `[*]` for unknown length
pointers to be future-proof.

Instead of [ * ] being separate tokens as the proposal
suggested, this commit implements `[*]` as a single token.
2018-06-02 04:04:23 -04:00
Andrew Kelley
7b386ea242 fix build file template
See #1035
2018-06-01 21:51:54 -04:00
Andrew Kelley
a3d7a807b7 appveyor: remove llvm 5.0.1 from cache 2018-06-01 12:22:53 -04:00
Andrew Kelley
081072d3b6 docs: add missing builtin to langref syntax coloring 2018-06-01 12:22:53 -04:00
Arthur Elliott
08693411d2 fix typo (#1034) 2018-06-01 12:23:07 -04:00
Andrew Kelley
3918e7699d Merge pull request #1032 from ziglang/pointer-reform
use * for pointer type instead of &
2018-06-01 11:49:25 -04:00
Andrew Kelley
e29d12d821 fix incorrect address-of syntax on windows 2018-06-01 01:29:34 -04:00
Andrew Kelley
5f38a01ede run zig fmt 2018-06-01 01:22:35 -04:00
Andrew Kelley
4d13ab07de std.zig: update to new pointer syntax 2018-06-01 01:19:26 -04:00
Andrew Kelley
2a7c8c5b10 add test case for pointer to type and slice of type
closes #588
2018-06-01 00:18:10 -04:00
Andrew Kelley
019217d7a2 fix regressions 2018-06-01 00:17:31 -04:00
Andrew Kelley
2f614c42fe ir: rip out special logic for using addr-of instruction for types
See #588
2018-05-31 18:23:39 -04:00
Andrew Kelley
fcbb7426fa use * for pointer type instead of &
See #770

To help automatically translate code, see the
zig-fmt-pointer-reform-2 branch.

This will convert all & into *. Due to the syntax
ambiguity (which is why we are making this change),
even address-of & will turn into *, so you'll have
to manually fix these instances. You will be guaranteed
to get compile errors for them - expected 'type', found 'foo'
2018-05-31 17:28:07 -04:00
Andrew Kelley
717ac85a5a zig fmt: add --color option to CLI help text 2018-05-30 18:37:12 -04:00
Andrew Kelley
a05acaf9fd Add --color CLI option to zig fmt
It doesn't actually do terminal color yet because we need to add
cross platform terminal color abstractions. But it toggles between
the single line error reporting and the multiline error reporting.

See #1026
2018-05-30 18:26:09 -04:00
Andrew Kelley
d8699ae57e zig fmt: don't compute a sha-256 for no reason
I forgot to delete this code before pushing 2c96f19fd3
2018-05-30 16:45:32 -04:00
Andrew Kelley
ea58f4a5a9 run zig fmt on the codebase 2018-05-30 16:09:11 -04:00
Andrew Kelley
b082cd4580 zig fmt: field access does not cause spaces for slicing
See #1003
2018-05-30 16:08:40 -04:00
Andrew Kelley
84b1842026 zig fmt: space after fn in fn prototypes
See #1003
2018-05-30 15:50:01 -04:00
Andrew Kelley
93b51b0e40 spaces around slice operator if operands are infix
See #1003
2018-05-30 15:33:58 -04:00
Andrew Kelley
2c96f19fd3 std.zig.render returns bool of whether anything changed
zig fmt only renames files and prints to stdout for files which changed
2018-05-30 14:58:27 -04:00
tgschultz
940a854448 Fix MacOS CI Timer test failing...? 2018-05-30 13:38:41 -05:00
tgschultz
fb001f5e90 Fixed character handling 2018-05-30 12:18:24 -05:00
tgschultz
8938c16f38 Formatting 2018-05-30 10:41:48 -05:00
tgschultz
4e1d0a59fa Minor typo 2018-05-30 10:24:27 -05:00
tgschultz
8fc52a94f4 Added custom formatter support, refactored fmt.format 2018-05-30 10:18:11 -05:00
tgschultz
8174f972a7 Merge pull request #2 from ziglang/master
sync with ziglang
2018-05-30 08:26:13 -05:00
Jimmi HC
15302e84a4 Adding workaround for when the user tries to unwrap 'type'
closes #1011
2018-05-30 11:51:46 +02:00
Jimmi HC
1b3aaacba2 Removed copy-pasted resolve_inferred_error_set
both ir.cpp and analyze.cpp have a function resolve_inferred_error_set,
which is a nearly exact copy-paste. This commit removes the one in ir.cpp
and exposes the one in analyze.cpp. This also allows us to make
analyze_fn_body local to analyze.cpp, as it is not used anywhere in
ir.cpp after this change
2018-05-30 10:34:20 +02:00
Jimmi HC
2b3af4ef6b fixed #1009
ir_make_type_info_defs already calls resolve_top_level_decl on all Tld
when building the def array. This means that there is no reason that
analyze_fn_body is necessary, as the fn type should have already been
resolved completely. The only thing analyze_fn_body does here is cause
problems with generic functions.
2018-05-30 10:30:09 +02:00
Andrew Kelley
b0eebfa560 fix syntax of std/json_test.zig
See #663
2018-05-29 18:10:36 -04:00
Andrew Kelley
d172e3f3bb fix AtomicFile for relative paths
closes #1017
2018-05-29 17:38:50 -04:00
Andrew Kelley
0c16cd2d0e run zig fmt on the codebase
See #1003
2018-05-29 04:23:38 -04:00
Andrew Kelley
cdf30c31ea zig fmt: fix implementation of firstToken() for fn call 2018-05-29 03:47:27 -04:00
Andrew Kelley
cd325e408e zig fmt: fix extra space after comma before multi line string 2018-05-29 03:33:03 -04:00
Andrew Kelley
cb042c8343 Merge remote-tracking branch 'origin/master' into llvm7 2018-05-29 03:22:52 -04:00
Andrew Kelley
eda6898c5b zig fmt: handle if and while indentation better 2018-05-29 03:15:12 -04:00
Andrew Kelley
530d175422 zig fmt: fix spacing when moving doc comment on var decls 2018-05-28 23:41:09 -04:00
Andrew Kelley
0d1b47362c zig fmt: if-else with comment before else 2018-05-28 22:41:05 -04:00
Andrew Kelley
77ec81b035 zig fmt: respect line breaks in if-else 2018-05-28 22:22:01 -04:00
Andrew Kelley
71badebd08 zig fmt: respect line breaks after infix operators 2018-05-28 21:28:32 -04:00
Andrew Kelley
354ab1c5c8 zig fmt: render fn decl with trailing comma 1 line per param 2018-05-28 21:18:41 -04:00
Andrew Kelley
530da36352 zig fmt: fix enum decl with no trailing comma
See #1003
2018-05-28 17:43:17 -04:00
Andrew Kelley
6c1fda3f99 zig fmt: fix switch body indent 2018-05-28 17:09:55 -04:00
Andrew Kelley
fd13a75785 zig fmt: allow same line struct literal with no trailing comma
See #1003
2018-05-28 17:00:04 -04:00
Andrew Kelley
122a74724c zig fmt: use simple newlines rather than empty comments to hint
now the first row of an array literal is the hint to zig fmt
for how long each row should be.

See #1003
2018-05-28 16:23:33 -04:00
Andrew Kelley
3fed10883b zig fmt: array literals with no trailing comma all on one line 2018-05-26 23:25:04 -04:00
Andrew Kelley
afdfbc0367 zig fmt: delete empty comments that do nothing 2018-05-26 23:25:04 -04:00
Andrew Kelley
b184ae5ca5 run zig fmt on some of the codebase
See #1003
2018-05-26 23:00:29 -04:00
Andrew Kelley
118d41ef83 zig fmt: support array literal row size hint
See #1003
2018-05-26 22:59:46 -04:00
Andrew Kelley
349365d9a4 zig fmt: better multiline string formatting 2018-05-26 19:32:28 -04:00
Andrew Kelley
cabf7fa93b zig fmt: fn calls with trailing comma with params on new lines 2018-05-26 18:44:10 -04:00
Andrew Kelley
b8d4e05361 zig fmt: handle empty block with comment inside 2018-05-26 18:29:14 -04:00
Andrew Kelley
7e900d28be zig fmt: no space on switch range operator 2018-05-26 18:10:06 -04:00
Andrew Kelley
0bef1f9824 zig fmt: fix rendering of struct with no trailing comma on last field 2018-05-26 16:43:33 -04:00
Andrew Kelley
0ab888c639 zig fmt: parse extra comma in asm expressions 2018-05-26 16:37:55 -04:00
Andrew Kelley
85ca611af1 zig fmt: put nested struct inits on newlines
See #1003
2018-05-26 15:37:47 -04:00
Andrew Kelley
7fa97b752e add strict float mode to some math functions
fixes a test failure for acosh32
2018-05-26 14:57:53 -04:00
Andrew Kelley
8efb3f5e19 update to latest LLVM API 2018-05-26 12:18:13 -04:00
Andrew Kelley
1f96a86676 Merge remote-tracking branch 'origin/master' into llvm7 2018-05-26 12:06:08 -04:00
Andrew Kelley
c029f4bfc4 trailing comma after var args is not supported 2018-05-25 20:41:14 -04:00
Andrew Kelley
4405897cbd zig fmt: support trailing comma on switch case items 2018-05-25 20:34:53 -04:00
Andrew Kelley
a630d3e851 zig fmt: fix rendering of align keyword of slice type 2018-05-25 02:19:53 -04:00
Andrew Kelley
56cb7f1740 update json_test to be compliant with zig source encoding
See #663
2018-05-25 02:08:43 -04:00
Andrew Kelley
3f302f8411 handle more cases of inserting trailing commas 2018-05-25 01:52:59 -04:00
Andrew Kelley
000c01a36a zig fmt: handle missing trailing comma in array literals 2018-05-25 01:45:14 -04:00
Andrew Kelley
08f95d0c2f enum fields with a type are not supported
the c++ codebase lets it slide

the self hosted parser correctly reports a parse error
2018-05-25 01:10:54 -04:00
Andrew Kelley
dfc3e11748 zig fmt: fix handling of comments at top of file 2018-05-25 01:03:15 -04:00
Andrew Kelley
ca49b6f6b4 struct fields with no explicit type are not supported
the c++ codebase lets it slide

the self hosted parser correctly reports a parse error
2018-05-25 00:39:18 -04:00
Andrew Kelley
e6afea99a9 zig fmt: support aligned ptr with bit fields 2018-05-25 00:38:07 -04:00
Andrew Kelley
b74dda34b6 std.zig.tokenizer: support hex escape in char literals 2018-05-24 21:51:58 -04:00
Andrew Kelley
43085417be update github.com/zig-lang to github.com/ziglang 2018-05-24 21:27:44 -04:00
Andrew Kelley
af7073b779 Merge branch 'BraedonWooding-patch-3' 2018-05-24 20:59:39 -04:00
Andrew Kelley
fa5b0ef54f doc fixups 2018-05-24 20:59:19 -04:00
braedonww@gmail.com
938d791b23 Added argtype and error inferring info 2018-05-24 20:20:07 -04:00
Andrew Kelley
54e887ed9e std.zig.tokenizer: fix tokenization of hex floats 2018-05-24 01:16:07 -04:00
Andrew Kelley
b132a17a74 std.zig.parse ignores comments
std.zig.render handles comments by looking at nearby tokens
2018-05-24 00:35:53 -04:00
Marc Tiehuis
4f4afe186d Make JsonParser public 2018-05-22 15:34:17 +12:00
Marc Tiehuis
698c52e796 Make StreamingJsonParser public 2018-05-22 15:32:17 +12:00
Andrew Kelley
bfbe26734d zig fmt: add pointer deref syntax 2018-05-20 14:50:27 -04:00
Andrew Kelley
83a7809478 Merge pull request #1019 from zig-lang/pointer-reform
Pointer Reform - change prefix deref syntax to postfix deref syntax
2018-05-18 13:30:25 -04:00
Andrew Kelley
c38b165db4 all tests passing with postfix deref syntax 2018-05-17 23:21:44 -04:00
Andrew Kelley
99fc2bd4dd Merge remote-tracking branch 'origin/master' into pointer-reform 2018-05-17 00:56:35 -04:00
Andrew Kelley
b73307befb more std lib to postfix deref with zig fmt 2018-05-17 00:56:14 -04:00
Andrew Kelley
942d384831 fix std.SegmentedList.Iterator.set 2018-05-17 00:52:36 -04:00
Andrew Kelley
b48d354600 zig fmt: fix comment after if before another if 2018-05-17 00:44:55 -04:00
Andrew Kelley
37c6afa5b4 zig fmt: line comment between if block and else keyword 2018-05-17 00:31:47 -04:00
Andrew Kelley
9ea0e4ca68 zig fmt: same line comments after tokens in expression 2018-05-17 00:16:32 -04:00
Andrew Kelley
967bad43a0 OpenBSD has the same C integer sizes as Linux
Thanks Jan S <jan.schreib@gmail.com> for this information

closes #1016
2018-05-16 20:18:38 -04:00
Andrew Kelley
4a3d689550 std.fmt: use SI prefixes for printing bytes
closes #1015
2018-05-16 18:22:39 -04:00
Andrew Kelley
288fc3a8d3 convert more std lib files to postfix pointer deref 2018-05-16 00:43:28 -04:00
Andrew Kelley
ee5f9ffad0 zig fmt: add comma on last switch prong 2018-05-16 00:27:18 -04:00
Andrew Kelley
5cfabdd493 Merge remote-tracking branch 'origin/master' into pointer-reform 2018-05-16 00:02:26 -04:00
Andrew Kelley
492a214d4c std.fmt.format: support {B} for human readable bytes 2018-05-15 22:11:03 -04:00
Andrew Kelley
3625df25d6 build: add flag to LLD to fix gcc 8 build (#1013)
* build: add flag to LLD to fix gcc 8 build
* build: add -Wno-unknown-warning-option to work around older gcc
2018-05-15 16:21:47 -04:00
Andrew Kelley
74b10c08d1 fix @typeInfo not setting a field to comptime 2018-05-15 14:11:41 -04:00
Andrew Kelley
04bca58a3a zig fmt: preserve same line doc comments on var decls 2018-05-15 00:33:34 -04:00
Andrew Kelley
abcd418451 std.zig.parse cleanup 2018-05-13 14:20:01 -04:00
Andrew Kelley
86a352c45b Merge branch 'master' into pointer-reform 2018-05-13 13:38:03 -04:00
Andrew Kelley
05ecb49bac README: https links 2018-05-13 01:07:55 -04:00
Andrew Kelley
4c3aa09f2a self hosted compiler: remove unused flag 2018-05-13 01:06:52 -04:00
Andrew Kelley
548ddd1f0c fix AST dumping code in self hosted compiler 2018-05-12 23:57:15 -04:00
Andrew Kelley
7cdc9d98c7 refactor std.zig.render to be recursive
See #1006
2018-05-12 23:06:54 -04:00
Andrew Kelley
911cbf57cd recursive render top level decl 2018-05-12 19:03:39 -04:00
Andrew Kelley
a6ae45145f add @newStackCall builtin function
See #1006
2018-05-12 17:35:15 -04:00
Andrew Kelley
4277762b74 fix windows build system
broken by 6e821078f6
2018-05-11 23:04:41 -04:00
Andrew Kelley
277b9cf878 fix comptime code modification of global const
closes #1008
2018-05-11 22:41:44 -04:00
tgschultz
8c1872543c Merge pull request #1 from zig-lang/master
Sync with zig-lang/zig master
2018-05-11 21:36:02 -05:00
Andrew Kelley
6e821078f6 update std.Buffer API
* remove Buffer.appendFormat
 * remove Buffer.appendByte
 * remove Buffer.appendByteNTimes

Added test to demo what to use instead of the above functions
2018-05-11 14:08:16 -04:00
Andrew Kelley
284ab109c4 Merge remote-tracking branch 'origin/master' into llvm7 2018-05-10 11:37:25 -04:00
Marc Tiehuis
efa39c5343 Fix bigint shift-right partial shift 2018-05-10 22:26:26 +12:00
Andrew Kelley
c3ddf5069e zig fmt: fix not writing results 2018-05-10 00:40:07 -04:00
Andrew Kelley
4787127cf6 partial conversion to post-fix pointer deref using zig fmt 2018-05-10 00:29:49 -04:00
Andrew Kelley
6928badd85 Merge branch 'master' into pointer-reform 2018-05-09 23:43:07 -04:00
Andrew Kelley
4438c5e09b Merge branch 'rework-parser' 2018-05-09 22:17:47 -04:00
Andrew Kelley
bbae6267fe fix self hosted compiler 2018-05-09 21:45:29 -04:00
Andrew Kelley
774b6ffe1e fix parser performance regression 2018-05-09 21:17:05 -04:00
Andrew Kelley
403e5239e3 all tests passing again 2018-05-09 21:15:34 -04:00
Andrew Kelley
bf21747a42 translate-c: fix typedef duplicate definition of variable
closes #998
2018-05-09 20:23:36 -04:00
Andrew Kelley
116914ab3e Merge pull request #1000 from zig-lang/slice-type-info
Added Slice as its own type info in userland
2018-05-09 09:27:14 -04:00
Jimmi HC
2a74aa2067 Freeing ptr_field_name after use 2018-05-09 09:40:57 +02:00
Jimmi HC
9b29c872ce Added Slice as its own type info in userland 2018-05-09 09:34:04 +02:00
Andrew Kelley
670c9f9b74 add benchmark for measuring parser performance 2018-05-08 16:23:08 -04:00
Andrew Kelley
ca27ce3bee std.zig.parser supports same-line comments on any token 2018-05-07 23:54:35 -04:00
Andrew Kelley
0cb65b266a separate std.zig.parse and std.zig.render 2018-05-07 22:07:50 -04:00
Andrew Kelley
69ef6ae0f9 rework std.zig.parser 2018-05-07 21:57:44 -04:00
Andrew Kelley
dc23350847 add std.SegmentedList.Iterator.prev 2018-05-07 16:50:33 -04:00
Andrew Kelley
3b7aa80892 add std.SegmentedList.Iterator 2018-05-07 16:50:33 -04:00
Andrew Kelley
77a1a216d2 tagged union field access prioritizes members over enum tags
closes #959
2018-05-07 16:43:20 -04:00
Andrew Kelley
2f633452bb std.SegmentedList: cleaner separation of capacity functions 2018-05-07 10:34:38 -04:00
Andrew Kelley
78ba3b8485 Merge pull request #992 from zig-lang/segmented-list
Segmented list implementation
2018-05-07 09:54:30 -04:00
Andrew Kelley
81007d0a4b SegmentedList: fixups from review comments 2018-05-07 09:53:52 -04:00
Andrew Kelley
7fdbaeca72 zig fmt: same-line comment after non-block if expression 2018-05-07 01:05:06 -04:00
Andrew Kelley
41e1cd185b std.SegmentedList implementation 2018-05-07 01:04:43 -04:00
Marc Tiehuis
d7b029995c Fix bigint multi-limb shift and masks 2018-05-05 22:40:29 +12:00
Andrew Kelley
4d6d2f1cd2 zig fmt: same-line comment after non-block if expression 2018-05-04 18:35:43 -04:00
Jimmi Holst Christensen
87c0060e81 Made container methods that can be const, const 2018-05-04 23:48:14 +02:00
Andrew Kelley
0fc8885a8d zig fmt: switch with empty body 2018-05-04 16:49:51 -04:00
Andrew Kelley
eef21df94f zig fmt: same-line comment on comptime expression 2018-05-04 16:46:35 -04:00
Andrew Kelley
8721eb68fc zig fmt: fix tokenization of float literal with exponent 2018-05-04 14:34:32 -04:00
Andrew Kelley
7e37d268c8 Merge remote-tracking branch 'origin/master' into llvm7 2018-05-04 13:39:27 -04:00
Marc Tiehuis
ef3111be23 Use allocator backed array for json value decoder 2018-05-04 17:56:20 +12:00
Marc Tiehuis
f17472635e Fix review comments for json decoder 2018-05-04 17:56:20 +12:00
Marc Tiehuis
0afc6a9886 Add json decoder
- streaming json decoder
 - dynamic tree/value decoder
2018-05-04 17:56:20 +12:00
Andrew Kelley
33fa87a9d8 Merge pull request #981 from BraedonWooding/ArrayIteratorUnifiedSyntax
ArrayList iterator, unifying API of HashMap and its derivatives
2018-05-03 23:15:17 -04:00
Andrew Kelley
b9e320dd52 Merge pull request #951 from alexnask/reflect_reify
Metaprogramming - @typeInfo [DONE]
2018-05-03 23:02:33 -04:00
Jimmi Holst Christensen
aa2586de18 Fixed extern enums having the wrong size (#970)
Fixed extern enums having the wrong size

See #977
2018-05-03 22:27:04 -04:00
Andrew Kelley
7337029ce1 Merge pull request #979 from zig-lang/test-release-small
add ReleaseSmall mode in zig tests
2018-05-03 11:47:22 -04:00
Braedon
e907c5cab9 Unified API 2018-05-03 23:54:33 +10:00
Andrew Kelley
adbb691f46 fix behavior tests for ReleaseSmall 2018-05-02 22:48:04 -04:00
Andrew Kelley
6fd14f23b5 add ReleaseSmall mode in zig tests
closes #969
2018-05-02 22:41:02 -04:00
Alexandros Naskos
131c133bb7 Fixed inlining determination test (#972)
When deciding whether we should inline a scope, look up the parents until we get to a function definition scope
2018-05-02 21:43:07 -04:00
Andrew Kelley
02c1b9df3b fix compiler-rt tests accidentally running std tests
also reduce the aggressiveness of std.atomic.stack
and std.atomic.queue fuzz testing. appveyor has 1 core
and 10,000 iterations is too much for 6 threads to
thrash over
2018-05-02 21:34:34 -04:00
Andrew Kelley
c186cd187e std.atomic - use AtomicOrder.SeqCst for everything
also use less memory for the tests
2018-05-02 20:19:26 -04:00
Andrew Kelley
6f002e724b Merge pull request #975 from zig-lang/none-pure-enums
Added better support for non-pure enums in translate-c
2018-05-02 16:56:47 -04:00
Jimmi Holst Christensen
1a9403f38a Added better support for non-pure enums in translate-c 2018-05-02 21:50:41 +02:00
Andrew Kelley
86a428a4a5 windows threading: add missing call to CloseHandle 2018-05-02 10:03:58 -04:00
Andrew Kelley
6309121f70 translate-c: update to llvm7 2018-05-02 09:46:29 -04:00
Andrew Kelley
1090b289ec Merge remote-tracking branch 'origin/master' into llvm7 2018-05-01 14:29:50 -04:00
Alexandros Naskos
849ea61fa1 Small fix. 2018-05-01 17:10:50 +03:00
Alexandros Naskos
57940837e7 Added typeInfo to langref built_ins 2018-05-01 13:44:19 +03:00
Alexandros Naskos
7d239414f7 Fixed type info test, added documentation. 2018-05-01 13:42:20 +03:00
Alexandros Naskos
1b6e97355d Added type info tests to behavior test listing 2018-05-01 13:31:14 +03:00
Alexandros Naskos
255c0ef406 Resolved merge conflict. 2018-05-01 13:09:34 +03:00
Alexandros Naskos
e1535ee0a9 Added typeInfo tests 2018-05-01 13:01:54 +03:00
Alexandros Naskos
ff1c4e1f13 Added tests. 2018-05-01 13:00:39 +03:00
Andrew Kelley
ac4d55dec1 behavior tests passing with new pointer deref syntax 2018-05-01 01:53:04 -04:00
Andrew Kelley
3a8dc4e90d zig fmt: line comments in struct initializer 2018-05-01 01:30:53 -04:00
Andrew Kelley
a35b366eb6 [breaking] delete ptr deref prefix op
start using zig-fmt-pointer-reform branch build of zig fmt
to fix code to use the new syntax

all of test/cases/* are processed, but there are more left
to be done - all the std lib used by the behavior tests
2018-04-30 20:35:54 -04:00
Andrew Kelley
7c822869fe zig fmt: only some docs have doc comments 2018-04-30 20:25:54 -04:00
Andrew Kelley
61a726c290 zig fmt: comments in field decls 2018-04-30 19:27:14 -04:00
Andrew Kelley
4cc1008c2d zig fmt: error set decls 2018-04-30 19:16:46 -04:00
Andrew Kelley
3e61c45f89 zig fmt: consistent spacing for container inits 2018-04-30 18:49:05 -04:00
Andrew Kelley
eed49a2104 zig fmt: aggregate type init with only 1 field 2018-04-30 18:30:47 -04:00
Andrew Kelley
1d06915f27 zig fmt: support union(enum(tag)) and enum init values 2018-04-30 18:20:27 -04:00
Andrew Kelley
7dc8d433ab zig fmt: support labeled suspend 2018-04-30 17:30:57 -04:00
Andrew Kelley
37d3ef2835 zig fmt: support promise->T 2018-04-30 16:16:58 -04:00
Andrew Kelley
47680cc0d8 zig fmt: better multiline string handling 2018-04-30 15:10:55 -04:00
Andrew Kelley
76ab1d2b6c support foo.* for ptr deref
See #770
2018-04-30 14:20:56 -04:00
Andrew Kelley
e14db23661 run zig fmt on std/os/index.zig 2018-04-30 01:03:38 -04:00
Andrew Kelley
54987c3d8f std.zig.tokenizer: 3 slashes is doc comment, 4 is line comment 2018-04-30 00:56:59 -04:00
Andrew Kelley
0bf7ebcfea std.zig.tokenizer: fix handling of line comment / doc comment 2018-04-30 00:52:09 -04:00
Andrew Kelley
fd2cd38bdb zig fmt: support line comments and doc comments
line comments can go anywhere a list of something is allowed
2018-04-30 00:19:55 -04:00
Andrew Kelley
39befc35a8 update comment in std/os/index.zig 2018-04-29 22:31:42 -04:00
Andrew Kelley
4e23fb7f06 zig fmt: comments before error set decl 2018-04-29 22:12:17 -04:00
Andrew Kelley
f04015c080 zig fmt: comments before switch prong 2018-04-29 21:47:54 -04:00
Andrew Kelley
a912c7d75f zig fmt: same-line comment after switch prong 2018-04-29 21:27:44 -04:00
Andrew Kelley
c53209a8a8 zig fmt: comments before var decl in struct 2018-04-29 19:55:57 -04:00
Andrew Kelley
3235eb03f9 zig fmt: preserve same line comment after struct field 2018-04-29 19:23:19 -04:00
Andrew Kelley
3fa0bed985 zig fmt: array literal with 1 item on 1 line 2018-04-29 18:22:39 -04:00
Josh Wolfe
9543c0a7cc use explicit error sets for utf8Decode functions
and run unicode tests at comptime also
2018-04-29 18:07:18 -04:00
Andrew Kelley
c03b9010db zig fmt: preserve same-line comment after statement 2018-04-29 17:37:15 -04:00
Josh Wolfe
2387292f20 move some checks around in utf8Encode logic to be more zig idiomatic 2018-04-29 17:28:11 -04:00
Josh Wolfe
8c567d84f1 Merge pull request #954 from BraedonWooding/patch-2
Utf8 Encoding from Codepoint to Bytes
2018-04-29 16:57:29 -04:00
Andrew Kelley
ad4ee47d9f zig fmt: preserve comments before global variables 2018-04-29 16:24:12 -04:00
Andrew Kelley
a0e9f1e0c3 fix bootstrap_lib for windows, take 2 2018-04-29 15:51:23 -04:00
Andrew Kelley
5e5eceb0de fix bootstrap_lib for windows 2018-04-29 15:50:56 -04:00
Andrew Kelley
b7095912c7 zig fmt: respect comments before statements 2018-04-29 15:48:53 -04:00
Andrew Kelley
f37e79e720 Merge pull request #963 from zig-lang/atomic-stack-and-queue
Atomic stack and queue
2018-04-29 12:29:40 -04:00
Alexandros Naskos
013f548202 Finished FnDef TypeInfo generation (warning: may be buggy). 2018-04-29 15:40:26 +03:00
Alexandros Naskos
66aa760f83 More FnDef TypeInfo generation. 2018-04-29 14:03:55 +03:00
Andrew Kelley
c76b0a845f fix std threads for linux 2018-04-29 02:56:59 -04:00
Andrew Kelley
b21bcbd775 fix std threads for macos 2018-04-29 02:52:04 -04:00
Andrew Kelley
6376d96824 support kernel threads for windows
* remove std.os.spawnThreadAllocator - windows does not support
   an explicit stack, so using an allocator for a thread stack
   space does not work.
 * std.os.spawnThread - instead of accepting a stack argument, the
   implementation will directly allocate using OS-specific APIs.
2018-04-29 02:40:22 -04:00
Andrew Kelley
bf8e419d2b linux uses pthreads when linking against libc 2018-04-29 00:40:04 -04:00
Andrew Kelley
abf90eaa67 enable atomic queue and stack tests for macos 2018-04-29 00:09:32 -04:00
Andrew Kelley
a425420993 make pthreads threads work on darwin
darwin pthreads adds a restriction that the stack start and end
must be page aligned
2018-04-29 00:07:32 -04:00
Andrew Kelley
998e25a01e pthread support working 2018-04-28 23:47:39 -04:00
Andrew Kelley
a344cb03bc *WIP* use pthreads when linking libc 2018-04-28 23:30:13 -04:00
Andrew Kelley
0bb054e5e7 Merge pull request #930 from zig-lang/float-printing
Finish and fix float printing
2018-04-28 22:43:52 -04:00
Andrew Kelley
ec2a81a081 fix compiler-rt ABI for x86_64 windows 2018-04-28 22:03:07 -04:00
Andrew Kelley
a10351b439 disable atomic stack and queue tests for non-linux 2018-04-28 18:19:00 -04:00
Andrew Kelley
5d6e44b3f2 add tests for std.atomic Queue and Stack 2018-04-28 18:00:51 -04:00
Andrew Kelley
96ecb40259 add fuzz tests for std.atomic.Stack 2018-04-28 17:53:06 -04:00
Andrew Kelley
4ac36d094c add std.atomic.Stack and std.atomic.Queue 2018-04-28 16:11:32 -04:00
Jimmi Holst Christensen
73bf897b5c Using allocate instead of allocate_nonzero so we don't have to memset 2018-04-28 19:21:23 +02:00
Jimmi Holst Christensen
d6f033b42d Fixed build error 2018-04-28 19:09:25 +02:00
Jimmi Holst Christensen
837166319d Trying to fix osx build failing by setting param_info.type to nullptr 2018-04-28 19:02:46 +02:00
Alexandros Naskos
af73462da4 Started work on function definition TypeInfo generation. 2018-04-28 19:57:59 +03:00
Jimmi Holst Christensen
341f8c1e86 Fixed wrong formatting for arg_index when reporting @ArgType error 2018-04-28 17:57:47 +02:00
Alexandros Naskos
9ba400673d Generating TypeInfo's now forces definitions to be resolved. 2018-04-28 18:38:38 +03:00
Jimmi Holst Christensen
fba0347ec4 .ReturnType and @ArgType now emits errors on unresolved types
related: #846
2018-04-28 17:17:48 +02:00
Jimmi Holst Christensen
2fc34eaa58 Functions with inferred error set can now return literals
fixes #852
2018-04-28 16:27:31 +02:00
Alexandros Naskos
61b0180596 Added definition TypeInfo generation, except for function definitions. 2018-04-28 17:01:19 +03:00
Jimmi Holst Christensen
3178528335 Removed zero sized error set optimization
fixes #762
fixes #818
2018-04-28 14:05:08 +02:00
Alexandros Naskos
ea2596280f Added BoundFn TypeInfo generation. 2018-04-27 05:10:20 +03:00
Alexandros Naskos
8f703f919f Added Fn TypeInfo generation. 2018-04-27 04:29:50 +03:00
Alexandros Naskos
a2dadbc206 Added struct TypeInfo generation. 2018-04-27 02:52:09 +03:00
Alexandros Naskos
9041d0d37e Fixed enum tag type detection in TypeInfo generation. 2018-04-27 02:05:24 +03:00
Alexandros Naskos
884e32d5c3 Added ErrorUnion, Union TypeInfo generation 2018-04-26 19:56:34 +03:00
Andrew Kelley
4d0b660f4b translate-c: add missing decl type from LLVM 7 2018-04-26 11:43:18 -04:00
Alexandros Naskos
fbbbee6b72 Switched to shallow TypeInfo. 2018-04-26 18:18:47 +03:00
Andrew Kelley
bbfff46146 Merge remote-tracking branch 'origin/master' into llvm7 2018-04-26 10:55:29 -04:00
Alexandros Naskos
4aa5d87ada Added ErrorSet TypeInfo generation. 2018-04-26 17:14:38 +03:00
Alexandros Naskos
f5977f68eb Added Enum TypeInfo except for methods 2018-04-26 16:41:59 +03:00
Alexandros Naskos
7a91e4736a Reset parent on cached TypeInfo values if we need to. 2018-04-26 14:29:27 +03:00
Alexandros Naskos
bb56360bfa Added TypeInfo cache 2018-04-26 14:03:19 +03:00
Alexandros Naskos
dd88d7deda Cleanup 2018-04-26 13:27:16 +03:00
Alexandros Naskos
bc160821d3 Changed TypeInfo layout. 2018-04-25 17:50:11 +03:00
Alexandros Naskos
2606993cb4 Fixed ir_type_info_struct_set_parent for struct parents. 2018-04-25 11:59:35 +03:00
Alexandros Naskos
d68aea4f35 Added checks for field name/index mapping in TypeInfo generation. Abstracted the parent setting out. 2018-04-25 11:35:46 +03:00
Braedon
07af6559d8 Changed to use shifting and masking 2018-04-25 16:26:57 +10:00
Braedon
f6cbe9a9cc Utf8 Encode 2018-04-25 14:59:03 +10:00
Andrew Kelley
7270f35c93 Merge remote-tracking branch 'origin/master' into float-printing 2018-04-24 23:47:37 -04:00
Andrew Kelley
27cbb44993 Merge pull request #949 from zig-lang/complex-math
Add initial complex-number support
2018-04-24 21:24:08 -04:00
Andrew Kelley
84391af7b8 convert NOTE to TODO so we catch it later
See #363

For Complex as a builtin type, see discussion in #949
2018-04-24 21:23:03 -04:00
Andrew Kelley
1d998d5dce clean up complex math tests 2018-04-24 21:14:12 -04:00
Andrew Kelley
13076d5f22 std.mem: add more slice manipulation functions
* add std.mem.trimLeft
 * add std.mem.trimRight
 * add std.mem.trimRight
 * add std.mem.lastIndexOfScalar
 * add std.mem.lastIndexOfAny
 * add std.mem.lastIndexOf
 * add std.mem.endsWith

closes #944

Thanks Braedon Wooding for the original PR
2018-04-24 20:53:36 -04:00
Alexandros Naskos
778b931bf3 Fixed comptime union void field access 2018-04-25 02:50:18 +03:00
Alexandros Naskos
182a9fad2d Added ArrayInfo, NullableInfo, PromiseInfo generation 2018-04-24 17:38:30 +03:00
Alexandros Naskos
09d7033d1d PointerInfo child is known at comptime 2018-04-24 17:08:45 +03:00
Alexandros Naskos
2d8553c853 Fixed PointerInfo generation 2018-04-24 17:01:20 +03:00
Alexandros Naskos
189e8e97bd PointerInfo child is a pointer to a TypeInfo union, still not working correctly 2018-04-24 16:50:36 +03:00
Alexandros Naskos
0e5fb035e3 Added (broken) pointer info, float info 2018-04-24 16:23:22 +03:00
Alexandros Naskos
e9309d3b13 Fixed IntInfo generation. 2018-04-24 15:17:34 +03:00
Alexandros Naskos
ec2a3ed500 Attempt at adding comptime union field access 2018-04-24 15:03:46 +03:00
Marc Tiehuis
0501e066b5 crypto throughput test now uses os.time module 2018-04-24 23:54:27 +12:00
Alexandros Naskos
fb88f5a0d2 @typeInfo with void payloads now works! 2018-04-24 11:20:33 +03:00
Marc Tiehuis
d5e99cc05e Add initial complex-number support
- Library type instead of builtin
 - All C complex functions implemented

Partial WIP: Needs more tests for edge cases.
2018-04-24 19:18:31 +12:00
Alexandros Naskos
7eab62325b One step towards @typeInfo 2018-04-24 01:49:22 +03:00
Andrew Kelley
15bf0c1541 fix interaction between defer and labeled break
closes #830
2018-04-23 18:06:33 -04:00
Andrew Kelley
89a4c373d3 fix bigint twos complement implementation
closes #948
2018-04-23 12:06:18 -04:00
Marc Tiehuis
e5175d432e Fix release float printing errors
Fixes #564.
Fixes #669.
Fixes #928.
2018-04-23 17:22:51 +12:00
Marc Tiehuis
d8ba1bc120 Improve fmt float-printing
- Fix errors printing very small numbers
 - Add explicit scientific output mode
 - Add rounding based on a specific precision for both decimal/exp
 modes.
 - Test and confirm exp/decimal against libc for all f32 values. Various
 changes to better match libc.
2018-04-23 17:22:51 +12:00
Andrew Kelley
8503eff8c1 add compile error for invalid deref on switch target
closes #945
2018-04-22 23:46:55 -04:00
Andrew Kelley
75328e3204 exit(1) instead of abort() for file not found 2018-04-22 21:47:25 -04:00
Andrew Kelley
25dff91fa0 fix windows build broken by previous commit
fixes build failure from 1c41f1ca62
2018-04-22 21:08:52 -04:00
Andrew Kelley
1c41f1ca62 better error reporting for missing libc on windows
closes #931
2018-04-22 20:54:52 -04:00
Andrew Kelley
371a3ad4bd Merge branch 'tgschultz-std.os.time' 2018-04-22 18:13:57 -04:00
Andrew Kelley
7af6ed3f20 add alignment docs 2018-04-22 18:13:53 -04:00
Andrew Kelley
21767144fc linux: support VDSO for clock_gettime
also fix a compiler crash when using cmpxchg with nullable pointer
2018-04-22 18:11:50 -04:00
Andrew Kelley
da2af9c613 fixups 2018-04-22 13:36:26 -04:00
Andrew Kelley
0dcadc61b4 Merge branch 'std.os.time' of https://github.com/tgschultz/zig into tgschultz-std.os.time 2018-04-22 13:24:25 -04:00
Andrew Kelley
98b88bb52f add alignment docs 2018-04-22 12:52:28 -04:00
Andrew Kelley
3010668390 Merge pull request #939 from tgschultz/large-alignment-directalloc
DirectAllocator alignments > os.page_size on posix
2018-04-22 12:49:13 -04:00
Andrew Kelley
a3e9ae8f74 travis: use encrypted env vars for s3 credentials 2018-04-22 12:33:02 -04:00
tgschultz
a1083b019c Added DirectAllocator support for alignments > os.page_size on posix systems 2018-04-21 20:41:49 -05:00
Andrew Kelley
c4840d78fb add test case for #936 2018-04-21 02:10:22 -04:00
Andrew Kelley
1098545e47 std.zig.parser: remove unused field 2018-04-21 02:10:22 -04:00
Harry Eakins
eef4bbb65f Changed all MB to MiB 2018-04-21 11:06:10 +12:00
Harry Eakins
b229aff34a Readability improvements and bug-fix to std/crypto/throughput_test.zig 2018-04-21 11:06:10 +12:00
Andrew Kelley
6e57243a79 zig fmt: preserve comments in front of test blocks
* refactor std.zig.parser
 * fix compiler crashing for some compile errors
 * take advantage of @field in std.zig.ast
 * move ast.NodeFoo to ast.Node.Foo
 * comment preservation is more explicit

See #911
2018-04-20 02:15:09 -04:00
Jimmi Holst Christensen
cc35f085ca Merge pull request #934 from zig-lang/adding-builtin-field
Added @field builtin function
2018-04-19 22:43:41 +02:00
tgschultz
ca4053ba49 Use std.os.errorUnexpectedPosix if timer initialization encounters unexpected error 2018-04-19 14:53:58 -05:00
Jimmi Holst Christensen
72bf9d90cc Added builtin field to docs 2018-04-19 21:48:09 +02:00
Jimmi Holst Christensen
1b91478bff Optimized field ptr ir for hot path and fix assignment bug 2018-04-19 21:34:18 +02:00
Jimmi Holst Christensen
6b4f6ebd89 Added field builtin function 2018-04-19 20:11:16 +02:00
tgschultz
89eade0548 Style cleanups, u64 casts, Timer.start returns error instead of unreachable on unexpected errno. 2018-04-19 10:01:41 -05:00
Andrew Kelley
06909ceaab support break in suspend blocks
* you can label suspend blocks
 * labeled break supports suspend blocks

See #803
2018-04-18 22:21:54 -04:00
tgschultz
3c9b6f8cd5 Fixed another incorrect comment 2018-04-18 19:57:47 -05:00
tgschultz
fdebe38fa3 Added notes regarding CLOCK_MONOTONIC_RAW and made it easy to change our mind in the future.
Updated std.os imported tests' block with lazy declaration workaround and added time.zig.
Corrected some incorrect comments.
2018-04-18 19:48:19 -05:00
tgschultz
5c83d271a3 Fixed incorrect sign on epoch.clr 2018-04-18 18:50:28 -05:00
tgschultz
7cfe328a16 fixed typos. 2018-04-18 17:43:35 -05:00
Andrew Kelley
ca4341f7ba add --no-rosegment cli option
this provides a workaround for #896
until valgrind adds support for clang/LLD
(equivalent to gcc/gold -rosegment)
2018-04-18 17:14:09 -04:00
tgschultz
bf9cf28322 Fixed compiler errors around darwin code. 2018-04-18 15:46:50 -05:00
tgschultz
8b66dd8c7d Added unstaged changes. 2018-04-18 13:55:42 -05:00
tgschultz
c90f936eef Added timestamp, high-perf. timer functions. 2018-04-18 13:52:25 -05:00
Andrew Kelley
f1f998e071 improve cmpxchg
* remove @cmpxchg, add @cmpxchgWeak and @cmpxchgStrong
   - See explanations in the langref.
 * add operand type as first parameter
 * return type is ?T where T is the operand type

closes #461
2018-04-18 12:16:42 -04:00
Andrew Kelley
96ebd8b23b fix windows not respecting --msvc-lib-dir, --kernel32-lib-dir
I believe this was a regression caused by
51a6ff18d4

closes #927
2018-04-16 22:33:34 -04:00
Marc Tiehuis
c7cb5c31e5 Add exp/norm distributed random float generation 2018-04-16 20:06:50 +12:00
Andrew Kelley
caefaf781e std.debug: dumpStackTrace & friends use DirectAllocator
this has the downside of failing to print a stack trace
when the system is out of memory (maybe we could add a
FallbackAllocator which tries DirectAllocator and falls
back on the 200KB preallocated buffer).

but for the more common use case when the system is not
out of memory, but the debug info cannot fit in
std.debug.global_allocator, now stack traces will work.

this is the case for the self hosted compiler.
2018-04-16 03:17:15 -04:00
Andrew Kelley
88724217dd Merge pull request #925 from alexnask/release_small
Added ReleaseSmall mode.
2018-04-15 21:57:26 -04:00
Alexandros Naskos
1bc140964f Added ReleaseSmall mode to docgen 2018-04-16 04:18:52 +03:00
Alexandros Naskos
6492763bef Fixed test build code 2018-04-16 04:06:00 +03:00
Alexandros Naskos
1c85050dad Set SizeLevel to 2 in ReleaseSmall mode 2018-04-16 03:54:40 +03:00
Alexandros Naskos
253ecd5c11 Added ReleaseSmall mode 2018-04-16 03:26:10 +03:00
Andrew Kelley
b9360640ce add @atomicLoad builtin
See #174
2018-04-15 18:12:00 -04:00
Andrew Kelley
859b10d8bf std.math.ln and std.math.exp use float strict mode
closes #920
2018-04-15 15:22:27 -04:00
Andrew Kelley
a8d794215e exit with error code instead of panic for file not found 2018-04-15 15:22:07 -04:00
Andrew Kelley
b5459eb987 add @sqrt built-in function
See #767
2018-04-15 13:26:58 -04:00
Andrew Kelley
4a2bfec150 fix linux implementation of self exe path
closes #894
2018-04-15 12:57:45 -04:00
Andrew Kelley
b7af9edb8a add std.os.createThread
this adds kernel thread support to the standard library for
linux.

See #174
2018-04-14 02:24:05 -04:00
Andrea Orru
06614b3fa0 Merge branch 'master' into zen_stdlib 2018-04-13 11:11:21 -07:00
Andrea Orru
d2c672ab0c FIXME note 2018-04-13 11:10:36 -07:00
Andrew Kelley
fa05cab01a travis: put cache-control header for ziglang.org/download 2018-04-13 12:17:07 -04:00
Andrew Kelley
0509414dfe fix regression with zig install dir
introduced in 1999f0daad
2018-04-13 11:31:38 -04:00
Andrew Kelley
4662fd4d92 Merge pull request #919 from zig-lang/self-hosted-parser-refactor
Self-hosted parser refactor
2018-04-13 11:17:09 -04:00
Andrew Kelley
30c5f3c441 Merge pull request #915 from zig-lang/self-hosted-cli
Revise self-hosted command line interface
2018-04-13 11:16:06 -04:00
Andrew Kelley
1999f0daad fix undefined behavior triggered by fn inline test
LLVM destroys the string that we use to test if LLVM deleted the
inlined function.

Also fixed forgetting to initialize a buffer in std lib path detection.
2018-04-13 11:10:17 -04:00
Marc Tiehuis
fe9489ad63 Fix windows access check 2018-04-13 22:50:57 +12:00
Marc Tiehuis
03bec631bd Replace File.exists with File.access 2018-04-13 21:27:09 +12:00
Jimmi Holst Christensen
a498993fd1 Merged with master 2018-04-13 10:40:37 +02:00
Jimmi Holst Christensen
44c53c9979 std.zig.parser: Refactor round 2
* More work on ensuring that each state only eat one token
* VarDecl parsing now constructs its node
* Handling all fn parsing in the same case
* Using eatToken instead of getNextToken where possible
* All tokenIdTo* now take @TagType(Token.Id)
* Added a createToCtxLiteral function
2018-04-13 10:15:12 +02:00
Marc Tiehuis
b946982e90 Use builtin Arch/Os/Environ 2018-04-13 20:12:30 +12:00
Marc Tiehuis
7fe1c7c04f Remove cc command 2018-04-13 19:43:18 +12:00
Andrea Orru
c5f088e52c Pass up to 5 arguments in Zen IPC 2018-04-12 22:24:57 -07:00
Andrew Kelley
0f652b4d80 zig fmt: switch cases on new lines
See #911
2018-04-12 21:56:12 -04:00
Andrew Kelley
9e701e951b zig fmt includes trailing commas
See #911
2018-04-12 21:40:15 -04:00
Andrew Kelley
d4572d1140 zig fmt: container init fields each on own line
See #911
2018-04-12 21:23:18 -04:00
Andrew Kelley
373b3586a1 inline functions must be stored in const or comptime var
closes #913
2018-04-12 16:26:23 -04:00
Jimmi Holst Christensen
fad54e62bb std.zig.ast: Fixed build failures 2018-04-12 18:56:58 +02:00
Jimmi Holst Christensen
d35a6655e0 std.zig.parser: Refactored commaOrEnd to expectCommaOrEnd
* Now it returns end when found, or null if comma was found.
* State should now be appended outside the function
2018-04-12 18:13:09 +02:00
Andrew Kelley
29e0e4088e Merge remote-tracking branch 'origin/master' into self-hosted-cli 2018-04-12 11:20:38 -04:00
Andrew Kelley
7b2cb7e679 remove --zig-install-prefix arg now that we find std at runtime 2018-04-12 11:00:11 -04:00
Andrew Kelley
c43f77f109 fix invalid implicit cast on macos 2018-04-12 10:38:32 -04:00
Jimmi Holst Christensen
206c0b8bdb std.zig.parser: Refactor, round 1:
* Removed the Optional state
  * We now have an OptionalCtx instead of DestPtr
  * OptionalCtx simulated return, instead of reverting states
  * OptionalCtx is a lot less hacky, but is still a small footgun
* Trying to avoid consuming more than one token per state
  * This is required, because of comments
  * The C++ compiler allows comments between all tokens
  * We therefore have to consume comment tokens between each state
* Reordered states so they are grouped in some logical fashion
2018-04-12 16:08:23 +02:00
Marc Tiehuis
803f0a295b Revise self-hosted command line interface
Commands are now separated more precisely from one another. Arguments
are parsed mostly using a custom argument parser instead of manually.
This should be on parity feature-wise with the previous main.zig but
adds a few extra code-paths as well that were not yet implemented.

Subcommands are much more prominent and consistent. The first argument
is always a sub-command and then all following arguments refer to that
command. Different commands display their own usage messages and options
based on what they can do instead of a one-for-all usage message that
was only applicable for the build commands previously.

The `cc` command is added and is intended for driving a c compiler. See #490.
This is currently a wrapper over the system cc and assumes that it
exists, but it should suffice as a starting point.
2018-04-12 22:28:47 +12:00
Jimmi Holst Christensen
0d8646d262 std.zig.parser now parses alignment of functions
Related #909
This allows it to parse `std/special/compiler_rt/index.zig`
2018-04-12 08:46:26 +02:00
Andrea Orru
70f2bb03fd outb syscall 2018-04-11 23:11:26 -07:00
Andrew Kelley
ce68dda4b6 Merge remote-tracking branch 'origin/master' into llvm7 2018-04-11 18:27:06 -04:00
Andrew Kelley
2b86ffe34a LLD patch: Do not keep shared symbols to garbage...
-collected eliminated DSOs.

This applies https://reviews.llvm.org/D45536 to the embedded
LLD.

Closes #883
2018-04-11 18:15:33 -04:00
Jimmi Holst Christensen
ed1b028276 Merge branch 'master' of github.com:zig-lang/zig 2018-04-11 20:56:22 +02:00
Jimmi Holst Christensen
5b584e06e3 std.zig.parser special cased error in return.
Related #909
This allows parsing of `std/special/build_runner.zig`
2018-04-11 20:56:05 +02:00
Andrew Kelley
e48e707c32 allow integer and float literals to be passed to var params
closes #623
2018-04-11 14:47:37 -04:00
Jimmi Holst Christensen
a7f77d7c6a std.zig.parser: requireSemiColon now matches the C++ behavior
Related #909
Allows parsing of `std/os/child_process.zig`
2018-04-11 15:26:00 +02:00
Jimmi Holst Christensen
df4c575525 std.zig.parser now parses inline fn proto
Related #909
Allows parsing of `std/os/zen.zig`.
2018-04-11 15:17:51 +02:00
Jimmi Holst Christensen
fe7146277d std.zig.parser now accept both string and multiline string for strings
Related #909
Allows it to parse `std/special/compiler_rt/aullrem.zig`,
`std/special/compiler_rt/aulldiv.zig` and `std/math/x86_64/sqrt.zig`
2018-04-11 14:43:53 +02:00
Jimmi Holst Christensen
28ea364e5e std.zig.parser now handle try's precedence correctly
This allows parsing of `std/zig/parser.zig`. Related: #909
2018-04-11 13:56:39 +02:00
Jimmi Holst Christensen
841ac0f4e1 std.zig.parser now allows assignment expr in switch cases.
This makes `std/os/index.zig` parse
related: #909
2018-04-11 13:46:35 +02:00
Jimmi Holst Christensen
4b0556ebd4 std.zig.parser can now parse std/heap.zig:
related: #909
* Struct fields can now be pub
* Parsing of double deref now works
* Block expressions now have the right precedence
2018-04-11 13:38:06 +02:00
Jimmi Holst Christensen
6fb5ab1b52 std.zig.parser: Redid parsing of error set decl
related: #909
2018-04-11 13:05:42 +02:00
Jimmi Holst Christensen
5f3ec023cd std.zig.parser: Fixed parsing of field access rhs
related: #909
2018-04-11 12:53:01 +02:00
Jimmi Holst Christensen
281c17f6ae std.zig.parser:
* Renamed eatToken to expectToken
* A new eatToken fn, which only eats the token if the id matches
* Inlined initNode, as it is not supposed to be used outside createNode
2018-04-11 12:05:10 +02:00
Jimmi Holst Christensen
dae287524d std.zig: Major refactor
* There now exist a few functions to allocate all nodes in parser.zig
* ast.zig now has a table of Ids and their corresponding types
2018-04-11 10:37:04 +02:00
Andrea Orru
135a335ce1 Merge branch 'master' into zen_stdlib 2018-04-11 00:33:19 -07:00
Andrea Orru
b01c5a95c4 Update zen library 2018-04-11 00:31:32 -07:00
Andrew Kelley
f43711e5fb Merge branch 'bnoordhuis-fix879' 2018-04-11 00:33:14 -04:00
Andrew Kelley
58c6424d4f simplify and fix BufMap logic 2018-04-11 00:32:42 -04:00
Andrew Kelley
19e0ed5d3e Merge branch 'fix879' of https://github.com/bnoordhuis/zig into bnoordhuis-fix879 2018-04-10 23:37:08 -04:00
Andrew Kelley
2ec1cec92d add more linux syscalls and constants
Based on #904 by tgschultz
2018-04-10 23:29:24 -04:00
Andrew Kelley
64d96ad703 Merge pull request #873 from zig-lang/self-hosted-parser
Self hosted parser completion
2018-04-10 22:47:18 -04:00
Josh Wolfe
405a2390f0 zig fmt while-else with no blocks 2018-04-10 22:44:55 -04:00
Andrew Kelley
f6c77746d6 add memmove to builtin.o
related: #514
2018-04-10 22:24:01 -04:00
Andrew Kelley
27e881c2d7 fix another undefined deref
see 0ba85ea6ff
2018-04-10 21:58:04 -04:00
Josh Wolfe
b553b7ab83 Merge branch 'master' into self-hosted-parser 2018-04-10 21:46:13 -04:00
Andrew Kelley
ee3e2790aa cmake defaults stage1 to install in build directory 2018-04-10 20:57:37 -04:00
Jimmi Holst Christensen
0ba85ea6ff std.zig.parser fixed segfault when parsing cc for fn decl 2018-04-10 17:46:17 +02:00
Andrew Kelley
477ded9042 add missing call in zig fmt to commit results to disk 2018-04-10 11:00:57 -04:00
Jimmi Holst Christensen
c6aa637146 std.zig.parser: removed duplicate "zig fmt: coroutines" test 2018-04-10 16:33:43 +02:00
Jimmi Holst Christensen
b9cccce26d std.zig.ast: fixed non-compiling code 2018-04-10 15:56:37 +02:00
Jimmi Holst Christensen
db9a9f3a6c std.zig.parser now parses the var type
* I parse it as a type in all contexts. This is not how the
  C++ compiler does it, but I think typechecking should catch this
2018-04-10 15:16:31 +02:00
Jimmi Holst Christensen
aa09e7b639 std.zig.tokenizer now treats string identifiers as identifiers 2018-04-10 15:01:21 +02:00
Jimmi Holst Christensen
3b80e66507 std.zig.parser now parses toplevel use 2018-04-10 14:52:47 +02:00
Jimmi Holst Christensen
db0812d4b7 std.zig.parser: changed block exprs from primary expr to expr 2018-04-10 14:22:01 +02:00
Jimmi Holst Christensen
706e0d739e std.zig.parser readded all tests
* Oops!
2018-04-10 13:49:52 +02:00
Jimmi Holst Christensen
1b81e406f0 std.zig: fixed compiler errors 2018-04-10 13:43:20 +02:00
Jimmi Holst Christensen
34af38e09b std.zig.tokenizer: fixed failing tests 2018-04-10 11:35:41 +02:00
Jimmi Holst Christensen
f85b9f2bf3 std.zig.parser now parses coroutine code 2018-04-10 11:25:58 +02:00
Jimmi Holst Christensen
5cd69ee6a4 std.zig.parser changed assign expr to only be allowed in some contexts
* Only allowed in while continue expr and statement expr
2018-04-10 09:37:29 +02:00
Jimmi Holst Christensen
2c7996f400 std.zig.parser can now render asm expressions 2018-04-10 09:27:11 +02:00
Andrew Kelley
4545be360a fix std.io.readline to work on windows
closes #882
2018-04-09 21:14:55 -04:00
Jimmi Holst Christensen
a09bb408a2 std.zig.parser now parses asm expressions
* We cannot render asm expressions yet
2018-04-09 15:40:16 +02:00
Jimmi Holst Christensen
aa552633cc std.zig.parser now parses fn types 2018-04-09 14:02:03 +02:00
Jimmi Holst Christensen
7d32c9521f std.zig.parser now parses comptime 2018-04-09 13:24:47 +02:00
Jimmi Holst Christensen
d04346d2ac ast.zig.parser now parses defer statements 2018-04-09 13:07:46 +02:00
Jimmi Holst Christensen
c19f5a2356 std.zig.parser now parses if statements 2018-04-09 12:51:18 +02:00
Jimmi Holst Christensen
7dd55a8007 std.zig.parser now parses for loops 2018-04-09 11:48:25 +02:00
Jimmi Holst Christensen
e24409ebe0 std.zig.parser unified code for rendering and parsing semicolon in statements 2018-04-09 11:17:57 +02:00
Jimmi Holst Christensen
e260c8ca63 std.zig.parser now parses while loops and labeled break and continue 2018-04-09 11:11:18 +02:00
Andrew Kelley
9ef1ba9d54 Merge branch 'async-tcp-server' 2018-04-09 00:53:16 -04:00
Andrew Kelley
e85a10e9f5 async tcp server proof of concept 2018-04-09 00:52:45 -04:00
Andrew Kelley
cbda0fa78c basic tcp server working when used with netcat 2018-04-08 20:08:40 -04:00
Andrew Kelley
acd8f6ef18 fixups from rebase 2018-04-08 18:49:20 -04:00
Andrew Kelley
8f4ad95777 update what std tests to run 2018-04-08 18:30:54 -04:00
Andrew Kelley
b85ef656ca running into the llvm corosplit error again 2018-04-08 18:30:54 -04:00
Andrew Kelley
0d22a00f6f *WIP* async/await TCP server 2018-04-08 18:26:24 -04:00
Andrew Kelley
292d0cbdad add docs for union methods 2018-04-08 18:03:09 -04:00
Andrew Kelley
eae355d771 add docs for packed enum 2018-04-08 18:03:09 -04:00
Andrew Kelley
fef06f2142 Merge branch 'async-err-ret-traces'
closes #821
2018-04-08 17:57:29 -04:00
Andrew Kelley
ee1a4f4c1d error return traces work with async return case 2018-04-08 17:44:29 -04:00
Andrew Kelley
9e98ea552d fix calling convention at callsite of zig-generated fns 2018-04-08 16:40:59 -04:00
Andrew Kelley
ada441157f put the error return addresses in the coro frame 2018-04-08 16:04:48 -04:00
Andrew Kelley
e4083b7391 codegen: fix not putting llvm allocas together 2018-04-08 16:04:48 -04:00
Andrew Kelley
d26905c102 error return traces for the early return case
it would work but LLVM is not correctly spilling the addresses.

See #821
2018-04-08 16:04:48 -04:00
Jimmi Holst Christensen
e4d0b46c0c std.zig.parser WIP generalizing parsing of payloads
* Note, it doesn't work :)
2018-04-08 17:05:08 +02:00
Jimmi Holst Christensen
bdff5bfa3e std.zig.parser now parses switch 2018-04-07 01:38:38 +02:00
Andrew Kelley
7186e92c86 Merge pull request #900 from zig-lang/hash-and-checksums
Add common hash/checksum functions
2018-04-06 09:44:25 -04:00
Jimmi Holst Christensen
820de1716b std.zig.parser now parses labeled blocks.
* There is also some code for switch range parsing
2018-04-06 15:37:49 +02:00
Marc Tiehuis
c34ce2cbc6 Add common hash/checksum functions
- SipHash64, SipHash128
 - Crc32 (fast + small variants)
 - Adler32
 - Fnv1a (32, 64 and 128 bit variants)
2018-04-06 23:10:54 +12:00
Jimmi Holst Christensen
f667744d44 std.zig.parser Fixed:
* Parsing of the optional expression in control flow expr
* Rendering of catch expressions
2018-04-06 09:36:11 +02:00
Jimmi Holst Christensen
e45de607d6 std.zig.parser: Initializers are now parsed and fmt correctly 2018-04-06 08:56:28 +02:00
Andrew Kelley
873641c123 Merge pull request #899 from bnoordhuis/fix898
fix llvm assert on version string with git sha
2018-04-05 18:55:09 -04:00
Ben Noordhuis
8980281184 fix llvm assert on version string with git sha
LLVM's CodeViewDebug pass misparses the version string when it contains
a git revision so stop doing that.  This only affected Windows builds.

closes #898
2018-04-06 00:31:55 +02:00
Ben Noordhuis
9e8519b7a2 fix use-after-free in BufMap.set()
closes #879
2018-04-05 23:32:49 +02:00
Andrew Kelley
588116cacc travis: update docker image tag for llvm7 2018-04-04 18:29:31 -04:00
Andrew Kelley
5800faa318 update to latest llvm API 2018-04-04 18:27:52 -04:00
Jimmi Holst Christensen
779247ba11 std.zig Major Refactor
* parser now parses expression like the C++ compiler does
* This makes initializers work
* Added control flow expression (only return is parsed)
* Added catch parsing (It doesn't quite work)
* The parse can now specify states as optional.
  * The parse will roll back on error if states are optional
  * This can be overridden by State.Required
2018-04-04 23:36:55 +02:00
Andrew Kelley
cca93908e6 Merge remote-tracking branch 'origin/master' into llvm7 2018-04-04 17:22:26 -04:00
Marc Tiehuis
8938429ea1 Add Hmac function (#890) 2018-04-04 10:31:10 -04:00
Jimmi Holst Christensen
744416ce0c std.zig.parser should now parse operators with precedence.
* This hasn't been tested yet
2018-04-04 14:58:51 +02:00
Marc Tiehuis
f68c2e0a14 Fix off-by-one error in all crypto functions 2018-04-04 21:32:23 +12:00
Jimmi Holst Christensen
ca0085c46d std.zig.parser now parses error set declarations 2018-04-04 10:54:48 +02:00
Jimmi Holst Christensen
020724cfa0 std.zig.tokenizer Tokens now don't contain a line and column field.
* Instead, this information is obtained by asking the tokenizer.
* getTokenLocation takes a start_index, so relative loc can be obtained
2018-04-04 10:27:38 +02:00
Jimmi Holst Christensen
09cf823619 std.zig.parser now parses container decls 2018-04-04 09:57:37 +02:00
Andrew Kelley
3d8541121b Merge branch 'hellerve-wip-macos-dirent' 2018-04-04 00:08:48 -04:00
Andrew Kelley
abd389209b fix up logic for macos std.os.deleteTree 2018-04-04 00:08:10 -04:00
Andrew Kelley
e1e536e03d Merge branch 'wip-macos-dirent' of https://github.com/hellerve/zig into hellerve-wip-macos-dirent 2018-04-03 23:33:18 -04:00
Andrew Kelley
6050b9d835 travis: don't skip tests
fix broken previous commit
2018-04-03 21:40:36 -04:00
Andrew Kelley
2676da61a6 travis: better s3 artifacts 2018-04-03 21:39:03 -04:00
Andrew Kelley
9dfd1a7c8a remove more signal handling stuff from std.os.ChildProcess
439621e44a failed to remove
everything. this finishes the job
2018-04-03 18:26:49 -04:00
Andrew Kelley
d1f8e722b5 travis: don't upload other files as artifacts 2018-04-03 14:23:56 -04:00
Jimmi Holst Christensen
ec611bf8b4 std.zig.parser now parses regular enums, unions and struct
* Still missing packed, and extern
2018-04-03 20:00:02 +02:00
Andrew Kelley
21b47b34d8 travis: don't upload build/ folder as artifacts 2018-04-03 12:59:28 -04:00
Andrew Kelley
65e4bb149e travis artifacts: don't upload extra stuff 2018-04-03 12:04:06 -04:00
Andrew Kelley
aadc14fd78 upload static linux artifacts on successful travis build 2018-04-03 11:22:18 -04:00
Jimmi Holst Christensen
d602f12df8 std.zig.ast Added ContainerDecl 2018-04-03 15:59:14 +02:00
Jimmi Holst Christensen
4fae452684 std.zig.parser Refactored top level decl parsing
* Now, the arraylist from the root node is passed through the states.
* This allows us to reuse the code for enums, unions and structs
2018-04-03 15:33:22 +02:00
Jimmi Holst Christensen
40f35e997a std.zig.parser moved container initializer tests down 2018-04-03 15:17:26 +02:00
Jimmi Holst Christensen
9d69e94bba std.zig.parser now parses grouped expressions
* I also moved some tests down, as they fail in ways I can't fix yet
2018-04-03 15:16:32 +02:00
Jimmi Holst Christensen
5c82ed2ea9 std.zig.parser now parses initializers... Or, it would, if it worked 2018-04-03 14:53:27 +02:00
Jimmi Holst Christensen
0b9247fb63 std.zig.parser Refactor:
* Slice/Array access is now not parsed in the expr construction loop
* State.ExprListItemOrEnd now takes a token id for the end token
2018-04-03 14:20:34 +02:00
Jimmi Holst Christensen
b424cd75ab std.zig.parser refactored call, slice and array access to be suffix op 2018-04-03 12:33:06 +02:00
Jimmi Holst Christensen
22e38ffb54 std.zig.tokenizer fixed tokens having wrong column and line 2018-04-03 11:18:18 +02:00
Jimmi Holst Christensen
a2330d0ea3 std.zig.parser now parses slice and array types 2018-04-03 10:54:19 +02:00
Andrew Kelley
4eb68987d8 std.io.readLine function
this provides a better input for guess number example.

see #882
2018-04-02 11:34:31 -04:00
Jimmi Holst Christensen
b9093185f7 std.zig.parser now parses slicing and array access 2018-04-01 22:02:51 +02:00
Marc Tiehuis
2e5115b068 Add run compiler command
'zig run file.zig' builds a file and stores the artifacts in the global
cache. On successful compilation the binary is executed.

'zig run file.zig -- a b c' does the same, but passes the arguments a,
b and c as runtime arguments to the program. Everything after a '--' is
treated as runtime arguments.

On a posix system, a shebang can be used to run a zig file directly. An
example shebang would be '#!/usr/bin/zig run'. You may not be able to pass
extra compile arguments currently as part of the shebang. Linux for example
treats all arguments after the first as a single argument which will result
in an 'invalid command'.

Currently there is no customisability for the cache path as a compile
argument. For a posix system you can use `TMPDIR=. zig run file.zig` to
override, in this case using the current directory for the run cache.

The input file is always recompiled, even if it has not changed. This is
intended to be cached but further discussion/thought needs to go into
this.

Closes #466.
2018-04-01 17:03:06 +12:00
Jimmi Holst Christensen
df09c01f7f std.zig.parser now parses error, this and unreachable 2018-03-31 22:48:12 +02:00
Andrew Kelley
67f11190d1 musl-friendly dynamic linking 2018-03-31 16:34:55 -04:00
Jimmi Holst Christensen
aabf7cf57e std.zig.parser now parses null and bool literals 2018-03-31 22:10:49 +02:00
Jimmi Holst Christensen
975dc5a390 std.zig.parser now parses char literals 2018-03-31 21:28:40 +02:00
Jimmi Holst Christensen
4d8f9e2295 std.zig.parser now parses multi line strings 2018-03-31 21:04:54 +02:00
Raul Leal
eb6ff796c1 Fix undeclared identifier error in readUntilDelimiterBuffer and incorrect number of parameters in readUntilDelimiterAlloc (#877) 2018-03-31 12:21:19 -04:00
Andrew Kelley
51a6ff18d4 Merge pull request #872 from zig-lang/runtime-libc
find libc and zig std lib at runtime
2018-03-31 12:13:30 -04:00
Jimmi Holst Christensen
4793c3397e std.zig.parser now handles lib name for extern var and fn 2018-03-31 17:46:29 +02:00
Andrew Kelley
8f962a957a fix regressions on windows 2018-03-31 11:26:02 -04:00
Jimmi Holst Christensen
cda3509353 Added test cases to cover all of zigs syntax 2018-03-31 15:39:51 +02:00
Jimmi Holst Christensen
26e56f2fab Each test now has its own test name 2018-03-31 14:18:09 +02:00
Jimmi Holst Christensen
596f4b6002 Fixed review commented code 2018-03-31 14:00:49 +02:00
Marc Tiehuis
7d66908f29 docs: fix unclosed code tag 2018-03-31 23:17:02 +13:00
Andrew Kelley
c3724ec506 implement os_self_exe_path in the c++ compiler for darwin
ported from the zig std lib

this fixes looking for zig std lib at runtime on darwin
2018-03-31 02:12:44 -04:00
Jimmi Holst Christensen
5118caf5ab Added a lot of test cases 2018-03-31 00:53:00 +02:00
Andrew Kelley
5d5feb11de appveyor and travis ci: stop passing unused configure args 2018-03-30 17:26:01 -04:00
Andrew Kelley
b01c50d6fa find libc and zig std lib at runtime
this removes the following configure options:
 * ZIG_LIBC_LIB_DIR
 * ZIG_LIBC_STATIC_LIB_DIR
 * ZIG_LIBC_INCLUDE_DIR
 * ZIG_DYNAMIC_LINKER
 * ZIG_EACH_LIB_RPATH
 * zig's reliance on CMAKE_INSTALL_PREFIX

these options are still available as command line options, however,
the default will attempt to execute the system's C compiler to
collect system defaults for these values.

closes #870
2018-03-30 17:10:54 -04:00
Jimmi Holst Christensen
24071c6803 std.zig.parser parses all prefix operators 2018-03-30 21:45:42 +02:00
Jimmi Holst Christensen
1dfa927a67 std.zig.parser now treats call expr as a suffix operator 2018-03-30 20:47:09 +02:00
Andrew Kelley
f586acabdc add ZIG_STATIC cmake option
it's not compatible with glibc but it works with musl
2018-03-30 13:20:13 -04:00
Jimmi Holst Christensen
edca173997 std.zig.parser now parses call expr 2018-03-29 23:40:46 +02:00
Jimmi Holst Christensen
8ada030971 Fixed self hosted compiler compiler error from prev commit 2018-03-29 22:37:54 +02:00
Jimmi Holst Christensen
530f795769 std.zig.parser now supports all infix operators 2018-03-29 22:31:17 +02:00
Andrew Kelley
b80398b355 Merge pull request #867 from zig-lang/rand-overhaul
Rewrite Rand functions
2018-03-29 14:14:35 -04:00
Andrew Kelley
ccadcbc715 fix examples and rename std.rand.Rand to std.rand.Random 2018-03-29 12:33:29 -04:00
Marc Tiehuis
0fd0f6fd1f Rewrite Rand functions
We now use a generic Rand structure which abstracts the core functions
from the backing engine.

The old Mersenne Twister engine is removed and replaced instead with
three alternatives:

 - Pcg32
 - Xoroshiro128+
 - Isaac64

These should provide sufficient coverage for most purposes, including a
CSPRNG using Isaac64. Consumers of the library that do not care about
the actual engine implementation should use DefaultPrng and DefaultCsprng.
2018-03-30 01:50:58 +13:00
Jimmi Holst Christensen
9df2a6a502 std.zig.parser can now parse top level test declarations 2018-03-29 13:43:17 +02:00
hellerve
7e951e5043 std/os: address @andrewrk concerns 2018-03-29 10:23:44 +02:00
Andrew Kelley
032fccf615 fix compile time array concatenation for slices
closes #866
2018-03-28 23:25:12 -04:00
Andrew Kelley
5627347bab Merge pull request #865 from bnoordhuis/zig-build-stage2
skeleton stage 2 support for 'zig build'
2018-03-28 21:02:09 -04:00
Jimmi Holst Christensen
72ce146293 Fixed looking for windows sdk when targeting linux 2018-03-29 00:53:06 +02:00
Ben Noordhuis
b60b01ce97 skeleton stage 2 support for 'zig build'
Initial port of the 'zig build' logic from the stage 1 compiler to the
stage 2 compiler sans code generation and BUILD_INFO support.
2018-03-28 18:30:41 +02:00
Ben Noordhuis
db70b909a0 non-zero exit when build.zig cannot be created
Make the stage 1 compiler exit with a non-zero status code
when `zig build --init` cannot create a new build.zig file.
2018-03-28 18:30:41 +02:00
Andrew Kelley
f5b43ada46 std/os: getting dir entries works on OS X 2018-03-28 12:06:48 +02:00
Andrew Kelley
5b00dee0c2 std.math.cast handles signed integers 2018-03-27 15:20:07 -04:00
Andrew Kelley
6cb99fdac3 fix crash when compile error in analyzing @panic call 2018-03-27 15:07:45 -04:00
Andrew Kelley
0b7b3190fd fix bitrotted code in unexpected error tracing 2018-03-27 10:44:13 -04:00
Andrew Kelley
c0a69a5075 Merge pull request #860 from jayschwa/patch-1
doc: fix typo and tighten wording in error sections
2018-03-25 23:31:12 -04:00
Jay Weisskopf
3e836f5516 doc: fix typo and tighten wording in error sections
Changes:
- Removed superfluous "when possible"
- Fixed typo in "documentationt"
- Added missing comma
- Moved definition of error union type up to first sentence
2018-03-25 18:48:07 -04:00
Andrew Kelley
aa2995ee39 fix invalid codegen for error return traces across suspend points
See #821

Now the code works correctly, but error return traces are missing
the frames from coroutines.
2018-03-24 22:07:12 -04:00
Andrew Kelley
a43c7af3d1 add comptime test for the type of suspend promise 2018-03-24 19:31:00 -04:00
Andrew Kelley
897e783763 add promise->T syntax parsing
closes #857
2018-03-24 19:25:53 -04:00
Andrew Kelley
18af2f9a27 fix async fns with inferred error sets
closes #856
2018-03-24 18:28:32 -04:00
Andrew Kelley
b1c07c0ea9 move error ret tracing codegen to zig ir
progress towards #821
2018-03-24 18:28:32 -04:00
Andrew Kelley
2cff31937f std.os.linux exposes syscall functions and syscall numbers 2018-03-24 15:57:36 -04:00
Marc Tiehuis
7350181a4a Fix os.File.mode function 2018-03-23 22:41:08 +13:00
Marc Tiehuis
3d1732ef6c Fix OpaqueType usage in exported c functions
We prefer `struct typename`. If a typedef is required, this must be done
manually after generation.
2018-03-23 20:27:11 +13:00
Andrew Kelley
c541ac240c use the llvm API for creating memcpy and memset instructions 2018-03-22 20:59:26 -04:00
Andrew Kelley
fe38d8142f create multiple llvm.memcpy and llvm.memset with different align params 2018-03-22 20:22:15 -04:00
Andrew Kelley
7a99d63c76 ability to use async function pointers
closes #817
2018-03-22 16:56:03 -04:00
Andrew Kelley
62668e3e6b update to llvm 7.0.0 2018-03-22 15:39:50 -04:00
Marc Tiehuis
53588f4f12 Add missing pub specifier to atan2 2018-03-22 19:18:51 +13:00
Andrew Kelley
f885a1ab61 change async function call syntax
* instead of `async(allocator) call()`, now it is
   `async<allocator> call()`.
 * Fixes syntax ambiguity when leaving off the allocator
 * Fixes parse failure when call is a field access

This sets a precedent for using `<` to pass arguments
to a keyword. This will affect `enum`, `union`, and
`fn` (see #661)
2018-03-21 19:56:41 -04:00
Andrew Kelley
66fec3a3d7 Merge pull request #851 from zig-lang/zen_stdlib
Zen specific hacks
2018-03-20 16:16:08 -04:00
Andrea Orru
43cdfa275a Zen specific hacks 2018-03-20 16:09:30 -04:00
Andrew Kelley
1369fecccf Merge pull request #847 from walac/master
Include libxml2 and zlib as required libraries
2018-03-20 16:05:15 -04:00
Wander Lairson Costa
543952eb87 Include libxml2 and zlib as required libraries
libxml2 is a required library, but we only find out that when the build
fails to link against it, if it is not present. The same for zlib.

We use find_library to find these two libraries and print nice fail
messages if they are not found.
2018-03-20 18:15:02 +00:00
Andrew Kelley
cb744f3a28 self-hosted build: print helpful message when libstdc++.a not found
closes #843
2018-03-20 13:48:25 -04:00
Andrew Kelley
71b4ee931e Merge pull request #849 from zig-lang/zen_stdlib
Updates to the Zen standard library
2018-03-20 11:47:19 -04:00
Andrea Orru
0082ed0ef1 Public SplitIterator 2018-03-20 11:40:33 -04:00
Andrew Kelley
e966d375fb Merge pull request #844 from ice1000/patch-1
Remove unnecessary rule and re-fix an old bug
2018-03-19 15:12:44 -04:00
Tesla Ice Zhang
c4544df011 Remove unnecessary rule and re-fix an old bug
The "old bug" was caused by my last PR. I'm fixing it now.
2018-03-20 03:00:11 +08:00
Andrea Orru
935f10502f Message type, Undefined mailbox, read syscall, more constructors 2018-03-18 14:45:23 -04:00
Andrew Kelley
d959faa4c7 add test for addIncludeDir for test step 2018-03-17 18:19:23 -04:00
Marc Tiehuis
bea9e9c7f8 Add addIncludeDir to TestStep + fix build template
Closes #794.
2018-03-17 18:15:35 -04:00
Andrew Kelley
bbad0fa411 Merge pull request #839 from ice1000/patch-1
Fix typos in grammar section of langref
2018-03-17 14:44:17 -04:00
Andrew Kelley
453439a964 Merge pull request #840 from bnoordhuis/libdirs
add CLANG_LIBDIRS cmake build variable
2018-03-17 14:43:26 -04:00
Ben Noordhuis
c2c34c09b9 add LLVM_LIBDIRS to link directories
This seems to be the only way to get the zig build to link against llvm
libraries in a non-standard location.
2018-03-17 18:41:54 +01:00
Ben Noordhuis
4a921b2eba add CLANG_LIBDIRS cmake build variable
Mirrors LLVM_LIBDIRS, tells cmake where to look for libclang libraries.
2018-03-17 18:41:47 +01:00
Tesla Ice Zhang
cc6ac77913 Fix some explicit errors 2018-03-17 23:17:07 +08:00
Andrea Orru
df3d2115b5 Service -> Server 2018-03-16 20:27:13 -07:00
Andrea Orru
81941f9161 Add Thread option for Mailboxes 2018-03-16 01:41:45 -07:00
Andrea Orru
ce88034716 Merge branch 'master' into zen_stdlib 2018-03-15 21:17:40 -07:00
Andrea Orru
4c16deed3e Some POSIX stuff, including a primitive write 2018-03-15 17:57:56 -07:00
Andrew Kelley
f073923ea0 Release 0.2.0 2018-03-15 09:15:05 -04:00
Andrea Orru
681c62941e subscribeIRQ support 2018-03-15 04:28:45 -07:00
Andrea Orru
9b7e4b535c More precise naming 2018-03-15 02:22:03 -07:00
Andrea Orru
52ef1aadcb Merge branch 'master' into zen_stdlib 2018-03-14 22:15:33 -07:00
Andrea Orru
4fcf01adc5 IPC structure updates 2018-03-14 22:07:17 -07:00
Andrew Kelley
50e25f6cec add missing docs for setAlignStack builtin 2018-03-14 21:51:06 -04:00
Andrew Kelley
efebb6d341 fix tests broken by previous commit 2018-03-14 03:37:54 -04:00
Andrea Orru
c828c23f71 Tests for zero-bit field compiler error 2018-03-13 22:07:40 -07:00
Andrea Orru
7ac44037db Compiler error when taking @offsetOf of void struct member
closes #739
2018-03-13 21:20:06 -07:00
Andrea Orru
2a6ad23b52 Merge branch 'master' of https://github.com/zig-lang/zig 2018-03-13 16:16:22 -07:00
Andrew Kelley
7f7823e23c fix casting a function to a pointer causing compiler crash
closes #777
2018-03-13 19:15:20 -04:00
Andrea Orru
2cdd50c9b2 Panic instead of segfault when returning generic type from functions
closes #829
2018-03-13 16:14:21 -07:00
Marc Tiehuis
d6e84e325b Add WebAssembly output workaround for LLVM 6 2018-03-13 21:53:42 +13:00
Andrew Kelley
bcce77700f some return types disqualify comptime fn call caching
closes #828
2018-03-12 12:56:25 -04:00
Andrew Kelley
5834ff0cc5 don't memoize comptime fn calls that access comptime mutable state
closes #827
2018-03-12 08:35:41 -04:00
Andrew Kelley
1bf2810f33 fix comptime slicing not preserving comptime mutability
* fix comptime slice of slice not preserving mutability
   of the comptime data
 * fix comptime slice of pointer not preserving mutability
   of the comptime data

closes #826
2018-03-12 01:21:10 -04:00
Andrew Kelley
49c3922037 fix incorrect setEvalBranchQuota compile error
closes #688
2018-03-12 00:08:52 -04:00
Andrea Orru
c18059a3dd Merge branch 'master' of https://github.com/zig-lang/zig 2018-03-10 16:59:53 -08:00
Andrea Orru
d0621391bc zen-specific: main -> _start 2018-03-10 16:59:28 -08:00
Andrew Kelley
5bc4f1e3f1 xml2 workaround is relevant for linux too 2018-03-10 18:23:08 -05:00
Andrea Orru
10fb1f2730 Merge branch 'test-ci' 2018-03-10 13:13:48 -08:00
Andrea Orru
152b408934 Simplify intrusive linked list test 2018-03-10 12:20:29 -08:00
Andrew Kelley
e4fd3fd52b workaround for llvm-config missing xml2 2018-03-10 14:48:41 -05:00
Andrew Kelley
6288ad865c change 5 to 6 in travis osx scripts 2018-03-10 14:36:59 -05:00
Andrew Kelley
84e952c230 fix await multithreaded data race
coro return was reading from a value that coro await was
writing to. that wasn't how it was designed to work, it
was an implementation mistake.

this commit also has some work-in-progress code for fixing
error return traces across suspend points.
2018-03-10 01:38:40 -05:00
Andrew Kelley
3b3649b86f refactor stack trace code to remove global state 2018-03-10 01:38:40 -05:00
Andrew Kelley
60b2031831 improvements to stack traces
* @panic generates an error return trace
 * printing an error return trace no longer interferes with
   normal stack traces.
 * instead of ignore_frame_count, we look at the return address
   when you call panic, and that's the first stack trace function
   makes stack traces much cleaner - the error return trace
   flows gracefully into the stack trace
2018-03-10 01:38:40 -05:00
Andrew Kelley
20011a7a1c add behavior test for coroutine frame allocation failure 2018-03-10 01:38:40 -05:00
Andrew Kelley
61a02d9d1e omit pad zeroes in debug stack traces 2018-03-10 01:38:40 -05:00
Andrea Orru
f25c1c6858 Fixed syntax errors in linux-i386 syscalls 2018-03-09 22:25:21 -08:00
Andrea Orru
70c3008a00 Added 6 parameters syscalls for zen 2018-03-09 22:24:52 -08:00
Marc Tiehuis
7a893691c0 Unroll Sha3 inner loop
Issue #699 since fixed. Nearly a x3 perf improvement.

Using --release-fast.

Sha3_256 (before): 96 Mb/s
Sha3_256  (after): 267 Mb/s

Sha3_512 (before): 53 Mb/s
Sha3_512  (after): 142 Mb/s

No real gains from unrolling other initialization loops in crypto
functions so have been left as is.
2018-03-10 10:00:07 +13:00
Andrew Kelley
5a7a0e8518 update to SoftFloat-3e
closes #823
2018-03-09 15:06:06 -05:00
Andrew Kelley
6db9be8900 don't memoize comptime functions if they can mutate state via parameters
closes #639
2018-03-09 14:20:44 -05:00
Andrew Kelley
aaf2230ae8 fix partial inlining of binary math operator using old value
the code was abusing the internal IR API. fixed now.

closes #699
2018-03-08 17:15:55 -05:00
Andrew Kelley
028ec0f2c3 enums with 1 field and explicit tag type still get the tag type
closes #820
2018-03-08 15:22:42 -05:00
Andrew Kelley
aa9902b586 translate-c: add missing case labels 2018-03-08 11:47:07 -05:00
Andrew Kelley
2db28ea849 travis ci: update ubuntu llvm repo and CC,CXX env vars to 6 2018-03-08 11:46:47 -05:00
Andrew Kelley
3200ebc2ea Merge branch 'llvm6'
Zig now depends on LLVM 6.0.0.

The latest commit that depends on LLVM 5.0.1 is
2e010c60ae.
2018-03-08 10:59:54 -05:00
Andrew Kelley
b57cb04afc Merge remote-tracking branch 'origin/master' into llvm6 2018-03-08 10:59:24 -05:00
Jimmi Holst Christensen
2e010c60ae Translate C now correctly converts ints, floats, ptrs and enums to bools
* Boolean "and" and "or" should also work with these types.
* This new method also simplifies the output code.
2018-03-08 15:34:00 +01:00
Jimmi Holst Christensen
b2887620f3 Translate C will now handle ignored return values 2018-03-08 13:15:30 +01:00
Jimmi Holst Christensen
689e241ff8 Merge branch 'master' of github.com:zig-lang/zig 2018-03-08 10:29:43 +01:00
Jimmi Holst Christensen
51b2f1b80b Translate C can now translate switch statements again 2018-03-08 10:29:29 +01:00
Andrew Kelley
790aaeacae add compile error for using @tagName on extern union
closes #742
2018-03-07 14:35:48 -05:00
Jimmi Holst Christensen
bb80daf509 Ast Render no longer outputs erroneous semicolon
closes #813
2018-03-07 10:39:32 +01:00
Andrew Kelley
d96dd5bc32 fix missing compile error for returning error from void async function
closes #799
2018-03-06 21:44:27 -05:00
Andrew Kelley
6b5cfd9d99 turn assertion into compile error for using var as return type
closes #758
2018-03-06 20:41:49 -05:00
Andrew Kelley
eff3530dfa var is no longer a pseudo-type, it is syntax
closes #779
2018-03-06 18:31:31 -05:00
Andrew Kelley
44ae891bd7 fix assertion when taking slice of zero-length array
closes #788
2018-03-06 17:19:45 -05:00
Andrew Kelley
cc0f660ad2 unless hf is specified in target environ, assume soft floating point
closes #804
2018-03-06 16:57:41 -05:00
Andrew Kelley
5d5820029d fix broken tests from previous commit 2018-03-06 16:46:45 -05:00
Andrew Kelley
07e47c058c ptrCast builtin now gives an error for removing const qualifier
closes #384
2018-03-06 16:37:03 -05:00
Andrew Kelley
46e258c9f7 Merge pull request #815 from Hejsil/more-translate-c
Translate C now handles bools better
2018-03-06 10:43:52 -05:00
Andrew Kelley
c3807dfb34 remove value judgement from std lib API docs
documentation should be purely technical, and not contain opinions about
how easy or hard something is.
2018-03-06 10:41:07 -05:00
Jimmi Holst Christensen
1d378d8f26 Removed fixed todo 2018-03-06 12:33:09 +01:00
Jimmi Holst Christensen
5ab25798e3 We now also use trans_to_bool_expr on bool not 2018-03-06 12:04:14 +01:00
Jimmi Holst Christensen
bf47cf418a expr to bool is now its own function.
* Now while and for loops work on ints and floats, like if statements
* This fixes the loop problem in #813
2018-03-06 11:57:51 +01:00
Jimmi Holst Christensen
61ecc48671 Added appropriate TODO comment to UO_LNot 2018-03-06 11:15:13 +01:00
Jimmi Holst Christensen
ed1386eeff Simple translation of UO_LNot 2018-03-06 11:13:10 +01:00
Andrew Kelley
d34d36619e Merge pull request #814 from jacobdufault/utf8-view
Make Utf8View public, add comments, and make iterator lowercase.
2018-03-06 01:42:04 -05:00
Jacob Dufault
8fd7e9115c Make Utf8View public, add comments, and make iterator lowercase. 2018-03-05 21:42:01 -08:00
Joshua Olson
c787837ce5 Clarify what is meant by 'libraries' (#808) 2018-03-04 19:26:16 -05:00
Joshua Olson
db18d38a43 Fix Linux gcc requirement (#807)
g++ may be a separate package. I had this problem on Fedora.
2018-03-04 17:46:17 -05:00
Andrew Kelley
73a306e2fa fix conflict artifact accidentally in appveyor script 2018-03-03 17:44:41 -05:00
Andrew Kelley
7ee1b88042 add llvm 6.0.0 binaries to appveyor cache 2018-03-03 16:43:57 -05:00
Andrew Kelley
1c244d34b3 Merge branch 'master' into llvm6 2018-03-03 16:30:59 -05:00
Andrew Kelley
56645c1701 std.debug.dwarf supports line number version 4
fixes stack traces for llvm6 generated zig programs
2018-03-02 16:26:22 -05:00
Andrew Kelley
101b7745c4 add optnone noinline to async functions
this works around LLVM optimization assertion failures.
https://bugs.llvm.org/show_bug.cgi?id=36578

closes #800
2018-03-02 13:40:03 -05:00
Andrew Kelley
a217c764db Merge remote-tracking branch 'origin/master' into llvm6 2018-03-01 22:25:15 -05:00
Andrew Kelley
7d494b3e7b Merge branch 'async'
closes #727
2018-03-01 21:55:15 -05:00
Andrew Kelley
de5c0c9f40 Merge remote-tracking branch 'origin/master' into async 2018-03-01 20:47:35 -05:00
Andrew Kelley
6bade0b825 coroutines: add await early test case 2018-03-01 16:17:38 -05:00
Andrew Kelley
8a0e1d4c02 await keyword works 2018-03-01 15:46:35 -05:00
Andrew Kelley
a7c87ae1e4 fix not casting result of llvm.coro.promise 2018-03-01 10:23:47 -05:00
Andrew Kelley
253d988e7c implementation of await
but it has bugs
2018-03-01 03:28:13 -05:00
Andrew Kelley
834e992a7c add test for coroutine suspend with block 2018-02-28 22:26:26 -05:00
Andrew Kelley
8429d4ceac implement coroutine resume 2018-02-28 22:18:48 -05:00
Andrew Kelley
c622766156 async function fulfills promise atomically 2018-02-28 21:48:20 -05:00
Andrew Kelley
807a5e94e9 add atomicrmw builtin function 2018-02-28 21:19:51 -05:00
Andrew Kelley
36eadb569a run coroutine tests only in Debug mode
LLVM 5.0.1, 6.0.0, and trunk crash when attempting to optimize coroutine code.
So, Zig does not support ReleaseFast or ReleaseSafe for coroutines yet.
Luckily, Clang users are running into the same crashes, so folks from the LLVM
community are working on fixes. If we're really lucky they'll be fixed in 6.0.1.
Otherwise we can hope for 7.0.0.
2018-02-28 18:56:26 -05:00
Andrew Kelley
58dc2b719c better coroutine codegen, now passing first coro test
we have to use the Suspend block with llvm.coro.end to
return from the coro
2018-02-28 18:22:43 -05:00
Andrew Kelley
ad2a29ccf2 break the data dependencies that llvm coro transforms can't handle
my simple coro test program builds now

see #727
2018-02-28 16:47:13 -05:00
Andrew Kelley
026aebf2ea another workaround for llvm coroutines
this one doesn't work either
2018-02-28 04:01:22 -05:00
Andrew Kelley
6568be575c Merge branch 'bnoordhuis-fix795' 2018-02-28 00:29:20 -05:00
Andrew Kelley
556f22a751 different way of fixing previous commit
get_fn_type doesn't need the complete parameter type, it
can just ensure zero bits known.
2018-02-28 00:28:26 -05:00
Andrew Kelley
1b8a241f6f Merge branch 'fix795' of https://github.com/bnoordhuis/zig into bnoordhuis-fix795 2018-02-28 00:22:53 -05:00
Andrew Kelley
0f449a3ec1 Merge pull request #796 from bnoordhuis/fix731-more
allow implicit cast from &const to ?&const &const
2018-02-27 23:55:03 -05:00
Ben Noordhuis
90598b4631 fix assert on self-referencing function ptr field
The construct `struct S { f: fn(S) void }` is not legal because structs
are not copyable but it should not result in an ICE.

Fixes #795.
2018-02-28 00:56:00 +01:00
Andrew Kelley
d243453862 Revert "llvm coroutine workaround: sret functions return sret pointer"
This reverts commit 132e604aa3.

this workaround didn't work either
2018-02-27 17:47:18 -05:00
Andrew Kelley
138d6f9093 revert workaround for alloc and free as coro params
reverts 4ac6c4d6bf

the workaround didn't work
2018-02-27 17:46:13 -05:00
Andrew Kelley
132e604aa3 llvm coroutine workaround: sret functions return sret pointer 2018-02-27 17:12:53 -05:00
Andrew Kelley
6e2a67724c Revert "another llvm workaround for getelementptr"
This reverts commit c2f5634fb3.

It doesn't work. With this, LLVM moves the allocate fn call
to after llvm.coro.begin
2018-02-27 14:58:02 -05:00
Andrew Kelley
c2f5634fb3 another llvm workaround for getelementptr 2018-02-27 14:57:49 -05:00
Andrew Kelley
439621e44a remove signal handling stuff from std.os.ChildProcess 2018-02-27 11:14:14 -05:00
Andrew Kelley
4e43bde924 workaround for llvm: delete coroutine allocation elision
maybe this can be reverted, but it seems to be related
to llvm's coro transformations crashing.

See #727
2018-02-26 21:31:00 -05:00
Andrew Kelley
4ac6c4d6bf workaround llvm coro transformations
by making alloc and free functions be parameters to async
functions instead of using getelementptr in the DynAlloc block

See #727
2018-02-26 21:14:15 -05:00
Ben Noordhuis
9aa65c0e8e allow implicit cast from &const to ?&const &const
Allow implicit casts from n-th degree const pointers to nullable const
pointers of degree n+1.  That is:

    fn f() void {
        const s = S {};
        const p = &s;
        g(p);   // Works.
        g(&p);  // So does this.
    }

    fn g(_: ?&const &const S) void {  // Nullable 2nd degree const ptr.
    }

Fixes #731 some more.
2018-02-26 19:56:26 +01:00
Andrew Kelley
1eecfdaa9b Merge pull request #785 from bnoordhuis/fix731
allow implicit cast from `S` to `?&const S`
2018-02-26 03:20:46 -05:00
Andrew Kelley
3e86fb500d implement coroutine suspend
see #727
2018-02-26 02:46:21 -05:00
Andrew Kelley
c60496a297 parse await and suspend syntax
See #727
2018-02-26 00:04:11 -05:00
Andrew Kelley
6fef7406c8 move coroutine init code to after coro.begin 2018-02-25 20:29:14 -05:00
Andrew Kelley
6b436146a8 fix invalid memory write in coroutines implementation 2018-02-25 20:28:44 -05:00
Andrew Kelley
6cbea99ed6 async functions are allowed to accept zig types 2018-02-25 20:27:53 -05:00
Andrew Kelley
b018c64ca2 add coroutine LLVM passes 2018-02-25 18:09:39 -05:00
Andrew Kelley
fe354ebb5c coroutines: fix llvm error of instruction not dominating uses
See #727
2018-02-25 17:57:05 -05:00
Andrew Kelley
704a8acb59 fix handle_is_ptr for promise type 2018-02-25 17:34:18 -05:00
Andrew Kelley
83f8906449 codegen for coro_resume instruction
See #727
2018-02-25 17:34:05 -05:00
Andrew Kelley
4eac75914b codegen for coro_free instruction
See #727
2018-02-25 16:46:01 -05:00
Andrew Kelley
d2d2ba10e9 codegen for coro_end instruction
See #727
2018-02-25 16:40:00 -05:00
Andrew Kelley
0cf327eb17 codegen for coro_suspend instruction
See #727
2018-02-25 16:29:07 -05:00
Andrew Kelley
d0f2eca106 codegen for coro_begin instruction
See #727
2018-02-25 16:22:19 -05:00
Andrew Kelley
79f1ff574b codegen for coro_alloc_fail instruction
See #727
2018-02-25 16:15:14 -05:00
Andrew Kelley
bced3fb64c codegen for get_implicit_allocator instruction
See #727
2018-02-25 16:05:10 -05:00
Andrew Kelley
93cbd4eeb9 codegen for coro_alloc and coro_size instructions
See #727
2018-02-25 15:20:31 -05:00
Andrew Kelley
9f6c5a20de codegen for coro_id instruction
See #727
2018-02-25 15:10:29 -05:00
Andrew Kelley
7567448b91 codegen for cancel
See #727
2018-02-25 14:47:58 -05:00
Andrew Kelley
05bf666eb6 codegen for calling an async function
See #727
2018-02-25 02:47:31 -05:00
Marc Tiehuis
08d595b472 Add utf8 string view 2018-02-24 11:32:01 -07:00
Andrew Kelley
8db7a1420f update errors section of docs
closes #768
2018-02-23 20:43:47 -05:00
Andrew Kelley
4955c4b8f9 update C headers to clang 6.0.0rc3 2018-02-23 13:15:16 -05:00
Andrew Kelley
1ba6e1641a LLD patch: workaround for buggy MACH-O code
This reapplies 1a1414fc42
to the embedded LLD.
2018-02-23 13:05:17 -05:00
Andrew Kelley
a33b689f2c update embedded LLD to 6.0.0rc3 2018-02-23 13:04:47 -05:00
Andrew Kelley
9cfd7dea19 Merge remote-tracking branch 'origin/master' into llvm6 2018-02-23 12:56:41 -05:00
Andrew Kelley
78bc62fd34 Revert "workaround on windows for llvm6 missing advapi32.lib in llvm-config"
This reverts commit eaac218d59.

This is fixed now in llvm6 rc3
2018-02-23 12:55:58 -05:00
Andrew Kelley
40dbcd09da fix type_is_codegen_pointer being used incorrectly
The names of these functions should probably change, but at least
the semantics are correct now:
 * type_is_codegen_pointer - the type is either a fn, ptr, or promise
 * get_codegen_ptr_type -
   - ?&T and &T returns &T
   - ?promise and promise returns promise
   - ?fn()void and fn()void returns fn()void
   - otherwise returns nullptr
2018-02-23 12:49:21 -05:00
Ben Noordhuis
f11b948019 allow implicit cast from S to ?&const S
Allow implicit casts from container types to nullable const pointers to
said container type.  That is:

    fn f() void {
        const s = S {};
        g(s);   // Works.
        g(&s);  // So does this.
    }

    fn g(_: ?&const S) void {  // Nullable const pointer.
    }

Fixes #731.
2018-02-23 15:55:57 +01:00
Andrew Kelley
99985ad6fc implement Zig IR for async functions
See #727
2018-02-23 03:03:06 -05:00
Andrew Kelley
b66547e98c Merge pull request #783 from bnoordhuis/fix675
name types inside functions after variable
2018-02-22 14:26:45 -05:00
Ben Noordhuis
0845cbe277 name types inside functions after variable
Before this commit:

    fn f() []const u8 {
        const S = struct {};
        return @typeName(S);  // "f()", unexpected.
    }

And now:

    fn f() []const u8 {
        const S = struct {};
        return @typeName(S);  // "S", expected.
    }

Fixes #675.
2018-02-22 19:54:02 +01:00
Andrew Kelley
ca1b77b2d5 IR analysis for coro.begin
See #727
2018-02-22 11:54:27 -05:00
Andrew Kelley
88e7b9bf80 ir analysis for coro_id and coro_alloc
See #727
2018-02-22 09:36:58 -05:00
Andrew Kelley
37c07d4f3f coroutines: analyze get_implicit_allocator instruction
see #727
2018-02-22 09:30:55 -05:00
Andrew Kelley
b261da0672 add coroutine startup IR to async functions
See #727
2018-02-21 23:28:35 -05:00
Andrew Kelley
884b5fb4cf Merge branch 'bnoordhuis-macho' 2018-02-21 02:00:52 -05:00
Andrew Kelley
623466762e clean up mach-o stack trace code 2018-02-21 02:00:33 -05:00
Andrew Kelley
236bbe1183 implement IR analysis for async function calls
See #727
2018-02-21 00:52:20 -05:00
Andrew Kelley
65a51b401c add promise type
See #727
2018-02-20 16:42:14 -05:00
Andrew Kelley
a06f3c74fd parse async fn definitions
See #727
2018-02-20 00:31:52 -05:00
Andrew Kelley
3d58d7232a parse async fn calls and cancel expressions 2018-02-20 00:05:38 -05:00
Andrew Kelley
af10b0fec2 add async, await, suspend, resume, cancel keywords
See #727
2018-02-19 23:19:59 -05:00
Ben Noordhuis
2b35615ffb fix memory leak in std.debug.openSelfDebugInfo() 2018-02-19 23:11:11 +01:00
Ben Noordhuis
ab48934e9c add support for stack traces on macosx
Add basic address->symbol resolution support.  Uses symtab data from the
MachO image, not external dSYM data; that's left as a future exercise.

The net effect is that we can now map addresses to function names but
not much more.  File names and line number data will have to wait until
a future pull request.

Partially fixes #434.
2018-02-19 23:11:11 +01:00
Andrew Kelley
bde15cf080 improve std lib linux epoll API 2018-02-17 17:53:07 -05:00
Andrew Kelley
72ca2b214d ability to slice an undefined pointer at compile time if the len is 0 2018-02-16 15:22:29 -05:00
Andrew Kelley
cbbd6cfa1e add an assert to catch #777
asserting is better than segfaulting
2018-02-15 23:39:35 -05:00
Andrew Kelley
5f5880979e zig fmt supports simple line comments 2018-02-15 12:30:29 -05:00
Andrew Kelley
cc26148ba7 fix compiler crash when struct contains...
ptr to another struct which contains original struct
2018-02-15 12:14:20 -05:00
Andrew Kelley
1c1c0691cc fix crash when doing comptime float rem comptime int
closes #776
2018-02-14 23:12:51 -05:00
Andrew Kelley
ca597e2bfb std.zig.parser understands try. zig fmt respects a double line break. 2018-02-14 23:00:53 -05:00
Andrew Kelley
9fa35adbd4 fix sometimes not type checking function parameters
closes #774

regression introduced in cfb2c67692
2018-02-14 16:24:43 -05:00
Andrew Kelley
629f134d38 std.zig.parser understands inferred return type and error inference 2018-02-14 15:50:40 -05:00
Andrew Kelley
e8d81c5acf fix build broken by previous commit 2018-02-14 13:55:06 -05:00
Andrew Kelley
d790670f4c self hosted parser: support string literals 2018-02-14 13:43:05 -05:00
Andrew Kelley
1a53c648ed self hosted parser supports builtin fn call with no args 2018-02-14 09:45:10 -05:00
Andrew Kelley
e7ab2bc553 Merge remote-tracking branch 'origin/master' into llvm6 2018-02-13 11:53:20 -05:00
Andrew Kelley
c721354b73 correct doc comment in self hosted parser 2018-02-13 11:17:26 -05:00
Andrew Kelley
02f70cda8a zig_llvm.cpp uses new(std::nothrow)
This fixes a mismatched malloc/delete because
we were allocating with malloc and then llvm was
freeing with delete.
2018-02-13 10:54:46 -05:00
Andrew Kelley
2dcff95bd2 self hosted: add tokenizer test fix eof handling 2018-02-13 10:28:55 -05:00
Andrew Kelley
dfbb8254ca fix self hosted tokenizer handling of EOF 2018-02-12 21:26:15 -05:00
Andrew Kelley
7903a758a4 Merge remote-tracking branch 'origin/master' into llvm6 2018-02-12 17:00:02 -05:00
Andrew Kelley
b4e44c4e80 self hosted parser tests every combination of memory allocation failure 2018-02-12 13:31:50 -05:00
Andrew Kelley
eaac218d59 workaround on windows for llvm6 missing advapi32.lib in llvm-config 2018-02-12 11:05:28 -05:00
Andrew Kelley
491d818f17 Merge remote-tracking branch 'origin/master' into llvm6 2018-02-12 10:48:02 -05:00
Andrew Kelley
ec0846a00f std.heap.ArenaAllocator: fix incorrectly activating safety check 2018-02-12 03:21:18 -05:00
Andrew Kelley
227ead54be back to malloc instead of aligned_alloc for c_allocator
it seems that a 7 years old standard is still too new for the
libc variants that are ubiquitous

(tests failing on macos for not providing C11 ABI)
2018-02-12 03:15:12 -05:00
Andrew Kelley
4a4ea92cf3 remove std.heap.IncrementingAllocator
Use std.heap.FixedBufferAllocator combined with
std.heap.DirectAllocator instead.

std.mem.FixedBufferAllocator is moved to std.heap.FixedBufferAllocator
2018-02-12 02:44:31 -05:00
Andrew Kelley
445b03384a introduce std.heap.ArenaAllocator and std.heap.DirectAllocator
* DirectAllocator does the underlying syscall for every allocation.
 * ArenaAllocator takes another allocator as an argument and
   allocates bytes up front, falling back to DirectAllocator with
   increasingly large allocation sizes, to avoid calling it too often.
   Then the entire arena can be freed at once.

The self hosted parser is updated to take advantage of ArenaAllocator
for the AST that it returns. This significantly reduces the complexity
of cleanup code.

docgen and build runner are updated to use the combination of
ArenaAllocator and DirectAllocator instead of IncrementingAllocator,
which is now deprecated in favor of FixedBufferAllocator combined
with DirectAllocator.

The C allocator calls aligned_alloc instead of malloc, in order to
respect the alignment parameter.

Added asserts in Allocator to ensure that implementors of the
interface return slices of the correct size.

Fixed a bug in Allocator when you call realloc to grow the allocation.
2018-02-12 02:14:44 -05:00
Andrew Kelley
ef6260b3a7 Merge remote-tracking branch 'origin/master' into llvm6 2018-02-11 23:49:20 -05:00
Andrew Kelley
f2d601661d fix exported variable not named in the object file
closes #771
2018-02-11 16:46:02 -05:00
Andrew Kelley
e743b30bbf std: refactor posixOpen to be friendlier to error return traces 2018-02-11 05:26:51 -05:00
Andrew Kelley
46aa416c48 std.os and std.io API update
* move std.io.File to std.os.File
 * add `zig fmt` to self hosted compiler
 * introduce std.io.BufferedAtomicFile API
 * introduce std.os.AtomicFile API
 * add `std.os.default_file_mode`
 * change FileMode on posix from being a usize to a u32
 * add std.os.File.mode to return mode of an open file
 * std.os.copyFile copies the mode from the source file instead of
   using the default file mode for the dest file
 * move `std.os.line_sep` to `std.cstr.line_sep`
2018-02-10 21:02:24 -05:00
Andrew Kelley
8c31eaf2a8 std zig tokenizer: don't require 3 newlines at the end of the source 2018-02-10 14:52:39 -05:00
Andrew Kelley
a2bd9f8912 std lib: modify allocator idiom
Before we accepted a nullable allocator for some stuff like
opening files. Now we require an allocator.

Use the mem.FixedBufferAllocator pattern if a bound on the amount
to allocate is known.

This also establishes the pattern that usually an allocator is the
first argument to a function (possibly after "self").

fix docs for std.cstr.addNullByte

self hosted compiler:
 * only build docs when explicitly asked to
 * clean up main
 * stub out zig fmt
2018-02-09 18:27:50 -05:00
Andrew Kelley
e7bf8f3f04 fix compiler crash switching on global error with no else 2018-02-09 13:49:58 -05:00
Andrew Kelley
1fb308ceee self hosted compiler: move tokenization and parsing to std lib 2018-02-09 13:08:02 -05:00
Andrew Kelley
3919afcad2 fix crash with error peer type resolution
closes #765
2018-02-09 11:16:04 -05:00
Andrew Kelley
2c697e50db appveyor: don't try to build for mingw
pacman is giving me:
:: msys2-runtime and catgets are in conflict.
Remove catgets? [y/N] error: unresolvable package conflicts detected
error: failed to prepare transaction (conflicting dependencies)
2018-02-09 01:15:17 -05:00
Andrew Kelley
5911962842 Merge pull request #759 from zig-lang/error-sets
Error Sets
2018-02-09 00:47:57 -05:00
Andrew Kelley
8e554561df appveyor: answer Yes to all pacman questions 2018-02-09 00:47:13 -05:00
Andrew Kelley
32c988a2d7 fix build runner on windows 2018-02-09 00:24:23 -05:00
Andrew Kelley
916d24cd21 add compile error tests for error sets 2018-02-08 23:44:21 -05:00
Andrew Kelley
4b16874f04 add test for comptime err to int with only 1 member of set 2018-02-08 22:44:15 -05:00
Andrew Kelley
ee982ae162 syntax: parse ?error!i32 as ?(error!i32) 2018-02-08 22:30:08 -05:00
Andrew Kelley
0efe441dfd if statements support comptime known test error, runtime payload 2018-02-08 22:18:13 -05:00
Andrew Kelley
54c06bf715 error sets: runtime safety for int-to-err and err set cast 2018-02-08 21:54:44 -05:00
Andrew Kelley
8fc6e31567 std: fix return type of std.c.write 2018-02-08 20:46:12 -05:00
Andrew Kelley
f9be970375 Merge remote-tracking branch 'origin/master' into error-sets 2018-02-08 20:45:26 -05:00
Andrew Kelley
57edd4dcb3 error sets - fix bad value for constant error literal 2018-02-08 18:13:07 -05:00
Marc Tiehuis
1c236b0766 Add ArrayList functions (#755)
at - Get the item at the n-th index.

insert - Insert an item into the middle of the list, resizing and copying
existing elements if needed.

insertSlice - Insert a slice into the middle of the list, resizing and
copying existing elements if needed.
2018-02-08 11:22:31 -05:00
Andrew Kelley
fee875770c error set casting building 2018-02-08 11:09:18 -05:00
Andrew Kelley
76239f2089 error sets - update langref. all tests passing 2018-02-08 03:02:41 -05:00
Andrew Kelley
0d5ff6f462 error sets - most tests passing 2018-02-08 02:08:45 -05:00
Andrew Kelley
68238d5678 fix comptime fn execution not returning error unions properly 2018-02-07 22:33:05 -05:00
Ben Noordhuis
dd20f558f0 implement openSelfExe() on darwin (#753) 2018-02-07 18:14:32 -05:00
Jeff Fowler
c88e6e8aee improve behavior of zig build (#754)
See #748
2018-02-07 17:45:20 -05:00
Andrew Kelley
5d9e3cb77f LLD patch: workaround for buggy MACH-O code
This reapplies 1a1414fc42
to the embedded LLD.
2018-02-07 17:38:33 -05:00
Andrew Kelley
38aed5af8b update embedded LLD to 6.0.0rc2 2018-02-07 17:38:02 -05:00
Andrew Kelley
aa043a6339 Merge remote-tracking branch 'origin/master' into llvm6 2018-02-07 17:27:30 -05:00
Ben Noordhuis
79ad1d9610 format struct pointers as "<typename>@<address>" (#752) 2018-02-07 16:18:48 -05:00
Ben Noordhuis
0090c2d70b DRY 'is slice?' conditionals in parser (#750) 2018-02-07 14:38:49 -05:00
Andrew Kelley
f99b8b006f error sets - fix most std lib compile errors 2018-02-05 18:09:13 -05:00
Andrew Kelley
6940212ecb error sets: fix peer resolution of error unions 2018-02-05 17:42:13 -05:00
Andrew Kelley
917e6fe370 handle linux returning EINVAL for large writes
See #743
2018-02-05 13:21:08 -05:00
Andrew Kelley
40e4e42a66 handle linux returning EINVAL for large reads
see #743
2018-02-05 12:48:29 -05:00
Andrew Kelley
44d8d654a0 fix test failure, organize code, add new compile error 2018-02-05 09:26:39 -05:00
Andrew Kelley
ec59f76526 Merge pull request #743 from bnoordhuis/linux-random
Use /dev/urandom and sysctl(RANDOM_UUID) on Linux.
2018-02-05 08:09:10 -05:00
Andrew Kelley
b7bc259093 make OutStream and InStream take an error set param 2018-02-05 07:38:24 -05:00
Andrew Kelley
893f1088df error sets - peer resolution for error unions 2018-02-05 01:49:14 -05:00
Andrew Kelley
15075d2c3d error sets - compile error for equality with no common errors 2018-02-05 00:05:04 -05:00
Andrew Kelley
31abef172a fix accidentally linking against kernel32 on non windows 2018-02-04 22:13:21 -05:00
Andrew Kelley
21ce559c9c add --forbid-library
to help track down accidentally linking against a library
2018-02-04 22:06:03 -05:00
Ben Noordhuis
73ee434c8c Use /dev/urandom and sysctl(RANDOM_UUID) on Linux.
Add fallback paths for when the getrandom(2) system call is not
available.  Try /dev/urandom first and sysctl(RANDOM_UUID) second.

The sysctl issues a warning in the system logs with some kernels but
that seems like an acceptable tradeoff for the fallback of a fallback.
2018-02-04 18:58:36 +01:00
Andrew Kelley
61718742f7 *WIP* error sets - std lib test compile but try to link against windows 2018-02-03 14:42:20 -05:00
Andrew Kelley
ef5e7bb469 *WIP* error sets - an inferred error set can end up being the global one 2018-02-03 14:06:37 -05:00
Andrew Kelley
abf5ae6897 *WIP* error sets - support fns called at comptime 2018-02-03 11:51:29 -05:00
Andrew Kelley
b8f59e14cd *WIP* error sets - correctly resolve inferred error sets 2018-02-02 18:13:32 -05:00
Andrew Kelley
39d5f44863 *WIP* error sets - basic support working 2018-02-02 14:26:14 -05:00
Andrew Kelley
cfb2c67692 *WIP* error sets - rewrite "const cast only" function 2018-02-02 11:50:19 -05:00
Andrew Kelley
15eb28efaf Merge pull request #738 from corngood/cygwin-fixes
make lld include paths private
2018-02-02 10:53:54 -05:00
David McFarland
4ec856b0f0 make lld include paths private
This fixes a build failure on cygwin caused by <string.h> -> <strings.h> taking
the latter from one of the lld paths.
2018-02-02 10:49:31 -04:00
Andrew Kelley
406496ca33 *WIP* error sets - allow peer type resolution to create new error set 2018-02-01 23:32:09 -05:00
Andrew Kelley
13b36d458f *WIP* error sets - fix implicit cast 2018-02-01 10:23:25 -05:00
Andrew Kelley
5f518dbeb9 *WIP* error sets converting std lib 2018-01-31 22:48:40 -05:00
Andrew Kelley
02b61224b2 add docs for memberType, memberCount, memberName 2018-01-31 20:56:53 -05:00
Andrew Kelley
e6d4028a84 docs: move source encoding section 2018-01-31 20:42:27 -05:00
Andrew Kelley
3a11757d57 add docs recommending to only have 1 cImport 2018-01-31 20:18:47 -05:00
Andrew Kelley
a795e4ce32 add some docs for reflection 2018-01-31 11:47:56 -05:00
Andrew Kelley
44f38b04b0 fix assertion fail when using global var number literal
closes #697
2018-01-31 11:13:39 -05:00
Andrew Kelley
5161d70620 *WIP* error sets 2018-01-31 01:51:31 -05:00
Andrew Kelley
40ca39d3d5 fix error message mentioning unreachable instead of noreturn 2018-01-31 01:44:52 -05:00
Andrew Kelley
3ef6a00bb8 add compile error for duplicate struct, enum, union fields
closes #730
2018-01-30 11:52:03 -05:00
Andrew Kelley
0995a81b8b langref: remove page title header 2018-01-30 10:31:01 -05:00
Andrew Kelley
d6b7d9090e Merge pull request #729 from zig-lang/www-changes
Improve documentation styling for mobile devices
2018-01-30 01:06:20 -05:00
Andrea Orru
7eea20bc50 Add IntrusiveLinkedList to index.zig 2018-01-29 21:02:57 -08:00
Marc Tiehuis
5e9f87c3bd Improve documentation styling for mobile devices
- No overscrolling on small screens
 - Font-size is reduced for more content per screen
 - Tables + Code blocks scroll within a block to avoid page-widening
2018-01-30 17:33:38 +13:00
Andrew Kelley
1c60f31450 add compile error for calling naked function 2018-01-29 14:01:12 -05:00
Andrew Kelley
96c9a9bdb3 Merge remote-tracking branch 'origin/master' into llvm6 2018-01-29 13:26:09 -05:00
Andrew Kelley
2b5e0b66a2 std: fix fn return syntax for zen os 2018-01-29 10:57:27 -05:00
Andrew Kelley
abe6c2d585 allow packed containers in extern functions 2018-01-29 10:57:09 -05:00
Andrew Kelley
f66ac9a5e7 fix crash when align 1 field before self referential...
...align 8 field as slice return type

closes #723
2018-01-27 18:30:36 -05:00
Andrew Kelley
ad3e2a5da0 fix compiler crash on function with invalid return type
closes #722
2018-01-26 10:37:18 -05:00
Andrew Kelley
47be64af5a Merge remote-tracking branch 'origin/master' into llvm6 2018-01-25 11:51:41 -05:00
Andrew Kelley
f7670882af Merge pull request #720 from zig-lang/require-return-type
syntax: functions require return type. remove `->`
2018-01-25 10:03:26 -05:00
Andrew Kelley
3671582c15 syntax: functions require return type. remove ->
The purpose of this is:

 * Only one way to do things
 * Changing a function with void return type to return a possible
   error becomes a 1 character change, subtly encouraging
   people to use errors.

See #632

Here are some imperfect sed commands for performing this update:

remove arrow:

```
sed -i 's/\(\bfn\b.*\)-> /\1/g' $(find . -name "*.zig")
```

add void:

```
sed -i 's/\(\bfn\b.*\))\s*{/\1) void {/g' $(find ../ -name "*.zig")
```

Some cleanup may be necessary, but this should do the bulk of the work.
2018-01-25 04:10:11 -05:00
Andrew Kelley
e5bc5873d7 rename "debug safety" to "runtime safety"
closes #437
2018-01-25 01:46:12 -05:00
Andrew Kelley
b71a56c9df cleanups that I meant to put in the previous commit 2018-01-23 23:12:38 -05:00
Andrew Kelley
b3a6faf13e replace %defer with errdefer
See #632

now we have 1 less sigil
2018-01-23 23:08:09 -05:00
Andrew Kelley
ad2527d47a clean up readme 2018-01-23 22:56:03 -05:00
Andrew Kelley
c2838f2442 fix printf format specifier 2018-01-23 11:40:22 -05:00
Andrew Kelley
b8dcdc75c1 Merge pull request #716 from zig-lang/export-c-additions
Add array type handling for gen_h
2018-01-23 09:20:57 -05:00
Marc Tiehuis
470ec91164 Add array type handling for gen_h 2018-01-23 23:38:20 +13:00
Andrew Kelley
fa7072f3f2 docgen: verify internal links 2018-01-22 23:06:07 -05:00
Andrew Kelley
cf39819478 add new kind of test: generating .h files. and more
* docgen supports obj_err code kind for demonstrating
   errors without explicit test cases
 * add documentation for `extern enum`. See #367
 * remove coldcc keyword and add @setIsCold. See #661
 * add compile errors for non-extern struct, enum, unions
   in function signatures
 * add .h file generation for extern struct, enum, unions
2018-01-22 22:24:07 -05:00
Andrew Kelley
cacba6f435 fix crash on union-enums with only 1 field
closes #713
2018-01-22 17:23:23 -05:00
Andrew Kelley
b52bffcf8d appveyor: add language reference to build artifacts 2018-01-22 16:14:06 -05:00
Andrew Kelley
5b7ae86af4 fix crash when switching on enum with 1 field and no switch prongs
closes #712
2018-01-21 14:44:24 -05:00
Andrew Kelley
517e8ea426 remove unused function, fixes mingw build 2018-01-20 02:49:53 -05:00
Andrew Kelley
ddd04a7b46 fix docgen on windows 2018-01-19 22:17:31 -05:00
Andrew Kelley
ec27d3b4ba Merge pull request #711 from zig-lang/fix-build-template
Fix build template to match build runner changes
2018-01-19 20:47:20 -05:00
Marc Tiehuis
a7e10565fc Fix build template to match build runner changes
Api changed in 7b57454cc1.
2018-01-20 13:32:49 +13:00
Andrew Kelley
890bf001db os_rename uses MoveFileEx on windows 2018-01-19 16:53:08 -05:00
Andrew Kelley
9f5c0b6e60 windows-compatible os_rename function
windows libc rename() requires destination file path to not exist
2018-01-19 16:31:21 -05:00
Andrew Kelley
2eede35577 Merge pull request #710 from Hejsil/seekto-getpos-windows
Implemented windows versions of seekTo and getPos
2018-01-19 16:17:04 -05:00
Jimmi Holst Christensen
d8469e3c7c usize might be same size as LARGE_INTEGER. If that's the case, then we don't want to compare pos to @maxValue(usize). 2018-01-19 22:08:44 +01:00
Jimmi Holst Christensen
a1a69f24c8 We now make a more correct conversion from windows LARGE_INTEGER type to usize 2018-01-19 22:05:56 +01:00
Jimmi Holst Christensen
61497893d3 Removed bitcast from usize to isize in seekTo 2018-01-19 21:57:13 +01:00
Andrew Kelley
613c4dbf58 temporary workaround for os.deleteTree not implemented for windows/mac
See #709
2018-01-19 15:51:37 -05:00
Jimmi Holst Christensen
8be606ec80 Now using the right unexpectedError in seekForward 2018-01-19 21:51:10 +01:00
Jimmi Holst Christensen
a76023bcd8 Removed PLARGE_INTEGER 2018-01-19 21:49:16 +01:00
Jimmi Holst Christensen
90714a3831 Implemented windows versions of seekTo and getPos 2018-01-19 21:30:57 +01:00
Andrew Kelley
21e8ecbafa readme: specify that we need exactly llvm 5.0.1
closes #708
2018-01-19 04:01:03 -05:00
Andrew Kelley
2c25c8aeed docs: remove references to %% prefix operator
also cleanup the table of contents
2018-01-19 03:47:27 -05:00
Andrew Kelley
ea623f2d39 all doc code examples are now tested
improve color scheme of docs
make docs depend on no external files
fix broken example code in docs

closes #465
2018-01-19 03:21:47 -05:00
Andrew Kelley
4b64c777ee add compile error for shifting by negative comptime integer
closes #698
2018-01-18 17:47:21 -05:00
Andrew Kelley
0fc645ab70 emit a compile error for @panic called at compile time
closes #706
2018-01-18 17:15:36 -05:00
Andrew Kelley
0b8f19fcba fix null debug info for 0-length array type
closes #702
2018-01-18 15:08:20 -05:00
Andrew Kelley
0aae96b5f0 test: fix brace expansion test not checking invalid inputs 2018-01-18 11:41:20 -05:00
Andrew Kelley
4556f44806 LLD patch: workaround for buggy MACH-O code
This reapplies 1a1414fc42
to the embedded LLD.
2018-01-17 17:30:38 -05:00
Andrew Kelley
4aed7ea6f8 update embedded LLD to 6.0.0rc1 2018-01-17 17:29:21 -05:00
Andrew Kelley
48cd808185 Merge remote-tracking branch 'origin/master' into llvm6 2018-01-17 13:11:21 -05:00
Andrew Kelley
a4e8e55908 Merge pull request #701 from Hejsil/fix-xor-with-zero
Fixed bigint_xor for non-negative numbers
2018-01-17 10:24:27 -05:00
Jimmi Holst Christensen
1d6f54cc7d A few more non-negative cases, just to be sure we've covered everything 2018-01-17 14:35:13 +01:00
Jimmi Holst Christensen
fa2c3be341 More tests, and fixed non-negative bigint xor 2018-01-17 14:31:47 +01:00
Jimmi Holst Christensen
db0fc32ab2 fixed xor with zero 2018-01-17 14:00:27 +01:00
Andrew Kelley
2e6125bc66 ziglang.org home page no longer in this repo
update docs examples which use build-exe to be tested

See #465
2018-01-17 03:24:49 -05:00
Marc Tiehuis
7a3fd89d25 Add Sha3 hashing functions
These are on the slower side and could be improved. No performance optimizations
yet have been done.

```
Cpu: Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz
```

-- Sha3-256

```
Zig --release-fast
    93 Mb/s
Zig --release-safe
    99 Mb/s
Zig
    4 Mb/s
```

-- Sha3-512

```
Zig --release-fast
    49 Mb/s
Zig --release-safe
    54 Mb/s
Zig
    2 Mb/s
```

Interestingly, release-safe is producing slightly better code than
release-fast.
2018-01-17 21:19:45 +13:00
Marc Tiehuis
dfd5363494 Add throughput test program
Blake performance numbers for reference:

```
Cpu: Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz
```

-- Blake2s

```
Zig --release-fast
    485 Mb/s
Zig --release-safe
    377 Mb/s
Zig
    11 Mb/s
```

-- Blake2b
```
Zig --release-fast
    616 Mb/s
Zig --release-safe
    573 Mb/s
Zig
    18 Mb/s
```
2018-01-17 21:19:45 +13:00
Marc Tiehuis
7af53d0826 Fix crypto exports 2018-01-17 21:19:45 +13:00
Andrew Kelley
1eda7e0fde docgen: support executing exe code examples
See #465
2018-01-17 01:50:35 -05:00
Andrew Kelley
5aefabe045 docgen: validate See Also sections
See #465
2018-01-17 00:22:53 -05:00
Andrew Kelley
2774fe8a1b docgen auto generates table of contents
See #465
2018-01-17 00:22:53 -05:00
Andrew Kelley
4bdfc8a10a fix error return traces pointing to off-by-one source line
See #651
2018-01-17 00:22:53 -05:00
Josh Wolfe
24c2ff5cae Revert "Buffer.toSliceCopy"
This reverts commit c58f5a4742.
2018-01-16 13:45:34 -07:00
Josh Wolfe
c58f5a4742 Buffer.toSliceCopy 2018-01-16 13:28:53 -07:00
Andrew Kelley
b897e98d30 Merge remote-tracking branch 'origin/master' into llvm6 2018-01-16 12:26:04 -05:00
Andrew Kelley
ee9ab15679 Merge pull request #695 from Hejsil/tranlate-c-fixes
Translate c fixes - undefined variable initialization and non-bool if statements
2018-01-16 10:32:37 -05:00
Jimmi Holst Christensen
3974b7d31d translate_c can now translate if statements on integers and floats 2018-01-16 15:48:28 +01:00
Jimmi Holst Christensen
f59dcc5546 Fixed tests for undefined variables 2018-01-16 15:21:48 +01:00
Andrew Kelley
8b280d5b31 Merge pull request #689 from zig-lang/blake2
Add Blake2X hash functions
2018-01-16 09:13:09 -05:00
Jimmi Holst Christensen
821cbd7a1b Output "undefined" on uninitialized variables 2018-01-16 15:01:02 +01:00
Marc Tiehuis
73b4f09845 Add crypto internal test functions 2018-01-17 00:20:20 +13:00
Marc Tiehuis
66a24c9c00 Merge branch 'master' into blake2 2018-01-17 00:20:06 +13:00
Marc Tiehuis
fa7b33549e Change crypto functions to fill a buffer
- Rename blake2x -> blake2
 - Fix blake2s truncated tests
2018-01-17 00:17:48 +13:00
Andrew Kelley
6a95b88d1b fix bigint remainder division
See #405
2018-01-16 03:09:44 -05:00
Andrew Kelley
84d8584c5b implement bigint div and rem
See #405
2018-01-16 02:22:19 -05:00
Andrew Kelley
92fc5947fc fix compiler crash related to @alignOf 2018-01-15 20:44:21 -05:00
Andrew Kelley
5a4968484b Merge branch 'wip-err-ret-trace' 2018-01-15 16:28:30 -05:00
Andrew Kelley
6ec9933fd8 fix getting debug info twice in default panic handler 2018-01-15 16:26:13 -05:00
Marc Tiehuis
4cf86b4a94 Add Blake2X hash functions
The truncated output variants currently are dependent on a more complete
bigint implementation in the compiler.
2018-01-15 23:14:13 +13:00
Andrew Kelley
c9ac607bd3 add builtin.have_error_return_tracing 2018-01-15 00:14:14 -05:00
Andrew Kelley
7b57454cc1 clean up error return tracing
* error return tracing is disabled in release-fast mode
 * add @errorReturnTrace
 * zig build API changes build return type from `void` to `%void`
 * allow `void`, `noreturn`, and `u8` from main. closes #535
2018-01-15 00:01:02 -05:00
Andrew Kelley
d973b40884 stack traces are a variable number of frames 2018-01-14 19:40:02 -05:00
Andrew Kelley
f0df2cdde9 error return traces use a zig-provided function to save binary size 2018-01-14 16:26:06 -05:00
Andrew Kelley
793f031c4c remove 32-bit windows from supported targets list
we still want to support it, but there are too many bugs
to claim that we support it right now.

See #537
2018-01-14 15:17:07 -05:00
Andrew Kelley
fa024f8092 error return trace pointer prefixes other params
instead of being last. This increases the chances that it can
remain in the same register between calls.
2018-01-14 14:35:43 -05:00
Andrew Kelley
971a6fc531 fix duplicate stack trace code 2018-01-14 10:19:21 -05:00
Andrew Kelley
e7e7625633 Merge pull request #687 from zig-lang/sha2
Add Sha2 functions
2018-01-13 21:38:29 -05:00
Marc Tiehuis
9be9f1ad20 Disable win32 tests for Sha2 + correct lengths 2018-01-14 09:58:30 +13:00
Marc Tiehuis
1f3ed5cf27 Change indexing variable types for crypto functions 2018-01-13 22:44:58 +13:00
Marc Tiehuis
2659ac01be Add Sha2 functions
We take the fastest time measurement taken across multiple runs. Tested
across multiple compiler flags and the best chosen.

```
Cpu: Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz
Gcc: 7.2.1 20171224
Clang: 5.0.1
Zig: 0.1.1.304f6f1d
```

See https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly.

```
Gcc -O2
    219 Mb/s
Clang -O2
    213 Mb/s
Zig --release-fast
    284 Mb/s
Zig --release-safe
    211 Mb/s
Zig
    6 Mb/s
```

```
Gcc -O2
    350 Mb/s
Clang -O2
    354 Mb/s
Zig --release-fast
    426 Mb/s
Zig --release-safe
    300 Mb/s
Zig
    11 Mb/s
```
2018-01-13 22:37:47 +13:00
Andrew Kelley
4551489b92 typecheck the panic function 2018-01-13 01:00:50 -05:00
Andrew Kelley
a2315cfbfc Merge pull request #686 from zig-lang/md5-sha1
Add Md5 and Sha1 functions
2018-01-13 00:00:33 -05:00
Marc Tiehuis
51fdbf7f8c Add Md5 and Sha1 hash functions
Some performance comparisons to C.

We take the fastest time measurement taken across multiple runs.

The block hashing functions use the same md5/sha1 methods.

```
Cpu: Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz
Gcc: 7.2.1 20171224
Clang: 5.0.1
Zig: 0.1.1.304f6f1d
```

See https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly:

```
gcc -O2
    661 Mb/s
clang -O2
    490 Mb/s
zig --release-fast and zig --release-safe
    570 Mb/s
zig
    50 Mb/s
```

See https://www.nayuki.io/page/fast-sha1-hash-implementation-in-x86-assembly:

```
gcc -O2
    588 Mb/s
clang -O2
    563 Mb/s
zig --release-fast and zig --release-safe
    610 Mb/s
zig
    21 Mb/s
```

In short, zig provides pretty useful tools for writing this sort of
code. We are in the lead against clang (which uses the same LLVM
backend) with us being slower only against md5 with GCC.
2018-01-13 14:40:21 +13:00
Marc Tiehuis
304f6f1d01 Add integer rotation functions 2018-01-13 13:23:12 +13:00
Andrew Kelley
32ea6f54e5 *WIP* proof of concept error return traces 2018-01-12 02:12:11 -05:00
Andrew Kelley
7ec783876a functions which can return errors have secret stack trace param
See #651
2018-01-11 23:04:08 -05:00
Andrew Kelley
eb3726c502 Merge branch 'master' into llvm6 2018-01-11 22:26:55 -05:00
Andrew Kelley
3268276b58 the same string literal codegens to the same constant
this makes it so that you can send the same string literal
as a comptime slice and get the same type
2018-01-11 21:02:30 -05:00
Andrew Kelley
465e75bc5a Merge pull request #682 from zig-lang/fix-endian
Fix endian swap parameters
2018-01-11 02:51:17 -05:00
Marc Tiehuis
899e36489d Fix endian swap parameters 2018-01-11 19:50:08 +13:00
Andrew Kelley
891c93c118 Merge pull request #681 from zig-lang/hw-math
Add hw sqrt for x86_64
2018-01-10 10:22:40 -05:00
Andrew Kelley
d4f791cf6c Merge pull request #680 from zig-lang/intrusiveLinkedList
Intrusive linked lists
2018-01-10 10:13:15 -05:00
Marc Tiehuis
24cd99160c Add hw sqrt for x86_64 2018-01-10 19:53:36 +13:00
Andrea Orru
19343db593 Intrusive linked lists 2018-01-10 00:33:07 -05:00
Andrew Kelley
d1d3dbc7b5 Merge branch 'master' into llvm6 2018-01-09 09:56:24 -05:00
Andrew Kelley
3c094116aa remove %% prefix operator
See #632
closes #545
closes #510

this makes #651 higher priority
2018-01-09 00:51:51 -05:00
Andrea Orru
98a95cc698 exit, createThread for zen 2018-01-08 12:16:23 -05:00
Andrew Kelley
5a8d87f504 Merge branch 'master' into llvm6 2018-01-08 10:34:45 -05:00
Andrew Kelley
598170756c a catch unreachable generates unwrap-error code
See #545
See #510
See #632
2018-01-07 18:13:54 -05:00
Andrew Kelley
632d143bff replace a %% b with a catch b
See #632

better fits the convention of using keywords for control flow
2018-01-07 17:28:20 -05:00
Andrew Kelley
66717db735 replace %return with try
See #632

better fits the convention of using keywords for control flow
2018-01-07 16:53:13 -05:00
Andrea Orru
de1f57926f Merge branch 'master' of github.com:zig-lang/zig 2018-01-07 04:43:15 -05:00
Andrea Orru
3182857224 Adding zen support 2018-01-07 04:43:08 -05:00
Andrew Kelley
32ba0dcea9 update hello world docs 2018-01-07 01:59:23 -05:00
Andrew Kelley
e7c04b6df2 add a test for returning a type that closes over a local const
closes #552
2018-01-07 00:50:43 -05:00
Andrew Kelley
bb39e503c0 fix struct inside function referencing local const
closes #672

the crash and compile errors are fixed but structs
inside functions still get named after the functions
they're in. this will be fixed later.
2018-01-07 00:28:37 -05:00
Andrea Orru
ad438cfd40 Merge branch 'master' of github.com:zig-lang/zig 2018-01-06 23:13:51 -05:00
Andrea Orru
e932919e68 Darwin -> MacOSX, added Zen. See #438 2018-01-06 23:10:53 -05:00
Andrew Kelley
a9d2a7f002 Merge pull request #674 from Hejsil/readInt-calling-fix
Fixed calls to mem.readInt
2018-01-06 19:45:08 -05:00
Jimmi Holst Christensen
e91136d61f Fixed the call to mem.readInt in endian.swap 2018-01-07 00:24:35 +01:00
Jimmi Holst Christensen
6f85c860c6 Fixed the call to mem.readInt in Rand.scalar 2018-01-07 00:24:17 +01:00
Andrew Kelley
38658a597b Merge branch 'master' into llvm6 2018-01-06 02:59:17 -05:00
Andrew Kelley
dde7cc52d2 fix exp1m implementation
in the llvm6 branch with assertions on, it failed the test
this fixes it
2018-01-06 02:58:45 -05:00
Andrew Kelley
17e68c4a11 disable NewGVN
closes #673
2018-01-06 00:15:37 -05:00
Andrew Kelley
2200c2de6f translate-c: update to clang 6.0.0 which has more binary operators 2018-01-05 13:53:04 -05:00
Andrew Kelley
5d9a8cbe1a Merge remote-tracking branch 'origin/master' into llvm6 2018-01-05 13:46:21 -05:00
Andrew Kelley
e08a4ea62d Merge branch 'appveyor' 2018-01-05 12:16:16 -05:00
Andrew Kelley
2c35e24bd9 workaround for microsoft releasing windows SDK with wrong version 2018-01-05 11:35:46 -05:00
Andrew Kelley
79d50d9933 appveyor: enable verbose link for self hosted compiler 2018-01-04 23:43:46 -05:00
Andrew Kelley
f377b1e886 Revert "appveyor ci: look for newer windows sdk version"
This reverts commit 31d632b72e.

according to
https://developer.microsoft.com/en-us/windows/downloads/sdk-archive
10240 is actually 26624 and there was some kind of versioning issue.
2018-01-04 23:37:21 -05:00
Andrew Kelley
7f0b12a481 appveyor: skip building self hosted compiler for now 2018-01-04 23:30:03 -05:00
Andrew Kelley
25ad0b47e2 appveyor: try using vcvarsall 2018-01-04 23:11:27 -05:00
Andrew Kelley
d1ef17e3cd appveyor: set VCINSTALLDIR 2018-01-04 22:59:39 -05:00
Andrew Kelley
1b120d1e49 update windows build to llvm 5.0.1
llvm-config.exe does not handle diaguids.lib for us so we have to
duplicate the work.
2018-01-04 22:46:26 -05:00
Andrew Kelley
21a552682e Revert "try using appveyor's llvm copy"
This reverts commit 35dc987dc8.
2018-01-04 19:06:48 -05:00
Andrew Kelley
35dc987dc8 try using appveyor's llvm copy 2018-01-04 18:54:46 -05:00
Andrew Kelley
31d632b72e appveyor ci: look for newer windows sdk version 2018-01-04 18:34:42 -05:00
Andrew Kelley
7e65fe7ac3 fix test regressions on windows from previous commit 2018-01-04 16:36:59 -05:00
Andrew Kelley
d008e209e7 self-hosted compiler works on macos 2018-01-04 15:30:22 -05:00
Andrew Kelley
e1c03d9e8e self-hosted compiler works on windows
 * better error message for realpath failing
 * fix bug in std.io.readFileAllocExtra incorrectly returning
   error.EndOfStream
 * implement std.os.selfExePath and std.os.selfExeDirPath for windows
2018-01-04 13:48:45 -05:00
Andrew Kelley
0cd63b28f3 fix self-hosted build on windows 2018-01-03 22:38:13 -05:00
Andrew Kelley
477e3f64fc self-hosted build: use llvm-config from stage1 2018-01-03 21:32:50 -05:00
Andrew Kelley
5c8600d790 add december in review to reading material; fix docs 2018-01-03 21:11:58 -05:00
Andrew Kelley
8eae4a0967 Merge branch 'master' into llvm6 2018-01-03 20:53:53 -05:00
Andrew Kelley
5a800db48c build: std files and c header files are only specified once
In the CMakeLists.txt file. And then we communicate the list
to the zig build.
2018-01-03 19:39:04 -05:00
Andrew Kelley
a45db7e853 add building the self hosted compiler to the main test suite 2018-01-03 18:25:17 -05:00
Andrew Kelley
5b156031e9 enum tag values are expressions so no parentheses needed 2018-01-03 16:05:37 -05:00
Andrew Kelley
5c988cc722 readme: update macos installation instructions 2018-01-03 14:16:20 -05:00
Andrew Kelley
36ff26609b fix self hosted compiler on windows 2018-01-03 04:55:49 -05:00
Andrew Kelley
6281a511e1 add noInlineCall to docs 2018-01-03 03:27:48 -05:00
Andrew Kelley
c741d3f4b2 add test for while respecting implicit comptime 2018-01-03 03:15:06 -05:00
Andrew Kelley
d9d61ed563 doc fixes 2018-01-03 02:51:45 -05:00
Andrew Kelley
1d77f8db28 Merge branch 'master' into llvm6 2018-01-03 00:42:00 -05:00
Andrew Kelley
0ea50b3157 ir: new pass iteration strategy
Before:
 * IR basic blocks are in arbitrary order
 * when doing an IR pass, when a block is encountered, code
   must look at all the instructions in the old basic block,
   determine what blocks are referenced, and queue up those
   old basic blocks first.
 * This had a bug (See #667)

Now:
 * IR basic blocks are required to be in an order that guarantees
   they will be referenced by a branch, before any instructions
   within are referenced.
   ir pass1 is updated to meet this constraint.
 * When doing an IR pass, we iterate over old basic blocks
   in the order they appear. Blocks which have not been
   referenced are discarded.
 * After the pass is complete, we must iterate again to look
   for old basic blocks which now point to incomplete new
   basic blocks, due to comptime code generation.
 * This last part can probably be optimized - most of the time
   we don't need to iterate over the basic block again.

closes #667
2018-01-02 21:08:12 -05:00
Andrew Kelley
aafb832288 Merge pull request #668 from sparrisable/master
Added format for floating point numbers. {.x} where x is the number of decimals.
2017-12-30 23:21:02 -05:00
Peter Rönnquist
d15b02a6b6 Added format for floating point numbers. {.x} where x is the number of decimals. 2017-12-31 00:27:58 +01:00
Josh Wolfe
4e3d7fc4bc fix self-hosted parser test 2017-12-26 23:29:15 -07:00
Josh Wolfe
192a039173 move utf8 parsing to std
source files no longer need to end with a newline
2017-12-26 23:17:33 -07:00
Andrew Kelley
6bfaf262d5 Merge branch 'master' into llvm6 2017-12-26 21:44:08 -05:00
Josh Wolfe
08dd1b553b set compile flags for zig_cpp 2017-12-26 18:05:43 -07:00
Andrew Kelley
6fece14cfb self-hosted: build against zig_llvm and embedded LLD
Now the self-hosted compiler re-uses the same C++ code for interfacing
with LLVM as the C++ code.
It also links against the same LLD library files.
2017-12-26 19:44:08 -05:00
Andrew Kelley
2a25398c86 fix segfault when passing union enum with sub byte...
...field to const slice parameter

we use a packed struct internally to represent a const array
of disparate union values, and needed to update the internal
getelementptr instruction to recognize that.

closes #664
2017-12-24 04:11:58 -05:00
Andrew Kelley
86397a532e docs: fix typo 2017-12-24 02:52:30 -05:00
Josh Wolfe
f0a1753607 add source encoding rules to the docs. see #663 2017-12-23 22:23:06 -07:00
Josh Wolfe
d6a74ed463 [self-hosted] source must be valid utf8. see #663 2017-12-23 21:47:13 -07:00
Josh Wolfe
fb96c3e73e debug needs to export FailingAllocator 2017-12-23 21:47:13 -07:00
Andrew Kelley
4183c6f1a5 move std/debug.zig to a subdirectory
self hosted compiler parser tests do some fuzz testing
2017-12-23 22:15:48 -05:00
Andrew Kelley
9dae796fe3 translate-c: set up debug scope for translated functions 2017-12-23 22:14:35 -05:00
Andrew Kelley
79c2ceb2d5 build: findLLVM correctly handles system libraries 2017-12-23 22:14:35 -05:00
Andrew Kelley
e0a1466bd8 build: add --search-prefix option 2017-12-23 22:14:35 -05:00
Andrew Kelley
2031989d98 std.os.path.resolve handles an absolute path that is missing the drive 2017-12-23 22:14:35 -05:00
Andrew Kelley
8b716f941d Merge branch 'master' into llvm6 2017-12-23 21:21:32 -05:00
Andrew Kelley
87ba004d46 translate-c: set up debug scope for translated functions 2017-12-23 21:20:38 -05:00
Andrew Kelley
c8302a5a0e build: findLLVM correctly handles system libraries 2017-12-23 21:19:48 -05:00
Josh Wolfe
0082989f22 [self-hosted] tokenizer error for ascii control codes 2017-12-23 18:35:45 -07:00
Andrew Kelley
3cbc244e98 build: add --search-prefix option 2017-12-23 20:21:57 -05:00
Andrew Kelley
74a12d818d std.os.path.resolve handles an absolute path that is missing the drive 2017-12-23 19:50:01 -05:00
Josh Wolfe
45ab752f9a source files must end with newline 2017-12-23 17:47:48 -07:00
Andrew Kelley
fe66046283 Merge remote-tracking branch 'origin/master' into llvm6 2017-12-23 12:00:25 -05:00
Andrew Kelley
39c7bd24e4 port most of main.cpp to self hosted compiler 2017-12-23 00:57:56 -05:00
Andrew Kelley
760b307e8a fix endianness of sub-byte integer fields in packed structs
closes #307
2017-12-22 18:27:33 -05:00
Andrew Kelley
e44a11341d std.math: remove unnecessary inline calls and
workaround windows 32 bit test failure
See #537
2017-12-22 13:14:07 -05:00
Josh Wolfe
0e7fb69bea bufPrint returns an error 2017-12-22 00:52:01 -07:00
Andrew Kelley
ea805c5fe7 fix darwin and windows from previous commit 2017-12-22 02:33:39 -05:00
Andrew Kelley
d917815d81 explicitly return from blocks
instead of last statement being expression value

closes #629
2017-12-22 00:50:30 -05:00
Andrew Kelley
8bc523219c add labeled loops, labeled break, labeled continue. remove goto
closes #346
closes #630

regression: translate-c can no longer translate switch statements.
after #629 we can resurrect and modify the code to utilize arbitrarily
returning from blocks.
2017-12-20 23:00:19 -05:00
Andrew Kelley
d686113bd2 fix crash when implicitly casting array of len 0 to slice
closes #660
2017-12-19 22:38:02 -05:00
Andrew Kelley
1cc450e6e7 fix assert when wrapping zero bit type in nullable
closes #659
2017-12-19 18:21:42 -05:00
Andrew Kelley
1435604b84 add sort.min and sort.max functions to stdlib 2017-12-19 17:35:38 -05:00
Andrew Kelley
2a8160e80f Merge branch 'export-rewrite'
introduces the `@export` builtin function which can be used
in a comptime block to conditionally export a function.

it also allows creation of aliases.

previous export syntax is still allowed.

closes #462
closes #420
2017-12-19 02:44:14 -05:00
Andrew Kelley
9d9201c3b4 bring back code that uses export and fix tests
partial revert of 1fdebc1dc4
2017-12-19 02:39:43 -05:00
Andrew Kelley
27ba4f0baf export keyword works again 2017-12-19 01:49:42 -05:00
Andrew Kelley
c627f9ea18 wip bring back export keyword 2017-12-19 01:19:49 -05:00
Andrew Kelley
1fdebc1dc4 wip export rewrite 2017-12-18 09:59:57 -05:00
Andrew Kelley
3f65887974 fix std.mem missing error.OutOfMemory decl
this will be fixed in a better way later by #632
2017-12-17 20:52:29 -05:00
Josh Wolfe
ab44939941 roughly parsing infix operators 2017-12-17 11:16:55 -07:00
Andrew Kelley
39e96d933e change mem.cmp to mem.lessThan and add test 2017-12-15 17:26:22 -05:00
Andrew Kelley
68f6332343 fix missing import from previous commit 2017-12-14 21:24:00 -05:00
Andrew Kelley
6bc0561d13 disable sort tests for 32-bit windows because of issue #537 2017-12-14 19:55:34 -05:00
Andrew Kelley
75ecfdf66d replace quicksort with blocksort
closes #657
2017-12-14 19:41:35 -05:00
Andrew Kelley
c9e01412a4 fix compiler crash in a nullable if after an if in...
...a switch prong of a switch with 2 prongs in an else

closes #656
2017-12-14 01:07:23 -05:00
Andrew Kelley
f55fdc00fc fix const and volatile qualifiers being dropped sometimes
in the expression `&const a.b`, the const (and/or volatile)
qualifiers would be incorrectly dropped.

closes #655
2017-12-13 21:53:52 -05:00
Andrew Kelley
84619abe9f add test for allowing slice[slice.len..slice.len] 2017-12-12 21:56:13 -05:00
Josh Wolfe
d295279b16 self-hosted: implement var decl align 2017-12-12 19:50:43 -07:00
Josh Wolfe
0003cc8105 self-hosted: implement addr of align parsing 2017-12-12 19:26:33 -07:00
Andrew Kelley
24c2703dfa self-hosted: look for llvm-config in homebrew 2017-12-12 17:25:57 -05:00
Andrew Kelley
cdaa735b2b self-hosted: build tries to find llvm-config.exe 2017-12-12 16:40:04 -05:00
Andrew Kelley
2b9302107f self-hosted: cleanup build looking for llvm-config 2017-12-12 16:03:20 -05:00
Andrew Kelley
cd5fd653d7 self-hosted: move code to std.os.ChildProcess.exec 2017-12-12 14:35:53 -05:00
Andrew Kelley
caa6433b56 stack traces: support DW_AT_ranges
This makes some cases print stack traces where it previously failed.
2017-12-12 12:05:28 -05:00
Andrew Kelley
23058d8b43 self-hosted: link with LLVM 2017-12-11 23:34:59 -05:00
Andrew Kelley
ed4d94a5d5 self-hosted: test all out of memory conditions 2017-12-11 21:12:47 -05:00
Andrew Kelley
c4e7d05ce3 refactor debug.global_allocator into mem.FixedBufferAllocator 2017-12-11 17:27:31 -05:00
Andrew Kelley
d8d379faf1 self-hosted: refactor into multiple files
add return expression
add number literal
2017-12-11 16:18:06 -05:00
Andrew Kelley
a3a590a32a self-hosted: workaround for issue #537 2017-12-11 14:47:20 -05:00
Andrew Kelley
fd6a36a235 self-hosted: parsing and rendering blocks 2017-12-11 09:21:06 -05:00
Andrew Kelley
9a51091a5c self-hosted: clean up parser 2017-12-10 23:19:01 -05:00
Andrew Kelley
f951bcf01b self-hosted: parse variable declarations with types 2017-12-10 23:02:45 -05:00
Andrew Kelley
53d58684a6 self-hosted: parse var decls 2017-12-10 22:44:04 -05:00
Andrew Kelley
f210f17d30 add self-hosted parsing and rendering to main tests 2017-12-10 21:26:52 -05:00
Andrew Kelley
4b1d120f58 Merge remote-tracking branch 'origin/master' into self-hosted 2017-12-10 19:41:01 -05:00
Andrew Kelley
dc2e3465c7 rendering source code without recursion 2017-12-10 19:40:46 -05:00
Andrew Kelley
22dc713a2f mem.Allocator initializes bytes to undefined 2017-12-10 15:38:05 -05:00
Andrew Kelley
990db3c35a rename @EnumTagType to @TagType in type names 2017-12-10 15:03:57 -05:00
Andrew Kelley
62ead3a2ee parsing an extern fn declaration 2017-12-09 20:50:31 -05:00
Andrew Kelley
e9efa74333 partial parameter decl parsing 2017-12-09 20:01:13 -05:00
Andrew Kelley
f466e539ef tokenizing libc hello world 2017-12-08 23:56:07 -05:00
Andrew Kelley
d431b0fb99 parse a simple variable declaration 2017-12-08 23:15:43 -05:00
Andrew Kelley
5ead3244a2 Merge remote-tracking branch 'origin/master' into self-hosted 2017-12-08 23:15:07 -05:00
Andrew Kelley
756a218e27 add implicit cast from enum tag type of union to const ptr to the union
closes #654
2017-12-08 17:49:14 -05:00
Andrew Kelley
18cf256817 Merge branch 'master' into self-hosted 2017-12-08 16:39:00 -05:00
Andrew Kelley
3577a80bb6 translate-c: more complex logic for translating a C cast in a macro 2017-12-08 12:28:21 -05:00
Andrew Kelley
0dd3bbf6e8 Merge branch 'master' into self-hosted 2017-12-07 14:22:41 -05:00
Andrew Kelley
182cf5b8de translate-c: support macros with pointer casting 2017-12-07 12:27:29 -05:00
Andrew Kelley
dc502042d5 translate-c: refactor prefix and suffix op C macro parsing 2017-12-07 11:52:52 -05:00
Andrew Kelley
37fbf01755 awkward void union field syntax no longer needed 2017-12-06 21:41:38 -05:00
Andrew Kelley
18b8a625f5 upgrade to new args api 2017-12-06 18:22:52 -05:00
Andrew Kelley
7c91a055c1 Merge branch 'master' into self-hosted 2017-12-06 18:20:02 -05:00
Andrew Kelley
62c25af802 add higher level arg-parsing API + misc. changes
 * add @noInlineCall - see #640
   This fixes a crash in --release-safe and --release-fast modes
   where the optimizer inlines everything into _start and
   clobbers the command line argument data.
   If we were able to verify that the user's code never reads
   command line args, we could leave off this "no inline"
   attribute.
 * add i29 and u29 primitive types. u29 is the type of alignment,
   so it makes sense to be a primitive.
   probably in the future we'll make any `i` or `u` followed by
   digits into a primitive.
 * add `aligned` functions to Allocator interface
 * add `os.argsAlloc` and `os.argsFree` so that you can get
   a `[]const []u8`, do whatever arg parsing you want, and then free
   it. For now this uses the other API under the hood, but it could
   be reimplemented to do a single allocation.
 * add tests to make sure command line argument parsing works.
2017-12-06 18:12:05 -05:00
Andrew Kelley
04612d25d7 Merge branch 'master' into self-hosted 2017-12-06 14:58:24 -05:00
Andrew Kelley
249cb2aa30 fix regressions from previous commit
c49ee9f632 broke the tests
and this fixes them
2017-12-05 22:39:36 -05:00
Andrew Kelley
f464fe14f4 switch on enum which only has 1 field is comptime known
closes #593
2017-12-05 22:26:17 -05:00
Andrew Kelley
bb6b4f8db2 fix enum with 1 member causing segfault
closes #647
2017-12-05 22:15:33 -05:00
Andrew Kelley
c49ee9f632 allow union and its tag type to peer resolve to the tag type 2017-12-05 21:33:24 -05:00
Andrew Kelley
2715f6fdb8 allow implicit cast from union to its enum tag type
closes #642
2017-12-05 21:10:47 -05:00
Andrew Kelley
b66fb7ceae revert to master branch ir.cpp, fixes issue better than this branch 2017-12-05 20:51:49 -05:00
Andrew Kelley
6018dbd339 Merge branch 'master' into self-hosted 2017-12-05 20:49:03 -05:00
Andrew Kelley
960914a073 add implicit cast from enum to union
when the enum is the tag type of the union and is comptime known
to be of a void field of the union

See #642
2017-12-05 20:46:58 -05:00
Andrew Kelley
63a2f9a8b2 fix casting integer literal to enum 2017-12-05 18:09:22 -05:00
Andrew Kelley
74cea89fce translate-c: fix not printing clang errors 2017-12-05 12:28:59 -05:00
Andrew Kelley
08d531143f parser skeleton 2017-12-05 00:20:23 -05:00
Andrew Kelley
3976981ab3 tokenizing hello world 2017-12-04 23:40:33 -05:00
Andrew Kelley
7297baa9c6 tokenizing basic operators 2017-12-04 23:29:39 -05:00
Andrew Kelley
07898cc0df tokenizing string literals 2017-12-04 23:25:59 -05:00
Andrew Kelley
798dbe487b simple tokenization 2017-12-04 23:09:03 -05:00
Andrew Kelley
31d9dc3539 read a file 2017-12-04 22:05:27 -05:00
Andrew Kelley
fe39ca01bc Merge remote-tracking branch 'origin/master' into llvm6 2017-12-04 17:45:21 -05:00
Andrew Kelley
5ebed1c9ee fix incorrect LLVM IR for union constant when active field is void
found in the llvm6 branch with llvm assertions on
2017-12-04 17:10:46 -05:00
Andrew Kelley
42004f9013 Merge branch 'master' into llvm6 2017-12-04 15:28:17 -05:00
Andrew Kelley
a966275e50 rename builtin.is_big_endian to builtin.endian
See #307
2017-12-04 10:36:31 -05:00
Andrew Kelley
67e6d9bc30 Merge pull request #644 from Dubhead/Dubhead-fix-message-color
Fix the color of compiler messages for light-themed terminal.
2017-12-04 09:15:17 -05:00
MIURA Masahiro
fea016afc0 Fix the color of compiler messages for light-themed terminal. 2017-12-04 19:22:34 +09:00
Andrew Kelley
76f3bdfff8 add test for casting union to tag type of union 2017-12-04 02:12:13 -05:00
Andrew Kelley
dd3437d5ba fix build on windows 2017-12-04 02:08:26 -05:00
Andrew Kelley
54138d9e82 add test for union with 1 void field being 0 bits 2017-12-04 02:05:33 -05:00
Andrew Kelley
084911d9b3 add test for @sizeOf on extern and packed unions 2017-12-04 02:04:08 -05:00
Andrew Kelley
942b250895 update docs regarding enums and unions 2017-12-04 01:43:06 -05:00
Andrew Kelley
05d9f07541 more tests for unions
See #618
2017-12-04 00:56:27 -05:00
Andrew Kelley
fce435db26 fix abi alignment of union-enums not counting tag type
add more tests for unions

See #618
2017-12-04 00:32:12 -05:00
Andrew Kelley
5a8367e892 rename @EnumTagType to @TagType. add tests for union-enums
See #618
2017-12-03 22:36:01 -05:00
Andrew Kelley
0ad1239522 rework enums and unions and their relationship to each other
 * @enumTagName renamed to @tagName and it works on enums and
   union-enums
 * Remove the EnumTag type. Now there is only enum and union,
   and the tag type of a union is always an enum.
 * unions support specifying the tag enum type, and they support
   inferring an enum tag type.
 * Enums no longer support field types but they do support
   setting the tag values. Likewise union-enums when inferring
   an enum tag type support setting the tag values.
 * It is now an error for enums and unions to have 0 fields.
 * switch statements support union-enums

closes #618
2017-12-03 20:43:56 -05:00
Andrew Kelley
137c8f5e8a ability to set tag values of enums
also remove support for enums with 0 values

closes #305
2017-12-02 22:32:39 -05:00
Andrew Kelley
98237f7c0b casting between integer and enum only works via tag type
See #305
2017-12-02 17:12:37 -05:00
Josh Wolfe
54a0db0daf todo: fix #639 2017-12-01 19:54:01 -07:00
Josh Wolfe
67b8b00c44 implement insertion sort. something's broken 2017-12-01 16:11:39 -07:00
Andrew Kelley
921825b4c0 Merge branch 'llvm5.0.1' 2017-12-01 13:51:53 -05:00
Andrew Kelley
cf96b6f87b update to LLVM 5.0.1rc2 2017-12-01 13:44:28 -05:00
Andrew Kelley
bdd5241615 update c_headers to llvm 5.0.1rc2 2017-12-01 12:15:19 -05:00
Andrew Kelley
a206ef34bb LLD patch: Fix the ASM code generated for __stub_helpers section
This applies 93ca847862af07632197dcf2d8a68b9b27a26d7a
from the llvm-project git monorepo to the embedded LLD.
2017-12-01 12:11:55 -05:00
Andrew Kelley
ddca67a2b9 LLD patch: workaround for buggy MACH-O code
This reapplies 1a1414fc42
to the embedded LLD.
2017-12-01 12:09:55 -05:00
Andrew Kelley
fa45407e78 LLD patch: Fix for LLD on linker scripts with empty sections
This reapplies 569cf286ff
to the embedded LLD.
2017-12-01 12:08:16 -05:00
Andrew Kelley
9ea23272fa LLD patch: COFF: better behavior when using as a library
This applies de776439b61fb71c1256ad86238799c758c66048
from the LLVM git monorepo to the embedded LLD.
2017-12-01 12:06:33 -05:00
Andrew Kelley
77b530b50a updated embedded LLD to 5.0.1rc2 2017-12-01 11:59:14 -05:00
Andrew Kelley
b4120423a5 translate-c: only emit enum tag type if not c_int or c_uint 2017-12-01 00:37:15 -05:00
Andrew Kelley
264c86853b packed structs can have enums with explicit tag types
See #305
2017-12-01 00:34:29 -05:00
Andrew Kelley
b62e2fd870 ability to specify tag type of enums
see #305
2017-11-30 22:08:11 -05:00
Josh Wolfe
5786df933d add mem.readIntLE and readIntBE 2017-11-30 11:20:50 -07:00
Andrew Kelley
210d0017c4 fix build broken by previous commit
now we report a compile error for unusual failures from translate-c
2017-11-29 23:09:35 -05:00
Andrew Kelley
7729f6cf4e translate-c: support static incomplete array inside function 2017-11-29 21:50:38 -05:00
Andrew Kelley
716b0b8655 fix capturing value of switch with all unreachable prongs
closes #635
2017-11-29 21:34:17 -05:00
Andrew Kelley
ccea8dcbf6 better error code for File.getEndPos failure 2017-11-29 21:34:17 -05:00
Josh Wolfe
88a7f203f9 add Buffer.appendFormat() 2017-11-29 19:31:09 -07:00
Josh Wolfe
418b0967fc fix os.Dir compile errors 2017-11-29 17:52:58 -07:00
Andrew Kelley
afe3aae582 Merge remote-tracking branch 'origin/llvm6' into llvm6 2017-11-29 19:12:55 -05:00
Andrew Kelley
d4cd4a35d5 update fast math llvm API to latest 2017-11-29 19:11:34 -05:00
Andrew Kelley
91ef68f9b1 Merge remote-tracking branch 'origin/master' into llvm6 2017-11-29 16:34:50 -05:00
Andrew Kelley
7066283004 translate-c: support const ptr initializer 2017-11-28 23:44:45 -05:00
Andrew Kelley
26096e79d1 translate-c: fix clobbering primitive types 2017-11-28 03:17:28 -05:00
Andrew Kelley
8d5c4a67a7 Merge branch 'dimenus-c-field-expr' 2017-11-28 03:00:13 -05:00
Andrew Kelley
e745544dac translate-c: detect macros referencing field lookup
as fn calls which assert the fn ptr is non-null
2017-11-28 02:58:51 -05:00
Andrew Kelley
f537c51f25 Merge branch 'c-field-expr' of https://github.com/dimenus/zig into dimenus-c-field-expr 2017-11-28 00:44:16 -05:00
Andrew Kelley
1ab84a27d3 translate-c: fix sometimes getting (no file) warnings
Thanks to Mason Remaley for testing the fix.
2017-11-28 00:32:32 -05:00
Mason Remaley
3e8fd24547 Implements translation for the prefix not operator (#628) 2017-11-27 21:00:05 -05:00
Ryan Saunderson
57049b95b3 Resolving merge w/ upstream master 2017-11-27 11:42:48 -06:00
dimenus
04472f57be Added support for exporting of C field expressions 2017-11-27 11:23:14 -06:00
Andrew Kelley
671183fa9a translate-c: support pointer casting
also avoid some unnecessary casts
2017-11-26 20:05:55 -05:00
Andrew Kelley
93fac5f257 translate-c: support variable name shadowing 2017-11-26 17:30:43 -05:00
Andrew Kelley
9a8545d590 translate-c: fix translation when no default switch case 2017-11-26 16:03:56 -05:00
Andrew Kelley
aa2ca3f02c translate-c: better way to translate switch
previously `continue` would be handled incorrectly
2017-11-26 15:58:49 -05:00
Andrew Kelley
1b0e90f70b translate-c supports switch statements 2017-11-26 00:58:11 -05:00
Andrew Kelley
687e359291 translate-c: avoid global state and introduce var decl scopes
in preparation to implement switch and solve variable name collisions
2017-11-25 22:17:24 -05:00
Andrew Kelley
df0e875856 translate-c: introduce the concept of scopes
in preparation to implement switch and solve variable name collisions
2017-11-25 20:34:05 -05:00
Andrew Kelley
a2afcae9ff fix crash when constant inside comptime function has compile error
closes #625
2017-11-25 18:16:33 -05:00
Andrew Kelley
48ebb65cc7 add an assert to catch corrupted memory 2017-11-25 16:34:08 -05:00
Andrew Kelley
b390929826 translate-c supports break and continue 2017-11-25 11:56:17 -05:00
Andrew Kelley
bf20b260ce translate-c supports for loops 2017-11-25 00:57:48 -05:00
Andrew Kelley
18eb3c5f90 translate-c supports returning void 2017-11-25 00:25:47 -05:00
Andrew Kelley
cd36baf530 fix assertion failed when invalid type encountered 2017-11-24 22:04:24 -05:00
Andrew Kelley
40480c7cdc translate-c supports string literals 2017-11-24 19:26:05 -05:00
Andrew Kelley
68312afcdf translate-c: support pre increment and decrement operators 2017-11-24 16:36:39 -05:00
Andrew Kelley
741504862c update homepage docs 2017-11-24 15:06:12 -05:00
Andrew Kelley
5a25505668 rename "parsec" to "translate-c" 2017-11-24 14:56:05 -05:00
Josh Wolfe
afbbdb2c67 move base64 functions into structs 2017-11-20 23:26:45 -07:00
Josh Wolfe
a44283b0b2 rework std.base64 api
* rename decode to decodeExactUnsafe.
* add decodeExact, which checks for invalid chars and padding.
* add decodeWithIgnore, which also allows ignoring chars.
* alphabets are supplied to the decoders with their
  char-to-index mapping already built, which enables it to be
  done at comptime.
* all decode/encode apis except decodeWithIgnore require dest
  to be the exactly correct length. This is calculated by a
  calc function corresponding to each api. These apis no longer
  return the dest parameter.
* for decodeWithIgnore, an exact size cannot be known a priori.
  Instead, a calc function gives an upperbound, and a runtime
  error is returned in case of overflow. decodeWithIgnore
  returns the number of bytes written to dest.

closes #611
2017-11-20 23:26:45 -07:00
Andrew Kelley
339d48ac15 parse-c: support address of operator 2017-11-17 12:11:03 -05:00
Andrew Kelley
3e835973db Merge pull request #617 from dimenus/dll-load
Added DLL loading capability in windows to the std lib.
2017-11-17 10:24:34 -05:00
Andrew Kelley
b50c676f76 add parse-c support for unions 2017-11-16 23:54:33 -05:00
dimenus
a7d07d412c Added DLL loading capability in windows to the std lib. 2017-11-16 21:49:05 -06:00
Andrew Kelley
d108689382 Merge branch 'unions'
closes #144
2017-11-16 22:14:50 -05:00
Andrew Kelley
1473eb9ae0 add documentation placeholders for unions 2017-11-16 22:13:20 -05:00
Andrew Kelley
5d2ba056c8 fix codegen for union init with runtime value
see #144
2017-11-16 22:06:08 -05:00
Andrew Kelley
e26ccd5166 debug safety for unions 2017-11-16 21:15:15 -05:00
Andrew Kelley
f12d36641f union secret field is the tag index instead of distinct type index
See #144
2017-11-16 10:06:58 -05:00
Andrew Kelley
018cbff438 unions have a secret field for the type
See #144
2017-11-15 22:52:47 -05:00
Andrew Kelley
3740bfa3bf update fast math flags for latest llvm 2017-11-15 22:32:57 -05:00
Andrew Kelley
a984040fae Merge remote-tracking branch 'origin/master' into llvm6 2017-11-15 22:32:23 -05:00
Andrew Kelley
9a4da6c8d8 Merge branch 'master' into llvm6 2017-11-15 22:24:42 -05:00
Andrew Kelley
f276fd0f37 basic union support
See #144
2017-11-15 13:04:18 -05:00
Andrew Kelley
7a74dbadd7 add docs for std.base64 2017-11-14 17:58:58 -05:00
Ryan Saunderson
371e578151 Merge remote-tracking branch 'upstream/master' into llvm6 2017-11-14 07:00:27 -06:00
Andrew Kelley
5029322aa1 c-to-zig: handle UO_Deref 2017-11-14 02:10:13 -05:00
Josh Wolfe
6ffaf4c2e2 parsec supports do loop 2017-11-13 22:56:20 -07:00
Josh Wolfe
012ce1481e parsec supports post increment/decrement with used result 2017-11-13 22:19:51 -07:00
Josh Wolfe
4c2cdf6f4d parsec supports more compound assign operators 2017-11-13 21:37:30 -07:00
Josh Wolfe
c1fde0e8c4 parsec supports bitshift operators 2017-11-13 20:49:53 -07:00
Andrew Kelley
6356724057 Merge branch 'dimenus-parsec' 2017-11-13 22:33:58 -05:00
Andrew Kelley
03732860be add test case for previous commit 2017-11-13 22:33:41 -05:00
Andrew Kelley
df07361642 Merge branch 'parsec' of https://github.com/dimenus/zig into dimenus-parsec 2017-11-13 22:26:31 -05:00
Josh Wolfe
57cd074959 parsec supports C comma operator 2017-11-13 19:59:32 -07:00
Josh Wolfe
1f28fcdec5 parsec supports C NULL to pointer implicit cast 2017-11-13 19:39:46 -07:00
dimenus
b3b4786c24 Fixed duplicate decl detection for typedefs/enums 2017-11-13 20:10:36 -06:00
dimenus
98e3c7911c Fixed duplicate decl detection for typedefs/enums 2017-11-13 16:37:46 -06:00
Andrew Kelley
a890380b6a fix windows trying to run linux-only tests 2017-11-10 18:29:49 -05:00
Andrew Kelley
ca87f55a7b Merge branch 'bscheinman-linux_timer' 2017-11-10 18:25:32 -05:00
Andrew Kelley
5ae53dacfb rename test 2017-11-10 18:24:52 -05:00
Andrew Kelley
5895204c99 Merge branch 'linux_timer' of https://github.com/bscheinman/zig into bscheinman-linux_timer 2017-11-10 18:18:03 -05:00
Brendon Scheinman
87407b54b6 add epoll and timerfd support on linux 2017-11-10 15:12:46 -08:00
Andrew Kelley
1403748fd8 disable broken 32 bit windows test
See #537
2017-11-10 17:08:11 -05:00
Andrew Kelley
df89291d1c Merge remote-tracking branch 'origin/master' into llvm6 2017-11-10 16:45:01 -05:00
Andrew Kelley
019f18058b fix test failures
put all the codegen for fn prototypes to the same place
2017-11-10 16:32:37 -05:00
Andrew Kelley
403a46abcc fix test failure on 32 bit windows 2017-11-10 16:03:14 -05:00
Andrew Kelley
6bf1547148 Merge branch 'darwin-stat'
closes #606
2017-11-10 15:01:09 -05:00
Andrew Kelley
029d37d6a7 fix bug when multiple function definitions exist
This might be related to #529
2017-11-10 14:58:50 -05:00
Andrew Kelley
20c2dbdbd3 add windows implementation of io.File.getEndPos 2017-11-10 14:36:03 -05:00
Andrew Kelley
1ac46fac15 add a std lib test for reading and writing files
 * fix fstat wrong on darwin
 * move std.debug.global_allocator to std.debug.global_allocator_state and make it private
 * add std.debug.global_allocator as a pointer (to upgrade your zig code remove
   the '&')
2017-11-10 14:17:23 -05:00
dimenus
e9d7623e1f Merge remote-tracking branch 'origin/master' into llvm6 2017-11-10 09:49:45 -06:00
Jeff Fowler
336d81894d Fix Stat include in darwin land (#605) 2017-11-09 13:46:53 -05:00
Jeff Fowler
52521d5f67 fix typo on darwin lseek (#602) 2017-11-09 11:35:35 -05:00
Andrew Kelley
7ea669e04c fix parameter of extern var args not type checked
closes #601
2017-11-09 11:30:39 -05:00
Andrew Kelley
4f8c26d2c6 fix enum sizes too large
closes #598
2017-11-08 21:44:10 -05:00
Andrew Kelley
53b18c8542 fix travis linux script 2017-11-07 09:06:29 -05:00
Andrew Kelley
4543413491 std.io: introduce buffered I/O and change API
I started working on #465 and made some corresponding std.io
API changes.

New structs:
 * std.io.FileInStream
 * std.io.FileOutStream
 * std.io.BufferedOutStream
 * std.io.BufferedInStream

Removed:
 * std.io.File.in_stream
 * std.io.File.out_stream

Now instead of &file.out_stream or &file.in_stream to get access to
the stream API for a file, you get it like this:

var file_in_stream = io.FileInStream.init(&file);
const in_stream = &file_in_stream.stream;

var file_out_stream = io.FileOutStream.init(&file);
const out_stream = &file_out_stream.stream;

This is evidence that we might not need any OOP features -
See #130.
2017-11-07 03:22:27 -05:00
Andrew Kelley
3a600297ca Merge remote-tracking branch 'origin/master' into llvm6 2017-11-06 22:41:12 -05:00
Andrew Kelley
634e8713c3 add @memberType and @memberName builtin functions
see #383

there is a plan to unify most of the reflection into 2
builtin functions, as outlined in the above issue,
but this gives us needed features for now, and we can
iterate on the design in future commits
2017-11-06 22:07:19 -05:00
scurest
f0dafd3f20 fix typos in std.io (#589)
Fixes a bug that prevented InStream.realAllAlloc from compiling.
2017-11-06 11:40:58 -05:00
Andrew Kelley
52a2992862 Merge pull request #587 from scurest/c_alloc_redeclaration_of_mem
Fix #585
2017-11-05 19:38:50 -05:00
scurest
48c8181886 fix redeclaration of mem (#585) 2017-11-05 15:46:54 -06:00
scurest
bd6f8d99c5 add test for c_allocator 2017-11-05 15:46:10 -06:00
Andrew Kelley
4cc9fe90a8 fix build on MacOS 2017-11-04 16:40:55 -04:00
Andrew Kelley
f0d755153d add compile-time reflection for function arg types
See #383
2017-11-04 16:20:02 -04:00
Andrew Kelley
4a6df04f75 slightly more verbose error message when building object file fails 2017-11-03 20:07:32 -04:00
Andrew Kelley
75afe73c66 Merge pull request #581 from Dimenus/line_endings
Add support for windows line endings with c macros within a c_import.
2017-11-03 18:40:38 -04:00
Andrew Kelley
d4c1ed95ac Merge pull request #583 from Dimenus/libc_runtime
Win32 libc runtime fixes.
2017-11-03 18:32:03 -04:00
dimenus
1890760206 Windows libc & static libc are located in the same dir which is already covered by msvc_lib_dir 2017-11-03 17:09:35 -05:00
dimenus
1ef6cb1b64 Add support for windows line endings with c macros 2017-11-03 16:29:49 -05:00
Marc Tiehuis
795703a39c Add emit command-line option (#580)
Add emit command-line option
2017-11-03 09:09:33 -04:00
Andrew Kelley
a31b23c46b more compile-time type reflection
See #383
2017-11-03 00:00:57 -04:00
Andrew Kelley
dc8b011d61 fix incorrect debug info for empty structs
closes #579

now all tests pass for llvm master branch
2017-11-02 21:57:55 -04:00
Andrew Kelley
4a82c2d124 fix incorrect debug info for empty structs
now all tests pass for llvm master branch
2017-11-02 21:54:24 -04:00
Andrew Kelley
188fd47a51 add missing environment 2017-11-02 21:54:24 -04:00
Andrew Kelley
9a99bd3a71 use llvm named structs for const values when possible
normally we want to use llvm types for constants. but
union constants (which are found inside enums) when
they are initialized with the non-most-aligned-member
must be unnamed structs.

these bubble up to all aggregate types. if a constant of
an aggregate type contains, recursively, a union constant
with a non-most-aligned-member initialized, the aggregate
typed constant must be unnamed too.

this fixes all the asserts that were coming in from
llvm master branch.
2017-11-02 21:54:24 -04:00
Andrew Kelley
94ec2190f8 update to llvm master 2017-11-02 21:54:24 -04:00
Andrew Kelley
abff1b6884 windows: use the same libc search within a compilation unit 2017-11-01 23:08:34 -04:00
Andrew Kelley
f7837f445e bump build_runner allocator to use 30 MB 2017-11-01 16:46:10 -04:00
Dimenus
38f05d4ac5 WIN32: Linking with the CRT at runtime. (#570)
Disclaimer: Forgive me if my format sucks, I've never submitted a PR before!

Fixes: #517 

I added a few things to allow zig to link with the CRT properly both statically and dynamically. In Visual Studio 2017, Microsoft changed how the c-runtime is factored again. With this change, they also added a COM interface to allow you to query the respective Visual Studio instance for two of them. This does that and also falls back on a registry query for 2015 support. If you're using a Visual Studio instance older than 2015, you'll have to use the existing options available with the zig compiler. Changes are listed below along with a general description of the changes.

all_types.cpp:

The separate variables for msvc/kern32 have been removed and all win32 libc directory paths have been combined into a ZigList since we're querying more than two directories and differentiating one from another doesn't matter to lld.

analyze.cpp:

The existing functions were extended to support querying libc libs & libc headers at runtime.

codegen.cpp/hpp:

Microsoft uses the new 'Universal C Runtime' name now. Doesn't matter from a functionality standpoint. I left the compiler switches as is to not introduce any breaking changes.

link.cpp:

We're linking 4 libs and generating another in order to support the UCRT.
Dynamic: msvcrt/d, vcruntime/d, ucrt/d, legacy_stdio_definitions.lib
Static: libcmt/d, libvcruntime/d libucrt/d, legacy_stdio_definitions.lib

main.cpp:

Update function call names.

os.cpp/hpp:

COM/Registry interface for querying Windows UCRT/SDK.

Sources:
[Windows CRT](https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features)
[VS 2015 Breaking Changes](https://msdn.microsoft.com/en-us/library/bb531344.aspx)
2017-11-01 15:33:14 -04:00
Andreas Haferburg
b35689b70d Enforce "\n" line endings on Windows (#574)
With Windows line endings, which seems to be the default on Windows, the
zig compiler won't understand std out of the box. This project should
not rely on git's global core.autocrlf setting.
2017-11-01 10:31:32 -04:00
Andrew Kelley
25972be45c fix windows build from previous commit 2017-10-31 22:24:02 -04:00
Andrew Kelley
9e234d4208 breaking change to std.io API
* Merge io.InStream and io.OutStream into io.File
 * Introduce io.OutStream and io.InStream interfaces
   - io.File implements both of these
 * Move mem.IncrementingAllocator to heap.IncrementingAllocator

Instead of:

```
%return std.io.stderr.printf("hello\n");
```

now do:

```
std.debug.warn("hello\n");
```

To print to stdout, see `io.getStdOut()`.

 * Rename std.ArrayList.resizeDown to std.ArrayList.shrink.
2017-10-31 04:47:55 -04:00
Andrew Kelley
7a96aca39e Merge branch 'master' into self-hosted 2017-10-27 12:54:46 -04:00
Andrew Kelley
1a414c7b6b delete -municode command line argument
The solution to this is to always have it on and only
use the 'W' versions of respective windows APIs.

See the issue for this.
2017-10-27 01:29:58 -04:00
Andrew Kelley
540bac0928 Merge branch 'master' into self-hosted 2017-10-27 01:28:08 -04:00
Andrew Kelley
4c306af4eb add test case for previous commit 2017-10-27 01:22:48 -04:00
Andrew Kelley
f1072d0d9f use llvm named structs for const values when possible
normally we want to use llvm types for constants. but
union constants (which are found inside enums) when
they are initialized with the non-most-aligned-member
must be unnamed structs.

these bubble up to all aggregate types. if a constant of
an aggregate type contains, recursively, a union constant
with a non-most-aligned-member initialized, the aggregate
typed constant must be unnamed too.

this fixes some of the asserts that were coming in from
llvm master branch.
2017-10-27 00:14:56 -04:00
Marc Tiehuis
6663638195 Improve invalid character error messages (#566)
See #544
2017-10-26 10:00:23 -04:00
Andrew Kelley
f4ca3482f1 add guard to c_headers for duplicate va_list on darwin 2017-10-26 01:11:57 -04:00
Andrew Kelley
c7053bea20 better output when @cImport generates invalid zig 2017-10-26 00:32:30 -04:00
Andrew Kelley
300c83d893 fix crash on field access of opaque type 2017-10-25 23:18:18 -04:00
Andrew Kelley
5f28a9d238 cleaner verbose flags and zig build prints failed command 2017-10-25 23:10:41 -04:00
Andrew Kelley
6764a45223 Merge branch 'better-float-printing' 2017-10-24 21:58:09 -04:00
Andrew Kelley
73fe5f63c6 add some sanity tests for float printing 2017-10-24 21:57:58 -04:00
Andrew Kelley
1e784839f1 Merge branch 'float-printing' of https://github.com/scurest/zig into better-float-printing 2017-10-24 21:44:49 -04:00
Andrew Kelley
1828f8eb8e fix missing compiler_rt in release modes
the optimizer was deleting compiler_rt symbols, so I changed
the linkage type from LinkOnce to Weak

also changed LinkOnce to mean linkonce_odr in llvm and
Weak to mean weak_odr in llvm.

See #563
2017-10-24 21:31:47 -04:00
scurest
262b7428cf More corrections to float printing
Testing suggests all f32s are now printed accurately.
2017-10-24 14:18:50 -05:00
Andrew Kelley
4f4da3c10c wip self hosted code 2017-10-24 10:08:20 -04:00
Andrew Kelley
d7e28f991d remove CXX ABI workaround
the actual solution is you must compile zig with the same
compiler that compiled llvm, lld, and clang.

reverts 8d60ffe314
2017-10-23 22:37:59 -04:00
Andrew Kelley
643ab90ace add maximum value for @setAlignStack 2017-10-23 22:33:00 -04:00
scurest
03a0dfbeca Print better floats 2017-10-23 15:40:49 -05:00
Andrew Kelley
92751d5e24 self hosted zig: print usage 2017-10-21 17:31:06 -04:00
Andrew Kelley
c1642355f0 parse-c: improve performance
previously we did linear search to find existing global
declarations; now we index using a hash map.

building tetris went from taking 5.3 sec to 0.76 sec
2017-10-21 16:46:33 -04:00
Andrew Kelley
a1af7cbf00 report compile error instead of crashing for void in var args
See #557
2017-10-21 15:46:04 -04:00
Andrew Kelley
175893913d fix compiler crash regarding type name of undefined
See #547
2017-10-21 13:14:10 -04:00
Andrew Kelley
9b91c76088 std.fmt.format supports ints smaller than u8
closes #546

thanks to @Dimenus for the fix
2017-10-21 13:03:08 -04:00
Andrew Kelley
b3d12d2c9e zig build: fix system libraries not respected for C artifacts
closes #550
2017-10-21 12:58:47 -04:00
Andrew Kelley
3c3af4b332 fix docs link 2017-10-17 16:05:46 -04:00
Andrew Kelley
a27c0dd591 remove unsupported targets from readme
See #438
2017-10-17 14:15:50 -04:00
Andrew Kelley
78cb4ce030 Release 0.1.1 2017-10-17 08:50:00 -04:00
Andrew Kelley
79193ffed2 build: fix logic for version when there is a git tag 2017-10-17 08:47:27 -04:00
2917 changed files with 226706 additions and 64321 deletions

2
.gitattributes vendored Normal file
View File

@@ -0,0 +1,2 @@
*.zig text eol=lf
langref.html.in text eol=lf

12
.gitignore vendored
View File

@@ -1,3 +1,15 @@
# This file is for zig-specific build artifacts.
# If you have OS-specific or editor-specific files to ignore,
# such as *.swp or .DS_Store, put those in your global
# ~/.gitignore and put this in your ~/.gitconfig:
#
# [core]
# excludesfile = ~/.gitignore
#
# Cheers!
# -andrewrk
zig-cache/
build/
build-*/
docgen_tmp/

View File

@@ -1,16 +1,22 @@
sudo: required
services:
- docker
os:
- linux
- osx
- linux
- osx
dist: trusty
osx_image: xcode8.3
sudo: required
language: cpp
before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_before_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_before_install; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_before_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_before_install; fi
install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_install; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_install; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_install; fi
script:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_script; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_script; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ci/travis_linux_script; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then ci/travis_osx_script; fi
env:
global:
- secure: QmJ+eLOxj3Irl5SHxt6lQvrj7++1AIz8bYri6RScAQGHQPIztkmbpBjAkpFgYaWPkZ04ROtamFXdS7oHtJHSECesgPoqM/CHIychQkgpDq30+TsFyYbBpDGHY+N6r2WnQTvg+9EuAp6P365us6qFS0D5zQ3P40c56uMbazFu3J4W1HZP+pLWlLjEXaN88ePhHWqNZyvwGMkLpYl3ghcrE9H4vGZQ7jenRW4UmskLEkuhUPJbQiow3Td8arJiRmLVISzWqneqNraLUpGyUVr4F3Rbjzacfoo3r9ZZynhY0mFsEye82x6TMGgH2xsNGkd91zpQuckWUT+pQv/G6FXpnEnjIJSO2Z5WAxXrx6xB1k2HZ17/4NWLF3fJVhdQJm3mS6odeGzUjgGrl1A42evxU+7VbcofEJq1aMiLgU1jUT2pt+pefCwmKJYLpEsSzuyrVxgvskQz0QpC053TAYSNf2Jj6Qhg9YDWyOeemYmDgffTqErF7AYhc6NKH0s0XKkIiNFSxorkEsfG/Ck1o+15slHNmWZXlmXToxDqFkLDoPvfGKg7koU5YTGvci/F9ZKb1juhGLxZbwap/18zN40BqA+Ip2yDBJAKxsIiwSjSIguy6g/Z1I50s0xNGOr36urfRRQX5H+rqr/xCZ63B6WSe6qBcZboWAQMDn8HLS9Xiwc=
- secure: dnb7r5guUeMOX9e7XlPUSZzmga8VW3G9Q1aa7LxEKiTjSnWhu5KpPDe8o1X3Rj6nc5iXDqmBH/C/7eNXPDyXJJWPvpE2YRpGymyUkRaakul0QBKJEaMvwy2SuAfS69CWC+TSzfGRvtSYkdpBhhLvs0h5S819S5jYbCNSCmOKfFucaP5NsHNIZ/I19oIeTPTa0/UnVm7DLFZXZjvbS+czkdyH1DhbT85sLj+XqNTzLePImE68efrjaHnlSy/CzBVJzj55UgD5i9fxNCQWzGWim/SD5xZ0zKtLycSOf6wQN2lCo0lkjw9rDlYz69mM5L9ikfYL9oHDPZnh84oXKglQ5miOHCgqs/qs4439I05lIu8i/EfbFA55YG4NyO3rL9YVOOt5gwiwvJYhDcnkVVzSl0o5bsoZgQfYvPWaIQKNkl3C53zfDQjgqS54CeDzlZpFrQTDQ1RrH8oeVC1gfYAeMabMDadox5rfZmLIN5JTf/F8iD/QdxGcoUvkEENcQgfP9PnubExtexgHGsEmqbm6ORSZ1MkEh2m3fo0f8KE6TbN1UigmcQ8nTkWBHsSmfHnB8HwJQp8mwQmDamXA+Hl3e3w4LOdYkJVlNW1/TTyJJOOvjMQCjF8SJmPHuh+QpqKbSaT9XM/vBhxbIZEufH8kawJKCBBcCNspGMNjhXfNjM0=

File diff suppressed because it is too large Load Diff

190
README.md
View File

@@ -1,11 +1,9 @@
![ZIG](http://ziglang.org/zig-logo.svg)
![ZIG](https://ziglang.org/zig-logo.svg)
A programming language designed for robustness, optimality, and
clarity.
[ziglang.org](http://ziglang.org)
[Documentation](http://ziglang.org/documentation/)
[ziglang.org](https://ziglang.org)
## Feature Highlights
@@ -23,19 +21,19 @@ clarity.
* Compatible with C libraries with no wrapper necessary. Directly include
C .h files and get access to the functions and symbols therein.
* Provides standard library which competes with the C standard library and is
always compiled against statically in source form. Compile units do not
always compiled against statically in source form. Zig binaries do not
depend on libc unless explicitly linked.
* Nullable type instead of null pointers.
* Tagged union type instead of raw unions.
* Optional type instead of null pointers.
* Safe unions, tagged unions, and C ABI compatible unions.
* Generics so that one can write efficient data structures that work for any
data type.
* No header files required. Top level declarations are entirely
order-independent.
* Compile-time code execution. Compile-time reflection.
* Partial compile-time function evaluation with eliminates the need for
* Partial compile-time function evaluation which eliminates the need for
a preprocessor or macros.
* The binaries produced by Zig have complete debugging information so you can,
for example, use GDB to debug your software.
for example, use GDB, MSVC, or LLDB to debug your software.
* Built-in unit tests with `zig test`.
* Friendly toward package maintainers. Reproducible build, bootstrapping
process carefully documented. Issues filed by package maintainers are
@@ -54,173 +52,109 @@ that counts as "freestanding" for the purposes of this table.
| | freestanding | linux | macosx | windows | other |
|-------------|--------------|---------|---------|---------|---------|
|i386 | OK | planned | OK | OK | planned |
|i386 | OK | planned | OK | planned | planned |
|x86_64 | OK | OK | OK | OK | planned |
|arm | OK | planned | planned | N/A | planned |
|aarch64 | OK | planned | planned | planned | planned |
|avr | OK | planned | planned | N/A | planned |
|bpf | OK | planned | planned | N/A | planned |
|hexagon | OK | planned | planned | N/A | planned |
|mips | OK | planned | planned | N/A | planned |
|msp430 | OK | planned | planned | N/A | planned |
|nios2 | OK | planned | planned | N/A | planned |
|powerpc | OK | planned | planned | N/A | planned |
|r600 | OK | planned | planned | N/A | planned |
|amdgcn | OK | planned | planned | N/A | planned |
|riscv | OK | planned | planned | N/A | planned |
|sparc | OK | planned | planned | N/A | planned |
|s390x | OK | planned | planned | N/A | planned |
|tce | OK | planned | planned | N/A | planned |
|thumb | OK | planned | planned | N/A | planned |
|xcore | OK | planned | planned | N/A | planned |
|nvptx | OK | planned | planned | N/A | planned |
|le | OK | planned | planned | N/A | planned |
|amdil | OK | planned | planned | N/A | planned |
|hsail | OK | planned | planned | N/A | planned |
|spir | OK | planned | planned | N/A | planned |
|kalimba | OK | planned | planned | N/A | planned |
|shave | OK | planned | planned | N/A | planned |
|lanai | OK | planned | planned | N/A | planned |
|wasm | OK | N/A | N/A | N/A | N/A |
|renderscript | OK | N/A | N/A | N/A | N/A |
|aarch64 | OK | planned | N/A | planned | planned |
|bpf | OK | planned | N/A | N/A | planned |
|hexagon | OK | planned | N/A | N/A | planned |
|mips | OK | planned | N/A | N/A | planned |
|powerpc | OK | planned | N/A | N/A | planned |
|r600 | OK | planned | N/A | N/A | planned |
|amdgcn | OK | planned | N/A | N/A | planned |
|sparc | OK | planned | N/A | N/A | planned |
|s390x | OK | planned | N/A | N/A | planned |
|thumb | OK | planned | N/A | N/A | planned |
|spir | OK | planned | N/A | N/A | planned |
|lanai | OK | planned | N/A | N/A | planned |
## Community
* IRC: `#zig` on Freenode.
* IRC: `#zig` on Freenode ([Channel Logs](https://irclog.whitequark.org/zig/)).
* Reddit: [/r/zig](https://www.reddit.com/r/zig)
* Email list: [ziglang@googlegroups.com](https://groups.google.com/forum/#!forum/ziglang)
### Wanted: Windows Developers
Help get the tests passing on Windows, flesh out the standard library for
Windows, streamline Zig installation and distribution for Windows. Work with
LLVM and LLD teams to improve PDB/CodeView/MSVC debugging. Implement stack traces
for Windows in the MinGW environment and the MSVC environment.
### Wanted: MacOS and iOS Developers
Flesh out the standard library for MacOS. Improve the MACH-O linker. Implement
stack traces for MacOS. Streamline the process of using Zig to build for
iOS.
### Wanted: Android Developers
Flesh out the standard library for Android. Streamline the process of using
Zig to build for Android and for depending on Zig code on Android.
### Wanted: Web Developers
Figure out what are the use cases for compiling Zig to WebAssembly. Create demo
projects with it and streamline experience for users trying to output
WebAssembly. Work on the documentation generator outputting useful searchable html
documentation. Create Zig modules for common web tasks such as WebSockets and gzip.
### Wanted: Embedded Developers
Flesh out the standard library for uncommon CPU architectures and OS targets.
Drive issue discussion for cross compiling and using Zig in constrained
or unusual environments.
### Wanted: Game Developers
Create cross platform Zig modules to compete with SDL and GLFW. Create an
OpenGL library that does not depend on libc. Drive the usability of Zig
for video games. Create a general purpose allocator that does not depend on
libc. Create demo games using Zig.
## Building
[![Build Status](https://travis-ci.org/zig-lang/zig.svg?branch=master)](https://travis-ci.org/zig-lang/zig)
[![Build Status](https://travis-ci.org/ziglang/zig.svg?branch=master)](https://travis-ci.org/ziglang/zig)
[![Build status](https://ci.appveyor.com/api/projects/status/4t80mk2dmucrc38i/branch/master?svg=true)](https://ci.appveyor.com/project/andrewrk/zig-d3l86/branch/master)
### Dependencies
### Stage 1: Build Zig from C++ Source Code
#### Build Dependencies
These compile tools must be available on your system and are used to build
the Zig compiler itself:
#### Dependencies
##### POSIX
* gcc >= 5.0.0 or clang >= 3.6.0
* cmake >= 2.8.5
* gcc >= 5.0.0 or clang >= 3.6.0
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same gcc or clang version above
- These depend on zlib and libxml2.
##### Windows
* cmake >= 2.8.5
* Microsoft Visual Studio 2015
* LLVM, Clang, LLD development libraries == 7.x, compiled with the same MSVC version above
#### Library Dependencies
#### Instructions
These libraries must be installed on your system, with the development files
available. The Zig compiler links against them. You have to use the same
compiler for these libraries as you do to compile Zig.
* LLVM, Clang, and LLD libraries == 5.x
### Debug / Development Build
If you have gcc or clang installed, you can find out what `ZIG_LIBC_LIB_DIR`,
`ZIG_LIBC_STATIC_LIB_DIR`, and `ZIG_LIBC_INCLUDE_DIR` should be set to
(example below).
##### POSIX
```
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=$(pwd) -DZIG_LIBC_LIB_DIR=$(dirname $(cc -print-file-name=crt1.o)) -DZIG_LIBC_INCLUDE_DIR=$(echo -n | cc -E -x c - -v 2>&1 | grep -B1 "End of search list." | head -n1 | cut -c 2- | sed "s/ .*//") -DZIG_LIBC_STATIC_LIB_DIR=$(dirname $(cc -print-file-name=crtbegin.o))
cmake ..
make
make install
./zig build --build-file ../build.zig test
bin/zig build --build-file ../build.zig test
```
#### MacOS
`ZIG_LIBC_LIB_DIR` and `ZIG_LIBC_STATIC_LIB_DIR` are unused.
##### MacOS
```
brew install llvm@5
brew outdated llvm@5 || brew upgrade llvm@5
brew install cmake llvm@7
brew outdated llvm@7 || brew upgrade llvm@7
mkdir build
cd build
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@5/ -DCMAKE_INSTALL_PREFIX=$(pwd)
cmake .. -DCMAKE_PREFIX_PATH=/usr/local/opt/llvm@7/
make install
./zig build --build-file ../build.zig test
bin/zig build --build-file ../build.zig test
```
#### Windows
##### Windows
See https://github.com/zig-lang/zig/wiki/Building-Zig-on-Windows
See https://github.com/ziglang/zig/wiki/Building-Zig-on-Windows
### Release / Install Build
### Stage 2: Build Self-Hosted Zig from Zig Source Code
Once installed, `ZIG_LIBC_LIB_DIR` and `ZIG_LIBC_INCLUDE_DIR` can be overridden
by the `--libc-lib-dir` and `--libc-include-dir` parameters to the zig binary.
*Note: Stage 2 compiler is not complete. Beta users of Zig should use the
Stage 1 compiler for now.*
Dependencies are the same as Stage 1, except now you have a working zig compiler.
```
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DZIG_LIBC_LIB_DIR=/some/path -DZIG_LIBC_INCLUDE_DIR=/some/path -DZIG_LIBC_STATIC_INCLUDE_DIR=/some/path
make
sudo make install
bin/zig build --build-file ../build.zig --prefix $(pwd)/stage2 install
```
### Test Coverage
This produces `./stage2/bin/zig` which can be used for testing and development.
Once it is feature complete, it will be used to build stage 3 - the final compiler
binary.
To see test coverage in Zig, configure with `-DZIG_TEST_COVERAGE=ON` as an
additional parameter to the Debug build.
### Stage 3: Rebuild Self-Hosted Zig Using the Self-Hosted Compiler
You must have `lcov` installed and available.
This is the actual compiler binary that we will install to the system.
Then `make coverage`.
*Note: Stage 2 compiler is not yet able to build Stage 3. Building Stage 3 is
not yet supported.*
With GCC you will get a nice HTML view of the coverage data. With clang,
the last step will fail, but you can execute
`llvm-cov gcov $(find CMakeFiles/ -name "*.gcda")` and then inspect the
produced .gcov files.
#### Debug / Development Build
### Related Projects
```
./stage2/bin/zig build --build-file ../build.zig --prefix $(pwd)/stage3 install
```
* [zig-mode](https://github.com/AndreaOrru/zig-mode) - Emacs integration
* [zig.vim](https://github.com/zig-lang/zig.vim) - Vim configuration files
* [vscode-zig](https://github.com/zig-lang/vscode-zig) - Visual Studio Code extension
* [zig-compiler-completions](https://github.com/tiehuis/zig-compiler-completions) - bash and zsh completions for the zig compiler
* [NppExtension](https://github.com/ice1000/NppExtension) - Notepad++ syntax highlighting
#### Release / Install Build
```
./stage2/bin/zig build --build-file ../build.zig install -Drelease-fast
```

323
build.zig
View File

@@ -1,27 +1,312 @@
const Builder = @import("std").build.Builder;
const builtin = @import("builtin");
const std = @import("std");
const Builder = std.build.Builder;
const tests = @import("test/tests.zig");
const os = std.os;
const BufMap = std.BufMap;
const warn = std.debug.warn;
const mem = std.mem;
const ArrayList = std.ArrayList;
const Buffer = std.Buffer;
const io = std.io;
pub fn build(b: *Builder) !void {
const mode = b.standardReleaseOptions();
var docgen_exe = b.addExecutable("docgen", "doc/docgen.zig");
const rel_zig_exe = try os.path.relative(b.allocator, b.build_root, b.zig_exe);
const langref_out_path = os.path.join(b.allocator, b.cache_root, "langref.html") catch unreachable;
var docgen_cmd = b.addCommand(null, b.env_map, [][]const u8{
docgen_exe.getOutputPath(),
rel_zig_exe,
"doc" ++ os.path.sep_str ++ "langref.html.in",
langref_out_path,
});
docgen_cmd.step.dependOn(&docgen_exe.step);
const docs_step = b.step("docs", "Build documentation");
docs_step.dependOn(&docgen_cmd.step);
pub fn build(b: &Builder) {
const test_filter = b.option([]const u8, "test-filter", "Skip tests that do not match filter");
const with_lldb = b.option(bool, "with-lldb", "Run tests in LLDB to get a backtrace if one fails") ?? false;
const test_step = b.step("test", "Run all the tests");
test_step.dependOn(tests.addPkgTests(b, test_filter,
"test/behavior.zig", "behavior", "Run the behavior tests",
with_lldb));
// find the stage0 build artifacts because we're going to re-use config.h and zig_cpp library
const build_info = try b.exec([][]const u8{
b.zig_exe,
"BUILD_INFO",
});
var index: usize = 0;
var ctx = Context{
.cmake_binary_dir = nextValue(&index, build_info),
.cxx_compiler = nextValue(&index, build_info),
.llvm_config_exe = nextValue(&index, build_info),
.lld_include_dir = nextValue(&index, build_info),
.lld_libraries = nextValue(&index, build_info),
.std_files = nextValue(&index, build_info),
.c_header_files = nextValue(&index, build_info),
.dia_guids_lib = nextValue(&index, build_info),
.llvm = undefined,
.no_rosegment = b.option(bool, "no-rosegment", "Workaround to enable valgrind builds") orelse false,
};
ctx.llvm = try findLLVM(b, ctx.llvm_config_exe);
test_step.dependOn(tests.addPkgTests(b, test_filter,
"std/index.zig", "std", "Run the standard library tests",
with_lldb));
var test_stage2 = b.addTest("src-self-hosted/test.zig");
test_stage2.setBuildMode(builtin.Mode.Debug);
test_step.dependOn(tests.addPkgTests(b, test_filter,
"std/special/compiler_rt/index.zig", "compiler-rt", "Run the compiler_rt tests",
with_lldb));
var exe = b.addExecutable("zig", "src-self-hosted/main.zig");
exe.setBuildMode(mode);
test_step.dependOn(tests.addCompareOutputTests(b, test_filter));
test_step.dependOn(tests.addBuildExampleTests(b, test_filter));
test_step.dependOn(tests.addCompileErrorTests(b, test_filter));
test_step.dependOn(tests.addAssembleAndLinkTests(b, test_filter));
test_step.dependOn(tests.addDebugSafetyTests(b, test_filter));
test_step.dependOn(tests.addParseCTests(b, test_filter));
try configureStage2(b, test_stage2, ctx);
try configureStage2(b, exe, ctx);
b.default_step.dependOn(&exe.step);
const skip_release = b.option(bool, "skip-release", "Main test suite skips release builds") orelse false;
const skip_release_small = b.option(bool, "skip-release-small", "Main test suite skips release-small builds") orelse skip_release;
const skip_release_fast = b.option(bool, "skip-release-fast", "Main test suite skips release-fast builds") orelse skip_release;
const skip_release_safe = b.option(bool, "skip-release-safe", "Main test suite skips release-safe builds") orelse skip_release;
const skip_self_hosted = b.option(bool, "skip-self-hosted", "Main test suite skips building self hosted compiler") orelse false;
if (!skip_self_hosted) {
test_step.dependOn(&exe.step);
}
const verbose_link_exe = b.option(bool, "verbose-link", "Print link command for self hosted compiler") orelse false;
exe.setVerboseLink(verbose_link_exe);
b.installArtifact(exe);
installStdLib(b, ctx.std_files);
installCHeaders(b, ctx.c_header_files);
const test_filter = b.option([]const u8, "test-filter", "Skip tests that do not match filter");
const test_stage2_step = b.step("test-stage2", "Run the stage2 compiler tests");
test_stage2_step.dependOn(&test_stage2.step);
// TODO see https://github.com/ziglang/zig/issues/1364
if (false) {
test_step.dependOn(test_stage2_step);
}
var chosen_modes: [4]builtin.Mode = undefined;
var chosen_mode_index: usize = 0;
chosen_modes[chosen_mode_index] = builtin.Mode.Debug;
chosen_mode_index += 1;
if (!skip_release_safe) {
chosen_modes[chosen_mode_index] = builtin.Mode.ReleaseSafe;
chosen_mode_index += 1;
}
if (!skip_release_fast) {
chosen_modes[chosen_mode_index] = builtin.Mode.ReleaseFast;
chosen_mode_index += 1;
}
if (!skip_release_small) {
chosen_modes[chosen_mode_index] = builtin.Mode.ReleaseSmall;
chosen_mode_index += 1;
}
const modes = chosen_modes[0..chosen_mode_index];
test_step.dependOn(tests.addPkgTests(b, test_filter, "test/behavior.zig", "behavior", "Run the behavior tests", modes));
test_step.dependOn(tests.addPkgTests(b, test_filter, "std/index.zig", "std", "Run the standard library tests", modes));
test_step.dependOn(tests.addPkgTests(b, test_filter, "std/special/compiler_rt/index.zig", "compiler-rt", "Run the compiler_rt tests", modes));
test_step.dependOn(tests.addCompareOutputTests(b, test_filter, modes));
test_step.dependOn(tests.addBuildExampleTests(b, test_filter, modes));
test_step.dependOn(tests.addCliTests(b, test_filter, modes));
test_step.dependOn(tests.addCompileErrorTests(b, test_filter, modes));
test_step.dependOn(tests.addAssembleAndLinkTests(b, test_filter, modes));
test_step.dependOn(tests.addRuntimeSafetyTests(b, test_filter, modes));
test_step.dependOn(tests.addTranslateCTests(b, test_filter));
test_step.dependOn(tests.addGenHTests(b, test_filter));
test_step.dependOn(docs_step);
}
/// Attaches everything recorded in `dep` to `lib_exe_obj`.
///
/// The four lists are applied in a fixed order: library search directories,
/// system libraries, library files (added as object files), and finally
/// include directories.
fn dependOnLib(lib_exe_obj: var, dep: *const LibraryDep) void {
    // Library search directories.
    for (dep.libdirs.toSliceConst()) |search_dir| {
        lib_exe_obj.addLibPath(search_dir);
    }

    // Libraries referenced by name.
    for (dep.system_libs.toSliceConst()) |lib_name| {
        lib_exe_obj.linkSystemLibrary(lib_name);
    }

    // Libraries referenced by file path are handed over as object files.
    for (dep.libs.toSliceConst()) |lib_path| {
        lib_exe_obj.addObjectFile(lib_path);
    }

    // Header search directories.
    for (dep.includes.toSliceConst()) |include_dir| {
        lib_exe_obj.addIncludeDir(include_dir);
    }
}
/// Adds the library file `<cmake_binary_dir>/zig_cpp/<prefix><lib_name><ext>`
/// to `lib_exe_obj` as an object file.
///
/// `<prefix>` is empty when the artifact targets Windows and "lib" otherwise;
/// `<ext>` is whatever the artifact's target reports via `libFileExt()`.
fn addCppLib(b: *Builder, lib_exe_obj: var, cmake_binary_dir: []const u8, lib_name: []const u8) void {
    const name_prefix = if (lib_exe_obj.target.isWindows()) "" else "lib";
    const file_name = b.fmt("{}{}{}", name_prefix, lib_name, lib_exe_obj.target.libFileExt());
    // A failure to join the path is treated as impossible here.
    const full_path = os.path.join(b.allocator, cmake_binary_dir, "zig_cpp", file_name) catch unreachable;
    lib_exe_obj.addObjectFile(full_path);
}
// Link and include information for one external dependency. findLLVM fills
// it in from llvm-config output and dependOnLib applies it to an artifact.
const LibraryDep = struct {
// Library search directories (passed to addLibPath).
libdirs: ArrayList([]const u8),
// Libraries given as absolute paths (passed to addObjectFile).
libs: ArrayList([]const u8),
// Libraries given by name, e.g. from "-l<name>" (passed to linkSystemLibrary).
system_libs: ArrayList([]const u8),
// Include directories (passed to addIncludeDir).
includes: ArrayList([]const u8),
};
/// Queries `llvm_config_exe` for LLVM's libraries, include directory and
/// library directory and returns them as a `LibraryDep`.
///
/// All three commands are run up front; their output is then tokenized on
/// spaces, carriage returns and newlines. Any error from running a command
/// or from appending to a list is returned to the caller.
fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
    const raw_libs = try b.exec([][]const u8{ llvm_config_exe, "--libs", "--system-libs" });
    const raw_includes = try b.exec([][]const u8{ llvm_config_exe, "--includedir" });
    const raw_libdirs = try b.exec([][]const u8{ llvm_config_exe, "--libdir" });

    var dep = LibraryDep{
        .libdirs = ArrayList([]const u8).init(b.allocator),
        .libs = ArrayList([]const u8).init(b.allocator),
        .system_libs = ArrayList([]const u8).init(b.allocator),
        .includes = ArrayList([]const u8).init(b.allocator),
    };

    // "-l<name>" and bare names are system libraries; absolute paths are
    // kept as library files.
    var lib_it = mem.split(raw_libs, " \r\n");
    while (lib_it.next()) |arg| {
        if (mem.startsWith(u8, arg, "-l")) {
            try dep.system_libs.append(arg[2..]);
        } else if (os.path.isAbsolute(arg)) {
            try dep.libs.append(arg);
        } else {
            try dep.system_libs.append(arg);
        }
    }

    // Include directories, with an optional "-I" prefix stripped.
    var include_it = mem.split(raw_includes, " \r\n");
    while (include_it.next()) |arg| {
        const include_dir = if (mem.startsWith(u8, arg, "-I")) arg[2..] else arg;
        try dep.includes.append(include_dir);
    }

    // Library directories, with an optional "-L" prefix stripped.
    var libdir_it = mem.split(raw_libdirs, " \r\n");
    while (libdir_it.next()) |arg| {
        const lib_dir = if (mem.startsWith(u8, arg, "-L")) arg[2..] else arg;
        try dep.libdirs.append(lib_dir);
    }

    return dep;
}
/// Schedules installation of every file named in `stdlib_files`, a
/// ';'-separated list of paths relative to the "std" directory. Each file
/// is installed from "std/<file>" to "lib/zig/std/<file>".
pub fn installStdLib(b: *Builder, stdlib_files: []const u8) void {
    var names = mem.split(stdlib_files, ";");
    while (true) {
        const name = names.next() orelse break;
        // Failures while joining the paths are treated as impossible here.
        const from = os.path.join(b.allocator, "std", name) catch unreachable;
        const to = os.path.join(b.allocator, "lib", "zig", "std", name) catch unreachable;
        b.installFile(from, to);
    }
}
// Registers an install step for every entry of `c_header_files`, a
// ';'-separated list of paths. Each entry is copied from `c_headers/<entry>`
// to `lib/zig/include/<entry>`.
pub fn installCHeaders(b: *Builder, c_header_files: []const u8) void {
    var entries = mem.split(c_header_files, ";");
    while (true) {
        const entry = entries.next() orelse break;
        const from = os.path.join(b.allocator, "c_headers", entry) catch unreachable;
        const to = os.path.join(b.allocator, "lib", "zig", "include", entry) catch unreachable;
        b.installFile(from, to);
    }
}
// Returns the next line of `build_info`, starting at `index.*`, without its
// line terminator, and advances `index.*` past that terminator.
//
// Recognized terminators are "\n", "\r\n" and a lone "\r". If the remaining
// input contains no terminator, the rest of the input is returned and
// `index.*` is left at `build_info.len`, so a missing final newline no longer
// reads out of bounds. Calling it again at the end yields an empty slice.
fn nextValue(index: *usize, build_info: []const u8) []const u8 {
    const start = index.*;
    while (index.* < build_info.len) : (index.* += 1) {
        switch (build_info[index.*]) {
            '\n' => {
                const result = build_info[start..index.*];
                index.* += 1;
                return result;
            },
            '\r' => {
                const result = build_info[start..index.*];
                index.* += 1;
                // Only swallow the byte after '\r' when it really is the '\n'
                // of a CRLF pair; never skip data or step past the end.
                if (index.* < build_info.len and build_info[index.*] == '\n') {
                    index.* += 1;
                }
                return result;
            },
            else => continue,
        }
    }
    // No terminator found: hand back whatever is left.
    return build_info[start..];
}
// Applies the include paths, object files and system libraries from `ctx`
// to the artifact `exe`.
//
// Returns error.RequiredLibraryNotFound when targeting Linux and the C++
// compiler cannot report a real path for libstdc++.a; errors from running
// the compiler are propagated as well.
fn configureStage2(b: *Builder, exe: var, ctx: Context) !void {
    // This is for finding /lib/libz.a on alpine linux.
    // TODO turn this into -Dextra-lib-path=/lib option
    exe.addLibPath("/lib");

    exe.setNoRoSegment(ctx.no_rosegment);

    exe.addIncludeDir("src");
    exe.addIncludeDir(ctx.cmake_binary_dir);
    addCppLib(b, exe, ctx.cmake_binary_dir, "zig_cpp");

    if (ctx.lld_include_dir.len == 0) {
        // No external LLD configured: link the embedded copies from the cmake build.
        addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_wasm");
        addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_elf");
        addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_coff");
        addCppLib(b, exe, ctx.cmake_binary_dir, "embedded_lld_lib");
    } else {
        // External LLD: `lld_libraries` is a ';'-separated list of files to link.
        exe.addIncludeDir(ctx.lld_include_dir);
        var lld_it = mem.split(ctx.lld_libraries, ";");
        while (lld_it.next()) |lld_lib| {
            exe.addObjectFile(lld_lib);
        }
    }

    dependOnLib(exe, ctx.llvm);

    if (exe.target.getOs() == builtin.Os.linux) {
        // Ask the C++ compiler where its static libstdc++ lives.
        const compiler_output = try b.exec([][]const u8{
            ctx.cxx_compiler,
            "-print-file-name=libstdc++.a",
        });
        var line_it = mem.split(compiler_output, "\r\n");
        const libstdcxx_path = line_it.next().?;
        // Getting the bare file name back means the compiler found nothing.
        if (mem.eql(u8, libstdcxx_path, "libstdc++.a")) {
            warn(
                \\Unable to determine path to libstdc++.a
                \\On Fedora, install libstdc++-static and try again.
                \\
            );
            return error.RequiredLibraryNotFound;
        }
        exe.addObjectFile(libstdcxx_path);
        exe.linkSystemLibrary("pthread");
    } else if (exe.target.isDarwin()) {
        exe.linkSystemLibrary("c++");
    }

    if (ctx.dia_guids_lib.len != 0) {
        exe.addObjectFile(ctx.dia_guids_lib);
    }

    if (exe.target.getOs() != builtin.Os.windows) {
        exe.linkSystemLibrary("xml2");
    }

    exe.linkSystemLibrary("c");
}
// Build settings consumed by `configureStage2`.
const Context = struct {
    // Added as an include directory; its `zig_cpp` subdirectory holds the
    // libraries linked through `addCppLib`.
    cmake_binary_dir: []const u8,
    // Invoked with `-print-file-name=libstdc++.a` on Linux targets.
    cxx_compiler: []const u8,

    // NOTE(review): presumably the llvm-config executable handed to
    // `findLLVM` - not read in the code visible here; confirm at the call site.
    llvm_config_exe: []const u8,
    // Passed to `dependOnLib` for the stage2 artifact.
    llvm: LibraryDep,

    // When empty, the embedded LLD libraries are linked; otherwise this is
    // added as an include directory and `lld_libraries` is linked instead.
    lld_include_dir: []const u8,
    // ';'-separated list of files added as object files.
    lld_libraries: []const u8,

    // NOTE(review): presumably the ';'-separated lists given to
    // `installStdLib` / `installCHeaders` - confirm at the call site.
    std_files: []const u8,
    c_header_files: []const u8,

    // Added as an object file when non-empty.
    dia_guids_lib: []const u8,
    // Forwarded to `setNoRoSegment`.
    no_rosegment: bool,
};

View File

@@ -54,7 +54,7 @@ struct dim3;
#define __DELETE
#endif
// Make sure nobody can create instances of the special varible types. nvcc
// Make sure nobody can create instances of the special variable types. nvcc
// also disallows taking address of special variables, so we disable address-of
// operator as well.
#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \

View File

@@ -131,15 +131,6 @@ __DEVICE__ float ldexp(float __arg, int __exp) {
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float nexttoward(float __from, double __to) {
return __builtin_nexttowardf(__from, __to);
}
__DEVICE__ double nexttoward(double __from, double __to) {
return __builtin_nexttoward(__from, __to);
}
__DEVICE__ float nexttowardf(float __from, double __to) {
return __builtin_nexttowardf(__from, __to);
}
__DEVICE__ float pow(float __base, float __exp) {
return ::powf(__base, __exp);
}
@@ -157,6 +148,10 @@ __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
// Notably missing above is nexttoward. We omit it because
// libdevice doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.
// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
@@ -295,13 +290,6 @@ ldexp(__T __x, int __exp) {
return std::ldexp((double)__x, __exp);
}
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
double>::type
nexttoward(__T __from, double __to) {
return std::nexttoward((double)__from, __to);
}
template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
std::numeric_limits<__T1>::is_specialized &&
@@ -388,7 +376,6 @@ using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::nexttoward;
using ::pow;
using ::remainder;
using ::remquo;
@@ -456,8 +443,6 @@ using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::nexttowardf;
using ::nexttowardf;
using ::powf;
using ::remainderf;
using ::remquof;

File diff suppressed because it is too large Load Diff

View File

@@ -34,23 +34,24 @@
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
#pragma push_macro("__MAKE_SHUFFLES")
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask) \
inline __device__ int __FnName(int __val, int __offset, \
#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \
__Type) \
inline __device__ int __FnName(int __val, __Type __offset, \
int __width = warpSize) { \
return __IntIntrinsic(__val, __offset, \
((warpSize - __width) << 8) | (__Mask)); \
} \
inline __device__ float __FnName(float __val, int __offset, \
inline __device__ float __FnName(float __val, __Type __offset, \
int __width = warpSize) { \
return __FloatIntrinsic(__val, __offset, \
((warpSize - __width) << 8) | (__Mask)); \
} \
inline __device__ unsigned int __FnName(unsigned int __val, int __offset, \
inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \
int __width = warpSize) { \
return static_cast<unsigned int>( \
::__FnName(static_cast<int>(__val), __offset, __width)); \
} \
inline __device__ long long __FnName(long long __val, int __offset, \
inline __device__ long long __FnName(long long __val, __Type __offset, \
int __width = warpSize) { \
struct __Bits { \
int __a, __b; \
@@ -65,12 +66,29 @@
memcpy(&__ret, &__tmp, sizeof(__tmp)); \
return __ret; \
} \
inline __device__ long __FnName(long __val, __Type __offset, \
int __width = warpSize) { \
_Static_assert(sizeof(long) == sizeof(long long) || \
sizeof(long) == sizeof(int)); \
if (sizeof(long) == sizeof(long long)) { \
return static_cast<long>( \
::__FnName(static_cast<long long>(__val), __offset, __width)); \
} else if (sizeof(long) == sizeof(int)) { \
return static_cast<long>( \
::__FnName(static_cast<int>(__val), __offset, __width)); \
} \
} \
inline __device__ unsigned long __FnName( \
unsigned long __val, __Type __offset, int __width = warpSize) { \
return static_cast<unsigned long>( \
::__FnName(static_cast<long>(__val), __offset, __width)); \
} \
inline __device__ unsigned long long __FnName( \
unsigned long long __val, int __offset, int __width = warpSize) { \
unsigned long long __val, __Type __offset, int __width = warpSize) { \
return static_cast<unsigned long long>(::__FnName( \
static_cast<unsigned long long>(__val), __offset, __width)); \
} \
inline __device__ double __FnName(double __val, int __offset, \
inline __device__ double __FnName(double __val, __Type __offset, \
int __width = warpSize) { \
long long __tmp; \
_Static_assert(sizeof(__tmp) == sizeof(__val)); \
@@ -81,17 +99,166 @@
return __ret; \
}
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f);
__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
// maxLane.
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0);
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f);
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,
unsigned int);
__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,
unsigned int);
__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
int);
#pragma pop_macro("__MAKE_SHUFFLES")
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
#if CUDA_VERSION >= 9000
#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
// __shfl_sync_* variants available in CUDA-9
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
// Defines the overload set __FnName(mask, val, offset, width = warpSize) for
// int, float, unsigned int, long long, unsigned long long, long,
// unsigned long and double.
//
//   __IntIntrinsic / __FloatIntrinsic  builtins called for 32-bit int / float
//   __Mask                             OR-ed into the low bits of the last
//                                      intrinsic argument
//   __Type                             type of the `__offset` parameter
//
// 32-bit overloads forward straight to the intrinsics. 64-bit values are
// split into two ints, each half is shuffled, and the halves are reassembled.
// The remaining integer overloads cast to the same-sized signed overload.
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic,       \
                             __Mask, __Type)                                   \
  inline __device__ int __FnName(unsigned int __mask, int __val,               \
                                 __Type __offset, int __width = warpSize) {    \
    return __IntIntrinsic(__mask, __val, __offset,                             \
                          ((warpSize - __width) << 8) | (__Mask));             \
  }                                                                            \
  inline __device__ float __FnName(unsigned int __mask, float __val,           \
                                   __Type __offset, int __width = warpSize) {  \
    return __FloatIntrinsic(__mask, __val, __offset,                           \
                            ((warpSize - __width) << 8) | (__Mask));           \
  }                                                                            \
  inline __device__ unsigned int __FnName(unsigned int __mask,                 \
                                          unsigned int __val, __Type __offset, \
                                          int __width = warpSize) {            \
    return static_cast<unsigned int>(                                          \
        ::__FnName(__mask, static_cast<int>(__val), __offset, __width));       \
  }                                                                            \
  inline __device__ long long __FnName(unsigned int __mask, long long __val,   \
                                       __Type __offset,                        \
                                       int __width = warpSize) {               \
    struct __Bits {                                                            \
      int __a, __b;                                                            \
    };                                                                         \
    _Static_assert(sizeof(__val) == sizeof(__Bits));                           \
    _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \
    __Bits __tmp;                                                              \
    /* Load the input into the two halves (destination first). */             \
    memcpy(&__tmp, &__val, sizeof(__val));                                     \
    __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width);              \
    __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width);              \
    long long __ret;                                                           \
    memcpy(&__ret, &__tmp, sizeof(__tmp));                                     \
    return __ret;                                                              \
  }                                                                            \
  inline __device__ unsigned long long __FnName(                               \
      unsigned int __mask, unsigned long long __val, __Type __offset,          \
      int __width = warpSize) {                                                \
    /* Cast to the signed type so the long long overload is selected. */      \
    return static_cast<unsigned long long>(                                    \
        ::__FnName(__mask, static_cast<long long>(__val), __offset, __width)); \
  }                                                                            \
  inline __device__ long __FnName(unsigned int __mask, long __val,             \
                                  __Type __offset, int __width = warpSize) {   \
    _Static_assert(sizeof(long) == sizeof(long long) ||                        \
                   sizeof(long) == sizeof(int));                               \
    if (sizeof(long) == sizeof(long long)) {                                   \
      return static_cast<long>(::__FnName(                                     \
          __mask, static_cast<long long>(__val), __offset, __width));          \
    } else if (sizeof(long) == sizeof(int)) {                                  \
      return static_cast<long>(                                                \
          ::__FnName(__mask, static_cast<int>(__val), __offset, __width));     \
    }                                                                          \
  }                                                                            \
  inline __device__ unsigned long __FnName(                                    \
      unsigned int __mask, unsigned long __val, __Type __offset,               \
      int __width = warpSize) {                                                \
    return static_cast<unsigned long>(                                         \
        ::__FnName(__mask, static_cast<long>(__val), __offset, __width));      \
  }                                                                            \
  inline __device__ double __FnName(unsigned int __mask, double __val,         \
                                    __Type __offset, int __width = warpSize) { \
    long long __tmp;                                                           \
    _Static_assert(sizeof(__tmp) == sizeof(__val));                            \
    memcpy(&__tmp, &__val, sizeof(__val));                                     \
    __tmp = ::__FnName(__mask, __tmp, __offset, __width);                      \
    double __ret;                                                              \
    memcpy(&__ret, &__tmp, sizeof(__ret));                                     \
    return __ret;                                                              \
  }
// Instantiate the overload sets for the four *_sync shuffle flavours. The
// arguments are: function name, int intrinsic, float intrinsic, mask bits,
// and the type of the function's `__offset` parameter.
__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,
__nvvm_shfl_sync_idx_f32, 0x1f, int);
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
// maxLane.
__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,
__nvvm_shfl_sync_up_f32, 0, unsigned int);
__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
__nvvm_shfl_sync_down_f32, 0x1f, unsigned int);
__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,
__nvvm_shfl_sync_bfly_f32, 0x1f, int);
// Restore whatever definition of __MAKE_SYNC_SHUFFLES was pushed earlier.
#pragma pop_macro("__MAKE_SYNC_SHUFFLES")
// Forwards `mask` (all 32 bits set by default) to __nvvm_bar_warp_sync.
inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
  __nvvm_bar_warp_sync(mask);
}
// Forwards the barrier `id` to __nvvm_barrier_sync.
inline __device__ void __barrier_sync(unsigned int id)
{
  __nvvm_barrier_sync(id);
}
// Forwards the barrier `id` and `count` to __nvvm_barrier_sync_cnt.
inline __device__ void
__barrier_sync_count(unsigned int id, unsigned int count) {
  __nvvm_barrier_sync_cnt(id, count);
}
// Returns the result of __nvvm_vote_all_sync for `mask` and `pred`.
inline __device__ int __all_sync(unsigned int mask, int pred) {
  const int __vote = __nvvm_vote_all_sync(mask, pred);
  return __vote;
}
// Returns the result of __nvvm_vote_any_sync for `mask` and `pred`.
inline __device__ int __any_sync(unsigned int mask, int pred) {
  const int __vote = __nvvm_vote_any_sync(mask, pred);
  return __vote;
}
// Returns the result of __nvvm_vote_uni_sync for `mask` and `pred`.
inline __device__ int __uni_sync(unsigned int mask, int pred) {
  const int __vote = __nvvm_vote_uni_sync(mask, pred);
  return __vote;
}
// Returns the result of __nvvm_vote_ballot_sync for `mask` and `pred`.
inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
  const unsigned int __ballot = __nvvm_vote_ballot_sync(mask, pred);
  return __ballot;
}
// Returns the mask of currently active lanes.
//
// With CUDA 9.2 or newer this uses the dedicated `activemask.b32` PTX
// instruction. The non-sync `vote.ballot` used before is deprecated and not
// valid for sm_70 and newer targets, so it is only kept for older toolkits
// whose PTX version lacks `activemask`.
inline __device__ unsigned int __activemask() {
#if CUDA_VERSION < 9020
  return __nvvm_vote_ballot(1);
#else
  unsigned int __active;
  asm volatile("activemask.b32 %0;" : "=r"(__active));
  return __active;
#endif
}
// Returns the result of __nvvm_fns for `mask`, `base` and `offset`.
inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
  const unsigned int __found = __nvvm_fns(mask, base, offset);
  return __found;
}
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
// Define __match* builtins CUDA-9 headers expect to see.
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
// Returns the result of __nvvm_match_any_sync_i32 for `mask` and `value`.
inline __device__ unsigned int
__match32_any_sync(unsigned int mask, unsigned int value) {
  const unsigned int __matched = __nvvm_match_any_sync_i32(mask, value);
  return __matched;
}
// Returns the result of __nvvm_match_any_sync_i64 for `mask` and `value`.
inline __device__ unsigned long long
__match64_any_sync(unsigned int mask, unsigned long long value) {
  const unsigned long long __matched = __nvvm_match_any_sync_i64(mask, value);
  return __matched;
}
// Returns the result of __nvvm_match_all_sync_i32p; `pred` is passed through
// to the builtin unchanged.
inline __device__ unsigned int
__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {
  const unsigned int __matched = __nvvm_match_all_sync_i32p(mask, value, pred);
  return __matched;
}
// Returns the result of __nvvm_match_all_sync_i64p; `pred` is passed through
// to the builtin unchanged.
inline __device__ unsigned long long
__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {
  const unsigned long long __matched =
      __nvvm_match_all_sync_i64p(mask, value, pred);
  return __matched;
}
#include "crt/sm_70_rt.hpp"
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
#endif // CUDA_VERSION >= 9000
// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
// Prevent the vanilla sm_32 intrinsics header from being included.
@@ -110,6 +277,9 @@ inline __device__ long long __ldg(const long long *ptr) {
// __ldg overload for unsigned char: returns __nvvm_ldg_uc(ptr).
inline __device__ unsigned char __ldg(const unsigned char *ptr) {
  const unsigned char __loaded = __nvvm_ldg_uc(ptr);
  return __loaded;
}
// __ldg overload for signed char: loads through the unsigned char builtin
// and converts the result back to signed char on return.
inline __device__ signed char __ldg(const signed char *ptr) {
  const unsigned char *__uptr = (const unsigned char *)ptr;
  return __nvvm_ldg_uc(__uptr);
}
// __ldg overload for unsigned short: returns __nvvm_ldg_us(ptr).
inline __device__ unsigned short __ldg(const unsigned short *ptr) {
  const unsigned short __loaded = __nvvm_ldg_us(ptr);
  return __loaded;
}

View File

@@ -0,0 +1,466 @@
/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__
#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__
extern "C" {
__device__ int __nv_abs(int __a);
__device__ double __nv_acos(double __a);
__device__ float __nv_acosf(float __a);
__device__ double __nv_acosh(double __a);
__device__ float __nv_acoshf(float __a);
__device__ double __nv_asin(double __a);
__device__ float __nv_asinf(float __a);
__device__ double __nv_asinh(double __a);
__device__ float __nv_asinhf(float __a);
__device__ double __nv_atan2(double __a, double __b);
__device__ float __nv_atan2f(float __a, float __b);
__device__ double __nv_atan(double __a);
__device__ float __nv_atanf(float __a);
__device__ double __nv_atanh(double __a);
__device__ float __nv_atanhf(float __a);
__device__ int __nv_brev(int __a);
__device__ long long __nv_brevll(long long __a);
__device__ int __nv_byte_perm(int __a, int __b, int __c);
__device__ double __nv_cbrt(double __a);
__device__ float __nv_cbrtf(float __a);
__device__ double __nv_ceil(double __a);
__device__ float __nv_ceilf(float __a);
__device__ int __nv_clz(int __a);
__device__ int __nv_clzll(long long __a);
__device__ double __nv_copysign(double __a, double __b);
__device__ float __nv_copysignf(float __a, float __b);
__device__ double __nv_cos(double __a);
__device__ float __nv_cosf(float __a);
__device__ double __nv_cosh(double __a);
__device__ float __nv_coshf(float __a);
__device__ double __nv_cospi(double __a);
__device__ float __nv_cospif(float __a);
__device__ double __nv_cyl_bessel_i0(double __a);
__device__ float __nv_cyl_bessel_i0f(float __a);
__device__ double __nv_cyl_bessel_i1(double __a);
__device__ float __nv_cyl_bessel_i1f(float __a);
__device__ double __nv_dadd_rd(double __a, double __b);
__device__ double __nv_dadd_rn(double __a, double __b);
__device__ double __nv_dadd_ru(double __a, double __b);
__device__ double __nv_dadd_rz(double __a, double __b);
__device__ double __nv_ddiv_rd(double __a, double __b);
__device__ double __nv_ddiv_rn(double __a, double __b);
__device__ double __nv_ddiv_ru(double __a, double __b);
__device__ double __nv_ddiv_rz(double __a, double __b);
__device__ double __nv_dmul_rd(double __a, double __b);
__device__ double __nv_dmul_rn(double __a, double __b);
__device__ double __nv_dmul_ru(double __a, double __b);
__device__ double __nv_dmul_rz(double __a, double __b);
__device__ float __nv_double2float_rd(double __a);
__device__ float __nv_double2float_rn(double __a);
__device__ float __nv_double2float_ru(double __a);
__device__ float __nv_double2float_rz(double __a);
__device__ int __nv_double2hiint(double __a);
__device__ int __nv_double2int_rd(double __a);
__device__ int __nv_double2int_rn(double __a);
__device__ int __nv_double2int_ru(double __a);
__device__ int __nv_double2int_rz(double __a);
__device__ long long __nv_double2ll_rd(double __a);
__device__ long long __nv_double2ll_rn(double __a);
__device__ long long __nv_double2ll_ru(double __a);
__device__ long long __nv_double2ll_rz(double __a);
__device__ int __nv_double2loint(double __a);
__device__ unsigned int __nv_double2uint_rd(double __a);
__device__ unsigned int __nv_double2uint_rn(double __a);
__device__ unsigned int __nv_double2uint_ru(double __a);
__device__ unsigned int __nv_double2uint_rz(double __a);
__device__ unsigned long long __nv_double2ull_rd(double __a);
__device__ unsigned long long __nv_double2ull_rn(double __a);
__device__ unsigned long long __nv_double2ull_ru(double __a);
__device__ unsigned long long __nv_double2ull_rz(double __a);
__device__ unsigned long long __nv_double_as_longlong(double __a);
__device__ double __nv_drcp_rd(double __a);
__device__ double __nv_drcp_rn(double __a);
__device__ double __nv_drcp_ru(double __a);
__device__ double __nv_drcp_rz(double __a);
__device__ double __nv_dsqrt_rd(double __a);
__device__ double __nv_dsqrt_rn(double __a);
__device__ double __nv_dsqrt_ru(double __a);
__device__ double __nv_dsqrt_rz(double __a);
__device__ double __nv_dsub_rd(double __a, double __b);
__device__ double __nv_dsub_rn(double __a, double __b);
__device__ double __nv_dsub_ru(double __a, double __b);
__device__ double __nv_dsub_rz(double __a, double __b);
__device__ double __nv_erfc(double __a);
__device__ float __nv_erfcf(float __a);
__device__ double __nv_erfcinv(double __a);
__device__ float __nv_erfcinvf(float __a);
__device__ double __nv_erfcx(double __a);
__device__ float __nv_erfcxf(float __a);
__device__ double __nv_erf(double __a);
__device__ float __nv_erff(float __a);
__device__ double __nv_erfinv(double __a);
__device__ float __nv_erfinvf(float __a);
__device__ double __nv_exp10(double __a);
__device__ float __nv_exp10f(float __a);
__device__ double __nv_exp2(double __a);
__device__ float __nv_exp2f(float __a);
__device__ double __nv_exp(double __a);
__device__ float __nv_expf(float __a);
__device__ double __nv_expm1(double __a);
__device__ float __nv_expm1f(float __a);
__device__ double __nv_fabs(double __a);
__device__ float __nv_fabsf(float __a);
__device__ float __nv_fadd_rd(float __a, float __b);
__device__ float __nv_fadd_rn(float __a, float __b);
__device__ float __nv_fadd_ru(float __a, float __b);
__device__ float __nv_fadd_rz(float __a, float __b);
__device__ float __nv_fast_cosf(float __a);
__device__ float __nv_fast_exp10f(float __a);
__device__ float __nv_fast_expf(float __a);
__device__ float __nv_fast_fdividef(float __a, float __b);
__device__ float __nv_fast_log10f(float __a);
__device__ float __nv_fast_log2f(float __a);
__device__ float __nv_fast_logf(float __a);
__device__ float __nv_fast_powf(float __a, float __b);
__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);
__device__ float __nv_fast_sinf(float __a);
__device__ float __nv_fast_tanf(float __a);
__device__ double __nv_fdim(double __a, double __b);
__device__ float __nv_fdimf(float __a, float __b);
__device__ float __nv_fdiv_rd(float __a, float __b);
__device__ float __nv_fdiv_rn(float __a, float __b);
__device__ float __nv_fdiv_ru(float __a, float __b);
__device__ float __nv_fdiv_rz(float __a, float __b);
__device__ int __nv_ffs(int __a);
__device__ int __nv_ffsll(long long __a);
__device__ int __nv_finitef(float __a);
__device__ unsigned short __nv_float2half_rn(float __a);
__device__ int __nv_float2int_rd(float __a);
__device__ int __nv_float2int_rn(float __a);
__device__ int __nv_float2int_ru(float __a);
__device__ int __nv_float2int_rz(float __a);
__device__ long long __nv_float2ll_rd(float __a);
__device__ long long __nv_float2ll_rn(float __a);
__device__ long long __nv_float2ll_ru(float __a);
__device__ long long __nv_float2ll_rz(float __a);
__device__ unsigned int __nv_float2uint_rd(float __a);
__device__ unsigned int __nv_float2uint_rn(float __a);
__device__ unsigned int __nv_float2uint_ru(float __a);
__device__ unsigned int __nv_float2uint_rz(float __a);
__device__ unsigned long long __nv_float2ull_rd(float __a);
__device__ unsigned long long __nv_float2ull_rn(float __a);
__device__ unsigned long long __nv_float2ull_ru(float __a);
__device__ unsigned long long __nv_float2ull_rz(float __a);
__device__ int __nv_float_as_int(float __a);
__device__ unsigned int __nv_float_as_uint(float __a);
__device__ double __nv_floor(double __a);
__device__ float __nv_floorf(float __a);
__device__ double __nv_fma(double __a, double __b, double __c);
__device__ float __nv_fmaf(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);
__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);
__device__ float __nv_fmaf_rd(float __a, float __b, float __c);
__device__ float __nv_fmaf_rn(float __a, float __b, float __c);
__device__ float __nv_fmaf_ru(float __a, float __b, float __c);
__device__ float __nv_fmaf_rz(float __a, float __b, float __c);
__device__ double __nv_fma_rd(double __a, double __b, double __c);
__device__ double __nv_fma_rn(double __a, double __b, double __c);
__device__ double __nv_fma_ru(double __a, double __b, double __c);
__device__ double __nv_fma_rz(double __a, double __b, double __c);
__device__ double __nv_fmax(double __a, double __b);
__device__ float __nv_fmaxf(float __a, float __b);
__device__ double __nv_fmin(double __a, double __b);
__device__ float __nv_fminf(float __a, float __b);
__device__ double __nv_fmod(double __a, double __b);
__device__ float __nv_fmodf(float __a, float __b);
__device__ float __nv_fmul_rd(float __a, float __b);
__device__ float __nv_fmul_rn(float __a, float __b);
__device__ float __nv_fmul_ru(float __a, float __b);
__device__ float __nv_fmul_rz(float __a, float __b);
__device__ float __nv_frcp_rd(float __a);
__device__ float __nv_frcp_rn(float __a);
__device__ float __nv_frcp_ru(float __a);
__device__ float __nv_frcp_rz(float __a);
__device__ double __nv_frexp(double __a, int *__b);
__device__ float __nv_frexpf(float __a, int *__b);
__device__ float __nv_frsqrt_rn(float __a);
__device__ float __nv_fsqrt_rd(float __a);
__device__ float __nv_fsqrt_rn(float __a);
__device__ float __nv_fsqrt_ru(float __a);
__device__ float __nv_fsqrt_rz(float __a);
__device__ float __nv_fsub_rd(float __a, float __b);
__device__ float __nv_fsub_rn(float __a, float __b);
__device__ float __nv_fsub_ru(float __a, float __b);
__device__ float __nv_fsub_rz(float __a, float __b);
__device__ int __nv_hadd(int __a, int __b);
__device__ float __nv_half2float(unsigned short __h);
__device__ double __nv_hiloint2double(int __a, int __b);
__device__ double __nv_hypot(double __a, double __b);
__device__ float __nv_hypotf(float __a, float __b);
__device__ int __nv_ilogb(double __a);
__device__ int __nv_ilogbf(float __a);
__device__ double __nv_int2double_rn(int __a);
__device__ float __nv_int2float_rd(int __a);
__device__ float __nv_int2float_rn(int __a);
__device__ float __nv_int2float_ru(int __a);
__device__ float __nv_int2float_rz(int __a);
__device__ float __nv_int_as_float(int __a);
__device__ int __nv_isfinited(double __a);
__device__ int __nv_isinfd(double __a);
__device__ int __nv_isinff(float __a);
__device__ int __nv_isnand(double __a);
__device__ int __nv_isnanf(float __a);
__device__ double __nv_j0(double __a);
__device__ float __nv_j0f(float __a);
__device__ double __nv_j1(double __a);
__device__ float __nv_j1f(float __a);
__device__ float __nv_jnf(int __a, float __b);
__device__ double __nv_jn(int __a, double __b);
__device__ double __nv_ldexp(double __a, int __b);
__device__ float __nv_ldexpf(float __a, int __b);
__device__ double __nv_lgamma(double __a);
__device__ float __nv_lgammaf(float __a);
__device__ double __nv_ll2double_rd(long long __a);
__device__ double __nv_ll2double_rn(long long __a);
__device__ double __nv_ll2double_ru(long long __a);
__device__ double __nv_ll2double_rz(long long __a);
__device__ float __nv_ll2float_rd(long long __a);
__device__ float __nv_ll2float_rn(long long __a);
__device__ float __nv_ll2float_ru(long long __a);
__device__ float __nv_ll2float_rz(long long __a);
__device__ long long __nv_llabs(long long __a);
__device__ long long __nv_llmax(long long __a, long long __b);
__device__ long long __nv_llmin(long long __a, long long __b);
__device__ long long __nv_llrint(double __a);
__device__ long long __nv_llrintf(float __a);
__device__ long long __nv_llround(double __a);
__device__ long long __nv_llroundf(float __a);
__device__ double __nv_log10(double __a);
__device__ float __nv_log10f(float __a);
__device__ double __nv_log1p(double __a);
__device__ float __nv_log1pf(float __a);
__device__ double __nv_log2(double __a);
__device__ float __nv_log2f(float __a);
__device__ double __nv_logb(double __a);
__device__ float __nv_logbf(float __a);
__device__ double __nv_log(double __a);
__device__ float __nv_logf(float __a);
__device__ double __nv_longlong_as_double(long long __a);
__device__ int __nv_max(int __a, int __b);
__device__ int __nv_min(int __a, int __b);
__device__ double __nv_modf(double __a, double *__b);
__device__ float __nv_modff(float __a, float *__b);
__device__ int __nv_mul24(int __a, int __b);
__device__ long long __nv_mul64hi(long long __a, long long __b);
__device__ int __nv_mulhi(int __a, int __b);
__device__ double __nv_nan(const signed char *__a);
__device__ float __nv_nanf(const signed char *__a);
__device__ double __nv_nearbyint(double __a);
__device__ float __nv_nearbyintf(float __a);
__device__ double __nv_nextafter(double __a, double __b);
__device__ float __nv_nextafterf(float __a, float __b);
__device__ double __nv_norm3d(double __a, double __b, double __c);
__device__ float __nv_norm3df(float __a, float __b, float __c);
__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);
__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);
__device__ double __nv_normcdf(double __a);
__device__ float __nv_normcdff(float __a);
__device__ double __nv_normcdfinv(double __a);
__device__ float __nv_normcdfinvf(float __a);
__device__ float __nv_normf(int __a, const float *__b);
__device__ double __nv_norm(int __a, const double *__b);
__device__ int __nv_popc(int __a);
__device__ int __nv_popcll(long long __a);
__device__ double __nv_pow(double __a, double __b);
__device__ float __nv_powf(float __a, float __b);
__device__ double __nv_powi(double __a, int __b);
__device__ float __nv_powif(float __a, int __b);
__device__ double __nv_rcbrt(double __a);
__device__ float __nv_rcbrtf(float __a);
__device__ double __nv_rcp64h(double __a);
__device__ double __nv_remainder(double __a, double __b);
__device__ float __nv_remainderf(float __a, float __b);
__device__ double __nv_remquo(double __a, double __b, int *__c);
__device__ float __nv_remquof(float __a, float __b, int *__c);
__device__ int __nv_rhadd(int __a, int __b);
__device__ double __nv_rhypot(double __a, double __b);
__device__ float __nv_rhypotf(float __a, float __b);
__device__ double __nv_rint(double __a);
__device__ float __nv_rintf(float __a);
__device__ double __nv_rnorm3d(double __a, double __b, double __c);
__device__ float __nv_rnorm3df(float __a, float __b, float __c);
__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);
__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);
__device__ float __nv_rnormf(int __a, const float *__b);
__device__ double __nv_rnorm(int __a, const double *__b);
__device__ double __nv_round(double __a);
__device__ float __nv_roundf(float __a);
__device__ double __nv_rsqrt(double __a);
__device__ float __nv_rsqrtf(float __a);
__device__ int __nv_sad(int __a, int __b, int __c);
__device__ float __nv_saturatef(float __a);
__device__ double __nv_scalbn(double __a, int __b);
__device__ float __nv_scalbnf(float __a, int __b);
__device__ int __nv_signbitd(double __a);
__device__ int __nv_signbitf(float __a);
__device__ void __nv_sincos(double __a, double *__b, double *__c);
__device__ void __nv_sincosf(float __a, float *__b, float *__c);
__device__ void __nv_sincospi(double __a, double *__b, double *__c);
__device__ void __nv_sincospif(float __a, float *__b, float *__c);
__device__ double __nv_sin(double __a);
__device__ float __nv_sinf(float __a);
__device__ double __nv_sinh(double __a);
__device__ float __nv_sinhf(float __a);
__device__ double __nv_sinpi(double __a);
__device__ float __nv_sinpif(float __a);
__device__ double __nv_sqrt(double __a);
__device__ float __nv_sqrtf(float __a);
__device__ double __nv_tan(double __a);
__device__ float __nv_tanf(float __a);
__device__ double __nv_tanh(double __a);
__device__ float __nv_tanhf(float __a);
__device__ double __nv_tgamma(double __a);
__device__ float __nv_tgammaf(float __a);
__device__ double __nv_trunc(double __a);
__device__ float __nv_truncf(float __a);
__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);
__device__ double __nv_uint2double_rn(unsigned int __i);
__device__ float __nv_uint2float_rd(unsigned int __a);
__device__ float __nv_uint2float_rn(unsigned int __a);
__device__ float __nv_uint2float_ru(unsigned int __a);
__device__ float __nv_uint2float_rz(unsigned int __a);
__device__ float __nv_uint_as_float(unsigned int __a);
__device__ double __nv_ull2double_rd(unsigned long long __a);
__device__ double __nv_ull2double_rn(unsigned long long __a);
__device__ double __nv_ull2double_ru(unsigned long long __a);
__device__ double __nv_ull2double_rz(unsigned long long __a);
__device__ float __nv_ull2float_rd(unsigned long long __a);
__device__ float __nv_ull2float_rn(unsigned long long __a);
__device__ float __nv_ull2float_ru(unsigned long long __a);
__device__ float __nv_ull2float_rz(unsigned long long __a);
__device__ unsigned long long __nv_ullmax(unsigned long long __a,
unsigned long long __b);
__device__ unsigned long long __nv_ullmin(unsigned long long __a,
unsigned long long __b);
__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);
__device__ unsigned long long __nv_umul64hi(unsigned long long __a,
unsigned long long __b);
__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);
__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,
unsigned int __c);
#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020
__device__ int __nv_vabs2(int __a);
__device__ int __nv_vabs4(int __a);
__device__ int __nv_vabsdiffs2(int __a, int __b);
__device__ int __nv_vabsdiffs4(int __a, int __b);
__device__ int __nv_vabsdiffu2(int __a, int __b);
__device__ int __nv_vabsdiffu4(int __a, int __b);
__device__ int __nv_vabsss2(int __a);
__device__ int __nv_vabsss4(int __a);
__device__ int __nv_vadd2(int __a, int __b);
__device__ int __nv_vadd4(int __a, int __b);
__device__ int __nv_vaddss2(int __a, int __b);
__device__ int __nv_vaddss4(int __a, int __b);
__device__ int __nv_vaddus2(int __a, int __b);
__device__ int __nv_vaddus4(int __a, int __b);
__device__ int __nv_vavgs2(int __a, int __b);
__device__ int __nv_vavgs4(int __a, int __b);
__device__ int __nv_vavgu2(int __a, int __b);
__device__ int __nv_vavgu4(int __a, int __b);
__device__ int __nv_vcmpeq2(int __a, int __b);
__device__ int __nv_vcmpeq4(int __a, int __b);
__device__ int __nv_vcmpges2(int __a, int __b);
__device__ int __nv_vcmpges4(int __a, int __b);
__device__ int __nv_vcmpgeu2(int __a, int __b);
__device__ int __nv_vcmpgeu4(int __a, int __b);
__device__ int __nv_vcmpgts2(int __a, int __b);
__device__ int __nv_vcmpgts4(int __a, int __b);
__device__ int __nv_vcmpgtu2(int __a, int __b);
__device__ int __nv_vcmpgtu4(int __a, int __b);
__device__ int __nv_vcmples2(int __a, int __b);
__device__ int __nv_vcmples4(int __a, int __b);
__device__ int __nv_vcmpleu2(int __a, int __b);
__device__ int __nv_vcmpleu4(int __a, int __b);
__device__ int __nv_vcmplts2(int __a, int __b);
__device__ int __nv_vcmplts4(int __a, int __b);
__device__ int __nv_vcmpltu2(int __a, int __b);
__device__ int __nv_vcmpltu4(int __a, int __b);
__device__ int __nv_vcmpne2(int __a, int __b);
__device__ int __nv_vcmpne4(int __a, int __b);
__device__ int __nv_vhaddu2(int __a, int __b);
__device__ int __nv_vhaddu4(int __a, int __b);
__device__ int __nv_vmaxs2(int __a, int __b);
__device__ int __nv_vmaxs4(int __a, int __b);
__device__ int __nv_vmaxu2(int __a, int __b);
__device__ int __nv_vmaxu4(int __a, int __b);
__device__ int __nv_vmins2(int __a, int __b);
__device__ int __nv_vmins4(int __a, int __b);
__device__ int __nv_vminu2(int __a, int __b);
__device__ int __nv_vminu4(int __a, int __b);
__device__ int __nv_vneg2(int __a);
__device__ int __nv_vneg4(int __a);
__device__ int __nv_vnegss2(int __a);
__device__ int __nv_vnegss4(int __a);
__device__ int __nv_vsads2(int __a, int __b);
__device__ int __nv_vsads4(int __a, int __b);
__device__ int __nv_vsadu2(int __a, int __b);
__device__ int __nv_vsadu4(int __a, int __b);
__device__ int __nv_vseteq2(int __a, int __b);
__device__ int __nv_vseteq4(int __a, int __b);
__device__ int __nv_vsetges2(int __a, int __b);
__device__ int __nv_vsetges4(int __a, int __b);
__device__ int __nv_vsetgeu2(int __a, int __b);
__device__ int __nv_vsetgeu4(int __a, int __b);
__device__ int __nv_vsetgts2(int __a, int __b);
__device__ int __nv_vsetgts4(int __a, int __b);
__device__ int __nv_vsetgtu2(int __a, int __b);
__device__ int __nv_vsetgtu4(int __a, int __b);
__device__ int __nv_vsetles2(int __a, int __b);
__device__ int __nv_vsetles4(int __a, int __b);
__device__ int __nv_vsetleu2(int __a, int __b);
__device__ int __nv_vsetleu4(int __a, int __b);
__device__ int __nv_vsetlts2(int __a, int __b);
__device__ int __nv_vsetlts4(int __a, int __b);
__device__ int __nv_vsetltu2(int __a, int __b);
__device__ int __nv_vsetltu4(int __a, int __b);
__device__ int __nv_vsetne2(int __a, int __b);
__device__ int __nv_vsetne4(int __a, int __b);
__device__ int __nv_vsub2(int __a, int __b);
__device__ int __nv_vsub4(int __a, int __b);
__device__ int __nv_vsubss2(int __a, int __b);
__device__ int __nv_vsubss4(int __a, int __b);
__device__ int __nv_vsubus2(int __a, int __b);
__device__ int __nv_vsubus4(int __a, int __b);
#endif // CUDA_VERSION
__device__ double __nv_y0(double __a);
__device__ float __nv_y0f(float __a);
__device__ double __nv_y1(double __a);
__device__ float __nv_y1f(float __a);
__device__ float __nv_ynf(int __a, float __b);
__device__ double __nv_yn(int __a, double __b);
} // extern "C"
#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__

View File

@@ -149,9 +149,6 @@ __DEVICE__ double nearbyint(double);
__DEVICE__ float nearbyint(float);
__DEVICE__ double nextafter(double, double);
__DEVICE__ float nextafter(float, float);
__DEVICE__ double nexttoward(double, double);
__DEVICE__ float nexttoward(float, double);
__DEVICE__ float nexttowardf(float, double);
__DEVICE__ double pow(double, double);
__DEVICE__ double pow(double, int);
__DEVICE__ float pow(float, float);
@@ -185,6 +182,10 @@ __DEVICE__ float tgamma(float);
__DEVICE__ double trunc(double);
__DEVICE__ float trunc(float);
// Notably missing above is nexttoward, which we don't define on
// the device side because libdevice doesn't give us an implementation, and we
// don't want to be in the business of writing one ourselves.
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
@@ -255,7 +256,6 @@ using ::nan;
using ::nanf;
using ::nearbyint;
using ::nextafter;
using ::nexttoward;
using ::pow;
using ::remainder;
using ::remquo;

View File

@@ -62,7 +62,7 @@
#include "cuda.h"
#if !defined(CUDA_VERSION)
#error "cuda.h did not define CUDA_VERSION"
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 8000
#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020
#error "Unsupported CUDA version!"
#endif
@@ -84,19 +84,33 @@
#define __DEVICE_FUNCTIONS_H__
#define __MATH_FUNCTIONS_H__
#define __COMMON_FUNCTIONS_H__
// device_functions_decls is replaced by __clang_cuda_device_functions.h
// included below.
#define __DEVICE_FUNCTIONS_DECLS_H__
#undef __CUDACC__
#if CUDA_VERSION < 9000
#define __CUDABE__
#else
#define __CUDA_LIBDEVICE__
#endif
// Disables definitions of device-side runtime support stubs in
// cuda_device_runtime_api.h
#include "driver_types.h"
#include "host_config.h"
#include "host_defines.h"
// Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in
// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the
// functional equivalent of what we need.
#pragma push_macro("nv_weak")
#define nv_weak weak
#undef __CUDABE__
#undef __CUDA_LIBDEVICE__
#define __CUDACC__
#include "cuda_runtime.h"
#pragma pop_macro("nv_weak")
#undef __CUDACC__
#define __CUDABE__
@@ -105,7 +119,9 @@
#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)
#if CUDA_VERSION < 9000
#include "crt/device_runtime.h"
#endif
#include "crt/host_runtime.h"
// device_runtime.h defines __cxa_* macros that will conflict with
// cxxabi.h.
@@ -130,20 +146,22 @@ inline __host__ double __signbitd(double x) {
}
#endif
// We need decls for functions in CUDA's libdevice with __device__
// attribute only. Alas they come either as __host__ __device__ or
// with no attributes at all. To work around that, define __CUDACC_RTC__
// which produces HD variant and undef __host__ which gives us desired
// decls with __device__ attribute.
#pragma push_macro("__host__")
#define __host__
#define __CUDACC_RTC__
#include "device_functions_decls.h"
#undef __CUDACC_RTC__
// CUDA 9.1 no longer provides declarations for libdevice functions, so we need
// to provide our own.
#include <__clang_cuda_libdevice_declares.h>
// Temporarily poison __host__ macro to ensure it's not used by any of
// the headers we're about to include.
#define __host__ UNEXPECTED_HOST_ATTRIBUTE
// Wrappers for many device-side standard library functions became compiler
// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now
// provides its own implementation of the wrappers.
#if CUDA_VERSION >= 9000
#include <__clang_cuda_device_functions.h>
#endif
// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's
// counterpart does not do it, so we need to make it empty here to keep
// following CUDA includes happy.
#undef __THROW
#define __THROW
// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.
// Previous versions used to check whether they are defined or not.
@@ -160,13 +178,20 @@ inline __host__ double __signbitd(double x) {
#endif
#endif
// Temporarily poison __host__ macro to ensure it's not used by any of
// the headers we're about to include.
#pragma push_macro("__host__")
#define __host__ UNEXPECTED_HOST_ATTRIBUTE
// device_functions.hpp and math_functions*.hpp use 'static
// __forceinline__' (with no __device__) for definitions of device
// functions. Temporarily redefine __forceinline__ to include
// __device__.
#pragma push_macro("__forceinline__")
#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
#if CUDA_VERSION < 9000
#include "device_functions.hpp"
#endif
// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we
// get the slow-but-accurate or fast-but-inaccurate versions of functions like
@@ -178,17 +203,32 @@ inline __host__ double __signbitd(double x) {
#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
#define __USE_FAST_MATH__ 1
#endif
#if CUDA_VERSION >= 9000
// CUDA-9.2 needs host-side memcpy for some host functions in
// device_functions.hpp
#if CUDA_VERSION >= 9020
#include <string.h>
#endif
#include "crt/math_functions.hpp"
#else
#include "math_functions.hpp"
#endif
#pragma pop_macro("__USE_FAST_MATH__")
#if CUDA_VERSION < 9000
#include "math_functions_dbl_ptx3.hpp"
#endif
#pragma pop_macro("__forceinline__")
// Pull in host-only functions that are only available when neither
// __CUDACC__ nor __CUDABE__ are defined.
#undef __MATH_FUNCTIONS_HPP__
#undef __CUDABE__
#if CUDA_VERSION < 9000
#include "math_functions.hpp"
#endif
// Alas, additional overloads for these functions are hard to get to.
// Considering that we only need these overloads for a few functions,
// we can provide them here.
@@ -204,22 +244,36 @@ static inline float normcdfinv(float __a) { return normcdfinvf(__a); }
static inline float normcdf(float __a) { return normcdff(__a); }
static inline float erfcx(float __a) { return erfcxf(__a); }
#if CUDA_VERSION < 9000
// For some reason single-argument variant is not always declared by
// CUDA headers. Alas, device_functions.hpp included below needs it.
static inline __device__ void __brkpt(int __c) { __brkpt(); }
#endif
// Now include *.hpp with definitions of various GPU functions. Alas,
// a lot of things get declared/defined with __host__ attribute which
// we don't want and we have to define it out. We also have to include
// {device,math}_functions.hpp again in order to extract the other
// branch of #if/else inside.
#define __host__
#undef __CUDABE__
#define __CUDACC__
#if CUDA_VERSION >= 9000
// Some atomic functions became compiler builtins in CUDA-9 , so we need their
// declarations.
#include "device_atomic_functions.h"
#endif
#undef __DEVICE_FUNCTIONS_HPP__
#include "device_atomic_functions.hpp"
#if CUDA_VERSION >= 9000
#include "crt/device_functions.hpp"
#include "crt/device_double_functions.hpp"
#else
#include "device_functions.hpp"
#define __CUDABE__
#include "device_double_functions.h"
#undef __CUDABE__
#endif
#include "sm_20_atomic_functions.hpp"
#include "sm_20_intrinsics.hpp"
#include "sm_32_atomic_functions.hpp"
@@ -233,8 +287,11 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
// reason about our code.
#if CUDA_VERSION >= 8000
#pragma push_macro("__CUDA_ARCH__")
#undef __CUDA_ARCH__
#include "sm_60_atomic_functions.hpp"
#include "sm_61_intrinsics.hpp"
#pragma pop_macro("__CUDA_ARCH__")
#endif
#undef __MATH_FUNCTIONS_HPP__
@@ -247,7 +304,27 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
#pragma push_macro("__GNUC__")
#undef __GNUC__
#define signbit __ignored_cuda_signbit
// CUDA-9 omits device-side definitions of some math functions if it sees
// include guard from math.h wrapper from libstdc++. We have to undo the header
// guard temporarily to get the definitions we need.
#pragma push_macro("_GLIBCXX_MATH_H")
#pragma push_macro("_LIBCPP_VERSION")
#if CUDA_VERSION >= 9000
#undef _GLIBCXX_MATH_H
// We also need to undo another guard that checks for libc++ 3.8+
#ifdef _LIBCPP_VERSION
#define _LIBCPP_VERSION 3700
#endif
#endif
#if CUDA_VERSION >= 9000
#include "crt/math_functions.hpp"
#else
#include "math_functions.hpp"
#endif
#pragma pop_macro("_GLIBCXX_MATH_H")
#pragma pop_macro("_LIBCPP_VERSION")
#pragma pop_macro("__GNUC__")
#pragma pop_macro("signbit")

View File

@@ -20,15 +20,18 @@
*
*===-----------------------------------------------------------------------===
*/
#ifndef _WMMINTRIN_AES_H
#define _WMMINTRIN_AES_H
#include <emmintrin.h>
#ifndef __WMMINTRIN_H
#error "Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead."
#endif
#ifndef __WMMINTRIN_AES_H
#define __WMMINTRIN_AES_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128)))
/// \brief Performs a single round of AES encryption using the Equivalent
/// Performs a single round of AES encryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
@@ -48,7 +51,7 @@ _mm_aesenc_si128(__m128i __V, __m128i __R)
return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);
}
/// \brief Performs the final round of AES encryption using the Equivalent
/// Performs the final round of AES encryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
@@ -68,7 +71,7 @@ _mm_aesenclast_si128(__m128i __V, __m128i __R)
return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);
}
/// \brief Performs a single round of AES decryption using the Equivalent
/// Performs a single round of AES decryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
@@ -88,7 +91,7 @@ _mm_aesdec_si128(__m128i __V, __m128i __R)
return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);
}
/// \brief Performs the final round of AES decryption using the Equivalent
/// Performs the final round of AES decryption using the Equivalent
/// Inverse Cipher, transforming the state value from the first source
/// operand using a 128-bit round key value contained in the second source
/// operand, and writes the result to the destination.
@@ -108,7 +111,7 @@ _mm_aesdeclast_si128(__m128i __V, __m128i __R)
return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);
}
/// \brief Applies the AES InvMixColumns() transformation to an expanded key
/// Applies the AES InvMixColumns() transformation to an expanded key
/// contained in the source operand, and writes the result to the
/// destination.
///
@@ -125,7 +128,7 @@ _mm_aesimc_si128(__m128i __V)
return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);
}
/// \brief Generates a round key for AES encyption, operating on 128-bit data
/// Generates a round key for AES encryption, operating on 128-bit data
/// specified in the first source operand and using an 8-bit round constant
/// specified by the second source operand, and writes the result to the
/// destination.
@@ -148,4 +151,4 @@ _mm_aesimc_si128(__m128i __V)
#undef __DEFAULT_FN_ATTRS
#endif /* _WMMINTRIN_AES_H */
#endif /* __WMMINTRIN_AES_H */

View File

@@ -20,10 +20,15 @@
*
*===-----------------------------------------------------------------------===
*/
#ifndef _WMMINTRIN_PCLMUL_H
#define _WMMINTRIN_PCLMUL_H
/// \brief Multiplies two 64-bit integer values, which are selected from source
#ifndef __WMMINTRIN_H
#error "Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead."
#endif
#ifndef __WMMINTRIN_PCLMUL_H
#define __WMMINTRIN_PCLMUL_H
/// Multiplies two 64-bit integer values, which are selected from source
/// operands using the immediate-value operand. The multiplication is a
/// carry-less multiplication, and the 128-bit integer product is stored in
/// the destination.
@@ -50,8 +55,8 @@
/// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
/// \returns The 128-bit integer vector containing the result of the carry-less
/// multiplication of the selected 64-bit values.
#define _mm_clmulepi64_si128(__X, __Y, __I) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
(__v2di)(__m128i)(__Y), (char)(__I)))
#define _mm_clmulepi64_si128(X, Y, I) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
(__v2di)(__m128i)(Y), (char)(I)))
#endif /* _WMMINTRIN_PCLMUL_H */
#endif /* __WMMINTRIN_PCLMUL_H */

View File

@@ -27,9 +27,9 @@
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))
/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
/// Extracts the specified bits from the lower 64 bits of the 128-bit
/// integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
@@ -57,7 +57,7 @@
((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
(char)(len), (char)(idx)))
/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
/// Extracts the specified bits from the lower 64 bits of the 128-bit
/// integer vector operand at the index and of the length specified by
/// \a __y.
///
@@ -82,7 +82,7 @@ _mm_extract_si64(__m128i __x, __m128i __y)
return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}
/// \brief Inserts bits of a specified length from the source integer vector
/// Inserts bits of a specified length from the source integer vector
/// \a y into the lower 64 bits of the destination integer vector \a x at
/// the index \a idx and of the length \a len.
///
@@ -120,7 +120,7 @@ _mm_extract_si64(__m128i __x, __m128i __y)
(__v2di)(__m128i)(y), \
(char)(len), (char)(idx)))
/// \brief Inserts bits of a specified length from the source integer vector
/// Inserts bits of a specified length from the source integer vector
/// \a __y into the lower 64 bits of the destination integer vector \a __x
/// at the index and of the length specified by \a __y.
///
@@ -152,7 +152,7 @@ _mm_insert_si64(__m128i __x, __m128i __y)
return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}
/// \brief Stores a 64-bit double-precision value in a 64-bit memory location.
/// Stores a 64-bit double-precision value in a 64-bit memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
@@ -170,7 +170,7 @@ _mm_stream_sd(double *__p, __m128d __a)
__builtin_ia32_movntsd(__p, (__v2df)__a);
}
/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit
/// Stores a 32-bit single-precision floating-point value in a 32-bit
/// memory location. To minimize caching, the data is flagged as
/// non-temporal (unlikely to be used again soon).
///

49
c_headers/arm64intr.h Normal file
View File

@@ -0,0 +1,49 @@
/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
#include_next <arm64intr.h>
#else
#ifndef __ARM64INTR_H
#define __ARM64INTR_H
/* Barrier option selectors for ARM64 on Windows.
 *
 * Each enumerator's name and value follow the 4-bit <option> field of the
 * Arm DMB/DSB barrier instructions. The numeric values are fixed by that
 * encoding and must not be renumbered.
 * NOTE(review): presumably these are the arguments expected by the MSVC
 * barrier intrinsics (__dmb/__dsb/__isb) -- confirm against the callers. */
typedef enum {
  /* Full system. */
  _ARM64_BARRIER_SY    = 0xF,
  _ARM64_BARRIER_ST    = 0xE,
  _ARM64_BARRIER_LD    = 0xD,

  /* Inner shareable domain. */
  _ARM64_BARRIER_ISH   = 0xB,
  _ARM64_BARRIER_ISHST = 0xA,
  _ARM64_BARRIER_ISHLD = 0x9,

  /* Non-shareable. */
  _ARM64_BARRIER_NSH   = 0x7,
  _ARM64_BARRIER_NSHST = 0x6,
  _ARM64_BARRIER_NSHLD = 0x5,

  /* Outer shareable domain. */
  _ARM64_BARRIER_OSH   = 0x3,
  _ARM64_BARRIER_OSHST = 0x2,
  _ARM64_BARRIER_OSHLD = 0x1
} _ARM64INTR_BARRIER_TYPE;
#endif /* __ARM64INTR_H */
#endif /* _MSC_VER */

1499
c_headers/arm_fp16.h Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,97 @@
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512BITALGINTRIN_H
#define __AVX512BITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
/* Applies the 512-bit VPOPCNTW builtin to __A, treated as 32 x 16-bit lanes,
 * and returns the result as a generic 512-bit integer vector. Per the Intel
 * intrinsics guide, each lane of the result is the population count of the
 * corresponding lane of __A. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)
{
  __v32hi __words = (__v32hi)__A;
  return (__m512i)__builtin_ia32_vpopcntw_512(__words);
}
/* Merge-masked form of _mm512_popcnt_epi16.
 *
 * __A  pass-through vector (32 x 16-bit lanes).
 * __U  32-bit lane mask.
 * __B  vector whose lanes are counted.
 *
 * The popcount of __B and __A are handed to the 16-bit lane select builtin
 * together with __U: lanes whose mask bit is set take the popcount result,
 * the remaining lanes are taken from __A. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
{
  __v32hi __counted = (__v32hi)_mm512_popcnt_epi16(__B);
  __v32hi __passthru = (__v32hi)__A;
  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __counted,
                                             __passthru);
}
/* Zero-masked form of _mm512_popcnt_epi16: forwards to
 * _mm512_mask_popcnt_epi16 with an all-zero pass-through vector, so lanes
 * not selected by __U come from that zero vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
{
  __m512i __zero = (__m512i)_mm512_setzero_si512();
  return _mm512_mask_popcnt_epi16(__zero, __U, __B);
}
/* Applies the 512-bit VPOPCNTB builtin to __A, treated as 64 x 8-bit lanes,
 * and returns the result as a generic 512-bit integer vector. Per the Intel
 * intrinsics guide, each lane of the result is the population count of the
 * corresponding lane of __A. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi8(__m512i __A)
{
  __v64qi __bytes = (__v64qi)__A;
  return (__m512i)__builtin_ia32_vpopcntb_512(__bytes);
}
/* Merge-masked form of _mm512_popcnt_epi8.
 *
 * __A  pass-through vector (64 x 8-bit lanes).
 * __U  64-bit lane mask.
 * __B  vector whose lanes are counted.
 *
 * The popcount of __B and __A are handed to the 8-bit lane select builtin
 * together with __U: lanes whose mask bit is set take the popcount result,
 * the remaining lanes are taken from __A. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
{
  __v64qi __counted = (__v64qi)_mm512_popcnt_epi8(__B);
  __v64qi __passthru = (__v64qi)__A;
  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, __counted,
                                             __passthru);
}
/* Zero-masked form of _mm512_popcnt_epi8: forwards to
 * _mm512_mask_popcnt_epi8 with an all-zero pass-through vector, so lanes
 * not selected by __U come from that zero vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
{
  __m512i __zero = (__m512i)_mm512_setzero_si512();
  return _mm512_mask_popcnt_epi8(__zero, __U, __B);
}
/* Masked bit-shuffle producing a 64-bit mask.
 *
 * Passes __A and __B, both viewed as 64 x 8-bit lanes, plus the mask __U to
 * the 512-bit VPSHUFBITQMB builtin and returns its result as a __mmask64.
 * NOTE(review): see the Intel intrinsics guide for which operand supplies
 * the data and which supplies the bit indices, and for how __U gates the
 * result bits. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
{
  __v64qi __a_bytes = (__v64qi)__A;
  __v64qi __b_bytes = (__v64qi)__B;
  return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(__a_bytes, __b_bytes,
                                                        __U);
}
/* Unmasked bit-shuffle: forwards to _mm512_mask_bitshuffle_epi64_mask with
 * every bit of the mask set ((__mmask64)-1). */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
{
  __mmask64 __all_lanes = (__mmask64)-1;
  return _mm512_mask_bitshuffle_epi64_mask(__all_lanes, __A, __B);
}
#undef __DEFAULT_FN_ATTRS
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -29,7 +29,7 @@
#define __AVX512CDINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_conflict_epi64 (__m512i __A)
@@ -82,61 +82,58 @@ _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_lzcnt_epi32 (__m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) -1);
return (__m512i) __builtin_ia32_vplzcntd_512 ((__v16si) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
(__v16si) __W,
(__mmask16) __U);
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_lzcnt_epi32(__A),
(__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) __U);
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_lzcnt_epi32(__A),
(__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_lzcnt_epi64 (__m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) -1);
return (__m512i) __builtin_ia32_vplzcntq_512 ((__v8di) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
(__v8di) __W,
(__mmask8) __U);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_lzcnt_epi64(__A),
(__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) __U);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_lzcnt_epi64(__A),
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastmb_epi64 (__mmask8 __A)
{
return (__m512i) __builtin_ia32_broadcastmb512 (__A);
return (__m512i) _mm512_set1_epi64((long long) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastmw_epi32 (__mmask16 __A)
{
return (__m512i) __builtin_ia32_broadcastmw512 (__A);
return (__m512i) _mm512_set1_epi32((int) __A);
}
#undef __DEFAULT_FN_ATTRS

File diff suppressed because it is too large Load Diff

View File

@@ -27,21 +27,21 @@
#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H
// exp2a23
#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)); })
(__mmask8)-1, (int)(R))
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)); })
(int)(R))
#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \
#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm512_exp2a23_pd(A) \
_mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -52,20 +52,20 @@
#define _mm512_maskz_exp2a23_pd(M, A) \
_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \
#define _mm512_exp2a23_round_ps(A, R) \
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)); })
(__mmask16)-1, (int)(R))
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)); })
(int)(R))
#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \
#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)); })
(__mmask16)(M), (int)(R))
#define _mm512_exp2a23_ps(A) \
_mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -76,21 +76,21 @@
#define _mm512_maskz_exp2a23_ps(M, A) \
_mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
// rsqrt28
#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \
/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)); })
(__mmask8)-1, (int)(R))
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)); })
(int)(R))
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm512_rsqrt28_pd(A) \
_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -101,20 +101,20 @@
#define _mm512_maskz_rsqrt28_pd(M, A) \
_mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \
#define _mm512_rsqrt28_round_ps(A, R) \
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)); })
(__mmask16)-1, (int)(R))
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)); })
(int)(R))
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)); })
(__mmask16)(M), (int)(R))
#define _mm512_rsqrt28_ps(A) \
_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -125,23 +125,23 @@
#define _mm512_maskz_rsqrt28_ps(M, A) \
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \
#define _mm_rsqrt28_round_ss(A, B, R) \
(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (int)(R)); })
(__mmask8)-1, (int)(R))
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_rsqrt28_ss(A, B) \
_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -152,23 +152,23 @@
#define _mm_maskz_rsqrt28_ss(M, A, B) \
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \
#define _mm_rsqrt28_round_sd(A, B, R) \
(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (int)(R)); })
(__mmask8)-1, (int)(R))
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_rsqrt28_sd(A, B) \
_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -179,21 +179,21 @@
#define _mm_maskz_rsqrt28_sd(M, A, B) \
_mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
// rcp28
#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \
/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)-1, (int)(R)); })
(__mmask8)-1, (int)(R))
#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \
#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)(__m512d)(S), (__mmask8)(M), \
(int)(R)); })
(int)(R))
#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \
#define _mm512_maskz_rcp28_round_pd(M, A, R) \
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm512_rcp28_pd(A) \
_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -204,20 +204,20 @@
#define _mm512_maskz_rcp28_pd(M, A) \
_mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \
#define _mm512_rcp28_round_ps(A, R) \
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)-1, (int)(R)); })
(__mmask16)-1, (int)(R))
#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \
#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)(__m512)(S), (__mmask16)(M), \
(int)(R)); })
(int)(R))
#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \
#define _mm512_maskz_rcp28_round_ps(M, A, R) \
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(M), (int)(R)); })
(__mmask16)(M), (int)(R))
#define _mm512_rcp28_ps(A) \
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -228,23 +228,23 @@
#define _mm512_maskz_rcp28_ps(M, A) \
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \
#define _mm_rcp28_round_ss(A, B, R) \
(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)-1, (int)(R)); })
(__mmask8)-1, (int)(R))
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)(__m128)(S), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \
#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_rcp28_ss(A, B) \
_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -255,23 +255,23 @@
#define _mm_maskz_rcp28_ss(M, A, B) \
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \
#define _mm_rcp28_round_sd(A, B, R) \
(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)-1, (int)(R)); })
(__mmask8)-1, (int)(R))
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)(__m128d)(S), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \
#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2df)_mm_setzero_pd(), \
(__mmask8)(M), (int)(R)); })
(__mmask8)(M), (int)(R))
#define _mm_rcp28_sd(A, B) \
_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -282,4 +282,4 @@
#define _mm_maskz_rcp28_sd(M, A, B) \
_mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#endif // __AVX512ERINTRIN_H
#endif /* __AVX512ERINTRIN_H */

File diff suppressed because it is too large Load Diff

View File

@@ -29,62 +29,52 @@
#define __IFMAINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
{
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) -1);
return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y,
(__v8di) __Z);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
__m512i __Y)
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
(__v8di) __X,
(__v8di) __Y,
(__mmask8) __M);
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di)_mm512_madd52hi_epu64(__W, __X, __Y),
(__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
{
return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) __M);
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z),
(__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
{
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) -1);
return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y,
(__v8di) __Z);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
__m512i __Y)
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
(__v8di) __X,
(__v8di) __Y,
(__mmask8) __M);
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di)_mm512_madd52lo_epu64(__W, __X, __Y),
(__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
{
return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) __M);
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z),
(__v8di)_mm512_setzero_si512());
}
#undef __DEFAULT_FN_ATTRS

View File

@@ -29,121 +29,105 @@
#define __IFMAVLINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl")))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
{
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) -1);
return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y,
(__v2di) __Z);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
(__v2di) __X,
(__v2di) __Y,
(__mmask8) __M);
return (__m128i)__builtin_ia32_selectq_128(__M,
(__v2di)_mm_madd52hi_epu64(__W, __X, __Y),
(__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
{
return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) __M);
return (__m128i)__builtin_ia32_selectq_128(__M,
(__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
{
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) -1);
return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
(__v4di)__Z);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
(__v4di) __X,
(__v4di) __Y,
(__mmask8) __M);
return (__m256i)__builtin_ia32_selectq_256(__M,
(__v4di)_mm256_madd52hi_epu64(__W, __X, __Y),
(__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
{
return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) __M);
return (__m256i)__builtin_ia32_selectq_256(__M,
(__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
(__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
{
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) -1);
return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
(__v2di)__Z);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
(__v2di) __X,
(__v2di) __Y,
(__mmask8) __M);
return (__m128i)__builtin_ia32_selectq_128(__M,
(__v2di)_mm_madd52lo_epu64(__W, __X, __Y),
(__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
{
return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) __M);
return (__m128i)__builtin_ia32_selectq_128(__M,
(__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
{
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) -1);
return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
(__v4di)__Z);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
(__v4di) __X,
(__v4di) __Y,
(__mmask8) __M);
return (__m256i)__builtin_ia32_selectq_256(__M,
(__v4di)_mm256_madd52lo_epu64(__W, __X, __Y),
(__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
{
return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) __M);
return (__m256i)__builtin_ia32_selectq_256(__M,
(__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
(__v4di)_mm256_setzero_si256());
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

View File

@@ -1,4 +1,4 @@
/*===------------- avx512pfintrin.h - PF intrinsics ------------------===
/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -31,80 +31,80 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) __extension__ ({\
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(long long const *)(addr), (int)(scale), \
(int)(hint)); })
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) __extension__ ({\
(int)(hint))
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
(long long const *)(addr), (int)(scale), \
(int)(hint)); })
(int)(hint))
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) ({\
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (int const *)(addr), \
(int)(scale), (int)(hint)); })
(int)(scale), (int)(hint))
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) ({\
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfdps((__mmask16) -1, \
(__v16si)(__m512i)(index), (int const *)(addr), \
(int)(scale), (int)(hint)); })
(int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) __extension__ ({\
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(long long const *)(addr), (int)(scale), \
(int)(hint)); })
(int)(hint))
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) __extension__ ({\
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
(long long const *)(addr), (int)(scale), \
(int)(hint)); })
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) ({\
(int)(hint))
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(int const *)(addr), (int)(scale), (int)(hint)); })
(int const *)(addr), (int)(scale), (int)(hint))
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) ({\
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
(int const *)(addr), (int)(scale), (int)(hint)); })
(int const *)(addr), (int)(scale), (int)(hint))
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
(long long *)(addr), (int)(scale), \
(int)(hint)); })
(int)(hint))
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
(long long *)(addr), (int)(scale), \
(int)(hint)); })
(int)(hint))
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) __extension__ ({\
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint)); })
(int *)(addr), (int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
(__v16si)(__m512i)(index), (int *)(addr), \
(int)(scale), (int)(hint)); })
(int)(scale), (int)(hint))
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) __extension__ ({\
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
(long long *)(addr), (int)(scale), \
(int)(hint)); })
(int)(hint))
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
(long long *)(addr), (int)(scale), \
(int)(hint)); })
(int)(hint))
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) __extension__ ({\
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint)); })
(int *)(addr), (int)(scale), (int)(hint))
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
(int *)(addr), (int)(scale), (int)(hint)); })
(int *)(addr), (int)(scale), (int)(hint))
#undef __DEFAULT_FN_ATTRS

View File

@@ -0,0 +1,397 @@
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VBMI2INTRIN_H
#define __AVX512VBMI2INTRIN_H
/* Default attributes applied to every function defined in this file:
 * always_inline, nodebug, the "avx512vbmi2" target feature and a minimum
 * vector width of 512.  The macro is #undef'd again at the end of the
 * header. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
/* Forwards to __builtin_ia32_compresshi512_mask with __D as the data
 * operand and __S as the second vector operand (both viewed as __v32hi)
 * and __U as the mask; the builtin's result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through source for lanes the
 * compress does not write -- confirm against the builtin's definition. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
{
  __v32hi __data = (__v32hi)__D;
  __v32hi __fill = (__v32hi)__S;

  return (__m512i)__builtin_ia32_compresshi512_mask(__data, __fill, __U);
}
/* Same builtin as the _mask_ form (__builtin_ia32_compresshi512_mask), but
 * the second vector operand is the all-zero vector from
 * _mm512_setzero_si512() instead of a caller-supplied one. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
{
  __v32hi __data = (__v32hi)__D;
  __v32hi __zero = (__v32hi)_mm512_setzero_si512();

  return (__m512i)__builtin_ia32_compresshi512_mask(__data, __zero, __U);
}
/* Forwards to __builtin_ia32_compressqi512_mask with __D as the data
 * operand and __S as the second vector operand (both viewed as __v64qi)
 * and __U as the 64-bit mask; the result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through source -- confirm. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
{
  __v64qi __data = (__v64qi)__D;
  __v64qi __fill = (__v64qi)__S;

  return (__m512i)__builtin_ia32_compressqi512_mask(__data, __fill, __U);
}
/* Same builtin as the _mask_ form (__builtin_ia32_compressqi512_mask), but
 * the second vector operand is the all-zero vector from
 * _mm512_setzero_si512(). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
{
  __v64qi __data = (__v64qi)__D;
  __v64qi __zero = (__v64qi)_mm512_setzero_si512();

  return (__m512i)__builtin_ia32_compressqi512_mask(__data, __zero, __U);
}
/* Forwards to __builtin_ia32_compressstorehi512_mask: __P is reinterpreted
 * as a pointer to __v32hi, __D is viewed as __v32hi and __U is the mask.
 * Nothing is returned; the builtin performs the store through __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
{
  __v32hi *__dst = (__v32hi *)__P;
  __v32hi __data = (__v32hi)__D;

  __builtin_ia32_compressstorehi512_mask(__dst, __data, __U);
}
/* Forwards to __builtin_ia32_compressstoreqi512_mask: __P is reinterpreted
 * as a pointer to __v64qi, __D is viewed as __v64qi and __U is the mask.
 * Nothing is returned; the builtin performs the store through __P. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
{
  __v64qi *__dst = (__v64qi *)__P;
  __v64qi __data = (__v64qi)__D;

  __builtin_ia32_compressstoreqi512_mask(__dst, __data, __U);
}
/* Forwards to __builtin_ia32_expandhi512_mask with __D as the data operand
 * and __S as the second vector operand (both viewed as __v32hi) and __U as
 * the mask; the builtin's result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through source -- confirm. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
{
  __v32hi __data = (__v32hi)__D;
  __v32hi __fill = (__v32hi)__S;

  return (__m512i)__builtin_ia32_expandhi512_mask(__data, __fill, __U);
}
/* Same builtin as the _mask_ form (__builtin_ia32_expandhi512_mask), but
 * the second vector operand is the all-zero vector from
 * _mm512_setzero_si512(). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
{
  __v32hi __data = (__v32hi)__D;
  __v32hi __zero = (__v32hi)_mm512_setzero_si512();

  return (__m512i)__builtin_ia32_expandhi512_mask(__data, __zero, __U);
}
/* Forwards to __builtin_ia32_expandqi512_mask with __D as the data operand
 * and __S as the second vector operand (both viewed as __v64qi) and __U as
 * the 64-bit mask; the builtin's result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through source -- confirm. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
{
  __v64qi __data = (__v64qi)__D;
  __v64qi __fill = (__v64qi)__S;

  return (__m512i)__builtin_ia32_expandqi512_mask(__data, __fill, __U);
}
/* Same builtin as the _mask_ form (__builtin_ia32_expandqi512_mask), but
 * the second vector operand is the all-zero vector from
 * _mm512_setzero_si512(). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
{
  __v64qi __data = (__v64qi)__D;
  __v64qi __zero = (__v64qi)_mm512_setzero_si512();

  return (__m512i)__builtin_ia32_expandqi512_mask(__data, __zero, __U);
}
/* Forwards to __builtin_ia32_expandloadhi512_mask: __P is reinterpreted as
 * a pointer to const __v32hi, __S (viewed as __v32hi) is the vector operand
 * and __U is the mask; the builtin's result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through source -- confirm. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
{
  const __v32hi *__src = (const __v32hi *)__P;
  __v32hi __fill = (__v32hi)__S;

  return (__m512i)__builtin_ia32_expandloadhi512_mask(__src, __fill, __U);
}
/* Same builtin as the _mask_ form (__builtin_ia32_expandloadhi512_mask),
 * but the vector operand is the all-zero vector from
 * _mm512_setzero_si512(). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
{
  const __v32hi *__src = (const __v32hi *)__P;
  __v32hi __zero = (__v32hi)_mm512_setzero_si512();

  return (__m512i)__builtin_ia32_expandloadhi512_mask(__src, __zero, __U);
}
/* Forwards to __builtin_ia32_expandloadqi512_mask: __P is reinterpreted as
 * a pointer to const __v64qi, __S (viewed as __v64qi) is the vector operand
 * and __U is the 64-bit mask; the result is returned as __m512i.
 * NOTE(review): __S is presumably the pass-through source -- confirm. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
{
  const __v64qi *__src = (const __v64qi *)__P;
  __v64qi __fill = (__v64qi)__S;

  return (__m512i)__builtin_ia32_expandloadqi512_mask(__src, __fill, __U);
}
/* Same builtin as the _mask_ form (__builtin_ia32_expandloadqi512_mask),
 * but the vector operand is the all-zero vector from
 * _mm512_setzero_si512(). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
{
  const __v64qi *__src = (const __v64qi *)__P;
  __v64qi __zero = (__v64qi)_mm512_setzero_si512();

  return (__m512i)__builtin_ia32_expandloadqi512_mask(__src, __zero, __U);
}
/* Expands to __builtin_ia32_vpshldq512 applied to A and B (each cast through
 * __m512i to __v8di) with I cast to int; the result is cast to __m512i.
 * The replacement list is wrapped in parentheses so that an operator written
 * after a use of the macro applies to the cast result instead of binding to
 * the builtin call ahead of the cast.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the builtin's declaration. */
#define _mm512_shldi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
/* Expands to __builtin_ia32_selectq_512 with mask U (cast to __mmask8),
 * choosing between _mm512_shldi_epi64(A, B, I) and S (both as __v8di); the
 * result is cast to __m512i.  The replacement list is wrapped in parentheses
 * so that trailing operators apply to the cast result, not to the builtin
 * call ahead of the cast. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                       (__v8di)(__m512i)(S)))
/* Expands to __builtin_ia32_selectq_512 with mask U (cast to __mmask8),
 * choosing between _mm512_shldi_epi64(A, B, I) and the all-zero vector from
 * _mm512_setzero_si512(); the result is cast to __m512i.  The replacement
 * list is wrapped in parentheses so that trailing operators apply to the
 * cast result, not to the builtin call ahead of the cast. */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                       (__v8di)_mm512_setzero_si512()))
/* Expands to __builtin_ia32_vpshldd512 applied to A and B (each cast through
 * __m512i to __v16si) with I cast to int; the result is cast to __m512i.
 * The replacement list is wrapped in parentheses so that an operator written
 * after a use of the macro applies to the cast result instead of binding to
 * the builtin call ahead of the cast.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the builtin's declaration. */
#define _mm512_shldi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
/* Expands to __builtin_ia32_selectd_512 with mask U (cast to __mmask16),
 * choosing between _mm512_shldi_epi32(A, B, I) and S (both as __v16si); the
 * result is cast to __m512i.  The replacement list is wrapped in parentheses
 * so that trailing operators apply to the cast result, not to the builtin
 * call ahead of the cast. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                       (__v16si)(__m512i)(S)))
/* Expands to __builtin_ia32_selectd_512 with mask U (cast to __mmask16),
 * choosing between _mm512_shldi_epi32(A, B, I) and the all-zero vector from
 * _mm512_setzero_si512(); the result is cast to __m512i.  The replacement
 * list is wrapped in parentheses so that trailing operators apply to the
 * cast result, not to the builtin call ahead of the cast. */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
/* Expands to __builtin_ia32_vpshldw512 applied to A and B (each cast through
 * __m512i to __v32hi) with I cast to int; the result is cast to __m512i.
 * The replacement list is wrapped in parentheses so that an operator written
 * after a use of the macro applies to the cast result instead of binding to
 * the builtin call ahead of the cast.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the builtin's declaration. */
#define _mm512_shldi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
/* Expands to __builtin_ia32_selectw_512 with mask U (cast to __mmask32),
 * choosing between _mm512_shldi_epi16(A, B, I) and S (both as __v32hi); the
 * result is cast to __m512i.  The replacement list is wrapped in parentheses
 * so that trailing operators apply to the cast result, not to the builtin
 * call ahead of the cast. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                       (__v32hi)(__m512i)(S)))
/* Expands to __builtin_ia32_selectw_512 with mask U (cast to __mmask32),
 * choosing between _mm512_shldi_epi16(A, B, I) and the all-zero vector from
 * _mm512_setzero_si512(); the result is cast to __m512i.  The replacement
 * list is wrapped in parentheses so that trailing operators apply to the
 * cast result, not to the builtin call ahead of the cast. */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                       (__v32hi)_mm512_setzero_si512()))
/* Expands to __builtin_ia32_vpshrdq512 applied to A and B (each cast through
 * __m512i to __v8di) with I cast to int; the result is cast to __m512i.
 * The replacement list is wrapped in parentheses so that an operator written
 * after a use of the macro applies to the cast result instead of binding to
 * the builtin call ahead of the cast.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the builtin's declaration. */
#define _mm512_shrdi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
/* Expands to __builtin_ia32_selectq_512 with mask U (cast to __mmask8),
 * choosing between _mm512_shrdi_epi64(A, B, I) and S (both as __v8di); the
 * result is cast to __m512i.  The replacement list is wrapped in parentheses
 * so that trailing operators apply to the cast result, not to the builtin
 * call ahead of the cast. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                       (__v8di)(__m512i)(S)))
/* Expands to __builtin_ia32_selectq_512 with mask U (cast to __mmask8),
 * choosing between _mm512_shrdi_epi64(A, B, I) and the all-zero vector from
 * _mm512_setzero_si512(); the result is cast to __m512i.  The replacement
 * list is wrapped in parentheses so that trailing operators apply to the
 * cast result, not to the builtin call ahead of the cast. */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                       (__v8di)_mm512_setzero_si512()))
/* Expands to __builtin_ia32_vpshrdd512 applied to A and B (each cast through
 * __m512i to __v16si) with I cast to int; the result is cast to __m512i.
 * The replacement list is wrapped in parentheses so that an operator written
 * after a use of the macro applies to the cast result instead of binding to
 * the builtin call ahead of the cast.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the builtin's declaration. */
#define _mm512_shrdi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
/* Expands to __builtin_ia32_selectd_512 with mask U (cast to __mmask16),
 * choosing between _mm512_shrdi_epi32(A, B, I) and S (both as __v16si); the
 * result is cast to __m512i.  The replacement list is wrapped in parentheses
 * so that trailing operators apply to the cast result, not to the builtin
 * call ahead of the cast. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                       (__v16si)(__m512i)(S)))
/* Expands to __builtin_ia32_selectd_512 with mask U (cast to __mmask16),
 * choosing between _mm512_shrdi_epi32(A, B, I) and the all-zero vector from
 * _mm512_setzero_si512(); the result is cast to __m512i.  The replacement
 * list is wrapped in parentheses so that trailing operators apply to the
 * cast result, not to the builtin call ahead of the cast. */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
/* Expands to __builtin_ia32_vpshrdw512 applied to A and B (each cast through
 * __m512i to __v32hi) with I cast to int; the result is cast to __m512i.
 * The replacement list is wrapped in parentheses so that an operator written
 * after a use of the macro applies to the cast result instead of binding to
 * the builtin call ahead of the cast.
 * NOTE(review): I is presumably required to be a compile-time constant --
 * confirm against the builtin's declaration. */
#define _mm512_shrdi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
/* Expands to __builtin_ia32_selectw_512 with mask U (cast to __mmask32),
 * choosing between _mm512_shrdi_epi16(A, B, I) and S (both as __v32hi); the
 * result is cast to __m512i.  The replacement list is wrapped in parentheses
 * so that trailing operators apply to the cast result, not to the builtin
 * call ahead of the cast. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                       (__v32hi)(__m512i)(S)))
/* Expands to __builtin_ia32_selectw_512 with mask U (cast to __mmask32),
 * choosing between _mm512_shrdi_epi16(A, B, I) and the all-zero vector from
 * _mm512_setzero_si512(); the result is cast to __m512i.  The replacement
 * list is wrapped in parentheses so that trailing operators apply to the
 * cast result, not to the builtin call ahead of the cast. */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                       (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                       (__v32hi)_mm512_setzero_si512()))
/* Forwards to __builtin_ia32_vpshldvq512_mask with __S, __A and __B viewed
 * as __v8di and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
{
  __v8di __s = (__v8di)__S;
  __v8di __a = (__v8di)__A;
  __v8di __b = (__v8di)__B;

  return (__m512i)__builtin_ia32_vpshldvq512_mask(__s, __a, __b, __U);
}
/* Forwards to __builtin_ia32_vpshldvq512_maskz with __S, __A and __B viewed
 * as __v8di and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v8di __s = (__v8di)__S;
  __v8di __a = (__v8di)__A;
  __v8di __b = (__v8di)__B;

  return (__m512i)__builtin_ia32_vpshldvq512_maskz(__s, __a, __b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvq512_mask with __S, __A and
 * __B viewed as __v8di and an all-ones mask, (__mmask8)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  __v8di __s = (__v8di)__S;
  __v8di __a = (__v8di)__A;
  __v8di __b = (__v8di)__B;

  return (__m512i)__builtin_ia32_vpshldvq512_mask(__s, __a, __b, __all);
}
/* Forwards to __builtin_ia32_vpshldvd512_mask with __S, __A and __B viewed
 * as __v16si and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  __v16si __s = (__v16si)__S;
  __v16si __a = (__v16si)__A;
  __v16si __b = (__v16si)__B;

  return (__m512i)__builtin_ia32_vpshldvd512_mask(__s, __a, __b, __U);
}
/* Forwards to __builtin_ia32_vpshldvd512_maskz with __S, __A and __B viewed
 * as __v16si and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v16si __s = (__v16si)__S;
  __v16si __a = (__v16si)__A;
  __v16si __b = (__v16si)__B;

  return (__m512i)__builtin_ia32_vpshldvd512_maskz(__s, __a, __b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvd512_mask with __S, __A and
 * __B viewed as __v16si and an all-ones mask, (__mmask16)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask16 __all = (__mmask16)-1;
  __v16si __s = (__v16si)__S;
  __v16si __a = (__v16si)__A;
  __v16si __b = (__v16si)__B;

  return (__m512i)__builtin_ia32_vpshldvd512_mask(__s, __a, __b, __all);
}
/* Forwards to __builtin_ia32_vpshldvw512_mask with __S, __A and __B viewed
 * as __v32hi and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
  __v32hi __s = (__v32hi)__S;
  __v32hi __a = (__v32hi)__A;
  __v32hi __b = (__v32hi)__B;

  return (__m512i)__builtin_ia32_vpshldvw512_mask(__s, __a, __b, __U);
}
/* Forwards to __builtin_ia32_vpshldvw512_maskz with __S, __A and __B viewed
 * as __v32hi and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v32hi __s = (__v32hi)__S;
  __v32hi __a = (__v32hi)__A;
  __v32hi __b = (__v32hi)__B;

  return (__m512i)__builtin_ia32_vpshldvw512_maskz(__s, __a, __b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvw512_mask with __S, __A and
 * __B viewed as __v32hi and an all-ones mask, (__mmask32)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask32 __all = (__mmask32)-1;
  __v32hi __s = (__v32hi)__S;
  __v32hi __a = (__v32hi)__A;
  __v32hi __b = (__v32hi)__B;

  return (__m512i)__builtin_ia32_vpshldvw512_mask(__s, __a, __b, __all);
}
/* Forwards to __builtin_ia32_vpshrdvq512_mask with __S, __A and __B viewed
 * as __v8di and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
{
  __v8di __s = (__v8di)__S;
  __v8di __a = (__v8di)__A;
  __v8di __b = (__v8di)__B;

  return (__m512i)__builtin_ia32_vpshrdvq512_mask(__s, __a, __b, __U);
}
/* Forwards to __builtin_ia32_vpshrdvq512_maskz with __S, __A and __B viewed
 * as __v8di and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v8di __s = (__v8di)__S;
  __v8di __a = (__v8di)__A;
  __v8di __b = (__v8di)__B;

  return (__m512i)__builtin_ia32_vpshrdvq512_maskz(__s, __a, __b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvq512_mask with __S, __A and
 * __B viewed as __v8di and an all-ones mask, (__mmask8)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask8 __all = (__mmask8)-1;
  __v8di __s = (__v8di)__S;
  __v8di __a = (__v8di)__A;
  __v8di __b = (__v8di)__B;

  return (__m512i)__builtin_ia32_vpshrdvq512_mask(__s, __a, __b, __all);
}
/* Forwards to __builtin_ia32_vpshrdvd512_mask with __S, __A and __B viewed
 * as __v16si and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  __v16si __s = (__v16si)__S;
  __v16si __a = (__v16si)__A;
  __v16si __b = (__v16si)__B;

  return (__m512i)__builtin_ia32_vpshrdvd512_mask(__s, __a, __b, __U);
}
/* Forwards to __builtin_ia32_vpshrdvd512_maskz with __S, __A and __B viewed
 * as __v16si and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v16si __s = (__v16si)__S;
  __v16si __a = (__v16si)__A;
  __v16si __b = (__v16si)__B;

  return (__m512i)__builtin_ia32_vpshrdvd512_maskz(__s, __a, __b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvd512_mask with __S, __A and
 * __B viewed as __v16si and an all-ones mask, (__mmask16)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask16 __all = (__mmask16)-1;
  __v16si __s = (__v16si)__S;
  __v16si __a = (__v16si)__A;
  __v16si __b = (__v16si)__B;

  return (__m512i)__builtin_ia32_vpshrdvd512_mask(__s, __a, __b, __all);
}
/* Forwards to __builtin_ia32_vpshrdvw512_mask with __S, __A and __B viewed
 * as __v32hi and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
  __v32hi __s = (__v32hi)__S;
  __v32hi __a = (__v32hi)__A;
  __v32hi __b = (__v32hi)__B;

  return (__m512i)__builtin_ia32_vpshrdvw512_mask(__s, __a, __b, __U);
}
/* Forwards to __builtin_ia32_vpshrdvw512_maskz with __S, __A and __B viewed
 * as __v32hi and __U as the mask; the builtin's result is returned as
 * __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v32hi __s = (__v32hi)__S;
  __v32hi __a = (__v32hi)__A;
  __v32hi __b = (__v32hi)__B;

  return (__m512i)__builtin_ia32_vpshrdvw512_maskz(__s, __a, __b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvw512_mask with __S, __A and
 * __B viewed as __v32hi and an all-ones mask, (__mmask32)-1. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
{
  const __mmask32 __all = (__mmask32)-1;
  __v32hi __s = (__v32hi)__S;
  __v32hi __a = (__v32hi)__A;
  __v32hi __b = (__v32hi)__B;

  return (__m512i)__builtin_ia32_vpshrdvw512_mask(__s, __a, __b, __all);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@@ -29,79 +29,65 @@
#define __VBMIINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
__mmask64 __U, __m512i __B)
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B)
{
return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
(__v64qi) __I
/* idx */ ,
(__v64qi) __B,
(__mmask64) __U);
return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
(__v64qi) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
__m512i __B)
{
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
/* idx */ ,
(__v64qi) __A,
(__v64qi) __B,
(__mmask64) -1);
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
__m512i __I, __m512i __B)
_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
__m512i __B)
{
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
/* idx */ ,
(__v64qi) __A,
(__v64qi) __B,
(__mmask64) __U);
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__I);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
__m512i __I, __m512i __B)
_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
__m512i __B)
{
return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
/* idx */ ,
(__v64qi) __A,
(__v64qi) __B,
(__mmask64) __U);
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
(__v64qi) __A,
(__v64qi) _mm512_undefined_epi32 (),
(__mmask64) -1);
return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
__m512i __B)
{
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
(__v64qi) __A,
(__v64qi) _mm512_setzero_si512(),
(__mmask64) __M);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
__m512i __B)
{
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
(__v64qi) __A,
(__v64qi) __W,
(__mmask64) __M);
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS

View File

@@ -29,161 +29,127 @@
#define __VBMIVLINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl")))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
__m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
{
return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
(__v16qi) __I
/* idx */ ,
(__v16qi) __B,
(__mmask16)
__U);
return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
(__v16qi)__I,
(__v16qi)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
__mmask32 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
__m128i __B)
{
return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
(__v32qi) __I
/* idx */ ,
(__v32qi) __B,
(__mmask32)
__U);
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
__m128i __B)
{
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
/* idx */ ,
(__v16qi) __A,
(__v16qi) __B,
(__mmask16) -
1);
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__I);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
__m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
__m128i __B)
{
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
/* idx */ ,
(__v16qi) __A,
(__v16qi) __B,
(__mmask16)
__U);
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
__m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
{
return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
/* idx */ ,
(__v16qi) __A,
(__v16qi) __B,
(__mmask16)
__U);
return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
(__v32qi)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
__m256i __B)
{
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
/* idx */ ,
(__v32qi) __A,
(__v32qi) __B,
(__mmask32) -
1);
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
__m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
__m256i __B)
{
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
/* idx */ ,
(__v32qi) __A,
(__v32qi) __B,
(__mmask32)
__U);
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__I);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
__m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
__m256i __B)
{
return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
/* idx */ ,
(__v32qi) __A,
(__v32qi) __B,
(__mmask32)
__U);
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
(__v16qi) __A,
(__v16qi) _mm_undefined_si128 (),
(__mmask16) -1);
return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
(__v16qi) __A,
(__v16qi) _mm_setzero_si128 (),
(__mmask16) __M);
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
__m128i __B)
{
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
(__v16qi) __A,
(__v16qi) __W,
(__mmask16) __M);
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
(__v32qi) __A,
(__v32qi) _mm256_undefined_si256 (),
(__mmask32) -1);
return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
__m256i __B)
{
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
(__v32qi) __A,
(__v32qi) _mm256_setzero_si256 (),
(__mmask32) __M);
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
__m256i __B)
{
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
(__v32qi) __A,
(__v32qi) __W,
(__mmask32) __M);
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
@@ -192,7 +158,7 @@ _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i
(__mmask16) __M);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
@@ -202,7 +168,7 @@ _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
(__mmask16) __M);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
@@ -212,7 +178,7 @@ _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
(__mmask16) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
@@ -221,7 +187,7 @@ _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m2
(__mmask32) __M);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
@@ -231,7 +197,7 @@ _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
(__mmask32) __M);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
@@ -242,6 +208,7 @@ _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

View File

@@ -0,0 +1,159 @@
/*===---- avx512vlbitalgintrin.h - BITALG intrinsics -----------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLBITALGINTRIN_H
#define __AVX512VLBITALGINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
/* Unmasked 256-bit form: reinterprets __A as __v16hi, passes it to the
 * vpopcntw_256 builtin and returns the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A)
{
  __v16hi __a = (__v16hi) __A;

  return (__m256i) __builtin_ia32_vpopcntw_256(__a);
}
/* Masked 256-bit form: computes _mm256_popcnt_epi16(__B) and feeds it,
 * together with __A and the mask __U, to the selectw_256 builtin.
 * NOTE(review): presumably elements whose mask bit is clear come from __A;
 * confirm against the select builtin's definition. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
{
  __v16hi __computed = (__v16hi) _mm256_popcnt_epi16(__B);
  __v16hi __other = (__v16hi) __A;

  return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
                                              __computed, __other);
}
/* "maskz" 256-bit form: delegates to _mm256_mask_popcnt_epi16, supplying an
 * all-zero vector from _mm256_setzero_si256() as its first argument. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
{
  __m256i __zero = (__m256i) _mm256_setzero_si256();

  return _mm256_mask_popcnt_epi16(__zero, __U, __B);
}
/* Unmasked 128-bit form: reinterprets __A as __v8hi, passes it to the
 * vpopcntw_128 builtin and returns the result as __m128i. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi16(__m128i __A)
{
  __v8hi __a = (__v8hi) __A;

  return (__m128i) __builtin_ia32_vpopcntw_128(__a);
}
/* Masked 128-bit form: computes _mm_popcnt_epi16(__B) and feeds it,
 * together with __A and the mask __U, to the selectw_128 builtin.
 * NOTE(review): presumably elements whose mask bit is clear come from __A;
 * confirm against the select builtin's definition. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
{
  __v8hi __computed = (__v8hi) _mm_popcnt_epi16(__B);
  __v8hi __other = (__v8hi) __A;

  return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
                                              __computed, __other);
}
/* "maskz" 128-bit form: delegates to _mm_mask_popcnt_epi16, supplying an
 * all-zero vector from _mm_setzero_si128() as its first argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
{
  __m128i __zero = (__m128i) _mm_setzero_si128();

  return _mm_mask_popcnt_epi16(__zero, __U, __B);
}
/* Unmasked 256-bit form: reinterprets __A as __v32qi, passes it to the
 * vpopcntb_256 builtin and returns the result as __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi8(__m256i __A)
{
  __v32qi __a = (__v32qi) __A;

  return (__m256i) __builtin_ia32_vpopcntb_256(__a);
}
/* Masked 256-bit form: computes _mm256_popcnt_epi8(__B) and feeds it,
 * together with __A and the mask __U, to the selectb_256 builtin.
 * NOTE(review): presumably elements whose mask bit is clear come from __A;
 * confirm against the select builtin's definition. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
{
  __v32qi __computed = (__v32qi) _mm256_popcnt_epi8(__B);
  __v32qi __other = (__v32qi) __A;

  return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
                                              __computed, __other);
}
/* "maskz" 256-bit form: delegates to _mm256_mask_popcnt_epi8, supplying an
 * all-zero vector from _mm256_setzero_si256() as its first argument. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
{
  __m256i __zero = (__m256i) _mm256_setzero_si256();

  return _mm256_mask_popcnt_epi8(__zero, __U, __B);
}
/* Unmasked 128-bit form: reinterprets __A as __v16qi, passes it to the
 * vpopcntb_128 builtin and returns the result as __m128i. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi8(__m128i __A)
{
  __v16qi __a = (__v16qi) __A;

  return (__m128i) __builtin_ia32_vpopcntb_128(__a);
}
/* Masked 128-bit form: computes _mm_popcnt_epi8(__B) and feeds it,
 * together with __A and the mask __U, to the selectb_128 builtin.
 * NOTE(review): presumably elements whose mask bit is clear come from __A;
 * confirm against the select builtin's definition. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
{
  __v16qi __computed = (__v16qi) _mm_popcnt_epi8(__B);
  __v16qi __other = (__v16qi) __A;

  return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
                                              __computed, __other);
}
/* "maskz" 128-bit form: delegates to _mm_mask_popcnt_epi8, supplying an
 * all-zero vector from _mm_setzero_si128() as its first argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
{
  __m128i __zero = (__m128i) _mm_setzero_si128();

  return _mm_mask_popcnt_epi8(__zero, __U, __B);
}
/* Masked 256-bit bit-shuffle: forwards __A and __B (reinterpreted as
 * __v32qi) and the incoming mask __U to the vpshufbitqmb256 "_mask"
 * builtin; the builtin's result is returned as a 32-bit mask. */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B)
{
  __v32qi __a = (__v32qi) __A;
  __v32qi __b = (__v32qi) __B;

  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask(__a, __b, __U);
}
/* Unmasked 256-bit bit-shuffle: delegates to
 * _mm256_mask_bitshuffle_epi64_mask with a mask that has every bit set
 * ((__mmask32) -1). */
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B)
{
  const __mmask32 __every_bit = (__mmask32) -1;

  return _mm256_mask_bitshuffle_epi64_mask(__every_bit, __A, __B);
}
/* Masked 128-bit bit-shuffle: forwards __A and __B (reinterpreted as
 * __v16qi) and the incoming mask __U to the vpshufbitqmb128 "_mask"
 * builtin; the builtin's result is returned as a 16-bit mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B)
{
  __v16qi __a = (__v16qi) __A;
  __v16qi __b = (__v16qi) __B;

  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask(__a, __b, __U);
}
/* Unmasked 128-bit bit-shuffle: delegates to
 * _mm_mask_bitshuffle_epi64_mask with a mask that has every bit set
 * ((__mmask16) -1). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
{
  const __mmask16 __every_bit = (__mmask16) -1;

  return _mm_mask_bitshuffle_epi64_mask(__every_bit, __A, __B);
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ---------------------------===
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -28,35 +28,36 @@
#define __AVX512VLCDINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd")))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmb_epi64 (__mmask8 __A)
{
return (__m128i) __builtin_ia32_broadcastmb128 (__A);
return (__m128i) _mm_set1_epi64x((long long) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcastmb_epi64 (__mmask8 __A)
{
return (__m256i) __builtin_ia32_broadcastmb256 (__A);
return (__m256i) _mm256_set1_epi64x((long long)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmw_epi32 (__mmask16 __A)
{
return (__m128i) __builtin_ia32_broadcastmw128 (__A);
return (__m128i) _mm_set1_epi32((int)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcastmw_epi32 (__mmask16 __A)
{
return (__m256i) __builtin_ia32_broadcastmw256 (__A);
return (__m256i) _mm256_set1_epi32((int)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_conflict_epi64 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
@@ -64,7 +65,7 @@ _mm_conflict_epi64 (__m128i __A)
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
@@ -72,16 +73,16 @@ _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
(__v2di)
_mm_setzero_di (),
_mm_setzero_si128 (),
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_conflict_epi64 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
@@ -89,7 +90,7 @@ _mm256_conflict_epi64 (__m256i __A)
(__mmask8) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
@@ -97,7 +98,7 @@ _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
@@ -105,7 +106,7 @@ _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_conflict_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
@@ -113,7 +114,7 @@ _mm_conflict_epi32 (__m128i __A)
(__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
@@ -121,7 +122,7 @@ _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
@@ -129,7 +130,7 @@ _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_conflict_epi32 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
@@ -137,7 +138,7 @@ _mm256_conflict_epi32 (__m256i __A)
(__mmask8) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
@@ -145,7 +146,7 @@ _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
(__mmask8) __U);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
@@ -154,110 +155,95 @@ _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
(__mmask8) __U);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_lzcnt_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
(__v4si)
_mm_setzero_si128 (),
(__mmask8) -1);
return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
(__v4si) __W,
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_lzcnt_epi32(__A),
(__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
(__v4si)
_mm_setzero_si128 (),
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_lzcnt_epi32(__A),
(__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_lzcnt_epi32 (__m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
(__v8si)
_mm256_setzero_si256 (),
(__mmask8) -1);
return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
(__v8si) __W,
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_lzcnt_epi32(__A),
(__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
(__v8si)
_mm256_setzero_si256 (),
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_lzcnt_epi32(__A),
(__v8si)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_lzcnt_epi64 (__m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
(__v2di)
_mm_setzero_di (),
(__mmask8) -1);
return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
(__v2di) __W,
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_lzcnt_epi64(__A),
(__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
(__v2di)
_mm_setzero_di (),
(__mmask8) __U);
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_lzcnt_epi64(__A),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_lzcnt_epi64 (__m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
(__v4di)
_mm256_setzero_si256 (),
(__mmask8) -1);
return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
(__v4di) __W,
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_lzcnt_epi64(__A),
(__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
(__v4di)
_mm256_setzero_si256 (),
(__mmask8) __U);
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_lzcnt_epi64(__A),
(__v4di)_mm256_setzero_si256());
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif /* __AVX512VLCDINTRIN_H */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,751 @@
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLVBMI2INTRIN_H
#define __AVX512VLVBMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
/* Masked 128-bit epi16 compress: calls the compresshi128 "_mask" builtin
 * with __D as the first operand, __S as the second, and __U as the mask
 * (note the builtin's operand order differs from the parameter order). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
{
  __v8hi __d = (__v8hi) __D;
  __v8hi __s = (__v8hi) __S;

  return (__m128i) __builtin_ia32_compresshi128_mask (__d, __s, __U);
}
/* "maskz" 128-bit epi16 compress: calls the compresshi128 "_mask" builtin
 * with __D as the first operand, an all-zero vector from
 * _mm_setzero_si128() as the second, and __U as the mask. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D)
{
  __v8hi __d = (__v8hi) __D;
  __v8hi __zero = (__v8hi) _mm_setzero_si128();

  return (__m128i) __builtin_ia32_compresshi128_mask (__d, __zero, __U);
}
/* Masked 128-bit epi8 compress: calls the compressqi128 "_mask" builtin
 * with __D as the first operand, __S as the second, and __U as the mask
 * (note the builtin's operand order differs from the parameter order). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
{
  __v16qi __d = (__v16qi) __D;
  __v16qi __s = (__v16qi) __S;

  return (__m128i) __builtin_ia32_compressqi128_mask (__d, __s, __U);
}
/* "maskz" 128-bit epi8 compress: calls the compressqi128 "_mask" builtin
 * with __D as the first operand, an all-zero vector from
 * _mm_setzero_si128() as the second, and __U as the mask. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D)
{
  __v16qi __d = (__v16qi) __D;
  __v16qi __zero = (__v16qi) _mm_setzero_si128();

  return (__m128i) __builtin_ia32_compressqi128_mask (__d, __zero, __U);
}
/* Masked 128-bit epi16 compress-store: reinterprets __P as a __v8hi
 * pointer and passes it, __D (as __v8hi) and the mask __U to the
 * compressstorehi128 "_mask" builtin. Returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
{
  __v8hi *__dst = (__v8hi *) __P;
  __v8hi __d = (__v8hi) __D;

  __builtin_ia32_compressstorehi128_mask (__dst, __d, __U);
}
/* Masked 128-bit epi8 compress-store: reinterprets __P as a __v16qi
 * pointer and passes it, __D (as __v16qi) and the mask __U to the
 * compressstoreqi128 "_mask" builtin. Returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
{
  __v16qi *__dst = (__v16qi *) __P;
  __v16qi __d = (__v16qi) __D;

  __builtin_ia32_compressstoreqi128_mask (__dst, __d, __U);
}
/* Masked 128-bit epi16 expand: calls the expandhi128 "_mask" builtin with
 * __D as the first operand, __S as the second, and __U as the mask
 * (note the builtin's operand order differs from the parameter order). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
{
  __v8hi __d = (__v8hi) __D;
  __v8hi __s = (__v8hi) __S;

  return (__m128i) __builtin_ia32_expandhi128_mask (__d, __s, __U);
}
/* "maskz" 128-bit epi16 expand: calls the expandhi128 "_mask" builtin with
 * __D as the first operand, an all-zero vector from _mm_setzero_si128() as
 * the second, and __U as the mask. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D)
{
  __v8hi __d = (__v8hi) __D;
  __v8hi __zero = (__v8hi) _mm_setzero_si128();

  return (__m128i) __builtin_ia32_expandhi128_mask (__d, __zero, __U);
}
/* Masked 128-bit epi8 expand: calls the expandqi128 "_mask" builtin with
 * __D as the first operand, __S as the second, and __U as the mask
 * (note the builtin's operand order differs from the parameter order). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
{
  __v16qi __d = (__v16qi) __D;
  __v16qi __s = (__v16qi) __S;

  return (__m128i) __builtin_ia32_expandqi128_mask (__d, __s, __U);
}
/* "maskz" 128-bit epi8 expand: calls the expandqi128 "_mask" builtin with
 * __D as the first operand, an all-zero vector from _mm_setzero_si128() as
 * the second, and __U as the mask. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D)
{
  __v16qi __d = (__v16qi) __D;
  __v16qi __zero = (__v16qi) _mm_setzero_si128();

  return (__m128i) __builtin_ia32_expandqi128_mask (__d, __zero, __U);
}
/* Masked 128-bit epi16 expand-load: reinterprets __P as a pointer to const
 * __v8hi and passes it, __S (as __v8hi) and the mask __U to the
 * expandloadhi128 "_mask" builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
{
  const __v8hi *__from = (const __v8hi *) __P;
  __v8hi __s = (__v8hi) __S;

  return (__m128i) __builtin_ia32_expandloadhi128_mask (__from, __s, __U);
}
/* "maskz" 128-bit epi16 expand-load: reinterprets __P as a pointer to
 * const __v8hi and passes it, an all-zero vector from _mm_setzero_si128()
 * and the mask __U to the expandloadhi128 "_mask" builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
{
  const __v8hi *__from = (const __v8hi *) __P;
  __v8hi __zero = (__v8hi) _mm_setzero_si128();

  return (__m128i) __builtin_ia32_expandloadhi128_mask (__from, __zero, __U);
}
/* Masked 128-bit epi8 expand-load: reinterprets __P as a pointer to const
 * __v16qi and passes it, __S (as __v16qi) and the mask __U to the
 * expandloadqi128 "_mask" builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
{
  const __v16qi *__from = (const __v16qi *) __P;
  __v16qi __s = (__v16qi) __S;

  return (__m128i) __builtin_ia32_expandloadqi128_mask (__from, __s, __U);
}
/* "maskz" 128-bit epi8 expand-load: reinterprets __P as a pointer to const
 * __v16qi and passes it, an all-zero vector from _mm_setzero_si128() and
 * the mask __U to the expandloadqi128 "_mask" builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
{
  const __v16qi *__from = (const __v16qi *) __P;
  __v16qi __zero = (__v16qi) _mm_setzero_si128();

  return (__m128i) __builtin_ia32_expandloadqi128_mask (__from, __zero, __U);
}
/* Masked 256-bit epi16 compress: calls the compresshi256 "_mask" builtin
 * with __D as the first operand, __S as the second, and __U as the mask
 * (note the builtin's operand order differs from the parameter order). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
{
  __v16hi __d = (__v16hi) __D;
  __v16hi __s = (__v16hi) __S;

  return (__m256i) __builtin_ia32_compresshi256_mask (__d, __s, __U);
}
/* "maskz" 256-bit epi16 compress: calls the compresshi256 "_mask" builtin
 * with __D as the first operand, an all-zero vector from
 * _mm256_setzero_si256() as the second, and __U as the mask. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
{
  __v16hi __d = (__v16hi) __D;
  __v16hi __zero = (__v16hi) _mm256_setzero_si256();

  return (__m256i) __builtin_ia32_compresshi256_mask (__d, __zero, __U);
}
/* Masked 256-bit epi8 compress: calls the compressqi256 "_mask" builtin
 * with __D as the first operand, __S as the second, and __U as the mask
 * (note the builtin's operand order differs from the parameter order). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
{
  __v32qi __d = (__v32qi) __D;
  __v32qi __s = (__v32qi) __S;

  return (__m256i) __builtin_ia32_compressqi256_mask (__d, __s, __U);
}
/* Wrapper over __builtin_ia32_compressqi256_mask where the second operand is
 * the vector returned by _mm256_setzero_si256(). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
{
  __v32qi __v_data = (__v32qi)__D;
  __v32qi __v_zero = (__v32qi)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_compressqi256_mask(__v_data, __v_zero, __U);
}
/* Wrapper over __builtin_ia32_compressstorehi256_mask: __P is used as a
 * __v16hi * destination, __D is passed as __v16hi, and __U is the mask.
 * Returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
{
  __v16hi *__p_dst = (__v16hi *)__P;
  __v16hi __v_data = (__v16hi)__D;
  __builtin_ia32_compressstorehi256_mask(__p_dst, __v_data, __U);
}
/* Wrapper over __builtin_ia32_compressstoreqi256_mask: __P is used as a
 * __v32qi * destination, __D is passed as __v32qi, and __U is the mask.
 * Returns nothing. */
static __inline__ void __DEFAULT_FN_ATTRS256
_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
{
  __v32qi *__p_dst = (__v32qi *)__P;
  __v32qi __v_data = (__v32qi)__D;
  __builtin_ia32_compressstoreqi256_mask(__p_dst, __v_data, __U);
}
/* Wrapper over __builtin_ia32_expandhi256_mask: __D and __S are reinterpreted
 * as __v16hi and passed with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
{
  __v16hi __v_data = (__v16hi)__D;
  __v16hi __v_src = (__v16hi)__S;
  return (__m256i)__builtin_ia32_expandhi256_mask(__v_data, __v_src, __U);
}
/* Wrapper over __builtin_ia32_expandhi256_mask where the second operand is the
 * vector returned by _mm256_setzero_si256(). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
{
  __v16hi __v_data = (__v16hi)__D;
  __v16hi __v_zero = (__v16hi)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_expandhi256_mask(__v_data, __v_zero, __U);
}
/* Wrapper over __builtin_ia32_expandqi256_mask: __D and __S are reinterpreted
 * as __v32qi and passed with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
{
  __v32qi __v_data = (__v32qi)__D;
  __v32qi __v_src = (__v32qi)__S;
  return (__m256i)__builtin_ia32_expandqi256_mask(__v_data, __v_src, __U);
}
/* Wrapper over __builtin_ia32_expandqi256_mask where the second operand is the
 * vector returned by _mm256_setzero_si256(). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
{
  __v32qi __v_data = (__v32qi)__D;
  __v32qi __v_zero = (__v32qi)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_expandqi256_mask(__v_data, __v_zero, __U);
}
/* Wrapper over __builtin_ia32_expandloadhi256_mask: __P is viewed as a
 * pointer to const __v16hi, __S as __v16hi, and __U is forwarded as the mask. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
{
  const __v16hi *__p_mem = (const __v16hi *)__P;
  __v16hi __v_src = (__v16hi)__S;
  return (__m256i)__builtin_ia32_expandloadhi256_mask(__p_mem, __v_src, __U);
}
/* Wrapper over __builtin_ia32_expandloadhi256_mask with the result of
 * _mm256_setzero_si256() as the second operand; __P is read as
 * const __v16hi *. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
{
  const __v16hi *__p_mem = (const __v16hi *)__P;
  __v16hi __v_zero = (__v16hi)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_expandloadhi256_mask(__p_mem, __v_zero, __U);
}
/* Wrapper over __builtin_ia32_expandloadqi256_mask: __P is viewed as a
 * pointer to const __v32qi, __S as __v32qi, and __U is forwarded as the mask. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
{
  const __v32qi *__p_mem = (const __v32qi *)__P;
  __v32qi __v_src = (__v32qi)__S;
  return (__m256i)__builtin_ia32_expandloadqi256_mask(__p_mem, __v_src, __U);
}
/* Wrapper over __builtin_ia32_expandloadqi256_mask with the result of
 * _mm256_setzero_si256() as the second operand; __P is read as
 * const __v32qi *. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
{
  const __v32qi *__p_mem = (const __v32qi *)__P;
  __v32qi __v_zero = (__v32qi)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_expandloadqi256_mask(__p_mem, __v_zero, __U);
}
/* _mm256_shldi_epi64 and its mask/maskz forms.
 * The base macro casts A and B to __v4di and I to int and calls
 * __builtin_ia32_vpshldq256.  The mask form passes that result and S to
 * __builtin_ia32_selectq_256 under mask U; the maskz form passes
 * _mm256_setzero_si256() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm256_shldi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \
                                      (__v4di)(__m256i)(B), (int)(I)))

#define _mm256_mask_shldi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
                                       (__v4di)(__m256i)(S)))

#define _mm256_maskz_shldi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
                                       (__v4di)_mm256_setzero_si256()))
/* _mm_shldi_epi64 and its mask/maskz forms.
 * The base macro casts A and B to __v2di and I to int and calls
 * __builtin_ia32_vpshldq128.  The mask form passes that result and S to
 * __builtin_ia32_selectq_128 under mask U; the maskz form passes
 * _mm_setzero_si128() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm_shldi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \
                                      (__v2di)(__m128i)(B), (int)(I)))

#define _mm_mask_shldi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm_shldi_epi64((A), (B), (I)), \
                                       (__v2di)(__m128i)(S)))

#define _mm_maskz_shldi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm_shldi_epi64((A), (B), (I)), \
                                       (__v2di)_mm_setzero_si128()))
/* _mm256_shldi_epi32 and its mask/maskz forms.
 * The base macro casts A and B to __v8si and I to int and calls
 * __builtin_ia32_vpshldd256.  The mask form passes that result and S to
 * __builtin_ia32_selectd_256 under mask U; the maskz form passes
 * _mm256_setzero_si256() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm256_shldi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \
                                      (__v8si)(__m256i)(B), (int)(I)))

#define _mm256_mask_shldi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                       (__v8si)(__m256i)(S)))

#define _mm256_maskz_shldi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
/* _mm_shldi_epi32 and its mask/maskz forms.
 * The base macro casts A and B to __v4si and I to int and calls
 * __builtin_ia32_vpshldd128.  The mask form passes that result and S to
 * __builtin_ia32_selectd_128 under mask U; the maskz form passes
 * _mm_setzero_si128() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm_shldi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \
                                      (__v4si)(__m128i)(B), (int)(I)))

#define _mm_mask_shldi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                       (__v4si)(__m128i)(S)))

#define _mm_maskz_shldi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                       (__v4si)_mm_setzero_si128()))
/* _mm256_shldi_epi16 and its mask/maskz forms.
 * The base macro casts A and B to __v16hi and I to int and calls
 * __builtin_ia32_vpshldw256.  The mask form passes that result and S to
 * __builtin_ia32_selectw_256 under mask U (cast to __mmask16); the maskz form
 * passes _mm256_setzero_si256() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm256_shldi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \
                                      (__v16hi)(__m256i)(B), (int)(I)))

#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                       (__v16hi)(__m256i)(S)))

#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                       (__v16hi)_mm256_setzero_si256()))
/* _mm_shldi_epi16 and its mask/maskz forms.
 * The base macro casts A and B to __v8hi and I to int and calls
 * __builtin_ia32_vpshldw128.  The mask form passes that result and S to
 * __builtin_ia32_selectw_128 under mask U; the maskz form passes
 * _mm_setzero_si128() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm_shldi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

#define _mm_mask_shldi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                       (__v8hi)(__m128i)(S)))

#define _mm_maskz_shldi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                       (__v8hi)_mm_setzero_si128()))
/* _mm256_shrdi_epi64 and its mask/maskz forms.
 * The base macro casts A and B to __v4di and I to int and calls
 * __builtin_ia32_vpshrdq256.  The mask form passes that result and S to
 * __builtin_ia32_selectq_256 under mask U; the maskz form passes
 * _mm256_setzero_si256() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm256_shrdi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
                                      (__v4di)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                       (__v4di)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                       (__v4di)_mm256_setzero_si256()))
/* _mm_shrdi_epi64 and its mask/maskz forms.
 * The base macro casts A and B to __v2di and I to int and calls
 * __builtin_ia32_vpshrdq128.  The mask form passes that result and S to
 * __builtin_ia32_selectq_128 under mask U; the maskz form passes
 * _mm_setzero_si128() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm_shrdi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
                                      (__v2di)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                       (__v2di)(__m128i)(S)))

#define _mm_maskz_shrdi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                       (__v2di)_mm_setzero_si128()))
/* _mm256_shrdi_epi32 and its mask/maskz forms.
 * The base macro casts A and B to __v8si and I to int and calls
 * __builtin_ia32_vpshrdd256.  The mask form passes that result and S to
 * __builtin_ia32_selectd_256 under mask U; the maskz form passes
 * _mm256_setzero_si256() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm256_shrdi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
                                      (__v8si)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                       (__v8si)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
/* _mm_shrdi_epi32 and its mask/maskz forms.
 * The base macro casts A and B to __v4si and I to int and calls
 * __builtin_ia32_vpshrdd128.  The mask form passes that result and S to
 * __builtin_ia32_selectd_128 under mask U; the maskz form passes
 * _mm_setzero_si128() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm_shrdi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
                                      (__v4si)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                       (__v4si)(__m128i)(S)))

#define _mm_maskz_shrdi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                       (__v4si)_mm_setzero_si128()))
/* _mm256_shrdi_epi16 and its mask/maskz forms.
 * The base macro casts A and B to __v16hi and I to int and calls
 * __builtin_ia32_vpshrdw256.  The mask form passes that result and S to
 * __builtin_ia32_selectw_256 under mask U (cast to __mmask16); the maskz form
 * passes _mm256_setzero_si256() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm256_shrdi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
                                      (__v16hi)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                       (__v16hi)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                       (__v16hi)_mm256_setzero_si256()))
/* _mm_shrdi_epi16 and its mask/maskz forms.
 * The base macro casts A and B to __v8hi and I to int and calls
 * __builtin_ia32_vpshrdw128.  The mask form passes that result and S to
 * __builtin_ia32_selectw_128 under mask U; the maskz form passes
 * _mm_setzero_si128() where the mask form passes S.
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the invocation applies to the cast result, not to the inner call.
 * NOTE(review): presumably macros because I must be a constant expression --
 * confirm against the builtin's definition. */
#define _mm_shrdi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                       (__v8hi)(__m128i)(S)))

#define _mm_maskz_shrdi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                       (__v8hi)_mm_setzero_si128()))
/* Wrapper over __builtin_ia32_vpshldvq256_mask: __S, __A and __B are passed
 * as __v4di together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __v_s = (__v4di)__S;
  __v4di __v_a = (__v4di)__A;
  __v4di __v_b = (__v4di)__B;
  return (__m256i)__builtin_ia32_vpshldvq256_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshldvq256_maskz: __S, __A and __B are passed
 * as __v4di together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v4di __v_s = (__v4di)__S;
  __v4di __v_a = (__v4di)__A;
  __v4di __v_b = (__v4di)__B;
  return (__m256i)__builtin_ia32_vpshldvq256_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvq256_mask with __S, __A and __B
 * as __v4di and a mask of (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v4di __v_s = (__v4di)__S;
  __v4di __v_a = (__v4di)__A;
  __v4di __v_b = (__v4di)__B;
  return (__m256i)__builtin_ia32_vpshldvq256_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshldvq128_mask: __S, __A and __B are passed
 * as __v2di together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __v_s = (__v2di)__S;
  __v2di __v_a = (__v2di)__A;
  __v2di __v_b = (__v2di)__B;
  return (__m128i)__builtin_ia32_vpshldvq128_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshldvq128_maskz: __S, __A and __B are passed
 * as __v2di together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v2di __v_s = (__v2di)__S;
  __v2di __v_a = (__v2di)__A;
  __v2di __v_b = (__v2di)__B;
  return (__m128i)__builtin_ia32_vpshldvq128_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvq128_mask with __S, __A and __B
 * as __v2di and a mask of (__mmask8)-1. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v2di __v_s = (__v2di)__S;
  __v2di __v_a = (__v2di)__A;
  __v2di __v_b = (__v2di)__B;
  return (__m128i)__builtin_ia32_vpshldvq128_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshldvd256_mask: __S, __A and __B are passed
 * as __v8si together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpshldvd256_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshldvd256_maskz: __S, __A and __B are passed
 * as __v8si together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpshldvd256_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvd256_mask with __S, __A and __B
 * as __v8si and a mask of (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpshldvd256_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshldvd128_mask: __S, __A and __B are passed
 * as __v4si together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpshldvd128_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshldvd128_maskz: __S, __A and __B are passed
 * as __v4si together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpshldvd128_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvd128_mask with __S, __A and __B
 * as __v4si and a mask of (__mmask8)-1. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpshldvd128_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshldvw256_mask: __S, __A and __B are passed
 * as __v16hi together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  __v16hi __v_s = (__v16hi)__S;
  __v16hi __v_a = (__v16hi)__A;
  __v16hi __v_b = (__v16hi)__B;
  return (__m256i)__builtin_ia32_vpshldvw256_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshldvw256_maskz: __S, __A and __B are passed
 * as __v16hi together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v16hi __v_s = (__v16hi)__S;
  __v16hi __v_a = (__v16hi)__A;
  __v16hi __v_b = (__v16hi)__B;
  return (__m256i)__builtin_ia32_vpshldvw256_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvw256_mask with __S, __A and __B
 * as __v16hi and a mask of (__mmask16)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask16 __k_all = (__mmask16)-1;
  __v16hi __v_s = (__v16hi)__S;
  __v16hi __v_a = (__v16hi)__A;
  __v16hi __v_b = (__v16hi)__B;
  return (__m256i)__builtin_ia32_vpshldvw256_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshldvw128_mask: __S, __A and __B are passed
 * as __v8hi together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v8hi __v_s = (__v8hi)__S;
  __v8hi __v_a = (__v8hi)__A;
  __v8hi __v_b = (__v8hi)__B;
  return (__m128i)__builtin_ia32_vpshldvw128_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshldvw128_maskz: __S, __A and __B are passed
 * as __v8hi together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v8hi __v_s = (__v8hi)__S;
  __v8hi __v_a = (__v8hi)__A;
  __v8hi __v_b = (__v8hi)__B;
  return (__m128i)__builtin_ia32_vpshldvw128_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshldvw128_mask with __S, __A and __B
 * as __v8hi and a mask of (__mmask8)-1. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v8hi __v_s = (__v8hi)__S;
  __v8hi __v_a = (__v8hi)__A;
  __v8hi __v_b = (__v8hi)__B;
  return (__m128i)__builtin_ia32_vpshldvw128_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshrdvq256_mask: __S, __A and __B are passed
 * as __v4di together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v4di __v_s = (__v4di)__S;
  __v4di __v_a = (__v4di)__A;
  __v4di __v_b = (__v4di)__B;
  return (__m256i)__builtin_ia32_vpshrdvq256_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshrdvq256_maskz: __S, __A and __B are passed
 * as __v4di together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v4di __v_s = (__v4di)__S;
  __v4di __v_a = (__v4di)__A;
  __v4di __v_b = (__v4di)__B;
  return (__m256i)__builtin_ia32_vpshrdvq256_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvq256_mask with __S, __A and __B
 * as __v4di and a mask of (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v4di __v_s = (__v4di)__S;
  __v4di __v_a = (__v4di)__A;
  __v4di __v_b = (__v4di)__B;
  return (__m256i)__builtin_ia32_vpshrdvq256_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshrdvq128_mask: __S, __A and __B are passed
 * as __v2di together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v2di __v_s = (__v2di)__S;
  __v2di __v_a = (__v2di)__A;
  __v2di __v_b = (__v2di)__B;
  return (__m128i)__builtin_ia32_vpshrdvq128_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshrdvq128_maskz: __S, __A and __B are passed
 * as __v2di together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v2di __v_s = (__v2di)__S;
  __v2di __v_a = (__v2di)__A;
  __v2di __v_b = (__v2di)__B;
  return (__m128i)__builtin_ia32_vpshrdvq128_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvq128_mask with __S, __A and __B
 * as __v2di and a mask of (__mmask8)-1. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v2di __v_s = (__v2di)__S;
  __v2di __v_a = (__v2di)__A;
  __v2di __v_b = (__v2di)__B;
  return (__m128i)__builtin_ia32_vpshrdvq128_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshrdvd256_mask: __S, __A and __B are passed
 * as __v8si together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpshrdvd256_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshrdvd256_maskz: __S, __A and __B are passed
 * as __v8si together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpshrdvd256_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvd256_mask with __S, __A and __B
 * as __v8si and a mask of (__mmask8)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpshrdvd256_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshrdvd128_mask: __S, __A and __B are passed
 * as __v4si together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpshrdvd128_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshrdvd128_maskz: __S, __A and __B are passed
 * as __v4si together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpshrdvd128_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvd128_mask with __S, __A and __B
 * as __v4si and a mask of (__mmask8)-1. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpshrdvd128_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshrdvw256_mask: __S, __A and __B are passed
 * as __v16hi together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  __v16hi __v_s = (__v16hi)__S;
  __v16hi __v_a = (__v16hi)__A;
  __v16hi __v_b = (__v16hi)__B;
  return (__m256i)__builtin_ia32_vpshrdvw256_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshrdvw256_maskz: __S, __A and __B are passed
 * as __v16hi together with the mask __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v16hi __v_s = (__v16hi)__S;
  __v16hi __v_a = (__v16hi)__A;
  __v16hi __v_b = (__v16hi)__B;
  return (__m256i)__builtin_ia32_vpshrdvw256_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvw256_mask with __S, __A and __B
 * as __v16hi and a mask of (__mmask16)-1. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  const __mmask16 __k_all = (__mmask16)-1;
  __v16hi __v_s = (__v16hi)__S;
  __v16hi __v_a = (__v16hi)__A;
  __v16hi __v_b = (__v16hi)__B;
  return (__m256i)__builtin_ia32_vpshrdvw256_mask(__v_s, __v_a, __v_b, __k_all);
}
/* Wrapper over __builtin_ia32_vpshrdvw128_mask: __S, __A and __B are passed
 * as __v8hi together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v8hi __v_s = (__v8hi)__S;
  __v8hi __v_a = (__v8hi)__A;
  __v8hi __v_b = (__v8hi)__B;
  return (__m128i)__builtin_ia32_vpshrdvw128_mask(__v_s, __v_a, __v_b, __U);
}
/* Wrapper over __builtin_ia32_vpshrdvw128_maskz: __S, __A and __B are passed
 * as __v8hi together with the mask __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v8hi __v_s = (__v8hi)__S;
  __v8hi __v_a = (__v8hi)__A;
  __v8hi __v_b = (__v8hi)__B;
  return (__m128i)__builtin_ia32_vpshrdvw128_maskz(__v_s, __v_a, __v_b, __U);
}
/* Unmasked form: calls __builtin_ia32_vpshrdvw128_mask with __S, __A and __B
 * as __v8hi and a mask of (__mmask8)-1. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  const __mmask8 __k_all = (__mmask8)-1;
  __v8hi __v_s = (__v8hi)__S;
  __v8hi __v_a = (__v8hi)__A;
  __v8hi __v_b = (__v8hi)__B;
  return (__m128i)__builtin_ia32_vpshrdvw128_mask(__v_s, __v_a, __v_b, __k_all);
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

View File

@@ -0,0 +1,223 @@
/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VLVNNIINTRIN_H
#define __AVX512VLVNNIINTRIN_H
/* Define the default attributes for the functions in this file.
 * Both macros expand to the same attribute list (__always_inline__,
 * __nodebug__ and a __target__ string naming "avx512vl,avx512vnni"); they
 * differ only in the __min_vector_width__ argument (128 vs. 256).  Both are
 * #undef'd again at the end of this header. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
/* Wrapper over __builtin_ia32_vpdpbusd256 with __S, __A and __B passed as
 * __v8si. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpdpbusd256(__v_s, __v_a, __v_b);
}
/* Computes _mm256_dpbusd_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_256 under mask __U (both as __v8si). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpbusd_epi32(__S, __A, __B);
  __v8si __v_src = (__v8si)__S;
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_src);
}
/* Computes _mm256_dpbusd_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm256_setzero_si256(), to __builtin_ia32_selectd_256 under __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpbusd_epi32(__S, __A, __B);
  __v8si __v_zero = (__v8si)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpbusds256 with __S, __A and __B passed as
 * __v8si. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpdpbusds256(__v_s, __v_a, __v_b);
}
/* Computes _mm256_dpbusds_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_256 under mask __U (both as __v8si). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpbusds_epi32(__S, __A, __B);
  __v8si __v_src = (__v8si)__S;
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_src);
}
/* Computes _mm256_dpbusds_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm256_setzero_si256(), to __builtin_ia32_selectd_256 under __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpbusds_epi32(__S, __A, __B);
  __v8si __v_zero = (__v8si)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpwssd256 with __S, __A and __B passed as
 * __v8si. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpdpwssd256(__v_s, __v_a, __v_b);
}
/* Computes _mm256_dpwssd_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_256 under mask __U (both as __v8si). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpwssd_epi32(__S, __A, __B);
  __v8si __v_src = (__v8si)__S;
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_src);
}
/* Computes _mm256_dpwssd_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm256_setzero_si256(), to __builtin_ia32_selectd_256 under __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpwssd_epi32(__S, __A, __B);
  __v8si __v_zero = (__v8si)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpwssds256 with __S, __A and __B passed as
 * __v8si. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_s = (__v8si)__S;
  __v8si __v_a = (__v8si)__A;
  __v8si __v_b = (__v8si)__B;
  return (__m256i)__builtin_ia32_vpdpwssds256(__v_s, __v_a, __v_b);
}
/* Computes _mm256_dpwssds_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_256 under mask __U (both as __v8si). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpwssds_epi32(__S, __A, __B);
  __v8si __v_src = (__v8si)__S;
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_src);
}
/* Computes _mm256_dpwssds_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm256_setzero_si256(), to __builtin_ia32_selectd_256 under __U. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  __v8si __v_res = (__v8si)_mm256_dpwssds_epi32(__S, __A, __B);
  __v8si __v_zero = (__v8si)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectd_256(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpbusd128 with __S, __A and __B passed as
 * __v4si. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpdpbusd128(__v_s, __v_a, __v_b);
}
/* Computes _mm_dpbusd_epi32(__S, __A, __B) and hands it, together with __S,
 * to __builtin_ia32_selectd_128 under mask __U (both as __v4si). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpbusd_epi32(__S, __A, __B);
  __v4si __v_src = (__v4si)__S;
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_src);
}
/* Computes _mm_dpbusd_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm_setzero_si128(), to __builtin_ia32_selectd_128 under __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpbusd_epi32(__S, __A, __B);
  __v4si __v_zero = (__v4si)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpbusds128 with __S, __A and __B passed as
 * __v4si. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpdpbusds128(__v_s, __v_a, __v_b);
}
/* Computes _mm_dpbusds_epi32(__S, __A, __B) and hands it, together with __S,
 * to __builtin_ia32_selectd_128 under mask __U (both as __v4si). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpbusds_epi32(__S, __A, __B);
  __v4si __v_src = (__v4si)__S;
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_src);
}
/* Computes _mm_dpbusds_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm_setzero_si128(), to __builtin_ia32_selectd_128 under __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpbusds_epi32(__S, __A, __B);
  __v4si __v_zero = (__v4si)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpwssd128 with __S, __A and __B passed as
 * __v4si. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpdpwssd128(__v_s, __v_a, __v_b);
}
/* Computes _mm_dpwssd_epi32(__S, __A, __B) and hands it, together with __S,
 * to __builtin_ia32_selectd_128 under mask __U (both as __v4si). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpwssd_epi32(__S, __A, __B);
  __v4si __v_src = (__v4si)__S;
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_src);
}
/* Computes _mm_dpwssd_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm_setzero_si128(), to __builtin_ia32_selectd_128 under __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpwssd_epi32(__S, __A, __B);
  __v4si __v_zero = (__v4si)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpwssds128 with __S, __A and __B passed as
 * __v4si. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_s = (__v4si)__S;
  __v4si __v_a = (__v4si)__A;
  __v4si __v_b = (__v4si)__B;
  return (__m128i)__builtin_ia32_vpdpwssds128(__v_s, __v_a, __v_b);
}
/* Computes _mm_dpwssds_epi32(__S, __A, __B) and hands it, together with __S,
 * to __builtin_ia32_selectd_128 under mask __U (both as __v4si). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpwssds_epi32(__S, __A, __B);
  __v4si __v_src = (__v4si)__S;
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_src);
}
/* Computes _mm_dpwssds_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm_setzero_si128(), to __builtin_ia32_selectd_128 under __U. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  __v4si __v_res = (__v4si)_mm_dpwssds_epi32(__S, __A, __B);
  __v4si __v_zero = (__v4si)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectd_128(__U, __v_res, __v_zero);
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

View File

@@ -0,0 +1,129 @@
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VNNIINTRIN_H
#define __AVX512VNNIINTRIN_H
/* Define the default attributes for the functions in this file.
 * The list is __always_inline__, __nodebug__, a __target__ string naming
 * "avx512vnni", and __min_vector_width__(512).  The macro is #undef'd again
 * at the end of this header. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
/* Wrapper over __builtin_ia32_vpdpbusd512 with __S, __A and __B passed as
 * __v16si. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_s = (__v16si)__S;
  __v16si __v_a = (__v16si)__A;
  __v16si __v_b = (__v16si)__B;
  return (__m512i)__builtin_ia32_vpdpbusd512(__v_s, __v_a, __v_b);
}
/* Computes _mm512_dpbusd_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_512 under mask __U (both as __v16si). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpbusd_epi32(__S, __A, __B);
  __v16si __v_src = (__v16si)__S;
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_src);
}
/* Computes _mm512_dpbusd_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm512_setzero_si512(), to __builtin_ia32_selectd_512 under __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpbusd_epi32(__S, __A, __B);
  __v16si __v_zero = (__v16si)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpbusds512 with __S, __A and __B passed as
 * __v16si. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_s = (__v16si)__S;
  __v16si __v_a = (__v16si)__A;
  __v16si __v_b = (__v16si)__B;
  return (__m512i)__builtin_ia32_vpdpbusds512(__v_s, __v_a, __v_b);
}
/* Computes _mm512_dpbusds_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_512 under mask __U (both as __v16si). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpbusds_epi32(__S, __A, __B);
  __v16si __v_src = (__v16si)__S;
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_src);
}
/* Computes _mm512_dpbusds_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm512_setzero_si512(), to __builtin_ia32_selectd_512 under __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpbusds_epi32(__S, __A, __B);
  __v16si __v_zero = (__v16si)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpwssd512 with __S, __A and __B passed as
 * __v16si. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_s = (__v16si)__S;
  __v16si __v_a = (__v16si)__A;
  __v16si __v_b = (__v16si)__B;
  return (__m512i)__builtin_ia32_vpdpwssd512(__v_s, __v_a, __v_b);
}
/* Computes _mm512_dpwssd_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_512 under mask __U (both as __v16si). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpwssd_epi32(__S, __A, __B);
  __v16si __v_src = (__v16si)__S;
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_src);
}
/* Computes _mm512_dpwssd_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm512_setzero_si512(), to __builtin_ia32_selectd_512 under __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpwssd_epi32(__S, __A, __B);
  __v16si __v_zero = (__v16si)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_zero);
}
/* Wrapper over __builtin_ia32_vpdpwssds512 with __S, __A and __B passed as
 * __v16si. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_s = (__v16si)__S;
  __v16si __v_a = (__v16si)__A;
  __v16si __v_b = (__v16si)__B;
  return (__m512i)__builtin_ia32_vpdpwssds512(__v_s, __v_a, __v_b);
}
/* Computes _mm512_dpwssds_epi32(__S, __A, __B) and hands it, together with
 * __S, to __builtin_ia32_selectd_512 under mask __U (both as __v16si). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpwssds_epi32(__S, __A, __B);
  __v16si __v_src = (__v16si)__S;
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_src);
}
/* Computes _mm512_dpwssds_epi32(__S, __A, __B) and hands it, together with the
 * result of _mm512_setzero_si512(), to __builtin_ia32_selectd_512 under __U. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  __v16si __v_res = (__v16si)_mm512_dpwssds_epi32(__S, __A, __B);
  __v16si __v_zero = (__v16si)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectd_512(__U, __v_res, __v_zero);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@@ -1,5 +1,4 @@
/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
*------------------===
/*===----- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics-------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -32,8 +31,7 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \
"q")))
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);

View File

@@ -0,0 +1,105 @@
/*===--- avx512vpopcntdqvlintrin.h - AVX512VPOPCNTDQ intrinsics ------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error \
"Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VPOPCNTDQVLINTRIN_H
#define __AVX512VPOPCNTDQVLINTRIN_H
/* Define the default attributes for the functions in this file.
 * Both macros expand to the same attribute list (__always_inline__,
 * __nodebug__ and a __target__ string naming "avx512vpopcntdq,avx512vl");
 * they differ only in the __min_vector_width__ argument (128 vs. 256).  Both
 * are #undef'd again at the end of this header. */
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))
/* Wrapper over __builtin_ia32_vpopcntq_128 with __A passed as __v2di. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi64(__m128i __A)
{
  __v2di __v_a = (__v2di)__A;
  return (__m128i)__builtin_ia32_vpopcntq_128(__v_a);
}
/* Computes _mm_popcnt_epi64(__A) and hands it, together with __W, to
 * __builtin_ia32_selectq_128 under mask __U (both as __v2di). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A)
{
  __v2di __v_cnt = (__v2di)_mm_popcnt_epi64(__A);
  __v2di __v_src = (__v2di)__W;
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, __v_cnt, __v_src);
}
/* Delegates to _mm_mask_popcnt_epi64 with the result of _mm_setzero_si128()
 * as its first argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A)
{
  __m128i __v_zero = (__m128i)_mm_setzero_si128();
  return _mm_mask_popcnt_epi64(__v_zero, __U, __A);
}
/* Wrapper over __builtin_ia32_vpopcntd_128 with __A passed as __v4si. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi32(__m128i __A)
{
  __v4si __v_a = (__v4si)__A;
  return (__m128i)__builtin_ia32_vpopcntd_128(__v_a);
}
/* Computes _mm_popcnt_epi32(__A) and hands it, together with __W, to
 * __builtin_ia32_selectd_128 under mask __U (both as __v4si). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A)
{
  __v4si __v_cnt = (__v4si)_mm_popcnt_epi32(__A);
  __v4si __v_src = (__v4si)__W;
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, __v_cnt, __v_src);
}
/* Delegates to _mm_mask_popcnt_epi32 with the result of _mm_setzero_si128()
 * as its first argument. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A)
{
  __m128i __v_zero = (__m128i)_mm_setzero_si128();
  return _mm_mask_popcnt_epi32(__v_zero, __U, __A);
}
/* Wrapper over __builtin_ia32_vpopcntq_256 with __A passed as __v4di. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi64(__m256i __A)
{
  __v4di __v_a = (__v4di)__A;
  return (__m256i)__builtin_ia32_vpopcntq_256(__v_a);
}
/* Computes _mm256_popcnt_epi64(__A) and hands it, together with __W, to
 * __builtin_ia32_selectq_256 under mask __U (both as __v4di). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A)
{
  __v4di __v_cnt = (__v4di)_mm256_popcnt_epi64(__A);
  __v4di __v_src = (__v4di)__W;
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, __v_cnt, __v_src);
}
/* Delegates to _mm256_mask_popcnt_epi64 with the result of
 * _mm256_setzero_si256() as its first argument. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A)
{
  __m256i __v_zero = (__m256i)_mm256_setzero_si256();
  return _mm256_mask_popcnt_epi64(__v_zero, __U, __A);
}
/* Wrapper over __builtin_ia32_vpopcntd_256 with __A passed as __v8si. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi32(__m256i __A)
{
  __v8si __v_a = (__v8si)__A;
  return (__m256i)__builtin_ia32_vpopcntd_256(__v_a);
}
/// Masked form of _mm256_popcnt_epi32: the 32-bit select builtin combines the
/// counts computed from \a __A with \a __W under control of mask \a __U.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
  __v8si __counts = (__v8si)_mm256_popcnt_epi32(__A);
  __v8si __passthru = (__v8si)__W;
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, __counts,
                                             __passthru);
}
/// Zero-masked form: identical to _mm256_mask_popcnt_epi32 with an all-zero
/// vector supplied as the pass-through operand.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
  __m256i __zero = (__m256i)_mm256_setzero_si256();
  return _mm256_mask_popcnt_epi32(__zero, __U, __A);
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -49,7 +49,7 @@
to use it as a potentially faster version of BSF. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
/// \brief Counts the number of trailing zero bits in the operand.
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -65,7 +65,7 @@ __tzcnt_u16(unsigned short __X)
return __X ? __builtin_ctzs(__X) : 16;
}
/// \brief Performs a bitwise AND of the second operand with the one's
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
@@ -85,7 +85,7 @@ __andn_u32(unsigned int __X, unsigned int __Y)
}
/* AMD-specified, double-leading-underscore version of BEXTR */
/// \brief Extracts the specified bits from the first operand and returns them
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
@@ -100,6 +100,7 @@ __andn_u32(unsigned int __X, unsigned int __Y)
/// number of bits to be extracted.
/// \returns An unsigned integer whose least significant bits contain the
/// extracted bits.
/// \see _bextr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__bextr_u32(unsigned int __X, unsigned int __Y)
{
@@ -107,7 +108,7 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
}
/* Intel-specified, single-leading-underscore version of BEXTR */
/// \brief Extracts the specified bits from the first operand and returns them
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
@@ -124,13 +125,14 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
/// Bits [7:0] specify the number of bits.
/// \returns An unsigned integer whose least significant bits contain the
/// extracted bits.
/// \see __bextr_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
{
  /* Pack the control word: low byte of __Y in bits [7:0], low byte of __Z in
     bits [15:8]. */
  unsigned int __low = __Y & 0xff;
  unsigned int __high = (__Z & 0xff) << 8;
  return __builtin_ia32_bextr_u32(__X, __low | __high);
}
/// \brief Clears all bits in the source except for the least significant bit
/// Clears all bits in the source except for the least significant bit
/// containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
@@ -147,7 +149,7 @@ __blsi_u32(unsigned int __X)
return __X & -__X;
}
/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
@@ -164,7 +166,7 @@ __blsmsk_u32(unsigned int __X)
return __X ^ (__X - 1);
}
/// \brief Clears the least significant bit that is set to 1 in the source
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
@@ -181,7 +183,7 @@ __blsr_u32(unsigned int __X)
return __X & (__X - 1);
}
/// \brief Counts the number of trailing zero bits in the operand.
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -197,7 +199,7 @@ __tzcnt_u32(unsigned int __X)
return __X ? __builtin_ctz(__X) : 32;
}
/// \brief Counts the number of trailing zero bits in the operand.
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -226,7 +228,7 @@ _mm_tzcnt_32(unsigned int __X)
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
/// \brief Performs a bitwise AND of the second operand with the one's
/// Performs a bitwise AND of the second operand with the one's
/// complement of the first operand.
///
/// \headerfile <x86intrin.h>
@@ -246,7 +248,7 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
}
/* AMD-specified, double-leading-underscore version of BEXTR */
/// \brief Extracts the specified bits from the first operand and returns them
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
@@ -261,6 +263,7 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
/// the number of bits to be extracted.
/// \returns An unsigned 64-bit integer whose least significant bits contain the
/// extracted bits.
/// \see _bextr_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__bextr_u64(unsigned long long __X, unsigned long long __Y)
{
@@ -268,7 +271,7 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
}
/* Intel-specified, single-leading-underscore version of BEXTR */
/// \brief Extracts the specified bits from the first operand and returns them
/// Extracts the specified bits from the first operand and returns them
/// in the least significant bits of the result.
///
/// \headerfile <x86intrin.h>
@@ -285,13 +288,14 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
/// Bits [7:0] specify the number of bits.
/// \returns An unsigned 64-bit integer whose least significant bits contain the
/// extracted bits.
/// \see __bextr_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
{
  /* Pack the control word: low byte of __Y in bits [7:0], low byte of __Z in
     bits [15:8]. The packing is done in unsigned int, as before. */
  unsigned int __low = __Y & 0xff;
  unsigned int __high = (__Z & 0xff) << 8;
  return __builtin_ia32_bextr_u64(__X, __low | __high);
}
/// \brief Clears all bits in the source except for the least significant bit
/// Clears all bits in the source except for the least significant bit
/// containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
@@ -308,7 +312,7 @@ __blsi_u64(unsigned long long __X)
return __X & -__X;
}
/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
/// Creates a mask whose bits are set to 1, using bit 0 up to and
/// including the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
@@ -325,7 +329,7 @@ __blsmsk_u64(unsigned long long __X)
return __X ^ (__X - 1);
}
/// \brief Clears the least significant bit that is set to 1 in the source
/// Clears the least significant bit that is set to 1 in the source
/// operand and returns the result.
///
/// \headerfile <x86intrin.h>
@@ -342,7 +346,7 @@ __blsr_u64(unsigned long long __X)
return __X & (__X - 1);
}
/// \brief Counts the number of trailing zero bits in the operand.
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -358,7 +362,7 @@ __tzcnt_u64(unsigned long long __X)
return __X ? __builtin_ctzll(__X) : 64;
}
/// \brief Counts the number of trailing zero bits in the operand.
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///

113
c_headers/cetintrin.h Normal file
View File

@@ -0,0 +1,113 @@
/*===---- cetintrin.h - CET intrinsic --------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <cetintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __CETINTRIN_H
#define __CETINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("shstk")))
/* Forwards __a to the INCSSPD builtin; produces no value. */
static __inline__ void __DEFAULT_FN_ATTRS
_incsspd(int __a)
{
  __builtin_ia32_incsspd(__a);
}
#ifdef __x86_64__
/* Forwards __a to the INCSSPQ builtin; only declared for x86-64 builds. */
static __inline__ void __DEFAULT_FN_ATTRS
_incsspq(unsigned long long __a)
{
  __builtin_ia32_incsspq(__a);
}
#endif /* __x86_64__ */
/* Width-neutral wrapper: uses the 64-bit INCSSPQ builtin on x86-64 and the
 * 32-bit INCSSPD builtin (with __a converted to int) otherwise. */
static __inline__ void __DEFAULT_FN_ATTRS
_inc_ssp(unsigned int __a)
{
#ifdef __x86_64__
  __builtin_ia32_incsspq(__a);
#else /* __x86_64__ */
  __builtin_ia32_incsspd((int)__a);
#endif /* __x86_64__ */
}
/* Passes __a to the RDSSPD builtin and returns the builtin's result. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_rdsspd(unsigned int __a)
{
  unsigned int __result = __builtin_ia32_rdsspd(__a);
  return __result;
}
#ifdef __x86_64__
/* Passes __a to the RDSSPQ builtin and returns the builtin's result; only
 * declared for x86-64 builds. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_rdsspq(unsigned long long __a)
{
  unsigned long long __result = __builtin_ia32_rdsspq(__a);
  return __result;
}
#endif /* __x86_64__ */
/* _get_ssp: returns the result of the RDSSP builtin invoked with a zero
 * operand. The return type is 64-bit on x86-64 and 32-bit otherwise. */
#ifdef __x86_64__
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_get_ssp(void)
{
  unsigned long long __ssp = __builtin_ia32_rdsspq(0);
  return __ssp;
}
#else /* __x86_64__ */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_get_ssp(void)
{
  unsigned int __ssp = __builtin_ia32_rdsspd(0);
  return __ssp;
}
#endif /* __x86_64__ */
/* Invokes the SAVEPREVSSP builtin. Takes no arguments and returns nothing.
 * Declared with an explicit (void) parameter list so that, in C, this is a
 * real prototype: calls with stray arguments are rejected and
 * -Wstrict-prototypes stays quiet. */
static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp(void) {
  __builtin_ia32_saveprevssp();
}
/* Forwards the pointer __p to the RSTORSSP builtin; produces no value. */
static __inline__ void __DEFAULT_FN_ATTRS
_rstorssp(void * __p)
{
  __builtin_ia32_rstorssp(__p);
}
/* Forwards the 32-bit value __a and the pointer __p to the WRSSD builtin. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrssd(unsigned int __a, void * __p)
{
  __builtin_ia32_wrssd(__a, __p);
}
#ifdef __x86_64__
/* Forwards the 64-bit value __a and the pointer __p to the WRSSQ builtin;
 * only declared for x86-64 builds. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrssq(unsigned long long __a, void * __p)
{
  __builtin_ia32_wrssq(__a, __p);
}
#endif /* __x86_64__ */
/* Forwards the 32-bit value __a and the pointer __p to the WRUSSD builtin. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrussd(unsigned int __a, void * __p)
{
  __builtin_ia32_wrussd(__a, __p);
}
#ifdef __x86_64__
/* Forwards the 64-bit value __a and the pointer __p to the WRUSSQ builtin;
 * only declared for x86-64 builds. */
static __inline__ void __DEFAULT_FN_ATTRS
_wrussq(unsigned long long __a, void * __p)
{
  __builtin_ia32_wrussq(__a, __p);
}
#endif /* __x86_64__ */
/* Invokes the SETSSBSY builtin. Takes no arguments and returns nothing.
 * Declared with an explicit (void) parameter list so that, in C, this is a
 * real prototype: calls with stray arguments are rejected and
 * -Wstrict-prototypes stays quiet. */
static __inline__ void __DEFAULT_FN_ATTRS _setssbsy(void) {
  __builtin_ia32_setssbsy();
}
/* Forwards the pointer __p to the CLRSSBSY builtin; produces no value. */
static __inline__ void __DEFAULT_FN_ATTRS
_clrssbsy(void * __p)
{
  __builtin_ia32_clrssbsy(__p);
}
#undef __DEFAULT_FN_ATTRS
#endif /* __CETINTRIN_H */

View File

@@ -0,0 +1,42 @@
/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __CLDEMOTEINTRIN_H
#define __CLDEMOTEINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("cldemote")))
/* Forwards the address __P to the CLDEMOTE builtin; produces no value and
 * does not write through __P from this wrapper. */
static __inline__ void __DEFAULT_FN_ATTRS
_cldemote(const void * __P)
{
  __builtin_ia32_cldemote(__P);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@@ -1,4 +1,4 @@
/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------------------===
/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -32,7 +32,7 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt")))
static __inline__ void __DEFAULT_FN_ATTRS
_mm_clflushopt(char * __m) {
_mm_clflushopt(void const * __m) {
__builtin_ia32_clflushopt(__m);
}

52
c_headers/clwbintrin.h Normal file
View File

@@ -0,0 +1,52 @@
/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __CLWBINTRIN_H
#define __CLWBINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb")))
/// Writes the cache line containing the linear address \a __p back to memory
/// if that line has been modified. The write-back applies to every level of
/// the cache hierarchy in the cache coherence domain.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> CLWB </c> instruction.
///
/// \param __p
///    A pointer to the memory location that identifies the cache line to be
///    written back.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_clwb(void const *__p)
{
  __builtin_ia32_clwb(__p);
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@@ -20,18 +20,18 @@
*
*===-----------------------------------------------------------------------===
*/
#ifndef __X86INTRIN_H
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <clzerointrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _CLZEROINTRIN_H
#define _CLZEROINTRIN_H
#ifndef __CLZEROINTRIN_H
#define __CLZEROINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("clzero")))
/// \brief Loads the cache line address and zero's out the cacheline
/// Loads the cache line address and zero's out the cacheline
///
/// \headerfile <clzerointrin.h>
///
@@ -45,6 +45,6 @@ _mm_clzero (void * __line)
__builtin_ia32_clzero ((void *)__line);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS
#endif /* _CLZEROINTRIN_H */
#endif /* __CLZEROINTRIN_H */

View File

@@ -156,6 +156,7 @@
#define bit_SMEP 0x00000080
#define bit_BMI2 0x00000100
#define bit_ENH_MOVSB 0x00000200
#define bit_INVPCID 0x00000400
#define bit_RTM 0x00000800
#define bit_MPX 0x00004000
#define bit_AVX512F 0x00010000
@@ -166,32 +167,49 @@
#define bit_CLFLUSHOPT 0x00800000
#define bit_CLWB 0x01000000
#define bit_AVX512PF 0x04000000
#define bit_AVX51SER 0x08000000
#define bit_AVX512ER 0x08000000
#define bit_AVX512CD 0x10000000
#define bit_SHA 0x20000000
#define bit_AVX512BW 0x40000000
#define bit_AVX512VL 0x80000000
/* Features in %ecx for leaf 7 sub-leaf 0 */
#define bit_PREFTCHWT1 0x00000001
#define bit_AVX512VBMI 0x00000002
#define bit_PKU 0x00000004
#define bit_OSPKE 0x00000010
#define bit_PREFTCHWT1 0x00000001
#define bit_AVX512VBMI 0x00000002
#define bit_PKU 0x00000004
#define bit_OSPKE 0x00000010
#define bit_WAITPKG 0x00000020
#define bit_AVX512VBMI2 0x00000040
#define bit_SHSTK 0x00000080
#define bit_GFNI 0x00000100
#define bit_VAES 0x00000200
#define bit_VPCLMULQDQ 0x00000400
#define bit_AVX512VNNI 0x00000800
#define bit_AVX512BITALG 0x00001000
#define bit_AVX512VPOPCNTDQ 0x00004000
#define bit_RDPID 0x00400000
#define bit_RDPID 0x00400000
#define bit_CLDEMOTE 0x02000000
#define bit_MOVDIRI 0x08000000
#define bit_MOVDIR64B 0x10000000
/* Features in %edx for leaf 7 sub-leaf 0 */
#define bit_AVX5124VNNIW 0x00000004
#define bit_AVX5124FMAPS 0x00000008
#define bit_PCONFIG 0x00040000
#define bit_IBT 0x00100000
/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
#define bit_XSAVEC 0x00000002
#define bit_XSAVES 0x00000008
/* Features in %eax for leaf 0x14 sub-leaf 0 */
#define bit_PTWRITE 0x00000010
/* Features in %ecx for leaf 0x80000001 */
#define bit_LAHF_LM 0x00000001
#define bit_ABM 0x00000020
#define bit_LZCNT bit_ABM /* for gcc compat */
#define bit_SSE4a 0x00000040
#define bit_PRFCHW 0x00000100
#define bit_XOP 0x00000800
@@ -206,8 +224,9 @@
#define bit_3DNOWP 0x40000000
#define bit_3DNOW 0x80000000
/* Features in %ebx for leaf 0x80000001 */
/* Features in %ebx for leaf 0x80000008 */
#define bit_CLZERO 0x00000001
#define bit_WBNOINVD 0x00000200
#if __i386__

View File

@@ -24,28 +24,36 @@
#ifndef __CLANG_CUDA_WRAPPERS_ALGORITHM
#define __CLANG_CUDA_WRAPPERS_ALGORITHM
// This header defines __device__ overloads of std::min/max, but only if we're
// <= C++11. In C++14, these functions are constexpr, and so are implicitly
// __host__ __device__.
// This header defines __device__ overloads of std::min/max.
//
// We don't support the initializer_list overloads because
// initializer_list::begin() and end() are not __host__ __device__ functions.
// Ideally we'd declare these functions only if we're <= C++11. In C++14,
// these functions are constexpr, and so are implicitly __host__ __device__.
//
// When compiling in C++14 mode, we could force std::min/max to have different
// implementations for host and device, by declaring the device overloads
// before the constexpr overloads appear. We choose not to do this because
// a) why write our own implementation when we can use one from the standard
// library? and
// b) libstdc++ is evil and declares min/max inside a header that is included
// *before* we include <algorithm>. So we'd have to unconditionally
// declare our __device__ overloads of min/max, but that would pollute
// things for people who choose not to include <algorithm>.
// However, the compiler being in C++14 mode does not imply that the standard
// library supports C++14. There is no macro we can test to check that the
// stdlib has constexpr std::min/max. Thus we have to unconditionally define
// our device overloads.
//
// A host+device function cannot be overloaded, and a constexpr function
// implicitly becomes host+device if there's no explicit host or device
// overload preceding it. So the simple thing to do would be to declare our
// device min/max overloads, and then #include_next <algorithm>. This way our
// device overloads would come first, and so if we have a C++14 stdlib, its
// min/max won't become host+device and conflict with our device overloads.
//
// But that also doesn't work. libstdc++ is evil and declares std::min/max in
// an internal header that is included *before* <algorithm>. Thus by the time
// we're inside of this file, std::min/max may already have been declared, and
// thus we can't prevent them from becoming host+device if they're constexpr.
//
// Therefore we perpetrate the following hack: We mark our __device__ overloads
// with __attribute__((enable_if(true, ""))). This causes the signature of the
// function to change without changing anything else about it. (Except that
// overload resolution will prefer it over the __host__ __device__ version
// rather than considering them equally good).
#include_next <algorithm>
#if __cplusplus <= 201103L
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
@@ -59,30 +67,43 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif
#pragma push_macro("_CPP14_CONSTEXPR")
#if __cplusplus >= 201402L
#define _CPP14_CONSTEXPR constexpr
#else
#define _CPP14_CONSTEXPR
#endif
template <class __T, class __Cmp>
inline __device__ const __T &
__attribute__((enable_if(true, "")))
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
max(const __T &__a, const __T &__b, __Cmp __cmp) {
return __cmp(__a, __b) ? __b : __a;
}
template <class __T>
inline __device__ const __T &
__attribute__((enable_if(true, "")))
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
max(const __T &__a, const __T &__b) {
return __a < __b ? __b : __a;
}
template <class __T, class __Cmp>
inline __device__ const __T &
__attribute__((enable_if(true, "")))
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
min(const __T &__a, const __T &__b, __Cmp __cmp) {
return __cmp(__b, __a) ? __b : __a;
}
template <class __T>
inline __device__ const __T &
__attribute__((enable_if(true, "")))
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
min(const __T &__a, const __T &__b) {
return __a < __b ? __b : __a;
return __a < __b ? __a : __b;
}
#pragma pop_macro("_CPP14_CONSTEXPR")
#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
@@ -92,5 +113,4 @@ _GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
#endif
#endif // __cplusplus <= 201103L
#endif // __CLANG_CUDA_WRAPPERS_ALGORITHM

View File

@@ -26,7 +26,6 @@
#include_next <new>
// Device overrides for placement new and delete.
#pragma push_macro("CUDA_NOEXCEPT")
#if __cplusplus >= 201103L
#define CUDA_NOEXCEPT noexcept
@@ -34,6 +33,55 @@
#define CUDA_NOEXCEPT
#endif
// Device overrides for non-placement new and delete.
// Device-side operator new: allocates with ::malloc, bumping a zero-byte
// request up to one byte first.
__device__ inline void *operator new(__SIZE_TYPE__ size) {
  return ::malloc(size == 0 ? 1 : size);
}
// Nothrow form: the tag argument is ignored and the request is forwarded to
// ::operator new(size).
__device__ inline void *
operator new(__SIZE_TYPE__ size, const std::nothrow_t &) CUDA_NOEXCEPT {
  void *block = ::operator new(size);
  return block;
}
// Array form: forwards to the scalar ::operator new(size).
__device__ inline void *operator new[](__SIZE_TYPE__ size) {
  void *block = ::operator new(size);
  return block;
}
// Nothrow array form: the tag argument is ignored and the request is
// forwarded to the scalar ::operator new(size).
// NOTE(review): unlike the scalar nothrow overload, this one carries no
// CUDA_NOEXCEPT; confirm whether that is intentional.
__device__ inline void *
operator new[](__SIZE_TYPE__ size, const std::nothrow_t &) {
  void *block = ::operator new(size);
  return block;
}
// Device-side operator delete: releases non-null pointers with ::free and
// does nothing for a null pointer.
__device__ inline void operator delete(void* ptr) CUDA_NOEXCEPT {
  if (!ptr) {
    return;
  }
  ::free(ptr);
}
// Nothrow form: the tag argument is ignored; forwards to
// ::operator delete(ptr).
__device__ inline void
operator delete(void *ptr, const std::nothrow_t &) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
// Array form: forwards to the scalar ::operator delete(ptr).
__device__ inline void
operator delete[](void* ptr) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
// Nothrow array form: the tag argument is ignored; forwards to the scalar
// ::operator delete(ptr).
__device__ inline void
operator delete[](void *ptr, const std::nothrow_t &) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
// Sized delete, C++14 only. The size argument is ignored; both overloads
// forward to the unsized ::operator delete(ptr).
// These are marked inline like every other overload in this header, so that
// including the header from several translation units does not emit multiple
// non-inline definitions of the same function.
#if __cplusplus >= 201402L
__device__ inline void operator delete(void *ptr,
                                       __SIZE_TYPE__ size) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
__device__ inline void operator delete[](void *ptr,
                                         __SIZE_TYPE__ size) CUDA_NOEXCEPT {
  ::operator delete(ptr);
}
#endif
// Device overrides for placement new and delete.
// Placement new: performs no allocation and hands back the caller-supplied
// address unchanged.
__device__ inline void *
operator new(__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT {
  return __ptr;
}
@@ -42,6 +90,7 @@ __device__ inline void *operator new[](__SIZE_TYPE__, void *__ptr) CUDA_NOEXCEPT
}
// Placement delete, scalar and array forms. Both bodies are empty, so neither
// overload releases any memory.
__device__ inline void operator delete(void *, void *) CUDA_NOEXCEPT {}
__device__ inline void operator delete[](void *, void *) CUDA_NOEXCEPT {}
#pragma pop_macro("CUDA_NOEXCEPT")
#endif // include guard

File diff suppressed because it is too large Load Diff

View File

@@ -21,18 +21,25 @@
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H
#error "Never use <f16cintrin.h> directly; include <emmintrin.h> instead."
#if !defined __IMMINTRIN_H
#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __F16CINTRIN_H
#define __F16CINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("f16c")))
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
/// \brief Converts a 16-bit half-precision float value into a 32-bit float
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
* but that's because icc can emulate these without f16c using a library call.
* Since we don't do that let's leave these in f16cintrin.h.
*/
/// Converts a 16-bit half-precision float value into a 32-bit float
/// value.
///
/// \headerfile <x86intrin.h>
@@ -42,7 +49,7 @@
/// \param __a
/// A 16-bit half-precision float value.
/// \returns The converted 32-bit float value.
static __inline float __DEFAULT_FN_ATTRS
static __inline float __DEFAULT_FN_ATTRS128
_cvtsh_ss(unsigned short __a)
{
__v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
@@ -50,7 +57,7 @@ _cvtsh_ss(unsigned short __a)
return r[0];
}
/// \brief Converts a 32-bit single-precision float value to a 16-bit
/// Converts a 32-bit single-precision float value to a 16-bit
/// half-precision float value.
///
/// \headerfile <x86intrin.h>
@@ -72,11 +79,11 @@ _cvtsh_ss(unsigned short __a)
/// 011: Truncate \n
/// 1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
#define _cvtss_sh(a, imm) __extension__ ({ \
#define _cvtss_sh(a, imm) \
(unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
(imm)))[0]); })
(imm)))[0])
/// \brief Converts a 128-bit vector containing 32-bit float values into a
/// Converts a 128-bit vector containing 32-bit float values into a
/// 128-bit vector containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
@@ -99,10 +106,10 @@ _cvtsh_ss(unsigned short __a)
/// \returns A 128-bit vector containing converted 16-bit half-precision float
/// values. The lower 64 bits are used to store the converted 16-bit
/// half-precision floating-point values.
#define _mm_cvtps_ph(a, imm) __extension__ ({ \
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); })
#define _mm_cvtps_ph(a, imm) \
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))
/// \brief Converts a 128-bit vector containing 16-bit half-precision float
/// Converts a 128-bit vector containing 16-bit half-precision float
/// values into a 128-bit vector containing 32-bit float values.
///
/// \headerfile <x86intrin.h>
@@ -113,12 +120,57 @@ _cvtsh_ss(unsigned short __a)
/// A 128-bit vector containing 16-bit half-precision float values. The lower
/// 64 bits are used in the conversion.
/// \returns A 128-bit vector of [4 x float] containing converted float values.
static __inline __m128 __DEFAULT_FN_ATTRS
static __inline __m128 __DEFAULT_FN_ATTRS128
_mm_cvtph_ps(__m128i __a)
{
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
}
#undef __DEFAULT_FN_ATTRS
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
/* The whole expansion is parenthesized so that an operator written after the
 * macro invocation (for example a subscript) applies to the __m128i result
 * rather than binding to the builtin call ahead of the cast. */
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
/// Widens the 16-bit half-precision float values held in a 128-bit vector
///    into a 256-bit vector of [8 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
///
/// \param __a
///    A 128-bit vector containing 16-bit half-precision float values to be
///    converted to 32-bit single-precision float values.
/// \returns A vector of [8 x float] containing the converted 32-bit
///    single-precision float values.
static __inline __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtph_ps(__m128i __a)
{
  __v8hi __halves = (__v8hi)__a;
  return (__m256)__builtin_ia32_vcvtph2ps256(__halves);
}
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif /* __F16CINTRIN_H */

View File

@@ -143,4 +143,18 @@
# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
#endif
/* FLT16_* characteristics. They are exposed only when the including code has
 * defined __STDC_WANT_IEC_60559_TYPES_EXT__; each one simply aliases the
 * corresponding compiler-predefined __FLT16_*__ macro. */
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
# define FLT16_MANT_DIG __FLT16_MANT_DIG__
# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
# define FLT16_DIG __FLT16_DIG__
# define FLT16_MIN_EXP __FLT16_MIN_EXP__
# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__
# define FLT16_MAX_EXP __FLT16_MAX_EXP__
# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__
# define FLT16_MAX __FLT16_MAX__
# define FLT16_EPSILON __FLT16_EPSILON__
# define FLT16_MIN __FLT16_MIN__
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
#endif /* __FLOAT_H */

View File

@@ -31,200 +31,202 @@
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif /* __FMA4INTRIN_H */

View File

@@ -1,4 +1,4 @@
/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -29,200 +29,202 @@
#define __FMAINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif /* __FMAINTRIN_H */

View File

@@ -30,7 +30,7 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fxsr")))
/// \brief Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
/// memory region pointed to by the input parameter \a __p.
///
/// \headerfile <x86intrin.h>
@@ -43,10 +43,10 @@
static __inline__ void __DEFAULT_FN_ATTRS
_fxsave(void *__p)
{
return __builtin_ia32_fxsave(__p);
__builtin_ia32_fxsave(__p);
}
/// \brief Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
/// memory region pointed to by the input parameter \a __p. The contents of
/// this memory region should have been written to by a previous \c _fxsave
/// or \c _fxsave64 intrinsic.
@@ -61,11 +61,11 @@ _fxsave(void *__p)
static __inline__ void __DEFAULT_FN_ATTRS
_fxrstor(void *__p)
{
return __builtin_ia32_fxrstor(__p);
__builtin_ia32_fxrstor(__p);
}
#ifdef __x86_64__
/// \brief Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte
/// memory region pointed to by the input parameter \a __p.
///
/// \headerfile <x86intrin.h>
@@ -78,10 +78,10 @@ _fxrstor(void *__p)
static __inline__ void __DEFAULT_FN_ATTRS
_fxsave64(void *__p)
{
return __builtin_ia32_fxsave64(__p);
__builtin_ia32_fxsave64(__p);
}
/// \brief Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
/// memory region pointed to by the input parameter \a __p. The contents of
/// this memory region should have been written to by a previous \c _fxsave
/// or \c _fxsave64 intrinsic.
@@ -96,7 +96,7 @@ _fxsave64(void *__p)
static __inline__ void __DEFAULT_FN_ATTRS
_fxrstor64(void *__p)
{
return __builtin_ia32_fxrstor64(__p);
__builtin_ia32_fxrstor64(__p);
}
#endif

208
c_headers/gfniintrin.h Normal file
View File

@@ -0,0 +1,208 @@
/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __GFNIINTRIN_H
#define __GFNIINTRIN_H
/* Affine-inverse GFNI macros for 128-, 256- and 512-bit vectors.
 *
 * Unmasked forms: forward to the __builtin_ia32_vgf2p8affineinvqb_* builtin of
 * the matching width, with A and B cast to that width's byte-vector type and
 * I cast to char.
 * NOTE(review): I is handed straight to the builtin, so it presumably has to
 * be an integer constant expression - confirm against the builtin definition.
 *
 * _mask_ forms: pass the unmasked result and S to __builtin_ia32_selectb_*
 * under mask U (presumably a per-byte select where a set bit picks the
 * computed byte and a clear bit picks the byte of S - confirm).
 *
 * _maskz_ forms: same as _mask_ with an all-zero vector in place of S.
 *
 * Every expansion is wrapped in an outer pair of parentheses so the leading
 * cast always applies to the complete builtin call, even when the macro is
 * followed by a postfix operator (for example []) at the use site.
 */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
                                                   (__v16qi)(__m128i)(B), \
                                                   (char)(I)))
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
      (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v16qi)(__m128i)(S)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
                                               U, A, B, I))
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
                                                   (__v32qi)(__m256i)(B), \
                                                   (char)(I)))
#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
      (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v32qi)(__m256i)(S)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
                                                  U, A, B, I))
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
                                                   (__v64qi)(__m512i)(B), \
                                                   (char)(I)))
#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
      (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
      (__v64qi)(__m512i)(S)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
  ((__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \
                                                  U, A, B, I))
/* Affine GFNI macros for 128-, 256- and 512-bit vectors.
 *
 * Unmasked forms: forward to the __builtin_ia32_vgf2p8affineqb_* builtin of
 * the matching width, with A and B cast to that width's byte-vector type and
 * I cast to char.
 * NOTE(review): I is handed straight to the builtin, so it presumably has to
 * be an integer constant expression - confirm against the builtin definition.
 *
 * _mask_ forms: pass the unmasked result and S to __builtin_ia32_selectb_*
 * under mask U (presumably a per-byte select where a set bit picks the
 * computed byte and a clear bit picks the byte of S - confirm).
 *
 * _maskz_ forms: same as _mask_ with an all-zero vector in place of S.
 *
 * Every expansion is wrapped in an outer pair of parentheses so the leading
 * cast always applies to the complete builtin call, even when the macro is
 * followed by a postfix operator (for example []) at the use site.
 */
#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
                                                (__v16qi)(__m128i)(B), \
                                                (char)(I)))
#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
      (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
      (__v16qi)(__m128i)(S)))
#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
                                            U, A, B, I))
#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
                                                (__v32qi)(__m256i)(B), \
                                                (char)(I)))
#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
      (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
      (__v32qi)(__m256i)(S)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
                                               U, A, B, I))
#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
  ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
                                                (__v64qi)(__m512i)(B), \
                                                (char)(I)))
#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
      (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
      (__v64qi)(__m512i)(S)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
  ((__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \
                                               U, A, B, I))
/* Default attributes for simple form (no masking). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
/* Default attributes for YMM unmasked form. */
#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
/* Default attributes for ZMM forms. */
#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
/* Default attributes for VLX forms. */
#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
/* Applies __builtin_ia32_vgf2p8mulb_v16qi to __A and __B, each viewed as a
 * __v16qi, and returns the result as an __m128i. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
{
  __v16qi __lhs = (__v16qi)__A;
  __v16qi __rhs = (__v16qi)__B;
  return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi(__lhs, __rhs);
}
/* Masked 128-bit form: computes _mm_gf2p8mul_epi8(__A, __B) and hands it,
 * together with __S, to __builtin_ia32_selectb_128 under mask __U.
 * NOTE(review): presumably a set mask bit keeps the computed byte and a clear
 * bit keeps the byte of __S - confirm against the select builtin. */
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
{
  __v16qi __product = (__v16qi)_mm_gf2p8mul_epi8(__A, __B);
  __v16qi __fallback = (__v16qi)__S;
  return (__m128i)__builtin_ia32_selectb_128(__U, __product, __fallback);
}
/* Zero-masked 128-bit form: same as _mm_mask_gf2p8mul_epi8 with the vector
 * returned by _mm_setzero_si128() supplied as the pass-through operand. */
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
{
  __m128i __zero = (__m128i)_mm_setzero_si128();
  return _mm_mask_gf2p8mul_epi8(__zero, __U, __A, __B);
}
/* Applies __builtin_ia32_vgf2p8mulb_v32qi to __A and __B, each viewed as a
 * __v32qi, and returns the result as an __m256i. */
static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi)__A;
  __v32qi __rhs = (__v32qi)__B;
  return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi(__lhs, __rhs);
}
/* Masked 256-bit form: computes _mm256_gf2p8mul_epi8(__A, __B) and hands it,
 * together with __S, to __builtin_ia32_selectb_256 under mask __U.
 * NOTE(review): presumably a set mask bit keeps the computed byte and a clear
 * bit keeps the byte of __S - confirm against the select builtin. */
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
{
  __v32qi __product = (__v32qi)_mm256_gf2p8mul_epi8(__A, __B);
  __v32qi __fallback = (__v32qi)__S;
  return (__m256i)__builtin_ia32_selectb_256(__U, __product, __fallback);
}
/* Zero-masked 256-bit form: same as _mm256_mask_gf2p8mul_epi8 with the vector
 * returned by _mm256_setzero_si256() supplied as the pass-through operand. */
static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
{
  __m256i __zero = (__m256i)_mm256_setzero_si256();
  return _mm256_mask_gf2p8mul_epi8(__zero, __U, __A, __B);
}
/* Applies __builtin_ia32_vgf2p8mulb_v64qi to __A and __B, each viewed as a
 * __v64qi, and returns the result as an __m512i. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
{
  __v64qi __lhs = (__v64qi)__A;
  __v64qi __rhs = (__v64qi)__B;
  return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi(__lhs, __rhs);
}
/* Masked 512-bit form: computes _mm512_gf2p8mul_epi8(__A, __B) and hands it,
 * together with __S, to __builtin_ia32_selectb_512 under mask __U.
 * NOTE(review): presumably a set mask bit keeps the computed byte and a clear
 * bit keeps the byte of __S - confirm against the select builtin. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
{
  __v64qi __product = (__v64qi)_mm512_gf2p8mul_epi8(__A, __B);
  __v64qi __fallback = (__v64qi)__S;
  return (__m512i)__builtin_ia32_selectb_512(__U, __product, __fallback);
}
/* Zero-masked 512-bit form: same as _mm512_mask_gf2p8mul_epi8 with the vector
 * returned by _mm512_setzero_si512() supplied as the pass-through operand. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
{
  __m512i __zero = (__m512i)_mm512_setzero_si512();
  return _mm512_mask_gf2p8mul_epi8(__zero, __U, __A, __B);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_Y
#undef __DEFAULT_FN_ATTRS_Z
#undef __DEFAULT_FN_ATTRS_VL128
#undef __DEFAULT_FN_ATTRS_VL256
#endif /* __GFNIINTRIN_H */

View File

@@ -214,7 +214,7 @@ __TM_failure_code(void* const __TM_buff)
/* These intrinsics are being made available for compatibility with
the IBM XL compiler. For documentation please see the "z/OS XL
C/C++ Programming Guide" publically available on the web. */
C/C++ Programming Guide" publicly available on the web. */
static __inline long __attribute__((__always_inline__, __nodebug__))
__TM_simple_begin ()

View File

@@ -70,4 +70,9 @@ __rdtscp(unsigned int *__A) {
#define _rdpmc(A) __rdpmc(A)
/* Thin always-inline wrapper: calls __builtin_ia32_wbinvd() and returns
 * nothing. Takes no arguments.
 * NOTE(review): presumably lowers to the WBINVD instruction - confirm. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_wbinvd(void) {
__builtin_ia32_wbinvd();
}
#endif /* __IA32INTRIN_H */

View File

@@ -58,61 +58,25 @@
#include <clflushoptintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif
/* The 256-bit versions of functions in f16cintrin.h.
Intel documents these as being in immintrin.h, and
they depend on typedefs from avxintrin.h. */
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif
/// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector
/// containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
/// A 256-bit vector containing 32-bit single-precision float values to be
/// converted to 16-bit half-precision float values.
/// \param imm
/// An immediate value controlling rounding using bits [2:0]: \n
/// 000: Nearest \n
/// 001: Down \n
/// 010: Up \n
/// 011: Truncate \n
/// 1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
/// float values.
#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
/// \brief Converts a 128-bit vector containing 16-bit half-precision float
/// values into a 256-bit vector of [8 x float].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
///
/// \param __a
/// A 128-bit vector containing 16-bit half-precision float values to be
/// converted to 32-bit single-precision float values.
/// \returns A vector of [8 x float] containing the converted 32-bit
/// single-precision float values.
static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
_mm256_cvtph_ps(__m128i __a)
{
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
}
#endif /* __AVX2__ */
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
@@ -126,6 +90,10 @@ _mm256_cvtph_ps(__m128i __a)
#include <lzcntintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif
@@ -142,6 +110,10 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512bwintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif
@@ -150,10 +122,29 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512vpopcntdqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
@@ -191,6 +182,15 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512vbmivlintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif
@@ -199,6 +199,26 @@ _mm256_cvtph_ps(__m128i __a)
#include <pkuintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Reads the IA32_TSC_AUX MSR (0xc0000103) and returns its value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The unsigned 32-bit value produced by the RDPID builtin.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  unsigned int __aux = __builtin_ia32_rdpid();
  return __aux;
}
#endif // __RDPID__
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
@@ -262,25 +282,25 @@ _readgsbase_u64(void)
/* Passes the 32-bit value __V to __builtin_ia32_wrfsbase32 (target feature
 * "fsgsbase"), which by its name writes the FS segment base.
 *
 * The function returns void, so the builtin is invoked as a plain statement:
 * a `return` carrying an expression is not permitted in a void function, and
 * the call is written exactly once. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
/* Passes the 64-bit value __V to __builtin_ia32_wrfsbase64 (target feature
 * "fsgsbase"), which by its name writes the FS segment base.
 *
 * The function returns void, so the builtin is invoked as a plain statement:
 * a `return` carrying an expression is not permitted in a void function, and
 * the call is written exactly once. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
/* Passes the 32-bit value __V to __builtin_ia32_wrgsbase32 (target feature
 * "fsgsbase"), which by its name writes the GS segment base.
 *
 * The function returns void, so the builtin is invoked as a plain statement:
 * a `return` carrying an expression is not permitted in a void function, and
 * the call is written exactly once. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
/* Passes the 64-bit value __V to __builtin_ia32_wrgsbase64 (target feature
 * "fsgsbase"), which by its name writes the GS segment base.
 *
 * The function returns void, so the builtin is invoked as a plain statement:
 * a `return` carrying an expression is not permitted in a void function, and
 * the call is written exactly once. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
#endif
@@ -315,8 +335,133 @@ _writegsbase_u64(unsigned long long __V)
#include <xsavesintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
* whereas others are also available at all times. */
#include <adxintrin.h>
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || \
defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif
#ifdef _MSC_VER
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
/* Atomically exchanges *_Target with _Value and returns the value that was
 * stored in *_Target before the exchange.
 *
 * A raw 0xf2 byte is emitted in front of `lock xchg`; going by the function
 * name this is the HLE acquire hint prefix (NOTE(review): confirm the
 * encoding against the ISA manual). _Value is a read-write register operand,
 * so after the xchg it holds the old memory contents. The "memory" clobber
 * tells the compiler the asm may read or write memory it cannot see. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
/* Atomically exchanges *_Target with _Value and returns the value that was
 * stored in *_Target before the exchange.
 *
 * A raw 0xf3 byte is emitted in front of `lock xchg`; going by the function
 * name this is the HLE release hint prefix (NOTE(review): confirm the
 * encoding against the ISA manual). _Value is a read-write register operand,
 * so after the xchg it holds the old memory contents. The "memory" clobber
 * tells the compiler the asm may read or write memory it cannot see. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
#endif
#if defined(__x86_64__)
/* 64-bit form: atomically exchanges *_Target with _Value and returns the
 * value that was stored in *_Target before the exchange.
 *
 * A raw 0xf2 byte precedes `lock xchg`; per the function name this is the
 * HLE acquire hint prefix (NOTE(review): confirm against the ISA manual).
 * _Value is a read-write register operand and carries the old memory
 * contents out of the asm. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
__asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
/* 64-bit form: atomically exchanges *_Target with _Value and returns the
 * value that was stored in *_Target before the exchange.
 *
 * A raw 0xf3 byte precedes `lock xchg`; per the function name this is the
 * HLE release hint prefix (NOTE(review): confirm against the ISA manual).
 * _Value is a read-write register operand and carries the old memory
 * contents out of the asm. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
__asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
/* Atomic compare-and-exchange on *_Destination using `lock cmpxchg`,
 * preceded by a raw 0xf2 byte (per the function name, the HLE acquire hint
 * prefix; NOTE(review): confirm the encoding against the ISA manual).
 *
 * _Comparand is bound to the accumulator ("+a") as a read-write operand,
 * _Exchange is a plain register input, and *_Destination is a read-write
 * memory operand. The function returns _Comparand as left by the
 * instruction, i.e. the value cmpxchg reports for the destination. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
long _Exchange, long _Comparand) {
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
}
/* Atomic compare-and-exchange on *_Destination using `lock cmpxchg`,
 * preceded by a raw 0xf3 byte (per the function name, the HLE release hint
 * prefix; NOTE(review): confirm the encoding against the ISA manual).
 *
 * _Comparand is bound to the accumulator ("+a") as a read-write operand,
 * _Exchange is a plain register input, and *_Destination is a read-write
 * memory operand. The function returns _Comparand as left by the
 * instruction, i.e. the value cmpxchg reports for the destination. */
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
long _Exchange, long _Comparand) {
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
}
#endif
#if defined(__x86_64__)
/* 64-bit atomic compare-and-exchange on *_Destination using `lock cmpxchg`,
 * preceded by a raw 0xf2 byte (per the function name, the HLE acquire hint
 * prefix; NOTE(review): confirm against the ISA manual).
 *
 * _Comparand lives in the accumulator ("+a") and is returned as the
 * instruction leaves it; _Exchange is a register input. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
}
/* 64-bit atomic compare-and-exchange on *_Destination using `lock cmpxchg`,
 * preceded by a raw 0xf3 byte (per the function name, the HLE release hint
 * prefix; NOTE(review): confirm against the ISA manual).
 *
 * _Comparand lives in the accumulator ("+a") and is returned as the
 * instruction leaves it; _Exchange is a register input. */
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
}
#endif
#ifdef __cplusplus
}
#endif
#undef __DEFAULT_FN_ATTRS
#endif /* _MSC_VER */
#endif /* __IMMINTRIN_H */

View File

@@ -38,6 +38,10 @@
#include <armintr.h>
#endif
#if defined(__aarch64__)
#include <arm64intr.h>
#endif
/* For the definition of jmp_buf. */
#if __STDC_HOSTED__
#include <setjmp.h>
@@ -79,6 +83,7 @@ void __incfsdword(unsigned long);
void __incfsword(unsigned long);
unsigned long __indword(unsigned short);
void __indwordstring(unsigned short, unsigned long *, unsigned long);
void __int2c(void);
void __invlpg(void *);
unsigned short __inword(unsigned short);
void __inwordstring(unsigned short, unsigned short *, unsigned long);
@@ -136,6 +141,7 @@ void __svm_stgi(void);
void __svm_vmload(size_t);
void __svm_vmrun(size_t);
void __svm_vmsave(size_t);
void __ud2(void);
unsigned __int64 __ull_rshift(unsigned __int64, int);
void __vmx_off(void);
void __vmx_vmptrst(unsigned __int64 *);
@@ -157,25 +163,15 @@ static __inline__
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
static __inline__
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
static __inline__
unsigned char _bittest(long const *, long);
static __inline__
unsigned char _bittestandcomplement(long *, long);
static __inline__
unsigned char _bittestandreset(long *, long);
static __inline__
unsigned char _bittestandset(long *, long);
void __cdecl _disable(void);
void __cdecl _enable(void);
long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);
unsigned char _interlockedbittestandreset(long volatile *, long);
unsigned char _interlockedbittestandset(long volatile *, long);
long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long);
long _InterlockedCompareExchange_HLERelease(long volatile *, long, long);
__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64,
__int64);
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
__int64);
void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,
void *);
void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,
@@ -252,24 +248,15 @@ void __writegsbyte(unsigned long, unsigned char);
void __writegsdword(unsigned long, unsigned long);
void __writegsqword(unsigned long, unsigned __int64);
void __writegsword(unsigned long, unsigned short);
static __inline__
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
unsigned char _bittest64(__int64 const *, __int64);
static __inline__
unsigned char _bittestandcomplement64(__int64 *, __int64);
static __inline__
unsigned char _bittestandreset64(__int64 *, __int64);
static __inline__
unsigned char _bittestandset64(__int64 *, __int64);
long _InterlockedAnd_np(long volatile *_Value, long _Mask);
short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
static __inline__
unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
long _Comparand);
@@ -283,10 +270,6 @@ unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
__int64 *_ComparandResult);
short _InterlockedCompareExchange16_np(short volatile *_Destination,
short _Exchange, short _Comparand);
__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64,
__int64);
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
__int64);
__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,
@@ -316,7 +299,12 @@ unsigned __int64 _umul128(unsigned __int64,
#endif /* __x86_64__ */
#if defined(__x86_64__) || defined(__arm__)
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
static __inline__
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
__int64 _InterlockedDecrement64(__int64 volatile *_Addend);
@@ -337,78 +325,6 @@ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
#endif
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
\*----------------------------------------------------------------------------*/
/* Returns bit _BitPos of *_BitBase as 0 or 1. *_BitBase is not modified. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittest(long const *_BitBase, long _BitPos) {
  /* Move the requested bit down to position 0, then isolate it. */
  long _Shifted = *_BitBase >> _BitPos;
  return _Shifted & 1;
}
/* Toggles bit _BitPos of *_BitBase and returns the bit's value from before
 * the toggle (0 or 1).
 *
 * The mask is formed from a long constant (1l) so that it has the same width
 * as *_BitBase, matching the _interlockedbittestandset_* helpers. Shifting a
 * plain int 1 is undefined once _BitPos reaches the width of int on targets
 * where long is wider than int. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittestandcomplement(long *_BitBase, long _BitPos) {
  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
  *_BitBase = *_BitBase ^ (1l << _BitPos);
  return _Res;
}
/* Clears bit _BitPos of *_BitBase and returns the bit's value from before
 * the clear (0 or 1).
 *
 * The mask is formed from a long constant (1l) so that it has the same width
 * as *_BitBase, matching the _interlockedbittestandset_* helpers. Shifting a
 * plain int 1 is undefined once _BitPos reaches the width of int on targets
 * where long is wider than int. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittestandreset(long *_BitBase, long _BitPos) {
  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
  *_BitBase = *_BitBase & ~(1l << _BitPos);
  return _Res;
}
/* Sets bit _BitPos of *_BitBase and returns the bit's value from before the
 * set (0 or 1).
 *
 * The mask is formed from a long constant (1l) so that it has the same width
 * as *_BitBase, matching the _interlockedbittestandset_* helpers. Shifting a
 * plain int 1 is undefined once _BitPos reaches the width of int on targets
 * where long is wider than int. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittestandset(long *_BitBase, long _BitPos) {
  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
  *_BitBase = *_BitBase | (1l << _BitPos);
  return _Res;
}
#if defined(__arm__) || defined(__aarch64__)
/* Atomically ORs bit _BitPos into *_BitBase with acquire ordering and returns
 * that bit's previous value (0 or 1). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_interlockedbittestandset_acq(long volatile *_BitBase, long _BitPos) {
  const long _Mask = 1l << _BitPos;
  long _Old = __atomic_fetch_or(_BitBase, _Mask, __ATOMIC_ACQUIRE);
  return (_Old >> _BitPos) & 1;
}
/* Atomically ORs bit _BitPos into *_BitBase with relaxed ordering and returns
 * that bit's previous value (0 or 1). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_interlockedbittestandset_nf(long volatile *_BitBase, long _BitPos) {
  const long _Mask = 1l << _BitPos;
  long _Old = __atomic_fetch_or(_BitBase, _Mask, __ATOMIC_RELAXED);
  return (_Old >> _BitPos) & 1;
}
/* Atomically ORs bit _BitPos into *_BitBase with release ordering and returns
 * that bit's previous value (0 or 1). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_interlockedbittestandset_rel(long volatile *_BitBase, long _BitPos) {
  const long _Mask = 1l << _BitPos;
  long _Old = __atomic_fetch_or(_BitBase, _Mask, __ATOMIC_RELEASE);
  return (_Old >> _BitPos) & 1;
}
#endif
#ifdef __x86_64__
/* Returns bit _BitPos of the 64-bit value *_BitBase as 0 or 1. *_BitBase is
 * not modified. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittest64(__int64 const *_BitBase, __int64 _BitPos) {
  /* Move the requested bit down to position 0, then isolate it. */
  __int64 _Shifted = *_BitBase >> _BitPos;
  return _Shifted & 1;
}
/* Toggles bit _BitPos of the 64-bit value *_BitBase and returns the bit's
 * value from before the toggle (0 or 1). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) {
  const __int64 _Old = *_BitBase;
  *_BitBase = _Old ^ (1ll << _BitPos);
  return (unsigned char)((_Old >> _BitPos) & 1);
}
/* Clears bit _BitPos of the 64-bit value *_BitBase and returns the bit's
 * value from before the clear (0 or 1). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) {
  const __int64 _Old = *_BitBase;
  *_BitBase = _Old & ~(1ll << _BitPos);
  return (unsigned char)((_Old >> _BitPos) & 1);
}
/* Sets bit _BitPos of the 64-bit value *_BitBase and returns the bit's value
 * from before the set (0 or 1). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_bittestandset64(__int64 *_BitBase, __int64 _BitPos) {
  const __int64 _Old = *_BitBase;
  *_BitBase = _Old | (1ll << _BitPos);
  return (unsigned char)((_Old >> _BitPos) & 1);
}
/* Atomically ORs bit _BitPos into the 64-bit value *_BitBase with
 * sequentially consistent ordering and returns that bit's previous value
 * (0 or 1). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) {
  const long long _Mask = 1ll << _BitPos;
  long long _Old = __atomic_fetch_or(_BitBase, _Mask, __ATOMIC_SEQ_CST);
  return (_Old >> _BitPos) & 1;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
@@ -598,6 +514,23 @@ _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) {
}
#endif
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,
long _BitPos);
unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
long _BitPos);
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Or
\*----------------------------------------------------------------------------*/
#if defined(__arm__) || defined(__aarch64__)
@@ -828,7 +761,7 @@ _InterlockedCompareExchange_nf(long volatile *_Destination,
__ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
return _Comparand;
}
static __inline__ short __DEFAULT_FN_ATTRS
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_rel(long volatile *_Destination,
long _Exchange, long _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,
@@ -864,33 +797,40 @@ _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
#if defined(__i386__) || defined(__x86_64__)
/* Copies __n bytes from __src to __dst with a single `rep movsb`.
 *
 * The instruction advances the destination, source and count registers, so
 * all three are read-write ("+D", "+S", "+c") operands, and the "memory"
 * clobber tells the compiler the buffers are written. Exactly one asm
 * statement is issued: a form that lists the registers as inputs only would
 * leave the compiler unaware that they and memory had changed. */
static __inline__ void __DEFAULT_FN_ATTRS
__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {
  __asm__ __volatile__("rep movsb" : "+D"(__dst), "+S"(__src), "+c"(__n)
                       : : "memory");
}
/* Copies __n elements from __src to __dst with a single `rep movsl`.
 *
 * The instruction advances the destination, source and count registers, so
 * all three are read-write ("+D", "+S", "+c") operands, and the "memory"
 * clobber tells the compiler the buffers are written. Exactly one asm
 * statement is issued: a form that lists the registers as inputs only would
 * leave the compiler unaware that they and memory had changed. */
static __inline__ void __DEFAULT_FN_ATTRS
__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {
  __asm__ __volatile__("rep movsl" : "+D"(__dst), "+S"(__src), "+c"(__n)
                       : : "memory");
}
/* Copies __n 16-bit elements from __src to __dst with a single `rep movsw`.
 *
 * The instruction advances the destination, source and count registers, so
 * all three are read-write ("+D", "+S", "+c") operands, and the "memory"
 * clobber tells the compiler the buffers are written. Exactly one asm
 * statement is issued: a form that lists the registers as inputs only would
 * leave the compiler unaware that they and memory had changed. */
static __inline__ void __DEFAULT_FN_ATTRS
__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {
  __asm__ __volatile__("rep movsw" : "+D"(__dst), "+S"(__src), "+c"(__n)
                       : : "memory");
}
/* Stores the value __x into __n consecutive elements starting at __dst with
 * a single `rep stosl`.
 *
 * The instruction advances the destination and count registers, so both are
 * read-write ("+D", "+c") operands; __x is a plain accumulator input, and
 * the "memory" clobber tells the compiler the buffer is written. Exactly one
 * asm statement is issued: a form that lists the registers as inputs only
 * would leave the compiler unaware that they and memory had changed. */
static __inline__ void __DEFAULT_FN_ATTRS
__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
  __asm__ __volatile__("rep stosl" : "+D"(__dst), "+c"(__n) : "a"(__x)
                       : "memory");
}
/* Stores the 16-bit value __x into __n consecutive elements starting at
 * __dst with a single `rep stosw`.
 *
 * The instruction advances the destination and count registers, so both are
 * read-write ("+D", "+c") operands; __x is a plain accumulator input, and
 * the "memory" clobber tells the compiler the buffer is written. Exactly one
 * asm statement is issued: a form that lists the registers as inputs only
 * would leave the compiler unaware that they and memory had changed. */
static __inline__ void __DEFAULT_FN_ATTRS
__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {
  __asm__ __volatile__("rep stosw" : "+D"(__dst), "+c"(__n) : "a"(__x)
                       : "memory");
}
#endif
#ifdef __x86_64__
/* Copies __n 64-bit elements from __src to __dst with a single `rep movsq`.
 *
 * The instruction advances the destination, source and count registers, so
 * all three are read-write ("+D", "+S", "+c") operands, and the "memory"
 * clobber tells the compiler the buffers are written. Exactly one asm
 * statement is issued: a form that lists the registers as inputs only would
 * leave the compiler unaware that they and memory had changed. */
static __inline__ void __DEFAULT_FN_ATTRS
__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {
  __asm__ __volatile__("rep movsq" : "+D"(__dst), "+S"(__src), "+c"(__n)
                       : : "memory");
}
/* Stores the 64-bit value __x into __n consecutive elements starting at
 * __dst with a single `rep stosq`.
 *
 * The instruction advances the destination and count registers, so both are
 * read-write ("+D", "+c") operands; __x is a plain accumulator input, and
 * the "memory" clobber tells the compiler the buffer is written. Exactly one
 * asm statement is issued: a form that lists the registers as inputs only
 * would leave the compiler unaware that they and memory had changed. */
static __inline__ void __DEFAULT_FN_ATTRS
__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
  __asm__ __volatile__("rep stosq" : "+D"(__dst), "+c"(__n) : "a"(__x)
                       : "memory");
}
#endif

37
c_headers/invpcidintrin.h Normal file
View File

@@ -0,0 +1,37 @@
/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <invpcidintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __INVPCIDINTRIN_H
#define __INVPCIDINTRIN_H
/// Issues the INVPCID instruction through the __builtin_ia32_invpcid builtin.
///
/// \param __type
///    Passed unchanged as the builtin's first argument (presumably the
///    invalidation type selector; confirm against the ISA manual).
/// \param __descriptor
///    Passed unchanged as the builtin's second argument (presumably a pointer
///    to the in-memory INVPCID descriptor; confirm against the ISA manual).
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("invpcid")))
_invpcid(unsigned int __type, void *__descriptor) {
__builtin_ia32_invpcid(__type, __descriptor);
}
#endif /* __INVPCIDINTRIN_H */

View File

@@ -31,7 +31,7 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp")))
/// \brief Parses the LWPCB at the specified address and enables
/// Parses the LWPCB at the specified address and enables
/// profiling if valid.
///
/// \headerfile <x86intrin.h>
@@ -48,7 +48,7 @@ __llwpcb (void *__addr)
__builtin_ia32_llwpcb(__addr);
}
/// \brief Flushes the LWP state to memory and returns the address of the LWPCB.
/// Flushes the LWP state to memory and returns the address of the LWPCB.
///
/// \headerfile <x86intrin.h>
///
@@ -58,12 +58,12 @@ __llwpcb (void *__addr)
/// Address to the current Lightweight Profiling Control Block (LWPCB).
/// If LWP is not currently enabled, returns NULL.
static __inline__ void* __DEFAULT_FN_ATTRS
__slwpcb (void)
{
  /* Declared with (void) so the definition is a proper prototype; empty
   * parentheses leave the parameter list unspecified in C. The result of the
   * slwpcb builtin is returned unchanged. */
  return __builtin_ia32_slwpcb();
}
/// \brief Inserts programmed event record into the LWP event ring buffer
/// Inserts programmed event record into the LWP event ring buffer
/// and advances the ring buffer pointer.
///
/// \headerfile <x86intrin.h>
@@ -84,7 +84,7 @@ __slwpcb ()
(__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \
(unsigned int) (FLAGS)))
/// \brief Decrements the LWP programmed value sample event counter. If the result is
/// Decrements the LWP programmed value sample event counter. If the result is
/// negative, inserts an event record into the LWP event ring buffer in memory
/// and advances the ring buffer pointer.
///
@@ -104,7 +104,7 @@ __slwpcb ()
#ifdef __x86_64__
/// \brief Inserts programmed event record into the LWP event ring buffer
/// Inserts programmed event record into the LWP event ring buffer
/// and advances the ring buffer pointer.
///
/// \headerfile <x86intrin.h>
@@ -125,7 +125,7 @@ __slwpcb ()
(__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \
(unsigned int) (FLAGS)))
/// \brief Decrements the LWP programmed value sample event counter. If the result is
/// Decrements the LWP programmed value sample event counter. If the result is
/// negative, inserts an event record into the LWP event ring buffer in memory
/// and advances the ring buffer pointer.
///

View File

@@ -31,7 +31,7 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
/// \brief Counts the number of leading zero bits in the operand.
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -47,7 +47,7 @@ __lzcnt16(unsigned short __X)
return __X ? __builtin_clzs(__X) : 16;
}
/// \brief Counts the number of leading zero bits in the operand.
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -57,13 +57,14 @@ __lzcnt16(unsigned short __X)
/// An unsigned 32-bit integer whose leading zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of leading zero
/// bits in the operand.
/// \see _lzcnt_u32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__lzcnt32(unsigned int __X)
{
  /* A zero operand is answered directly with the full width (32) instead of
   * being handed to __builtin_clz. */
  if (__X == 0)
    return 32;
  return __builtin_clz(__X);
}
/// \brief Counts the number of leading zero bits in the operand.
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -73,6 +74,7 @@ __lzcnt32(unsigned int __X)
/// An unsigned 32-bit integer whose leading zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of leading zero
/// bits in the operand.
/// \see __lzcnt32
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_lzcnt_u32(unsigned int __X)
{
@@ -80,7 +82,7 @@ _lzcnt_u32(unsigned int __X)
}
#ifdef __x86_64__
/// \brief Counts the number of leading zero bits in the operand.
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -90,13 +92,14 @@ _lzcnt_u32(unsigned int __X)
/// An unsigned 64-bit integer whose leading zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of leading zero
/// bits in the operand.
/// \see _lzcnt_u64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__lzcnt64(unsigned long long __X)
{
  /* A zero operand is answered directly with the full width (64) instead of
   * being handed to __builtin_clzll. */
  if (__X == 0)
    return 64;
  return __builtin_clzll(__X);
}
/// \brief Counts the number of leading zero bits in the operand.
/// Counts the number of leading zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
@@ -106,6 +109,7 @@ __lzcnt64(unsigned long long __X)
/// An unsigned 64-bit integer whose leading zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of leading zero
/// bits in the operand.
/// \see __lzcnt64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_lzcnt_u64(unsigned long long __X)
{

View File

@@ -30,9 +30,9 @@
typedef float __v2sf __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))
/* Issues FEMMS through __builtin_ia32_femms.
 *
 * The function has exactly one set of declaration specifiers, with its
 * attributes spelled out explicitly instead of using __DEFAULT_FN_ATTRS
 * (presumably because it takes and returns no vector values; confirm against
 * the macro's definition). */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
_m_femms(void) {
  __builtin_ia32_femms();
}
@@ -134,7 +134,7 @@ _m_pmulhrw(__m64 __m1, __m64 __m2) {
/* Handle the 3dnowa instructions here. */
#undef __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))
static __inline__ __m64 __DEFAULT_FN_ATTRS
_m_pf2iw(__m64 __m) {

View File

@@ -32,27 +32,27 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
/// Clears the MMX state by setting the state of the x87 stack registers
///    to empty.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
///
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
_mm_empty(void)
{
  /* Single declaration with explicit attributes; the builtin is called once
   * and has no result to return. */
  __builtin_ia32_emms();
}
/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
/// This intrinsic corresponds to the <c> MOVD </c> instruction.
///
/// \param __i
/// A 32-bit integer value.
@@ -64,12 +64,12 @@ _mm_cvtsi32_si64(int __i)
return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
}
/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
/// signed integer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
/// This intrinsic corresponds to the <c> MOVD </c> instruction.
///
/// \param __m
/// A 64-bit integer vector.
@@ -81,11 +81,11 @@ _mm_cvtsi64_si32(__m64 __m)
return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
}
/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
/// Casts a 64-bit signed integer value into a 64-bit integer vector.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
///
/// \param __i
/// A 64-bit signed integer.
@@ -97,11 +97,11 @@ _mm_cvtsi64_m64(long long __i)
return (__m64)__i;
}
/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
/// Casts a 64-bit integer vector into a 64-bit signed integer value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VMOVQ / MOVD </c> instruction.
/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
///
/// \param __m
/// A 64-bit integer vector.
@@ -113,7 +113,7 @@ _mm_cvtm64_si64(__m64 __m)
return (long long)__m;
}
/// \brief Converts 16-bit signed integers from both 64-bit integer vector
/// Converts 16-bit signed integers from both 64-bit integer vector
/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
/// a 64-bit integer vector of [8 x i8] as the result. Positive values
/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
@@ -143,7 +143,7 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Converts 32-bit signed integers from both 64-bit integer vector
/// Converts 32-bit signed integers from both 64-bit integer vector
/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
/// a 64-bit integer vector of [4 x i16] as the result. Positive values
/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
@@ -173,7 +173,7 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
}
/// \brief Converts 16-bit signed integers from both 64-bit integer vector
/// Converts 16-bit signed integers from both 64-bit integer vector
/// parameters of [4 x i16] into 8-bit unsigned integer values, and
/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
@@ -203,7 +203,7 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
/// and interleaves them into a 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
@@ -230,7 +230,7 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
@@ -253,7 +253,7 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
///
/// \headerfile <x86intrin.h>
@@ -274,7 +274,7 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
}
/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
/// and interleaves them into a 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
@@ -301,7 +301,7 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
@@ -324,7 +324,7 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
///
/// \headerfile <x86intrin.h>
@@ -345,7 +345,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
}
/// \brief Adds each 8-bit integer element of the first 64-bit integer vector
/// Adds each 8-bit integer element of the first 64-bit integer vector
/// of [8 x i8] to the corresponding 8-bit integer element of the second
/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
/// packed into a 64-bit integer vector of [8 x i8].
@@ -366,7 +366,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Adds each 16-bit integer element of the first 64-bit integer vector
/// Adds each 16-bit integer element of the first 64-bit integer vector
/// of [4 x i16] to the corresponding 16-bit integer element of the second
/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
/// packed into a 64-bit integer vector of [4 x i16].
@@ -387,7 +387,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Adds each 32-bit integer element of the first 64-bit integer vector
/// Adds each 32-bit integer element of the first 64-bit integer vector
/// of [2 x i32] to the corresponding 32-bit integer element of the second
/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
/// packed into a 64-bit integer vector of [2 x i32].
@@ -408,7 +408,7 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
}
/// \brief Adds each 8-bit signed integer element of the first 64-bit integer
/// Adds each 8-bit signed integer element of the first 64-bit integer
/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
@@ -430,7 +430,7 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Adds each 16-bit signed integer element of the first 64-bit integer
/// Adds each 16-bit signed integer element of the first 64-bit integer
/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
@@ -453,7 +453,7 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
/// Adds each 8-bit unsigned integer element of the first 64-bit integer
/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
@@ -475,7 +475,7 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
/// Adds each 16-bit unsigned integer element of the first 64-bit integer
/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
@@ -497,7 +497,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Subtracts each 8-bit integer element of the second 64-bit integer
/// Subtracts each 8-bit integer element of the second 64-bit integer
/// vector of [8 x i8] from the corresponding 8-bit integer element of the
/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
/// are packed into a 64-bit integer vector of [8 x i8].
@@ -518,7 +518,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Subtracts each 16-bit integer element of the second 64-bit integer
/// Subtracts each 16-bit integer element of the second 64-bit integer
/// vector of [4 x i16] from the corresponding 16-bit integer element of the
/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
/// results are packed into a 64-bit integer vector of [4 x i16].
@@ -539,7 +539,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Subtracts each 32-bit integer element of the second 64-bit integer
/// Subtracts each 32-bit integer element of the second 64-bit integer
/// vector of [2 x i32] from the corresponding 32-bit integer element of the
/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
/// results are packed into a 64-bit integer vector of [2 x i32].
@@ -560,7 +560,7 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
}
/// \brief Subtracts each 8-bit signed integer element of the second 64-bit
/// Subtracts each 8-bit signed integer element of the second 64-bit
/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
/// element of the first 64-bit integer vector of [8 x i8]. Positive results
/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
@@ -583,7 +583,7 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Subtracts each 16-bit signed integer element of the second 64-bit
/// Subtracts each 16-bit signed integer element of the second 64-bit
/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
/// element of the first 64-bit integer vector of [4 x i16]. Positive results
/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
@@ -606,7 +606,7 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
/// Subtracts each 8-bit unsigned integer element of the second 64-bit
/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
/// element of the first 64-bit integer vector of [8 x i8].
///
@@ -630,7 +630,7 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
/// Subtracts each 16-bit unsigned integer element of the second 64-bit
/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
/// integer element of the first 64-bit integer vector of [4 x i16].
///
@@ -654,7 +654,7 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
/// Multiplies each 16-bit signed integer element of the first 64-bit
/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
/// element of the second 64-bit integer vector of [4 x i16] and get four
/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
@@ -681,7 +681,7 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
/// Multiplies each 16-bit signed integer element of the first 64-bit
/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
@@ -702,7 +702,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
/// Multiplies each 16-bit signed integer element of the first 64-bit
/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
@@ -723,7 +723,7 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Left-shifts each 16-bit signed integer element of the first
/// Left-shifts each 16-bit signed integer element of the first
/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
/// of bits specified by the second parameter, which is a 64-bit integer. The
/// lower 16 bits of the results are packed into a 64-bit integer vector of
@@ -746,7 +746,7 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
}
/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
/// Left-shifts each 16-bit signed integer element of a 64-bit integer
/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
/// The lower 16 bits of the results are packed into a 64-bit integer vector
/// of [4 x i16].
@@ -768,7 +768,7 @@ _mm_slli_pi16(__m64 __m, int __count)
return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
}
/// \brief Left-shifts each 32-bit signed integer element of the first
/// Left-shifts each 32-bit signed integer element of the first
/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
/// of bits specified by the second parameter, which is a 64-bit integer. The
/// lower 32 bits of the results are packed into a 64-bit integer vector of
@@ -791,7 +791,7 @@ _mm_sll_pi32(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
}
/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
/// Left-shifts each 32-bit signed integer element of a 64-bit integer
/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
/// The lower 32 bits of the results are packed into a 64-bit integer vector
/// of [2 x i32].
@@ -813,7 +813,7 @@ _mm_slli_pi32(__m64 __m, int __count)
return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
}
/// \brief Left-shifts the first 64-bit integer parameter by the number of bits
/// Left-shifts the first 64-bit integer parameter by the number of bits
/// specified by the second 64-bit integer parameter. The lower 64 bits of
/// result are returned.
///
@@ -833,7 +833,7 @@ _mm_sll_si64(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
}
/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
/// Left-shifts the first parameter, which is a 64-bit integer, by the
/// number of bits specified by the second parameter, which is a 32-bit
/// integer. The lower 64 bits of result are returned.
///
@@ -853,7 +853,7 @@ _mm_slli_si64(__m64 __m, int __count)
return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
}
/// \brief Right-shifts each 16-bit integer element of the first parameter,
/// Right-shifts each 16-bit integer element of the first parameter,
/// which is a 64-bit integer vector of [4 x i16], by the number of bits
/// specified by the second parameter, which is a 64-bit integer.
///
@@ -877,7 +877,7 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
}
/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
/// of [4 x i16] by the number of bits specified by a 32-bit integer.
///
/// High-order bits are filled with the sign bit of the initial value of each
@@ -900,7 +900,7 @@ _mm_srai_pi16(__m64 __m, int __count)
return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
}
/// \brief Right-shifts each 32-bit integer element of the first parameter,
/// Right-shifts each 32-bit integer element of the first parameter,
/// which is a 64-bit integer vector of [2 x i32], by the number of bits
/// specified by the second parameter, which is a 64-bit integer.
///
@@ -924,7 +924,7 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
}
/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
/// of [2 x i32] by the number of bits specified by a 32-bit integer.
///
/// High-order bits are filled with the sign bit of the initial value of each
@@ -947,7 +947,7 @@ _mm_srai_pi32(__m64 __m, int __count)
return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
}
/// \brief Right-shifts each 16-bit integer element of the first parameter,
/// Right-shifts each 16-bit integer element of the first parameter,
/// which is a 64-bit integer vector of [4 x i16], by the number of bits
/// specified by the second parameter, which is a 64-bit integer.
///
@@ -970,7 +970,7 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
}
/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
/// Right-shifts each 16-bit integer element of a 64-bit integer vector
/// of [4 x i16] by the number of bits specified by a 32-bit integer.
///
/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
@@ -992,7 +992,7 @@ _mm_srli_pi16(__m64 __m, int __count)
return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
}
/// \brief Right-shifts each 32-bit integer element of the first parameter,
/// Right-shifts each 32-bit integer element of the first parameter,
/// which is a 64-bit integer vector of [2 x i32], by the number of bits
/// specified by the second parameter, which is a 64-bit integer.
///
@@ -1015,7 +1015,7 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
}
/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
/// Right-shifts each 32-bit integer element of a 64-bit integer vector
/// of [2 x i32] by the number of bits specified by a 32-bit integer.
///
/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
@@ -1037,7 +1037,7 @@ _mm_srli_pi32(__m64 __m, int __count)
return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
}
/// \brief Right-shifts the first 64-bit integer parameter by the number of bits
/// Right-shifts the first 64-bit integer parameter by the number of bits
/// specified by the second 64-bit integer parameter.
///
/// High-order bits are cleared.
@@ -1057,7 +1057,7 @@ _mm_srl_si64(__m64 __m, __m64 __count)
return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
}
/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
/// Right-shifts the first parameter, which is a 64-bit integer, by the
/// number of bits specified by the second parameter, which is a 32-bit
/// integer.
///
@@ -1078,7 +1078,7 @@ _mm_srli_si64(__m64 __m, int __count)
return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
}
/// \brief Performs a bitwise AND of two 64-bit integer vectors.
/// Performs a bitwise AND of two 64-bit integer vectors.
///
/// \headerfile <x86intrin.h>
///
@@ -1096,7 +1096,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
}
/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
/// Performs a bitwise NOT of the first 64-bit integer vector, and then
/// performs a bitwise AND of the intermediate result and the second 64-bit
/// integer vector.
///
@@ -1117,7 +1117,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
}
/// \brief Performs a bitwise OR of two 64-bit integer vectors.
/// Performs a bitwise OR of two 64-bit integer vectors.
///
/// \headerfile <x86intrin.h>
///
@@ -1135,7 +1135,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
}
/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
///
/// \headerfile <x86intrin.h>
///
@@ -1153,7 +1153,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
}
/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
/// [8 x i8] to determine if the element of the first vector is equal to the
/// corresponding element of the second vector.
///
@@ -1175,7 +1175,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
/// [4 x i16] to determine if the element of the first vector is equal to the
/// corresponding element of the second vector.
///
@@ -1197,7 +1197,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
/// [2 x i32] to determine if the element of the first vector is equal to the
/// corresponding element of the second vector.
///
@@ -1219,7 +1219,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
}
/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
/// [8 x i8] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
@@ -1241,7 +1241,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
}
/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
/// [4 x i16] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
@@ -1263,7 +1263,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
}
/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
/// Compares the 32-bit integer elements of two 64-bit integer vectors of
/// [2 x i32] to determine if the element of the first vector is greater than
/// the corresponding element of the second vector.
///
@@ -1285,20 +1285,20 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
}
/// \brief Constructs a 64-bit integer vector initialized to zero.
/// Constructs a 64-bit integer vector initialized to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.
/// This intrinsic corresponds to the <c> PXOR </c> instruction.
///
/// \returns An initialized 64-bit integer vector with all elements set to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setzero_si64(void)
{
return (__m64){ 0LL };
return __extension__ (__m64){ 0LL };
}
/// \brief Constructs a 64-bit integer vector initialized with the specified
/// Constructs a 64-bit integer vector initialized with the specified
/// 32-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1319,7 +1319,7 @@ _mm_set_pi32(int __i1, int __i0)
return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
}
/// \brief Constructs a 64-bit integer vector initialized with the specified
/// Constructs a 64-bit integer vector initialized with the specified
/// 16-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1342,7 +1342,7 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
}
/// \brief Constructs a 64-bit integer vector initialized with the specified
/// Constructs a 64-bit integer vector initialized with the specified
/// 8-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1375,13 +1375,14 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
__b4, __b5, __b6, __b7);
}
/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
/// Constructs a 64-bit integer vector of [2 x i32], with each of the
/// 32-bit integer vector elements set to the specified 32-bit integer
/// value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.
/// This intrinsic is a utility function and does not correspond to a specific
/// instruction.
///
/// \param __i
/// A 32-bit integer value used to initialize each vector element of the
@@ -1393,13 +1394,14 @@ _mm_set1_pi32(int __i)
return _mm_set_pi32(__i, __i);
}
/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
/// Constructs a 64-bit integer vector of [4 x i16], with each of the
/// 16-bit integer vector elements set to the specified 16-bit integer
/// value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.
/// This intrinsic is a utility function and does not correspond to a specific
/// instruction.
///
/// \param __w
/// A 16-bit integer value used to initialize each vector element of the
@@ -1411,13 +1413,13 @@ _mm_set1_pi16(short __w)
return _mm_set_pi16(__w, __w, __w, __w);
}
/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
/// Constructs a 64-bit integer vector of [8 x i8], with each of the
/// 8-bit integer vector elements set to the specified 8-bit integer value.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPUNPCKLBW + VPSHUFLW / PUNPCKLBW +
/// PSHUFLW </c> instruction.
/// This intrinsic is a utility function and does not correspond to a specific
/// instruction.
///
/// \param __b
/// An 8-bit integer value used to initialize each vector element of the
@@ -1429,7 +1431,7 @@ _mm_set1_pi8(char __b)
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}
/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
/// Constructs a 64-bit integer vector, initialized in reverse order with
/// the specified 32-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1450,7 +1452,7 @@ _mm_setr_pi32(int __i0, int __i1)
return _mm_set_pi32(__i1, __i0);
}
/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
/// Constructs a 64-bit integer vector, initialized in reverse order with
/// the specified 16-bit integer values.
///
/// \headerfile <x86intrin.h>
@@ -1473,7 +1475,7 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
return _mm_set_pi16(__w3, __w2, __w1, __w0);
}
/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
/// Constructs a 64-bit integer vector, initialized in reverse order with
/// the specified 8-bit integer values.
///
/// \headerfile <x86intrin.h>

View File

@@ -38,6 +38,7 @@ module _Builtin_intrinsics [system] [extern_c] {
explicit module neon {
requires neon
header "arm_neon.h"
header "arm_fp16.h"
export *
}
}
@@ -62,6 +63,17 @@ module _Builtin_intrinsics [system] [extern_c] {
textual header "fma4intrin.h"
textual header "mwaitxintrin.h"
textual header "clzerointrin.h"
textual header "wbnoinvdintrin.h"
textual header "cldemoteintrin.h"
textual header "waitpkgintrin.h"
textual header "movdirintrin.h"
textual header "pconfigintrin.h"
textual header "sgxintrin.h"
textual header "ptwriteintrin.h"
textual header "invpcidintrin.h"
textual header "__wmmintrin_aes.h"
textual header "__wmmintrin_pclmul.h"
explicit module mm_malloc {
requires !freestanding
@@ -128,14 +140,6 @@ module _Builtin_intrinsics [system] [extern_c] {
export aes
export pclmul
}
explicit module aes {
header "__wmmintrin_aes.h"
}
explicit module pclmul {
header "__wmmintrin_pclmul.h"
}
}
explicit module systemz {

63
c_headers/movdirintrin.h Normal file
View File

@@ -0,0 +1,63 @@
/*===------------------------- movdirintrin.h ------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <movdirintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _MOVDIRINTRIN_H
#define _MOVDIRINTRIN_H
/*
* Move doubleword as direct store.
*
* Thin wrapper that forwards to __builtin_ia32_directstore_u32. __dst is
* reinterpreted as a pointer to unsigned int before being handed to the
* builtin; the cast on __value is a no-op, since the parameter is already
* unsigned int.
*
* The function is always inlined and is compiled for the "movdiri" target
* feature (see the __target__ attribute), so callers do not need to enable
* that feature for the whole translation unit.
*
* __dst:   address the 32-bit value is stored to.
* __value: 32-bit value to store.
*/
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movdiri")))
_directstoreu_u32 (void *__dst, unsigned int __value)
{
__builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value);
}
#ifdef __x86_64__
/*
* Move quadword as direct store.
*
* Thin wrapper that forwards to __builtin_ia32_directstore_u64. __dst is
* reinterpreted as a pointer to unsigned long before being handed to the
* builtin; __value is passed through unchanged.
*
* Only available when __x86_64__ is defined (see the enclosing #ifdef).
* The function is always inlined and is compiled for the "movdiri" target
* feature (see the __target__ attribute).
*
* __dst:   address the value is stored to.
* __value: value to store.
*
* NOTE(review): the parameter type is unsigned long, which is 64 bits wide
* only on LP64 targets. On an LLP64 x86_64 target it would be 32 bits --
* confirm this matches the builtin's signature and the intended quadword
* width there.
*/
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movdiri")))
_directstoreu_u64 (void *__dst, unsigned long __value)
{
__builtin_ia32_directstore_u64((unsigned long *)__dst, __value);
}
#endif /* __x86_64__ */
/*
* movdir64b - Move 64 bytes as direct store.
*
* The destination must be 64 byte aligned, and the store is atomic.
* The source address has no alignment requirement, and the load from
* the source address is not atomic.
*
* Thin wrapper that forwards both pointers unchanged to
* __builtin_ia32_movdir64b. The function is always inlined and is compiled
* for the "movdir64b" target feature (see the __target__ attribute).
*
* __dst: destination address. Declared with align_value(64), which tells
*        the compiler the pointer value is 64-byte aligned; passing a
*        pointer that is not violates the requirement stated above.
* __src: source address of the 64 bytes to move. It is const-qualified, so
*        the data is not modified through this pointer.
*/
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movdir64b")))
_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src)
{
__builtin_ia32_movdir64b(__dst, __src);
}
#endif /* _MOVDIRINTRIN_H */

View File

@@ -25,8 +25,8 @@
#error "Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _MWAITXINTRIN_H
#define _MWAITXINTRIN_H
#ifndef __MWAITXINTRIN_H
#define __MWAITXINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx")))
@@ -44,4 +44,4 @@ _mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)
#undef __DEFAULT_FN_ATTRS
#endif /* _MWAITXINTRIN_H */
#endif /* __MWAITXINTRIN_H */

View File

@@ -21,10 +21,10 @@
*===-----------------------------------------------------------------------===
*/
#ifndef _NMMINTRIN_H
#define _NMMINTRIN_H
#ifndef __NMMINTRIN_H
#define __NMMINTRIN_H
/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,
just include it now then. */
#include <smmintrin.h>
#endif /* _NMMINTRIN_H */
#endif /* __NMMINTRIN_H */

View File

@@ -11381,6 +11381,8 @@ half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);
* For each component of a vector type,
* result[i] = if MSB of c[i] is set ? b[i] : a[i].
* For a scalar type, result = c ? b : a.
* b and a must have the same type.
* c must have the same number of elements and bits as a.
*/
char __ovld __cnfn select(char a, char b, char c);
uchar __ovld __cnfn select(uchar a, uchar b, char c);
@@ -11394,60 +11396,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, char8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);
char16 __ovld __cnfn select(char16 a, char16 b, char16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);
short __ovld __cnfn select(short a, short b, char c);
ushort __ovld __cnfn select(ushort a, ushort b, char c);
short2 __ovld __cnfn select(short2 a, short2 b, char2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, char2 c);
short3 __ovld __cnfn select(short3 a, short3 b, char3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, char3 c);
short4 __ovld __cnfn select(short4 a, short4 b, char4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, char4 c);
short8 __ovld __cnfn select(short8 a, short8 b, char8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, char8 c);
short16 __ovld __cnfn select(short16 a, short16 b, char16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, char16 c);
int __ovld __cnfn select(int a, int b, char c);
uint __ovld __cnfn select(uint a, uint b, char c);
int2 __ovld __cnfn select(int2 a, int2 b, char2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, char2 c);
int3 __ovld __cnfn select(int3 a, int3 b, char3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, char3 c);
int4 __ovld __cnfn select(int4 a, int4 b, char4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, char4 c);
int8 __ovld __cnfn select(int8 a, int8 b, char8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, char8 c);
int16 __ovld __cnfn select(int16 a, int16 b, char16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, char16 c);
long __ovld __cnfn select(long a, long b, char c);
ulong __ovld __cnfn select(ulong a, ulong b, char c);
long2 __ovld __cnfn select(long2 a, long2 b, char2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, char2 c);
long3 __ovld __cnfn select(long3 a, long3 b, char3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, char3 c);
long4 __ovld __cnfn select(long4 a, long4 b, char4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, char4 c);
long8 __ovld __cnfn select(long8 a, long8 b, char8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, char8 c);
long16 __ovld __cnfn select(long16 a, long16 b, char16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, char16 c);
float __ovld __cnfn select(float a, float b, char c);
float2 __ovld __cnfn select(float2 a, float2 b, char2 c);
float3 __ovld __cnfn select(float3 a, float3 b, char3 c);
float4 __ovld __cnfn select(float4 a, float4 b, char4 c);
float8 __ovld __cnfn select(float8 a, float8 b, char8 c);
float16 __ovld __cnfn select(float16 a, float16 b, char16 c);
char __ovld __cnfn select(char a, char b, short c);
uchar __ovld __cnfn select(uchar a, uchar b, short c);
char2 __ovld __cnfn select(char2 a, char2 b, short2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, short2 c);
char3 __ovld __cnfn select(char3 a, char3 b, short3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, short3 c);
char4 __ovld __cnfn select(char4 a, char4 b, short4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, short4 c);
char8 __ovld __cnfn select(char8 a, char8 b, short8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, short8 c);
char16 __ovld __cnfn select(char16 a, char16 b, short16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, short16 c);
short __ovld __cnfn select(short a, short b, short c);
ushort __ovld __cnfn select(ushort a, ushort b, short c);
short2 __ovld __cnfn select(short2 a, short2 b, short2 c);
@@ -11460,60 +11409,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, short8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);
short16 __ovld __cnfn select(short16 a, short16 b, short16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);
int __ovld __cnfn select(int a, int b, short c);
uint __ovld __cnfn select(uint a, uint b, short c);
int2 __ovld __cnfn select(int2 a, int2 b, short2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, short2 c);
int3 __ovld __cnfn select(int3 a, int3 b, short3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, short3 c);
int4 __ovld __cnfn select(int4 a, int4 b, short4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, short4 c);
int8 __ovld __cnfn select(int8 a, int8 b, short8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, short8 c);
int16 __ovld __cnfn select(int16 a, int16 b, short16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, short16 c);
long __ovld __cnfn select(long a, long b, short c);
ulong __ovld __cnfn select(ulong a, ulong b, short c);
long2 __ovld __cnfn select(long2 a, long2 b, short2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, short2 c);
long3 __ovld __cnfn select(long3 a, long3 b, short3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, short3 c);
long4 __ovld __cnfn select(long4 a, long4 b, short4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, short4 c);
long8 __ovld __cnfn select(long8 a, long8 b, short8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, short8 c);
long16 __ovld __cnfn select(long16 a, long16 b, short16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, short16 c);
float __ovld __cnfn select(float a, float b, short c);
float2 __ovld __cnfn select(float2 a, float2 b, short2 c);
float3 __ovld __cnfn select(float3 a, float3 b, short3 c);
float4 __ovld __cnfn select(float4 a, float4 b, short4 c);
float8 __ovld __cnfn select(float8 a, float8 b, short8 c);
float16 __ovld __cnfn select(float16 a, float16 b, short16 c);
char __ovld __cnfn select(char a, char b, int c);
uchar __ovld __cnfn select(uchar a, uchar b, int c);
char2 __ovld __cnfn select(char2 a, char2 b, int2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, int2 c);
char3 __ovld __cnfn select(char3 a, char3 b, int3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, int3 c);
char4 __ovld __cnfn select(char4 a, char4 b, int4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, int4 c);
char8 __ovld __cnfn select(char8 a, char8 b, int8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, int8 c);
char16 __ovld __cnfn select(char16 a, char16 b, int16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, int16 c);
short __ovld __cnfn select(short a, short b, int c);
ushort __ovld __cnfn select(ushort a, ushort b, int c);
short2 __ovld __cnfn select(short2 a, short2 b, int2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, int2 c);
short3 __ovld __cnfn select(short3 a, short3 b, int3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, int3 c);
short4 __ovld __cnfn select(short4 a, short4 b, int4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, int4 c);
short8 __ovld __cnfn select(short8 a, short8 b, int8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, int8 c);
short16 __ovld __cnfn select(short16 a, short16 b, int16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, int16 c);
int __ovld __cnfn select(int a, int b, int c);
uint __ovld __cnfn select(uint a, uint b, int c);
int2 __ovld __cnfn select(int2 a, int2 b, int2 c);
@@ -11526,60 +11422,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, int8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);
int16 __ovld __cnfn select(int16 a, int16 b, int16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);
long __ovld __cnfn select(long a, long b, int c);
ulong __ovld __cnfn select(ulong a, ulong b, int c);
long2 __ovld __cnfn select(long2 a, long2 b, int2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, int2 c);
long3 __ovld __cnfn select(long3 a, long3 b, int3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, int3 c);
long4 __ovld __cnfn select(long4 a, long4 b, int4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, int4 c);
long8 __ovld __cnfn select(long8 a, long8 b, int8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, int8 c);
long16 __ovld __cnfn select(long16 a, long16 b, int16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, int16 c);
float __ovld __cnfn select(float a, float b, int c);
float2 __ovld __cnfn select(float2 a, float2 b, int2 c);
float3 __ovld __cnfn select(float3 a, float3 b, int3 c);
float4 __ovld __cnfn select(float4 a, float4 b, int4 c);
float8 __ovld __cnfn select(float8 a, float8 b, int8 c);
float16 __ovld __cnfn select(float16 a, float16 b, int16 c);
char __ovld __cnfn select(char a, char b, long c);
uchar __ovld __cnfn select(uchar a, uchar b, long c);
char2 __ovld __cnfn select(char2 a, char2 b, long2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, long2 c);
char3 __ovld __cnfn select(char3 a, char3 b, long3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, long3 c);
char4 __ovld __cnfn select(char4 a, char4 b, long4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, long4 c);
char8 __ovld __cnfn select(char8 a, char8 b, long8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, long8 c);
char16 __ovld __cnfn select(char16 a, char16 b, long16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, long16 c);
short __ovld __cnfn select(short a, short b, long c);
ushort __ovld __cnfn select(ushort a, ushort b, long c);
short2 __ovld __cnfn select(short2 a, short2 b, long2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, long2 c);
short3 __ovld __cnfn select(short3 a, short3 b, long3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, long3 c);
short4 __ovld __cnfn select(short4 a, short4 b, long4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, long4 c);
short8 __ovld __cnfn select(short8 a, short8 b, long8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, long8 c);
short16 __ovld __cnfn select(short16 a, short16 b, long16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, long16 c);
int __ovld __cnfn select(int a, int b, long c);
uint __ovld __cnfn select(uint a, uint b, long c);
int2 __ovld __cnfn select(int2 a, int2 b, long2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, long2 c);
int3 __ovld __cnfn select(int3 a, int3 b, long3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, long3 c);
int4 __ovld __cnfn select(int4 a, int4 b, long4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, long4 c);
int8 __ovld __cnfn select(int8 a, int8 b, long8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, long8 c);
int16 __ovld __cnfn select(int16 a, int16 b, long16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, long16 c);
long __ovld __cnfn select(long a, long b, long c);
ulong __ovld __cnfn select(ulong a, ulong b, long c);
long2 __ovld __cnfn select(long2 a, long2 b, long2 c);
@@ -11592,12 +11441,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, long8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);
long16 __ovld __cnfn select(long16 a, long16 b, long16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);
float __ovld __cnfn select(float a, float b, long c);
float2 __ovld __cnfn select(float2 a, float2 b, long2 c);
float3 __ovld __cnfn select(float3 a, float3 b, long3 c);
float4 __ovld __cnfn select(float4 a, float4 b, long4 c);
float8 __ovld __cnfn select(float8 a, float8 b, long8 c);
float16 __ovld __cnfn select(float16 a, float16 b, long16 c);
char __ovld __cnfn select(char a, char b, uchar c);
uchar __ovld __cnfn select(uchar a, uchar b, uchar c);
char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);
@@ -11610,60 +11454,7 @@ char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);
char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);
short __ovld __cnfn select(short a, short b, uchar c);
ushort __ovld __cnfn select(ushort a, ushort b, uchar c);
short2 __ovld __cnfn select(short2 a, short2 b, uchar2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uchar2 c);
short3 __ovld __cnfn select(short3 a, short3 b, uchar3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uchar3 c);
short4 __ovld __cnfn select(short4 a, short4 b, uchar4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uchar4 c);
short8 __ovld __cnfn select(short8 a, short8 b, uchar8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uchar8 c);
short16 __ovld __cnfn select(short16 a, short16 b, uchar16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uchar16 c);
int __ovld __cnfn select(int a, int b, uchar c);
uint __ovld __cnfn select(uint a, uint b, uchar c);
int2 __ovld __cnfn select(int2 a, int2 b, uchar2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, uchar2 c);
int3 __ovld __cnfn select(int3 a, int3 b, uchar3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, uchar3 c);
int4 __ovld __cnfn select(int4 a, int4 b, uchar4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, uchar4 c);
int8 __ovld __cnfn select(int8 a, int8 b, uchar8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, uchar8 c);
int16 __ovld __cnfn select(int16 a, int16 b, uchar16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, uchar16 c);
long __ovld __cnfn select(long a, long b, uchar c);
ulong __ovld __cnfn select(ulong a, ulong b, uchar c);
long2 __ovld __cnfn select(long2 a, long2 b, uchar2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uchar2 c);
long3 __ovld __cnfn select(long3 a, long3 b, uchar3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uchar3 c);
long4 __ovld __cnfn select(long4 a, long4 b, uchar4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uchar4 c);
long8 __ovld __cnfn select(long8 a, long8 b, uchar8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uchar8 c);
long16 __ovld __cnfn select(long16 a, long16 b, uchar16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uchar16 c);
float __ovld __cnfn select(float a, float b, uchar c);
float2 __ovld __cnfn select(float2 a, float2 b, uchar2 c);
float3 __ovld __cnfn select(float3 a, float3 b, uchar3 c);
float4 __ovld __cnfn select(float4 a, float4 b, uchar4 c);
float8 __ovld __cnfn select(float8 a, float8 b, uchar8 c);
float16 __ovld __cnfn select(float16 a, float16 b, uchar16 c);
char __ovld __cnfn select(char a, char b, ushort c);
uchar __ovld __cnfn select(uchar a, uchar b, ushort c);
char2 __ovld __cnfn select(char2 a, char2 b, ushort2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ushort2 c);
char3 __ovld __cnfn select(char3 a, char3 b, ushort3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ushort3 c);
char4 __ovld __cnfn select(char4 a, char4 b, ushort4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ushort4 c);
char8 __ovld __cnfn select(char8 a, char8 b, ushort8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ushort8 c);
char16 __ovld __cnfn select(char16 a, char16 b, ushort16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ushort16 c);
short __ovld __cnfn select(short a, short b, ushort c);
ushort __ovld __cnfn select(ushort a, ushort b, ushort c);
short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);
@@ -11676,60 +11467,7 @@ short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);
short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);
int __ovld __cnfn select(int a, int b, ushort c);
uint __ovld __cnfn select(uint a, uint b, ushort c);
int2 __ovld __cnfn select(int2 a, int2 b, ushort2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, ushort2 c);
int3 __ovld __cnfn select(int3 a, int3 b, ushort3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, ushort3 c);
int4 __ovld __cnfn select(int4 a, int4 b, ushort4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, ushort4 c);
int8 __ovld __cnfn select(int8 a, int8 b, ushort8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, ushort8 c);
int16 __ovld __cnfn select(int16 a, int16 b, ushort16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, ushort16 c);
long __ovld __cnfn select(long a, long b, ushort c);
ulong __ovld __cnfn select(ulong a, ulong b, ushort c);
long2 __ovld __cnfn select(long2 a, long2 b, ushort2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ushort2 c);
long3 __ovld __cnfn select(long3 a, long3 b, ushort3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ushort3 c);
long4 __ovld __cnfn select(long4 a, long4 b, ushort4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ushort4 c);
long8 __ovld __cnfn select(long8 a, long8 b, ushort8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ushort8 c);
long16 __ovld __cnfn select(long16 a, long16 b, ushort16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ushort16 c);
float __ovld __cnfn select(float a, float b, ushort c);
float2 __ovld __cnfn select(float2 a, float2 b, ushort2 c);
float3 __ovld __cnfn select(float3 a, float3 b, ushort3 c);
float4 __ovld __cnfn select(float4 a, float4 b, ushort4 c);
float8 __ovld __cnfn select(float8 a, float8 b, ushort8 c);
float16 __ovld __cnfn select(float16 a, float16 b, ushort16 c);
char __ovld __cnfn select(char a, char b, uint c);
uchar __ovld __cnfn select(uchar a, uchar b, uint c);
char2 __ovld __cnfn select(char2 a, char2 b, uint2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uint2 c);
char3 __ovld __cnfn select(char3 a, char3 b, uint3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uint3 c);
char4 __ovld __cnfn select(char4 a, char4 b, uint4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uint4 c);
char8 __ovld __cnfn select(char8 a, char8 b, uint8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uint8 c);
char16 __ovld __cnfn select(char16 a, char16 b, uint16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uint16 c);
short __ovld __cnfn select(short a, short b, uint c);
ushort __ovld __cnfn select(ushort a, ushort b, uint c);
short2 __ovld __cnfn select(short2 a, short2 b, uint2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uint2 c);
short3 __ovld __cnfn select(short3 a, short3 b, uint3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uint3 c);
short4 __ovld __cnfn select(short4 a, short4 b, uint4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uint4 c);
short8 __ovld __cnfn select(short8 a, short8 b, uint8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uint8 c);
short16 __ovld __cnfn select(short16 a, short16 b, uint16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uint16 c);
int __ovld __cnfn select(int a, int b, uint c);
uint __ovld __cnfn select(uint a, uint b, uint c);
int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);
@@ -11742,60 +11480,13 @@ int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);
int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);
long __ovld __cnfn select(long a, long b, uint c);
ulong __ovld __cnfn select(ulong a, ulong b, uint c);
long2 __ovld __cnfn select(long2 a, long2 b, uint2 c);
ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uint2 c);
long3 __ovld __cnfn select(long3 a, long3 b, uint3 c);
ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uint3 c);
long4 __ovld __cnfn select(long4 a, long4 b, uint4 c);
ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uint4 c);
long8 __ovld __cnfn select(long8 a, long8 b, uint8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uint8 c);
long16 __ovld __cnfn select(long16 a, long16 b, uint16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uint16 c);
float __ovld __cnfn select(float a, float b, uint c);
float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);
float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);
float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);
float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);
float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);
char __ovld __cnfn select(char a, char b, ulong c);
uchar __ovld __cnfn select(uchar a, uchar b, ulong c);
char2 __ovld __cnfn select(char2 a, char2 b, ulong2 c);
uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ulong2 c);
char3 __ovld __cnfn select(char3 a, char3 b, ulong3 c);
uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ulong3 c);
char4 __ovld __cnfn select(char4 a, char4 b, ulong4 c);
uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ulong4 c);
char8 __ovld __cnfn select(char8 a, char8 b, ulong8 c);
uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ulong8 c);
char16 __ovld __cnfn select(char16 a, char16 b, ulong16 c);
uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ulong16 c);
short __ovld __cnfn select(short a, short b, ulong c);
ushort __ovld __cnfn select(ushort a, ushort b, ulong c);
short2 __ovld __cnfn select(short2 a, short2 b, ulong2 c);
ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ulong2 c);
short3 __ovld __cnfn select(short3 a, short3 b, ulong3 c);
ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ulong3 c);
short4 __ovld __cnfn select(short4 a, short4 b, ulong4 c);
ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ulong4 c);
short8 __ovld __cnfn select(short8 a, short8 b, ulong8 c);
ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ulong8 c);
short16 __ovld __cnfn select(short16 a, short16 b, ulong16 c);
ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ulong16 c);
int __ovld __cnfn select(int a, int b, ulong c);
uint __ovld __cnfn select(uint a, uint b, ulong c);
int2 __ovld __cnfn select(int2 a, int2 b, ulong2 c);
uint2 __ovld __cnfn select(uint2 a, uint2 b, ulong2 c);
int3 __ovld __cnfn select(int3 a, int3 b, ulong3 c);
uint3 __ovld __cnfn select(uint3 a, uint3 b, ulong3 c);
int4 __ovld __cnfn select(int4 a, int4 b, ulong4 c);
uint4 __ovld __cnfn select(uint4 a, uint4 b, ulong4 c);
int8 __ovld __cnfn select(int8 a, int8 b, ulong8 c);
uint8 __ovld __cnfn select(uint8 a, uint8 b, ulong8 c);
int16 __ovld __cnfn select(int16 a, int16 b, ulong16 c);
uint16 __ovld __cnfn select(uint16 a, uint16 b, ulong16 c);
long __ovld __cnfn select(long a, long b, ulong c);
ulong __ovld __cnfn select(ulong a, ulong b, ulong c);
long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);
@@ -11808,12 +11499,7 @@ long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);
ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);
long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);
ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);
float __ovld __cnfn select(float a, float b, ulong c);
float2 __ovld __cnfn select(float2 a, float2 b, ulong2 c);
float3 __ovld __cnfn select(float3 a, float3 b, ulong3 c);
float4 __ovld __cnfn select(float4 a, float4 b, ulong4 c);
float8 __ovld __cnfn select(float8 a, float8 b, ulong8 c);
float16 __ovld __cnfn select(float16 a, float16 b, ulong16 c);
#ifdef cl_khr_fp64
double __ovld __cnfn select(double a, double b, long c);
double2 __ovld __cnfn select(double2 a, double2 b, long2 c);
@@ -11854,7 +11540,7 @@ half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);
*
* vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).
*
* The address computed as (p + (offset * n)) must be
* The address computed as (p + (offset * n)) must be
* 8-bit aligned if gentype is char, uchar;
* 16-bit aligned if gentype is short, ushort, half;
* 32-bit aligned if gentype is int, uint, float;
@@ -13141,13 +12827,14 @@ void __ovld __conv barrier(cl_mem_fence_flags flags);
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
typedef enum memory_scope
{
memory_scope_work_item,
memory_scope_work_group,
memory_scope_device,
memory_scope_all_svm_devices,
memory_scope_sub_group
typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
@@ -13175,7 +12862,7 @@ void __ovld mem_fence(cl_mem_fence_flags flags);
* Read memory barrier that orders only
* loads.
* The flags argument specifies the memory
* address space and can be set to to a
* address space and can be set to a
* combination of the following literal
* values:
* CLK_LOCAL_MEM_FENCE
@@ -13187,7 +12874,7 @@ void __ovld read_mem_fence(cl_mem_fence_flags flags);
* Write memory barrier that orders only
* stores.
* The flags argument specifies the memory
* address space and can be set to to a
* address space and can be set to a
* combination of the following literal
* values:
* CLK_LOCAL_MEM_FENCE
@@ -13201,7 +12888,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags);
cl_mem_fence_flags __ovld get_fence(const void *ptr);
cl_mem_fence_flags __ovld get_fence(void *ptr);
/**
/**
* Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions
* and checked in Sema since they should be declared as
* addr gentype* to_addr (gentype*);
@@ -13952,11 +13639,11 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
// enum values aligned with what clang uses in EmitAtomicExpr()
typedef enum memory_order
{
memory_order_relaxed,
memory_order_acquire,
memory_order_release,
memory_order_acq_rel,
memory_order_seq_cst
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
@@ -14086,7 +13773,7 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera
// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.
// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.
#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);
uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
@@ -14884,7 +14571,7 @@ int printf(__constant const char* st, ...);
* only. The filter_mode specified in sampler
* must be set to CLK_FILTER_NEAREST; otherwise
* the values returned are undefined.
* The read_image{f|i|ui} calls that take
* integer coordinates must use a sampler with
* normalized coordinates set to
@@ -15734,8 +15421,8 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_dept
#define CLK_DEPTH_STENCIL 0x10BE
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
#define CLK_sRGB 0x10BF
#define CLK_sRGBA 0x10C1
#define CLK_sRGBx 0x10C0
#define CLK_sRGBA 0x10C1
#define CLK_sBGRA 0x10C2
#define CLK_ABGR 0x10C3
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@@ -16199,6 +15886,313 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
#endif //cl_khr_subgroups cl_intel_subgroups
#if defined(cl_intel_subgroups)
// Intel-Specific Sub Group Functions
float __ovld __conv intel_sub_group_shuffle( float x, uint c );
float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );
float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );
float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );
float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );
float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );
int __ovld __conv intel_sub_group_shuffle( int x, uint c );
int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );
int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );
int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );
int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );
int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );
uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );
uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );
uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );
uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );
uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );
uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );
long __ovld __conv intel_sub_group_shuffle( long x, uint c );
ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );
float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );
float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );
float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );
float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );
float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );
float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );
int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );
int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );
int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );
int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );
int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );
int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );
uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );
uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );
uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );
uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );
uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );
uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );
long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );
ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );
float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );
float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );
float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );
float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );
float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );
float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );
int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );
int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );
int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );
int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );
int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );
int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );
uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );
uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );
uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );
uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );
uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );
uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );
long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );
ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );
float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );
float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );
float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );
float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );
float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );
float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );
int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );
int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );
int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );
int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );
int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );
int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );
uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );
uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );
uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );
uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );
uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );
uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
uint __ovld __conv intel_sub_group_block_read( const __global uint* p );
uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );
uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );
uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );
void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );
void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );
#ifdef cl_khr_fp16
half __ovld __conv intel_sub_group_shuffle( half x, uint c );
half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );
half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );
half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );
#endif
#if defined(cl_khr_fp64)
double __ovld __conv intel_sub_group_shuffle( double x, uint c );
double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );
double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );
double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );
#endif
#endif //cl_intel_subgroups
#if defined(cl_intel_subgroups_short)
short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );
short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );
short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );
short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );
short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );
ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );
ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );
ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );
ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );
ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );
short __ovld __conv intel_sub_group_shuffle( short x, uint c );
short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );
short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );
short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );
short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );
short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);
ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );
short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );
short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );
short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );
short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );
short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );
short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );
ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );
short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );
short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );
short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );
short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );
short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );
short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );
ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );
short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );
short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );
short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );
short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );
short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );
short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );
ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );
ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );
ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );
ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );
ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );
ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );
short __ovld __conv intel_sub_group_reduce_add( short x );
ushort __ovld __conv intel_sub_group_reduce_add( ushort x );
short __ovld __conv intel_sub_group_reduce_min( short x );
ushort __ovld __conv intel_sub_group_reduce_min( ushort x );
short __ovld __conv intel_sub_group_reduce_max( short x );
ushort __ovld __conv intel_sub_group_reduce_max( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_add( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_min( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );
short __ovld __conv intel_sub_group_scan_exclusive_max( short x );
ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_add( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_min( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );
short __ovld __conv intel_sub_group_scan_inclusive_max( short x );
ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );
uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
// Block writes store into the image, so these overloads take a write_only
// image, matching the intel_sub_group_block_write and
// intel_sub_group_block_write_us image overloads declared in this header.
void __ovld __conv intel_sub_group_block_write_ui( write_only image2d_t image, int2 byte_coord, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( write_only image2d_t image, int2 byte_coord, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( write_only image2d_t image, int2 byte_coord, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( write_only image2d_t image, int2 byte_coord, uint8 data );
// The read_only overloads are what this header originally declared. They are
// retained only so that existing callers keep compiling.
// NOTE(review): writing through a read_only image looks unintended; confirm
// against the extension specification before relying on these.
void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );
ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );
void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);
void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);
void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );
void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );
void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );
#endif // cl_intel_subgroups_short
#ifdef cl_amd_media_ops
uint __ovld amd_bitalign(uint a, uint b, uint c);
uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);

50
c_headers/pconfigintrin.h Normal file
View File

@@ -0,0 +1,50 @@
/*===---- pconfigintrin.h - X86 platform configuration ---------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <pconfigintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __PCONFIGINTRIN_H
#define __PCONFIGINTRIN_H
#define __PCONFIG_KEY_PROGRAM 0x00000001
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("pconfig")))
/// Executes the PCONFIG instruction.
///
/// \param __leaf
///    Value placed in the A register (EAX) before the instruction runs.
/// \param __d
///    Array of at least three elements. __d[0], __d[1] and __d[2] are placed
///    in the B, C and D registers before the instruction runs and are
///    overwritten with the contents of those registers afterwards.
/// \returns The contents of the A register (EAX) after the instruction.
static __inline unsigned int __DEFAULT_FN_ATTRS
_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  /* __volatile__: the instruction is issued for its effect on platform
   * state, so it must not be discarded when the caller ignores the outputs.
   * "memory": the register operands may hold addresses of data that the
   * instruction reads or writes, which the operand list cannot describe. */
  __asm__ __volatile__ ("pconfig"
                        : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
                        : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
                        : "cc", "memory");
  return __result;
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@@ -1,4 +1,4 @@
/*===------------- pkuintrin.h - PKU intrinsics ------------------===
/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -40,7 +40,7 @@ _rdpkru_u32(void)
static __inline__ void __DEFAULT_FN_ATTRS
_wrpkru(unsigned int __val)
{
return __builtin_ia32_wrpkru(__val);
__builtin_ia32_wrpkru(__val);
}
#undef __DEFAULT_FN_ATTRS

View File

@@ -28,9 +28,9 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sse3")))
__attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
/// \brief Loads data from an unaligned memory location to elements in a 128-bit
/// Loads data from an unaligned memory location to elements in a 128-bit
/// vector.
///
/// If the address of the data is not 16-byte aligned, the instruction may
@@ -50,7 +50,7 @@ _mm_lddqu_si128(__m128i const *__p)
return (__m128i)__builtin_ia32_lddqu((char const *)__p);
}
/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
/// Adds the even-indexed values and subtracts the odd-indexed values of
/// two 128-bit vectors of [4 x float].
///
/// \headerfile <x86intrin.h>
@@ -69,7 +69,7 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
}
/// \brief Horizontally adds the adjacent pairs of values contained in two
/// Horizontally adds the adjacent pairs of values contained in two
/// 128-bit vectors of [4 x float].
///
/// \headerfile <x86intrin.h>
@@ -92,7 +92,7 @@ _mm_hadd_ps(__m128 __a, __m128 __b)
return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
}
/// \brief Horizontally subtracts the adjacent pairs of values contained in two
/// Horizontally subtracts the adjacent pairs of values contained in two
/// 128-bit vectors of [4 x float].
///
/// \headerfile <x86intrin.h>
@@ -115,8 +115,8 @@ _mm_hsub_ps(__m128 __a, __m128 __b)
return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
}
/// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
/// vector of [4 x float] to float values stored in a 128-bit vector of
/// Moves and duplicates odd-indexed values from a 128-bit vector
/// of [4 x float] to float values stored in a 128-bit vector of
/// [4 x float].
///
/// \headerfile <x86intrin.h>
@@ -137,7 +137,7 @@ _mm_movehdup_ps(__m128 __a)
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
}
/// \brief Duplicates low-order (even-indexed) values from a 128-bit vector of
/// Duplicates even-indexed values from a 128-bit vector of
/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
///
/// \headerfile <x86intrin.h>
@@ -158,7 +158,7 @@ _mm_moveldup_ps(__m128 __a)
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
}
/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
/// Adds the even-indexed values and subtracts the odd-indexed values of
/// two 128-bit vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
@@ -177,7 +177,7 @@ _mm_addsub_pd(__m128d __a, __m128d __b)
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}
/// \brief Horizontally adds the pairs of values contained in two 128-bit
/// Horizontally adds the pairs of values contained in two 128-bit
/// vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
@@ -200,7 +200,7 @@ _mm_hadd_pd(__m128d __a, __m128d __b)
return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
}
/// \brief Horizontally subtracts the pairs of values contained in two 128-bit
/// Horizontally subtracts the pairs of values contained in two 128-bit
/// vectors of [2 x double].
///
/// \headerfile <x86intrin.h>
@@ -223,13 +223,13 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
}
/// \brief Moves and duplicates one double-precision value to double-precision
/// Moves and duplicates one double-precision value to double-precision
/// values stored in a 128-bit vector of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const * dp);
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
@@ -240,7 +240,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
/// duplicated values.
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
/// \brief Moves and duplicates the double-precision value in the lower bits of
/// Moves and duplicates the double-precision value in the lower bits of
/// a 128-bit vector of [2 x double] to double-precision values stored in a
/// 128-bit vector of [2 x double].
///
@@ -259,7 +259,7 @@ _mm_movedup_pd(__m128d __a)
return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
}
/// \brief Establishes a linear address memory range to be monitored and puts
/// Establishes a linear address memory range to be monitored and puts
/// the processor in the monitor event pending state. Data stored in the
/// monitored address range causes the processor to exit the pending state.
///
@@ -280,7 +280,7 @@ _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
__builtin_ia32_monitor((void *)__p, __extensions, __hints);
}
/// \brief Used with the MONITOR instruction to wait while the processor is in
/// Used with the MONITOR instruction to wait while the processor is in
/// the monitor event pending state. Data stored in the monitored address
/// range causes the processor to exit the pending state.
///

View File

@@ -21,13 +21,13 @@
*===-----------------------------------------------------------------------===
*/
#ifndef _POPCNTINTRIN_H
#define _POPCNTINTRIN_H
#ifndef __POPCNTINTRIN_H
#define __POPCNTINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt")))
/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -43,7 +43,7 @@ _mm_popcnt_u32(unsigned int __A)
return __builtin_popcount(__A);
}
/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -60,7 +60,7 @@ _popcnt32(int __A)
}
#ifdef __x86_64__
/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -76,7 +76,7 @@ _mm_popcnt_u64(unsigned long long __A)
return __builtin_popcountll(__A);
}
/// \brief Counts the number of bits in the source operand having a value of 1.
/// Counts the number of bits in the source operand having a value of 1.
///
/// \headerfile <x86intrin.h>
///
@@ -95,4 +95,4 @@ _popcnt64(long long __A)
#undef __DEFAULT_FN_ATTRS
#endif /* _POPCNTINTRIN_H */
#endif /* __POPCNTINTRIN_H */

View File

@@ -28,8 +28,7 @@
#ifndef __PRFCHWINTRIN_H
#define __PRFCHWINTRIN_H
#if defined(__PRFCHW__) || defined(__3dNOW__)
/// \brief Loads a memory sequence containing the specified memory address into
/// Loads a memory sequence containing the specified memory address into
/// all data cache levels. The cache-coherency state is set to exclusive.
/// Data can be read from and written to the cache line without additional
/// delay.
@@ -46,7 +45,7 @@ _m_prefetch(void *__P)
__builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}
/// \brief Loads a memory sequence containing the specified memory address into
/// Loads a memory sequence containing the specified memory address into
/// the L1 data cache and sets the cache-coherency to modified. This
/// provides a hint to the processor that the cache line will be modified.
/// It is intended for use when the cache line will be written to shortly
@@ -66,6 +65,5 @@ _m_prefetchw(void *__P)
{
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
}
#endif
#endif /* __PRFCHWINTRIN_H */

51
c_headers/ptwriteintrin.h Normal file
View File

@@ -0,0 +1,51 @@
/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __PTWRITEINTRIN_H
#define __PTWRITEINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("ptwrite")))
/// Passes \a __value to the __builtin_ia32_ptwrite32 compiler builtin.
///
/// NOTE(review): presumably this emits the PTWRITE instruction with a 32-bit
/// operand; confirm against the compiler's builtin documentation.
///
/// \param __value
///    The unsigned int value forwarded to the builtin.
static __inline__ void __DEFAULT_FN_ATTRS
_ptwrite32(unsigned int __value) {
__builtin_ia32_ptwrite32(__value);
}
#ifdef __x86_64__
/// Passes \a __value to the __builtin_ia32_ptwrite64 compiler builtin.
///
/// This function sits inside an #ifdef __x86_64__ region, so it is only
/// declared when that macro is defined.
///
/// NOTE(review): presumably this emits the PTWRITE instruction with a 64-bit
/// operand; confirm against the compiler's builtin documentation.
///
/// \param __value
///    The unsigned long long value forwarded to the builtin.
static __inline__ void __DEFAULT_FN_ATTRS
_ptwrite64(unsigned long long __value) {
__builtin_ia32_ptwrite64(__value);
}
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
#endif /* __PTWRITEINTRIN_H */

View File

@@ -21,7 +21,7 @@
*===-----------------------------------------------------------------------===
*/
#ifndef __X86INTRIN_H
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
#endif

70
c_headers/sgxintrin.h Normal file
View File

@@ -0,0 +1,70 @@
/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <sgxintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __SGXINTRIN_H
#define __SGXINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("sgx")))
/// Executes the ENCLU instruction.
///
/// \param __leaf
///    Value placed in the A register (EAX) before the instruction runs.
/// \param __d
///    Array of at least three elements. __d[0], __d[1] and __d[2] are placed
///    in the B, C and D registers before the instruction runs and are
///    overwritten with the contents of those registers afterwards.
/// \returns The contents of the A register (EAX) after the instruction.
static __inline unsigned int __DEFAULT_FN_ATTRS
_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  /* __volatile__: the instruction is issued for its effect on enclave
   * state, so it must not be discarded when the caller ignores the outputs.
   * "memory": the register operands may hold addresses of data that the
   * instruction reads or writes, which the operand list cannot describe. */
  __asm__ __volatile__ ("enclu"
                        : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
                        : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
                        : "cc", "memory");
  return __result;
}
/// Executes the ENCLS instruction.
///
/// \param __leaf
///    Value placed in the A register (EAX) before the instruction runs.
/// \param __d
///    Array of at least three elements. __d[0], __d[1] and __d[2] are placed
///    in the B, C and D registers before the instruction runs and are
///    overwritten with the contents of those registers afterwards.
/// \returns The contents of the A register (EAX) after the instruction.
static __inline unsigned int __DEFAULT_FN_ATTRS
_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  /* __volatile__: the instruction is issued for its effect on enclave
   * state, so it must not be discarded when the caller ignores the outputs.
   * "memory": the register operands may hold addresses of data that the
   * instruction reads or writes, which the operand list cannot describe. */
  __asm__ __volatile__ ("encls"
                        : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
                        : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
                        : "cc", "memory");
  return __result;
}
/// Executes the ENCLV instruction.
///
/// \param __leaf
///    Value placed in the A register (EAX) before the instruction runs.
/// \param __d
///    Array of at least three elements. __d[0], __d[1] and __d[2] are placed
///    in the B, C and D registers before the instruction runs and are
///    overwritten with the contents of those registers afterwards.
/// \returns The contents of the A register (EAX) after the instruction.
static __inline unsigned int __DEFAULT_FN_ATTRS
_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])
{
  unsigned int __result;
  /* __volatile__: the instruction is issued for its effect on enclave
   * state, so it must not be discarded when the caller ignores the outputs.
   * "memory": the register operands may hold addresses of data that the
   * instruction reads or writes, which the operand list cannot describe. */
  __asm__ __volatile__ ("enclv"
                        : "=a" (__result), "=b" (__d[0]), "=c" (__d[1]), "=d" (__d[2])
                        : "a" (__leaf), "b" (__d[0]), "c" (__d[1]), "d" (__d[2])
                        : "cc", "memory");
  return __result;
}
#undef __DEFAULT_FN_ATTRS
#endif

View File

@@ -29,10 +29,10 @@
#define __SHAINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128)))
#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
__builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)); })
#define _mm_sha1rnds4_epu32(V1, V2, M) \
__builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)

View File

@@ -21,13 +21,13 @@
*===-----------------------------------------------------------------------===
*/
#ifndef _SMMINTRIN_H
#define _SMMINTRIN_H
#ifndef __SMMINTRIN_H
#define __SMMINTRIN_H
#include <tmmintrin.h>
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128)))
/* SSE4 Rounding macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
@@ -46,7 +46,7 @@
#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)
/// \brief Rounds up each element of the 128-bit vector of [4 x float] to an
/// Rounds up each element of the 128-bit vector of [4 x float] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [4 x float].
///
@@ -63,7 +63,7 @@
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)
/// \brief Rounds up each element of the 128-bit vector of [2 x double] to an
/// Rounds up each element of the 128-bit vector of [2 x double] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [2 x double].
///
@@ -80,7 +80,7 @@
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)
/// \brief Copies three upper elements of the first 128-bit vector operand to
/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
/// [4 x float]. Rounds up the lowest element of the second 128-bit vector
/// operand to an integer and copies it to the lowest element of the 128-bit
@@ -105,7 +105,7 @@
/// values.
#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
/// \brief Copies the upper element of the first 128-bit vector operand to the
/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds up the lower element of the second 128-bit vector operand to an
/// integer and copies it to the lower element of the 128-bit result vector
@@ -130,7 +130,7 @@
/// values.
#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)
/// \brief Rounds down each element of the 128-bit vector of [4 x float] to an
/// Rounds down each element of the 128-bit vector of [4 x float] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [4 x float].
///
@@ -147,7 +147,7 @@
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)
/// \brief Rounds down each element of the 128-bit vector of [2 x double] to an
/// Rounds down each element of the 128-bit vector of [2 x double] to an
/// integer and returns the rounded values in a 128-bit vector of
/// [2 x double].
///
@@ -164,7 +164,7 @@
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)
/// \brief Copies three upper elements of the first 128-bit vector operand to
/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
/// [4 x float]. Rounds down the lowest element of the second 128-bit vector
/// operand to an integer and copies it to the lowest element of the 128-bit
@@ -189,7 +189,7 @@
/// values.
#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
/// \brief Copies the upper element of the first 128-bit vector operand to the
/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds down the lower element of the second 128-bit vector operand to an
/// integer and copies it to the lower element of the 128-bit result vector
@@ -214,7 +214,7 @@
/// values.
#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)
/// \brief Rounds each element of the 128-bit vector of [4 x float] to an
/// Rounds each element of the 128-bit vector of [4 x float] to an
/// integer value according to the rounding control specified by the second
/// argument and returns the rounded values in a 128-bit vector of
/// [4 x float].
@@ -244,10 +244,10 @@
/// 10: Upward (toward positive infinity) \n
/// 11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_round_ps(X, M) __extension__ ({ \
(__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })
#define _mm_round_ps(X, M) \
(__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
/// \brief Copies three upper elements of the first 128-bit vector operand to
/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
/// [4 x float]. Rounds the lowest element of the second 128-bit vector
/// operand to an integer value according to the rounding control specified
@@ -285,11 +285,11 @@
/// 11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
/// values.
#define _mm_round_ss(X, Y, M) __extension__ ({ \
#define _mm_round_ss(X, Y, M) \
(__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (M)); })
(__v4sf)(__m128)(Y), (M))
/// \brief Rounds each element of the 128-bit vector of [2 x double] to an
/// Rounds each element of the 128-bit vector of [2 x double] to an
/// integer value according to the rounding control specified by the second
/// argument and returns the rounded values in a 128-bit vector of
/// [2 x double].
@@ -319,10 +319,10 @@
/// 10: Upward (toward positive infinity) \n
/// 11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_round_pd(X, M) __extension__ ({ \
(__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })
#define _mm_round_pd(X, M) \
(__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))
/// \brief Copies the upper element of the first 128-bit vector operand to the
/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
/// Rounds the lower element of the second 128-bit vector operand to an
/// integer value according to the rounding control specified by the third
@@ -360,12 +360,12 @@
/// 11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
/// values.
#define _mm_round_sd(X, Y, M) __extension__ ({ \
#define _mm_round_sd(X, Y, M) \
(__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (M)); })
(__v2df)(__m128d)(Y), (M))
/* SSE4 Packed Blending Intrinsics. */
/// \brief Returns a 128-bit vector of [2 x double] where the values are
/// Returns a 128-bit vector of [2 x double] where the values are
/// selected from either the first or second operand as specified by the
/// third operand, the control mask.
///
@@ -389,13 +389,11 @@
/// When a mask bit is 1, the corresponding 64-bit element in operand \a V2
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
(__v2df)(__m128d)(V2), \
(((M) & 0x01) ? 2 : 0), \
(((M) & 0x02) ? 3 : 1)); })
#define _mm_blend_pd(V1, V2, M) \
(__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
(__v2df)(__m128d)(V2), (int)(M))
/// \brief Returns a 128-bit vector of [4 x float] where the values are selected
/// Returns a 128-bit vector of [4 x float] where the values are selected
/// from either the first or second operand as specified by the third
/// operand, the control mask.
///
@@ -419,14 +417,11 @@
/// When a mask bit is 1, the corresponding 32-bit element in operand \a V2
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
(((M) & 0x01) ? 4 : 0), \
(((M) & 0x02) ? 5 : 1), \
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
#define _mm_blend_ps(V1, V2, M) \
(__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
(__v4sf)(__m128)(V2), (int)(M))
/// \brief Returns a 128-bit vector of [2 x double] where the values are
/// Returns a 128-bit vector of [2 x double] where the values are
/// selected from either the first or second operand as specified by the
/// third operand, the control mask.
///
@@ -453,7 +448,7 @@ _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
(__v2df)__M);
}
/// \brief Returns a 128-bit vector of [4 x float] where the values are
/// Returns a 128-bit vector of [4 x float] where the values are
/// selected from either the first or second operand as specified by the
/// third operand, the control mask.
///
@@ -480,7 +475,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
(__v4sf)__M);
}
/// \brief Returns a 128-bit vector of [16 x i8] where the values are selected
/// Returns a 128-bit vector of [16 x i8] where the values are selected
/// from either of the first or second operand as specified by the third
/// operand, the control mask.
///
@@ -493,7 +488,7 @@ _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
/// \param __V2
/// A 128-bit vector of [16 x i8].
/// \param __M
/// A 128-bit vector operand, with mask bits 127, 119, 111 ... 7 specifying
/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying
/// how the values are to be copied. The position of the mask bit corresponds
/// to the most significant bit of a copied value. When a mask bit is 0, the
/// corresponding 8-bit element in operand \a __V1 is copied to the same
@@ -507,7 +502,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
(__v16qi)__M);
}
/// \brief Returns a 128-bit vector of [8 x i16] where the values are selected
/// Returns a 128-bit vector of [8 x i16] where the values are selected
/// from either of the first or second operand as specified by the third
/// operand, the control mask.
///
@@ -531,20 +526,12 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
/// When a mask bit is 1, the corresponding 16-bit element in operand \a V2
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
(__v8hi)(__m128i)(V2), \
(((M) & 0x01) ? 8 : 0), \
(((M) & 0x02) ? 9 : 1), \
(((M) & 0x04) ? 10 : 2), \
(((M) & 0x08) ? 11 : 3), \
(((M) & 0x10) ? 12 : 4), \
(((M) & 0x20) ? 13 : 5), \
(((M) & 0x40) ? 14 : 6), \
(((M) & 0x80) ? 15 : 7)); })
#define _mm_blend_epi16(V1, V2, M) \
(__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
(__v8hi)(__m128i)(V2), (int)(M))
/* SSE4 Dword Multiply Instructions. */
/// \brief Multiplies corresponding elements of two 128-bit vectors of [4 x i32]
/// Multiplies corresponding elements of two 128-bit vectors of [4 x i32]
/// and returns the lower 32 bits of each product in a 128-bit vector of
/// [4 x i32].
///
@@ -563,7 +550,7 @@ _mm_mullo_epi32 (__m128i __V1, __m128i __V2)
return (__m128i) ((__v4su)__V1 * (__v4su)__V2);
}
/// \brief Multiplies corresponding even-indexed elements of two 128-bit
/// Multiplies corresponding even-indexed elements of two 128-bit
/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]
/// containing the products.
///
@@ -584,7 +571,7 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
}
/* SSE4 Floating Point Dot Product Instructions. */
/// \brief Computes the dot product of the two 128-bit vectors of [4 x float]
/// Computes the dot product of the two 128-bit vectors of [4 x float]
/// and returns it in the elements of the 128-bit result vector of
/// [4 x float].
///
@@ -616,11 +603,11 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// each [4 x float] subvector. If a bit is set, the dot product is returned
/// in the corresponding element; otherwise that element is set to zero.
/// \returns A 128-bit vector of [4 x float] containing the dot product.
#define _mm_dp_ps(X, Y, M) __extension__ ({ \
#define _mm_dp_ps(X, Y, M) \
(__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (M)); })
(__v4sf)(__m128)(Y), (M))
/// \brief Computes the dot product of the two 128-bit vectors of [2 x double]
/// Computes the dot product of the two 128-bit vectors of [2 x double]
/// and returns it in the elements of the 128-bit result vector of
/// [2 x double].
///
@@ -648,15 +635,15 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// input vectors are used as an input for dot product; otherwise that input
/// is treated as zero. Bits [1:0] determine which elements of the result
/// will receive a copy of the final dot product, with bit [0] corresponding
/// to the lowest element and bit [3] corresponding to the highest element of
/// to the lowest element and bit [1] corresponding to the highest element of
/// each [2 x double] vector. If a bit is set, the dot product is returned in
/// the corresponding element; otherwise that element is set to zero.
#define _mm_dp_pd(X, Y, M) __extension__ ({\
#define _mm_dp_pd(X, Y, M) \
(__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (M)); })
(__v2df)(__m128d)(Y), (M))
/* SSE4 Streaming Load Hint Instruction. */
/// \brief Loads integer values from a 128-bit aligned memory location to a
/// Loads integer values from a 128-bit aligned memory location to a
/// 128-bit integer vector.
///
/// \headerfile <x86intrin.h>
@@ -675,7 +662,7 @@ _mm_stream_load_si128 (__m128i const *__V)
}
/* SSE4 Packed Integer Min/Max Instructions. */
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser
/// of the two values.
///
@@ -694,7 +681,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
}
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the
/// greater value of the two.
///
@@ -713,7 +700,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
}
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser
/// value of the two.
///
@@ -732,7 +719,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
}
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the
/// greater value of the two.
///
@@ -751,7 +738,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
}
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser
/// value of the two.
///
@@ -770,7 +757,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
}
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the
/// greater value of the two.
///
@@ -789,7 +776,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
}
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser
/// value of the two.
///
@@ -808,7 +795,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2)
return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
}
/// \brief Compares the corresponding elements of two 128-bit vectors of
/// Compares the corresponding elements of two 128-bit vectors of
/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the
/// greater value of the two.
///
@@ -828,7 +815,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
}
/* SSE4 Insertion and Extraction from XMM Register Instructions. */
/// \brief Takes the first argument \a X and inserts an element from the second
/// Takes the first argument \a X and inserts an element from the second
/// argument \a Y as selected by the third argument \a N. That result then
/// has elements zeroed out also as selected by the third argument \a N. The
/// resulting 128-bit vector of [4 x float] is then returned.
@@ -866,11 +853,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Copies the selected bits from \a Y to result bits [127:96]. \n
/// Bits[3:0]: If any of these bits are set, the corresponding result
/// element is cleared.
/// \returns A 128-bit vector of [4 x float] containing the copied single-
/// precision floating point elements from the operands.
/// \returns A 128-bit vector of [4 x float] containing the copied
/// single-precision floating point elements from the operands.
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
/// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and
/// returns it, using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -893,15 +880,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Bits [127:96] of parameter \a X are returned.
/// \returns A 32-bit integer containing the extracted 32 bits of float data.
#define _mm_extract_ps(X, N) (__extension__ \
({ union { int __i; float __f; } __t; \
__v4sf __a = (__v4sf)(__m128)(X); \
__t.__f = __a[(N) & 3]; \
__t.__i;}))
({ union { int __i; float __f; } __t; \
__t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \
__t.__i;}))
/* Miscellaneous insert and extract macros. */
/* Extract a single-precision float from X at index N into D. */
#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \
(D) = __a[N]; }))
#define _MM_EXTRACT_FLOAT(D, X, N) \
{ (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }
/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
an index suitable for _mm_insert_ps. */
@@ -912,7 +898,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
_MM_MK_INSERTPS_NDX((N), 0, 0x0e))
/* Insert int into packed integer array at index. */
/// \brief Constructs a 128-bit vector of [16 x i8] by first making a copy of
/// Constructs a 128-bit vector of [16 x i8] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the lower 8 bits
/// of an integer parameter \a I into an offset specified by the immediate
/// value parameter \a N.
@@ -952,12 +938,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1110: Bits [119:112] of the result are used for insertion. \n
/// 1111: Bits [127:120] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi8(X, I, N) (__extension__ \
({ __v16qi __a = (__v16qi)(__m128i)(X); \
__a[(N) & 15] = (I); \
(__m128i)__a;}))
#define _mm_insert_epi8(X, I, N) \
(__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
(int)(I), (int)(N))
/// \brief Constructs a 128-bit vector of [4 x i32] by first making a copy of
/// Constructs a 128-bit vector of [4 x i32] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 32-bit
/// integer parameter \a I at the offset specified by the immediate value
/// parameter \a N.
@@ -985,13 +970,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 10: Bits [95:64] of the result are used for insertion. \n
/// 11: Bits [127:96] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi32(X, I, N) (__extension__ \
({ __v4si __a = (__v4si)(__m128i)(X); \
__a[(N) & 3] = (I); \
(__m128i)__a;}))
#define _mm_insert_epi32(X, I, N) \
(__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
(int)(I), (int)(N))
#ifdef __x86_64__
/// \brief Constructs a 128-bit vector of [2 x i64] by first making a copy of
/// Constructs a 128-bit vector of [2 x i64] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 64-bit
/// integer parameter \a I, using the immediate value parameter \a N as an
/// insertion location selector.
@@ -1017,16 +1001,15 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 0: Bits [63:0] of the result are used for insertion. \n
/// 1: Bits [127:64] of the result are used for insertion. \n
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi64(X, I, N) (__extension__ \
({ __v2di __a = (__v2di)(__m128i)(X); \
__a[(N) & 1] = (I); \
(__m128i)__a;}))
#define _mm_insert_epi64(X, I, N) \
(__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
(long long)(I), (int)(N))
#endif /* __x86_64__ */
/* Extract int from packed integer array at index. This returns the element
* as a zero extended value, so it is unsigned.
*/
/// \brief Extracts an 8-bit element from the 128-bit integer vector of
/// Extracts an 8-bit element from the 128-bit integer vector of
/// [16 x i8], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -1061,11 +1044,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// \returns An unsigned integer, whose lower 8 bits are selected from the
/// 128-bit integer vector parameter and the remaining bits are assigned
/// zeros.
#define _mm_extract_epi8(X, N) (__extension__ \
({ __v16qi __a = (__v16qi)(__m128i)(X); \
(int)(unsigned char) __a[(N) & 15];}))
#define _mm_extract_epi8(X, N) \
(int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
(int)(N))
/// \brief Extracts a 32-bit element from the 128-bit integer vector of
/// Extracts a 32-bit element from the 128-bit integer vector of
/// [4 x i32], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -1087,12 +1070,11 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Bits [127:96] of the parameter \a X are extracted.
/// \returns An integer, whose lower 32 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi32(X, N) (__extension__ \
({ __v4si __a = (__v4si)(__m128i)(X); \
(int)__a[(N) & 3];}))
#define _mm_extract_epi32(X, N) \
(int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))
#ifdef __x86_64__
/// \brief Extracts a 64-bit element from the 128-bit integer vector of
/// Extracts a 64-bit element from the 128-bit integer vector of
/// [2 x i64], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
@@ -1111,13 +1093,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 0: Bits [63:0] are returned. \n
/// 1: Bits [127:64] are returned. \n
/// \returns A 64-bit integer.
#define _mm_extract_epi64(X, N) (__extension__ \
({ __v2di __a = (__v2di)(__m128i)(X); \
(long long)__a[(N) & 1];}))
#define _mm_extract_epi64(X, N) \
(long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))
#endif /* __x86_64__ */
/* SSE4 128-bit Packed Integer Comparisons. */
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// \headerfile <x86intrin.h>
@@ -1135,7 +1116,7 @@ _mm_testz_si128(__m128i __M, __m128i __V)
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// \headerfile <x86intrin.h>
@@ -1153,7 +1134,7 @@ _mm_testc_si128(__m128i __M, __m128i __V)
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}
/// \brief Tests whether the specified bits in a 128-bit integer vector are
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// \headerfile <x86intrin.h>
@@ -1172,7 +1153,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
}
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// \headerfile <x86intrin.h>
@@ -1189,7 +1170,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
/// otherwise.
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))
/// \brief Tests whether the specified bits in a 128-bit integer vector are
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// \headerfile <x86intrin.h>
@@ -1208,7 +1189,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
/// FALSE otherwise.
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// \headerfile <x86intrin.h>
@@ -1227,7 +1208,7 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
/* SSE4 64-bit Packed Integer Comparisons. */
/// \brief Compares each of the corresponding 64-bit values of the 128-bit
/// Compares each of the corresponding 64-bit values of the 128-bit
/// integer vectors for equality.
///
/// \headerfile <x86intrin.h>
@@ -1246,7 +1227,7 @@ _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
}
/* SSE4 Packed Integer Sign-Extension. */
/// \brief Sign-extends each of the lower eight 8-bit integer elements of a
/// Sign-extends each of the lower eight 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
/// are unused.
@@ -1267,7 +1248,7 @@ _mm_cvtepi8_epi16(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
}
/// \brief Sign-extends each of the lower four 8-bit integer elements of a
/// Sign-extends each of the lower four 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
/// 128-bit vector of [4 x i32]. The upper twelve elements of the input
/// vector are unused.
@@ -1277,8 +1258,8 @@ _mm_cvtepi8_epi16(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are sign-
/// extended to 32-bit values.
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
/// sign-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi32(__m128i __V)
@@ -1288,7 +1269,7 @@ _mm_cvtepi8_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);
}
/// \brief Sign-extends each of the lower two 8-bit integer elements of a
/// Sign-extends each of the lower two 8-bit integer elements of a
/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
/// vector are unused.
@@ -1298,8 +1279,8 @@ _mm_cvtepi8_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are sign-
/// extended to 64-bit values.
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
/// sign-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi8_epi64(__m128i __V)
@@ -1309,7 +1290,7 @@ _mm_cvtepi8_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
}
/// \brief Sign-extends each of the lower four 16-bit integer elements of a
/// Sign-extends each of the lower four 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
/// a 128-bit vector of [4 x i32]. The upper four elements of the input
/// vector are unused.
@@ -1319,8 +1300,8 @@ _mm_cvtepi8_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are sign-
/// extended to 32-bit values.
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
/// sign-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi16_epi32(__m128i __V)
@@ -1328,7 +1309,7 @@ _mm_cvtepi16_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);
}
/// \brief Sign-extends each of the lower two 16-bit integer elements of a
/// Sign-extends each of the lower two 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper six elements of the input
/// vector are unused.
@@ -1338,8 +1319,8 @@ _mm_cvtepi16_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are sign-
/// extended to 64-bit values.
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
/// sign-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi16_epi64(__m128i __V)
@@ -1347,7 +1328,7 @@ _mm_cvtepi16_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);
}
/// \brief Sign-extends each of the lower two 32-bit integer elements of a
/// Sign-extends each of the lower two 32-bit integer elements of a
/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
/// are unused.
@@ -1357,8 +1338,8 @@ _mm_cvtepi16_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are sign-
/// extended to 64-bit values.
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
/// sign-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepi32_epi64(__m128i __V)
@@ -1367,7 +1348,7 @@ _mm_cvtepi32_epi64(__m128i __V)
}
/* SSE4 Packed Integer Zero-Extension. */
/// \brief Zero-extends each of the lower eight 8-bit integer elements of a
/// Zero-extends each of the lower eight 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a
/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector
/// are unused.
@@ -1377,8 +1358,8 @@ _mm_cvtepi32_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are zero-
/// extended to 16-bit values.
/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
/// zero-extended to 16-bit values.
/// \returns A 128-bit vector of [8 x i16] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi16(__m128i __V)
@@ -1386,7 +1367,7 @@ _mm_cvtepu8_epi16(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
}
/// \brief Zero-extends each of the lower four 8-bit integer elements of a
/// Zero-extends each of the lower four 8-bit integer elements of a
/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a
/// 128-bit vector of [4 x i32]. The upper twelve elements of the input
/// vector are unused.
@@ -1396,8 +1377,8 @@ _mm_cvtepu8_epi16(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are zero-
/// extended to 32-bit values.
/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
/// zero-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi32(__m128i __V)
@@ -1405,7 +1386,7 @@ _mm_cvtepu8_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
}
/// \brief Zero-extends each of the lower two 8-bit integer elements of a
/// Zero-extends each of the lower two 8-bit integer elements of a
/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input
/// vector are unused.
@@ -1415,8 +1396,8 @@ _mm_cvtepu8_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are zero-
/// extended to 64-bit values.
/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
/// zero-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi64(__m128i __V)
@@ -1424,7 +1405,7 @@ _mm_cvtepu8_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
}
/// \brief Zero-extends each of the lower four 16-bit integer elements of a
/// Zero-extends each of the lower four 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in
/// a 128-bit vector of [4 x i32]. The upper four elements of the input
/// vector are unused.
@@ -1434,8 +1415,8 @@ _mm_cvtepu8_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are zero-
/// extended to 32-bit values.
/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
/// zero-extended to 32-bit values.
/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu16_epi32(__m128i __V)
@@ -1443,7 +1424,7 @@ _mm_cvtepu16_epi32(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
}
/// \brief Zero-extends each of the lower two 16-bit integer elements of a
/// Zero-extends each of the lower two 16-bit integer elements of a
/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector
/// are unused.
@@ -1453,8 +1434,8 @@ _mm_cvtepu16_epi32(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are zero-
/// extended to 64-bit values.
/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
/// zero-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu16_epi64(__m128i __V)
@@ -1462,7 +1443,7 @@ _mm_cvtepu16_epi64(__m128i __V)
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
}
/// \brief Zero-extends each of the lower two 32-bit integer elements of a
/// Zero-extends each of the lower two 32-bit integer elements of a
/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in
/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector
/// are unused.
@@ -1472,8 +1453,8 @@ _mm_cvtepu16_epi64(__m128i __V)
/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.
///
/// \param __V
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are zero-
/// extended to 64-bit values.
/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
/// zero-extended to 64-bit values.
/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu32_epi64(__m128i __V)
@@ -1482,7 +1463,7 @@ _mm_cvtepu32_epi64(__m128i __V)
}
/* SSE4 Pack with Unsigned Saturation. */
/// \brief Converts 32-bit signed integers from both 128-bit integer vector
/// Converts 32-bit signed integers from both 128-bit integer vector
/// operands into 16-bit unsigned integers, and returns the packed result.
/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
/// 0x0000 are saturated to 0x0000.
@@ -1511,7 +1492,7 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
}
/* SSE4 Multiple Packed Sums of Absolute Difference. */
/// \brief Subtracts 8-bit unsigned integer values and computes the absolute
/// Subtracts 8-bit unsigned integer values and computes the absolute
/// values of the differences to the corresponding bits in the destination.
/// Then sums of the absolute differences are returned according to the bit
/// fields in the immediate operand.
@@ -1534,23 +1515,23 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
/// \code
/// // M2 represents bit 2 of the immediate operand
/// // M10 represents bits [1:0] of the immediate operand
/// i = M2 * 4
/// j = M10 * 4
/// i = M2 * 4;
/// j = M10 * 4;
/// for (k = 0; k < 8; k = k + 1) {
/// d0 = abs(X[i + k + 0] - Y[j + 0])
/// d1 = abs(X[i + k + 1] - Y[j + 1])
/// d2 = abs(X[i + k + 2] - Y[j + 2])
/// d3 = abs(X[i + k + 3] - Y[j + 3])
/// r[k] = d0 + d1 + d2 + d3
/// d0 = abs(X[i + k + 0] - Y[j + 0]);
/// d1 = abs(X[i + k + 1] - Y[j + 1]);
/// d2 = abs(X[i + k + 2] - Y[j + 2]);
/// d3 = abs(X[i + k + 3] - Y[j + 3]);
/// r[k] = d0 + d1 + d2 + d3;
/// }
/// \endcode
/// \returns A 128-bit integer vector containing the sums of the sets of
/// absolute differences between both operands.
#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \
#define _mm_mpsadbw_epu8(X, Y, M) \
(__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
(__v16qi)(__m128i)(Y), (M)); })
(__v16qi)(__m128i)(Y), (M))
/// \brief Finds the minimum unsigned 16-bit element in the input 128-bit
/// Finds the minimum unsigned 16-bit element in the input 128-bit
/// vector of [8 x u16] and returns it along with its index.
///
/// \headerfile <x86intrin.h>
@@ -1604,7 +1585,7 @@ _mm_minpos_epu16(__m128i __V)
#define _SIDD_UNIT_MASK 0x40
/* SSE4.2 Packed Comparison Intrinsics. */
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns a 128-bit integer vector representing the result
/// mask of the comparison.
@@ -1660,7 +1641,7 @@ _mm_minpos_epu16(__m128i __V)
(__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns an integer representing the result index of the
/// comparison.
@@ -1714,7 +1695,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns a 128-bit integer vector representing the result
/// mask of the comparison.
@@ -1775,7 +1756,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns an integer representing the result index of the
/// comparison.
@@ -1835,7 +1816,7 @@ _mm_minpos_epu16(__m128i __V)
(int)(M))
/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
/// string in \a B is the maximum, otherwise, returns 0.
@@ -1885,7 +1866,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns
/// 0.
@@ -1934,7 +1915,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns bit 0 of the resulting bit mask.
///
@@ -1982,7 +1963,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a A is less than
/// the maximum, otherwise, returns 0.
@@ -2032,7 +2013,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a B is less than
/// the maximum, otherwise, returns 0.
@@ -2082,7 +2063,7 @@ _mm_minpos_epu16(__m128i __V)
(int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the bit mask is zero and the length of the
/// string in \a B is the maximum, otherwise, returns 0.
@@ -2137,7 +2118,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise,
/// returns 0.
@@ -2191,7 +2172,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns bit 0 of the resulting bit mask.
///
@@ -2244,7 +2225,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a A is less than
/// the maximum, otherwise, returns 0.
@@ -2299,7 +2280,7 @@ _mm_minpos_epu16(__m128i __V)
(__v16qi)(__m128i)(B), (int)(LB), \
(int)(M))
/// \brief Uses the immediate operand \a M to perform a comparison of string
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
/// \a A and \a B. Returns 1 if the length of the string in \a B is less than
/// the maximum, otherwise, returns 0.
@@ -2354,7 +2335,7 @@ _mm_minpos_epu16(__m128i __V)
(int)(M))
/* SSE4.2 Compare Packed Data -- Greater Than. */
/// \brief Compares each of the corresponding 64-bit values of the 128-bit
/// Compares each of the corresponding 64-bit values of the 128-bit
/// integer vectors to determine if the values in the first operand are
/// greater than those in the second operand.
///
@@ -2374,7 +2355,7 @@ _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
}
/* SSE4.2 Accumulate CRC32. */
/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned char operand.
///
/// \headerfile <x86intrin.h>
@@ -2394,7 +2375,7 @@ _mm_crc32_u8(unsigned int __C, unsigned char __D)
return __builtin_ia32_crc32qi(__C, __D);
}
/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned short operand.
///
/// \headerfile <x86intrin.h>
@@ -2414,7 +2395,7 @@ _mm_crc32_u16(unsigned int __C, unsigned short __D)
return __builtin_ia32_crc32hi(__C, __D);
}
/// \brief Adds the first unsigned integer operand to the CRC-32C checksum of
/// Adds the first unsigned integer operand to the CRC-32C checksum of
/// the second unsigned integer operand.
///
/// \headerfile <x86intrin.h>
@@ -2435,7 +2416,7 @@ _mm_crc32_u32(unsigned int __C, unsigned int __D)
}
#ifdef __x86_64__
/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the
/// Adds the unsigned integer operand to the CRC-32C checksum of the
/// unsigned 64-bit integer operand.
///
/// \headerfile <x86intrin.h>
@@ -2458,8 +2439,6 @@ _mm_crc32_u64(unsigned long long __C, unsigned long long __D)
#undef __DEFAULT_FN_ATTRS
#ifdef __POPCNT__
#include <popcntintrin.h>
#endif
#endif /* _SMMINTRIN_H */
#endif /* __SMMINTRIN_H */

View File

@@ -46,9 +46,6 @@ typedef __builtin_va_list va_list;
#ifndef __GNUC_VA_LIST
#define __GNUC_VA_LIST 1
typedef __builtin_va_list __gnuc_va_list;
/* zig: added because glibc stdio.h was duplicately defining va_list
*/
#define _VA_LIST_DEFINED
#endif
#endif /* __STDARG_H */

View File

@@ -32,12 +32,15 @@
#define true 1
#define false 0
#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
/* Define _Bool, bool, false, true as a GNU extension. */
/* Define _Bool as a GNU extension. */
#define _Bool bool
#if __cplusplus < 201103L
/* For C++98, define bool, false, true as a GNU extension. */
#define bool bool
#define false false
#define true true
#endif
#endif
#define __bool_true_false_are_defined 1

View File

@@ -48,13 +48,7 @@
#if !__has_feature(modules)
#define _PTRDIFF_T
#endif
/* Zig: wrap in _PTRDIFF_T_DEFINED to protect against mingw defining it twice */
#if !defined(_PTRDIFF_T_DEFINED)
typedef __PTRDIFF_TYPE__ ptrdiff_t;
#define _PTRDIFF_T_DEFINED
#endif
#endif
#undef __need_ptrdiff_t
#endif /* defined(__need_ptrdiff_t) */
@@ -65,24 +59,7 @@ typedef __PTRDIFF_TYPE__ ptrdiff_t;
#if !__has_feature(modules)
#define _SIZE_T
#endif
/* Zig: added to avoid collisions with mingw */
#if !defined(_SIZE_T_DEFINED_)
#if !defined(_SIZE_T_DEFINED)
#if !defined(_BSD_SIZE_T_DEFINED_)
#if !defined(_SIZE_T_DECLARED)
typedef __SIZE_TYPE__ size_t;
#define _SIZE_T_DEFINED_
#define _SIZE_T_DEFINED
#define _BSD_SIZE_T_DEFINED_
#define _SIZE_T_DECLARED
#endif
#endif
#endif
#endif
#endif
#undef __need_size_t
#endif /*defined(__need_size_t) */
@@ -110,22 +87,7 @@ typedef __SIZE_TYPE__ rsize_t;
#define _WCHAR_T_DEFINED
#endif
#endif
/* zig added to prevent duplicate definition with mingw */
#if !defined(__INT_WCHAR_T_H)
#if !defined(_GCC_WCHAR_T)
#if !defined(_WCHAR_T_DECLARED)
#if !defined(_WCHAR_T_DEFINED)
#define __INT_WCHAR_T_H
#define _GCC_WCHAR_T
#define _WCHAR_T_DECLARED
#define _WCHAR_T_DEFINED
typedef __WCHAR_TYPE__ wchar_t;
#endif
#endif
#endif
#endif
#endif
#endif
#undef __need_wchar_t

View File

@@ -88,7 +88,7 @@
*
* To accommodate targets that are missing types that are exactly 8, 16, 32, or
* 64 bits wide, this implementation takes an approach of cascading
* redefintions, redefining __int_leastN_t to successively smaller exact-width
* redefinitions, redefining __int_leastN_t to successively smaller exact-width
* types. It is therefore important that the types are defined in order of
* descending widths.
*
@@ -461,7 +461,7 @@ typedef __UINTMAX_TYPE__ uintmax_t;
* As in the type definitions, this section takes an approach of
* successive-shrinking to determine which limits to use for the standard (8,
* 16, 32, 64) bit widths when they don't have exact representations. It is
* therefore important that the defintions be kept in order of decending
* therefore important that the definitions be kept in order of descending
* widths.
*
* Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the

View File

@@ -27,9 +27,10 @@
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
/// \brief Computes the absolute value of each of the packed 8-bit signed
/// Computes the absolute value of each of the packed 8-bit signed
/// integers in the source operand and stores the 8-bit unsigned integer
/// results in the destination.
///
@@ -41,13 +42,13 @@
/// A 64-bit vector of [8 x i8].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi8(__m64 __a)
{
return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
}
/// \brief Computes the absolute value of each of the packed 8-bit signed
/// Computes the absolute value of each of the packed 8-bit signed
/// integers in the source operand and stores the 8-bit unsigned integer
/// results in the destination.
///
@@ -65,7 +66,7 @@ _mm_abs_epi8(__m128i __a)
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
}
/// \brief Computes the absolute value of each of the packed 16-bit signed
/// Computes the absolute value of each of the packed 16-bit signed
/// integers in the source operand and stores the 16-bit unsigned integer
/// results in the destination.
///
@@ -77,13 +78,13 @@ _mm_abs_epi8(__m128i __a)
/// A 64-bit vector of [4 x i16].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi16(__m64 __a)
{
return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
}
/// \brief Computes the absolute value of each of the packed 16-bit signed
/// Computes the absolute value of each of the packed 16-bit signed
/// integers in the source operand and stores the 16-bit unsigned integer
/// results in the destination.
///
@@ -101,7 +102,7 @@ _mm_abs_epi16(__m128i __a)
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
}
/// \brief Computes the absolute value of each of the packed 32-bit signed
/// Computes the absolute value of each of the packed 32-bit signed
/// integers in the source operand and stores the 32-bit unsigned integer
/// results in the destination.
///
@@ -113,13 +114,13 @@ _mm_abs_epi16(__m128i __a)
/// A 64-bit vector of [2 x i32].
/// \returns A 64-bit integer vector containing the absolute values of the
/// elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi32(__m64 __a)
{
return (__m64)__builtin_ia32_pabsd((__v2si)__a);
}
/// \brief Computes the absolute value of each of the packed 32-bit signed
/// Computes the absolute value of each of the packed 32-bit signed
/// integers in the source operand and stores the 32-bit unsigned integer
/// results in the destination.
///
@@ -137,7 +138,7 @@ _mm_abs_epi32(__m128i __a)
return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
}
/// \brief Concatenates the two 128-bit integer vector operands, and
/// Concatenates the two 128-bit integer vector operands, and
/// right-shifts the result by the number of bytes specified in the immediate
/// operand.
///
@@ -157,11 +158,11 @@ _mm_abs_epi32(__m128i __a)
/// An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
/// value.
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
#define _mm_alignr_epi8(a, b, n) \
(__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
(__v16qi)(__m128i)(b), (n)); })
(__v16qi)(__m128i)(b), (n))
/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
@@ -180,10 +181,10 @@ _mm_abs_epi32(__m128i __a)
/// An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
/// value.
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
(__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
#define _mm_alignr_pi8(a, b, n) \
(__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
@@ -206,7 +207,7 @@ _mm_hadd_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
}
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [4 x i32].
///
/// \headerfile <x86intrin.h>
@@ -229,7 +230,7 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
}
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16].
///
/// \headerfile <x86intrin.h>
@@ -246,13 +247,13 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
/// operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
}
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [2 x i32].
///
/// \headerfile <x86intrin.h>
@@ -269,15 +270,16 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
/// destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
/// operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
}
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
/// 0x8000.
///
/// \headerfile <x86intrin.h>
///
@@ -299,9 +301,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}
/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
/// 0x8000.
///
/// \headerfile <x86intrin.h>
///
@@ -317,13 +320,13 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
/// destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// sums of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
}
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
@@ -346,7 +349,7 @@ _mm_hsub_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
}
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [4 x i32].
///
/// \headerfile <x86intrin.h>
@@ -369,7 +372,7 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
}
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16].
///
/// \headerfile <x86intrin.h>
@@ -386,13 +389,13 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
/// of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
}
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [2 x i32].
///
/// \headerfile <x86intrin.h>
@@ -409,16 +412,16 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
/// the destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
/// of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
}
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
/// saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
@@ -440,10 +443,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}
/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
/// saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
@@ -459,13 +462,13 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
/// the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
/// differences of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
}
/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
@@ -499,7 +502,7 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
}
/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
@@ -523,13 +526,13 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_maddubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
}
/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits [16:1] to the destination.
///
@@ -549,7 +552,7 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
}
/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits [16:1] to the destination.
///
@@ -563,13 +566,13 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b)
/// A 64-bit vector of [4 x i16] containing one of the source operands.
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
/// products of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
}
/// \brief Copies the 8-bit integers from a 128-bit integer vector to the
/// Copies the 8-bit integers from a 128-bit integer vector to the
/// destination or clears 8-bit values in the destination, as specified by
/// the second source operand.
///
@@ -595,7 +598,7 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
}
/// \brief Copies the 8-bit integers from a 64-bit integer vector to the
/// Copies the 8-bit integers from a 64-bit integer vector to the
/// destination or clears 8-bit values in the destination, as specified by
/// the second source operand.
///
@@ -614,13 +617,13 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
/// destination. \n
/// Bits [3:0] select the source byte to be copied.
/// \returns A 64-bit integer vector containing the copied or cleared values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_shuffle_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
}
/// \brief For each 8-bit integer in the first source operand, perform one of
/// For each 8-bit integer in the first source operand, perform one of
/// the following actions as specified by the second source operand.
///
/// If the byte in the second source is negative, calculate the two's
@@ -646,7 +649,7 @@ _mm_sign_epi8(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
}
/// \brief For each 16-bit integer in the first source operand, perform one of
/// For each 16-bit integer in the first source operand, perform one of
/// the following actions as specified by the second source operand.
///
/// If the word in the second source is negative, calculate the two's
@@ -672,7 +675,7 @@ _mm_sign_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
}
/// \brief For each 32-bit integer in the first source operand, perform one of
/// For each 32-bit integer in the first source operand, perform one of
/// the following actions as specified by the second source operand.
///
/// If the doubleword in the second source is negative, calculate the two's
@@ -698,7 +701,7 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
}
/// \brief For each 8-bit integer in the first source operand, perform one of
/// For each 8-bit integer in the first source operand, perform one of
/// the following actions as specified by the second source operand.
///
/// If the byte in the second source is negative, calculate the two's
@@ -718,13 +721,13 @@ _mm_sign_epi32(__m128i __a, __m128i __b)
/// A 64-bit integer vector containing control bytes corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
}
/// \brief For each 16-bit integer in the first source operand, perform one of
/// For each 16-bit integer in the first source operand, perform one of
/// the following actions as specified by the second source operand.
///
/// If the word in the second source is negative, calculate the two's
@@ -744,13 +747,13 @@ _mm_sign_pi8(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing control words corresponding to
/// positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
}
/// \brief For each 32-bit integer in the first source operand, perform one of
/// For each 32-bit integer in the first source operand, perform one of
/// the following actions as specified by the second source operand.
///
/// If the doubleword in the second source is negative, calculate the two's
@@ -770,12 +773,13 @@ _mm_sign_pi16(__m64 __a, __m64 __b)
/// A 64-bit integer vector containing two control doublewords corresponding
/// to positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX
#endif /* __TMMINTRIN_H */

View File

@@ -76,7 +76,13 @@ typedef intptr_t _sleb128_t;
typedef uintptr_t _uleb128_t;
struct _Unwind_Context;
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
struct _Unwind_Control_Block;
typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */
#else
struct _Unwind_Exception;
typedef struct _Unwind_Exception _Unwind_Exception;
#endif
typedef enum {
_URC_NO_REASON = 0,
#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
@@ -109,34 +115,73 @@ typedef enum {
} _Unwind_Action;
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
struct _Unwind_Exception *);
_Unwind_Exception *);
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
typedef struct _Unwind_Control_Block _Unwind_Control_Block;
typedef uint32_t _Unwind_EHT_Header;
/* Exception object layout for the configuration selected by the enclosing
 * #if: __arm__ with neither __USING_SJLJ_EXCEPTIONS__ nor __ARM_DWARF_EH__
 * defined. In that configuration this header typedefs _Unwind_Exception as an
 * alias of this struct. The struct is declared with 8-byte alignment, and the
 * zero-width bit-field at the end forces any following item onto an 8-byte
 * boundary. */
struct _Unwind_Control_Block {
uint64_t exception_class;
/* Cleanup callback; receives a reason code and a pointer to this block. */
void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
/* unwinder cache (private fields for the unwinder's use) */
struct {
uint32_t reserved1; /* forced unwind stop function, 0 if not forced */
uint32_t reserved2; /* personality routine */
uint32_t reserved3; /* callsite */
uint32_t reserved4; /* forced unwind stop argument */
uint32_t reserved5;
} unwinder_cache;
/* propagation barrier cache (valid after phase 1) */
struct {
uint32_t sp;
uint32_t bitpattern[5];
} barrier_cache;
/* cleanup cache (preserved over cleanup) */
struct {
uint32_t bitpattern[4];
} cleanup_cache;
/* personality cache (for personality's benefit) */
struct {
uint32_t fnstart; /* function start address */
_Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */
uint32_t additional; /* additional data */
uint32_t reserved1;
} pr_cache;
long long int : 0; /* force alignment of next item to 8-byte boundary */
} __attribute__((__aligned__(8)));
#else
/* Exception header used in the #else branch, i.e. every configuration that
 * does not use struct _Unwind_Control_Block. It holds the exception class,
 * the cleanup callback, and a block of private words whose count depends on
 * the exception-handling scheme selected below. */
struct _Unwind_Exception {
_Unwind_Exception_Class exception_class;
_Unwind_Exception_Cleanup_Fn exception_cleanup;
/* Non-SJLJ builds with __SEH__ defined reserve six private words; all other
 * builds reserve two. */
#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)
_Unwind_Word private_[6];
#else
_Unwind_Word private_1;
_Unwind_Word private_2;
#endif
/* The Itanium ABI requires that _Unwind_Exception objects are "double-word
 * aligned". GCC has interpreted this to mean "use the maximum useful
 * alignment for the target"; so do we. */
} __attribute__((__aligned__));
#endif
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
_Unwind_Exception_Class,
struct _Unwind_Exception *,
_Unwind_Exception *,
struct _Unwind_Context *,
void *);
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
struct _Unwind_Context *);
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,
_Unwind_Exception_Class,
_Unwind_Exception *,
struct _Unwind_Context *);
typedef _Unwind_Personality_Fn __personality_routine;
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
void *);
#if defined(__arm__) && !defined(__APPLE__)
#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))
typedef enum {
_UVRSC_CORE = 0, /* integer register */
_UVRSC_VFP = 1, /* vfp */
@@ -158,14 +203,12 @@ typedef enum {
_UVRSR_FAILED = 2
} _Unwind_VRS_Result;
#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__)
typedef uint32_t _Unwind_State;
#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)
#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)
#define _US_ACTION_MASK ((_Unwind_State)3)
#define _US_FORCE_UNWIND ((_Unwind_State)8)
#endif
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
@@ -224,13 +267,12 @@ _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
/* DWARF EH functions; currently not available on Darwin/ARM */
#if !defined(__APPLE__) || !defined(__arm__)
_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Stop_Fn, void *);
void _Unwind_DeleteException(struct _Unwind_Exception *);
void _Unwind_Resume(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,
void *);
void _Unwind_DeleteException(_Unwind_Exception *);
void _Unwind_Resume(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);
#endif
@@ -241,11 +283,11 @@ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,
_Unwind_Stop_Fn, void *);
void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
void _Unwind_SjLj_Resume(_Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);
void *_Unwind_FindEnclosingFunction(void *);

98
c_headers/vaesintrin.h Normal file
View File

@@ -0,0 +1,98 @@
/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __VAESINTRIN_H
#define __VAESINTRIN_H
/* Default attributes for YMM forms. Used by the _mm256_* functions in this
 * header: always inlined, marked nodebug, compiled for the "vaes" target
 * feature, with a minimum vector width of 256. Undefined again at the end of
 * the header. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256)))
/* Default attributes for ZMM forms. Used by the _mm512_* functions in this
 * header: same as above, but the target features are "avx512f,vaes" and the
 * minimum vector width is 512. Undefined again at the end of the header. */
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
/* 256-bit AESENC wrapper. Reinterprets both operands as __v4di, hands them to
 * __builtin_ia32_aesenc256, and returns the builtin's result cast to __m256i.
 * NOTE(review): going by the intrinsic name, this is presumably one AES
 * encryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
{
  const __v4di __lanes_a = (__v4di) __A;
  const __v4di __lanes_b = (__v4di) __B;

  return (__m256i) __builtin_ia32_aesenc256(__lanes_a, __lanes_b);
}
/* 512-bit AESENC wrapper. Reinterprets both operands as __v8di, hands them to
 * __builtin_ia32_aesenc512, and returns the builtin's result cast to __m512i.
 * NOTE(review): going by the intrinsic name, this is presumably one AES
 * encryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesenc_epi128(__m512i __A, __m512i __B)
{
  const __v8di __lanes_a = (__v8di) __A;
  const __v8di __lanes_b = (__v8di) __B;

  return (__m512i) __builtin_ia32_aesenc512(__lanes_a, __lanes_b);
}
/* 256-bit AESDEC wrapper. Reinterprets both operands as __v4di, hands them to
 * __builtin_ia32_aesdec256, and returns the builtin's result cast to __m256i.
 * NOTE(review): going by the intrinsic name, this is presumably one AES
 * decryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesdec_epi128(__m256i __A, __m256i __B)
{
  const __v4di __lanes_a = (__v4di) __A;
  const __v4di __lanes_b = (__v4di) __B;

  return (__m256i) __builtin_ia32_aesdec256(__lanes_a, __lanes_b);
}
/* 512-bit AESDEC wrapper. Reinterprets both operands as __v8di, hands them to
 * __builtin_ia32_aesdec512, and returns the builtin's result cast to __m512i.
 * NOTE(review): going by the intrinsic name, this is presumably one AES
 * decryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesdec_epi128(__m512i __A, __m512i __B)
{
  const __v8di __lanes_a = (__v8di) __A;
  const __v8di __lanes_b = (__v8di) __B;

  return (__m512i) __builtin_ia32_aesdec512(__lanes_a, __lanes_b);
}
/* 256-bit AESENCLAST wrapper. Reinterprets both operands as __v4di, hands
 * them to __builtin_ia32_aesenclast256, and returns the builtin's result cast
 * to __m256i.
 * NOTE(review): going by the intrinsic name, this is presumably the final AES
 * encryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenclast_epi128(__m256i __A, __m256i __B)
{
  const __v4di __lanes_a = (__v4di) __A;
  const __v4di __lanes_b = (__v4di) __B;

  return (__m256i) __builtin_ia32_aesenclast256(__lanes_a, __lanes_b);
}
/* 512-bit AESENCLAST wrapper. Reinterprets both operands as __v8di, hands
 * them to __builtin_ia32_aesenclast512, and returns the builtin's result cast
 * to __m512i.
 * NOTE(review): going by the intrinsic name, this is presumably the final AES
 * encryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesenclast_epi128(__m512i __A, __m512i __B)
{
  const __v8di __lanes_a = (__v8di) __A;
  const __v8di __lanes_b = (__v8di) __B;

  return (__m512i) __builtin_ia32_aesenclast512(__lanes_a, __lanes_b);
}
/* 256-bit AESDECLAST wrapper. Reinterprets both operands as __v4di, hands
 * them to __builtin_ia32_aesdeclast256, and returns the builtin's result cast
 * to __m256i.
 * NOTE(review): going by the intrinsic name, this is presumably the final AES
 * decryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesdeclast_epi128(__m256i __A, __m256i __B)
{
  const __v4di __lanes_a = (__v4di) __A;
  const __v4di __lanes_b = (__v4di) __B;

  return (__m256i) __builtin_ia32_aesdeclast256(__lanes_a, __lanes_b);
}
/* 512-bit AESDECLAST wrapper. Reinterprets both operands as __v8di, hands
 * them to __builtin_ia32_aesdeclast512, and returns the builtin's result cast
 * to __m512i.
 * NOTE(review): going by the intrinsic name, this is presumably the final AES
 * decryption round per 128-bit lane with __B as the round key -- confirm
 * against the Intel Intrinsics Guide. */
static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesdeclast_epi128(__m512i __A, __m512i __B)
{
  const __v8di __lanes_a = (__v8di) __A;
  const __v8di __lanes_b = (__v8di) __B;

  return (__m512i) __builtin_ia32_aesdeclast512(__lanes_a, __lanes_b);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_F
#endif

View File

@@ -0,0 +1,42 @@
/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
*
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __VPCLMULQDQINTRIN_H
#define __VPCLMULQDQINTRIN_H
/* 256-bit VPCLMULQDQ wrapper. Expands to a call to
 * __builtin_ia32_pclmulqdq256 with A and B converted to __v4di (via __m256i)
 * and the selector I converted to char; the result is cast to __m256i.
 * It is a macro rather than a function, so I reaches the builtin as written.
 * NOTE(review): presumably I must be an integer constant expression --
 * confirm against the builtin's definition.
 *
 * The whole replacement list is parenthesized. Without the outer parentheses
 * a postfix operator written after the invocation (for example a vector
 * subscript) would bind to the builtin call before the leading (__m256i)
 * cast was applied. */
#define _mm256_clmulepi64_epi128(A, B, I) \
  ((__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
                                        (__v4di)(__m256i)(B), \
                                        (char)(I)))
/* 512-bit VPCLMULQDQ wrapper. Expands to a call to
 * __builtin_ia32_pclmulqdq512 with A and B converted to __v8di (via __m512i)
 * and the selector I converted to char; the result is cast to __m512i.
 * It is a macro rather than a function, so I reaches the builtin as written.
 * NOTE(review): presumably I must be an integer constant expression --
 * confirm against the builtin's definition.
 *
 * The whole replacement list is parenthesized. Without the outer parentheses
 * a postfix operator written after the invocation (for example a vector
 * subscript) would bind to the builtin call before the leading (__m512i)
 * cast was applied. */
#define _mm512_clmulepi64_epi128(A, B, I) \
  ((__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
                                        (__v8di)(__m512i)(B), \
                                        (char)(I)))
#endif /* __VPCLMULQDQINTRIN_H */

56
c_headers/waitpkgintrin.h Normal file
View File

@@ -0,0 +1,56 @@
/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __WAITPKGINTRIN_H
#define __WAITPKGINTRIN_H
/* Define the default attributes for the functions in this file: always
 * inlined, marked nodebug, and compiled for the "waitpkg" target feature.
 * The macro is undefined again at the end of the header. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("waitpkg")))
/* Forwards __address unchanged to __builtin_ia32_umonitor; returns nothing.
 * NOTE(review): presumably this arms address monitoring for a later
 * _umwait -- confirm against the Intel WAITPKG documentation. */
static __inline__ void __DEFAULT_FN_ATTRS
_umonitor(void *__address)
{
  __builtin_ia32_umonitor(__address);
}
/* Calls __builtin_ia32_umwait with __control and the 64-bit __counter split
 * into two 32-bit arguments: the upper 32 bits first, then the lower 32 bits.
 * Returns the builtin's result as unsigned char.
 * NOTE(review): presumably __counter is a timestamp-counter deadline and the
 * return value reflects a CPU flag -- confirm against the Intel WAITPKG
 * documentation. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_umwait(unsigned int __control, unsigned long long __counter)
{
  const unsigned int __counter_hi = (unsigned int)(__counter >> 32);
  const unsigned int __counter_lo = (unsigned int)__counter;

  return __builtin_ia32_umwait(__control, __counter_hi, __counter_lo);
}
/* Calls __builtin_ia32_tpause with __control and the 64-bit __counter split
 * into two 32-bit arguments: the upper 32 bits first, then the lower 32 bits.
 * Returns the builtin's result as unsigned char.
 * NOTE(review): presumably __counter is a timestamp-counter deadline and the
 * return value reflects a CPU flag -- confirm against the Intel WAITPKG
 * documentation. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_tpause(unsigned int __control, unsigned long long __counter)
{
  const unsigned int __counter_hi = (unsigned int)(__counter >> 32);
  const unsigned int __counter_lo = (unsigned int)__counter;

  return __builtin_ia32_tpause(__control, __counter_hi, __counter_lo);
}
#undef __DEFAULT_FN_ATTRS
#endif /* __WAITPKGINTRIN_H */

View File

@@ -0,0 +1,38 @@
/*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __WBNOINVDINTRIN_H
#define __WBNOINVDINTRIN_H
/* Invokes __builtin_ia32_wbnoinvd; takes no arguments and returns nothing.
 * The function is always inlined, marked nodebug, and compiled for the
 * "wbnoinvd" target feature.
 * NOTE(review): presumably this writes back caches without invalidating
 * them, as the instruction name suggests -- confirm against the Intel SDM. */
static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("wbnoinvd")))
    _wbnoinvd(void)
{
  __builtin_ia32_wbnoinvd();
}
#endif /* __WBNOINVDINTRIN_H */

View File

@@ -21,8 +21,8 @@
*===-----------------------------------------------------------------------===
*/
#ifndef _WMMINTRIN_H
#define _WMMINTRIN_H
#ifndef __WMMINTRIN_H
#define __WMMINTRIN_H
#include <emmintrin.h>
@@ -30,4 +30,4 @@
#include <__wmmintrin_pclmul.h>
#endif /* _WMMINTRIN_H */
#endif /* __WMMINTRIN_H */

View File

@@ -32,26 +32,6 @@
#include <mm3dnow.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)
#include <prfchwintrin.h>
#endif
@@ -76,10 +56,6 @@
#include <lwpintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)
#include <mwaitxintrin.h>
#endif
@@ -88,4 +64,5 @@
#include <clzerointrin.h>
#endif
#endif /* __X86INTRIN_H */

File diff suppressed because it is too large Load Diff

View File

@@ -31,7 +31,8 @@
#include <fma4intrin.h>
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256)))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
@@ -201,7 +202,7 @@ _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
@@ -237,17 +238,17 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
}
#define _mm_roti_epi8(A, N) __extension__ ({ \
(__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); })
#define _mm_roti_epi8(A, N) \
(__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))
#define _mm_roti_epi16(A, N) __extension__ ({ \
(__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); })
#define _mm_roti_epi16(A, N) \
(__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))
#define _mm_roti_epi32(A, N) __extension__ ({ \
(__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); })
#define _mm_roti_epi32(A, N) \
(__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))
#define _mm_roti_epi64(A, N) __extension__ ({ \
(__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
#define _mm_roti_epi64(A, N) \
(__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A, __m128i __B)
@@ -297,37 +298,37 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
}
#define _mm_com_epu8(A, B, N) __extension__ ({ \
#define _mm_com_epu8(A, B, N) \
(__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (N)); })
(__v16qi)(__m128i)(B), (N))
#define _mm_com_epu16(A, B, N) __extension__ ({ \
#define _mm_com_epu16(A, B, N) \
(__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
(__v8hi)(__m128i)(B), (N)); })
(__v8hi)(__m128i)(B), (N))
#define _mm_com_epu32(A, B, N) __extension__ ({ \
#define _mm_com_epu32(A, B, N) \
(__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
(__v4si)(__m128i)(B), (N)); })
(__v4si)(__m128i)(B), (N))
#define _mm_com_epu64(A, B, N) __extension__ ({ \
#define _mm_com_epu64(A, B, N) \
(__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
(__v2di)(__m128i)(B), (N)); })
(__v2di)(__m128i)(B), (N))
#define _mm_com_epi8(A, B, N) __extension__ ({ \
#define _mm_com_epi8(A, B, N) \
(__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
(__v16qi)(__m128i)(B), (N)); })
(__v16qi)(__m128i)(B), (N))
#define _mm_com_epi16(A, B, N) __extension__ ({ \
#define _mm_com_epi16(A, B, N) \
(__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
(__v8hi)(__m128i)(B), (N)); })
(__v8hi)(__m128i)(B), (N))
#define _mm_com_epi32(A, B, N) __extension__ ({ \
#define _mm_com_epi32(A, B, N) \
(__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
(__v4si)(__m128i)(B), (N)); })
(__v4si)(__m128i)(B), (N))
#define _mm_com_epi64(A, B, N) __extension__ ({ \
#define _mm_com_epi64(A, B, N) \
(__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
(__v2di)(__m128i)(B), (N)); })
(__v2di)(__m128i)(B), (N))
#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
@@ -722,24 +723,24 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B)
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
}
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
#define _mm_permute2_pd(X, Y, C, I) \
(__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \
(__v2di)(__m128i)(C), (I)); })
(__v2di)(__m128i)(C), (I))
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
#define _mm256_permute2_pd(X, Y, C, I) \
(__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), \
(__v4di)(__m256i)(C), (I)); })
(__v4di)(__m256i)(C), (I))
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
#define _mm_permute2_ps(X, Y, C, I) \
(__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
(__v4si)(__m128i)(C), (I)); })
(__v4si)(__m128i)(C), (I))
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
#define _mm256_permute2_ps(X, Y, C, I) \
(__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), \
(__v8si)(__m256i)(C), (I)); })
(__v8si)(__m256i)(C), (I))
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)
@@ -765,18 +766,19 @@ _mm_frcz_pd(__m128d __A)
return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_frcz_ps(__m256 __A)
{
return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_frcz_pd(__m256d __A)
{
return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
}
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS256
#endif /* __XOPINTRIN_H */

View File

@@ -1,4 +1,4 @@
/*===---- xsavecintrin.h - XSAVEC intrinsic ------------------------------------===
/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal

View File

@@ -1,4 +1,4 @@
/*===---- xsaveintrin.h - XSAVE intrinsic ------------------------------------===
/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -33,23 +33,23 @@
static __inline__ void __DEFAULT_FN_ATTRS
_xsave(void *__p, unsigned long long __m) {
return __builtin_ia32_xsave(__p, __m);
__builtin_ia32_xsave(__p, __m);
}
static __inline__ void __DEFAULT_FN_ATTRS
_xrstor(void *__p, unsigned long long __m) {
return __builtin_ia32_xrstor(__p, __m);
__builtin_ia32_xrstor(__p, __m);
}
#ifdef __x86_64__
static __inline__ void __DEFAULT_FN_ATTRS
_xsave64(void *__p, unsigned long long __m) {
return __builtin_ia32_xsave64(__p, __m);
__builtin_ia32_xsave64(__p, __m);
}
static __inline__ void __DEFAULT_FN_ATTRS
_xrstor64(void *__p, unsigned long long __m) {
return __builtin_ia32_xrstor64(__p, __m);
__builtin_ia32_xrstor64(__p, __m);
}
#endif

View File

@@ -1,4 +1,4 @@
/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ------------------------------------===
/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -33,13 +33,13 @@
static __inline__ void __DEFAULT_FN_ATTRS
_xsaveopt(void *__p, unsigned long long __m) {
return __builtin_ia32_xsaveopt(__p, __m);
__builtin_ia32_xsaveopt(__p, __m);
}
#ifdef __x86_64__
static __inline__ void __DEFAULT_FN_ATTRS
_xsaveopt64(void *__p, unsigned long long __m) {
return __builtin_ia32_xsaveopt64(__p, __m);
__builtin_ia32_xsaveopt64(__p, __m);
}
#endif

Some files were not shown because too many files have changed in this diff Show More