blob c2e0d9f9 (22478B) - Raw
//! This module provides functions for working conveniently with SIMD (Single Instruction; Multiple Data),
//! which may offer a potential boost in performance on some targets by performing the same operations on
//! multiple elements at once.
//! Please be aware that some functions are known to not work on MIPS.

const std = @import("std");
const builtin = @import("builtin");

/// Suggests how many elements of type `T` to put in one vector for the given `cpu`, or null if
/// scalars are recommended.
/// The element width used for the computation is the bit size of `T` rounded up to a power of two,
/// with a minimum of 8 bits. Null is returned when no known vector feature is detected for the
/// architecture, or when the guessed vector width is not larger than one element.
/// For `bool` on x86 targets that prefer mask registers, 64 is returned directly.
pub fn suggestVectorSizeForCpu(comptime T: type, comptime cpu: std.Target.Cpu) ?usize {
    // This is guesswork, if you have better suggestions can add it or edit the current here
    // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it
    const element_bit_size = @max(8, std.math.ceilPowerOfTwo(u16, @bitSizeOf(T)) catch unreachable);
    const vector_bit_size: u16 = blk: {
        if (cpu.arch.isX86()) {
            // The feature set must be passed explicitly; without it this line fails to compile for `bool`.
            if (T == bool and std.Target.x86.featureSetHas(cpu.features, .prefer_mask_registers)) return 64;
            if (std.Target.x86.featureSetHas(cpu.features, .avx512f) and !std.Target.x86.featureSetHasAny(cpu.features, .{ .prefer_256_bit, .prefer_128_bit })) break :blk 512;
            if (std.Target.x86.featureSetHasAny(cpu.features, .{ .prefer_256_bit, .avx2 }) and !std.Target.x86.featureSetHas(cpu.features, .prefer_128_bit)) break :blk 256;
            if (std.Target.x86.featureSetHas(cpu.features, .sse)) break :blk 128;
            if (std.Target.x86.featureSetHasAny(cpu.features, .{ .mmx, .@"3dnow" })) break :blk 64;
        } else if (cpu.arch.isARM()) {
            if (std.Target.arm.featureSetHas(cpu.features, .neon)) break :blk 128;
        } else if (cpu.arch.isAARCH64()) {
            // SVE allows up to 2048 bits in the specification, as of 2022 the most powerful machine has implemented 512-bit
            // I think is safer to just be on 128 until is more common
            // TODO: Check on this return when bigger values are more common
            if (std.Target.aarch64.featureSetHas(cpu.features, .sve)) break :blk 128;
            if (std.Target.aarch64.featureSetHas(cpu.features, .neon)) break :blk 128;
        } else if (cpu.arch.isPPC() or cpu.arch.isPPC64()) {
            if (std.Target.powerpc.featureSetHas(cpu.features, .altivec)) break :blk 128;
        } else if (cpu.arch.isMIPS()) {
            if (std.Target.mips.featureSetHas(cpu.features, .msa)) break :blk 128;
            // TODO: Test MIPS capability to handle bigger vectors
            //       In theory MDMX and by extension mips3d have 32 registers of 64 bits which can use in parallel
            //       for multiple processing, but I don't know what's optimal here, if using
            //       the 2048 bits or using just 64 per vector or something in between
            if (std.Target.mips.featureSetHas(cpu.features, .mips3d)) break :blk 64;
        } else if (cpu.arch.isRISCV()) {
            // in risc-v the Vector Extension allows configurable vector sizes, but a standard size of 128 is a safe estimate
            if (std.Target.riscv.featureSetHas(cpu.features, .v)) break :blk 128;
        } else if (cpu.arch.isSPARC()) {
            // TODO: Test Sparc capability to handle bigger vectors
            //       In theory Sparc have 32 registers of 64 bits which can use in parallel
            //       for multiple processing, but I don't know what's optimal here, if using
            //       the 2048 bits or using just 64 per vector or something in between
            if (std.Target.sparc.featureSetHasAny(cpu.features, .{ .vis, .vis2, .vis3 })) break :blk 64;
        } else if (cpu.arch.isWasm()) {
            if (std.Target.wasm.featureSetHas(cpu.features, .simd128)) break :blk 128;
        }
        return null;
    };
    if (vector_bit_size <= element_bit_size) return null;

    return @divExact(vector_bit_size, element_bit_size);
}

/// Suggests a target-dependent vector size for a given type, or null if scalars are recommended.
/// Not yet implemented for every CPU architecture.
pub fn suggestVectorSize(comptime T: type) ?usize {
    return suggestVectorSizeForCpu(T, builtin.cpu);
}

test "suggestVectorSizeForCpu works with signed and unsigned values" {
    comptime var cpu = std.Target.Cpu.baseline(std.Target.Cpu.Arch.x86_64);
    comptime cpu.features.addFeature(@enumToInt(std.Target.x86.Feature.avx512f));
    const signed_integer_size = suggestVectorSizeForCpu(i32, cpu).?;
    const unsigned_integer_size = suggestVectorSizeForCpu(u32, cpu).?;
    try std.testing.expectEqual(@as(usize, 16), unsigned_integer_size);
    try std.testing.expectEqual(@as(usize, 16), signed_integer_size);
}

test "suggestVectorSizeForCpu accepts bool when mask registers are preferred" {
    comptime var cpu = std.Target.Cpu.baseline(std.Target.Cpu.Arch.x86_64);
    comptime cpu.features.addFeature(@enumToInt(std.Target.x86.Feature.prefer_mask_registers));
    try std.testing.expectEqual(@as(?usize, 64), suggestVectorSizeForCpu(bool, cpu));
}

/// Returns the number of elements of the given vector or array type.
/// Any other kind of type is a compile error.
fn vectorLength(comptime VectorType: type) comptime_int {
    return switch (@typeInfo(VectorType)) {
        .Vector => |info| info.len,
        .Array => |info| info.len,
        else => @compileError("Invalid type " ++ @typeName(VectorType)),
    };
}

/// Returns the smallest type of unsigned ints capable of indexing any element within the given vector type.
pub fn VectorIndex(comptime VectorType: type) type {
    return std.math.IntFittingRange(0, vectorLength(VectorType) - 1);
}

/// Returns the smallest type of unsigned ints capable of holding the length of the given vector type.
pub fn VectorCount(comptime VectorType: type) type {
    return std.math.IntFittingRange(0, vectorLength(VectorType));
}

/// Returns a vector containing the first `len` integers in order from 0 to `len`-1.
/// For example, `iota(i32, 8)` will return a vector containing `.{0, 1, 2, 3, 4, 5, 6, 7}`.
pub inline fn iota(comptime T: type, comptime len: usize) @Vector(len, T) {
    comptime {
        var out: [len]T = undefined;
        for (&out, 0..) |*element, i| {
            element.* = switch (@typeInfo(T)) {
                .Int => @intCast(T, i),
                .Float => @intToFloat(T, i),
                else => @compileError("Can't use type " ++ @typeName(T) ++ " in iota."),
            };
        }
        return @as(@Vector(len, T), out);
    }
}

/// Returns a vector containing the same elements as the input, but repeated until the desired length is reached.
/// For example, `repeat(8, [_]u32{1, 2, 3})` will return a vector containing `.{1, 2, 3, 1, 2, 3, 1, 2}`.
pub fn repeat(comptime len: usize, vec: anytype) @Vector(len, std.meta.Child(@TypeOf(vec))) {
    const Child = std.meta.Child(@TypeOf(vec));

    return @shuffle(Child, vec, undefined, iota(i32, len) % @splat(len, @intCast(i32, vectorLength(@TypeOf(vec)))));
}

/// Returns a vector containing all elements of the first vector at the lower indices followed by all elements of the second vector
/// at the higher indices.
pub fn join(a: anytype, b: anytype) @Vector(vectorLength(@TypeOf(a)) + vectorLength(@TypeOf(b)), std.meta.Child(@TypeOf(a))) {
    const Child = std.meta.Child(@TypeOf(a));
    const a_len = vectorLength(@TypeOf(a));
    const b_len = vectorLength(@TypeOf(b));

    // In a @shuffle mask, non-negative indices select from `a` and bitwise-negated indices select from `b`.
    return @shuffle(Child, a, b, @as([a_len]i32, iota(i32, a_len)) ++ @as([b_len]i32, ~iota(i32, b_len)));
}

/// Returns a vector whose elements alternates between those of each input vector.
/// For example, `interlace(.{[4]u32{11, 12, 13, 14}, [4]u32{21, 22, 23, 24}})` returns a vector containing `.{11, 21, 12, 22, 13, 23, 14, 24}`.
pub fn interlace(vecs: anytype) @Vector(vectorLength(@TypeOf(vecs[0])) * vecs.len, std.meta.Child(@TypeOf(vecs[0]))) {
    // interlace doesn't work on MIPS, for some reason.
    // Notes from earlier debug attempt:
    //  The indices are correct. The problem seems to be with the @shuffle builtin.
    //  On MIPS, the test that interlaces small_base gives { 0, 2, 0, 0, 64, 255, 248, 200, 0, 0 }.
    //  Calling this with two inputs seems to work fine, but I'll let the compile error trigger for all inputs, just to be safe.
    comptime if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why interlace() doesn't work on MIPS");

    const VecType = @TypeOf(vecs[0]);
    const vecs_arr = @as([vecs.len]VecType, vecs);
    const Child = std.meta.Child(@TypeOf(vecs_arr[0]));

    if (vecs_arr.len == 1) return vecs_arr[0];

    // Split the inputs into two halves (the first half gets the extra vector when the count is odd),
    // interlace each half recursively, then weave the two results together with one shuffle.
    const a_vec_count = (1 + vecs_arr.len) >> 1;
    const b_vec_count = vecs_arr.len >> 1;

    const a = interlace(@ptrCast(*const [a_vec_count]VecType, vecs_arr[0..a_vec_count]).*);
    const b = interlace(@ptrCast(*const [b_vec_count]VecType, vecs_arr[a_vec_count..]).*);

    const a_len = vectorLength(@TypeOf(a));
    const b_len = vectorLength(@TypeOf(b));
    const len = a_len + b_len;

    const indices = comptime blk: {
        const count_up = iota(i32, len);
        const cycle = @divFloor(count_up, @splat(len, @intCast(i32, vecs_arr.len)));
        const select_mask = repeat(len, join(@splat(a_vec_count, true), @splat(b_vec_count, false)));
        const a_indices = count_up - cycle * @splat(len, @intCast(i32, b_vec_count));
        const b_indices = shiftElementsRight(count_up - cycle * @splat(len, @intCast(i32, a_vec_count)), a_vec_count, 0);
        break :blk @select(i32, select_mask, a_indices, ~b_indices);
    };

    return @shuffle(Child, a, b, indices);
}

/// The contents of `interlaced` is evenly split between vec_count vectors that are returned as an array. They "take turns",
/// receiving one element from `interlaced` at a time.
/// The length of each returned vector is the length of `interlaced` divided by `vec_count` (integer division).
pub fn deinterlace(
    comptime vec_count: usize,
    interlaced: anytype,
) [vec_count]@Vector(
    vectorLength(@TypeOf(interlaced)) / vec_count,
    std.meta.Child(@TypeOf(interlaced)),
) {
    const vec_len = vectorLength(@TypeOf(interlaced)) / vec_count;
    const Child = std.meta.Child(@TypeOf(interlaced));

    var out: [vec_count]@Vector(vec_len, Child) = undefined;

    comptime var i: usize = 0; // for-loops don't work for this, apparently.
    inline while (i < out.len) : (i += 1) {
        // Output vector `i` takes elements i, i + vec_count, i + 2 * vec_count, ...
        const indices = comptime iota(i32, vec_len) * @splat(vec_len, @intCast(i32, vec_count)) + @splat(vec_len, @intCast(i32, i));
        out[i] = @shuffle(Child, interlaced, undefined, indices);
    }

    return out;
}

/// Returns a vector of `count` consecutive elements of `vec`, starting at index `first`.
/// It is a compile error if `first + count` exceeds the length of `vec`.
pub fn extract(
    vec: anytype,
    comptime first: VectorIndex(@TypeOf(vec)),
    comptime count: VectorCount(@TypeOf(vec)),
) @Vector(count, std.meta.Child(@TypeOf(vec))) {
    const Child = std.meta.Child(@TypeOf(vec));
    const len = vectorLength(@TypeOf(vec));

    // Both operands are comptime-known, so reject an out-of-range request at compile time.
    if (@intCast(comptime_int, first) + @intCast(comptime_int, count) > len) {
        @compileError("extract: first + count exceeds the length of the vector");
    }

    return @shuffle(Child, vec, undefined, iota(i32, count) + @splat(count, @intCast(i32, first)));
}

test "vector patterns" {
    if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) {
        // https://github.com/ziglang/zig/issues/12012
        return error.SkipZigTest;
    }
    const base = @Vector(4, u32){ 10, 20, 30, 40 };
    const other_base = @Vector(4, u32){ 55, 66, 77, 88 };

    const small_bases = [5]@Vector(2, u8){
        @Vector(2, u8){ 0, 1 },
        @Vector(2, u8){ 2, 3 },
        @Vector(2, u8){ 4, 5 },
        @Vector(2, u8){ 6, 7 },
        @Vector(2, u8){ 8, 9 },
    };

    try std.testing.expectEqual([6]u32{ 10, 20, 30, 40, 10, 20 }, repeat(6, base));
    try std.testing.expectEqual([8]u32{ 10, 20, 30, 40, 55, 66, 77, 88 }, join(base, other_base));
    try std.testing.expectEqual([2]u32{ 20, 30 }, extract(base, 1, 2));

    if (comptime !builtin.cpu.arch.isMIPS()) {
        try std.testing.expectEqual([8]u32{ 10, 55, 20, 66, 30, 77, 40, 88 }, interlace(.{ base, other_base }));

        const small_braid = interlace(small_bases);
        try std.testing.expectEqual([10]u8{ 0, 2, 4, 6, 8, 1, 3, 5, 7, 9 }, small_braid);
        try std.testing.expectEqual(small_bases, deinterlace(small_bases.len, small_braid));
    }
}

/// Joins two vectors, shifts them leftwards (towards lower indices) and extracts the leftmost elements into a vector the size of a and b.
pub fn mergeShift(a: anytype, b: anytype, comptime shift: VectorCount(@TypeOf(a, b))) @TypeOf(a, b) {
    const len = vectorLength(@TypeOf(a, b));

    return extract(join(a, b), shift, len);
}

/// Elements are shifted rightwards (towards higher indices). New elements are added to the left, and the rightmost elements are cut off
/// so that the size of the vector stays the same.
pub fn shiftElementsRight(vec: anytype, comptime amount: VectorCount(@TypeOf(vec)), shift_in: std.meta.Child(@TypeOf(vec))) @TypeOf(vec) {
    // It may be possible to implement shifts and rotates with a runtime-friendly slice of two joined vectors, as the length of the
    // slice would be comptime-known. This would permit vector shifts and rotates by a non-comptime-known amount.
    // However, I am unsure whether compiler optimizations would handle that well enough on all platforms.
    const len = vectorLength(@TypeOf(vec));

    return mergeShift(@splat(len, shift_in), vec, len - amount);
}

/// Elements are shifted leftwards (towards lower indices). New elements are added to the right, and the leftmost elements are cut off
/// so that no elements with indices below 0 remain.
pub fn shiftElementsLeft(vec: anytype, comptime amount: VectorCount(@TypeOf(vec)), shift_in: std.meta.Child(@TypeOf(vec))) @TypeOf(vec) {
    const len = vectorLength(@TypeOf(vec));

    return mergeShift(vec, @splat(len, shift_in), amount);
}

/// Elements are shifted leftwards (towards lower indices). Elements that leave to the left will reappear to the right in the same order.
pub fn rotateElementsLeft(vec: anytype, comptime amount: VectorCount(@TypeOf(vec))) @TypeOf(vec) {
    return mergeShift(vec, vec, amount);
}

/// Elements are shifted rightwards (towards higher indices). Elements that leave to the right will reappear to the left in the same order.
pub fn rotateElementsRight(vec: anytype, comptime amount: VectorCount(@TypeOf(vec))) @TypeOf(vec) {
    return rotateElementsLeft(vec, vectorLength(@TypeOf(vec)) - amount);
}

/// Returns a vector containing the elements of `vec` in reverse order.
pub fn reverseOrder(vec: anytype) @TypeOf(vec) {
    const Child = std.meta.Child(@TypeOf(vec));
    const len = vectorLength(@TypeOf(vec));

    return @shuffle(Child, vec, undefined, @splat(len, @intCast(i32, len) - 1) - iota(i32, len));
}

test "vector shifting" {
    const base = @Vector(4, u32){ 10, 20, 30, 40 };

    try std.testing.expectEqual([4]u32{ 30, 40, 999, 999 }, shiftElementsLeft(base, 2, 999));
    try std.testing.expectEqual([4]u32{ 999, 999, 10, 20 }, shiftElementsRight(base, 2, 999));
    try std.testing.expectEqual([4]u32{ 20, 30, 40, 10 }, rotateElementsLeft(base, 1));
    try std.testing.expectEqual([4]u32{ 40, 10, 20, 30 }, rotateElementsRight(base, 1));
    try std.testing.expectEqual([4]u32{ 40, 30, 20, 10 }, reverseOrder(base));
}

/// Returns the lowest index at which `vec` holds `true`, or null if no element is `true`.
pub fn firstTrue(vec: anytype) ?VectorIndex(@TypeOf(vec)) {
    const len = vectorLength(@TypeOf(vec));
    const IndexInt = VectorIndex(@TypeOf(vec));

    if (!@reduce(.Or, vec)) {
        return null;
    }
    // False lanes get the largest index value so that they never win the minimum.
    const indices = @select(IndexInt, vec, iota(IndexInt, len), @splat(len, ~@as(IndexInt, 0)));
    return @reduce(.Min, indices);
}

/// Returns the highest index at which `vec` holds `true`, or null if no element is `true`.
pub fn lastTrue(vec: anytype) ?VectorIndex(@TypeOf(vec)) {
    const len = vectorLength(@TypeOf(vec));
    const IndexInt = VectorIndex(@TypeOf(vec));

    if (!@reduce(.Or, vec)) {
        return null;
    }
    // False lanes get index 0 so that they never win the maximum over a true lane.
    const indices = @select(IndexInt, vec, iota(IndexInt, len), @splat(len, @as(IndexInt, 0)));
    return @reduce(.Max, indices);
}

/// Returns the number of elements of `vec` that are `true`.
pub fn countTrues(vec: anytype) VectorCount(@TypeOf(vec)) {
    const len = vectorLength(@TypeOf(vec));
    const CountIntType = VectorCount(@TypeOf(vec));

    const one_if_true = @select(CountIntType, vec, @splat(len, @as(CountIntType, 1)), @splat(len, @as(CountIntType, 0)));
    return @reduce(.Add, one_if_true);
}

/// Returns the lowest index at which `vec` equals `value`, or null if no element matches.
pub fn firstIndexOfValue(vec: anytype, value: std.meta.Child(@TypeOf(vec))) ?VectorIndex(@TypeOf(vec)) {
    const len = vectorLength(@TypeOf(vec));

    return firstTrue(vec == @splat(len, value));
}

/// Returns the highest index at which `vec` equals `value`, or null if no element matches.
pub fn lastIndexOfValue(vec: anytype, value: std.meta.Child(@TypeOf(vec))) ?VectorIndex(@TypeOf(vec)) {
    const len = vectorLength(@TypeOf(vec));

    return lastTrue(vec == @splat(len, value));
}

/// Returns the number of elements of `vec` that equal `value`.
pub fn countElementsWithValue(vec: anytype, value: std.meta.Child(@TypeOf(vec))) VectorCount(@TypeOf(vec)) {
    const len = vectorLength(@TypeOf(vec));

    return countTrues(vec == @splat(len, value));
}

test "vector searching" {
    const base = @Vector(8, u32){ 6, 4, 7, 4, 4, 2, 3, 7 };

    try std.testing.expectEqual(@as(?u3, 1), firstIndexOfValue(base, 4));
    try std.testing.expectEqual(@as(?u3, 4), lastIndexOfValue(base, 4));
    try std.testing.expectEqual(@as(?u3, null), lastIndexOfValue(base, 99));
    try std.testing.expectEqual(@as(u4, 3), countElementsWithValue(base, 4));
}

/// Same as prefixScan, but with a user-provided, mathematically associative function.
pub fn prefixScanWithFunc(
    comptime hop: isize,
    vec: anytype,
    /// The error type that `func` might return. Set this to `void` if `func` doesn't return an error union.
    comptime ErrorType: type,
    comptime func: fn (@TypeOf(vec), @TypeOf(vec)) if (ErrorType == void) @TypeOf(vec) else ErrorType!@TypeOf(vec),
    /// When one operand of the operation performed by `func` is this value, the result must equal the other operand.
    /// For example, this should be 0 for addition or 1 for multiplication.
    comptime identity: std.meta.Child(@TypeOf(vec)),
) if (ErrorType == void) @TypeOf(vec) else ErrorType!@TypeOf(vec) {
    // I haven't debugged this, but it might be a cousin of sorts to what's going on with interlace.
    comptime if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why prefixScan doesn't work on MIPS");

    const len = vectorLength(@TypeOf(vec));

    if (hop == 0) @compileError("hop can not be 0; you'd be going nowhere forever!");
    const abs_hop = if (hop < 0) -hop else hop;

    // Each round combines the accumulator with a copy of itself shifted by twice the previous distance;
    // lanes that have no partner receive `identity`, which must leave them unchanged.
    var acc = vec;
    comptime var i = 0;
    inline while ((abs_hop << i) < len) : (i += 1) {
        const shifted = if (hop < 0) shiftElementsLeft(acc, abs_hop << i, identity) else shiftElementsRight(acc, abs_hop << i, identity);

        acc = if (ErrorType == void) func(acc, shifted) else try func(acc, shifted);
    }
    return acc;
}

/// Returns a vector whose elements are the result of performing the specified operation on the corresponding
/// element of the input vector and every hop'th element that came before it (or after, if hop is negative).
/// Supports the same operations as the @reduce() builtin. Takes O(logN) to compute.
/// The scan is not linear, which may affect floating point errors. This may affect the determinism of
/// algorithms that use this function.
pub fn prefixScan(comptime op: std.builtin.ReduceOp, comptime hop: isize, vec: anytype) @TypeOf(vec) {
    const VecType = @TypeOf(vec);
    const Child = std.meta.Child(VecType);
    const len = vectorLength(VecType);

    const identity = comptime switch (@typeInfo(Child)) {
        .Bool => switch (op) {
            .Or, .Xor => false,
            .And => true,
            else => @compileError("Invalid prefixScan operation " ++ @tagName(op) ++ " for vector of booleans."),
        },
        .Int => switch (op) {
            .Max => std.math.minInt(Child),
            .Add, .Or, .Xor => 0,
            .Mul => 1,
            // The identity of bitwise AND is "all bits set". For signed integers that is -1, not
            // maxInt(Child): maxInt has the sign bit clear and would strip the sign of negative elements.
            .And => ~@as(Child, 0),
            .Min => std.math.maxInt(Child),
        },
        .Float => switch (op) {
            .Max => -std.math.inf(Child),
            .Add => 0,
            .Mul => 1,
            .Min => std.math.inf(Child),
            else => @compileError("Invalid prefixScan operation " ++ @tagName(op) ++ " for vector of floats."),
        },
        else => @compileError("Invalid type " ++ @typeName(VecType) ++ " for prefixScan."),
    };

    const fn_container = struct {
        fn opFn(a: VecType, b: VecType) VecType {
            return if (Child == bool) switch (op) {
                .And => @select(bool, a, b, @splat(len, false)),
                .Or => @select(bool, a, @splat(len, true), b),
                .Xor => a != b,
                else => unreachable,
            } else switch (op) {
                .And => a & b,
                .Or => a | b,
                .Xor => a ^ b,
                .Add => a + b,
                .Mul => a * b,
                .Min => @min(a, b),
                .Max => @max(a, b),
            };
        }
    };

    return prefixScanWithFunc(hop, vec, void, fn_container.opFn, identity);
}

test "vector prefix scan" {
    if (comptime builtin.cpu.arch.isMIPS()) {
        return error.SkipZigTest;
    }

    if (builtin.zig_backend == .stage2_llvm) {
        // Regressed in LLVM 14:
        // https://github.com/llvm/llvm-project/issues/55522
        return error.SkipZigTest;
    }

    const int_base = @Vector(4, i32){ 11, 23, 9, -21 };
    const float_base = @Vector(4, f32){ 2, 0.5, -10, 6.54321 };
    const bool_base = @Vector(4, bool){ true, false, true, false };

    try std.testing.expectEqual(iota(u8, 32) + @splat(32, @as(u8, 1)), prefixScan(.Add, 1, @splat(32, @as(u8, 1))));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 3, 1, 1 }, prefixScan(.And, 1, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 31, 31, -1 }, prefixScan(.Or, 1, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 28, 21, -2 }, prefixScan(.Xor, 1, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 34, 43, 22 }, prefixScan(.Add, 1, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 253, 2277, -47817 }, prefixScan(.Mul, 1, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 11, 9, -21 }, prefixScan(.Min, 1, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 23, 23, 23 }, prefixScan(.Max, 1, int_base));

    // A negative leading element must keep its sign bit when combined with the shifted-in identity.
    try std.testing.expectEqual(@Vector(4, i32){ -21, 1, 1, 1 }, prefixScan(.And, 1, @Vector(4, i32){ -21, 5, 3, 9 }));

    // Trying to predict all inaccuracies when adding and multiplying floats with prefixScans would be a mess, so we don't test those.
    try std.testing.expectEqual(@Vector(4, f32){ 2, 0.5, -10, -10 }, prefixScan(.Min, 1, float_base));
    try std.testing.expectEqual(@Vector(4, f32){ 2, 2, 2, 6.54321 }, prefixScan(.Max, 1, float_base));

    try std.testing.expectEqual(@Vector(4, bool){ true, true, false, false }, prefixScan(.Xor, 1, bool_base));
    try std.testing.expectEqual(@Vector(4, bool){ true, true, true, true }, prefixScan(.Or, 1, bool_base));
    try std.testing.expectEqual(@Vector(4, bool){ true, false, false, false }, prefixScan(.And, 1, bool_base));

    try std.testing.expectEqual(@Vector(4, i32){ 11, 23, 20, 2 }, prefixScan(.Add, 2, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 22, 11, -12, -21 }, prefixScan(.Add, -1, int_base));
    try std.testing.expectEqual(@Vector(4, i32){ 11, 23, 9, -10 }, prefixScan(.Add, 3, int_base));
}