std/crypto: make gimli slightly faster

Before:
       gimli-hash:        120 MiB/s
       gimli-aead:        130 MiB/s

After:
       gimli-hash:        195 MiB/s
       gimli-aead:        208 MiB/s

Also fixes in-place decryption.

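If the input and output buffers were the same, decryption used to fail: each
output byte was written before the matching ciphertext byte was copied into
the state, so the state absorbed plaintext instead of ciphertext.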

Propagate decryption errors in the benchmark with `try` instead of
`catch unreachable`, so that similar issues in future AEADs are detected in
every build mode, including ReleaseFast.
This commit is contained in:
Frank Denis
2020-09-28 23:23:32 +02:00
parent 5c6cd5e2c9
commit 868a46eb43
2 changed files with 22 additions and 14 deletions

View File

@@ -168,7 +168,7 @@ pub fn benchmarkAead(comptime Aead: anytype, comptime bytes: comptime_int) !u64
const start = timer.lap();
while (offset < bytes) : (offset += in.len) {
Aead.encrypt(in[0..], tag[0..], in[0..], &[_]u8{}, nonce, key);
Aead.decrypt(in[0..], in[0..], tag, &[_]u8{}, nonce, key) catch unreachable;
try Aead.decrypt(in[0..], in[0..], tag, &[_]u8{}, nonce, key);
}
mem.doNotOptimizeAway(&in);
const end = timer.read();

View File

@@ -40,8 +40,8 @@ pub const State = struct {
pub fn permute(self: *Self) void {
const state = &self.data;
var round = @as(u32, 24);
while (round > 0) : (round -= 1) {
comptime var round = @as(u32, 24);
inline while (round > 0) : (round -= 1) {
var column = @as(usize, 0);
while (column < 4) : (column += 1) {
const x = math.rotl(u32, state[column], 24);
@@ -249,15 +249,19 @@ pub const Aead = struct {
in = in[State.RATE..];
out = out[State.RATE..];
}) {
for (buf[0..State.RATE]) |*p, i| {
p.* ^= in[i];
out[i] = p.*;
const d = in[0..State.RATE];
for (d) |v, i| {
buf[i] ^= v;
}
for (d) |_, i| {
out[i] = buf[i];
}
state.permute();
}
for (buf[0..in.len]) |*p, i| {
p.* ^= in[i];
out[i] = p.*;
const d = in[0..];
for (d) |v, i| {
buf[i] ^= v;
out[i] = buf[i];
}
// XOR 1 into the next byte of the state
@@ -291,15 +295,19 @@ pub const Aead = struct {
in = in[State.RATE..];
out = out[State.RATE..];
}) {
for (buf[0..State.RATE]) |*p, i| {
out[i] = p.* ^ in[i];
p.* = in[i];
const d = in[0..State.RATE].*;
for (d) |v, i| {
out[i] = buf[i] ^ v;
}
for (d) |v, i| {
buf[i] = v;
}
state.permute();
}
for (buf[0..in.len]) |*p, i| {
out[i] = p.* ^ in[i];
p.* = in[i];
const d = in[i];
out[i] = p.* ^ d;
p.* = d;
}
// XOR 1 into the next byte of the state