From 62d0e7be7c2a3c72e50dd9474ea72dfd7ec586b9 Mon Sep 17 00:00:00 2001 From: Ivan Frade Date: Fri, 10 Feb 2023 12:21:01 -0800 Subject: [PATCH] UInt24Array: Array of unsigned ints encoded in 3 bytes. The object size index stores positions of objects in the main index (when ordered by sha1). These positions are per-pack and usually a pack has <16 million objects (there are exceptions but rather rare). It could save some memory storing these positions in three bytes instead of four. Note that these positions are sorted and always positive. Implement a wrapper around a byte[] to access and search "ints" while they are stored as unsigned 3 bytes. Change-Id: Iaa26ce8e2272e706e35fe4cdb648fb6ca7591972 --- .../storage/file/UInt24ArrayTest.java | 79 ++++++++++++++++ .../internal/storage/file/UInt24Array.java | 93 +++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/UInt24ArrayTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/UInt24Array.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/UInt24ArrayTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/UInt24ArrayTest.java new file mode 100644 index 000000000..a96c217e4 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/UInt24ArrayTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2023, Google LLC + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; + +import org.junit.Assert; +import org.junit.Test; + +public class UInt24ArrayTest { + + private static final byte[] DATA = { 0x00, 0x00, 0x00, // 0 + 0x00, 0x00, 0x05, // 5 + 0x00, 0x00, 0x0a, // 10 + 0x00, 0x00, 0x0f, // 15 + 0x00, 0x00, 0x14, // 20 + 0x00, 0x00, 0x19, // 25 + (byte) 0xff, 0x00, 0x00, // Uint with MSB=1 + (byte) 0xff, (byte) 0xff, (byte) 0xff, // MAX + }; + + private static final UInt24Array asArray = new UInt24Array(DATA); + + @Test + public void uInt24Array_size() { + assertEquals(8, asArray.size()); + } + + @Test + public void uInt24Array_get() { + assertEquals(0, asArray.get(0)); + assertEquals(5, asArray.get(1)); + assertEquals(10, asArray.get(2)); + assertEquals(15, asArray.get(3)); + assertEquals(20, asArray.get(4)); + assertEquals(25, asArray.get(5)); + assertEquals(0xff0000, asArray.get(6)); + assertEquals(0xffffff, asArray.get(7)); + assertThrows(IndexOutOfBoundsException.class, () -> asArray.get(9)); + } + + @Test + public void uInt24Array_getLastValue() { + assertEquals(0xffffff, asArray.getLastValue()); + } + + @Test + public void uInt24Array_find() { + assertEquals(0, asArray.binarySearch(0)); + assertEquals(1, asArray.binarySearch(5)); + assertEquals(2, asArray.binarySearch(10)); + assertEquals(3, asArray.binarySearch(15)); + assertEquals(4, asArray.binarySearch(20)); + assertEquals(5, asArray.binarySearch(25)); + assertEquals(6, asArray.binarySearch(0xff0000)); + assertEquals(7, asArray.binarySearch(0xffffff)); + assertThrows(IllegalArgumentException.class, + () -> asArray.binarySearch(Integer.MAX_VALUE)); + } + + @Test + public void uInt24Array_empty() { + Assert.assertTrue(UInt24Array.EMPTY.isEmpty()); + assertEquals(0, UInt24Array.EMPTY.size()); + assertEquals(-1, UInt24Array.EMPTY.binarySearch(1)); + assertThrows(IndexOutOfBoundsException.class, + () -> UInt24Array.EMPTY.getLastValue()); + assertThrows(IndexOutOfBoundsException.class, + () -> UInt24Array.EMPTY.get(0)); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/UInt24Array.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/UInt24Array.java new file mode 100644 index 000000000..3a0a18bdb --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/UInt24Array.java @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2023, Google LLC + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * https://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.file; + +/** + * A view of a byte[] as a list of integers stored in 3 bytes. + * + * The ints are stored in big-endian ("network order"), so + * byte[]{aa,bb,cc} becomes the int 0x00aabbcc + */ +final class UInt24Array { + + public static final UInt24Array EMPTY = new UInt24Array( + new byte[0]); + + private static final int ENTRY_SZ = 3; + + private final byte[] data; + + private final int size; + + UInt24Array(byte[] data) { + this.data = data; + this.size = data.length / ENTRY_SZ; + } + + boolean isEmpty() { + return size == 0; + } + + int size() { + return size; + } + + int get(int index) { + if (index < 0 || index >= size) { + throw new IndexOutOfBoundsException(index); + } + int offset = index * ENTRY_SZ; + int e = data[offset] & 0xff; + e <<= 8; + e |= data[offset + 1] & 0xff; + e <<= 8; + e |= data[offset + 2] & 0xff; + return e; + } + + /** + * Search needle in the array. + * + * This assumes a sorted array. + * + * @param needle + * It cannot be bigger than 0xffffff (max unsigned three bytes). + * @return position of the needle in the array, -1 if not found. Runtime + * exception if the value is too big for 3 bytes. + */ + int binarySearch(int needle) { + if ((needle & 0xff000000) != 0) { + throw new IllegalArgumentException("Too big value for 3 bytes"); //$NON-NLS-1$ + } + if (size == 0) { + return -1; + } + int high = size; + if (high == 0) + return -1; + int low = 0; + do { + int mid = (low + high) >>> 1; + int cmp; + cmp = Integer.compare(needle, get(mid)); + if (cmp < 0) + high = mid; + else if (cmp == 0) { + return mid; + } else + low = mid + 1; + } while (low < high); + return -1; + } + + int getLastValue() { + return get(size - 1); + } +}