UInt24Array: Array of unsigned ints encoded in 3 bytes.

The object size index stores positions of objects in the main
index (when ordered by sha1). These positions are per-pack and usually
a pack has <16 million objects (there are exceptions but rather
rare). It could save some memory storing these positions in three bytes
instead of four. Note that these positions are sorted and always positive.

Implement a wrapper around a byte[] to access and search "ints" while
they are stored as unsigned 3 bytes.

Change-Id: Iaa26ce8e2272e706e35fe4cdb648fb6ca7591972
This commit is contained in:
Ivan Frade 2023-02-10 12:21:01 -08:00
parent 5b9ca7df42
commit 62d0e7be7c
2 changed files with 172 additions and 0 deletions

View File

@ -0,0 +1,79 @@
/*
* Copyright (C) 2023, Google LLC
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
import org.junit.Assert;
import org.junit.Test;
public class UInt24ArrayTest {
private static final byte[] DATA = { 0x00, 0x00, 0x00, // 0
0x00, 0x00, 0x05, // 5
0x00, 0x00, 0x0a, // 10
0x00, 0x00, 0x0f, // 15
0x00, 0x00, 0x14, // 20
0x00, 0x00, 0x19, // 25
(byte) 0xff, 0x00, 0x00, // Uint with MSB=1
(byte) 0xff, (byte) 0xff, (byte) 0xff, // MAX
};
private static final UInt24Array asArray = new UInt24Array(DATA);
@Test
public void uInt24Array_size() {
assertEquals(8, asArray.size());
}
@Test
public void uInt24Array_get() {
assertEquals(0, asArray.get(0));
assertEquals(5, asArray.get(1));
assertEquals(10, asArray.get(2));
assertEquals(15, asArray.get(3));
assertEquals(20, asArray.get(4));
assertEquals(25, asArray.get(5));
assertEquals(0xff0000, asArray.get(6));
assertEquals(0xffffff, asArray.get(7));
assertThrows(IndexOutOfBoundsException.class, () -> asArray.get(9));
}
@Test
public void uInt24Array_getLastValue() {
assertEquals(0xffffff, asArray.getLastValue());
}
@Test
public void uInt24Array_find() {
assertEquals(0, asArray.binarySearch(0));
assertEquals(1, asArray.binarySearch(5));
assertEquals(2, asArray.binarySearch(10));
assertEquals(3, asArray.binarySearch(15));
assertEquals(4, asArray.binarySearch(20));
assertEquals(5, asArray.binarySearch(25));
assertEquals(6, asArray.binarySearch(0xff0000));
assertEquals(7, asArray.binarySearch(0xffffff));
assertThrows(IllegalArgumentException.class,
() -> asArray.binarySearch(Integer.MAX_VALUE));
}
@Test
public void uInt24Array_empty() {
Assert.assertTrue(UInt24Array.EMPTY.isEmpty());
assertEquals(0, UInt24Array.EMPTY.size());
assertEquals(-1, UInt24Array.EMPTY.binarySearch(1));
assertThrows(IndexOutOfBoundsException.class,
() -> UInt24Array.EMPTY.getLastValue());
assertThrows(IndexOutOfBoundsException.class,
() -> UInt24Array.EMPTY.get(0));
}
}

View File

@ -0,0 +1,93 @@
/*
* Copyright (C) 2023, Google LLC
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Distribution License v. 1.0 which is available at
* https://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.eclipse.jgit.internal.storage.file;
/**
* A view of a byte[] as a list of integers stored in 3 bytes.
*
* The ints are stored in big-endian ("network order"), so
* byte[]{aa,bb,cc} becomes the int 0x00aabbcc
*/
final class UInt24Array {
public static final UInt24Array EMPTY = new UInt24Array(
new byte[0]);
private static final int ENTRY_SZ = 3;
private final byte[] data;
private final int size;
UInt24Array(byte[] data) {
this.data = data;
this.size = data.length / ENTRY_SZ;
}
boolean isEmpty() {
return size == 0;
}
int size() {
return size;
}
int get(int index) {
if (index < 0 || index >= size) {
throw new IndexOutOfBoundsException(index);
}
int offset = index * ENTRY_SZ;
int e = data[offset] & 0xff;
e <<= 8;
e |= data[offset + 1] & 0xff;
e <<= 8;
e |= data[offset + 2] & 0xff;
return e;
}
/**
* Search needle in the array.
*
* This assumes a sorted array.
*
* @param needle
* It cannot be bigger than 0xffffff (max unsigned three bytes).
* @return position of the needle in the array, -1 if not found. Runtime
* exception if the value is too big for 3 bytes.
*/
int binarySearch(int needle) {
if ((needle & 0xff000000) != 0) {
throw new IllegalArgumentException("Too big value for 3 bytes"); //$NON-NLS-1$
}
if (size == 0) {
return -1;
}
int high = size;
if (high == 0)
return -1;
int low = 0;
do {
int mid = (low + high) >>> 1;
int cmp;
cmp = Integer.compare(needle, get(mid));
if (cmp < 0)
high = mid;
else if (cmp == 0) {
return mid;
} else
low = mid + 1;
} while (low < high);
return -1;
}
int getLastValue() {
return get(size - 1);
}
}