Implement core.checkstat = minimal

There is a huge performance issue when using both JGit (EGit) and Git
because JGit does not fill all dircache stat fields with the values Git
would expect. As a result thereof Git would typically revalidate a large
number of tracked files. This can take several minutes for large
repositories with many large files.

Since 1.8.2 Git will restrict stat checking to the size and whole second
part of the modification time stamp, if core.checkstat is set to
"minimal".

As JGit checks only size and modification time, this is close to what
JGit already does. To make the match perfect, ignore the sub-second part
of the modification time stamp if core.checkstat = minimal.

Change-Id: I8eaff1858a891571075a86db043f9d80da3d7503
This commit is contained in:
Robin Rosenberg 2013-01-30 21:50:22 +01:00
parent 51d0e1f26e
commit 8a9074fe21
4 changed files with 52 additions and 5 deletions

View File

@ -1,6 +1,7 @@
/*
* Copyright (C) 2010, Mathias Kinzler <mathias.kinzler@sap.com>
* Copyright (C) 2010, Chris Aniszczyk <caniszczyk@gmail.com>
* Copyright (C) 2012-2013, Robin Rosenberg
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
@ -194,4 +195,7 @@ public class ConfigConstants {
/** The "ff" key */
public static final String CONFIG_KEY_FF = "ff";
/** The "checkstat" key */
public static final String CONFIG_KEY_CHECKSTAT = "checkstat";
}

View File

@ -74,6 +74,25 @@ public static enum AutoCRLF {
INPUT;
}
/**
* Permissible values for {@code core.checkstat}. The selected value decides
* how much of the stat info recorded in the dircache is compared with the
* stat info of the corresponding file.
*
* @since 2.3
*/
public static enum CheckStat {
/**
* Only check the size and whole second part of time stamp when
* comparing the stat info in the dircache with actual file stat info.
* Any sub-second part of the modification time is ignored.
*/
MINIMAL,
/**
* Check as much of the dircache stat info as possible. Implementation
* limits may apply.
*/
DEFAULT
}
private final int compression;
private final int packIndexVersion;

View File

@ -2,6 +2,7 @@
* Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
* Copyright (C) 2010, Christian Halstrick <christian.halstrick@sap.com>
* Copyright (C) 2010, Matthias Sohn <matthias.sohn@sap.com>
* Copyright (C) 2012-2013, Robin Rosenberg
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
@ -75,6 +76,7 @@
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.CoreConfig.CheckStat;
import org.eclipse.jgit.submodule.SubmoduleWalk;
import org.eclipse.jgit.util.FS;
import org.eclipse.jgit.util.IO;
@ -754,15 +756,23 @@ public MetadataDiff compareMetadata(DirCacheEntry entry) {
// Git under windows only stores seconds so we round the timestamp
// Java gives us if it looks like the timestamp in index is seconds
// only. Otherwise we compare the timestamp at millisecond precision.
// only. Otherwise we compare the timestamp at millisecond precision,
// unless core.checkstat is set to "minimal", in which case we only
// compare the whole second part.
long cacheLastModified = entry.getLastModified();
long fileLastModified = getEntryLastModified();
if (cacheLastModified % 1000 == 0)
fileLastModified = fileLastModified - fileLastModified % 1000;
long lastModifiedMillis = fileLastModified % 1000;
long cacheMillis = cacheLastModified % 1000;
if (getOptions().getCheckStat() == CheckStat.MINIMAL) {
fileLastModified = fileLastModified - lastModifiedMillis;
cacheLastModified = cacheLastModified - cacheMillis;
} else if (cacheMillis == 0)
fileLastModified = fileLastModified - lastModifiedMillis;
// Some Java version on Linux return whole seconds only even when
// the file systems supports more precision.
else if (fileLastModified % 1000 == 0)
cacheLastModified = cacheLastModified - cacheLastModified % 1000;
else if (lastModifiedMillis == 0)
cacheLastModified = cacheLastModified - cacheMillis;
if (fileLastModified != cacheLastModified)
return MetadataDiff.DIFFER_BY_TIMESTAMP;
else if (!entry.isSmudged())

View File

@ -1,5 +1,6 @@
/*
* Copyright (C) 2010, Marc Strapetz <marc.strapetz@syntevo.com>
* Copyright (C) 2012-2013, Robin Rosenberg
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
@ -46,6 +47,7 @@
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Config.SectionParser;
import org.eclipse.jgit.lib.CoreConfig.AutoCRLF;
import org.eclipse.jgit.lib.CoreConfig.CheckStat;
/** Options used by the {@link WorkingTreeIterator}. */
public class WorkingTreeOptions {
@ -60,11 +62,15 @@ public WorkingTreeOptions parse(final Config cfg) {
private final AutoCRLF autoCRLF;
private final CheckStat checkStat;
private WorkingTreeOptions(final Config rc) {
fileMode = rc.getBoolean(ConfigConstants.CONFIG_CORE_SECTION,
ConfigConstants.CONFIG_KEY_FILEMODE, true);
autoCRLF = rc.getEnum(ConfigConstants.CONFIG_CORE_SECTION, null,
ConfigConstants.CONFIG_KEY_AUTOCRLF, AutoCRLF.FALSE);
checkStat = rc.getEnum(ConfigConstants.CONFIG_CORE_SECTION, null,
ConfigConstants.CONFIG_KEY_CHECKSTAT, CheckStat.DEFAULT);
}
/** @return true if the execute bit on working files should be trusted. */
@ -76,4 +82,12 @@ public boolean isFileMode() {
public AutoCRLF getAutoCRLF() {
	return this.autoCRLF;
}
/**
 * Get the stat comparison mode held by these options.
 *
 * @return the {@link CheckStat} value describing how stat data is compared
 * @since 2.3
 */
public CheckStat getCheckStat() {
	return this.checkStat;
}
}