author | Junio C Hamano <gitster@pobox.com> | |
Sun, 11 May 2008 01:14:28 +0000 (18:14 -0700) | ||
committer | Junio C Hamano <gitster@pobox.com> | |
Sun, 11 May 2008 01:14:28 +0000 (18:14 -0700) |
* lt/case-insensitive:
Make git-add behave more sensibly in a case-insensitive environment
When adding files to the index, add support for case-independent matches
Make unpack-tree update removed files before any updated files
Make branch merging aware of underlying case-insensitive filesystems
Add 'core.ignorecase' option
Make hash_name_lookup able to do case-independent lookups
Make "index_name_exists()" return the cache_entry it found
Move name hashing functions into a file of its own
Make unpack_trees_options bit flags actual bitfields
Make git-add behave more sensibly in a case-insensitive environment
When adding files to the index, add support for case-independent matches
Make unpack-tree update removed files before any updated files
Make branch merging aware of underlying case-insensitive filesystems
Add 'core.ignorecase' option
Make hash_name_lookup able to do case-independent lookups
Make "index_name_exists()" return the cache_entry it found
Move name hashing functions into a file of its own
Make unpack_trees_options bit flags actual bitfields
Makefile | patch | blob | history | |
builtin-read-tree.c | patch | blob | history | |
cache.h | patch | blob | history | |
config.c | patch | blob | history | |
dir.c | patch | blob | history | |
environment.c | patch | blob | history | |
name-hash.c | [new file with mode: 0644] | patch | blob |
read-cache.c | patch | blob | history | |
unpack-trees.c | patch | blob | history | |
unpack-trees.h | patch | blob | history |
diff --git a/Makefile b/Makefile
index 9d84c8d799fc1044177197fb804411b05569dc83..649ee56c96f44c6b403907bf03826c2a3f577315 100644 (file)
--- a/Makefile
+++ b/Makefile
LIB_OBJS += mailmap.o
LIB_OBJS += match-trees.o
LIB_OBJS += merge-file.o
+LIB_OBJS += name-hash.o
LIB_OBJS += object.o
LIB_OBJS += pack-check.o
LIB_OBJS += pack-revindex.o
diff --git a/builtin-read-tree.c b/builtin-read-tree.c
index e9cfd2bbc5539ee0c9c048798383b837ff63991b..7ac30883bc72c51b0227828bef5758751e524f65 100644 (file)
--- a/builtin-read-tree.c
+++ b/builtin-read-tree.c
for (i = 0; i < active_nr; i++) {
struct cache_entry *ce = active_cache[i];
if (ce_stage(ce)) {
- remove_index_entry(ce);
+ remove_name_hash(ce);
if (last && !strcmp(ce->name, last->name))
continue;
cache_tree_invalidate_path(active_cache_tree, ce->name);
index 7fb8f3359dddbb8f93aa827947c3e5d9de04f639..80a8842db4b11631b28bc554772434d6ba51b3c1 100644 (file)
--- a/cache.h
+++ b/cache.h
#define CE_UPDATE (0x10000)
#define CE_REMOVE (0x20000)
#define CE_UPTODATE (0x40000)
+#define CE_ADDED (0x80000)
#define CE_HASHED (0x100000)
#define CE_UNHASHED (0x200000)
@@ -153,20 +154,6 @@ static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry
dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
}
-/*
- * We don't actually *remove* it, we can just mark it invalid so that
- * we won't find it in lookups.
- *
- * Not only would we have to search the lists (simple enough), but
- * we'd also have to rehash other hash buckets in case this makes the
- * hash bucket empty (common). So it's much better to just mark
- * it.
- */
-static inline void remove_index_entry(struct cache_entry *ce)
-{
- ce->ce_flags |= CE_UNHASHED;
-}
-
static inline unsigned create_ce_flags(size_t len, unsigned stage)
{
if (len >= CE_NAMEMASK)
extern struct index_state the_index;
+/* Name hashing */
+extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
+/*
+ * Hide a cache entry from name-hash lookups by setting CE_UNHASHED in
+ * its ce_flags.
+ *
+ * We don't actually *remove* it, we can just mark it invalid so that
+ * we won't find it in lookups.
+ *
+ * Not only would we have to search the lists (simple enough), but
+ * we'd also have to rehash other hash buckets in case this makes the
+ * hash bucket empty (common). So it's much better to just mark
+ * it.
+ */
+static inline void remove_name_hash(struct cache_entry *ce)
+{
+ ce->ce_flags |= CE_UNHASHED;
+}
+
+
#ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
#define active_cache (the_index.cache)
#define active_nr (the_index.cache_nr)
#define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL)
#define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
#define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
-#define cache_name_exists(name, namelen) index_name_exists(&the_index, (name), (namelen))
+#define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase))
#endif
enum object_type {
extern int discard_index(struct index_state *);
extern int unmerged_index(const struct index_state *);
extern int verify_path(const char *path);
-extern int index_name_exists(struct index_state *istate, const char *name, int namelen);
+extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
extern int index_name_pos(const struct index_state *, const char *name, int namelen);
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
extern int trust_executable_bit;
extern int quote_path_fully;
extern int has_symlinks;
+extern int ignore_case;
extern int assume_unchanged;
extern int prefer_symlink_refs;
extern int log_all_ref_updates;
diff --git a/config.c b/config.c
index b0ada515b9d839fc8691bc9af320353ff323b251..8fcb5db94f37c240734dfd9946c95b5f0f697ef8 100644 (file)
--- a/config.c
+++ b/config.c
return 0;
}
+ if (!strcmp(var, "core.ignorecase")) {
+ ignore_case = git_config_bool(var, value);
+ return 0;
+ }
+
if (!strcmp(var, "core.bare")) {
is_bare_repository_cfg = git_config_bool(var, value);
return 0;
index 9501476ecdb05fb576d8acfc6a1dac02df86c9b7..29d1d5ba31def46ba8b55905dc60773cc6cc167e 100644 (file)
--- a/dir.c
+++ b/dir.c
struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
{
- if (cache_name_exists(pathname, len))
+ if (cache_name_exists(pathname, len, ignore_case))
return NULL;
ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
diff --git a/environment.c b/environment.c
index fcd1ee5ef8c0999fce0095a2d6a792e5d221c50a..945574169b75ae1f0f0efad847d863104c1da3d1 100644 (file)
--- a/environment.c
+++ b/environment.c
int trust_executable_bit = 1;
int quote_path_fully = 1;
int has_symlinks = 1;
+int ignore_case;
int assume_unchanged;
int prefer_symlink_refs;
int is_bare_repository_cfg = -1; /* unspecified */
diff --git a/name-hash.c b/name-hash.c
--- /dev/null
+++ b/name-hash.c
@@ -0,0 +1,119 @@
+/*
+ * name-hash.c
+ *
+ * Hashing names in the index state
+ *
+ * Copyright (C) 2008 Linus Torvalds
+ */
+#define NO_THE_INDEX_COMPATIBILITY_MACROS
+#include "cache.h"
+
+/*
+ * This removes bit 5 if bit 6 is set.
+ *
+ * That will make US-ASCII characters hash to their upper-case
+ * equivalent. We could easily do this one whole word at a time,
+ * but that's for future worries.
+ *
+ * (c & 0x40) >> 1 is 0x20 exactly when bit 6 is set, so the mask clears
+ * 0x20 for every byte in 0x40-0x7f and 0xc0-0xff, not only for letters.
+ * The folded value is only fed into the hash; lookups still compare the
+ * names themselves.
+ */
+static inline unsigned char icase_hash(unsigned char c)
+{
+ return c & ~((c & 0x40) >> 1);
+}
+
+/*
+ * Compute the name-hash key for the 'namelen' bytes starting at 'name'.
+ *
+ * Every byte goes through icase_hash() first, so two names that differ
+ * only in US-ASCII case produce the same key.
+ *
+ * A 'namelen' of zero or less hashes no bytes and returns the seed value.
+ * (A do/while that tests --namelen after the body would decrement 0 to -1
+ * and keep reading far beyond the end of 'name'.)
+ */
+static unsigned int hash_name(const char *name, int namelen)
+{
+ unsigned int hash = 0x123;
+
+ while (namelen-- > 0) {
+ unsigned char c = *name++;
+ c = icase_hash(c);
+ hash = hash*101 + c;
+ }
+ return hash;
+}
+
+/*
+ * Insert 'ce' into istate->name_hash, at most once per entry.
+ *
+ * CE_HASHED records that the entry has already been inserted, so a
+ * repeated call for the same entry returns without doing anything.
+ */
+static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
+{
+ void **pos;
+ unsigned int hash;
+
+ if (ce->ce_flags & CE_HASHED)
+ return;
+ ce->ce_flags |= CE_HASHED;
+ ce->next = NULL;
+ hash = hash_name(ce->name, ce_namelen(ce));
+ pos = insert_hash(hash, ce, &istate->name_hash);
+ /*
+ * A non-NULL 'pos' is used as the slot that already holds an entry
+ * for this hash (presumably how insert_hash() reports a collision;
+ * confirm against hash.c): chain the old occupant behind 'ce' and
+ * store 'ce' in the slot.
+ */
+ if (pos) {
+ ce->next = *pos;
+ *pos = ce;
+ }
+}
+
+/*
+ * Build the name hash the first time it is needed.
+ *
+ * Hashes every entry currently in istate->cache and then sets
+ * name_hash_initialized, so later calls return immediately.
+ */
+static void lazy_init_name_hash(struct index_state *istate)
+{
+ int nr;
+
+ if (istate->name_hash_initialized)
+ return;
+ for (nr = 0; nr < istate->cache_nr; nr++)
+ hash_index_entry(istate, istate->cache[nr]);
+ istate->name_hash_initialized = 1;
+}
+
+/*
+ * Make 'ce' eligible for name lookups again by clearing CE_UNHASHED.
+ *
+ * The entry is inserted into the hash table right away only when the
+ * table has already been built. Otherwise nothing is inserted here;
+ * lazy_init_name_hash() hashes whatever is in istate->cache on the first
+ * lookup (this assumes the caller has stored 'ce' in istate->cache).
+ */
+void add_name_hash(struct index_state *istate, struct cache_entry *ce)
+{
+ ce->ce_flags &= ~CE_UNHASHED;
+ if (istate->name_hash_initialized)
+ hash_index_entry(istate, ce);
+}
+
+/*
+ * Case-insensitive equality test for two length-counted names.
+ *
+ * Returns 1 when both names have the same length and every pair of
+ * bytes is either identical or identical after toupper(); returns 0
+ * otherwise. Only 'len1' bytes of each name are read.
+ */
+static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
+{
+ if (len1 != len2)
+ return 0;
+
+ while (len1) {
+ unsigned char c1 = *name1++;
+ unsigned char c2 = *name2++;
+ len1--;
+ /* Only fold case when the raw bytes differ */
+ if (c1 != c2) {
+ c1 = toupper(c1);
+ c2 = toupper(c2);
+ if (c1 != c2)
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Return nonzero when the name of 'ce' matches 'name' ('namelen' bytes).
+ *
+ * An exact match always counts. When 'icase' is nonzero, a match that
+ * differs only in case (as decided by slow_same_name()) counts as well.
+ */
+static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
+{
+ int len = ce_namelen(ce);
+
+ /*
+ * Always do exact compare, even if we want a case-ignoring comparison;
+ * we do the quick exact one first, because it will be the common case.
+ */
+ if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
+ return 1;
+
+ return icase && slow_same_name(name, namelen, ce->name, len);
+}
+
+/*
+ * Look up 'name' ('namelen' bytes) in the name hash of 'istate'.
+ *
+ * The hash is built on first use. The chain found for the name's hash is
+ * walked, skipping entries marked CE_UNHASHED, and the first entry for
+ * which same_name() succeeds is returned. 'icase' nonzero also accepts
+ * names that differ only in case.
+ *
+ * Returns the matching cache entry, or NULL when there is none.
+ */
+struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
+{
+ unsigned int hash = hash_name(name, namelen);
+ struct cache_entry *ce;
+
+ lazy_init_name_hash(istate);
+ ce = lookup_hash(hash, &istate->name_hash);
+
+ while (ce) {
+ if (!(ce->ce_flags & CE_UNHASHED)) {
+ if (same_name(ce, name, namelen, icase))
+ return ce;
+ }
+ ce = ce->next;
+ }
+ return NULL;
+}
diff --git a/read-cache.c b/read-cache.c
index c3692f41adc6a8c65e9ae9e4c82910ea4a63d599..3b20a142ea93f67d8a1246ebd27797aad8035e6e 100644 (file)
--- a/read-cache.c
+++ b/read-cache.c
struct index_state the_index;
-static unsigned int hash_name(const char *name, int namelen)
-{
- unsigned int hash = 0x123;
-
- do {
- unsigned char c = *name++;
- hash = hash*101 + c;
- } while (--namelen);
- return hash;
-}
-
-static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
-{
- void **pos;
- unsigned int hash;
-
- if (ce->ce_flags & CE_HASHED)
- return;
- ce->ce_flags |= CE_HASHED;
- ce->next = NULL;
- hash = hash_name(ce->name, ce_namelen(ce));
- pos = insert_hash(hash, ce, &istate->name_hash);
- if (pos) {
- ce->next = *pos;
- *pos = ce;
- }
-}
-
-static void lazy_init_name_hash(struct index_state *istate)
-{
- int nr;
-
- if (istate->name_hash_initialized)
- return;
- for (nr = 0; nr < istate->cache_nr; nr++)
- hash_index_entry(istate, istate->cache[nr]);
- istate->name_hash_initialized = 1;
-}
-
static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{
- ce->ce_flags &= ~CE_UNHASHED;
istate->cache[nr] = ce;
- if (istate->name_hash_initialized)
- hash_index_entry(istate, ce);
+ add_name_hash(istate, ce);
}
static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
{
struct cache_entry *old = istate->cache[nr];
- remove_index_entry(old);
+ remove_name_hash(old);
set_index_entry(istate, nr, ce);
istate->cache_changed = 1;
}
-int index_name_exists(struct index_state *istate, const char *name, int namelen)
-{
- unsigned int hash = hash_name(name, namelen);
- struct cache_entry *ce;
-
- lazy_init_name_hash(istate);
- ce = lookup_hash(hash, &istate->name_hash);
-
- while (ce) {
- if (!(ce->ce_flags & CE_UNHASHED)) {
- if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags))
- return 1;
- }
- ce = ce->next;
- }
- return 0;
-}
-
/*
* This only updates the "non-critical" parts of the directory
* cache, ie the parts that aren't tracked by GIT, and only used
{
struct cache_entry *ce = istate->cache[pos];
- remove_index_entry(ce);
+ remove_name_hash(ce);
istate->cache_changed = 1;
istate->cache_nr--;
if (pos >= istate->cache_nr)
return pos;
}
+/*
+ * Return nonzero when the names of 'ce' and 'alias' differ in length or
+ * in any byte (exact, case-sensitive comparison); zero when they are
+ * byte-for-byte identical.
+ */
+static int different_name(struct cache_entry *ce, struct cache_entry *alias)
+{
+ int len = ce_namelen(ce);
+ return ce_namelen(alias) != len || memcmp(ce->name, alias->name, len);
+}
+
+/*
+ * If we add a filename that aliases in the cache, we will use the
+ * name that we already have - but we don't want to update the same
+ * alias twice, because that implies that there were actually two
+ * different files with aliasing names!
+ *
+ * So we use the CE_ADDED flag to verify that the alias was an old
+ * one before we accept it as the name to store the new entry under.
+ *
+ * Consumes 'ce' (it is freed here) and returns a newly allocated entry
+ * that carries the name of 'alias'; the caller must use the return
+ * value in place of 'ce'.
+ */
+static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_entry *alias)
+{
+ int len;
+ struct cache_entry *new;
+
+ /* CE_ADDED already set on the alias: refuse instead of updating it again */
+ if (alias->ce_flags & CE_ADDED)
+ die("Will not add file alias '%s' ('%s' already exists in index)", ce->name, alias->name);
+
+ /* Ok, create the new entry using the name of the existing alias */
+ len = ce_namelen(alias);
+ new = xcalloc(1, cache_entry_size(len));
+ memcpy(new->name, alias->name, len);
+ copy_cache_entry(new, ce);
+ free(ce);
+ return new;
+}
+
int add_file_to_index(struct index_state *istate, const char *path, int verbose)
{
- int size, namelen, pos;
+ int size, namelen;
struct stat st;
- struct cache_entry *ce;
+ struct cache_entry *ce, *alias;
unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_RACY_IS_DIRTY;
if (lstat(path, &st))
@@ -525,18 +498,19 @@ int add_file_to_index(struct index_state *istate, const char *path, int verbose)
ce->ce_mode = ce_mode_from_stat(ent, st.st_mode);
}
- pos = index_name_pos(istate, ce->name, namelen);
- if (0 <= pos &&
- !ce_stage(istate->cache[pos]) &&
- !ie_match_stat(istate, istate->cache[pos], &st, ce_option)) {
+ alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case);
+ if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, &st, ce_option)) {
/* Nothing changed, really */
free(ce);
- ce_mark_uptodate(istate->cache[pos]);
+ ce_mark_uptodate(alias);
+ alias->ce_flags |= CE_ADDED;
return 0;
}
-
if (index_path(ce->sha1, path, &st, 1))
die("unable to index file %s", path);
+ if (ignore_case && alias && different_name(ce, alias))
+ ce = create_alias_ce(ce, alias);
+ ce->ce_flags |= CE_ADDED;
if (add_index_entry(istate, ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE))
die("unable to add %s to index",path);
if (verbose)
diff --git a/unpack-trees.c b/unpack-trees.c
index a59f47557a2b3760c27b93fa678697c35211f952..feae846226237fce9b4ff4a4810a9961637a4ca5 100644 (file)
--- a/unpack-trees.c
+++ b/unpack-trees.c
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
- if (ce->ce_flags & (CE_UPDATE | CE_REMOVE))
- display_progress(progress, ++cnt);
if (ce->ce_flags & CE_REMOVE) {
+ display_progress(progress, ++cnt);
if (o->update)
unlink_entry(ce->name, last_symlink);
remove_index_entry_at(&o->result, i);
i--;
continue;
}
+ }
+
+ for (i = 0; i < index->cache_nr; i++) {
+ struct cache_entry *ce = index->cache[i];
+
if (ce->ce_flags & CE_UPDATE) {
+ display_progress(progress, ++cnt);
ce->ce_flags &= ~CE_UPDATE;
if (o->update) {
errs |= checkout_entry(ce, &state, NULL);
@@ -520,6 +525,22 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action,
return cnt;
}
+/*
+ * This gets called when there was no index entry for the tree entry 'dst',
+ * but we found a file in the working tree that 'lstat()' said was fine,
+ * and we're on a case-insensitive filesystem.
+ *
+ * See if we can find a case-insensitive match in the index that also
+ * matches the stat information, and assume it's that other file!
+ *
+ * Returns nonzero when o->src_index has such an entry and ie_match_stat()
+ * returns 0 for it against 'st'; returns 0 otherwise.
+ */
+static int icase_exists(struct unpack_trees_options *o, struct cache_entry *dst, struct stat *st)
+{
+ struct cache_entry *src;
+
+ /* Final argument 1: this lookup is always case-insensitive */
+ src = index_name_exists(o->src_index, dst->name, ce_namelen(dst), 1);
+ return src && !ie_match_stat(o->src_index, src, st, CE_MATCH_IGNORE_VALID);
+}
+
/*
* We do not want to remove or overwrite a working tree file that
* is not tracked, unless it is ignored.
if (!lstat(ce->name, &st)) {
int cnt;
int dtype = ce_to_dtype(ce);
+ struct cache_entry *result;
+
+ /*
+ * It may be that the 'lstat()' succeeded even though
+ * target 'ce' was absent, because there is an old
+ * entry that is different only in case.
+ *
+ * Ignore that lstat() if it matches.
+ */
+ if (ignore_case && icase_exists(o, ce, &st))
+ return 0;
if (o->dir && excluded(o->dir, ce->name, &dtype))
/*
* delete this path, which is in a subdirectory that
* is being replaced with a blob.
*/
- cnt = index_name_pos(&o->result, ce->name, strlen(ce->name));
- if (0 <= cnt) {
- struct cache_entry *ce = o->result.cache[cnt];
- if (ce->ce_flags & CE_REMOVE)
+ result = index_name_exists(&o->result, ce->name, ce_namelen(ce), 0);
+ if (result) {
+ if (result->ce_flags & CE_REMOVE)
return 0;
}
diff --git a/unpack-trees.h b/unpack-trees.h
index 50453ed20f755fea2e7138d7f01300b318f28dce..d436d6ced9939beeb4599dc8fddebe0890e55db8 100644 (file)
--- a/unpack-trees.h
+++ b/unpack-trees.h
struct unpack_trees_options *options);
struct unpack_trees_options {
- int reset;
- int merge;
- int update;
- int index_only;
- int nontrivial_merge;
- int trivial_merges_only;
- int verbose_update;
- int aggressive;
- int skip_unmerged;
- int gently;
+ unsigned int reset:1,
+ merge:1,
+ update:1,
+ index_only:1,
+ nontrivial_merge:1,
+ trivial_merges_only:1,
+ verbose_update:1,
+ aggressive:1,
+ skip_unmerged:1,
+ gently:1;
const char *prefix;
int pos;
struct dir_struct *dir;
void *unpack_data;
struct index_state *dst_index;
- const struct index_state *src_index;
+ struct index_state *src_index;
struct index_state result;
};