From a22c637124a2f591382d56546136a8e2bb2c2c66 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 22 Feb 2008 20:37:40 -0800 Subject: [PATCH] Fix name re-hashing semantics We handled the case of removing and re-inserting cache entries badly, which is something that merging commonly needs to do (removing the different stages, and then re-inserting one of them as the merged state). We even had a rather ugly special case for this failure case, where replace_index_entry() basically turned itself into a no-op if the new and the old entries were the same, exactly because the hash routines didn't handle it on their own. So what this patch does is to not just have the UNHASHED bit, but a HASHED bit too, and when you insert an entry into the name hash, that involves: - clear the UNHASHED bit, because now it's valid again for lookup (which is really all that UNHASHED meant) - if we're being lazy, we're done here (but we still want to clear the UNHASHED bit regardless of lazy mode, since we can become unlazy later, and so we need the UNHASHED bit to always be set correctly, even if we never actually insert the entry into the hash list) - if it was already hashed, we just leave it on the list - otherwise mark it HASHED and insert it into the list this all means that unhashing and rehashing a name all just works automatically. Obviously, you cannot change the name of an entry (that would be a serious bug), but nothing can validly do that anyway (you'd have to allocate a new struct cache_entry anyway since the name length could change), so that's not a new limitation. The code actually gets simpler in many ways, although the lazy hashing does mean that there are a few odd cases (ie something can be marked unhashed even though it was never on the hash in the first place, and isn't actually marked hashed!). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- cache.h | 4 +++- read-cache.c | 14 +++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cache.h b/cache.h index e1000bccb..0b69c92da 100644 --- a/cache.h +++ b/cache.h @@ -133,7 +133,9 @@ struct cache_entry { #define CE_UPDATE (0x10000) #define CE_REMOVE (0x20000) #define CE_UPTODATE (0x40000) -#define CE_UNHASHED (0x80000) + +#define CE_HASHED (0x100000) +#define CE_UNHASHED (0x200000) static inline unsigned create_ce_flags(size_t len, unsigned stage) { diff --git a/read-cache.c b/read-cache.c index e45f4b3d6..eb58b03f9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -37,8 +37,13 @@ static unsigned int hash_name(const char *name, int namelen) static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) { void **pos; - unsigned int hash = hash_name(ce->name, ce_namelen(ce)); + unsigned int hash; + if (ce->ce_flags & CE_HASHED) + return; + ce->ce_flags |= CE_HASHED; + ce->next = NULL; + hash = hash_name(ce->name, ce_namelen(ce)); pos = insert_hash(hash, ce, &istate->name_hash); if (pos) { ce->next = *pos; @@ -59,6 +64,7 @@ static void lazy_init_name_hash(struct index_state *istate) static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { + ce->ce_flags &= ~CE_UNHASHED; istate->cache[nr] = ce; if (istate->name_hash_initialized) hash_index_entry(istate, ce); @@ -82,10 +88,8 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache { struct cache_entry *old = istate->cache[nr]; - if (ce != old) { - remove_hash_entry(istate, old); - set_index_entry(istate, nr, ce); - } + remove_hash_entry(istate, old); + set_index_entry(istate, nr, ce); istate->cache_changed = 1; } -- 2.30.2