Code

remote: Make ref_remove_duplicates faster for large numbers of refs
authorJulian Phillips <julian@quantumfyre.co.uk>
Sun, 25 Oct 2009 21:28:11 +0000 (21:28 +0000)
committerJunio C Hamano <gitster@pobox.com>
Wed, 28 Oct 2009 06:38:43 +0000 (23:38 -0700)
The ref_remove_duplicates function was very slow at dealing with very
large numbers of refs.  This is because it was using a linear search
through all remaining refs to find any duplicates of the current ref.

Rewriting it to use a string list to keep track of which refs have
already been seen and removing duplicates when they are found is much
more efficient.

Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
remote.c

index 73d33f2584b061085fcaa0958c26b1c3d4665904..4f9f0ccc7b799a427130a7f7befdf6e931b30fc7 100644 (file)
--- a/remote.c
+++ b/remote.c
@@ -6,6 +6,7 @@
 #include "revision.h"
 #include "dir.h"
 #include "tag.h"
+#include "string-list.h"
 
 static struct refspec s_tag_refspec = {
        0,
@@ -734,29 +735,31 @@ int for_each_remote(each_remote_fn fn, void *priv)
 
 void ref_remove_duplicates(struct ref *ref_map)
 {
-       struct ref **posn;
-       struct ref *next;
-       for (; ref_map; ref_map = ref_map->next) {
+       struct string_list refs = { NULL, 0, 0, 0 };
+       struct string_list_item *item = NULL;
+       struct ref *prev = NULL, *next = NULL;
+       for (; ref_map; prev = ref_map, ref_map = next) {
+               next = ref_map->next;
                if (!ref_map->peer_ref)
                        continue;
-               posn = &ref_map->next;
-               while (*posn) {
-                       if ((*posn)->peer_ref &&
-                           !strcmp((*posn)->peer_ref->name,
-                                   ref_map->peer_ref->name)) {
-                               if (strcmp((*posn)->name, ref_map->name))
-                                       die("%s tracks both %s and %s",
-                                           ref_map->peer_ref->name,
-                                           (*posn)->name, ref_map->name);
-                               next = (*posn)->next;
-                               free((*posn)->peer_ref);
-                               free(*posn);
-                               *posn = next;
-                       } else {
-                               posn = &(*posn)->next;
-                       }
+
+               item = string_list_lookup(ref_map->peer_ref->name, &refs);
+               if (item) {
+                       if (strcmp(((struct ref *)item->util)->name,
+                                  ref_map->name))
+                               die("%s tracks both %s and %s",
+                                   ref_map->peer_ref->name,
+                                   ((struct ref *)item->util)->name,
+                                   ref_map->name);
+                       prev->next = ref_map->next;
+                       free(ref_map->peer_ref);
+                       free(ref_map);
                }
+
+               item = string_list_insert(ref_map->peer_ref->name, &refs);
+               item->util = ref_map;
        }
+       string_list_clear(&refs, 0);
 }
 
 int remote_has_url(struct remote *remote, const char *url)