Code

A new merge stragety 'subtree'.
authorJunio C Hamano <junkio@cox.net>
Fri, 16 Feb 2007 00:32:45 +0000 (16:32 -0800)
committerJunio C Hamano <junkio@cox.net>
Sat, 7 Apr 2007 09:29:40 +0000 (02:29 -0700)
This merge strategy largely piggy-backs on git-merge-recursive.
When merging trees A and B, if B corresponds to a subtree of A,
B is first adjusted to match the tree structure of A, instead of
reading the trees at the same level.  This adjustment is also
done to the common ancestor tree.

If you are pulling updates from git-gui repository into git.git
repository, the root level of the former corresponds to git-gui/
subdirectory of the latter.  The tree object of git-gui's toplevel
is wrapped in a fake tree object, whose sole entry has name 'git-gui'
and records object name of the true tree, before being used by
the 3-way merge code.

If you are merging the other way, only the git-gui/ subtree of
git.git is extracted and merged into git-gui's toplevel.

The detection of corresponding subtree is done by comparing the
pathnames and types in the toplevel of the tree.

Heuristics galore!  That's the git way ;-).

Signed-off-by: Junio C Hamano <junkio@cox.net>
.gitignore
Makefile
cache.h
git-merge.sh
match-trees.c [new file with mode: 0644]
merge-recursive.c
test-match-trees.c [new file with mode: 0644]

index b39f78fcdf18c201d89a4e69fa24a853185e97e5..9229e918cd1c8a43b6c73b0083ddfe973b1e3b88 100644 (file)
@@ -77,6 +77,7 @@ git-merge-ours
 git-merge-recursive
 git-merge-resolve
 git-merge-stupid
+git-merge-subtree
 git-mergetool
 git-mktag
 git-mktree
@@ -148,6 +149,7 @@ test-chmtime
 test-date
 test-delta
 test-dump-cache-tree
+test-match-trees
 common-cmds.h
 *.tar.gz
 *.dsc
index ac29c629e3927ad59142cf73d03eea501fc30962..a77d31de989f6de63bb2fbbd5ccef3c1c83352a8 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -251,6 +251,8 @@ BUILT_INS = \
 # what 'all' will build and 'install' will install, in gitexecdir
 ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
 
+ALL_PROGRAMS += git-merge-subtree$X
+
 # what 'all' will build but not install in gitexecdir
 OTHER_PROGRAMS = git$X gitweb/gitweb.cgi
 ifndef NO_TCLTK
@@ -299,7 +301,7 @@ LIB_OBJS = \
        server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \
        tag.o tree.o usage.o config.o environment.o ctype.o copy.o \
        revision.o pager.o tree-walk.o xdiff-interface.o \
-       write_or_die.o trace.o list-objects.o grep.o \
+       write_or_die.o trace.o list-objects.o grep.o match-trees.o \
        alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
        color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
        convert.o
@@ -725,6 +727,9 @@ git$X: git.c common-cmds.h $(BUILTIN_OBJS) $(GITLIBS) GIT-CFLAGS
 
 help.o: common-cmds.h
 
+git-merge-subtree$X: git-merge-recursive$X
+       rm -f $@ && ln git-merge-recursive$X $@
+
 $(BUILT_INS): git$X
        $(QUIET_BUILT_IN)rm -f $@ && ln git$X $@
 
@@ -942,6 +947,9 @@ test-dump-cache-tree$X: dump-cache-tree.o $(GITLIBS)
 test-sha1$X: test-sha1.o $(GITLIBS)
        $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
+test-match-trees$X: test-match-trees.o $(GITLIBS)
+       $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
+
 test-chmtime$X: test-chmtime.c
        $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $<
 
diff --git a/cache.h b/cache.h
index 1b50c32b139256c5d8be96a85b02f1ce9855f15a..eb57507b804019f7d6b27a27a2b262d4b051e43b 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -496,4 +496,7 @@ extern void trace_argv_printf(const char **argv, int count, const char *format,
 extern int convert_to_git(const char *path, char **bufp, unsigned long *sizep);
 extern int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep);
 
+/* match-trees.c */
+void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, int);
+
 #endif /* CACHE_H */
index fa4589173f426d6172883c47479c52b8700cafa8..7ebbce4bdbaf243a7a5612c024216b8ccf8eae44 100755 (executable)
@@ -16,10 +16,10 @@ test -z "$(git ls-files -u)" ||
 LF='
 '
 
-all_strategies='recur recursive octopus resolve stupid ours'
+all_strategies='recur recursive octopus resolve stupid ours subtree'
 default_twohead_strategies='recursive'
 default_octopus_strategies='octopus'
-no_trivial_merge_strategies='ours'
+no_trivial_merge_strategies='ours subtree'
 use_strategies=
 
 index_merge=t
diff --git a/match-trees.c b/match-trees.c
new file mode 100644 (file)
index 0000000..23cafe4
--- /dev/null
@@ -0,0 +1,304 @@
+#include "cache.h"
+#include "tree.h"
+#include "tree-walk.h"
+
+static int score_missing(unsigned mode, const char *path)
+{
+       int score;
+
+       if (S_ISDIR(mode))
+               score = -1000;
+       else if (S_ISLNK(mode))
+               score = -500;
+       else
+               score = -50;
+       return score;
+}
+
+static int score_differs(unsigned mode1, unsigned mode2, const char *path)
+{
+       int score;
+
+       if (S_ISDIR(mode1) != S_ISDIR(mode2))
+               score = -100;
+       else if (S_ISLNK(mode1) != S_ISLNK(mode2))
+               score = -50;
+       else
+               score = -5;
+       return score;
+}
+
+static int score_matches(unsigned mode1, unsigned mode2, const char *path)
+{
+       int score;
+
+       /* Heh, we found SHA-1 collisions between different kind of objects */
+       if (S_ISDIR(mode1) != S_ISDIR(mode2))
+               score = -100;
+       else if (S_ISLNK(mode1) != S_ISLNK(mode2))
+               score = -50;
+
+       else if (S_ISDIR(mode1))
+               score = 1000;
+       else if (S_ISLNK(mode1))
+               score = 500;
+       else
+               score = 250;
+       return score;
+}
+
+/*
+ * Inspect two trees, and give a score that tells how similar they are.
+ */
+static int score_trees(const unsigned char *hash1, const unsigned char *hash2)
+{
+       struct tree_desc one;
+       struct tree_desc two;
+       void *one_buf, *two_buf;
+       int score = 0;
+       enum object_type type;
+       unsigned long size;
+
+       one_buf = read_sha1_file(hash1, &type, &size);
+       if (!one_buf)
+               die("unable to read tree (%s)", sha1_to_hex(hash1));
+       if (type != OBJ_TREE)
+               die("%s is not a tree", sha1_to_hex(hash1));
+       init_tree_desc(&one, one_buf, size);
+       two_buf = read_sha1_file(hash2, &type, &size);
+       if (!two_buf)
+               die("unable to read tree (%s)", sha1_to_hex(hash2));
+       if (type != OBJ_TREE)
+               die("%s is not a tree", sha1_to_hex(hash2));
+       init_tree_desc(&two, two_buf, size);
+       while (one.size | two.size) {
+               const unsigned char *elem1 = elem1;
+               const unsigned char *elem2 = elem2;
+               const char *path1 = path1;
+               const char *path2 = path2;
+               unsigned mode1 = mode1;
+               unsigned mode2 = mode2;
+               int cmp;
+
+               if (one.size)
+                       elem1 = tree_entry_extract(&one, &path1, &mode1);
+               if (two.size)
+                       elem2 = tree_entry_extract(&two, &path2, &mode2);
+
+               if (!one.size) {
+                       /* two has more entries */
+                       score += score_missing(mode2, path2);
+                       update_tree_entry(&two);
+                       continue;
+               }
+               if (!two.size) {
+                       /* two lacks this entry */
+                       score += score_missing(mode1, path1);
+                       update_tree_entry(&one);
+                       continue;
+               }
+               cmp = base_name_compare(path1, strlen(path1), mode1,
+                                       path2, strlen(path2), mode2);
+               if (cmp < 0) {
+                       /* path1 does not appear in two */
+                       score += score_missing(mode1, path1);
+                       update_tree_entry(&one);
+                       continue;
+               }
+               else if (cmp > 0) {
+                       /* path2 does not appear in one */
+                       score += score_missing(mode2, path2);
+                       update_tree_entry(&two);
+                       continue;
+               }
+               else if (hashcmp(elem1, elem2))
+                       /* they are different */
+                       score += score_differs(mode1, mode2, path1);
+               else
+                       /* same subtree or blob */
+                       score += score_matches(mode1, mode2, path1);
+               update_tree_entry(&one);
+               update_tree_entry(&two);
+       }
+       free(one_buf);
+       free(two_buf);
+       return score;
+}
+
+/*
+ * Match one itself and its subtrees with two and pick the best match.
+ */
+static void match_trees(const unsigned char *hash1,
+                       const unsigned char *hash2,
+                       int *best_score,
+                       char **best_match,
+                       char *base,
+                       int recurse_limit)
+{
+       struct tree_desc one;
+       void *one_buf;
+       enum object_type type;
+       unsigned long size;
+
+       one_buf = read_sha1_file(hash1, &type, &size);
+       if (!one_buf)
+               die("unable to read tree (%s)", sha1_to_hex(hash1));
+       if (type != OBJ_TREE)
+               die("%s is not a tree", sha1_to_hex(hash1));
+       init_tree_desc(&one, one_buf, size);
+
+       while (one.size) {
+               const char *path;
+               const unsigned char *elem;
+               unsigned mode;
+               int score;
+
+               elem = tree_entry_extract(&one, &path, &mode);
+               if (!S_ISDIR(mode))
+                       goto next;
+               score = score_trees(elem, hash2);
+               if (*best_score < score) {
+                       char *newpath;
+                       newpath = xmalloc(strlen(base) + strlen(path) + 1);
+                       sprintf(newpath, "%s%s", base, path);
+                       free(*best_match);
+                       *best_match = newpath;
+                       *best_score = score;
+               }
+               if (recurse_limit) {
+                       char *newbase;
+                       newbase = xmalloc(strlen(base) + strlen(path) + 2);
+                       sprintf(newbase, "%s%s/", base, path);
+                       match_trees(elem, hash2, best_score, best_match,
+                                   newbase, recurse_limit - 1);
+                       free(newbase);
+               }
+
+       next:
+               update_tree_entry(&one);
+       }
+       free(one_buf);
+}
+
+/*
+ * A tree "hash1" has a subdirectory at "prefix".  Come up with a
+ * tree object by replacing it with another tree "hash2".
+ */
+static int splice_tree(const unsigned char *hash1,
+                      char *prefix,
+                      const unsigned char *hash2,
+                      unsigned char *result)
+{
+       char *subpath;
+       int toplen;
+       char *buf;
+       unsigned long sz;
+       struct tree_desc desc;
+       unsigned char *rewrite_here;
+       const unsigned char *rewrite_with;
+       unsigned char subtree[20];
+       enum object_type type;
+       int status;
+
+       subpath = strchr(prefix, '/');
+       if (!subpath)
+               toplen = strlen(prefix);
+       else {
+               toplen = subpath - prefix;
+               subpath++;
+       }
+
+       buf = read_sha1_file(hash1, &type, &sz);
+       if (!buf)
+               die("cannot read tree %s", sha1_to_hex(hash1));
+       init_tree_desc(&desc, buf, sz);
+
+       rewrite_here = NULL;
+       while (desc.size) {
+               const char *name;
+               unsigned mode;
+               const unsigned char *sha1;
+
+               sha1 = tree_entry_extract(&desc, &name, &mode);
+               if (strlen(name) == toplen &&
+                   !memcmp(name, prefix, toplen)) {
+                       if (!S_ISDIR(mode))
+                               die("entry %s in tree %s is not a tree",
+                                   name, sha1_to_hex(hash1));
+                       rewrite_here = (unsigned char *) sha1;
+                       break;
+               }
+               update_tree_entry(&desc);
+       }
+       if (!rewrite_here)
+               die("entry %.*s not found in tree %s",
+                   toplen, prefix, sha1_to_hex(hash1));
+       if (subpath) {
+               status = splice_tree(rewrite_here, subpath, hash2, subtree);
+               if (status)
+                       return status;
+               rewrite_with = subtree;
+       }
+       else
+               rewrite_with = hash2;
+       hashcpy(rewrite_here, rewrite_with);
+       status = write_sha1_file(buf, sz, tree_type, result);
+       free(buf);
+       return status;
+}
+
+/*
+ * We are trying to come up with a merge between one and two that
+ * results in a tree shape similar to one.  The tree two might
+ * correspond to a subtree of one, in which case it needs to be
+ * shifted down by prefixing otherwise empty directories.  On the
+ * other hand, it could cover tree one and we might need to pick a
+ * subtree of it.
+ */
+void shift_tree(const unsigned char *hash1,
+               const unsigned char *hash2,
+               unsigned char *shifted,
+               int depth_limit)
+{
+       char *add_prefix;
+       char *del_prefix;
+       int add_score, del_score;
+
+       add_score = del_score = score_trees(hash1, hash2);
+       add_prefix = xcalloc(1, 1);
+       del_prefix = xcalloc(1, 1);
+
+       /*
+        * See if one's subtree resembles two; if so we need to prefix
+        * two with a few fake trees to match the prefix.
+        */
+       match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
+
+       /*
+        * See if two's subtree resembles one; if so we need to
+        * pick only subtree of two.
+        */
+       match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
+
+       /* Assume we do not have to do any shifting */
+       hashcpy(shifted, hash2);
+
+       if (add_score < del_score) {
+               /* We need to pick a subtree of two */
+               unsigned mode;
+
+               if (!*del_prefix)
+                       return;
+
+               if (get_tree_entry(hash2, del_prefix, shifted, &mode))
+                       die("cannot find path %s in tree %s",
+                           del_prefix, sha1_to_hex(hash2));
+               return;
+       }
+
+       if (!*add_prefix)
+               return;
+
+       splice_tree(hash1, add_prefix, hash2, shifted);
+}
+
index 2b614b64ba71f4006685ed1e08c300385ece5dec..3096594b3e9f07785229236700c6039efd61c305 100644 (file)
 #include "path-list.h"
 #include "xdiff-interface.h"
 
+static int subtree_merge;
+
+static struct tree *shift_tree_object(struct tree *one, struct tree *two)
+{
+       unsigned char shifted[20];
+
+       /*
+        * NEEDSWORK: this limits the recursion depth to hardcoded
+        * value '2' to avoid excessive overhead.
+        */
+       shift_tree(one->object.sha1, two->object.sha1, shifted, 2);
+       if (!hashcmp(two->object.sha1, shifted))
+               return two;
+       return lookup_tree(shifted);
+}
+
 /*
  * A virtual commit has
  * - (const char *)commit->util set to the name, and
@@ -1137,6 +1153,12 @@ static int merge_trees(struct tree *head,
                       struct tree **result)
 {
        int code, clean;
+
+       if (subtree_merge) {
+               merge = shift_tree_object(head, merge);
+               common = shift_tree_object(head, common);
+       }
+
        if (sha_eq(common->object.sha1, merge->object.sha1)) {
                output(0, "Already uptodate!");
                *result = head;
@@ -1342,6 +1364,13 @@ int main(int argc, char *argv[])
        struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
        int index_fd;
 
+       if (argv[0]) {
+               int namelen = strlen(argv[0]);
+               if (8 < namelen &&
+                   !strcmp(argv[0] + namelen - 8, "-subtree"))
+                       subtree_merge = 1;
+       }
+
        git_config(merge_config);
        if (getenv("GIT_MERGE_VERBOSITY"))
                verbosity = strtol(getenv("GIT_MERGE_VERBOSITY"), NULL, 10);
diff --git a/test-match-trees.c b/test-match-trees.c
new file mode 100644 (file)
index 0000000..a3c4688
--- /dev/null
@@ -0,0 +1,24 @@
+#include "cache.h"
+#include "tree.h"
+
+int main(int ac, char **av)
+{
+       unsigned char hash1[20], hash2[20], shifted[20];
+       struct tree *one, *two;
+
+       if (get_sha1(av[1], hash1))
+               die("cannot parse %s as an object name", av[1]);
+       if (get_sha1(av[2], hash2))
+               die("cannot parse %s as an object name", av[2]);
+       one = parse_tree_indirect(hash1);
+       if (!one)
+               die("not a treeish %s", av[1]);
+       two = parse_tree_indirect(hash2);
+       if (!two)
+               die("not a treeish %s", av[2]);
+
+       shift_tree(one->object.sha1, two->object.sha1, shifted, -1);
+       printf("shifted: %s\n", sha1_to_hex(shifted));
+
+       exit(0);
+}