Code

git-svn: several graft-branches improvements
[git.git] / contrib / git-svn / git-svn.perl
index f026b240b89ce4857b5356661803affce9529f1f..be38f94170cacbc425b158e1a090ffa414460f10 100755 (executable)
@@ -34,6 +34,8 @@ use POSIX qw/strftime/;
 use IPC::Open3;
 use Memoize;
 memoize('revisions_eq');
+memoize('cmt_metadata');
+memoize('get_commit_time');
 
 my ($SVN_PATH, $SVN, $SVN_LOG, $_use_lib);
 $_use_lib = 1 unless $ENV{GIT_SVN_NO_LIB};
@@ -91,6 +93,8 @@ my %cmd = (
        'graft-branches' => [ \&graft_branches,
                        'Detect merges/branches from already imported history',
                        { 'merge-rx|m' => \@_opt_m,
+                         'branch|b=s' => \@_branch_from,
+                         'branch-all-refs|B' => \$_branch_all_refs,
                          'no-default-regex' => \$_no_default_regex,
                          'no-graft-copy' => \$_no_graft_copy } ],
        'multi-init' => [ \&multi_init,
@@ -590,13 +594,14 @@ sub graft_branches {
        my $l_map = read_url_paths();
        my @re = map { qr/$_/is } @_opt_m if @_opt_m;
        unless ($_no_default_regex) {
-               push @re, (     qr/\b(?:merge|merging|merged)\s+(\S.+)/is,
-                               qr/\b(?:from|of)\s+(\S.+)/is );
+               push @re, (qr/\b(?:merge|merging|merged)\s+with\s+([\w\.\-]+)/i,
+                       qr/\b(?:merge|merging|merged)\s+([\w\.\-]+)/i,
+                       qr/\b(?:from|of)\s+([\w\.\-]+)/i );
        }
        foreach my $u (keys %$l_map) {
                if (@re) {
                        foreach my $p (keys %{$l_map->{$u}}) {
-                               graft_merge_msg($grafts,$l_map,$u,$p);
+                               graft_merge_msg($grafts,$l_map,$u,$p,@re);
                        }
                }
                unless ($_no_graft_copy) {
@@ -607,6 +612,7 @@ sub graft_branches {
                        }
                }
        }
+       graft_tree_joins($grafts);
 
        write_grafts($grafts, $comments, $gr_file);
        unlink "$gr_file~$gr_sha1" if $gr_sha1;
@@ -879,6 +885,77 @@ sub common_prefix {
        return '';
 }
 
+# Graft together commits from different refs that share an identical tree.
+#
+# grafts set here are 'stronger' in that they're based on actual tree
+# matches, and won't be deleted from merge-base checking in write_grafts()
+# (they are stored with the value 2).
+#
+# $grafts is a hash ref of { commit => { parent => strength } } and is
+# modified in place; nothing meaningful is returned.  %tree_map is read as
+# { tree sha1 => [ commit sha1, ... ] }; map_tree_joins() (defined
+# elsewhere) is presumably what fills it when --branch was given.
+#
+# For each commit of every refs/remotes/<id> whose tree is in %tree_map,
+# each other commit listed for that tree is considered when git-merge-base
+# fails for the pair.  Pairs from the same SVN URL and UUID are skipped;
+# pairs from the same UUID are ordered by SVN revision; everything else is
+# ordered by (timezone-adjusted) committer time.  The older commit becomes
+# a parent of the newer one.
+sub graft_tree_joins {
+       my $grafts = shift;
+       map_tree_joins() if (@_branch_from && !%tree_map);
+       return unless %tree_map;
+
+       git_svn_each(sub {
+               my $i = shift;
+               defined(my $pid = open my $fh, '-|') or croak $!;
+               if (!$pid) {
+                       exec qw/git-rev-list --pretty=raw/,
+                                       "refs/remotes/$i" or croak $!;
+               }
+               COMMIT: while (<$fh>) {
+                       next unless /^commit ($sha1)$/o;
+                       my $c = $1;
+
+                       # the tree line is expected right after the commit
+                       # line; stop cleanly if the output ends here
+                       my $tree_line = <$fh>;
+                       last COMMIT unless defined $tree_line;
+                       my ($t) = ($tree_line =~ /^tree ($sha1)$/o);
+                       next unless defined $t && $tree_map{$t};
+
+                       # scan forward to the committer line, but give up
+                       # at EOF instead of spinning on undef forever
+                       my ($s, $tz);
+                       while (defined(my $l = readline $fh)) {
+                               next unless $l =~
+                                   /^committer (?:.+) (\d+) ([\-\+]?\d+)$/;
+                               ($s, $tz) = ($1, $2);
+                               last;
+                       }
+                       last COMMIT unless defined $s;
+
+                       if ($tz =~ s/^\+//) {
+                               $s += tz_to_s_offset($tz);
+                       } elsif ($tz =~ s/^\-//) {
+                               $s -= tz_to_s_offset($tz);
+                       }
+
+                       my ($url_a, $r_a, $uuid_a) = cmt_metadata($c);
+
+                       foreach my $p (@{$tree_map{$t}}) {
+                               next if $p eq $c;
+                               my $mb = eval {
+                                       safe_qx('git-merge-base', $c, $p)
+                               };
+                               # only pairs git-merge-base fails on are
+                               # candidates for a graft
+                               next unless ($@ || $?);
+                               if (defined $r_a) {
+                                       # see if SVN says it's a relative
+                                       my ($url_b, $r_b, $uuid_b) =
+                                                       cmt_metadata($p);
+                                       # $p may carry no git-svn-id at all,
+                                       # so never compare undef UUIDs
+                                       my $same_repo = (defined $uuid_a &&
+                                                       defined $uuid_b &&
+                                                       ($uuid_a eq $uuid_b));
+                                       next if ($same_repo &&
+                                                       defined $url_b &&
+                                                       defined $url_a &&
+                                                       ($url_a eq $url_b));
+                                       if ($same_repo) {
+                                               if ($r_b < $r_a) {
+                                                       $grafts->{$c}->{$p} = 2;
+                                                       next;
+                                               } elsif ($r_b > $r_a) {
+                                                       $grafts->{$p}->{$c} = 2;
+                                                       next;
+                                               }
+                                       }
+                               }
+                               # no usable SVN ordering: fall back to time
+                               my $ct = get_commit_time($p);
+                               if ($ct < $s) {
+                                       $grafts->{$c}->{$p} = 2;
+                               } elsif ($ct > $s) {
+                                       $grafts->{$p}->{$c} = 2;
+                               }
+                               # what should we do when $ct == $s ?
+                       }
+               }
+               close $fh or croak $?;
+       });
+}
+
 # this isn't funky-filename safe, but good enough for now...
 sub graft_file_copy_cmd {
        my ($grafts, $l_map, $u) = @_;
@@ -957,7 +1034,7 @@ sub process_merge_msg_matches {
                my $re = qr/\Q$w\E/i;
                foreach (keys %{$l_map->{$u}}) {
                        if (/$re/) {
-                               push @strong, $_;
+                               push @strong, $l_map->{$u}->{$_};
                                last;
                        }
                }
@@ -966,7 +1043,7 @@ sub process_merge_msg_matches {
                $re = qr/\Q$w\E/i;
                foreach (keys %{$l_map->{$u}}) {
                        if (/$re/) {
-                               push @strong, $_;
+                               push @strong, $l_map->{$u}->{$_};
                                last;
                        }
                }
@@ -979,7 +1056,7 @@ sub process_merge_msg_matches {
                return unless defined $rev;
        }
        foreach my $m (@strong) {
-               my ($r0, $s0) = find_rev_before($rev, $m);
+               my ($r0, $s0) = find_rev_before($rev, $m, 1);
                $grafts->{$c->{c}}->{$s0} = 1 if defined $s0;
        }
 }
@@ -1791,7 +1868,26 @@ sub git_commit {
                restore_index($index);
        }
        if (exists $tree_map{$tree}) {
-               push @tmp_parents, @{$tree_map{$tree}};
+               foreach my $p (@{$tree_map{$tree}}) {
+                       my $skip;
+                       foreach (@tmp_parents) {
+                               # see if a common parent is found
+                               my $mb = eval {
+                                       safe_qx('git-merge-base', $_, $p)
+                               };
+                               next if ($@ || $?);
+                               $skip = 1;
+                               last;
+                       }
+                       next if $skip;
+                       my ($url_p, $r_p, $uuid_p) = cmt_metadata($p);
+                       next if (($SVN_UUID eq $uuid_p) &&
+                                               ($log_msg->{revision} > $r_p));
+                       next if (defined $url_p && defined $SVN_URL &&
+                                               ($SVN_UUID eq $uuid_p) &&
+                                               ($url_p eq $SVN_URL));
+                       push @tmp_parents, $p;
+               }
        }
        foreach (@tmp_parents) {
                next if $seen_parent{$_};
@@ -2119,6 +2215,7 @@ sub init_vars {
        $GIT_SVN_INDEX = "$GIT_SVN_DIR/index";
        $SVN_URL = undef;
        $SVN_WC = "$GIT_SVN_DIR/tree";
+       %tree_map = ();
 }
 
 # convert GetOpt::Long specs for use by git-repo-config
@@ -2186,6 +2283,7 @@ sub write_grafts {
                        print $fh $_ foreach @{$comments->{$c}};
                }
                my $p = $grafts->{$c};
+               my %x; # real parents
                delete $p->{$c}; # commits are not self-reproducing...
                my $pid = open my $ch, '-|';
                defined $pid or croak $!;
@@ -2193,13 +2291,41 @@ sub write_grafts {
                        exec(qw/git-cat-file commit/, $c) or croak $!;
                }
                while (<$ch>) {
-                       if (/^parent ([a-f\d]{40})/) {
-                               $p->{$1} = 1;
+                       if (/^parent ($sha1)/) {
+                               $x{$1} = $p->{$1} = 1;
                        } else {
-                               last unless /^\S/i;
+                               last unless /^\S/;
                        }
                }
                close $ch; # breaking the pipe
+
+               # if real parents are the only ones in the grafts, drop it
+               next if join(' ',sort keys %$p) eq join(' ',sort keys %x);
+
+               my (@ip, @jp, $mb);
+               my %del = %x;
+               @ip = @jp = keys %$p;
+               foreach my $i (@ip) {
+                       next if $del{$i} || $p->{$i} == 2;
+                       foreach my $j (@jp) {
+                               next if $i eq $j || $del{$j} || $p->{$j} == 2;
+                               $mb = eval { safe_qx('git-merge-base',$i,$j) };
+                               next unless $mb;
+                               chomp $mb;
+                               next if $x{$mb};
+                               if ($mb eq $j) {
+                                       delete $p->{$i};
+                                       $del{$i} = 1;
+                               } elsif ($mb eq $i) {
+                                       delete $p->{$j};
+                                       $del{$j} = 1;
+                               }
+                       }
+               }
+
+               # if real parents are the only ones in the grafts, drop it
+               next if join(' ',sort keys %$p) eq join(' ',sort keys %x);
+
                print $fh $c, ' ', join(' ', sort keys %$p),"\n";
        }
        if ($comments->{'END'}) {
@@ -2219,7 +2345,7 @@ sub read_url_paths {
 }
 
 sub extract_metadata {
-       my $id = shift;
+       my $id = shift or return (undef, undef, undef);
        my ($url, $rev, $uuid) = ($id =~ /^git-svn-id:\s(\S+?)\@(\d+)
                                                        \s([a-f\d\-]+)$/x);
        if (!$rev || !$uuid || !$url) {
@@ -2230,6 +2356,31 @@ sub extract_metadata {
        return ($url, $rev, $uuid);
 }
 
+# Return extract_metadata()'s result for the last "git-svn-id: " line
+# found in the raw commit object named by the first argument.
+sub cmt_metadata {
+       my @id_lines = grep(/^git-svn-id: /,
+                       safe_qx(qw/git-cat-file commit/, shift));
+       # pass nothing at all (not undef) when the commit has no such line
+       return extract_metadata(@id_lines ? $id_lines[-1] : ());
+}
+
+# Return the committer timestamp of commit $cmt, shifted by its timezone
+# offset (added for "+", subtracted for "-") via tz_to_s_offset().
+# Reads the first matching committer line of
+# `git-rev-list --pretty=raw -n1 $cmt`; dies if there is none.
+sub get_commit_time {
+       my $cmt = shift;
+       my $pid = open my $fh, '-|';
+       croak $! unless defined $pid;
+       unless ($pid) {
+               # child: becomes git-rev-list, writing to the parent's $fh
+               exec qw/git-rev-list --pretty=raw -n1/, $cmt or croak $!;
+       }
+       while (<$fh>) {
+               my ($secs, $zone) =
+                       /^committer\s(?:.+) (\d+) ([\-\+]?\d+)$/ or next;
+               my $positive = ($zone =~ s/^\+//);
+               if ($positive || $zone =~ s/^\-//) {
+                       my $offset = tz_to_s_offset($zone);
+                       $secs = $positive ? $secs + $offset
+                                         : $secs - $offset;
+               }
+               # one line is all we need; closing early is intentional
+               close $fh;
+               return $secs;
+       }
+       die "Can't get commit time for commit: $cmt\n";
+}
+
 sub tz_to_s_offset {
        my ($tz) = @_;
        $tz =~ s/(\d\d)$//;
@@ -2498,8 +2649,7 @@ sub svn_grab_base_rev {
        chomp(my $c = do { local $/; <$fh> });
        close $fh;
        if (defined $c && length $c) {
-               my ($url, $rev, $uuid) = extract_metadata((grep(/^git-svn-id: /,
-                       safe_qx(qw/git-cat-file commit/, $c)))[-1]);
+               my ($url, $rev, $uuid) = cmt_metadata($c);
                return ($rev, $c);
        }
        return (undef, undef);
@@ -2655,6 +2805,10 @@ sub find_graft_path_parents {
                my $i = $tree_paths->{$x};
                my ($r, $parent) = find_rev_before($r0, $i, 1);
                if (defined $r && defined $parent && revisions_eq($x,$r,$r0)) {
+                       my ($url_b, undef, $uuid_b) = cmt_metadata($c);
+                       my ($url_a, undef, $uuid_a) = cmt_metadata($parent);
+                       next if ($url_a && $url_b && $url_a eq $url_b &&
+                                                       $uuid_b eq $uuid_a);
                        $grafts->{$c}->{$parent} = 1;
                }
        }