1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
9 #
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
12 #
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime dup2 ENOENT);
28 use IPC::Open2;
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
33 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34 my (%conv_author_name, %conv_author_email);
36 sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40 Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45 END
46 exit(1);
47 }
49 sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
61 }
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
70 }
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
74 }
75 }
76 # NEEDSWORK: Maybe warn on unrecognized lines?
77 }
78 close ($f);
79 }
81 sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 }
89 close ($f);
90 }
92 # convert getopts specs for use by git config
93 my %longmap = (
94 'A:' => 'authors-file',
95 'M:' => 'merge-regex',
96 'P:' => undef,
97 'R' => 'track-revisions',
98 'S:' => 'ignore-paths',
99 );
101 sub read_repo_config {
102 # Split the string between characters, unless there is a ':'
103 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
104 my @opts = split(/ *(?!:)/, shift);
105 foreach my $o (@opts) {
106 my $key = $o;
107 $key =~ s/://g;
108 my $arg = 'git config';
109 $arg .= ' --bool' if ($o !~ /:$/);
110 my $ckey = $key;
112 if (exists $longmap{$o}) {
113 # An uppercase option like -R cannot be
114 # expressed in the configuration, as the
115 # variable names are downcased.
116 $ckey = $longmap{$o};
117 next if (! defined $ckey);
118 $ckey =~ s/-//g;
119 }
120 chomp(my $tmp = `$arg --get cvsimport.$ckey`);
121 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
122 no strict 'refs';
123 my $opt_name = "opt_" . $key;
124 if (!$$opt_name) {
125 $$opt_name = $tmp;
126 }
127 }
128 }
129 }
131 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
132 read_repo_config($opts);
133 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
135 # turn the Getopt::Std specification in a Getopt::Long one,
136 # with support for multiple -M options
137 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
138 or usage();
139 usage if $opt_h;
141 if (@ARGV == 0) {
142 chomp(my $module = `git config --get cvsimport.module`);
143 push(@ARGV, $module) if $? == 0;
144 }
145 @ARGV <= 1 or usage("You can't specify more than one CVS module");
147 if ($opt_d) {
148 $ENV{"CVSROOT"} = $opt_d;
149 } elsif (-f 'CVS/Root') {
150 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
151 $opt_d = <$f>;
152 chomp $opt_d;
153 close $f;
154 $ENV{"CVSROOT"} = $opt_d;
155 } elsif ($ENV{"CVSROOT"}) {
156 $opt_d = $ENV{"CVSROOT"};
157 } else {
158 usage("CVSROOT needs to be set");
159 }
160 $opt_s ||= "-";
161 $opt_a ||= 0;
163 my $git_tree = $opt_C;
164 $git_tree ||= ".";
166 my $remote;
167 if (defined $opt_r) {
168 $remote = 'refs/remotes/' . $opt_r;
169 $opt_o ||= "master";
170 } else {
171 $opt_o ||= "origin";
172 $remote = 'refs/heads';
173 }
175 my $cvs_tree;
176 if ($#ARGV == 0) {
177 $cvs_tree = $ARGV[0];
178 } elsif (-f 'CVS/Repository') {
179 open my $f, '<', 'CVS/Repository' or
180 die 'Failed to open CVS/Repository';
181 $cvs_tree = <$f>;
182 chomp $cvs_tree;
183 close $f;
184 } else {
185 usage("CVS module has to be specified");
186 }
188 our @mergerx = ();
189 if ($opt_m) {
190 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
191 }
192 if (@opt_M) {
193 push (@mergerx, map { qr/$_/ } @opt_M);
194 }
196 # Remember UTC of our starting time
197 # we'll want to avoid importing commits
198 # that are too recent
199 our $starttime = time();
201 select(STDERR); $|=1; select(STDOUT);
204 package CVSconn;
205 # Basic CVS dialog.
206 # We're only interested in connecting and downloading, so ...
208 use File::Spec;
209 use File::Temp qw(tempfile);
210 use POSIX qw(strftime dup2);
212 sub new {
213 my ($what,$repo,$subdir) = @_;
214 $what=ref($what) if ref($what);
216 my $self = {};
217 $self->{'buffer'} = "";
218 bless($self,$what);
220 $repo =~ s#/+$##;
221 $self->{'fullrep'} = $repo;
222 $self->conn();
224 $self->{'subdir'} = $subdir;
225 $self->{'lines'} = undef;
227 return $self;
228 }
230 sub find_password_entry {
231 my ($cvspass, @cvsroot) = @_;
232 my ($file, $delim) = @$cvspass;
233 my $pass;
234 local ($_);
236 if (open(my $fh, $file)) {
237 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
238 CVSPASSFILE:
239 while (<$fh>) {
240 chomp;
241 s/^\/\d+\s+//;
242 my ($w, $p) = split($delim,$_,2);
243 for my $cvsroot (@cvsroot) {
244 if ($w eq $cvsroot) {
245 $pass = $p;
246 last CVSPASSFILE;
247 }
248 }
249 }
250 close($fh);
251 }
252 return $pass;
253 }
255 sub conn {
256 my $self = shift;
257 my $repo = $self->{'fullrep'};
258 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
259 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
261 my ($proxyhost,$proxyport);
262 if ($param && ($param =~ m/proxy=([^;]+)/)) {
263 $proxyhost = $1;
264 # Default proxyport, if not specified, is 8080.
265 $proxyport = 8080;
266 if ($ENV{"CVS_PROXY_PORT"}) {
267 $proxyport = $ENV{"CVS_PROXY_PORT"};
268 }
269 if ($param =~ m/proxyport=([^;]+)/) {
270 $proxyport = $1;
271 }
272 }
273 $repo ||= '/';
275 # if username is not explicit in CVSROOT, then use current user, as cvs would
276 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
277 my $rr2 = "-";
278 unless ($port) {
279 $rr2 = ":pserver:$user\@$serv:$repo";
280 $port=2401;
281 }
282 my $rr = ":pserver:$user\@$serv:$port$repo";
284 if ($pass) {
285 $pass = $self->_scramble($pass);
286 } else {
287 my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
288 [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
289 my @loc = ();
290 foreach my $cvspass (@cvspass) {
291 my $p = find_password_entry($cvspass, $rr, $rr2);
292 if ($p) {
293 push @loc, $cvspass->[0];
294 $pass = $p;
295 }
296 }
298 if (1 < @loc) {
299 die("Multiple cvs password files have ".
300 "entries for CVSROOT $opt_d: @loc");
301 } elsif (!$pass) {
302 $pass = "A";
303 }
304 }
306 my ($s, $rep);
307 if ($proxyhost) {
309 # Use a HTTP Proxy. Only works for HTTP proxies that
310 # don't require user authentication
311 #
312 # See: http://www.ietf.org/rfc/rfc2817.txt
314 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
315 die "Socket to $proxyhost: $!\n" unless defined $s;
316 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
317 or die "Write to $proxyhost: $!\n";
318 $s->flush();
320 $rep = <$s>;
322 # The answer should look like 'HTTP/1.x 2yy ....'
323 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
324 die "Proxy connect: $rep\n";
325 }
326 # Skip up to the empty line of the proxy server output
327 # including the response headers.
328 while ($rep = <$s>) {
329 last if (!defined $rep ||
330 $rep eq "\n" ||
331 $rep eq "\r\n");
332 }
333 } else {
334 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
335 die "Socket to $serv: $!\n" unless defined $s;
336 }
338 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
339 or die "Write to $serv: $!\n";
340 $s->flush();
342 $rep = <$s>;
344 if ($rep ne "I LOVE YOU\n") {
345 $rep="<unknown>" unless $rep;
346 die "AuthReply: $rep\n";
347 }
348 $self->{'socketo'} = $s;
349 $self->{'socketi'} = $s;
350 } else { # local or ext: Fork off our own cvs server.
351 my $pr = IO::Pipe->new();
352 my $pw = IO::Pipe->new();
353 my $pid = fork();
354 die "Fork: $!\n" unless defined $pid;
355 my $cvs = 'cvs';
356 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
357 my $rsh = 'rsh';
358 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
360 my @cvs = ($cvs, 'server');
361 my ($local, $user, $host);
362 $local = $repo =~ s/:local://;
363 if (!$local) {
364 $repo =~ s/:ext://;
365 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
366 ($user, $host) = ($1, $2);
367 }
368 if (!$local) {
369 if ($user) {
370 unshift @cvs, $rsh, '-l', $user, $host;
371 } else {
372 unshift @cvs, $rsh, $host;
373 }
374 }
376 unless ($pid) {
377 $pr->writer();
378 $pw->reader();
379 dup2($pw->fileno(),0);
380 dup2($pr->fileno(),1);
381 $pr->close();
382 $pw->close();
383 exec(@cvs);
384 }
385 $pw->writer();
386 $pr->reader();
387 $self->{'socketo'} = $pw;
388 $self->{'socketi'} = $pr;
389 }
390 $self->{'socketo'}->write("Root $repo\n");
392 # Trial and error says that this probably is the minimum set
393 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
395 $self->{'socketo'}->write("valid-requests\n");
396 $self->{'socketo'}->flush();
398 chomp(my $rep=$self->readline());
399 if ($rep !~ s/^Valid-requests\s*//) {
400 $rep="<unknown>" unless $rep;
401 die "Expected Valid-requests from server, but got: $rep\n";
402 }
403 chomp(my $res=$self->readline());
404 die "validReply: $res\n" if $res ne "ok";
406 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
407 $self->{'repo'} = $repo;
408 }
410 sub readline {
411 my ($self) = @_;
412 return $self->{'socketi'}->getline();
413 }
415 sub _file {
416 # Request a file with a given revision.
417 # Trial and error says this is a good way to do it. :-/
418 my ($self,$fn,$rev) = @_;
419 $self->{'socketo'}->write("Argument -N\n") or return undef;
420 $self->{'socketo'}->write("Argument -P\n") or return undef;
421 # -kk: Linus' version doesn't use it - defaults to off
422 if ($opt_k) {
423 $self->{'socketo'}->write("Argument -kk\n") or return undef;
424 }
425 $self->{'socketo'}->write("Argument -r\n") or return undef;
426 $self->{'socketo'}->write("Argument $rev\n") or return undef;
427 $self->{'socketo'}->write("Argument --\n") or return undef;
428 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
429 $self->{'socketo'}->write("Directory .\n") or return undef;
430 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
431 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
432 $self->{'socketo'}->write("co\n") or return undef;
433 $self->{'socketo'}->flush() or return undef;
434 $self->{'lines'} = 0;
435 return 1;
436 }
437 sub _line {
438 # Read a line from the server.
439 # ... except that 'line' may be an entire file. ;-)
440 my ($self, $fh) = @_;
441 die "Not in lines" unless defined $self->{'lines'};
443 my $line;
444 my $res=0;
445 while (defined($line = $self->readline())) {
446 # M U gnupg-cvs-rep/AUTHORS
447 # Updated gnupg-cvs-rep/
448 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
449 # /AUTHORS/1.1///T1.1
450 # u=rw,g=rw,o=rw
451 # 0
452 # ok
454 if ($line =~ s/^(?:Created|Updated) //) {
455 $line = $self->readline(); # path
456 $line = $self->readline(); # Entries line
457 my $mode = $self->readline(); chomp $mode;
458 $self->{'mode'} = $mode;
459 defined (my $cnt = $self->readline())
460 or die "EOF from server after 'Changed'\n";
461 chomp $cnt;
462 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
463 $line="";
464 $res = $self->_fetchfile($fh, $cnt);
465 } elsif ($line =~ s/^ //) {
466 print $fh $line;
467 $res += length($line);
468 } elsif ($line =~ /^M\b/) {
469 # output, do nothing
470 } elsif ($line =~ /^Mbinary\b/) {
471 my $cnt;
472 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
473 chomp $cnt;
474 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
475 $line="";
476 $res += $self->_fetchfile($fh, $cnt);
477 } else {
478 chomp $line;
479 if ($line eq "ok") {
480 # print STDERR "S: ok (".length($res).")\n";
481 return $res;
482 } elsif ($line =~ s/^E //) {
483 # print STDERR "S: $line\n";
484 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
485 $line = $self->readline(); # filename
486 $line = $self->readline(); # OK
487 chomp $line;
488 die "Unknown: $line" if $line ne "ok";
489 return -1;
490 } else {
491 die "Unknown: $line\n";
492 }
493 }
494 }
495 return undef;
496 }
497 sub file {
498 my ($self,$fn,$rev) = @_;
499 my $res;
501 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
502 DIR => File::Spec->tmpdir(), UNLINK => 1);
504 $self->_file($fn,$rev) and $res = $self->_line($fh);
506 if (!defined $res) {
507 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
508 truncate $fh, 0;
509 $self->conn();
510 $self->_file($fn,$rev) or die "No file command send";
511 $res = $self->_line($fh);
512 die "Retry failed" unless defined $res;
513 }
514 close ($fh);
516 return ($name, $res);
517 }
518 sub _fetchfile {
519 my ($self, $fh, $cnt) = @_;
520 my $res = 0;
521 my $bufsize = 1024 * 1024;
522 while ($cnt) {
523 if ($bufsize > $cnt) {
524 $bufsize = $cnt;
525 }
526 my $buf;
527 my $num = $self->{'socketi'}->read($buf,$bufsize);
528 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
529 print $fh $buf;
530 $res += $num;
531 $cnt -= $num;
532 }
533 return $res;
534 }
536 sub _scramble {
537 my ($self, $pass) = @_;
538 my $scrambled = "A";
540 return $scrambled unless $pass;
542 my $pass_len = length($pass);
543 my @pass_arr = split("", $pass);
544 my $i;
546 # from cvs/src/scramble.c
547 my @shifts = (
548 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
549 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
550 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
551 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
552 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
553 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
554 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
555 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
556 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
557 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
558 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
559 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
560 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
561 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
562 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
563 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
564 );
566 for ($i = 0; $i < $pass_len; $i++) {
567 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
568 }
570 return $scrambled;
571 }
573 package main;
575 my $cvs = CVSconn->new($opt_d, $cvs_tree);
578 sub pdate($) {
579 my ($d) = @_;
580 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
581 or die "Unparseable date: $d\n";
582 my $y=$1; $y-=1900 if $y>1900;
583 return timegm($6||0,$5,$4,$3,$2-1,$y);
584 }
586 sub pmode($) {
587 my ($mode) = @_;
588 my $m = 0;
589 my $mm = 0;
590 my $um = 0;
591 for my $x(split(//,$mode)) {
592 if ($x eq ",") {
593 $m |= $mm&$um;
594 $mm = 0;
595 $um = 0;
596 } elsif ($x eq "u") { $um |= 0700;
597 } elsif ($x eq "g") { $um |= 0070;
598 } elsif ($x eq "o") { $um |= 0007;
599 } elsif ($x eq "r") { $mm |= 0444;
600 } elsif ($x eq "w") { $mm |= 0222;
601 } elsif ($x eq "x") { $mm |= 0111;
602 } elsif ($x eq "=") { # do nothing
603 } else { die "Unknown mode: $mode\n";
604 }
605 }
606 $m |= $mm&$um;
607 return $m;
608 }
610 sub getwd() {
611 my $pwd = `pwd`;
612 chomp $pwd;
613 return $pwd;
614 }
616 sub is_sha1 {
617 my $s = shift;
618 return $s =~ /^[a-f0-9]{40}$/;
619 }
621 sub get_headref ($) {
622 my $name = shift;
623 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
624 return undef unless $? == 0;
625 chomp $r;
626 return $r;
627 }
629 my $user_filename_prepend = '';
630 sub munge_user_filename {
631 my $name = shift;
632 return File::Spec->file_name_is_absolute($name) ?
633 $name :
634 $user_filename_prepend . $name;
635 }
637 -d $git_tree
638 or mkdir($git_tree,0777)
639 or die "Could not create $git_tree: $!";
640 if ($git_tree ne '.') {
641 $user_filename_prepend = getwd() . '/';
642 chdir($git_tree);
643 }
645 my $last_branch = "";
646 my $orig_branch = "";
647 my %branch_date;
648 my $tip_at_start = undef;
650 my $git_dir = $ENV{"GIT_DIR"} || ".git";
651 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
652 $ENV{"GIT_DIR"} = $git_dir;
653 my $orig_git_index;
654 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
656 my %index; # holds filenames of one index per branch
658 unless (-d $git_dir) {
659 system(qw(git init));
660 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
661 system(qw(git read-tree --empty));
662 die "Cannot init an empty tree: $?\n" if $?;
664 $last_branch = $opt_o;
665 $orig_branch = "";
666 } else {
667 open(F, "-|", qw(git symbolic-ref HEAD)) or
668 die "Cannot run git symbolic-ref: $!\n";
669 chomp ($last_branch = <F>);
670 $last_branch = basename($last_branch);
671 close(F);
672 unless ($last_branch) {
673 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
674 $last_branch = "master";
675 }
676 $orig_branch = $last_branch;
677 $tip_at_start = `git rev-parse --verify HEAD`;
679 # Get the last import timestamps
680 my $fmt = '($ref, $author) = (%(refname), %(author));';
681 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
682 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
683 while (defined(my $entry = <H>)) {
684 my ($ref, $author);
685 eval($entry) || die "cannot eval refs list: $@";
686 my ($head) = ($ref =~ m|^$remote/(.*)|);
687 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
688 $branch_date{$head} = $1;
689 }
690 close(H);
691 if (!exists $branch_date{$opt_o}) {
692 die "Branch '$opt_o' does not exist.\n".
693 "Either use the correct '-o branch' option,\n".
694 "or import to a new repository.\n";
695 }
696 }
698 -d $git_dir
699 or die "Could not create git subdir ($git_dir).\n";
701 # now we read (and possibly save) author-info as well
702 -f "$git_dir/cvs-authors" and
703 read_author_info("$git_dir/cvs-authors");
704 if ($opt_A) {
705 read_author_info(munge_user_filename($opt_A));
706 write_author_info("$git_dir/cvs-authors");
707 }
709 # open .git/cvs-revisions, if requested
710 open my $revision_map, '>>', "$git_dir/cvs-revisions"
711 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
712 if defined $opt_R;
715 #
716 # run cvsps into a file unless we are getting
717 # it passed as a file via $opt_P
718 #
719 my $cvspsfile;
720 unless ($opt_P) {
721 print "Running cvsps...\n" if $opt_v;
722 my $pid = open(CVSPS,"-|");
723 my $cvspsfh;
724 die "Cannot fork: $!\n" unless defined $pid;
725 unless ($pid) {
726 my @opt;
727 @opt = split(/,/,$opt_p) if defined $opt_p;
728 unshift @opt, '-z', $opt_z if defined $opt_z;
729 unshift @opt, '-q' unless defined $opt_v;
730 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
731 push @opt, '--cvs-direct';
732 }
733 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
734 die "Could not start cvsps: $!\n";
735 }
736 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
737 DIR => File::Spec->tmpdir());
738 while (<CVSPS>) {
739 print $cvspsfh $_;
740 }
741 close CVSPS;
742 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
743 close $cvspsfh;
744 } else {
745 $cvspsfile = munge_user_filename($opt_P);
746 }
748 open(CVS, "<$cvspsfile") or die $!;
750 ## cvsps output:
751 #---------------------
752 #PatchSet 314
753 #Date: 1999/09/18 13:03:59
754 #Author: wkoch
755 #Branch: STABLE-BRANCH-1-0
756 #Ancestor branch: HEAD
757 #Tag: (none)
758 #Log:
759 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
760 #Members:
761 # README:1.57->1.57.2.1
762 # VERSION:1.96->1.96.2.1
763 #
764 #---------------------
766 my $state = 0;
768 sub update_index (\@\@) {
769 my $old = shift;
770 my $new = shift;
771 open(my $fh, '|-', qw(git update-index -z --index-info))
772 or die "unable to open git update-index: $!";
773 print $fh
774 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
775 @$old),
776 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
777 @$new)
778 or die "unable to write to git update-index: $!";
779 close $fh
780 or die "unable to write to git update-index: $!";
781 $? and die "git update-index reported error: $?";
782 }
784 sub write_tree () {
785 open(my $fh, '-|', qw(git write-tree))
786 or die "unable to open git write-tree: $!";
787 chomp(my $tree = <$fh>);
788 is_sha1($tree)
789 or die "Cannot get tree id ($tree): $!";
790 close($fh)
791 or die "Error running git write-tree: $?\n";
792 print "Tree ID $tree\n" if $opt_v;
793 return $tree;
794 }
796 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
797 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
799 # commits that cvsps cannot place anywhere...
800 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
802 sub commit {
803 if ($branch eq $opt_o && !$index{branch} &&
804 !get_headref("$remote/$branch")) {
805 # looks like an initial commit
806 # use the index primed by git init
807 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
808 $index{$branch} = "$git_dir/index";
809 } else {
810 # use an index per branch to speed up
811 # imports of projects with many branches
812 unless ($index{$branch}) {
813 $index{$branch} = tmpnam();
814 $ENV{GIT_INDEX_FILE} = $index{$branch};
815 if ($ancestor) {
816 system("git", "read-tree", "$remote/$ancestor");
817 } else {
818 system("git", "read-tree", "$remote/$branch");
819 }
820 die "read-tree failed: $?\n" if $?;
821 }
822 }
823 $ENV{GIT_INDEX_FILE} = $index{$branch};
825 update_index(@old, @new);
826 @old = @new = ();
827 my $tree = write_tree();
828 my $parent = get_headref("$remote/$last_branch");
829 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
831 my @commit_args;
832 push @commit_args, ("-p", $parent) if $parent;
834 # loose detection of merges
835 # based on the commit msg
836 foreach my $rx (@mergerx) {
837 next unless $logmsg =~ $rx && $1;
838 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
839 if (my $sha1 = get_headref("$remote/$mparent")) {
840 push @commit_args, '-p', "$remote/$mparent";
841 print "Merge parent branch: $mparent\n" if $opt_v;
842 }
843 }
845 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
846 $ENV{GIT_AUTHOR_NAME} = $author_name;
847 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
848 $ENV{GIT_AUTHOR_DATE} = $commit_date;
849 $ENV{GIT_COMMITTER_NAME} = $author_name;
850 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
851 $ENV{GIT_COMMITTER_DATE} = $commit_date;
852 my $pid = open2(my $commit_read, my $commit_write,
853 'git', 'commit-tree', $tree, @commit_args);
855 # compatibility with git2cvs
856 substr($logmsg,32767) = "" if length($logmsg) > 32767;
857 $logmsg =~ s/[\s\n]+\z//;
859 if (@skipped) {
860 $logmsg .= "\n\n\nSKIPPED:\n\t";
861 $logmsg .= join("\n\t", @skipped) . "\n";
862 @skipped = ();
863 }
865 print($commit_write "$logmsg\n") && close($commit_write)
866 or die "Error writing to git commit-tree: $!\n";
868 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
869 chomp(my $cid = <$commit_read>);
870 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
871 print "Commit ID $cid\n" if $opt_v;
872 close($commit_read);
874 waitpid($pid,0);
875 die "Error running git commit-tree: $?\n" if $?;
877 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
878 or die "Cannot write branch $branch for update: $!\n";
880 if ($revision_map) {
881 print $revision_map "@$_ $cid\n" for @commit_revisions;
882 }
883 @commit_revisions = ();
885 if ($tag) {
886 my ($xtag) = $tag;
887 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
888 $xtag =~ tr/_/\./ if ( $opt_u );
889 $xtag =~ s/[\/]/$opt_s/g;
890 $xtag =~ s/\[//g;
892 system('git' , 'tag', '-f', $xtag, $cid) == 0
893 or die "Cannot create tag $xtag: $!\n";
895 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
896 }
897 };
899 my $commitcount = 1;
900 while (<CVS>) {
901 chomp;
902 if ($state == 0 and /^-+$/) {
903 $state = 1;
904 } elsif ($state == 0) {
905 $state = 1;
906 redo;
907 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
908 $patchset = 0+$_;
909 $state=2;
910 } elsif ($state == 2 and s/^Date:\s+//) {
911 $date = pdate($_);
912 unless ($date) {
913 print STDERR "Could not parse date: $_\n";
914 $state=0;
915 next;
916 }
917 $state=3;
918 } elsif ($state == 3 and s/^Author:\s+//) {
919 s/\s+$//;
920 if (/^(.*?)\s+<(.*)>/) {
921 ($author_name, $author_email) = ($1, $2);
922 } elsif ($conv_author_name{$_}) {
923 $author_name = $conv_author_name{$_};
924 $author_email = $conv_author_email{$_};
925 } else {
926 $author_name = $author_email = $_;
927 }
928 $state = 4;
929 } elsif ($state == 4 and s/^Branch:\s+//) {
930 s/\s+$//;
931 tr/_/\./ if ( $opt_u );
932 s/[\/]/$opt_s/g;
933 $branch = $_;
934 $state = 5;
935 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
936 s/\s+$//;
937 $ancestor = $_;
938 $ancestor = $opt_o if $ancestor eq "HEAD";
939 $state = 6;
940 } elsif ($state == 5) {
941 $ancestor = undef;
942 $state = 6;
943 redo;
944 } elsif ($state == 6 and s/^Tag:\s+//) {
945 s/\s+$//;
946 if ($_ eq "(none)") {
947 $tag = undef;
948 } else {
949 $tag = $_;
950 }
951 $state = 7;
952 } elsif ($state == 7 and /^Log:/) {
953 $logmsg = "";
954 $state = 8;
955 } elsif ($state == 8 and /^Members:/) {
956 $branch = $opt_o if $branch eq "HEAD";
957 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
958 # skip
959 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
960 $state = 11;
961 next;
962 }
963 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
964 # skip if the commit is too recent
965 # given that the cvsps default fuzz is 300s, we give ourselves another
966 # 300s just in case -- this also prevents skipping commits
967 # due to server clock drift
968 print "skip patchset $patchset: $date too recent\n" if $opt_v;
969 $state = 11;
970 next;
971 }
972 if (exists $ignorebranch{$branch}) {
973 print STDERR "Skipping $branch\n";
974 $state = 11;
975 next;
976 }
977 if ($ancestor) {
978 if ($ancestor eq $branch) {
979 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
980 $ancestor = $opt_o;
981 }
982 if (defined get_headref("$remote/$branch")) {
983 print STDERR "Branch $branch already exists!\n";
984 $state=11;
985 next;
986 }
987 my $id = get_headref("$remote/$ancestor");
988 if (!$id) {
989 print STDERR "Branch $ancestor does not exist!\n";
990 $ignorebranch{$branch} = 1;
991 $state=11;
992 next;
993 }
995 system(qw(git update-ref -m cvsimport),
996 "$remote/$branch", $id);
997 if($? != 0) {
998 print STDERR "Could not create branch $branch\n";
999 $ignorebranch{$branch} = 1;
1000 $state=11;
1001 next;
1002 }
1003 }
1004 $last_branch = $branch if $branch ne $last_branch;
1005 $state = 9;
1006 } elsif ($state == 8) {
1007 $logmsg .= "$_\n";
1008 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
1009 # VERSION:1.96->1.96.2.1
1010 my $init = ($2 eq "INITIAL");
1011 my $fn = $1;
1012 my $rev = $3;
1013 $fn =~ s#^/+##;
1014 if ($opt_S && $fn =~ m/$opt_S/) {
1015 print "SKIPPING $fn v $rev\n";
1016 push(@skipped, $fn);
1017 next;
1018 }
1019 push @commit_revisions, [$fn, $rev];
1020 print "Fetching $fn v $rev\n" if $opt_v;
1021 my ($tmpname, $size) = $cvs->file($fn,$rev);
1022 if ($size == -1) {
1023 push(@old,$fn);
1024 print "Drop $fn\n" if $opt_v;
1025 } else {
1026 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
1027 my $pid = open(my $F, '-|');
1028 die $! unless defined $pid;
1029 if (!$pid) {
1030 exec("git", "hash-object", "-w", $tmpname)
1031 or die "Cannot create object: $!\n";
1032 }
1033 my $sha = <$F>;
1034 chomp $sha;
1035 close $F;
1036 my $mode = pmode($cvs->{'mode'});
1037 push(@new,[$mode, $sha, $fn]); # may be resurrected!
1038 }
1039 unlink($tmpname);
1040 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
1041 my $fn = $1;
1042 my $rev = $2;
1043 $fn =~ s#^/+##;
1044 push @commit_revisions, [$fn, $rev];
1045 push(@old,$fn);
1046 print "Delete $fn\n" if $opt_v;
1047 } elsif ($state == 9 and /^\s*$/) {
1048 $state = 10;
1049 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1050 $commitcount++;
1051 if ($opt_L && $commitcount > $opt_L) {
1052 last;
1053 }
1054 commit();
1055 if (($commitcount & 1023) == 0) {
1056 system(qw(git repack -a -d));
1057 }
1058 $state = 1;
1059 } elsif ($state == 11 and /^-+$/) {
1060 $state = 1;
1061 } elsif (/^-+$/) { # end of unknown-line processing
1062 $state = 1;
1063 } elsif ($state != 11) { # ignore stuff when skipping
1064 print STDERR "* UNKNOWN LINE * $_\n";
1065 }
1066 }
1067 commit() if $branch and $state != 11;
1069 unless ($opt_P) {
1070 unlink($cvspsfile);
1071 }
1073 # The heuristic of repacking every 1024 commits can leave a
1074 # lot of unpacked data. If there is more than 1MB worth of
1075 # not-packed objects, repack once more.
1076 my $line = `git count-objects`;
1077 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1078 my ($n_objects, $kb) = ($1, $2);
1079 1024 < $kb
1080 and system(qw(git repack -a -d));
1081 }
1083 foreach my $git_index (values %index) {
1084 if ($git_index ne "$git_dir/index") {
1085 unlink($git_index);
1086 }
1087 }
1089 if (defined $orig_git_index) {
1090 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1091 } else {
1092 delete $ENV{GIT_INDEX_FILE};
1093 }
1095 # Now switch back to the branch we were in before all of this happened
1096 if ($orig_branch) {
1097 print "DONE.\n" if $opt_v;
1098 if ($opt_i) {
1099 exit 0;
1100 }
1101 my $tip_at_end = `git rev-parse --verify HEAD`;
1102 if ($tip_at_start ne $tip_at_end) {
1103 for ($tip_at_start, $tip_at_end) { chomp; }
1104 print "Fetched into the current branch.\n" if $opt_v;
1105 system(qw(git read-tree -u -m),
1106 $tip_at_start, $tip_at_end);
1107 die "Fast-forward update failed: $?\n" if $?;
1108 }
1109 else {
1110 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1111 die "Could not merge $opt_o into the current branch.\n" if $?;
1112 }
1113 } else {
1114 $orig_branch = "master";
1115 print "DONE; creating $orig_branch branch\n" if $opt_v;
1116 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1117 unless defined get_headref('refs/heads/master');
1118 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1119 if ($opt_r && $opt_o ne 'HEAD');
1120 system('git', 'update-ref', 'HEAD', "$orig_branch");
1121 unless ($opt_i) {
1122 system(qw(git checkout -f));
1123 die "checkout failed: $?\n" if $?;
1124 }
1125 }