1 #!/usr/bin/perl
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to aggregate CVS check-ins into related changes.
7 # Fortunately, "cvsps" does that for us; all we have to do is to parse
8 # its output.
9 #
10 # Checking out the files is done by a single long-running CVS connection
11 # / server process.
12 #
13 # The head revision is on branch "origin" by default.
14 # You can change that with the '-o' option.
16 use 5.008;
17 use strict;
18 use warnings;
19 use Getopt::Long;
20 use File::Spec;
21 use File::Temp qw(tempfile tmpnam);
22 use File::Path qw(mkpath);
23 use File::Basename qw(basename dirname);
24 use Time::Local;
25 use IO::Socket;
26 use IO::Pipe;
27 use POSIX qw(strftime dup2 ENOENT);
28 use IPC::Open2;
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
33 our ($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,@opt_M,$opt_A,$opt_S,$opt_L, $opt_a, $opt_r, $opt_R);
34 my (%conv_author_name, %conv_author_email);
36 sub usage(;$) {
37 my $msg = shift;
38 print(STDERR "Error: $msg\n") if $msg;
39 print STDERR <<END;
40 Usage: git cvsimport # fetch/update GIT from CVS
41 [-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
42 [-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
43 [-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
44 [-r remote] [-R] [CVS_module]
45 END
46 exit(1);
47 }
49 sub read_author_info($) {
50 my ($file) = @_;
51 my $user;
52 open my $f, '<', "$file" or die("Failed to open $file: $!\n");
54 while (<$f>) {
55 # Expected format is this:
56 # exon=Andreas Ericsson <ae@op5.se>
57 if (m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
58 $user = $1;
59 $conv_author_name{$user} = $2;
60 $conv_author_email{$user} = $3;
61 }
62 # However, we also read from CVSROOT/users format
63 # to ease migration.
64 elsif (/^(\w+):(['"]?)(.+?)\2\s*$/) {
65 my $mapped;
66 ($user, $mapped) = ($1, $3);
67 if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
68 $conv_author_name{$user} = $1;
69 $conv_author_email{$user} = $2;
70 }
71 elsif ($mapped =~ /^<?(.*)>?$/) {
72 $conv_author_name{$user} = $user;
73 $conv_author_email{$user} = $1;
74 }
75 }
76 # NEEDSWORK: Maybe warn on unrecognized lines?
77 }
78 close ($f);
79 }
81 sub write_author_info($) {
82 my ($file) = @_;
83 open my $f, '>', $file or
84 die("Failed to open $file for writing: $!");
86 foreach (keys %conv_author_name) {
87 print $f "$_=$conv_author_name{$_} <$conv_author_email{$_}>\n";
88 }
89 close ($f);
90 }
92 # convert getopts specs for use by git config
93 sub read_repo_config {
94 # Split the string between characters, unless there is a ':'
95 # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
96 my @opts = split(/ *(?!:)/, shift);
97 foreach my $o (@opts) {
98 my $key = $o;
99 $key =~ s/://g;
100 my $arg = 'git config';
101 $arg .= ' --bool' if ($o !~ /:$/);
103 chomp(my $tmp = `$arg --get cvsimport.$key`);
104 if ($tmp && !($arg =~ /--bool/ && $tmp eq 'false')) {
105 no strict 'refs';
106 my $opt_name = "opt_" . $key;
107 if (!$$opt_name) {
108 $$opt_name = $tmp;
109 }
110 }
111 }
112 }
114 my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
115 read_repo_config($opts);
116 Getopt::Long::Configure( 'no_ignore_case', 'bundling' );
118 # turn the Getopt::Std specification in a Getopt::Long one,
119 # with support for multiple -M options
120 GetOptions( map { s/:/=s/; /M/ ? "$_\@" : $_ } split( /(?!:)/, $opts ) )
121 or usage();
122 usage if $opt_h;
124 if (@ARGV == 0) {
125 chomp(my $module = `git config --get cvsimport.module`);
126 push(@ARGV, $module) if $? == 0;
127 }
128 @ARGV <= 1 or usage("You can't specify more than one CVS module");
130 if ($opt_d) {
131 $ENV{"CVSROOT"} = $opt_d;
132 } elsif (-f 'CVS/Root') {
133 open my $f, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
134 $opt_d = <$f>;
135 chomp $opt_d;
136 close $f;
137 $ENV{"CVSROOT"} = $opt_d;
138 } elsif ($ENV{"CVSROOT"}) {
139 $opt_d = $ENV{"CVSROOT"};
140 } else {
141 usage("CVSROOT needs to be set");
142 }
143 $opt_s ||= "-";
144 $opt_a ||= 0;
146 my $git_tree = $opt_C;
147 $git_tree ||= ".";
149 my $remote;
150 if (defined $opt_r) {
151 $remote = 'refs/remotes/' . $opt_r;
152 $opt_o ||= "master";
153 } else {
154 $opt_o ||= "origin";
155 $remote = 'refs/heads';
156 }
158 my $cvs_tree;
159 if ($#ARGV == 0) {
160 $cvs_tree = $ARGV[0];
161 } elsif (-f 'CVS/Repository') {
162 open my $f, '<', 'CVS/Repository' or
163 die 'Failed to open CVS/Repository';
164 $cvs_tree = <$f>;
165 chomp $cvs_tree;
166 close $f;
167 } else {
168 usage("CVS module has to be specified");
169 }
171 our @mergerx = ();
172 if ($opt_m) {
173 @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
174 }
175 if (@opt_M) {
176 push (@mergerx, map { qr/$_/ } @opt_M);
177 }
179 # Remember UTC of our starting time
180 # we'll want to avoid importing commits
181 # that are too recent
182 our $starttime = time();
184 select(STDERR); $|=1; select(STDOUT);
187 package CVSconn;
188 # Basic CVS dialog.
189 # We're only interested in connecting and downloading, so ...
191 use File::Spec;
192 use File::Temp qw(tempfile);
193 use POSIX qw(strftime dup2);
195 sub new {
196 my ($what,$repo,$subdir) = @_;
197 $what=ref($what) if ref($what);
199 my $self = {};
200 $self->{'buffer'} = "";
201 bless($self,$what);
203 $repo =~ s#/+$##;
204 $self->{'fullrep'} = $repo;
205 $self->conn();
207 $self->{'subdir'} = $subdir;
208 $self->{'lines'} = undef;
210 return $self;
211 }
213 sub conn {
214 my $self = shift;
215 my $repo = $self->{'fullrep'};
216 if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
217 my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);
219 my ($proxyhost,$proxyport);
220 if ($param && ($param =~ m/proxy=([^;]+)/)) {
221 $proxyhost = $1;
222 # Default proxyport, if not specified, is 8080.
223 $proxyport = 8080;
224 if ($ENV{"CVS_PROXY_PORT"}) {
225 $proxyport = $ENV{"CVS_PROXY_PORT"};
226 }
227 if ($param =~ m/proxyport=([^;]+)/) {
228 $proxyport = $1;
229 }
230 }
231 $repo ||= '/';
233 # if username is not explicit in CVSROOT, then use current user, as cvs would
234 $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
235 my $rr2 = "-";
236 unless ($port) {
237 $rr2 = ":pserver:$user\@$serv:$repo";
238 $port=2401;
239 }
240 my $rr = ":pserver:$user\@$serv:$port$repo";
242 if ($pass) {
243 $pass = $self->_scramble($pass);
244 } else {
245 open(H,$ENV{'HOME'}."/.cvspass") and do {
246 # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
247 while (<H>) {
248 chomp;
249 s/^\/\d+\s+//;
250 my ($w,$p) = split(/\s/,$_,2);
251 if ($w eq $rr or $w eq $rr2) {
252 $pass = $p;
253 last;
254 }
255 }
256 };
257 $pass = "A" unless $pass;
258 }
260 my ($s, $rep);
261 if ($proxyhost) {
263 # Use a HTTP Proxy. Only works for HTTP proxies that
264 # don't require user authentication
265 #
266 # See: http://www.ietf.org/rfc/rfc2817.txt
268 $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
269 die "Socket to $proxyhost: $!\n" unless defined $s;
270 $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
271 or die "Write to $proxyhost: $!\n";
272 $s->flush();
274 $rep = <$s>;
276 # The answer should look like 'HTTP/1.x 2yy ....'
277 if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
278 die "Proxy connect: $rep\n";
279 }
280 # Skip up to the empty line of the proxy server output
281 # including the response headers.
282 while ($rep = <$s>) {
283 last if (!defined $rep ||
284 $rep eq "\n" ||
285 $rep eq "\r\n");
286 }
287 } else {
288 $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
289 die "Socket to $serv: $!\n" unless defined $s;
290 }
292 $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
293 or die "Write to $serv: $!\n";
294 $s->flush();
296 $rep = <$s>;
298 if ($rep ne "I LOVE YOU\n") {
299 $rep="<unknown>" unless $rep;
300 die "AuthReply: $rep\n";
301 }
302 $self->{'socketo'} = $s;
303 $self->{'socketi'} = $s;
304 } else { # local or ext: Fork off our own cvs server.
305 my $pr = IO::Pipe->new();
306 my $pw = IO::Pipe->new();
307 my $pid = fork();
308 die "Fork: $!\n" unless defined $pid;
309 my $cvs = 'cvs';
310 $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
311 my $rsh = 'rsh';
312 $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};
314 my @cvs = ($cvs, 'server');
315 my ($local, $user, $host);
316 $local = $repo =~ s/:local://;
317 if (!$local) {
318 $repo =~ s/:ext://;
319 $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
320 ($user, $host) = ($1, $2);
321 }
322 if (!$local) {
323 if ($user) {
324 unshift @cvs, $rsh, '-l', $user, $host;
325 } else {
326 unshift @cvs, $rsh, $host;
327 }
328 }
330 unless ($pid) {
331 $pr->writer();
332 $pw->reader();
333 dup2($pw->fileno(),0);
334 dup2($pr->fileno(),1);
335 $pr->close();
336 $pw->close();
337 exec(@cvs);
338 }
339 $pw->writer();
340 $pr->reader();
341 $self->{'socketo'} = $pw;
342 $self->{'socketi'} = $pr;
343 }
344 $self->{'socketo'}->write("Root $repo\n");
346 # Trial and error says that this probably is the minimum set
347 $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");
349 $self->{'socketo'}->write("valid-requests\n");
350 $self->{'socketo'}->flush();
352 chomp(my $rep=$self->readline());
353 if ($rep !~ s/^Valid-requests\s*//) {
354 $rep="<unknown>" unless $rep;
355 die "Expected Valid-requests from server, but got: $rep\n";
356 }
357 chomp(my $res=$self->readline());
358 die "validReply: $res\n" if $res ne "ok";
360 $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
361 $self->{'repo'} = $repo;
362 }
364 sub readline {
365 my ($self) = @_;
366 return $self->{'socketi'}->getline();
367 }
369 sub _file {
370 # Request a file with a given revision.
371 # Trial and error says this is a good way to do it. :-/
372 my ($self,$fn,$rev) = @_;
373 $self->{'socketo'}->write("Argument -N\n") or return undef;
374 $self->{'socketo'}->write("Argument -P\n") or return undef;
375 # -kk: Linus' version doesn't use it - defaults to off
376 if ($opt_k) {
377 $self->{'socketo'}->write("Argument -kk\n") or return undef;
378 }
379 $self->{'socketo'}->write("Argument -r\n") or return undef;
380 $self->{'socketo'}->write("Argument $rev\n") or return undef;
381 $self->{'socketo'}->write("Argument --\n") or return undef;
382 $self->{'socketo'}->write("Argument $self->{'subdir'}/$fn\n") or return undef;
383 $self->{'socketo'}->write("Directory .\n") or return undef;
384 $self->{'socketo'}->write("$self->{'repo'}\n") or return undef;
385 # $self->{'socketo'}->write("Sticky T1.0\n") or return undef;
386 $self->{'socketo'}->write("co\n") or return undef;
387 $self->{'socketo'}->flush() or return undef;
388 $self->{'lines'} = 0;
389 return 1;
390 }
391 sub _line {
392 # Read a line from the server.
393 # ... except that 'line' may be an entire file. ;-)
394 my ($self, $fh) = @_;
395 die "Not in lines" unless defined $self->{'lines'};
397 my $line;
398 my $res=0;
399 while (defined($line = $self->readline())) {
400 # M U gnupg-cvs-rep/AUTHORS
401 # Updated gnupg-cvs-rep/
402 # /daten/src/rsync/gnupg-cvs-rep/AUTHORS
403 # /AUTHORS/1.1///T1.1
404 # u=rw,g=rw,o=rw
405 # 0
406 # ok
408 if ($line =~ s/^(?:Created|Updated) //) {
409 $line = $self->readline(); # path
410 $line = $self->readline(); # Entries line
411 my $mode = $self->readline(); chomp $mode;
412 $self->{'mode'} = $mode;
413 defined (my $cnt = $self->readline())
414 or die "EOF from server after 'Changed'\n";
415 chomp $cnt;
416 die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/;
417 $line="";
418 $res = $self->_fetchfile($fh, $cnt);
419 } elsif ($line =~ s/^ //) {
420 print $fh $line;
421 $res += length($line);
422 } elsif ($line =~ /^M\b/) {
423 # output, do nothing
424 } elsif ($line =~ /^Mbinary\b/) {
425 my $cnt;
426 die "EOF from server after 'Mbinary'" unless defined ($cnt = $self->readline());
427 chomp $cnt;
428 die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
429 $line="";
430 $res += $self->_fetchfile($fh, $cnt);
431 } else {
432 chomp $line;
433 if ($line eq "ok") {
434 # print STDERR "S: ok (".length($res).")\n";
435 return $res;
436 } elsif ($line =~ s/^E //) {
437 # print STDERR "S: $line\n";
438 } elsif ($line =~ /^(Remove-entry|Removed) /i) {
439 $line = $self->readline(); # filename
440 $line = $self->readline(); # OK
441 chomp $line;
442 die "Unknown: $line" if $line ne "ok";
443 return -1;
444 } else {
445 die "Unknown: $line\n";
446 }
447 }
448 }
449 return undef;
450 }
451 sub file {
452 my ($self,$fn,$rev) = @_;
453 my $res;
455 my ($fh, $name) = tempfile('gitcvs.XXXXXX',
456 DIR => File::Spec->tmpdir(), UNLINK => 1);
458 $self->_file($fn,$rev) and $res = $self->_line($fh);
460 if (!defined $res) {
461 print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
462 truncate $fh, 0;
463 $self->conn();
464 $self->_file($fn,$rev) or die "No file command send";
465 $res = $self->_line($fh);
466 die "Retry failed" unless defined $res;
467 }
468 close ($fh);
470 return ($name, $res);
471 }
472 sub _fetchfile {
473 my ($self, $fh, $cnt) = @_;
474 my $res = 0;
475 my $bufsize = 1024 * 1024;
476 while ($cnt) {
477 if ($bufsize > $cnt) {
478 $bufsize = $cnt;
479 }
480 my $buf;
481 my $num = $self->{'socketi'}->read($buf,$bufsize);
482 die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0;
483 print $fh $buf;
484 $res += $num;
485 $cnt -= $num;
486 }
487 return $res;
488 }
490 sub _scramble {
491 my ($self, $pass) = @_;
492 my $scrambled = "A";
494 return $scrambled unless $pass;
496 my $pass_len = length($pass);
497 my @pass_arr = split("", $pass);
498 my $i;
500 # from cvs/src/scramble.c
501 my @shifts = (
502 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
503 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
504 114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
505 111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
506 41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
507 125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
508 36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
509 58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
510 225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
511 199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
512 174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
513 207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
514 192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
515 227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
516 182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
517 243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
518 );
520 for ($i = 0; $i < $pass_len; $i++) {
521 $scrambled .= pack("C", $shifts[ord($pass_arr[$i])]);
522 }
524 return $scrambled;
525 }
527 package main;
529 my $cvs = CVSconn->new($opt_d, $cvs_tree);
532 sub pdate($) {
533 my ($d) = @_;
534 m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
535 or die "Unparseable date: $d\n";
536 my $y=$1; $y-=1900 if $y>1900;
537 return timegm($6||0,$5,$4,$3,$2-1,$y);
538 }
540 sub pmode($) {
541 my ($mode) = @_;
542 my $m = 0;
543 my $mm = 0;
544 my $um = 0;
545 for my $x(split(//,$mode)) {
546 if ($x eq ",") {
547 $m |= $mm&$um;
548 $mm = 0;
549 $um = 0;
550 } elsif ($x eq "u") { $um |= 0700;
551 } elsif ($x eq "g") { $um |= 0070;
552 } elsif ($x eq "o") { $um |= 0007;
553 } elsif ($x eq "r") { $mm |= 0444;
554 } elsif ($x eq "w") { $mm |= 0222;
555 } elsif ($x eq "x") { $mm |= 0111;
556 } elsif ($x eq "=") { # do nothing
557 } else { die "Unknown mode: $mode\n";
558 }
559 }
560 $m |= $mm&$um;
561 return $m;
562 }
564 sub getwd() {
565 my $pwd = `pwd`;
566 chomp $pwd;
567 return $pwd;
568 }
570 sub is_sha1 {
571 my $s = shift;
572 return $s =~ /^[a-f0-9]{40}$/;
573 }
575 sub get_headref ($) {
576 my $name = shift;
577 my $r = `git rev-parse --verify '$name' 2>/dev/null`;
578 return undef unless $? == 0;
579 chomp $r;
580 return $r;
581 }
583 my $user_filename_prepend = '';
584 sub munge_user_filename {
585 my $name = shift;
586 return File::Spec->file_name_is_absolute($name) ?
587 $name :
588 $user_filename_prepend . $name;
589 }
591 -d $git_tree
592 or mkdir($git_tree,0777)
593 or die "Could not create $git_tree: $!";
594 if ($git_tree ne '.') {
595 $user_filename_prepend = getwd() . '/';
596 chdir($git_tree);
597 }
599 my $last_branch = "";
600 my $orig_branch = "";
601 my %branch_date;
602 my $tip_at_start = undef;
604 my $git_dir = $ENV{"GIT_DIR"} || ".git";
605 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
606 $ENV{"GIT_DIR"} = $git_dir;
607 my $orig_git_index;
608 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
610 my %index; # holds filenames of one index per branch
612 unless (-d $git_dir) {
613 system(qw(git init));
614 die "Cannot init the GIT db at $git_tree: $?\n" if $?;
615 system(qw(git read-tree --empty));
616 die "Cannot init an empty tree: $?\n" if $?;
618 $last_branch = $opt_o;
619 $orig_branch = "";
620 } else {
621 open(F, "-|", qw(git symbolic-ref HEAD)) or
622 die "Cannot run git symbolic-ref: $!\n";
623 chomp ($last_branch = <F>);
624 $last_branch = basename($last_branch);
625 close(F);
626 unless ($last_branch) {
627 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
628 $last_branch = "master";
629 }
630 $orig_branch = $last_branch;
631 $tip_at_start = `git rev-parse --verify HEAD`;
633 # Get the last import timestamps
634 my $fmt = '($ref, $author) = (%(refname), %(author));';
635 my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
636 open(H, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
637 while (defined(my $entry = <H>)) {
638 my ($ref, $author);
639 eval($entry) || die "cannot eval refs list: $@";
640 my ($head) = ($ref =~ m|^$remote/(.*)|);
641 $author =~ /^.*\s(\d+)\s[-+]\d{4}$/;
642 $branch_date{$head} = $1;
643 }
644 close(H);
645 if (!exists $branch_date{$opt_o}) {
646 die "Branch '$opt_o' does not exist.\n".
647 "Either use the correct '-o branch' option,\n".
648 "or import to a new repository.\n";
649 }
650 }
652 -d $git_dir
653 or die "Could not create git subdir ($git_dir).\n";
655 # now we read (and possibly save) author-info as well
656 -f "$git_dir/cvs-authors" and
657 read_author_info("$git_dir/cvs-authors");
658 if ($opt_A) {
659 read_author_info(munge_user_filename($opt_A));
660 write_author_info("$git_dir/cvs-authors");
661 }
663 # open .git/cvs-revisions, if requested
664 open my $revision_map, '>>', "$git_dir/cvs-revisions"
665 or die "Can't open $git_dir/cvs-revisions for appending: $!\n"
666 if defined $opt_R;
669 #
670 # run cvsps into a file unless we are getting
671 # it passed as a file via $opt_P
672 #
673 my $cvspsfile;
674 unless ($opt_P) {
675 print "Running cvsps...\n" if $opt_v;
676 my $pid = open(CVSPS,"-|");
677 my $cvspsfh;
678 die "Cannot fork: $!\n" unless defined $pid;
679 unless ($pid) {
680 my @opt;
681 @opt = split(/,/,$opt_p) if defined $opt_p;
682 unshift @opt, '-z', $opt_z if defined $opt_z;
683 unshift @opt, '-q' unless defined $opt_v;
684 unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
685 push @opt, '--cvs-direct';
686 }
687 exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
688 die "Could not start cvsps: $!\n";
689 }
690 ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
691 DIR => File::Spec->tmpdir());
692 while (<CVSPS>) {
693 print $cvspsfh $_;
694 }
695 close CVSPS;
696 $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
697 close $cvspsfh;
698 } else {
699 $cvspsfile = munge_user_filename($opt_P);
700 }
702 open(CVS, "<$cvspsfile") or die $!;
704 ## cvsps output:
705 #---------------------
706 #PatchSet 314
707 #Date: 1999/09/18 13:03:59
708 #Author: wkoch
709 #Branch: STABLE-BRANCH-1-0
710 #Ancestor branch: HEAD
711 #Tag: (none)
712 #Log:
713 # See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
714 #Members:
715 # README:1.57->1.57.2.1
716 # VERSION:1.96->1.96.2.1
717 #
718 #---------------------
720 my $state = 0;
722 sub update_index (\@\@) {
723 my $old = shift;
724 my $new = shift;
725 open(my $fh, '|-', qw(git update-index -z --index-info))
726 or die "unable to open git update-index: $!";
727 print $fh
728 (map { "0 0000000000000000000000000000000000000000\t$_\0" }
729 @$old),
730 (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" }
731 @$new)
732 or die "unable to write to git update-index: $!";
733 close $fh
734 or die "unable to write to git update-index: $!";
735 $? and die "git update-index reported error: $?";
736 }
738 sub write_tree () {
739 open(my $fh, '-|', qw(git write-tree))
740 or die "unable to open git write-tree: $!";
741 chomp(my $tree = <$fh>);
742 is_sha1($tree)
743 or die "Cannot get tree id ($tree): $!";
744 close($fh)
745 or die "Error running git write-tree: $?\n";
746 print "Tree ID $tree\n" if $opt_v;
747 return $tree;
748 }
750 my ($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg);
751 my (@old,@new,@skipped,%ignorebranch,@commit_revisions);
753 # commits that cvsps cannot place anywhere...
754 $ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
756 sub commit {
757 if ($branch eq $opt_o && !$index{branch} &&
758 !get_headref("$remote/$branch")) {
759 # looks like an initial commit
760 # use the index primed by git init
761 $ENV{GIT_INDEX_FILE} = "$git_dir/index";
762 $index{$branch} = "$git_dir/index";
763 } else {
764 # use an index per branch to speed up
765 # imports of projects with many branches
766 unless ($index{$branch}) {
767 $index{$branch} = tmpnam();
768 $ENV{GIT_INDEX_FILE} = $index{$branch};
769 if ($ancestor) {
770 system("git", "read-tree", "$remote/$ancestor");
771 } else {
772 system("git", "read-tree", "$remote/$branch");
773 }
774 die "read-tree failed: $?\n" if $?;
775 }
776 }
777 $ENV{GIT_INDEX_FILE} = $index{$branch};
779 update_index(@old, @new);
780 @old = @new = ();
781 my $tree = write_tree();
782 my $parent = get_headref("$remote/$last_branch");
783 print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;
785 my @commit_args;
786 push @commit_args, ("-p", $parent) if $parent;
788 # loose detection of merges
789 # based on the commit msg
790 foreach my $rx (@mergerx) {
791 next unless $logmsg =~ $rx && $1;
792 my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
793 if (my $sha1 = get_headref("$remote/$mparent")) {
794 push @commit_args, '-p', "$remote/$mparent";
795 print "Merge parent branch: $mparent\n" if $opt_v;
796 }
797 }
799 my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date));
800 $ENV{GIT_AUTHOR_NAME} = $author_name;
801 $ENV{GIT_AUTHOR_EMAIL} = $author_email;
802 $ENV{GIT_AUTHOR_DATE} = $commit_date;
803 $ENV{GIT_COMMITTER_NAME} = $author_name;
804 $ENV{GIT_COMMITTER_EMAIL} = $author_email;
805 $ENV{GIT_COMMITTER_DATE} = $commit_date;
806 my $pid = open2(my $commit_read, my $commit_write,
807 'git', 'commit-tree', $tree, @commit_args);
809 # compatibility with git2cvs
810 substr($logmsg,32767) = "" if length($logmsg) > 32767;
811 $logmsg =~ s/[\s\n]+\z//;
813 if (@skipped) {
814 $logmsg .= "\n\n\nSKIPPED:\n\t";
815 $logmsg .= join("\n\t", @skipped) . "\n";
816 @skipped = ();
817 }
819 print($commit_write "$logmsg\n") && close($commit_write)
820 or die "Error writing to git commit-tree: $!\n";
822 print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
823 chomp(my $cid = <$commit_read>);
824 is_sha1($cid) or die "Cannot get commit id ($cid): $!\n";
825 print "Commit ID $cid\n" if $opt_v;
826 close($commit_read);
828 waitpid($pid,0);
829 die "Error running git commit-tree: $?\n" if $?;
831 system('git' , 'update-ref', "$remote/$branch", $cid) == 0
832 or die "Cannot write branch $branch for update: $!\n";
834 if ($revision_map) {
835 print $revision_map "@$_ $cid\n" for @commit_revisions;
836 }
837 @commit_revisions = ();
839 if ($tag) {
840 my ($xtag) = $tag;
841 $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
842 $xtag =~ tr/_/\./ if ( $opt_u );
843 $xtag =~ s/[\/]/$opt_s/g;
844 $xtag =~ s/\[//g;
846 system('git' , 'tag', '-f', $xtag, $cid) == 0
847 or die "Cannot create tag $xtag: $!\n";
849 print "Created tag '$xtag' on '$branch'\n" if $opt_v;
850 }
851 };
853 my $commitcount = 1;
854 while (<CVS>) {
855 chomp;
856 if ($state == 0 and /^-+$/) {
857 $state = 1;
858 } elsif ($state == 0) {
859 $state = 1;
860 redo;
861 } elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
862 $patchset = 0+$_;
863 $state=2;
864 } elsif ($state == 2 and s/^Date:\s+//) {
865 $date = pdate($_);
866 unless ($date) {
867 print STDERR "Could not parse date: $_\n";
868 $state=0;
869 next;
870 }
871 $state=3;
872 } elsif ($state == 3 and s/^Author:\s+//) {
873 s/\s+$//;
874 if (/^(.*?)\s+<(.*)>/) {
875 ($author_name, $author_email) = ($1, $2);
876 } elsif ($conv_author_name{$_}) {
877 $author_name = $conv_author_name{$_};
878 $author_email = $conv_author_email{$_};
879 } else {
880 $author_name = $author_email = $_;
881 }
882 $state = 4;
883 } elsif ($state == 4 and s/^Branch:\s+//) {
884 s/\s+$//;
885 tr/_/\./ if ( $opt_u );
886 s/[\/]/$opt_s/g;
887 $branch = $_;
888 $state = 5;
889 } elsif ($state == 5 and s/^Ancestor branch:\s+//) {
890 s/\s+$//;
891 $ancestor = $_;
892 $ancestor = $opt_o if $ancestor eq "HEAD";
893 $state = 6;
894 } elsif ($state == 5) {
895 $ancestor = undef;
896 $state = 6;
897 redo;
898 } elsif ($state == 6 and s/^Tag:\s+//) {
899 s/\s+$//;
900 if ($_ eq "(none)") {
901 $tag = undef;
902 } else {
903 $tag = $_;
904 }
905 $state = 7;
906 } elsif ($state == 7 and /^Log:/) {
907 $logmsg = "";
908 $state = 8;
909 } elsif ($state == 8 and /^Members:/) {
910 $branch = $opt_o if $branch eq "HEAD";
911 if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
912 # skip
913 print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
914 $state = 11;
915 next;
916 }
917 if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
918 # skip if the commit is too recent
919 # given that the cvsps default fuzz is 300s, we give ourselves another
920 # 300s just in case -- this also prevents skipping commits
921 # due to server clock drift
922 print "skip patchset $patchset: $date too recent\n" if $opt_v;
923 $state = 11;
924 next;
925 }
926 if (exists $ignorebranch{$branch}) {
927 print STDERR "Skipping $branch\n";
928 $state = 11;
929 next;
930 }
931 if ($ancestor) {
932 if ($ancestor eq $branch) {
933 print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
934 $ancestor = $opt_o;
935 }
936 if (defined get_headref("$remote/$branch")) {
937 print STDERR "Branch $branch already exists!\n";
938 $state=11;
939 next;
940 }
941 my $id = get_headref("$remote/$ancestor");
942 if (!$id) {
943 print STDERR "Branch $ancestor does not exist!\n";
944 $ignorebranch{$branch} = 1;
945 $state=11;
946 next;
947 }
949 system(qw(git update-ref -m cvsimport),
950 "$remote/$branch", $id);
951 if($? != 0) {
952 print STDERR "Could not create branch $branch\n";
953 $ignorebranch{$branch} = 1;
954 $state=11;
955 next;
956 }
957 }
958 $last_branch = $branch if $branch ne $last_branch;
959 $state = 9;
960 } elsif ($state == 8) {
961 $logmsg .= "$_\n";
962 } elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
963 # VERSION:1.96->1.96.2.1
964 my $init = ($2 eq "INITIAL");
965 my $fn = $1;
966 my $rev = $3;
967 $fn =~ s#^/+##;
968 if ($opt_S && $fn =~ m/$opt_S/) {
969 print "SKIPPING $fn v $rev\n";
970 push(@skipped, $fn);
971 next;
972 }
973 push @commit_revisions, [$fn, $rev];
974 print "Fetching $fn v $rev\n" if $opt_v;
975 my ($tmpname, $size) = $cvs->file($fn,$rev);
976 if ($size == -1) {
977 push(@old,$fn);
978 print "Drop $fn\n" if $opt_v;
979 } else {
980 print "".($init ? "New" : "Update")." $fn: $size bytes\n" if $opt_v;
981 my $pid = open(my $F, '-|');
982 die $! unless defined $pid;
983 if (!$pid) {
984 exec("git", "hash-object", "-w", $tmpname)
985 or die "Cannot create object: $!\n";
986 }
987 my $sha = <$F>;
988 chomp $sha;
989 close $F;
990 my $mode = pmode($cvs->{'mode'});
991 push(@new,[$mode, $sha, $fn]); # may be resurrected!
992 }
993 unlink($tmpname);
994 } elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
995 my $fn = $1;
996 my $rev = $2;
997 $fn =~ s#^/+##;
998 push @commit_revisions, [$fn, $rev];
999 push(@old,$fn);
1000 print "Delete $fn\n" if $opt_v;
1001 } elsif ($state == 9 and /^\s*$/) {
1002 $state = 10;
1003 } elsif (($state == 9 or $state == 10) and /^-+$/) {
1004 $commitcount++;
1005 if ($opt_L && $commitcount > $opt_L) {
1006 last;
1007 }
1008 commit();
1009 if (($commitcount & 1023) == 0) {
1010 system(qw(git repack -a -d));
1011 }
1012 $state = 1;
1013 } elsif ($state == 11 and /^-+$/) {
1014 $state = 1;
1015 } elsif (/^-+$/) { # end of unknown-line processing
1016 $state = 1;
1017 } elsif ($state != 11) { # ignore stuff when skipping
1018 print STDERR "* UNKNOWN LINE * $_\n";
1019 }
1020 }
1021 commit() if $branch and $state != 11;
1023 unless ($opt_P) {
1024 unlink($cvspsfile);
1025 }
1027 # The heuristic of repacking every 1024 commits can leave a
1028 # lot of unpacked data. If there is more than 1MB worth of
1029 # not-packed objects, repack once more.
1030 my $line = `git count-objects`;
1031 if ($line =~ /^(\d+) objects, (\d+) kilobytes$/) {
1032 my ($n_objects, $kb) = ($1, $2);
1033 1024 < $kb
1034 and system(qw(git repack -a -d));
1035 }
1037 foreach my $git_index (values %index) {
1038 if ($git_index ne "$git_dir/index") {
1039 unlink($git_index);
1040 }
1041 }
1043 if (defined $orig_git_index) {
1044 $ENV{GIT_INDEX_FILE} = $orig_git_index;
1045 } else {
1046 delete $ENV{GIT_INDEX_FILE};
1047 }
1049 # Now switch back to the branch we were in before all of this happened
1050 if ($orig_branch) {
1051 print "DONE.\n" if $opt_v;
1052 if ($opt_i) {
1053 exit 0;
1054 }
1055 my $tip_at_end = `git rev-parse --verify HEAD`;
1056 if ($tip_at_start ne $tip_at_end) {
1057 for ($tip_at_start, $tip_at_end) { chomp; }
1058 print "Fetched into the current branch.\n" if $opt_v;
1059 system(qw(git read-tree -u -m),
1060 $tip_at_start, $tip_at_end);
1061 die "Fast-forward update failed: $?\n" if $?;
1062 }
1063 else {
1064 system(qw(git merge cvsimport HEAD), "$remote/$opt_o");
1065 die "Could not merge $opt_o into the current branch.\n" if $?;
1066 }
1067 } else {
1068 $orig_branch = "master";
1069 print "DONE; creating $orig_branch branch\n" if $opt_v;
1070 system("git", "update-ref", "refs/heads/master", "$remote/$opt_o")
1071 unless defined get_headref('refs/heads/master');
1072 system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o")
1073 if ($opt_r && $opt_o ne 'HEAD');
1074 system('git', 'update-ref', 'HEAD', "$orig_branch");
1075 unless ($opt_i) {
1076 system(qw(git checkout -f));
1077 die "checkout failed: $?\n" if $?;
1078 }
1079 }