Code

Merge branch 'jn/maint-gitweb-grep-fix'
authorJunio C Hamano <gitster@pobox.com>
Tue, 17 Jan 2012 00:45:56 +0000 (16:45 -0800)
committerJunio C Hamano <gitster@pobox.com>
Tue, 17 Jan 2012 00:45:56 +0000 (16:45 -0800)
* jn/maint-gitweb-grep-fix:
  gitweb: Harden "grep" search against filenames with ':'
  gitweb: Fix file links in "grep" search

1  2 
gitweb/gitweb.perl

diff --combined gitweb/gitweb.perl
index 285fe88403a9d65aafd9a9cf7628a4dde0aa9829,08020b0776bf9d7da98080041235c42d8fecd03f..abb5a79afce445c73693593d32cced0ccdb90fe3
@@@ -759,7 -759,6 +759,7 @@@ our @cgi_param_mapping = 
        extra_options => "opt",
        search_use_regexp => "sr",
        ctag => "by_tag",
 +      diff_style => "ds",
        # this must be last entry (for manipulation from JavaScript)
        javascript => "js"
  );
@@@ -1443,8 -1442,8 +1443,8 @@@ sub validate_refname 
  sub to_utf8 {
        my $str = shift;
        return undef unless defined $str;
 -      if (utf8::valid($str)) {
 -              utf8::decode($str);
 +
 +      if (utf8::is_utf8($str) || utf8::decode($str)) {
                return $str;
        } else {
                return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
@@@ -1696,7 -1695,6 +1696,7 @@@ sub chop_and_escape_str 
        my ($str) = @_;
  
        my $chopped = chop_str(@_);
 +      $str = to_utf8($str);
        if ($chopped eq $str) {
                return esc_html($chopped);
        } else {
@@@ -2227,119 -2225,93 +2227,119 @@@ sub format_diff_cc_simplified 
        return $result;
  }
  
 -# format patch (diff) line (not to be used for diff headers)
 -sub format_diff_line {
 -      my $line = shift;
 -      my ($from, $to) = @_;
 -      my $diff_class = "";
 -
 -      chomp $line;
 +sub diff_line_class {
 +      my ($line, $from, $to) = @_;
  
 +      # ordinary diff
 +      my $num_sign = 1;
 +      # combined diff
        if ($from && $to && ref($from->{'href'}) eq "ARRAY") {
 -              # combined diff
 -              my $prefix = substr($line, 0, scalar @{$from->{'href'}});
 -              if ($line =~ m/^\@{3}/) {
 -                      $diff_class = " chunk_header";
 -              } elsif ($line =~ m/^\\/) {
 -                      $diff_class = " incomplete";
 -              } elsif ($prefix =~ tr/+/+/) {
 -                      $diff_class = " add";
 -              } elsif ($prefix =~ tr/-/-/) {
 -                      $diff_class = " rem";
 -              }
 -      } else {
 -              # assume ordinary diff
 -              my $char = substr($line, 0, 1);
 -              if ($char eq '+') {
 -                      $diff_class = " add";
 -              } elsif ($char eq '-') {
 -                      $diff_class = " rem";
 -              } elsif ($char eq '@') {
 -                      $diff_class = " chunk_header";
 -              } elsif ($char eq "\\") {
 -                      $diff_class = " incomplete";
 -              }
 +              $num_sign = scalar @{$from->{'href'}};
 +      }
 +
 +      my @diff_line_classifier = (
 +              { regexp => qr/^\@\@{$num_sign} /, class => "chunk_header"},
 +              { regexp => qr/^\\/,               class => "incomplete"  },
 +              { regexp => qr/^ {$num_sign}/,     class => "ctx" },
 +              # classifier for context must come before classifier add/rem,
 +              # or we would have to use more complicated regexp, for example
 +              # qr/(?= {0,$m}\+)[+ ]{$num_sign}/, where $m = $num_sign - 1;
 +              { regexp => qr/^[+ ]{$num_sign}/,   class => "add" },
 +              { regexp => qr/^[- ]{$num_sign}/,   class => "rem" },
 +      );
 +      for my $clsfy (@diff_line_classifier) {
 +              return $clsfy->{'class'}
 +                      if ($line =~ $clsfy->{'regexp'});
        }
 -      $line = untabify($line);
 -      if ($from && $to && $line =~ m/^\@{2} /) {
 -              my ($from_text, $from_start, $from_lines, $to_text, $to_start, $to_lines, $section) =
 -                      $line =~ m/^\@{2} (-(\d+)(?:,(\d+))?) (\+(\d+)(?:,(\d+))?) \@{2}(.*)$/;
  
 -              $from_lines = 0 unless defined $from_lines;
 -              $to_lines   = 0 unless defined $to_lines;
 +      # fallback
 +      return "";
 +}
  
 -              if ($from->{'href'}) {
 -                      $from_text = $cgi->a({-href=>"$from->{'href'}#l$from_start",
 -                                           -class=>"list"}, $from_text);
 -              }
 -              if ($to->{'href'}) {
 -                      $to_text   = $cgi->a({-href=>"$to->{'href'}#l$to_start",
 -                                           -class=>"list"}, $to_text);
 -              }
 -              $line = "<span class=\"chunk_info\">@@ $from_text $to_text @@</span>" .
 -                      "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 -              return "<div class=\"diff$diff_class\">$line</div>\n";
 -      } elsif ($from && $to && $line =~ m/^\@{3}/) {
 -              my ($prefix, $ranges, $section) = $line =~ m/^(\@+) (.*?) \@+(.*)$/;
 -              my (@from_text, @from_start, @from_nlines, $to_text, $to_start, $to_nlines);
 +# assumes that $from and $to are defined and correctly filled,
 +# and that $line holds a line of chunk header for unified diff
 +sub format_unidiff_chunk_header {
 +      my ($line, $from, $to) = @_;
  
 -              @from_text = split(' ', $ranges);
 -              for (my $i = 0; $i < @from_text; ++$i) {
 -                      ($from_start[$i], $from_nlines[$i]) =
 -                              (split(',', substr($from_text[$i], 1)), 0);
 -              }
 +      my ($from_text, $from_start, $from_lines, $to_text, $to_start, $to_lines, $section) =
 +              $line =~ m/^\@{2} (-(\d+)(?:,(\d+))?) (\+(\d+)(?:,(\d+))?) \@{2}(.*)$/;
  
 -              $to_text   = pop @from_text;
 -              $to_start  = pop @from_start;
 -              $to_nlines = pop @from_nlines;
 +      $from_lines = 0 unless defined $from_lines;
 +      $to_lines   = 0 unless defined $to_lines;
  
 -              $line = "<span class=\"chunk_info\">$prefix ";
 -              for (my $i = 0; $i < @from_text; ++$i) {
 -                      if ($from->{'href'}[$i]) {
 -                              $line .= $cgi->a({-href=>"$from->{'href'}[$i]#l$from_start[$i]",
 -                                                -class=>"list"}, $from_text[$i]);
 -                      } else {
 -                              $line .= $from_text[$i];
 -                      }
 -                      $line .= " ";
 -              }
 -              if ($to->{'href'}) {
 -                      $line .= $cgi->a({-href=>"$to->{'href'}#l$to_start",
 -                                        -class=>"list"}, $to_text);
 +      if ($from->{'href'}) {
 +              $from_text = $cgi->a({-href=>"$from->{'href'}#l$from_start",
 +                                   -class=>"list"}, $from_text);
 +      }
 +      if ($to->{'href'}) {
 +              $to_text   = $cgi->a({-href=>"$to->{'href'}#l$to_start",
 +                                   -class=>"list"}, $to_text);
 +      }
 +      $line = "<span class=\"chunk_info\">@@ $from_text $to_text @@</span>" .
 +              "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 +      return $line;
 +}
 +
 +# assumes that $from and $to are defined and correctly filled,
 +# and that $line holds a line of chunk header for combined diff
 +sub format_cc_diff_chunk_header {
 +      my ($line, $from, $to) = @_;
 +
 +      my ($prefix, $ranges, $section) = $line =~ m/^(\@+) (.*?) \@+(.*)$/;
 +      my (@from_text, @from_start, @from_nlines, $to_text, $to_start, $to_nlines);
 +
 +      @from_text = split(' ', $ranges);
 +      for (my $i = 0; $i < @from_text; ++$i) {
 +              ($from_start[$i], $from_nlines[$i]) =
 +                      (split(',', substr($from_text[$i], 1)), 0);
 +      }
 +
 +      $to_text   = pop @from_text;
 +      $to_start  = pop @from_start;
 +      $to_nlines = pop @from_nlines;
 +
 +      $line = "<span class=\"chunk_info\">$prefix ";
 +      for (my $i = 0; $i < @from_text; ++$i) {
 +              if ($from->{'href'}[$i]) {
 +                      $line .= $cgi->a({-href=>"$from->{'href'}[$i]#l$from_start[$i]",
 +                                        -class=>"list"}, $from_text[$i]);
                } else {
 -                      $line .= $to_text;
 +                      $line .= $from_text[$i];
                }
 -              $line .= " $prefix</span>" .
 -                       "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 -              return "<div class=\"diff$diff_class\">$line</div>\n";
 +              $line .= " ";
        }
 -      return "<div class=\"diff$diff_class\">" . esc_html($line, -nbsp=>1) . "</div>\n";
 +      if ($to->{'href'}) {
 +              $line .= $cgi->a({-href=>"$to->{'href'}#l$to_start",
 +                                -class=>"list"}, $to_text);
 +      } else {
 +              $line .= $to_text;
 +      }
 +      $line .= " $prefix</span>" .
 +               "<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
 +      return $line;
 +}
 +
 +# process patch (diff) line (not to be used for diff headers),
 +# returning class and HTML-formatted (but not wrapped) line
 +sub process_diff_line {
 +      my $line = shift;
 +      my ($from, $to) = @_;
 +
 +      my $diff_class = diff_line_class($line, $from, $to);
 +
 +      chomp $line;
 +      $line = untabify($line);
 +
 +      if ($from && $to && $line =~ m/^\@{2} /) {
 +              $line = format_unidiff_chunk_header($line, $from, $to);
 +              return $diff_class, $line;
 +
 +      } elsif ($from && $to && $line =~ m/^\@{3}/) {
 +              $line = format_cc_diff_chunk_header($line, $from, $to);
 +              return $diff_class, $line;
 +
 +      }
 +      return $diff_class, esc_html($line, -nbsp=>1);
  }
  
  # Generates undef or something like "_snapshot_" or "snapshot (_tbz2_ _zip_)",
@@@ -2836,8 -2808,8 +2836,8 @@@ sub git_get_projects_list 
                my $dir = $projects_list;
                # remove the trailing "/"
                $dir =~ s!/+$!!;
 -              my $pfxlen = length("$projects_list");
 -              my $pfxdepth = ($projects_list =~ tr!/!!);
 +              my $pfxlen = length("$dir");
 +              my $pfxdepth = ($dir =~ tr!/!!);
                # when filtering, search only given subdirectory
                if ($filter) {
                        $dir .= "/$filter";
@@@ -4861,97 -4833,8 +4861,97 @@@ sub git_difftree_body 
        print "</table>\n";
  }
  
 +sub print_sidebyside_diff_chunk {
 +      my @chunk = @_;
 +      my (@ctx, @rem, @add);
 +
 +      return unless @chunk;
 +
 +      # incomplete last line might be among removed or added lines,
 +      # or both, or among context lines: find which
 +      for (my $i = 1; $i < @chunk; $i++) {
 +              if ($chunk[$i][0] eq 'incomplete') {
 +                      $chunk[$i][0] = $chunk[$i-1][0];
 +              }
 +      }
 +
 +      # guardian
 +      push @chunk, ["", ""];
 +
 +      foreach my $line_info (@chunk) {
 +              my ($class, $line) = @$line_info;
 +
 +              # print chunk headers
 +              if ($class && $class eq 'chunk_header') {
 +                      print $line;
 +                      next;
 +              }
 +
 +              ## print from accumulator when type of class of lines change
 +              # empty contents block on start rem/add block, or end of chunk
 +              if (@ctx && (!$class || $class eq 'rem' || $class eq 'add')) {
 +                      print join '',
 +                              '<div class="chunk_block ctx">',
 +                                      '<div class="old">',
 +                                      @ctx,
 +                                      '</div>',
 +                                      '<div class="new">',
 +                                      @ctx,
 +                                      '</div>',
 +                              '</div>';
 +                      @ctx = ();
 +              }
 +              # empty add/rem block on start context block, or end of chunk
 +              if ((@rem || @add) && (!$class || $class eq 'ctx')) {
 +                      if (!@add) {
 +                              # pure removal
 +                              print join '',
 +                                      '<div class="chunk_block rem">',
 +                                              '<div class="old">',
 +                                              @rem,
 +                                              '</div>',
 +                                      '</div>';
 +                      } elsif (!@rem) {
 +                              # pure addition
 +                              print join '',
 +                                      '<div class="chunk_block add">',
 +                                              '<div class="new">',
 +                                              @add,
 +                                              '</div>',
 +                                      '</div>';
 +                      } else {
 +                              # assume that it is change
 +                              print join '',
 +                                      '<div class="chunk_block chg">',
 +                                              '<div class="old">',
 +                                              @rem,
 +                                              '</div>',
 +                                              '<div class="new">',
 +                                              @add,
 +                                              '</div>',
 +                                      '</div>';
 +                      }
 +                      @rem = @add = ();
 +              }
 +
 +              ## adding lines to accumulator
 +              # guardian value
 +              last unless $line;
 +              # rem, add or change
 +              if ($class eq 'rem') {
 +                      push @rem, $line;
 +              } elsif ($class eq 'add') {
 +                      push @add, $line;
 +              }
 +              # context line
 +              if ($class eq 'ctx') {
 +                      push @ctx, $line;
 +              }
 +      }
 +}
 +
  sub git_patchset_body {
 -      my ($fd, $difftree, $hash, @hash_parents) = @_;
 +      my ($fd, $diff_style, $difftree, $hash, @hash_parents) = @_;
        my ($hash_parent) = $hash_parents[0];
  
        my $is_combined = (@hash_parents > 1);
        my $diffinfo;
        my $to_name;
        my (%from, %to);
 +      my @chunk; # for side-by-side diff
  
        print "<div class=\"patchset\">\n";
  
  
                        next PATCH if ($patch_line =~ m/^diff /);
  
 -                      print format_diff_line($patch_line, \%from, \%to);
 +                      my ($class, $line) = process_diff_line($patch_line, \%from, \%to);
 +                      my $diff_classes = "diff";
 +                      $diff_classes .= " $class" if ($class);
 +                      $line = "<div class=\"$diff_classes\">$line</div>\n";
 +
 +                      if ($diff_style eq 'sidebyside' && !$is_combined) {
 +                              if ($class eq 'chunk_header') {
 +                                      print_sidebyside_diff_chunk(@chunk);
 +                                      @chunk = ( [ $class, $line ] );
 +                              } else {
 +                                      push @chunk, [ $class, $line ];
 +                              }
 +                      } else {
 +                              # default 'inline' style and unknown styles
 +                              print $line;
 +                      }
                }
  
        } continue {
 +              if (@chunk) {
 +                      print_sidebyside_diff_chunk(@chunk);
 +                      @chunk = ();
 +              }
                print "</div>\n"; # class="patch"
        }
  
@@@ -5836,7 -5699,7 +5836,7 @@@ sub git_search_files 
        my %co = @_;
  
        local $/ = "\n";
-       open my $fd, "-|", git_cmd(), 'grep', '-n',
+       open my $fd, "-|", git_cmd(), 'grep', '-n', '-z',
                $search_use_regexp ? ('-E', '-i') : '-F',
                $searchtext, $co{'tree'}
                        or die_error(500, "Open git-grep failed");
        my $lastfile = '';
        while (my $line = <$fd>) {
                chomp $line;
-               my ($file, $lno, $ltext, $binary);
+               my ($file, $file_href, $lno, $ltext, $binary);
                last if ($matches++ > 1000);
                if ($line =~ /^Binary file (.+) matches$/) {
                        $file = $1;
                        $binary = 1;
                } else {
-                       (undef, $file, $lno, $ltext) = split(/:/, $line, 4);
+                       ($file, $lno, $ltext) = split(/\0/, $line, 3);
+                       $file =~ s/^$co{'tree'}://;
                }
                if ($file ne $lastfile) {
                        $lastfile and print "</td></tr>\n";
                        } else {
                                print "<tr class=\"light\">\n";
                        }
+                       $file_href = href(action=>"blob", hash_base=>$co{'id'},
+                                         file_name=>$file);
                        print "<td class=\"list\">".
-                               $cgi->a({-href => href(action=>"blob", hash=>$co{'hash'},
-                                                      file_name=>"$file"),
-                                       -class => "list"}, esc_path($file));
+                               $cgi->a({-href => $file_href, -class => "list"}, esc_path($file));
                        print "</td><td>\n";
                        $lastfile = $file;
                }
                                $ltext = esc_html($ltext, -nbsp=>1);
                        }
                        print "<div class=\"pre\">" .
-                               $cgi->a({-href => href(action=>"blob", hash=>$co{'hash'},
-                                                      file_name=>"$file").'#l'.$lno,
-                                       -class => "linenr"}, sprintf('%4i', $lno))
-                               . ' ' .  $ltext . "</div>\n";
+                               $cgi->a({-href => $file_href.'#l'.$lno,
+                                       -class => "linenr"}, sprintf('%4i', $lno)) .
+                               ' ' .  $ltext . "</div>\n";
                }
        }
        if ($lastfile) {
@@@ -6244,9 -6107,7 +6244,9 @@@ sub git_blame_common 
                        -type=>"text/plain", -charset => "utf-8",
                        -status=> "200 OK");
                local $| = 1; # output autoflush
 -              print while <$fd>;
 +              while (my $line = <$fd>) {
 +                      print to_utf8($line);
 +              }
                close $fd
                        or print "ERROR $!\n";
  
@@@ -7088,7 -6949,6 +7088,7 @@@ sub git_object 
  
  sub git_blobdiff {
        my $format = shift || 'html';
 +      my $diff_style = $input_params{'diff_style'} || 'inline';
  
        my $fd;
        my @difftree;
                my $formats_nav =
                        $cgi->a({-href => href(action=>"blobdiff_plain", -replay=>1)},
                                "raw");
 +              $formats_nav .= diff_style_nav($diff_style);
                git_header_html(undef, $expires);
                if (defined $hash_base && (my %co = parse_commit($hash_base))) {
                        git_print_page_nav('','', $hash_base,$co{'tree'},$hash_base, $formats_nav);
        if ($format eq 'html') {
                print "<div class=\"page_body\">\n";
  
 -              git_patchset_body($fd, [ \%diffinfo ], $hash_base, $hash_parent_base);
 +              git_patchset_body($fd, $diff_style,
 +                                [ \%diffinfo ], $hash_base, $hash_parent_base);
                close $fd;
  
                print "</div>\n"; # class="page_body"
@@@ -7225,31 -7083,9 +7225,31 @@@ sub git_blobdiff_plain 
        git_blobdiff('plain');
  }
  
 +# assumes that it is added as later part of already existing navigation,
 +# so it returns "| foo | bar" rather than just "foo | bar"
 +sub diff_style_nav {
 +      my ($diff_style, $is_combined) = @_;
 +      $diff_style ||= 'inline';
 +
 +      return "" if ($is_combined);
 +
 +      my @styles = (inline => 'inline', 'sidebyside' => 'side by side');
 +      my %styles = @styles;
 +      @styles =
 +              @styles[ map { $_ * 2 } 0..$#styles/2 ];
 +
 +      return join '',
 +              map { " | ".$_ }
 +              map {
 +                      $_ eq $diff_style ? $styles{$_} :
 +                      $cgi->a({-href => href(-replay=>1, diff_style => $_)}, $styles{$_})
 +              } @styles;
 +}
 +
  sub git_commitdiff {
        my %params = @_;
        my $format = $params{-format} || 'html';
 +      my $diff_style = $input_params{'diff_style'} || 'inline';
  
        my ($patch_max) = gitweb_get_feature('patches');
        if ($format eq 'patch') {
                                $cgi->a({-href => href(action=>"patch", -replay=>1)},
                                        "patch");
                }
 +              $formats_nav .= diff_style_nav($diff_style, @{$co{'parents'}} > 1);
  
                if (defined $hash_parent &&
                    $hash_parent ne '-c' && $hash_parent ne '--cc') {
                                }
                        }
                        $formats_nav .= ': ' .
 -                              $cgi->a({-href => href(action=>"commitdiff",
 -                                                     hash=>$hash_parent)},
 +                              $cgi->a({-href => href(-replay=>1,
 +                                                     hash=>$hash_parent, hash_base=>undef)},
                                        esc_html($hash_parent_short)) .
                                ')';
                } elsif (!$co{'parent'}) {
                        # single parent commit
                        $formats_nav .=
                                ' (parent: ' .
 -                              $cgi->a({-href => href(action=>"commitdiff",
 -                                                     hash=>$co{'parent'})},
 +                              $cgi->a({-href => href(-replay=>1,
 +                                                     hash=>$co{'parent'}, hash_base=>undef)},
                                        esc_html(substr($co{'parent'}, 0, 7))) .
                                ')';
                } else {
                        # merge commit
                        if ($hash_parent eq '--cc') {
                                $formats_nav .= ' | ' .
 -                                      $cgi->a({-href => href(action=>"commitdiff",
 +                                      $cgi->a({-href => href(-replay=>1,
                                                               hash=>$hash, hash_parent=>'-c')},
                                                'combined');
                        } else { # $hash_parent eq '-c'
                                $formats_nav .= ' | ' .
 -                                      $cgi->a({-href => href(action=>"commitdiff",
 +                                      $cgi->a({-href => href(-replay=>1,
                                                               hash=>$hash, hash_parent=>'--cc')},
                                                'compact');
                        }
                        $formats_nav .=
                                ' (merge: ' .
                                join(' ', map {
 -                                      $cgi->a({-href => href(action=>"commitdiff",
 -                                                             hash=>$_)},
 +                                      $cgi->a({-href => href(-replay=>1,
 +                                                             hash=>$_, hash_base=>undef)},
                                                esc_html(substr($_, 0, 7)));
                                } @{$co{'parents'}} ) .
                                ')';
                                  $use_parents ? @{$co{'parents'}} : $hash_parent);
                print "<br/>\n";
  
 -              git_patchset_body($fd, \@difftree, $hash,
 +              git_patchset_body($fd, $diff_style,
 +                                \@difftree, $hash,
                                  $use_parents ? @{$co{'parents'}} : $hash_parent);
                close $fd;
                print "</div>\n"; # class="page_body"
@@@ -7865,12 -7699,11 +7865,12 @@@ sub git_opml 
                -charset => 'utf-8',
                -content_disposition => 'inline; filename="opml.xml"');
  
 +      my $title = esc_html($site_name);
        print <<XML;
  <?xml version="1.0" encoding="utf-8"?>
  <opml version="1.0">
  <head>
 -  <title>$site_name OPML Export</title>
 +  <title>$title OPML Export</title>
  </head>
  <body>
  <outline text="git RSS feeds">