summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 2169368)
raw | patch | inline | side by side (parent: 2169368)
author | Martin Koegler <mkoegler@auto.tuwien.ac.at> | |
Sun, 3 Jun 2007 15:42:44 +0000 (17:42 +0200) | ||
committer | Junio C Hamano <gitster@pobox.com> | |
Sun, 3 Jun 2007 23:51:53 +0000 (16:51 -0700) |
gitweb assumes that everything is in UTF-8. If a text contains invalid
UTF-8 character sequences, the text must be in a different encoding.
This commit introduces $fallback_encoding which would be used as input
encoding if gitweb encounters text with is not valid UTF-8.
Add basic test for this in t/t9500-gitweb-standalone-no-errors.sh
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Tested-by: Alexandre Julliard <julliard@winehq.org>
Tested-by: Ismail Dönmez <ismail@pardus.org.tr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
UTF-8 character sequences, the text must be in a different encoding.
This commit introduces $fallback_encoding which would be used as input
encoding if gitweb encounters text with is not valid UTF-8.
Add basic test for this in t/t9500-gitweb-standalone-no-errors.sh
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Tested-by: Alexandre Julliard <julliard@winehq.org>
Tested-by: Ismail Dönmez <ismail@pardus.org.tr>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
gitweb/gitweb.perl | patch | blob | history | |
t/t9500-gitweb-standalone-no-errors.sh | patch | blob | history |
diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index c3921cb0baf162b8294fe47b2fff749114f3061f..e92596c295576d20d76eef23c004e7d2a5db6a3a 100755 (executable)
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
# (relative to the current git repository)
our $mimetypes_file = undef;
+# assume this charset if line contains non-UTF-8 characters;
+# it should be valid encoding (see Encoding::Supported(3pm) for list),
+# for which encoding all byte sequences are valid, for example
+# 'iso-8859-1' aka 'latin1' (it is decoded without checking, so it
+# could be even 'utf-8' for the old behavior)
+our $fallback_encoding = 'latin1';
+
# You define site-wide feature defaults here; override them with
# $GITWEB_CONFIG as necessary.
our %feature = (
return $input;
}
+# decode sequences of octets in utf8 into Perl's internal form,
+# which is utf-8 with utf8 flag set if needed. gitweb writes out
+# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning
+sub to_utf8 {
+ my $str = shift;
+ my $res;
+ eval { $res = decode_utf8($str, Encode::FB_CROAK); };
+ if (defined $res) {
+ return $res;
+ } else {
+ return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
+ }
+}
+
# quote unsafe chars, but keep the slash, even when it's not
# correct, but quoted slashes look too horrible in bookmarks
sub esc_param {
my $str = shift;
my %opts = @_;
- $str = decode_utf8($str);
+ $str = to_utf8($str);
$str = $cgi->escapeHTML($str);
if ($opts{'-nbsp'}) {
$str =~ s/ / /g;
my $str = shift;
my %opts = @_;
- $str = decode_utf8($str);
+ $str = to_utf8($str);
$str = $cgi->escapeHTML($str);
if ($opts{'-nbsp'}) {
$str =~ s/ / /g;
if (length($short) < length($long)) {
return $cgi->a({-href => $href, -class => "list subject",
- -title => decode_utf8($long)},
+ -title => to_utf8($long)},
esc_html($short) . $extra);
} else {
return $cgi->a({-href => $href, -class => "list subject"},
if (check_export_ok("$projectroot/$path")) {
my $pr = {
path => $path,
- owner => decode_utf8($owner),
+ owner => to_utf8($owner),
};
push @list, $pr;
(my $forks_path = $path) =~ s/\.git$//;
$pr = unescape($pr);
$ow = unescape($ow);
if ($pr eq $project) {
- $owner = decode_utf8($ow);
+ $owner = to_utf8($ow);
last;
}
}
}
my $owner = $gcos;
$owner =~ s/[,;].*$//;
- return decode_utf8($owner);
+ return to_utf8($owner);
}
## ......................................................................
my $title = "$site_name";
if (defined $project) {
- $title .= " - " . decode_utf8($project);
+ $title .= " - " . to_utf8($project);
if (defined $action) {
$title .= "/$action";
if (defined $file_name) {
print "<div class=\"page_path\">";
print $cgi->a({-href => href(action=>"tree", hash_base=>$hb),
- -title => 'tree root'}, decode_utf8("[$project]"));
+ -title => 'tree root'}, to_utf8("[$project]"));
print " / ";
if (defined $name) {
my @dirname = split '/', $name;
($pr->{'age'}, $pr->{'age_string'}) = @aa;
if (!defined $pr->{'descr'}) {
my $descr = git_get_project_description($pr->{'path'}) || "";
- $pr->{'descr_long'} = decode_utf8($descr);
+ $pr->{'descr_long'} = to_utf8($descr);
$pr->{'descr'} = chop_str($descr, 25, 5);
}
if (!defined $pr->{'owner'}) {
my $git = git_cmd_str();
my $name = $project;
$name =~ s/\047/\047\\\047\047/g;
- my $filename = decode_utf8(basename($project));
+ my $filename = to_utf8(basename($project));
my $cmd;
if ($suffix eq 'zip') {
$filename .= "-$hash.$suffix";
index b92ab6331204ca40dd51b618623e5241ae08dd37..44ae503b992ee26023a19830039062e01be9aa57 100755 (executable)
'gitweb_run "p=.git;a=atom"'
test_debug 'cat gitweb.log'
+# ----------------------------------------------------------------------
+# encoding/decoding
+
+test_expect_success \
+ 'encode(commit): utf8' \
+ '. ../t3901-utf8.txt &&
+ echo "UTF-8" >> file &&
+ git add file &&
+ git commit -F ../t3900/1-UTF-8.txt &&
+ gitweb_run "p=.git;a=commit"'
+test_debug 'cat gitweb.log'
+
+test_expect_success \
+ 'encode(commit): iso-8859-1' \
+ '. ../t3901-8859-1.txt &&
+ echo "ISO-8859-1" >> file &&
+ git add file &&
+ git config i18n.commitencoding ISO-8859-1 &&
+ git commit -F ../t3900/ISO-8859-1.txt &&
+ git config --unset i18n.commitencoding &&
+ gitweb_run "p=.git;a=commit"'
+test_debug 'cat gitweb.log'
+
+test_expect_success \
+ 'encode(log): utf-8 and iso-8859-1' \
+ 'gitweb_run "p=.git;a=log"'
+test_debug 'cat gitweb.log'
+
test_done