abf6b60c1c64b8f3a3bc779de7229101f9bafc17
1 <?php
2 /*
3 * accept-to-gettext.inc -- convert information in 'Accept-*' headers to
4 * gettext language identifiers.
5 * Copyright (c) 2003, Wouter Verhelst <wouter@debian.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Usage:
22 *
23 * $locale=al2gt(<array of supported languages/charsets in gettext syntax>,
24 * <MIME type of document>);
 * setlocale(LC_ALL, $locale); // or LC_MESSAGES, or whatever...
26 *
27 * Example:
28 *
29 * $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8');
30 * $locale=al2gt($langs, 'text/html');
 * setlocale(LC_ALL, $locale);
32 *
33 * Note that this will send out header information (to be
34 * RFC2616-compliant), so it must be called before anything is sent to
35 * the user.
36 *
37 * Assumptions made:
38 * * Charset encodings are written the same way as the Accept-Charset
39 * HTTP header specifies them (RFC2616), except that they're parsed
40 * case-insensitive.
41 * * Country codes and language codes are the same in both gettext and
42 * the Accept-Language syntax (except for the case differences, which
43 * are dealt with easily). If not, some input may be ignored.
44 * * The provided gettext-strings are fully qualified; i.e., no "en_US";
45 * always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been
46 * used. "en.ISO-8859-15" is OK, though.
47 * * The language is more important than the charset; i.e., if the
48 * following is given:
49 *
50 * Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3
51 * Accept-Charset: ISO-8859-15, utf-8;q=0.5
52 *
53 * And the supplied parameter contains (amongst others) nl_BE.UTF-8
54 * and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked.
55 *
56 * $Log: accept-to-gettext.inc,v $
57 * Revision 1.1.1.1 2003/11/19 19:31:15 wouter
58 * * moved to new CVS repo after death of the old
59 * * Fixed code to apply a default to both Accept-Charset and
60 * Accept-Language if none of those headers are supplied; patch from
61 * Dominic Chambers <dominic@encasa.com>
62 *
63 * Revision 1.2 2003/08/14 10:23:59 wouter
64 * Removed little error in Content-Type header syntaxis.
65 *
66 */
68 /* not really important, this one; perhaps I could've put it inline with
69 * the rest. */
/*
 * Decide whether a candidate locale beats the best one found so far.
 *
 * $curlscore, $curcscore, $curgtlang -- language score, charset score and
 *   gettext string of the current best candidate
 * $langval, $charval, $gtlang -- the same three values for the challenger
 *
 * The language score is compared first; the charset score is only used to
 * break a tie on language. A challenger that is merely equal on both
 * scores does not replace the current best.
 *
 * Returns array(language score, charset score, gettext string) of the
 * winner.
 */
function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval,
  $gtlang)
{
  /* A strictly better language score wins outright. */
  if ($curlscore < $langval) {
    return array($langval, $charval, $gtlang);
  }

  /* Same language score: only a strictly better charset score wins. */
  if ($curlscore == $langval && $curcscore < $charval) {
    return array($curlscore, $charval, $gtlang);
  }

  /* Otherwise the current best stays in place. */
  return array($curlscore, $curcscore, $curgtlang);
}
/*
 * Parse the value of an Accept-Language or Accept-Charset header.
 *
 * $header    -- raw header value, e.g. "nl-be, nl;q=0.8, *;q=0.1"
 * $uppercase -- TRUE to fold item names to upper case (used for charsets),
 *               FALSE to fold them to lower case (used for language tags)
 *
 * Returns an array mapping each listed item to its quality value as a
 * float. Items without a parameter get 1.0. Empty items, and items whose
 * first parameter is not of the form name=<number>, are left out.
 */
function _al2gt_parse_accept($header, $uppercase)
{
  $scores = array();
  foreach (explode(',', $header) as $part) {
    $fields = explode(';', $part);
    $name = trim($fields[0]);
    if ($name === '') {
      continue;
    }
    $name = $uppercase ? strtoupper($name) : strtolower($name);

    /* RFC2616: the quality value defaults to 1 when it is omitted. */
    $quality = 1.0;
    if (isset($fields[1])) {
      $kv = explode('=', $fields[1], 2);
      if (!isset($kv[1]) || !is_numeric(trim($kv[1]))) {
        /* Malformed parameter: treat the item as not listed at all. */
        continue;
      }
      $quality = (float) trim($kv[1]);
    }
    $scores[$name] = $quality;
  }
  return $scores;
}

/*
 * Pick the entry of $gettextlangs that best matches the request's
 * Accept-Language and Accept-Charset headers, and announce the choice
 * through Content-Language and Content-Type response headers.
 *
 * $gettextlangs -- array of supported locales in gettext syntax, e.g.
 *                  "nl_BE.UTF-8" or "en.ISO-8859-15"
 * $mime         -- MIME type to put in the Content-Type header
 *
 * Returns the chosen element of $gettextlangs, or NULL when none of them
 * is acceptable to the client. On equal scores the entry that comes first
 * in $gettextlangs is kept. When nothing matches, only "Content-Type:
 * $mime" is sent (no charset, no Content-Language).
 *
 * Because this calls header(), it must run before any output is sent.
 */
function al2gt($gettextlangs, $mime)
{
  /* RFC2616: a missing or empty header means "everything is acceptable".
   * Both headers are read defensively; many clients send no
   * Accept-Charset at all. $_SERVER itself is left untouched. */
  $acceptLang = (isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])
    && $_SERVER["HTTP_ACCEPT_LANGUAGE"] != '')
    ? $_SERVER["HTTP_ACCEPT_LANGUAGE"] : '*';
  $acceptChar = (isset($_SERVER["HTTP_ACCEPT_CHARSET"])
    && $_SERVER["HTTP_ACCEPT_CHARSET"] != '')
    ? $_SERVER["HTTP_ACCEPT_CHARSET"] : '*';

  /* Language tags are keyed in lower case and charsets in upper case, so
   * that the lookups below are case-insensitive. */
  $alscores = _al2gt_parse_accept($acceptLang, FALSE);
  $acscores = _al2gt_parse_accept($acceptChar, TRUE);

  /* RFC2616: ``If no "*" is present in an Accept-Charset field, then
   * all character sets not explicitly mentioned get a quality value of
   * 0, except for ISO-8859-1, which gets a quality value of 1 if not
   * explicitly mentioned.'' If "*" is present, it covers ISO-8859-1 too. */
  if (!isset($acscores["ISO-8859-1"])) {
    $acscores["ISO-8859-1"] = isset($acscores["*"]) ? $acscores["*"] : 1.0;
  }

  /*
   * Loop through the available languages/encodings, and pick the one
   * with the highest score, excluding the ones with a language or
   * charset the user did not include.
   */
  $curlscore = 0;
  $curcscore = 0;
  $curgtlang = NULL;
  foreach ($gettextlangs as $gtlang) {
    /* Split off the charset first, so that the "_" -> "-" conversion only
     * touches the language part (charsets such as SHIFT_JIS keep their
     * underscore). */
    $pieces = explode('.', $gtlang);
    $allang = strtolower(str_replace('_', '-', $pieces[0]));
    $gtcs = isset($pieces[1]) ? strtoupper($pieces[1]) : '';
    $noct = explode('-', $allang);
    $primary = $noct[0];

    /* Most specific combination first, wildcards last. Only the first
     * combination for which the client gave both a language and a charset
     * score is used. */
    $testkeys = array(
      array($allang, $gtcs),
      array($primary, $gtcs),
      array($allang, "*"),
      array($primary, "*"),
      array("*", $gtcs),
      array("*", "*"));

    foreach ($testkeys as $keys) {
      if (!isset($alscores[$keys[0]]) || !isset($acscores[$keys[1]])) {
        continue;
      }
      $langval = $alscores[$keys[0]];
      $charval = $acscores[$keys[1]];
      /* A quality value of 0 means "not acceptable" (RFC2616), so such a
       * candidate is never allowed to compete. */
      if ($langval > 0 && $charval > 0) {
        list($curlscore, $curcscore, $curgtlang) =
          find_match($curlscore, $curcscore, $curgtlang, $langval,
                     $charval, $gtlang);
      }
      break;
    }
  }

  /* Nothing acceptable: still announce the MIME type, but do not invent
   * a language or charset. */
  if ($curgtlang === NULL) {
    header("Content-Type: $mime");
    return NULL;
  }

  /* We must re-parse the gettext-string now, since we may have found it
   * through a "*" qualifier. */
  $gtparts = explode('.', $curgtlang);
  $lang = str_replace('_', '-', strtolower($gtparts[0]));

  header("Content-Language: $lang");
  if (isset($gtparts[1]) && $gtparts[1] !== '') {
    $charset = $gtparts[1];
    header("Content-Type: $mime; charset=$charset");
  } else {
    header("Content-Type: $mime");
  }

  return $curgtlang;
}
205 // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler:
206 ?>