include/accept-to-gettext.inc

   1 <?php
   2 /*
   3  * accept-to-gettext.inc -- convert information in 'Accept-*' headers to
   4  * gettext language identifiers.
   5  * Copyright (c) 2003, Wouter Verhelst <wouter@debian.org>
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  * Usage:
  22  *
  23  *  $locale=al2gt(<array of supported languages/charsets in gettext syntax>,
  24  *                <MIME type of document>);
   25  *  setlocale(LC_ALL, $locale); // or LC_MESSAGES, or whatever...
  26  *
  27  * Example:
  28  *
  29  *  $langs=array('nl_BE.ISO-8859-15','nl_BE.UTF-8','en_US.UTF-8','en_GB.UTF-8');
  30  *  $locale=al2gt($langs, 'text/html');
   31  *  setlocale(LC_ALL, $locale);
  32  *
  33  * Note that this will send out header information (to be
  34  * RFC2616-compliant), so it must be called before anything is sent to
  35  * the user.
  36  *
  37  * Assumptions made:
  38  * * Charset encodings are written the same way as the Accept-Charset
  39  *   HTTP header specifies them (RFC2616), except that they're parsed
  40  *   case-insensitive.
  41  * * Country codes and language codes are the same in both gettext and
  42  *   the Accept-Language syntax (except for the case differences, which
  43  *   are dealt with easily). If not, some input may be ignored.
  44  * * The provided gettext-strings are fully qualified; i.e., no "en_US";
  45  *   always "en_US.ISO-8859-15" or "en_US.UTF-8", or whichever has been
  46  *   used. "en.ISO-8859-15" is OK, though.
  47  * * The language is more important than the charset; i.e., if the
  48  *   following is given:
  49  *
  50  *   Accept-Language: nl-be, nl;q=0.8, en-us;q=0.5, en;q=0.3
  51  *   Accept-Charset: ISO-8859-15, utf-8;q=0.5
  52  *
  53  *   And the supplied parameter contains (amongst others) nl_BE.UTF-8
  54  *   and nl.ISO-8859-15, then nl_BE.UTF-8 will be picked.
  55  *
  56  * $Log: accept-to-gettext.inc,v $
  57  * Revision 1.1.1.1  2003/11/19 19:31:15  wouter
  58  * * moved to new CVS repo after death of the old
  59  * * Fixed code to apply a default to both Accept-Charset and
  60  *   Accept-Language if none of those headers are supplied; patch from
  61  *   Dominic Chambers <dominic@encasa.com>
  62  *
  63  * Revision 1.2  2003/08/14 10:23:59  wouter
  64  * Removed little error in Content-Type header syntaxis.
  65  *
  66  */
  67
  68 /* not really important, this one; perhaps I could've put it inline with
  69  * the rest. */
  70 function find_match($curlscore,$curcscore,$curgtlang,$langval,$charval,
  71                     $gtlang)
  72 {
  73   if($curlscore < $langval) {
  74     $curlscore=$langval;
  75     $curcscore=$charval;
  76     $curgtlang=$gtlang;
  77   } else if ($curlscore == $langval) {
  78     if($curcscore < $charval) {
  79       $curcscore=$charval;
  80       $curgtlang=$gtlang;
  81     }
  82   }
  83   return array($curlscore, $curcscore, $curgtlang);
  84 }
  85
  86
  87 function al2gt($gettextlangs, $mime)
  88 {
  89   /* Check if ACCEPT_LANGUAGE isset */
  90   if(!isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])){
  91     $_SERVER["HTTP_ACCEPT_LANGUAGE"] = "";
  92   }
  93   if(!isset($_SERVER["HTTP_ACCEPT_CHARSET"])){
  94     $_SERVER["HTTP_ACCEPT_CHARSET"] = "";
  95   }
  96
  97   /* default to "everything is acceptable", as RFC2616 specifies */
  98   $acceptLang=(($_SERVER["HTTP_ACCEPT_LANGUAGE"] == '') ? '*' :
  99     $_SERVER["HTTP_ACCEPT_LANGUAGE"]);
 100   /* Commented out due to problems with IE7, defaulting to the one below... */
 101   #$acceptChar=(($_SERVER["HTTP_ACCEPT_CHARSET"] == '') ? '*' :
 102   #  $_SERVER["HTTP_ACCEPT_CHARSET"]);
 103   $acceptChar=(($_SERVER["HTTP_ACCEPT_CHARSET"] == '') ? 'ISO-8859-1,utf-8;q=0.7,*;q=0.7' :
 104     $_SERVER["HTTP_ACCEPT_CHARSET"]);
 105   $alparts=@preg_split("/,/",$acceptLang);
 106   $acparts=@preg_split("/,/",$acceptChar);
 107
 108   /* Parse the contents of the Accept-Language header.*/
 109   foreach($alparts as $part) {
 110     $part=trim($part);
 111     if(preg_match("/;/", $part)) {
 112       $lang=@preg_split("/;/",$part);
 113       $score=@preg_split("/=/",$lang[1]);
 114       $alscores[$lang[0]]=$score[1];
 115     } else {
 116       $alscores[$part]=1;
 117     }
 118   }
 119
 120   /* Do the same for the Accept-Charset header. */
 121
 122   /* RFC2616: ``If no "*" is present in an Accept-Charset field, then
 123    * all character sets not explicitly mentioned get a quality value of
 124    * 0, except for ISO-8859-1, which gets a quality value of 1 if not
 125    * explicitly mentioned.''
 126    *
 127    * Making it 2 for the time being, so that we
 128    * can distinguish between "not specified" and "specified as 1" later
 129    * on. */
 130   $acscores["ISO-8859-1"]=2;
 131
 132   foreach($acparts as $part) {
 133     $part=trim($part);
 134     if(preg_match("/;/", $part)) {
 135       $cs=@preg_split("/;/",$part);
 136       $score=@preg_split("/=/",$cs[1]);
 137       $acscores[strtoupper($cs[0])]=$score[1];
 138     } else {
 139       $acscores[strtoupper($part)]=1;
 140     }
 141   }
 142   if($acscores["ISO-8859-1"]==2) {
 143     $acscores["ISO-8859-1"]=(isset($acscores["*"])?$acscores["*"]:1);
 144   }
 145
 146   /*
 147    * Loop through the available languages/encodings, and pick the one
 148    * with the highest score, excluding the ones with a charset the user
 149    * did not include.
 150    */
 151   $curlscore=0;
 152   $curcscore=0;
 153   $curgtlang=NULL;
 154   foreach($gettextlangs as $gtlang) {
 155
 156     $tmp1=preg_replace("/\_/","-",$gtlang);
 157     $tmp2=@preg_split("/\./",$tmp1);
 158     $allang=strtolower($tmp2[0]);
 159     $gtcs=strtoupper($tmp2[1]);
 160     $noct=@preg_split("/-/",$allang);
 161
 162     if(!isset($alscores["*"])){
 163       $alscores["*"] = "";
 164     }
 165
 166     if(!isset($alscores[$allang])){
 167       $alscores[$allang] = "";
 168     }
 169
 170     if(!isset($alscores[$noct[0]])){
 171       $alscores[$noct[0]] = "";
 172     }
 173
 174     if(!isset($acscores[$gtcs])){
 175       $acscores[$gtcs] = "";
 176     }
 177     $testvals=array(
 178         array($alscores[$allang], $acscores[$gtcs]),
 179         array($alscores[$noct[0]], $acscores[$gtcs]),
 180         array($alscores[$allang], $acscores["*"]),
 181         array($alscores[$noct[0]], $acscores["*"]),
 182         array($alscores["*"], $acscores[$gtcs]),
 183         array($alscores["*"], $acscores["*"]));
 184
 185     $found=FALSE;
 186     foreach($testvals as $tval) {
 187       if(!$found && isset($tval[0]) && isset($tval[1])) {
 188         $arr=find_match($curlscore, $curcscore, $curgtlang, $tval[0],
 189             $tval[1], $gtlang);
 190         $curlscore=$arr[0];
 191         $curcscore=$arr[1];
 192         $curgtlang=$arr[2];
 193         $found=TRUE;
 194       }
 195     }
 196   }
 197
 198   /* We must re-parse the gettext-string now, since we may have found it
 199    * through a "*" qualifier.*/
 200   $gtparts=@preg_split("/\./",$curgtlang);
 201   $tmp=strtolower($gtparts[0]);
 202   $lang=preg_replace("/\_/", "-", $tmp);
 203   header("Content-Language: $lang");
 204   if(isset($gtparts[1])){
 205     $charset=$gtparts[1];
 206     header("Content-Type: $mime; charset=$charset");
 207   }
 208   return $curgtlang;
 209 }
 210
 211 // vim:tabstop=2:expandtab:shiftwidth=2:filetype=php:syntax:ruler:
 212 ?>