roundup/cgi/accept_language.py

   1 """Parse the Accept-Language header as defined in RFC2616.
   2
   3 See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4
   4 for details.  This module should follow the spec.
   5 Author: Hernan M. Foffani (hfoffani@gmail.com)
   6 Some use samples:
   7
   8 >>> parse("da, en-gb;q=0.8, en;q=0.7")
   9 ['da', 'en_gb', 'en']
  10 >>> parse("en;q=0.2, fr;q=1")
  11 ['fr', 'en']
  12 >>> parse("zn; q = 0.2 ,pt-br;q =1")
  13 ['pt_br', 'zn']
  14 >>> parse("es-AR")
  15 ['es_AR']
  16 >>> parse("es-es-cat")
  17 ['es_es_cat']
  18 >>> parse("")
  19 []
  20 >>> parse(None)
  21 []
  22 >>> parse("   ")
  23 []
  24 >>> parse("en,")
  25 ['en']
  26 """
  27
  28 import re
  29 import heapq
  30
  31 # regexp for languange-range search
  32 nqlre = "([A-Za-z]+[-[A-Za-z]+]*)$"
  33 # regexp for languange-range search with quality value
  34 qlre  = "([A-Za-z]+[-[A-Za-z]+]*);q=([\d\.]+)"
  35 # both
  36 lre   = re.compile(nqlre + "|" + qlre)
  37
  38 ascii = ''.join([chr(x) for x in xrange(256)])
  39 whitespace = ' \t\n\r\v\f'
  40
  41 def parse(language_header):
  42     """parse(string_with_accept_header_content) -> languages list"""
  43
  44     if language_header is None: return []
  45
  46     # strip whitespaces.
  47     lh = language_header.translate(ascii, whitespace)
  48
  49     # if nothing, return
  50     if lh == "": return []
  51
  52     # split by commas and parse the quality values.
  53     pls = [lre.findall(x) for x in lh.split(',')]
  54
  55     # drop uncomformant
  56     qls = [x[0] for x in pls if len(x) > 0]
  57
  58     # use a heap queue to sort by quality values.
  59     # the value of each item is 1.0 complement.
  60     pq = []
  61     for l in qls:
  62         if l[0] != '':
  63             heapq.heappush(pq, (0.0, l[0]))
  64         else:
  65             heapq.heappush(pq, (1.0-float(l[2]), l[1]))
  66
  67     # get the languages ordered by quality
  68     # and replace - by _
  69     return [x[1].replace('-','_') for x in pq]
  70
  71 if __name__ == "__main__":
  72     import doctest
  73     doctest.testmod()
  74
  75 # vim: set et sts=4 sw=4 :