2008-08-26 11:22:17 +02:00
#!/usr/local/bin/python
# -*- coding: iso-8859-1 -*-
2008-08-26 19:35:52 +02:00
# $Id$
2008-08-26 11:22:17 +02:00
2008-10-27 11:35:07 +01:00
# Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
2008-08-26 11:22:17 +02:00
# (Royal Institute of Technology, Stockholm, Sweden).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Institute nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
import re
import string
def read ( ) :
""" return a dict of tables from rfc4518 """
ret = { }
#2.2. Map
#
# SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
# points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
# VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also
# mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
# mapped to nothing.
t = [ ]
t . append ( " 00AD; ; Map to nothing " )
t . append ( " 1806; ; Map to nothing " )
t . append ( " 034F; ; Map to nothing " )
t . append ( " 180B; ; Map to nothing " )
t . append ( " 180C; ; Map to nothing " )
t . append ( " 180D; ; Map to nothing " )
t . append ( " FE00; ; Map to nothing " )
t . append ( " FE01; ; Map to nothing " )
t . append ( " FE02; ; Map to nothing " )
t . append ( " FE03; ; Map to nothing " )
t . append ( " FE04; ; Map to nothing " )
t . append ( " FE05; ; Map to nothing " )
t . append ( " FE06; ; Map to nothing " )
t . append ( " FE07; ; Map to nothing " )
t . append ( " FE08; ; Map to nothing " )
t . append ( " FE09; ; Map to nothing " )
t . append ( " FE0A; ; Map to nothing " )
t . append ( " FE0B; ; Map to nothing " )
t . append ( " FE0C; ; Map to nothing " )
t . append ( " FE0D; ; Map to nothing " )
t . append ( " FE0E; ; Map to nothing " )
t . append ( " FE0F; ; Map to nothing " )
t . append ( " FFFC; ; Map to nothing " )
# CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
# TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
# (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
t . append ( " 0009; 0020 ; Map to SPACE " )
t . append ( " 000A; 0020 ; Map to SPACE " )
t . append ( " 000B; 0020 ; Map to SPACE " )
t . append ( " 000C; 0020 ; Map to SPACE " )
t . append ( " 000D; 0020 ; Map to SPACE " )
t . append ( " 0085; 0020 ; Map to SPACE " )
# All other control code (e.g., Cc) points or code points with a
# control function (e.g., Cf) are mapped to nothing. The following is
# a complete list of these code points: U+0000-0008, 000E-001F, 007F-
# 0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
# 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
t . append ( " 0000-0008; ; Map to nothing " )
t . append ( " 000E-001F; ; Map to nothing " )
t . append ( " 007F-0084; ; Map to nothing " )
t . append ( " 0086-009F; ; Map to nothing " )
t . append ( " 06DD; ; Map to nothing " )
t . append ( " 070F; ; Map to nothing " )
t . append ( " 180E; ; Map to nothing " )
t . append ( " 200C-200F; ; Map to nothing " )
t . append ( " 202A-202E; ; Map to nothing " )
t . append ( " 2060-2063; ; Map to nothing " )
t . append ( " 206A-206F; ; Map to nothing " )
t . append ( " FEFF; ; Map to nothing " )
t . append ( " FFF9-FFFB; ; Map to nothing " )
t . append ( " 1D173-1D17A; ; Map to nothing " )
t . append ( " E0001; ; Map to nothing " )
t . append ( " E0020-E007F; ; Map to nothing " )
# ZERO WIDTH SPACE (U+200B) is mapped to nothing. All other code
# points with Separator (space, line, or paragraph) property (e.g., Zs,
# Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete
# list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
# 202F, 205F, 3000.
t . append ( " 200B; ; Map to nothing " )
t . append ( " 0020; 0020; Map to SPACE " )
t . append ( " 00A0; 0020; Map to SPACE " )
t . append ( " 1680; 0020; Map to SPACE " )
t . append ( " 2000-200A; 0020; Map to SPACE " )
t . append ( " 2028-2029; 0020; Map to SPACE " )
t . append ( " 202F; 0020; Map to SPACE " )
t . append ( " 205F; 0020; Map to SPACE " )
t . append ( " 3000; 0020; Map to SPACE " )
ret [ " rfc4518-map " ] = t
# For case ignore, numeric, and stored prefix string matching rules,
# characters are case folded per B.2 of [RFC3454].
t = [ ]
#2.4. Prohibit
# The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
t . append ( " FFFD; " )
ret [ " rfc4518-error " ] = t
t = [ ]
return ret