From 6a7f1507a7b73fa4f691ea3cb596d34a053992c7 Mon Sep 17 00:00:00 2001 From: Wim Lewis Date: Fri, 20 Jan 2012 01:37:38 -0800 Subject: [PATCH 1/2] An implementation of the SASLprep stringprep profile. --- sasl/stringprep.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 sasl/stringprep.py diff --git a/sasl/stringprep.py b/sasl/stringprep.py new file mode 100644 index 0000000..1520f4f --- /dev/null +++ b/sasl/stringprep.py @@ -0,0 +1,55 @@ +"""SASL stringprep profile + + + + +Copyright (C) 2012, Wim Lewis . +""" + +from __future__ import absolute_import +from stringprep import * +import unicodedata + +__all__ = ( 'prepare', ) + +def prepare(s): + """Prepare a Unicode string according to the SASLprep stringprep profile. + Returns the prepared string, or raises a UnicodeError on failue.""" + + # Step 1 - Map + buf = u'' + for ch in s: + if in_table_c12(ch): + buf += u' ' + elif not in_table_b1(ch): + buf += ch + + # Step 2 - Normalize + buf = unicodedata.normalize('NFKC', buf) + + # Step 3 - Prohibited characters + for ch in buf: + if ( in_table_c21(ch) or + in_table_c22(ch) or + in_table_c3(ch) or + in_table_c4(ch) or + in_table_c5(ch) or + in_table_c6(ch) or + in_table_c7(ch) or + in_table_c8(ch) or + in_table_c9(ch) ): + raise UnicodeError("Invalid character %r" % (ch,)) + + # Step 4 - bidi mark checking + # If there are any characters in categort D1 (randAL), then do extra checks. + if any(map(in_table_d1, buf)): + # If there are any R+AL characters, the first and last + # characters must be R+AL. + if not in_table_d1(buf[0]) or not in_table_d1(buf[-1]): + raise UnicodeError("bidi rejected by stringprep (6.3)") + # And there must not be any L (table d2). + if any(map(in_table_d2, buf)): + raise UnicodeError("bidi rejected by stringprep (6.2)") + + return buf + From 59fabe077f31caf9249046bd9ad1da05f3eb572b Mon Sep 17 00:00:00 2001 From: Wim Lewis Date: Fri, 20 Jan 2012 18:52:34 -0800 Subject: [PATCH 2/2] Add trace profile, doctests, cleanup. --- sasl/stringprep.py | 91 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 15 deletions(-) diff --git a/sasl/stringprep.py b/sasl/stringprep.py index 1520f4f..88a8ead 100644 --- a/sasl/stringprep.py +++ b/sasl/stringprep.py @@ -1,20 +1,38 @@ -"""SASL stringprep profile +"""SASL stringprep_ profiles - - +Defines functions which implement a few SASL-related stringprep +profiles. Each function takes a (unicode) string and either returns +the prepared string or raises an exception, usually a UnicodeError. -Copyright (C) 2012, Wim Lewis . +.. _stringprep: http://www.ietf.org/rfc/rfc3454.txt """ +# Written in 2012 by Wim Lewis . +# This file is in the public domain. It may be used, +# distributed, and modified without restriction. + from __future__ import absolute_import from stringprep import * import unicodedata -__all__ = ( 'prepare', ) +__all__ = ( 'traceprep', 'saslprep' ) +__docformat__ = 'reStructuredText en' + +def saslprep(s): + """Prepare a string according to the SASLprep_ stringprep profile. + + >>> saslprep(u'Hi\u2003\uff2d\uff4f\uff4d!') + u'Hi Mom!' -def prepare(s): - """Prepare a Unicode string according to the SASLprep stringprep profile. - Returns the prepared string, or raises a UnicodeError on failue.""" + >>> saslprep(u'Hi\\rMom!') + Traceback (most recent call last): + UnicodeError: Prohibited character u'\\r' + + >>> saslprep(u'Num\u00ADber \u2168') + u'Number IX' + + .. _SASLprep: http://www.ietf.org/rfc/rfc4013.txt + """ # Step 1 - Map buf = u'' @@ -23,10 +41,10 @@ def prepare(s): buf += u' ' elif not in_table_b1(ch): buf += ch - + # Step 2 - Normalize buf = unicodedata.normalize('NFKC', buf) - + # Step 3 - Prohibited characters for ch in buf: if ( in_table_c21(ch) or @@ -38,10 +56,16 @@ def prepare(s): in_table_c7(ch) or in_table_c8(ch) or in_table_c9(ch) ): - raise UnicodeError("Invalid character %r" % (ch,)) - + raise UnicodeError("Prohibited character %r" % (ch,)) + # Step 4 - bidi mark checking - # If there are any characters in categort D1 (randAL), then do extra checks. + _bidi_check(buf) + + return buf + +def _bidi_check(buf): + "Perform the checks from RFC3454 section 6, and raise on failure." + # If there are any characters in category D1 (R and AL), then do extra checks. if any(map(in_table_d1, buf)): # If there are any R+AL characters, the first and last # characters must be R+AL. @@ -50,6 +74,43 @@ def prepare(s): # And there must not be any L (table d2). if any(map(in_table_d2, buf)): raise UnicodeError("bidi rejected by stringprep (6.2)") - - return buf +def traceprep(s): + """Prepare a Unicode string according to the trace_ stringprep profile. + + .. _trace: http://www.ietf.org/rfc/rfc4505.txt + """ + + # Trace defines no mapping or normalization. + + # Prohibited characters + for ch in s: + if ( in_table_c21(ch) or + in_table_c22(ch) or + in_table_c3(ch) or + in_table_c4(ch) or + in_table_c5(ch) or + in_table_c6(ch) or + # But not table C.7. + in_table_c8(ch) or + in_table_c9(ch) ): + raise UnicodeError("Prohibited character %r" % (ch,)) + + # Step 4 - bidi mark checking + _bidi_check(s) + + return s + +__test__ = { + 'bidi': """ + >>> _bidi_check(u'\u0627\u0031\u0628') + + >>> _bidi_check(u'\u0627' + u'1') + Traceback (most recent call last): + UnicodeError: bidi rejected by stringprep (6.3) + + >>> _bidi_check(u'\u05c0 foo \u05c0') + Traceback (most recent call last): + UnicodeError: bidi rejected by stringprep (6.2) + """ +}