aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Include/floatobject.h1
-rw-r--r--Include/intobject.h1
-rw-r--r--Lib/string.py705
-rw-r--r--Lib/stringold.py705
-rw-r--r--Objects/abstract.c180
-rw-r--r--Objects/floatobject.c51
-rw-r--r--Objects/intobject.c43
-rw-r--r--Objects/stringobject.c1045
-rw-r--r--Python/bltinmodule.c109
9 files changed, 1818 insertions, 1022 deletions
diff --git a/Include/floatobject.h b/Include/floatobject.h
index 4491f592a68..d8fd376727a 100644
--- a/Include/floatobject.h
+++ b/Include/floatobject.h
@@ -50,6 +50,7 @@ extern DL_IMPORT(PyTypeObject) PyFloat_Type;
#define PyFloat_Check(op) ((op)->ob_type == &PyFloat_Type)
+extern DL_IMPORT(PyObject *) PyFloat_FromString Py_PROTO((PyObject*, char**));
extern DL_IMPORT(PyObject *) PyFloat_FromDouble Py_PROTO((double));
extern DL_IMPORT(double) PyFloat_AsDouble Py_PROTO((PyObject *));
diff --git a/Include/intobject.h b/Include/intobject.h
index e6eb49deca0..35be0ef97f6 100644
--- a/Include/intobject.h
+++ b/Include/intobject.h
@@ -61,6 +61,7 @@ extern DL_IMPORT(PyTypeObject) PyInt_Type;
#define PyInt_Check(op) ((op)->ob_type == &PyInt_Type)
+extern DL_IMPORT(PyObject *) PyInt_FromString Py_PROTO((char*, char**, int));
extern DL_IMPORT(PyObject *) PyInt_FromLong Py_PROTO((long));
extern DL_IMPORT(long) PyInt_AsLong Py_PROTO((PyObject *));
extern DL_IMPORT(long) PyInt_GetMax Py_PROTO((void));
diff --git a/Lib/string.py b/Lib/string.py
index e449c207656..2c3083e221c 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -1,8 +1,9 @@
# module 'string' -- A collection of string operations
-# Warning: most of the code you see here isn't normally used nowadays.
-# At the end of this file most functions are replaced by built-in
-# functions imported from built-in module "strop".
+# Warning: most of the code you see here isn't normally used nowadays. With
+# Python 1.6, many of these functions are implemented as methods on the
+# standard string object. They used to be implemented by a built-in module
+# called strop, but strop is now obsolete itself.
"""Common string manipulations.
@@ -30,9 +31,6 @@ octdigits = '01234567'
# Case conversion helpers
_idmap = ''
for i in range(256): _idmap = _idmap + chr(i)
-_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
-_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
-_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]
del i
# Backward compatible names for exceptions
@@ -43,544 +41,391 @@ atol_error = ValueError
# convert UPPER CASE letters to lower case
def lower(s):
- """lower(s) -> string
+ """lower(s) -> string
- Return a copy of the string s converted to lowercase.
+ Return a copy of the string s converted to lowercase.
- """
- res = ''
- for c in s:
- res = res + _lower[ord(c)]
- return res
+ """
+ return s.lower()
# Convert lower case letters to UPPER CASE
def upper(s):
- """upper(s) -> string
+ """upper(s) -> string
- Return a copy of the string s converted to uppercase.
+ Return a copy of the string s converted to uppercase.
- """
- res = ''
- for c in s:
- res = res + _upper[ord(c)]
- return res
+ """
+ return s.upper()
# Swap lower case letters and UPPER CASE
def swapcase(s):
- """swapcase(s) -> string
+ """swapcase(s) -> string
- Return a copy of the string s with upper case characters
- converted to lowercase and vice versa.
+ Return a copy of the string s with upper case characters
+ converted to lowercase and vice versa.
- """
- res = ''
- for c in s:
- res = res + _swapcase[ord(c)]
- return res
+ """
+ return s.swapcase()
# Strip leading and trailing tabs and spaces
def strip(s):
- """strip(s) -> string
+ """strip(s) -> string
- Return a copy of the string s with leading and trailing
- whitespace removed.
+ Return a copy of the string s with leading and trailing
+ whitespace removed.
- """
- i, j = 0, len(s)
- while i < j and s[i] in whitespace: i = i+1
- while i < j and s[j-1] in whitespace: j = j-1
- return s[i:j]
+ """
+ return s.strip()
# Strip leading tabs and spaces
def lstrip(s):
- """lstrip(s) -> string
+ """lstrip(s) -> string
- Return a copy of the string s with leading whitespace removed.
+ Return a copy of the string s with leading whitespace removed.
- """
- i, j = 0, len(s)
- while i < j and s[i] in whitespace: i = i+1
- return s[i:j]
+ """
+ return s.lstrip()
# Strip trailing tabs and spaces
def rstrip(s):
- """rstrip(s) -> string
+ """rstrip(s) -> string
- Return a copy of the string s with trailing whitespace
- removed.
+ Return a copy of the string s with trailing whitespace
+ removed.
- """
- i, j = 0, len(s)
- while i < j and s[j-1] in whitespace: j = j-1
- return s[i:j]
+ """
+ return s.rstrip()
# Split a string into a list of space/tab-separated words
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
- """split(str [,sep [,maxsplit]]) -> list of strings
-
- Return a list of the words in the string s, using sep as the
- delimiter string. If maxsplit is nonzero, splits into at most
- maxsplit words If sep is not specified, any whitespace string
- is a separator. Maxsplit defaults to 0.
-
- (split and splitfields are synonymous)
-
- """
- if sep is not None: return splitfields(s, sep, maxsplit)
- res = []
- i, n = 0, len(s)
- if maxsplit <= 0: maxsplit = n
- count = 0
- while i < n:
- while i < n and s[i] in whitespace: i = i+1
- if i == n: break
- if count >= maxsplit:
- res.append(s[i:])
- break
- j = i
- while j < n and s[j] not in whitespace: j = j+1
- count = count + 1
- res.append(s[i:j])
- i = j
- return res
-
-# Split a list into fields separated by a given string
-# NB: splitfields(s, ' ') is NOT the same as split(s)!
-# splitfields(s, '') returns [s] (in analogy with split() in nawk)
-def splitfields(s, sep=None, maxsplit=0):
- """splitfields(str [,sep [,maxsplit]]) -> list of strings
-
- Return a list of the words in the string s, using sep as the
- delimiter string. If maxsplit is nonzero, splits into at most
- maxsplit words If sep is not specified, any whitespace string
- is a separator. Maxsplit defaults to 0.
-
- (split and splitfields are synonymous)
-
- """
- if sep is None: return split(s, None, maxsplit)
- res = []
- nsep = len(sep)
- if nsep == 0:
- return [s]
- ns = len(s)
- if maxsplit <= 0: maxsplit = ns
- i = j = 0
- count = 0
- while j+nsep <= ns:
- if s[j:j+nsep] == sep:
- count = count + 1
- res.append(s[i:j])
- i = j = j + nsep
- if count >= maxsplit: break
- else:
- j = j + 1
- res.append(s[i:])
- return res
-
-# Join words with spaces between them
-def join(words, sep = ' '):
- """join(list [,sep]) -> string
+ """split(str [,sep [,maxsplit]]) -> list of strings
- Return a string composed of the words in list, with
- intervening occurences of sep. Sep defaults to a single
- space.
+ Return a list of the words in the string s, using sep as the
+ delimiter string. If maxsplit is nonzero, splits into at most
+ maxsplit words If sep is not specified, any whitespace string
+ is a separator. Maxsplit defaults to 0.
- (joinfields and join are synonymous)
+ (split and splitfields are synonymous)
- """
- return joinfields(words, sep)
+ """
+ return s.split(sep, maxsplit)
+splitfields = split
# Join fields with optional separator
-def joinfields(words, sep = ' '):
- """joinfields(list [,sep]) -> string
+def join(words, sep = ' '):
+ """join(list [,sep]) -> string
- Return a string composed of the words in list, with
- intervening occurences of sep. The default separator is a
- single space.
+ Return a string composed of the words in list, with
+ intervening occurences of sep. The default separator is a
+ single space.
- (joinfields and join are synonymous)
+ (joinfields and join are synonymous)
- """
- res = ''
- for w in words:
- res = res + (sep + w)
- return res[len(sep):]
+ """
+ return sep.join(words)
+joinfields = join
-# Find substring, raise exception if not found
-def index(s, sub, i = 0, last=None):
- """index(s, sub [,start [,end]]) -> int
+# for a little bit of speed
+_apply = apply
- Return the lowest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
+# Find substring, raise exception if not found
+def index(s, *args):
+ """index(s, sub [,start [,end]]) -> int
- Raise ValueError if not found.
+ Like find but raises ValueError when the substring is not found.
- """
- if last is None: last = len(s)
- res = find(s, sub, i, last)
- if res < 0:
- raise ValueError, 'substring not found in string.index'
- return res
+ """
+ return _apply(s.index, args)
# Find last substring, raise exception if not found
-def rindex(s, sub, i = 0, last=None):
- """rindex(s, sub [,start [,end]]) -> int
-
- Return the highest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
+def rindex(s, *args):
+ """rindex(s, sub [,start [,end]]) -> int
- Raise ValueError if not found.
+ Like rfind but raises ValueError when the substring is not found.
- """
- if last is None: last = len(s)
- res = rfind(s, sub, i, last)
- if res < 0:
- raise ValueError, 'substring not found in string.index'
- return res
+ """
+ return _apply(s.rindex, args)
# Count non-overlapping occurrences of substring
-def count(s, sub, i = 0, last=None):
- """count(s, sub[, start[,end]]) -> int
-
- Return the number of occurrences of substring sub in string
- s[start:end]. Optional arguments start and end are
- interpreted as in slice notation.
-
- """
- Slen = len(s) # cache this value, for speed
- if last is None:
- last = Slen
- elif last < 0:
- last = max(0, last + Slen)
- elif last > Slen:
- last = Slen
- if i < 0: i = max(0, i + Slen)
- n = len(sub)
- m = last + 1 - n
- if n == 0: return m-i
- r = 0
- while i < m:
- if sub == s[i:i+n]:
- r = r+1
- i = i+n
- else:
- i = i+1
- return r
+def count(s, *args):
+ """count(s, sub[, start[,end]]) -> int
+
+ Return the number of occurrences of substring sub in string
+ s[start:end]. Optional arguments start and end are
+ interpreted as in slice notation.
+
+ """
+ return _apply(s.count, args)
# Find substring, return -1 if not found
-def find(s, sub, i = 0, last=None):
- """find(s, sub [,start [,end]]) -> in
-
- Return the lowest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
-
- Return -1 on failure.
-
- """
- Slen = len(s) # cache this value, for speed
- if last is None:
- last = Slen
- elif last < 0:
- last = max(0, last + Slen)
- elif last > Slen:
- last = Slen
- if i < 0: i = max(0, i + Slen)
- n = len(sub)
- m = last + 1 - n
- while i < m:
- if sub == s[i:i+n]: return i
- i = i+1
- return -1
+def find(s, *args):
+ """find(s, sub [,start [,end]]) -> in
+
+ Return the lowest index in s where substring sub is found,
+ such that sub is contained within s[start,end]. Optional
+ arguments start and end are interpreted as in slice notation.
+
+ Return -1 on failure.
+
+ """
+ return _apply(s.find, args)
# Find last substring, return -1 if not found
-def rfind(s, sub, i = 0, last=None):
- """rfind(s, sub [,start [,end]]) -> int
-
- Return the highest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
-
- Return -1 on failure.
-
- """
- Slen = len(s) # cache this value, for speed
- if last is None:
- last = Slen
- elif last < 0:
- last = max(0, last + Slen)
- elif last > Slen:
- last = Slen
- if i < 0: i = max(0, i + Slen)
- n = len(sub)
- m = last + 1 - n
- r = -1
- while i < m:
- if sub == s[i:i+n]: r = i
- i = i+1
- return r
-
-# "Safe" environment for eval()
-_safe_env = {"__builtins__": {}}
+def rfind(s, *args):
+ """rfind(s, sub [,start [,end]]) -> int
+
+ Return the highest index in s where substring sub is found,
+ such that sub is contained within s[start,end]. Optional
+ arguments start and end are interpreted as in slice notation.
+
+ Return -1 on failure.
+
+ """
+ return _apply(s.rfind, args)
+
+# for a bit of speed
+_float = float
+_int = int
+_long = long
+_StringType = type('')
# Convert string to float
-_re = None
-def atof(str):
- """atof(s) -> float
-
- Return the floating point number represented by the string s.
-
- """
- global _re
- if _re is None:
- # Don't fail if re doesn't exist -- just skip the syntax check
- try:
- import re
- except ImportError:
- _re = 0
- else:
- _re = re
- sign = ''
- s = strip(str)
- if s and s[0] in '+-':
- sign = s[0]
- s = s[1:]
- if not s:
- raise ValueError, 'non-float argument to string.atof'
- while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
- if _re and not _re.match('[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$', s):
- raise ValueError, 'non-float argument to string.atof'
- try:
- return float(eval(sign + s, _safe_env))
- except SyntaxError:
- raise ValueError, 'non-float argument to string.atof'
+def atof(s):
+ """atof(s) -> float
+
+ Return the floating point number represented by the string s.
+
+ """
+ if type(s) == _StringType:
+ return _float(s)
+ else:
+ raise TypeError('argument 1: expected string, %s found' %
+ type(s).__name__)
# Convert string to integer
-def atoi(str, base=10):
- """atoi(s [,base]) -> int
-
- Return the integer represented by the string s in the given
- base, which defaults to 10. The string s must consist of one
- or more digits, possibly preceded by a sign. If base is 0, it
- is chosen from the leading characters of s, 0 for octal, 0x or
- 0X for hexadecimal. If base is 16, a preceding 0x or 0X is
- accepted.
-
- """
- if base != 10:
- # We only get here if strop doesn't define atoi()
- raise ValueError, "this string.atoi doesn't support base != 10"
- sign = ''
- s = strip(str)
- if s and s[0] in '+-':
- sign = s[0]
- s = s[1:]
- if not s:
- raise ValueError, 'non-integer argument to string.atoi'
- while s[0] == '0' and len(s) > 1: s = s[1:]
- for c in s:
- if c not in digits:
- raise ValueError, 'non-integer argument to string.atoi'
- return eval(sign + s, _safe_env)
+def atoi(*args):
+ """atoi(s [,base]) -> int
+
+ Return the integer represented by the string s in the given
+ base, which defaults to 10. The string s must consist of one
+ or more digits, possibly preceded by a sign. If base is 0, it
+ is chosen from the leading characters of s, 0 for octal, 0x or
+ 0X for hexadecimal. If base is 16, a preceding 0x or 0X is
+ accepted.
+
+ """
+ try:
+ s = args[0]
+ except IndexError:
+ raise TypeError('function requires at least 1 argument: %d given' %
+ len(args))
+ # Don't catch type error resulting from too many arguments to int(). The
+ # error message isn't compatible but the error type is, and this function
+ # is complicated enough already.
+ if type(s) == _StringType:
+ return _apply(_int, args)
+ else:
+ raise TypeError('argument 1: expected string, %s found' %
+ type(s).__name__)
+
# Convert string to long integer
-def atol(str, base=10):
- """atol(s [,base]) -> long
-
- Return the long integer represented by the string s in the
- given base, which defaults to 10. The string s must consist
- of one or more digits, possibly preceded by a sign. If base
- is 0, it is chosen from the leading characters of s, 0 for
- octal, 0x or 0X for hexadecimal. If base is 16, a preceding
- 0x or 0X is accepted. A trailing L or l is not accepted,
- unless base is 0.
-
- """
- if base != 10:
- # We only get here if strop doesn't define atol()
- raise ValueError, "this string.atol doesn't support base != 10"
- sign = ''
- s = strip(str)
- if s and s[0] in '+-':
- sign = s[0]
- s = s[1:]
- if not s:
- raise ValueError, 'non-integer argument to string.atol'
- while s[0] == '0' and len(s) > 1: s = s[1:]
- for c in s:
- if c not in digits:
- raise ValueError, 'non-integer argument to string.atol'
- return eval(sign + s + 'L', _safe_env)
+def atol(*args):
+ """atol(s [,base]) -> long
+
+ Return the long integer represented by the string s in the
+ given base, which defaults to 10. The string s must consist
+ of one or more digits, possibly preceded by a sign. If base
+ is 0, it is chosen from the leading characters of s, 0 for
+ octal, 0x or 0X for hexadecimal. If base is 16, a preceding
+ 0x or 0X is accepted. A trailing L or l is not accepted,
+ unless base is 0.
+
+ """
+ try:
+ s = args[0]
+ except IndexError:
+ raise TypeError('function requires at least 1 argument: %d given' %
+ len(args))
+ # Don't catch type error resulting from too many arguments to long(). The
+ # error message isn't compatible but the error type is, and this function
+ # is complicated enough already.
+ if type(s) == _StringType:
+ return _apply(_long, args)
+ else:
+ raise TypeError('argument 1: expected string, %s found' %
+ type(s).__name__)
+
# Left-justify a string
def ljust(s, width):
- """ljust(s, width) -> string
+ """ljust(s, width) -> string
- Return a left-justified version of s, in a field of the
- specified width, padded with spaces as needed. The string is
- never truncated.
+ Return a left-justified version of s, in a field of the
+ specified width, padded with spaces as needed. The string is
+ never truncated.
- """
- n = width - len(s)
- if n <= 0: return s
- return s + ' '*n
+ """
+ n = width - len(s)
+ if n <= 0: return s
+ return s + ' '*n
# Right-justify a string
def rjust(s, width):
- """rjust(s, width) -> string
+ """rjust(s, width) -> string
- Return a right-justified version of s, in a field of the
- specified width, padded with spaces as needed. The string is
- never truncated.
+ Return a right-justified version of s, in a field of the
+ specified width, padded with spaces as needed. The string is
+ never truncated.
- """
- n = width - len(s)
- if n <= 0: return s
- return ' '*n + s
+ """
+ n = width - len(s)
+ if n <= 0: return s
+ return ' '*n + s
# Center a string
def center(s, width):
- """center(s, width) -> string
+ """center(s, width) -> string
- Return a center version of s, in a field of the specified
- width. padded with spaces as needed. The string is never
- truncated.
+ Return a center version of s, in a field of the specified
+ width. padded with spaces as needed. The string is never
+ truncated.
- """
- n = width - len(s)
- if n <= 0: return s
- half = n/2
- if n%2 and width%2:
- # This ensures that center(center(s, i), j) = center(s, j)
- half = half+1
- return ' '*half + s + ' '*(n-half)
+ """
+ n = width - len(s)
+ if n <= 0: return s
+ half = n/2
+ if n%2 and width%2:
+ # This ensures that center(center(s, i), j) = center(s, j)
+ half = half+1
+ return ' '*half + s + ' '*(n-half)
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
- """zfill(x, width) -> string
+ """zfill(x, width) -> string
- Pad a numeric string x with zeros on the left, to fill a field
- of the specified width. The string x is never truncated.
+ Pad a numeric string x with zeros on the left, to fill a field
+ of the specified width. The string x is never truncated.
- """
- if type(x) == type(''): s = x
- else: s = `x`
- n = len(s)
- if n >= width: return s
- sign = ''
- if s[:1] in ('-', '+'):
- sign, s = s[0], s[1:]
- return sign + '0'*(width-n) + s
+ """
+ if type(x) == type(''): s = x
+ else: s = `x`
+ n = len(s)
+ if n >= width: return s
+ sign = ''
+ if s[0] in ('-', '+'):
+ sign, s = s[0], s[1:]
+ return sign + '0'*(width-n) + s
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
- """expandtabs(s [,tabsize]) -> string
-
- Return a copy of the string s with all tab characters replaced
- by the appropriate number of spaces, depending on the current
- column, and the tabsize (default 8).
-
- """
- res = line = ''
- for c in s:
- if c == '\t':
- c = ' '*(tabsize - len(line)%tabsize)
- line = line + c
- if c == '\n':
- res = res + line
- line = ''
- return res + line
+ """expandtabs(s [,tabsize]) -> string
+
+ Return a copy of the string s with all tab characters replaced
+ by the appropriate number of spaces, depending on the current
+ column, and the tabsize (default 8).
+
+ """
+ res = line = ''
+ for c in s:
+ if c == '\t':
+ c = ' '*(tabsize - len(line) % tabsize)
+ line = line + c
+ if c == '\n':
+ res = res + line
+ line = ''
+ return res + line
# Character translation through look-up table.
def translate(s, table, deletions=""):
- """translate(s,table [,deletechars]) -> string
-
- Return a copy of the string s, where all characters occurring
- in the optional argument deletechars are removed, and the
- remaining characters have been mapped through the given
- translation table, which must be a string of length 256.
-
- """
- if type(table) != type('') or len(table) != 256:
- raise TypeError, \
- "translation table must be 256 characters long"
- res = ""
- for c in s:
- if c not in deletions:
- res = res + table[ord(c)]
- return res
+ """translate(s,table [,deletechars]) -> string
+
+ Return a copy of the string s, where all characters occurring
+ in the optional argument deletechars are removed, and the
+ remaining characters have been mapped through the given
+ translation table, which must be a string of length 256.
+
+ """
+ return s.translate(table, deletions)
# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
def capitalize(s):
- """capitalize(s) -> string
+ """capitalize(s) -> string
- Return a copy of the string s with only its first character
- capitalized.
+ Return a copy of the string s with only its first character
+ capitalized.
- """
- return upper(s[:1]) + lower(s[1:])
+ """
+ return s.capitalize()
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
- """capwords(s, [sep]) -> string
+ """capwords(s, [sep]) -> string
- Split the argument into words using split, capitalize each
- word using capitalize, and join the capitalized words using
- join. Note that this replaces runs of whitespace characters by
- a single space.
+ Split the argument into words using split, capitalize each
+ word using capitalize, and join the capitalized words using
+ join. Note that this replaces runs of whitespace characters by
+ a single space.
- """
- return join(map(capitalize, split(s, sep)), sep or ' ')
+ """
+ return join(map(capitalize, s.split(sep)), sep or ' ')
# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
- """maketrans(frm, to) -> string
-
- Return a translation table (a string of 256 bytes long)
- suitable for use in string.translate. The strings frm and to
- must be of the same length.
-
- """
- if len(fromstr) != len(tostr):
- raise ValueError, "maketrans arguments must have same length"
- global _idmapL
- if not _idmapL:
- _idmapL = map(None, _idmap)
- L = _idmapL[:]
- fromstr = map(ord, fromstr)
- for i in range(len(fromstr)):
- L[fromstr[i]] = tostr[i]
- return joinfields(L, "")
+ """maketrans(frm, to) -> string
+
+ Return a translation table (a string of 256 bytes long)
+ suitable for use in string.translate. The strings frm and to
+ must be of the same length.
+
+ """
+ if len(fromstr) != len(tostr):
+ raise ValueError, "maketrans arguments must have same length"
+ global _idmapL
+ if not _idmapL:
+ _idmapL = map(None, _idmap)
+ L = _idmapL[:]
+ fromstr = map(ord, fromstr)
+ for i in range(len(fromstr)):
+ L[fromstr[i]] = tostr[i]
+ return joinfields(L, "")
# Substring replacement (global)
-def replace(str, old, new, maxsplit=0):
- """replace (str, old, new[, maxsplit]) -> string
+def replace(s, old, new, maxsplit=0):
+ """replace (str, old, new[, maxsplit]) -> string
- Return a copy of string str with all occurrences of substring
- old replaced by new. If the optional argument maxsplit is
- given, only the first maxsplit occurrences are replaced.
+ Return a copy of string str with all occurrences of substring
+ old replaced by new. If the optional argument maxsplit is
+ given, only the first maxsplit occurrences are replaced.
- """
- return joinfields(splitfields(str, old, maxsplit), new)
+ """
+ return s.replace(old, new, maxsplit)
+# XXX: transitional
+#
+# If string objects do not have methods, then we need to use the old string.py
+# library, which uses strop for many more things than just the few outlined
+# below.
+try:
+ ''.upper
+except AttributeError:
+ from stringold import *
+
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.
try:
- from strop import *
- letters = lowercase + uppercase
+ from strop import maketrans, lowercase, uppercase, whitespace
+ letters = lowercase + uppercase
except ImportError:
- pass # Use the original, slow versions
+ pass # Use the original versions
diff --git a/Lib/stringold.py b/Lib/stringold.py
index 92158ee9d37..2c3083e221c 100644
--- a/Lib/stringold.py
+++ b/Lib/stringold.py
@@ -1,8 +1,9 @@
# module 'string' -- A collection of string operations
-# Warning: most of the code you see here isn't normally used nowadays.
-# At the end of this file most functions are replaced by built-in
-# functions imported from built-in module "strop".
+# Warning: most of the code you see here isn't normally used nowadays. With
+# Python 1.6, many of these functions are implemented as methods on the
+# standard string object. They used to be implemented by a built-in module
+# called strop, but strop is now obsolete itself.
"""Common string manipulations.
@@ -30,9 +31,6 @@ octdigits = '01234567'
# Case conversion helpers
_idmap = ''
for i in range(256): _idmap = _idmap + chr(i)
-_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
-_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
-_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]
del i
# Backward compatible names for exceptions
@@ -43,544 +41,391 @@ atol_error = ValueError
# convert UPPER CASE letters to lower case
def lower(s):
- """lower(s) -> string
+ """lower(s) -> string
- Return a copy of the string s converted to lowercase.
+ Return a copy of the string s converted to lowercase.
- """
- res = ''
- for c in s:
- res = res + _lower[ord(c)]
- return res
+ """
+ return s.lower()
# Convert lower case letters to UPPER CASE
def upper(s):
- """upper(s) -> string
+ """upper(s) -> string
- Return a copy of the string s converted to uppercase.
+ Return a copy of the string s converted to uppercase.
- """
- res = ''
- for c in s:
- res = res + _upper[ord(c)]
- return res
+ """
+ return s.upper()
# Swap lower case letters and UPPER CASE
def swapcase(s):
- """swapcase(s) -> string
+ """swapcase(s) -> string
- Return a copy of the string s with upper case characters
- converted to lowercase and vice versa.
+ Return a copy of the string s with upper case characters
+ converted to lowercase and vice versa.
- """
- res = ''
- for c in s:
- res = res + _swapcase[ord(c)]
- return res
+ """
+ return s.swapcase()
# Strip leading and trailing tabs and spaces
def strip(s):
- """strip(s) -> string
+ """strip(s) -> string
- Return a copy of the string s with leading and trailing
- whitespace removed.
+ Return a copy of the string s with leading and trailing
+ whitespace removed.
- """
- i, j = 0, len(s)
- while i < j and s[i] in whitespace: i = i+1
- while i < j and s[j-1] in whitespace: j = j-1
- return s[i:j]
+ """
+ return s.strip()
# Strip leading tabs and spaces
def lstrip(s):
- """lstrip(s) -> string
+ """lstrip(s) -> string
- Return a copy of the string s with leading whitespace removed.
+ Return a copy of the string s with leading whitespace removed.
- """
- i, j = 0, len(s)
- while i < j and s[i] in whitespace: i = i+1
- return s[i:j]
+ """
+ return s.lstrip()
# Strip trailing tabs and spaces
def rstrip(s):
- """rstrip(s) -> string
+ """rstrip(s) -> string
- Return a copy of the string s with trailing whitespace
- removed.
+ Return a copy of the string s with trailing whitespace
+ removed.
- """
- i, j = 0, len(s)
- while i < j and s[j-1] in whitespace: j = j-1
- return s[i:j]
+ """
+ return s.rstrip()
# Split a string into a list of space/tab-separated words
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
- """split(str [,sep [,maxsplit]]) -> list of strings
-
- Return a list of the words in the string s, using sep as the
- delimiter string. If maxsplit is nonzero, splits into at most
- maxsplit words If sep is not specified, any whitespace string
- is a separator. Maxsplit defaults to 0.
-
- (split and splitfields are synonymous)
-
- """
- if sep is not None: return splitfields(s, sep, maxsplit)
- res = []
- i, n = 0, len(s)
- if maxsplit <= 0: maxsplit = n
- count = 0
- while i < n:
- while i < n and s[i] in whitespace: i = i+1
- if i == n: break
- if count >= maxsplit:
- res.append(s[i:])
- break
- j = i
- while j < n and s[j] not in whitespace: j = j+1
- count = count + 1
- res.append(s[i:j])
- i = j
- return res
-
-# Split a list into fields separated by a given string
-# NB: splitfields(s, ' ') is NOT the same as split(s)!
-# splitfields(s, '') returns [s] (in analogy with split() in nawk)
-def splitfields(s, sep=None, maxsplit=0):
- """splitfields(str [,sep [,maxsplit]]) -> list of strings
-
- Return a list of the words in the string s, using sep as the
- delimiter string. If maxsplit is nonzero, splits into at most
- maxsplit words If sep is not specified, any whitespace string
- is a separator. Maxsplit defaults to 0.
-
- (split and splitfields are synonymous)
-
- """
- if sep is None: return split(s, None, maxsplit)
- res = []
- nsep = len(sep)
- if nsep == 0:
- return [s]
- ns = len(s)
- if maxsplit <= 0: maxsplit = ns
- i = j = 0
- count = 0
- while j+nsep <= ns:
- if s[j:j+nsep] == sep:
- count = count + 1
- res.append(s[i:j])
- i = j = j + nsep
- if count >= maxsplit: break
- else:
- j = j + 1
- res.append(s[i:])
- return res
-
-# Join words with spaces between them
-def join(words, sep = ' '):
- """join(list [,sep]) -> string
+ """split(str [,sep [,maxsplit]]) -> list of strings
- Return a string composed of the words in list, with
- intervening occurences of sep. Sep defaults to a single
- space.
+ Return a list of the words in the string s, using sep as the
+ delimiter string. If maxsplit is nonzero, splits into at most
+ maxsplit words If sep is not specified, any whitespace string
+ is a separator. Maxsplit defaults to 0.
- (joinfields and join are synonymous)
+ (split and splitfields are synonymous)
- """
- return joinfields(words, sep)
+ """
+ return s.split(sep, maxsplit)
+splitfields = split
# Join fields with optional separator
-def joinfields(words, sep = ' '):
- """joinfields(list [,sep]) -> string
+def join(words, sep = ' '):
+ """join(list [,sep]) -> string
- Return a string composed of the words in list, with
- intervening occurences of sep. The default separator is a
- single space.
+ Return a string composed of the words in list, with
+ intervening occurences of sep. The default separator is a
+ single space.
- (joinfields and join are synonymous)
+ (joinfields and join are synonymous)
- """
- res = ''
- for w in words:
- res = res + (sep + w)
- return res[len(sep):]
+ """
+ return sep.join(words)
+joinfields = join
-# Find substring, raise exception if not found
-def index(s, sub, i = 0, last=None):
- """index(s, sub [,start [,end]]) -> int
+# for a little bit of speed
+_apply = apply
- Return the lowest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
+# Find substring, raise exception if not found
+def index(s, *args):
+ """index(s, sub [,start [,end]]) -> int
- Raise ValueError if not found.
+ Like find but raises ValueError when the substring is not found.
- """
- if last is None: last = len(s)
- res = find(s, sub, i, last)
- if res < 0:
- raise ValueError, 'substring not found in string.index'
- return res
+ """
+ return _apply(s.index, args)
# Find last substring, raise exception if not found
-def rindex(s, sub, i = 0, last=None):
- """rindex(s, sub [,start [,end]]) -> int
-
- Return the highest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
+def rindex(s, *args):
+ """rindex(s, sub [,start [,end]]) -> int
- Raise ValueError if not found.
+ Like rfind but raises ValueError when the substring is not found.
- """
- if last is None: last = len(s)
- res = rfind(s, sub, i, last)
- if res < 0:
- raise ValueError, 'substring not found in string.index'
- return res
+ """
+ return _apply(s.rindex, args)
# Count non-overlapping occurrences of substring
-def count(s, sub, i = 0, last=None):
- """count(s, sub[, start[,end]]) -> int
-
- Return the number of occurrences of substring sub in string
- s[start:end]. Optional arguments start and end are
- interpreted as in slice notation.
-
- """
- Slen = len(s) # cache this value, for speed
- if last is None:
- last = Slen
- elif last < 0:
- last = max(0, last + Slen)
- elif last > Slen:
- last = Slen
- if i < 0: i = max(0, i + Slen)
- n = len(sub)
- m = last + 1 - n
- if n == 0: return m-i
- r = 0
- while i < m:
- if sub == s[i:i+n]:
- r = r+1
- i = i+n
- else:
- i = i+1
- return r
+def count(s, *args):
+ """count(s, sub[, start[,end]]) -> int
+
+ Return the number of occurrences of substring sub in string
+ s[start:end]. Optional arguments start and end are
+ interpreted as in slice notation.
+
+ """
+ return _apply(s.count, args)
# Find substring, return -1 if not found
-def find(s, sub, i = 0, last=None):
- """find(s, sub [,start [,end]]) -> in
-
- Return the lowest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
-
- Return -1 on failure.
-
- """
- Slen = len(s) # cache this value, for speed
- if last is None:
- last = Slen
- elif last < 0:
- last = max(0, last + Slen)
- elif last > Slen:
- last = Slen
- if i < 0: i = max(0, i + Slen)
- n = len(sub)
- m = last + 1 - n
- while i < m:
- if sub == s[i:i+n]: return i
- i = i+1
- return -1
+def find(s, *args):
+    """find(s, sub [,start [,end]]) -> int
+
+    Return the lowest index in s where substring sub is found,
+    such that sub is contained within s[start,end].  Optional
+    arguments start and end are interpreted as in slice notation.
+
+    Return -1 on failure.
+
+    """
+    # Forward sub and the optional start/end untouched to the string
+    # method, so argument checking is done in one place.
+    return _apply(s.find, args)
# Find last substring, return -1 if not found
-def rfind(s, sub, i = 0, last=None):
- """rfind(s, sub [,start [,end]]) -> int
-
- Return the highest index in s where substring sub is found,
- such that sub is contained within s[start,end]. Optional
- arguments start and end are interpreted as in slice notation.
-
- Return -1 on failure.
-
- """
- Slen = len(s) # cache this value, for speed
- if last is None:
- last = Slen
- elif last < 0:
- last = max(0, last + Slen)
- elif last > Slen:
- last = Slen
- if i < 0: i = max(0, i + Slen)
- n = len(sub)
- m = last + 1 - n
- r = -1
- while i < m:
- if sub == s[i:i+n]: r = i
- i = i+1
- return r
-
-# "Safe" environment for eval()
-_safe_env = {"__builtins__": {}}
+def rfind(s, *args):
+ """rfind(s, sub [,start [,end]]) -> int
+
+ Return the highest index in s where substring sub is found,
+ such that sub is contained within s[start,end]. Optional
+ arguments start and end are interpreted as in slice notation.
+
+ Return -1 on failure.
+
+ """
+ return _apply(s.rfind, args)
+
+# for a bit of speed
+_float = float
+_int = int
+_long = long
+_StringType = type('')
# Convert string to float
-_re = None
-def atof(str):
- """atof(s) -> float
-
- Return the floating point number represented by the string s.
-
- """
- global _re
- if _re is None:
- # Don't fail if re doesn't exist -- just skip the syntax check
- try:
- import re
- except ImportError:
- _re = 0
- else:
- _re = re
- sign = ''
- s = strip(str)
- if s and s[0] in '+-':
- sign = s[0]
- s = s[1:]
- if not s:
- raise ValueError, 'non-float argument to string.atof'
- while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
- if _re and not _re.match('[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$', s):
- raise ValueError, 'non-float argument to string.atof'
- try:
- return float(eval(sign + s, _safe_env))
- except SyntaxError:
- raise ValueError, 'non-float argument to string.atof'
+def atof(s):
+ """atof(s) -> float
+
+ Return the floating point number represented by the string s.
+
+ """
+ if type(s) == _StringType:
+ return _float(s)
+ else:
+ raise TypeError('argument 1: expected string, %s found' %
+ type(s).__name__)
# Convert string to integer
-def atoi(str, base=10):
- """atoi(s [,base]) -> int
-
- Return the integer represented by the string s in the given
- base, which defaults to 10. The string s must consist of one
- or more digits, possibly preceded by a sign. If base is 0, it
- is chosen from the leading characters of s, 0 for octal, 0x or
- 0X for hexadecimal. If base is 16, a preceding 0x or 0X is
- accepted.
-
- """
- if base != 10:
- # We only get here if strop doesn't define atoi()
- raise ValueError, "this string.atoi doesn't support base != 10"
- sign = ''
- s = strip(str)
- if s and s[0] in '+-':
- sign = s[0]
- s = s[1:]
- if not s:
- raise ValueError, 'non-integer argument to string.atoi'
- while s[0] == '0' and len(s) > 1: s = s[1:]
- for c in s:
- if c not in digits:
- raise ValueError, 'non-integer argument to string.atoi'
- return eval(sign + s, _safe_env)
+def atoi(*args):
+ """atoi(s [,base]) -> int
+
+ Return the integer represented by the string s in the given
+ base, which defaults to 10. The string s must consist of one
+ or more digits, possibly preceded by a sign. If base is 0, it
+ is chosen from the leading characters of s, 0 for octal, 0x or
+ 0X for hexadecimal. If base is 16, a preceding 0x or 0X is
+ accepted.
+
+ """
+ try:
+ s = args[0]
+ except IndexError:
+ raise TypeError('function requires at least 1 argument: %d given' %
+ len(args))
+ # Don't catch type error resulting from too many arguments to int(). The
+ # error message isn't compatible but the error type is, and this function
+ # is complicated enough already.
+ if type(s) == _StringType:
+ return _apply(_int, args)
+ else:
+ raise TypeError('argument 1: expected string, %s found' %
+ type(s).__name__)
+
# Convert string to long integer
-def atol(str, base=10):
- """atol(s [,base]) -> long
-
- Return the long integer represented by the string s in the
- given base, which defaults to 10. The string s must consist
- of one or more digits, possibly preceded by a sign. If base
- is 0, it is chosen from the leading characters of s, 0 for
- octal, 0x or 0X for hexadecimal. If base is 16, a preceding
- 0x or 0X is accepted. A trailing L or l is not accepted,
- unless base is 0.
-
- """
- if base != 10:
- # We only get here if strop doesn't define atol()
- raise ValueError, "this string.atol doesn't support base != 10"
- sign = ''
- s = strip(str)
- if s and s[0] in '+-':
- sign = s[0]
- s = s[1:]
- if not s:
- raise ValueError, 'non-integer argument to string.atol'
- while s[0] == '0' and len(s) > 1: s = s[1:]
- for c in s:
- if c not in digits:
- raise ValueError, 'non-integer argument to string.atol'
- return eval(sign + s + 'L', _safe_env)
+def atol(*args):
+ """atol(s [,base]) -> long
+
+ Return the long integer represented by the string s in the
+ given base, which defaults to 10. The string s must consist
+ of one or more digits, possibly preceded by a sign. If base
+ is 0, it is chosen from the leading characters of s, 0 for
+ octal, 0x or 0X for hexadecimal. If base is 16, a preceding
+ 0x or 0X is accepted. A trailing L or l is not accepted,
+ unless base is 0.
+
+ """
+ try:
+ s = args[0]
+ except IndexError:
+ raise TypeError('function requires at least 1 argument: %d given' %
+ len(args))
+ # Don't catch type error resulting from too many arguments to long(). The
+ # error message isn't compatible but the error type is, and this function
+ # is complicated enough already.
+ if type(s) == _StringType:
+ return _apply(_long, args)
+ else:
+ raise TypeError('argument 1: expected string, %s found' %
+ type(s).__name__)
+
# Left-justify a string
def ljust(s, width):
- """ljust(s, width) -> string
+ """ljust(s, width) -> string
- Return a left-justified version of s, in a field of the
- specified width, padded with spaces as needed. The string is
- never truncated.
+ Return a left-justified version of s, in a field of the
+ specified width, padded with spaces as needed. The string is
+ never truncated.
- """
- n = width - len(s)
- if n <= 0: return s
- return s + ' '*n
+ """
+ n = width - len(s)
+ if n <= 0: return s
+ return s + ' '*n
# Right-justify a string
def rjust(s, width):
- """rjust(s, width) -> string
+ """rjust(s, width) -> string
- Return a right-justified version of s, in a field of the
- specified width, padded with spaces as needed. The string is
- never truncated.
+ Return a right-justified version of s, in a field of the
+ specified width, padded with spaces as needed. The string is
+ never truncated.
- """
- n = width - len(s)
- if n <= 0: return s
- return ' '*n + s
+ """
+ n = width - len(s)
+ if n <= 0: return s
+ return ' '*n + s
# Center a string
def center(s, width):
- """center(s, width) -> string
+ """center(s, width) -> string
- Return a center version of s, in a field of the specified
- width. padded with spaces as needed. The string is never
- truncated.
+ Return a center version of s, in a field of the specified
+ width. padded with spaces as needed. The string is never
+ truncated.
- """
- n = width - len(s)
- if n <= 0: return s
- half = n/2
- if n%2 and width%2:
- # This ensures that center(center(s, i), j) = center(s, j)
- half = half+1
- return ' '*half + s + ' '*(n-half)
+ """
+ n = width - len(s)
+ if n <= 0: return s
+ half = n/2
+ if n%2 and width%2:
+ # This ensures that center(center(s, i), j) = center(s, j)
+ half = half+1
+ return ' '*half + s + ' '*(n-half)
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
- """zfill(x, width) -> string
+ """zfill(x, width) -> string
- Pad a numeric string x with zeros on the left, to fill a field
- of the specified width. The string x is never truncated.
+ Pad a numeric string x with zeros on the left, to fill a field
+ of the specified width. The string x is never truncated.
- """
- if type(x) == type(''): s = x
- else: s = `x`
- n = len(s)
- if n >= width: return s
- sign = ''
- if s[0] in ('-', '+'):
- sign, s = s[0], s[1:]
- return sign + '0'*(width-n) + s
+ """
+ if type(x) == type(''): s = x
+ else: s = `x`
+ n = len(s)
+ if n >= width: return s
+ sign = ''
+ if s[0] in ('-', '+'):
+ sign, s = s[0], s[1:]
+ return sign + '0'*(width-n) + s
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
- """expandtabs(s [,tabsize]) -> string
-
- Return a copy of the string s with all tab characters replaced
- by the appropriate number of spaces, depending on the current
- column, and the tabsize (default 8).
-
- """
- res = line = ''
- for c in s:
- if c == '\t':
- c = ' '*(tabsize - len(line)%tabsize)
- line = line + c
- if c == '\n':
- res = res + line
- line = ''
- return res + line
+ """expandtabs(s [,tabsize]) -> string
+
+ Return a copy of the string s with all tab characters replaced
+ by the appropriate number of spaces, depending on the current
+ column, and the tabsize (default 8).
+
+ """
+ res = line = ''
+ for c in s:
+ if c == '\t':
+ c = ' '*(tabsize - len(line) % tabsize)
+ line = line + c
+ if c == '\n':
+ res = res + line
+ line = ''
+ return res + line
# Character translation through look-up table.
def translate(s, table, deletions=""):
- """translate(s,table [,deletechars]) -> string
-
- Return a copy of the string s, where all characters occurring
- in the optional argument deletechars are removed, and the
- remaining characters have been mapped through the given
- translation table, which must be a string of length 256.
-
- """
- if type(table) != type('') or len(table) != 256:
- raise TypeError, \
- "translation table must be 256 characters long"
- res = ""
- for c in s:
- if c not in deletions:
- res = res + table[ord(c)]
- return res
+ """translate(s,table [,deletechars]) -> string
+
+ Return a copy of the string s, where all characters occurring
+ in the optional argument deletechars are removed, and the
+ remaining characters have been mapped through the given
+ translation table, which must be a string of length 256.
+
+ """
+ return s.translate(table, deletions)
# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
def capitalize(s):
- """capitalize(s) -> string
+ """capitalize(s) -> string
- Return a copy of the string s with only its first character
- capitalized.
+ Return a copy of the string s with only its first character
+ capitalized.
- """
- return upper(s[:1]) + lower(s[1:])
+ """
+ return s.capitalize()
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
- """capwords(s, [sep]) -> string
+ """capwords(s, [sep]) -> string
- Split the argument into words using split, capitalize each
- word using capitalize, and join the capitalized words using
- join. Note that this replaces runs of whitespace characters by
- a single space.
+ Split the argument into words using split, capitalize each
+ word using capitalize, and join the capitalized words using
+ join. Note that this replaces runs of whitespace characters by
+ a single space.
- """
- return join(map(capitalize, split(s, sep)), sep or ' ')
+ """
+ return join(map(capitalize, s.split(sep)), sep or ' ')
# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
- """maketrans(frm, to) -> string
-
- Return a translation table (a string of 256 bytes long)
- suitable for use in string.translate. The strings frm and to
- must be of the same length.
-
- """
- if len(fromstr) != len(tostr):
- raise ValueError, "maketrans arguments must have same length"
- global _idmapL
- if not _idmapL:
- _idmapL = map(None, _idmap)
- L = _idmapL[:]
- fromstr = map(ord, fromstr)
- for i in range(len(fromstr)):
- L[fromstr[i]] = tostr[i]
- return joinfields(L, "")
+ """maketrans(frm, to) -> string
+
+ Return a translation table (a string of 256 bytes long)
+ suitable for use in string.translate. The strings frm and to
+ must be of the same length.
+
+ """
+ if len(fromstr) != len(tostr):
+ raise ValueError, "maketrans arguments must have same length"
+ global _idmapL
+ if not _idmapL:
+ _idmapL = map(None, _idmap)
+ L = _idmapL[:]
+ fromstr = map(ord, fromstr)
+ for i in range(len(fromstr)):
+ L[fromstr[i]] = tostr[i]
+ return joinfields(L, "")
# Substring replacement (global)
-def replace(str, old, new, maxsplit=0):
- """replace (str, old, new[, maxsplit]) -> string
+def replace(s, old, new, maxsplit=0):
+ """replace (str, old, new[, maxsplit]) -> string
- Return a copy of string str with all occurrences of substring
- old replaced by new. If the optional argument maxsplit is
- given, only the first maxsplit occurrences are replaced.
+ Return a copy of string str with all occurrences of substring
+ old replaced by new. If the optional argument maxsplit is
+ given, only the first maxsplit occurrences are replaced.
- """
- return joinfields(splitfields(str, old, maxsplit), new)
+ """
+ return s.replace(old, new, maxsplit)
+# XXX: transitional
+#
+# If string objects do not have methods, then we need to use the old string.py
+# library, which uses strop for many more things than just the few outlined
+# below.
+try:
+ ''.upper
+except AttributeError:
+ from stringold import *
+
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.
try:
- from strop import *
- letters = lowercase + uppercase
+ from strop import maketrans, lowercase, uppercase, whitespace
+ letters = lowercase + uppercase
except ImportError:
- pass # Use the original, slow versions
+ pass # Use the original versions
diff --git a/Objects/abstract.c b/Objects/abstract.c
index be986f5fde7..c120769687d 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -53,123 +53,6 @@ null_error()
return NULL;
}
-/* Copied with modifications from stropmodule.c: atoi, atof, atol */
-
-static PyObject *
-int_from_string(v)
- PyObject *v;
-{
- char *s, *end;
- long x;
- char buffer[256]; /* For errors */
-
- s = PyString_AS_STRING(v);
- while (*s && isspace(Py_CHARMASK(*s)))
- s++;
- errno = 0;
- x = PyOS_strtol(s, &end, 10);
- if (end == s || !isdigit(end[-1]))
- goto bad;
- while (*end && isspace(Py_CHARMASK(*end)))
- end++;
- if (*end != '\0') {
- bad:
- sprintf(buffer, "invalid literal for int(): %.200s", s);
- PyErr_SetString(PyExc_ValueError, buffer);
- return NULL;
- }
- else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) {
- PyErr_SetString(PyExc_ValueError,
- "null byte in argument for int()");
- return NULL;
- }
- else if (errno != 0) {
- sprintf(buffer, "int() literal too large: %.200s", s);
- PyErr_SetString(PyExc_ValueError, buffer);
- return NULL;
- }
- return PyInt_FromLong(x);
-}
-
-static PyObject *
-long_from_string(v)
- PyObject *v;
-{
- char *s, *end;
- PyObject *x;
- char buffer[256]; /* For errors */
-
- s = PyString_AS_STRING(v);
- while (*s && isspace(Py_CHARMASK(*s)))
- s++;
- x = PyLong_FromString(s, &end, 10);
- if (x == NULL) {
- if (PyErr_ExceptionMatches(PyExc_ValueError))
- goto bad;
- return NULL;
- }
- while (*end && isspace(Py_CHARMASK(*end)))
- end++;
- if (*end != '\0') {
- bad:
- sprintf(buffer, "invalid literal for long(): %.200s", s);
- PyErr_SetString(PyExc_ValueError, buffer);
- Py_XDECREF(x);
- return NULL;
- }
- else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) {
- PyErr_SetString(PyExc_ValueError,
- "null byte in argument for long()");
- return NULL;
- }
- return x;
-}
-
-static PyObject *
-float_from_string(v)
- PyObject *v;
-{
- extern double strtod Py_PROTO((const char *, char **));
- char *s, *last, *end;
- double x;
- char buffer[256]; /* For errors */
-
- s = PyString_AS_STRING(v);
- last = s + PyString_GET_SIZE(v);
- while (*s && isspace(Py_CHARMASK(*s)))
- s++;
- if (s[0] == '\0') {
- PyErr_SetString(PyExc_ValueError, "empty string for float()");
- return NULL;
- }
- errno = 0;
- PyFPE_START_PROTECT("float_from_string", return 0)
- x = strtod(s, &end);
- PyFPE_END_PROTECT(x)
- /* Believe it or not, Solaris 2.6 can move end *beyond* the null
- byte at the end of the string, when the input is inf(inity) */
- if (end > last)
- end = last;
- while (*end && isspace(Py_CHARMASK(*end)))
- end++;
- if (*end != '\0') {
- sprintf(buffer, "invalid literal for float(): %.200s", s);
- PyErr_SetString(PyExc_ValueError, buffer);
- return NULL;
- }
- else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) {
- PyErr_SetString(PyExc_ValueError,
- "null byte in argument for float()");
- return NULL;
- }
- else if (errno != 0) {
- sprintf(buffer, "float() literal too large: %.200s", s);
- PyErr_SetString(PyExc_ValueError, buffer);
- return NULL;
- }
- return PyFloat_FromDouble(x);
-}
-
/* Operations on any object */
int
@@ -713,7 +596,7 @@ PyNumber_Int(o)
if (o == NULL)
return null_error();
if (PyString_Check(o))
- return int_from_string(o);
+ return PyInt_FromString(PyString_AS_STRING(o), NULL, 10);
m = o->ob_type->tp_as_number;
if (m && m->nb_int)
return m->nb_int(o);
@@ -721,6 +604,61 @@ PyNumber_Int(o)
return type_error("object can't be converted to int");
}
+/* There are two C API functions for converting a string to a long,
+ * PyNumber_Long() and PyLong_FromString(). Both are used in builtin_long,
+ * reachable from Python with the built-in function long().
+ *
+ * The difference is this: PyNumber_Long will raise an exception when the
+ * string cannot be converted to a long. The most common situation is
+ * where a float string is passed in; this raises a ValueError.
+ * PyLong_FromString does not raise an exception; it silently truncates the
+ * float to an integer.
+ *
+ * You can see the different behavior from Python with the following:
+ *
+ * long('9.5')
+ * => ValueError: invalid literal for long(): 9.5
+ *
+ * long('9.5', 10)
+ * => 9L
+ *
+ * The first example ends up calling PyNumber_Long(), while the second one
+ * calls PyLong_FromString().
+ */
+/* Strict base-10 conversion of the string object v to a long.
+ *
+ * Leading and trailing whitespace is skipped.  Any other trailing
+ * character, or an embedded null byte, raises ValueError.  Returns a
+ * new reference, or NULL with an exception set.
+ */
+static PyObject *
+long_from_string(v)
+    PyObject *v;
+{
+    char *s, *end;
+    PyObject *x;
+    char buffer[256]; /* For errors */
+
+    s = PyString_AS_STRING(v);
+    while (*s && isspace(Py_CHARMASK(*s)))
+        s++;
+    x = PyLong_FromString(s, &end, 10);
+    if (x == NULL) {
+        /* Replace a ValueError with our own message; pass any
+           other exception through unchanged. */
+        if (PyErr_ExceptionMatches(PyExc_ValueError))
+            goto bad;
+        return NULL;
+    }
+    while (*end && isspace(Py_CHARMASK(*end)))
+        end++;
+    if (*end != '\0') {
+  bad:
+        sprintf(buffer, "invalid literal for long(): %.200s", s);
+        PyErr_SetString(PyExc_ValueError, buffer);
+        /* x is NULL when we arrive here through the goto. */
+        Py_XDECREF(x);
+        return NULL;
+    }
+    else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) {
+        /* Parsing stopped at a '\0' before the real end of the
+           string object, so the string has an embedded null. */
+        PyErr_SetString(PyExc_ValueError,
+                        "null byte in argument for long()");
+        /* x is a valid new reference here; release it so the
+           error path does not leak the long object. */
+        Py_DECREF(x);
+        return NULL;
+    }
+    return x;
+}
+
PyObject *
PyNumber_Long(o)
PyObject *o;
@@ -730,6 +668,10 @@ PyNumber_Long(o)
if (o == NULL)
return null_error();
if (PyString_Check(o))
+ /* need to do extra error checking that PyLong_FromString()
+ * doesn't do. In particular long('9.5') must raise an
+ * exception, not truncate the float.
+ */
return long_from_string(o);
m = o->ob_type->tp_as_number;
if (m && m->nb_long)
@@ -747,7 +689,7 @@ PyNumber_Float(o)
if (o == NULL)
return null_error();
if (PyString_Check(o))
- return float_from_string(o);
+ return PyFloat_FromString(o, NULL);
m = o->ob_type->tp_as_number;
if (m && m->nb_float)
return m->nb_float(o);
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index ba373091295..cb5d9e36edd 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -149,6 +149,57 @@ PyFloat_FromDouble(fval)
return (PyObject *) op;
}
+/* Convert the string object v to a new float object.
+ *
+ * v    - must be a string object; anything else raises TypeError.
+ * pend - if non-NULL, receives on success the position where parsing
+ *        stopped (after any trailing whitespace).
+ *
+ * Leading and trailing whitespace is accepted.  An empty string,
+ * trailing garbage, an embedded null byte or an out-of-range literal
+ * raises ValueError.  Returns a new reference, or NULL with an
+ * exception set.
+ */
+PyObject *
+PyFloat_FromString(v, pend)
+    PyObject *v;
+    char **pend;
+{
+    extern double strtod Py_PROTO((const char *, char **));
+    char *s, *last, *end;
+    double x;
+    char buffer[256]; /* For errors */
+
+    if (!PyString_Check(v)) {
+        /* Returning NULL without an exception set would confuse
+           the caller, so report the wrong type explicitly. */
+        PyErr_SetString(PyExc_TypeError,
+                        "float() needs a string argument");
+        return NULL;
+    }
+    s = PyString_AS_STRING(v);
+
+    last = s + PyString_GET_SIZE(v);
+    while (*s && isspace(Py_CHARMASK(*s)))
+        s++;
+    if (s[0] == '\0') {
+        PyErr_SetString(PyExc_ValueError, "empty string for float()");
+        return NULL;
+    }
+    errno = 0;
+    PyFPE_START_PROTECT("PyFloat_FromString", return 0)
+    x = strtod(s, &end);
+    PyFPE_END_PROTECT(x)
+    /* Believe it or not, Solaris 2.6 can move end *beyond* the null
+       byte at the end of the string, when the input is inf(inity) */
+    if (end > last)
+        end = last;
+    while (*end && isspace(Py_CHARMASK(*end)))
+        end++;
+    if (*end != '\0') {
+        sprintf(buffer, "invalid literal for float(): %.200s", s);
+        PyErr_SetString(PyExc_ValueError, buffer);
+        return NULL;
+    }
+    else if (end != PyString_AS_STRING(v) + PyString_GET_SIZE(v)) {
+        /* Stopped at a '\0' that is not the end of the object. */
+        PyErr_SetString(PyExc_ValueError,
+                        "null byte in argument for float()");
+        return NULL;
+    }
+    else if (errno != 0) {
+        sprintf(buffer, "float() literal too large: %.200s", s);
+        PyErr_SetString(PyExc_ValueError, buffer);
+        return NULL;
+    }
+    if (pend)
+        *pend = end;
+    return PyFloat_FromDouble(x);
+}
+
static void
float_dealloc(op)
PyFloatObject *op;
diff --git a/Objects/intobject.c b/Objects/intobject.c
index f2d77e1a730..45c21868a6e 100644
--- a/Objects/intobject.c
+++ b/Objects/intobject.c
@@ -32,6 +32,7 @@ PERFORMANCE OF THIS SOFTWARE.
/* Integer object implementation */
#include "Python.h"
+#include <ctype.h>
#ifdef HAVE_LIMITS_H
#include <limits.h>
@@ -218,6 +219,48 @@ PyInt_AsLong(op)
return val;
}
+/* Convert the C string s to a new int object.
+ *
+ * s    - null-terminated text; surrounding whitespace is skipped.
+ * pend - if non-NULL, receives on success the position where parsing
+ *        stopped (after any trailing whitespace).
+ * base - 0, or a value from 2 to 36; anything else raises ValueError.
+ *
+ * Raises ValueError for an invalid literal or when the conversion
+ * sets errno.  Returns a new reference, or NULL with an exception set.
+ */
+PyObject *
+PyInt_FromString(s, pend, base)
+    char *s;
+    char **pend;
+    int base;
+{
+    char *end;
+    long x;
+    char buffer[256]; /* For errors */
+
+    if ((base != 0 && base < 2) || base > 36) {
+        PyErr_SetString(PyExc_ValueError, "invalid base for int()");
+        return NULL;
+    }
+
+    while (*s && isspace(Py_CHARMASK(*s)))
+        s++;
+    errno = 0;
+    /* With base 0 and a leading '0' the literal is parsed through
+       the unsigned conversion and the result cast back to long. */
+    if (base == 0 && s[0] == '0')
+        x = (long) PyOS_strtoul(s, &end, base);
+    else
+        x = PyOS_strtol(s, &end, base);
+    /* Py_CHARMASK keeps the argument to isalnum() non-negative;
+       passing a negative plain char to a <ctype.h> function is
+       undefined behaviour. */
+    if (end == s || !isalnum(Py_CHARMASK(end[-1])))
+        goto bad;
+    while (*end && isspace(Py_CHARMASK(*end)))
+        end++;
+    if (*end != '\0') {
+  bad:
+        sprintf(buffer, "invalid literal for int(): %.200s", s);
+        PyErr_SetString(PyExc_ValueError, buffer);
+        return NULL;
+    }
+    else if (errno != 0) {
+        sprintf(buffer, "int() literal too large: %.200s", s);
+        PyErr_SetString(PyExc_ValueError, buffer);
+        return NULL;
+    }
+    if (pend)
+        *pend = end;
+    return PyInt_FromLong(x);
+}
+
/* Methods */
/* ARGSUSED */
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index eecb0060b58..264ed9ac31d 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -525,6 +525,1049 @@ static PyBufferProcs string_as_buffer = {
(getcharbufferproc)string_buffer_getcharbuf,
};
+
+
+/* Selectors passed as the striptype argument of do_strip(): they say
+   which end(s) of the string have whitespace removed. */
+#define LEFTSTRIP 0
+#define RIGHTSTRIP 1
+#define BOTHSTRIP 2
+
+
+/* Split the len bytes at s on runs of whitespace and return a new list
+   of the words.  When maxsplit is nonzero, splitting stops after
+   maxsplit words and the unsplit remainder (if any) is appended as one
+   final item.  Returns NULL with an exception set on failure. */
+static PyObject *
+split_whitespace(s, len, maxsplit)
+	char *s;
+	int len;
+	int maxsplit;
+{
+	int pos = 0, start, status;
+	int nwords = 0;
+	PyObject *piece;
+	PyObject *result = PyList_New(0);
+
+	if (result == NULL)
+		return NULL;
+
+	while (pos < len) {
+		/* Skip whitespace, then scan one word */
+		for (; pos < len && isspace(Py_CHARMASK(s[pos])); pos++)
+			;
+		start = pos;
+		for (; pos < len && !isspace(Py_CHARMASK(s[pos])); pos++)
+			;
+		if (start >= pos)
+			continue;	/* only trailing whitespace was left */
+
+		piece = PyString_FromStringAndSize(s+start, (int)(pos-start));
+		if (piece == NULL)
+			goto onError;
+		status = PyList_Append(result, piece);
+		Py_DECREF(piece);
+		if (status < 0)
+			goto onError;
+		nwords++;
+
+		/* Drop whitespace following the word so that a remainder,
+		   if emitted, starts on a non-space character */
+		for (; pos < len && isspace(Py_CHARMASK(s[pos])); pos++)
+			;
+		if (!maxsplit || nwords < maxsplit || pos >= len)
+			continue;
+
+		/* Split limit reached: the rest goes in unsplit */
+		piece = PyString_FromStringAndSize(s+pos, (int)(len - pos));
+		if (piece == NULL)
+			goto onError;
+		status = PyList_Append(result, piece);
+		Py_DECREF(piece);
+		if (status < 0)
+			goto onError;
+		pos = len;
+	}
+	return result;
+
+  onError:
+	Py_DECREF(result);
+	return NULL;
+}
+
+
+static char split__doc__[] =
+"S.split([sep [,maxsplit]]) -> list of strings\n\
+\n\
+Return a list of the words in the string S, using sep as the\n\
+delimiter string. If maxsplit is nonzero, splits into at most\n\
+maxsplit words If sep is not specified, any whitespace string\n\
+is a separator. Maxsplit defaults to 0.";
+
+/* Implements S.split().  Without a separator (or with None) the work is
+   handed to split_whitespace(); an empty separator raises ValueError.
+   Otherwise S is cut at each occurrence of the separator, stopping
+   after maxsplit cuts when maxsplit is nonzero; the remaining tail is
+   always appended as the last item. */
+static PyObject *
+string_split(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *s = PyString_AS_STRING(self);
+	int len = PyString_GET_SIZE(self);
+	char *sep = NULL;
+	int seplen = 0;
+	int maxsplit = 0;
+	int nsplits = 0;
+	int pos, start, status;
+	PyObject *result, *piece;
+
+	if (!PyArg_ParseTuple(args, "|z#i", &sep, &seplen, &maxsplit))
+		return NULL;
+	if (sep == NULL)
+		return split_whitespace(s, len, maxsplit);
+	if (seplen == 0) {
+		PyErr_SetString(PyExc_ValueError, "empty separator");
+		return NULL;
+	}
+
+	result = PyList_New(0);
+	if (result == NULL)
+		return NULL;
+
+	start = 0;
+	for (pos = 0; pos + seplen <= len; ) {
+		/* No separator here: advance by one byte */
+		if (s[pos] != sep[0] ||
+		    (seplen != 1 && memcmp(s+pos, sep, seplen) != 0)) {
+			pos++;
+			continue;
+		}
+		piece = PyString_FromStringAndSize(s+start, (int)(pos-start));
+		if (piece == NULL)
+			goto onError;
+		status = PyList_Append(result, piece);
+		Py_DECREF(piece);
+		if (status < 0)
+			goto onError;
+		pos += seplen;
+		start = pos;
+		nsplits++;
+		if (maxsplit && (nsplits >= maxsplit))
+			break;
+	}
+	/* Whatever follows the last separator (possibly empty) */
+	piece = PyString_FromStringAndSize(s+start, (int)(len-start));
+	if (piece == NULL)
+		goto onError;
+	status = PyList_Append(result, piece);
+	Py_DECREF(piece);
+	if (status < 0)
+		goto onError;
+
+	return result;
+
+  onError:
+	Py_DECREF(result);
+	return NULL;
+}
+
+
+static char join__doc__[] =
+"S.join(sequence) -> string\n\
+\n\
+Return a string which is the concatenation of the string representation\n\
+of every element in the sequence. The separator between elements is S.";
+
+/* Implements S.join(sequence).  Each element is converted with
+   PyObject_Str() and the pieces are concatenated with S between them.
+   The result buffer starts at 100 bytes and is doubled as needed, then
+   trimmed to the final length.  Returns a new reference, or NULL with
+   an exception set. */
+static PyObject *
+string_join(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *sep = PyString_AS_STRING(self);
+	int seplen = PyString_GET_SIZE(self);
+	PyObject *res = NULL;
+	int reslen = 0;
+	char *p;
+	int seqlen = 0;
+	int sz = 100;
+	int i, slen;
+	int is_list;
+	PyObject *seq;
+
+	if (!PyArg_ParseTuple(args, "O", &seq))
+		return NULL;
+
+	seqlen = PySequence_Length(seq);
+	if (seqlen < 0 && PyErr_Occurred())
+		return NULL;
+
+	if (seqlen == 1) {
+		/* Optimization if there's only one item */
+		PyObject *item = PySequence_GetItem(seq, 0);
+		PyObject *stritem;
+		if (item == NULL)
+			return NULL;
+		stritem = PyObject_Str(item);
+		Py_DECREF(item);
+		return stritem;
+	}
+	if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
+		return NULL;
+	p = PyString_AsString(res);
+
+	/* optimize for lists. all others (tuples and arbitrary sequences)
+	 * just use the abstract interface.
+	 */
+	is_list = PyList_Check(seq);
+	for (i = 0; i < seqlen; i++) {
+		PyObject *item, *sitem;
+
+		if (is_list) {
+			/* Borrowed reference: own it while str() runs */
+			item = PyList_GET_ITEM(seq, i);
+			Py_INCREF(item);
+		}
+		else {
+			item = PySequence_GetItem(seq, i);
+			if (item == NULL)
+				goto finally;
+		}
+		sitem = PyObject_Str(item);
+		Py_DECREF(item);
+		if (sitem == NULL)
+			goto finally;
+
+		slen = PyString_GET_SIZE(sitem);
+		while (reslen + slen + seplen >= sz) {
+			if (_PyString_Resize(&res, sz*2)) {
+				Py_DECREF(sitem);
+				goto finally;
+			}
+			sz *= 2;
+			/* The buffer may have moved */
+			p = PyString_AsString(res) + reslen;
+		}
+		if (i > 0) {
+			memcpy(p, sep, seplen);
+			p += seplen;
+			reslen += seplen;
+		}
+		memcpy(p, PyString_AS_STRING(sitem), slen);
+		p += slen;
+		reslen += slen;
+		/* str(item) is a new reference; release it once copied */
+		Py_DECREF(sitem);
+	}
+	if (_PyString_Resize(&res, reslen))
+		goto finally;
+	return res;
+
+  finally:
+	/* _PyString_Resize() leaves res NULL when it fails */
+	Py_XDECREF(res);
+	return NULL;
+}
+
+
+
+/* Shared worker for find() and index().  Parses (sub [,start [,end]]),
+   clamps the bounds the way slices do, and scans forward.  Returns the
+   lowest matching index, -1 when sub is not found, or -2 when argument
+   parsing failed (an exception is then set). */
+static long
+string_find_internal(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *s = PyString_AS_STRING(self);
+	char *sub;
+	int len = PyString_GET_SIZE(self);
+	int sublen;
+	int start = 0, stop = INT_MAX;
+
+	if (!PyArg_ParseTuple(args, "t#|ii", &sub, &sublen, &start, &stop))
+		return -2;
+
+	/* Clamp the end bound */
+	if (stop > len)
+		stop = len;
+	if (stop < 0) {
+		stop += len;
+		if (stop < 0)
+			stop = 0;
+	}
+	/* Clamp the start bound */
+	if (start < 0) {
+		start += len;
+		if (start < 0)
+			start = 0;
+	}
+
+	/* The empty string matches at the start position */
+	if (sublen == 0 && start <= stop)
+		return (long)start;
+
+	/* Last index at which a full match can still begin */
+	stop -= sublen;
+	while (start <= stop) {
+		if (s[start] == sub[0] &&
+		    (sublen == 1 ||
+		     memcmp(s+start+1, sub+1, sublen-1) == 0))
+			return (long)start;
+		++start;
+	}
+
+	return -1;
+}
+
+
+static char find__doc__[] =
+"S.find(sub [,start [,end]]) -> int\n\
+\n\
+Return the lowest index in S where substring sub is found,\n\
+such that sub is contained within s[start,end]. Optional\n\
+arguments start and end are interpreted as in slice notation.\n\
+\n\
+Return -1 on failure.";
+
+/* Implements S.find(): wraps the result of string_find_internal() in a
+   Python int; -2 from the worker means an exception is already set. */
+static PyObject *
+string_find(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	long pos = string_find_internal(self, args);
+
+	return (pos == -2) ? NULL : PyInt_FromLong(pos);
+}
+
+
+static char index__doc__[] =
+"S.index(sub [,start [,end]]) -> int\n\
+\n\
+Like S.find() but raise ValueError when the substring is not found.";
+
+/* Implements S.index(): same search as find(), but a miss (-1 from the
+   worker) is turned into a ValueError. */
+static PyObject *
+string_index(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	long pos = string_find_internal(self, args);
+
+	switch (pos) {
+	case -2:
+		/* argument error, exception already set */
+		return NULL;
+	case -1:
+		PyErr_SetString(PyExc_ValueError,
+				"substring not found in string.index");
+		return NULL;
+	default:
+		return PyInt_FromLong(pos);
+	}
+}
+
+
+/* Shared worker for rfind() and rindex().  Parses (sub [,start [,end]]),
+   clamps the bounds the way slices do, and scans backward.  Returns the
+   highest matching index, -1 when sub is not found, or -2 when argument
+   parsing failed (an exception is then set). */
+static long
+string_rfind_internal(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *s = PyString_AS_STRING(self);
+	char *sub;
+	int len = PyString_GET_SIZE(self);
+	int sublen, pos;
+	int start = 0, stop = INT_MAX;
+
+	if (!PyArg_ParseTuple(args, "t#|ii", &sub, &sublen, &start, &stop))
+		return -2;
+
+	/* Clamp the end bound */
+	if (stop > len)
+		stop = len;
+	if (stop < 0) {
+		stop += len;
+		if (stop < 0)
+			stop = 0;
+	}
+	/* Clamp the start bound */
+	if (start < 0) {
+		start += len;
+		if (start < 0)
+			start = 0;
+	}
+
+	/* The empty string matches at the end position */
+	if (sublen == 0 && start <= stop)
+		return (long)stop;
+
+	/* Walk down from the last index where a full match fits */
+	pos = stop - sublen;
+	while (pos >= start) {
+		if (s[pos] == sub[0] &&
+		    (sublen == 1 ||
+		     memcmp(s+pos+1, sub+1, sublen-1) == 0))
+			return (long)pos;
+		--pos;
+	}
+
+	return -1;
+}
+
+
+static char rfind__doc__[] =
+"S.rfind(sub [,start [,end]]) -> int\n\
+\n\
+Return the highest index in S where substring sub is found,\n\
+such that sub is contained within s[start,end]. Optional\n\
+arguments start and end are interpreted as in slice notation.\n\
+\n\
+Return -1 on failure.";
+
+/* Implements S.rfind(): wraps the result of string_rfind_internal() in
+   a Python int; -2 from the worker means an exception is already set. */
+static PyObject *
+string_rfind(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	long pos = string_rfind_internal(self, args);
+
+	return (pos == -2) ? NULL : PyInt_FromLong(pos);
+}
+
+
+static char rindex__doc__[] =
+"S.rindex(sub [,start [,end]]) -> int\n\
+\n\
+Like S.rfind() but raise ValueError when the substring is not found.";
+
+/* Implements S.rindex(): same search as rfind(), but a miss (-1 from
+   the worker) is turned into a ValueError. */
+static PyObject *
+string_rindex(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	long pos = string_rfind_internal(self, args);
+
+	switch (pos) {
+	case -2:
+		/* argument error, exception already set */
+		return NULL;
+	case -1:
+		PyErr_SetString(PyExc_ValueError,
+				"substring not found in string.rindex");
+		return NULL;
+	default:
+		return PyInt_FromLong(pos);
+	}
+}
+
+
+/* Common implementation of strip(), lstrip() and rstrip().
+   striptype is LEFTSTRIP, RIGHTSTRIP or BOTHSTRIP and selects which
+   end(s) of the string lose their whitespace.  Returns self (with a new
+   reference) when nothing needs removing, otherwise a new string;
+   NULL with an exception set on argument errors. */
+static PyObject *
+do_strip(self, args, striptype)
+	PyStringObject *self;
+	PyObject *args;
+	int striptype;
+{
+	char *s = PyString_AS_STRING(self);
+	int len = PyString_GET_SIZE(self), i, j;
+	PyObject *scobj = NULL;
+	int count = -1;
+
+	/* PyArg_ParseTuple() stores through its output arguments, so it
+	   must receive their addresses.  Both optional values are parsed
+	   but not used below. */
+	if (!PyArg_ParseTuple(args, "|Oi", &scobj, &count))
+		return NULL;
+
+	/* i: index of the first byte to keep */
+	i = 0;
+	if (striptype != RIGHTSTRIP) {
+		while (i < len && isspace(Py_CHARMASK(s[i]))) {
+			i++;
+		}
+	}
+
+	/* j: one past the last byte to keep; never moves below i */
+	j = len;
+	if (striptype != LEFTSTRIP) {
+		do {
+			j--;
+		} while (j >= i && isspace(Py_CHARMASK(s[j])));
+		j++;
+	}
+
+	if (i == 0 && j == len) {
+		/* Nothing stripped: share the original object */
+		Py_INCREF(self);
+		return (PyObject*)self;
+	}
+	else
+		return PyString_FromStringAndSize(s+i, j-i);
+}
+
+
+static char strip__doc__[] =
+"S.strip() -> string\n\
+\n\
+Return a copy of the string S with leading and trailing\n\
+whitespace removed.";
+
+/* Implements S.strip(): delegates to do_strip() with BOTHSTRIP so that
+   both ends of the string are trimmed. */
+static PyObject *
+string_strip(self, args)
+ PyStringObject *self;
+ PyObject *args;
+{
+ return do_strip(self, args, BOTHSTRIP);
+}
+
+
+static char lstrip__doc__[] =
+"S.lstrip() -> string\n\
+\n\
+Return a copy of the string S with leading whitespace removed.";
+
+/* Implements S.lstrip(): delegates to do_strip() with LEFTSTRIP so that
+   only the start of the string is trimmed. */
+static PyObject *
+string_lstrip(self, args)
+ PyStringObject *self;
+ PyObject *args;
+{
+ return do_strip(self, args, LEFTSTRIP);
+}
+
+
+static char rstrip__doc__[] =
+"S.rstrip() -> string\n\
+\n\
+Return a copy of the string S with trailing whitespace removed.";
+
+/* Implements S.rstrip(): delegates to do_strip() with RIGHTSTRIP so
+   that only the end of the string is trimmed. */
+static PyObject *
+string_rstrip(self, args)
+ PyStringObject *self;
+ PyObject *args;
+{
+ return do_strip(self, args, RIGHTSTRIP);
+}
+
+
+static char lower__doc__[] =
+"S.lower() -> string\n\
+\n\
+Return a copy of the string S converted to lowercase.";
+
+/* Implements S.lower(): builds a new string of the same length in which
+   every byte that isupper() reports as uppercase is passed through
+   tolower(); all other bytes are copied unchanged. */
+static PyObject *
+string_lower(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *src = PyString_AS_STRING(self);
+	char *dst;
+	int idx, size = PyString_GET_SIZE(self);
+	PyObject *result;
+
+	if (!PyArg_ParseTuple(args, ""))
+		return NULL;
+	result = PyString_FromStringAndSize(NULL, size);
+	if (result == NULL)
+		return NULL;
+	dst = PyString_AsString(result);
+	for (idx = 0; idx < size; idx++) {
+		int ch = Py_CHARMASK(src[idx]);
+		dst[idx] = isupper(ch) ? tolower(ch) : ch;
+	}
+	return result;
+}
+
+
+static char upper__doc__[] =
+"S.upper() -> string\n\
+\n\
+Return a copy of the string S converted to uppercase.";
+
+/* Implements S.upper(): builds a new string of the same length in which
+   every byte that islower() reports as lowercase is passed through
+   toupper(); all other bytes are copied unchanged. */
+static PyObject *
+string_upper(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *src = PyString_AS_STRING(self);
+	char *dst;
+	int idx, size = PyString_GET_SIZE(self);
+	PyObject *result;
+
+	if (!PyArg_ParseTuple(args, ""))
+		return NULL;
+	result = PyString_FromStringAndSize(NULL, size);
+	if (result == NULL)
+		return NULL;
+	dst = PyString_AsString(result);
+	for (idx = 0; idx < size; idx++) {
+		int ch = Py_CHARMASK(src[idx]);
+		dst[idx] = islower(ch) ? toupper(ch) : ch;
+	}
+	return result;
+}
+
+
+static char capitalize__doc__[] =
+"S.capitalize() -> string\n\
+\n\
+Return a copy of the string S with only its first character\n\
+capitalized.";
+
+/* Implements S.capitalize(): the first byte is uppercased when it is a
+   lowercase letter, and every later byte is lowercased when it is an
+   uppercase letter.  Always returns a new string. */
+static PyObject *
+string_capitalize(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *src = PyString_AS_STRING(self);
+	char *dst;
+	int idx, size = PyString_GET_SIZE(self);
+	PyObject *result;
+
+	if (!PyArg_ParseTuple(args, ""))
+		return NULL;
+	result = PyString_FromStringAndSize(NULL, size);
+	if (result == NULL)
+		return NULL;
+	dst = PyString_AsString(result);
+	for (idx = 0; idx < size; idx++) {
+		int ch = Py_CHARMASK(src[idx]);
+		if (idx == 0)
+			dst[idx] = islower(ch) ? toupper(ch) : ch;
+		else
+			dst[idx] = isupper(ch) ? tolower(ch) : ch;
+	}
+	return result;
+}
+
+
+static char count__doc__[] =
+"S.count(sub[, start[, end]]) -> int\n\
+\n\
+Return the number of occurrences of substring sub in string\n\
+S[start:end]. Optional arguments start and end are\n\
+interpreted as in slice notation.";
+
+/* Implements S.count(): counts non-overlapping occurrences of sub in
+   S[start:end], with the bounds clamped the way slices do.  An empty
+   sub is counted once per position in the range, including the
+   position just past its end. */
+static PyObject *
+string_count(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *s = PyString_AS_STRING(self), *sub;
+	int len = PyString_GET_SIZE(self), n;
+	int i = 0, last = INT_MAX;
+	int m, r;
+
+	if (!PyArg_ParseTuple(args, "t#|ii", &sub, &n, &i, &last))
+		return NULL;
+	if (last > len)
+		last = len;
+	if (last < 0)
+		last += len;
+	if (last < 0)
+		last = 0;
+	if (i < 0)
+		i += len;
+	if (i < 0)
+		i = 0;
+	/* m: one past the last index where a match can begin */
+	m = last + 1 - n;
+	if (n == 0)
+		/* A start beyond the end would make m-i negative; an
+		   empty range holds zero matches, never fewer. */
+		return PyInt_FromLong((long) (m > i ? m-i : 0));
+
+	r = 0;
+	while (i < m) {
+		if (!memcmp(s+i, sub, n)) {
+			r++;
+			/* skip the whole match: occurrences do not overlap */
+			i += n;
+		} else {
+			i++;
+		}
+	}
+	return PyInt_FromLong((long) r);
+}
+
+
+static char swapcase__doc__[] =
+"S.swapcase() -> string\n\
+\n\
+Return a copy of the string S with upper case characters\n\
+converted to lowercase and vice versa.";
+
+/* Implements S.swapcase(): builds a new string of the same length in
+   which lowercase bytes are uppercased, uppercase bytes are lowercased
+   and everything else is copied unchanged. */
+static PyObject *
+string_swapcase(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *src = PyString_AS_STRING(self);
+	char *dst;
+	int idx, size = PyString_GET_SIZE(self);
+	PyObject *result;
+
+	if (!PyArg_ParseTuple(args, ""))
+		return NULL;
+	result = PyString_FromStringAndSize(NULL, size);
+	if (result == NULL)
+		return NULL;
+	dst = PyString_AsString(result);
+	for (idx = 0; idx < size; idx++) {
+		int ch = Py_CHARMASK(src[idx]);
+		if (islower(ch))
+			dst[idx] = toupper(ch);
+		else
+			dst[idx] = isupper(ch) ? tolower(ch) : ch;
+	}
+	return result;
+}
+
+
+static char translate__doc__[] =
+"S.translate(table [,deletechars]) -> string\n\
+\n\
+Return a copy of the string S, where all characters occurring\n\
+in the optional argument deletechars are removed, and the\n\
+remaining characters have been mapped through the given\n\
+translation table, which must be a string of length 256.";
+
+/* Implements S.translate().  Maps every byte of S through the 256-byte
+   table and drops bytes listed in the optional deletechars argument.
+   Raises ValueError when the table is not exactly 256 bytes long.
+   When no byte ends up changed or deleted, S itself is returned with a
+   new reference instead of the freshly built copy. */
+static PyObject *
+string_translate(self, args)
+ PyStringObject *self;
+ PyObject *args;
+{
+ register char *input, *table, *output;
+ register int i, c, changed = 0;
+ PyObject *input_obj = (PyObject*)self;
+ char *table1, *output_start, *del_table=NULL;
+ int inlen, tablen, dellen = 0;
+ PyObject *result;
+ int trans_table[256];
+
+ if (!PyArg_ParseTuple(args, "t#|t#",
+ &table1, &tablen, &del_table, &dellen))
+ return NULL;
+ if (tablen != 256) {
+ PyErr_SetString(PyExc_ValueError,
+ "translation table must be 256 characters long");
+ return NULL;
+ }
+
+ table = table1;
+ inlen = PyString_Size(input_obj);
+ /* The output can never be longer than the input */
+ result = PyString_FromStringAndSize((char *)NULL, inlen);
+ if (result == NULL)
+ return NULL;
+ output_start = output = PyString_AsString(result);
+ input = PyString_AsString(input_obj);
+
+ if (dellen == 0) {
+ /* If no deletions are required, use faster code */
+ for (i = inlen; --i >= 0; ) {
+ c = Py_CHARMASK(*input++);
+ if (Py_CHARMASK((*output++ = table[c])) != c)
+ changed = 1;
+ }
+ if (changed)
+ return result;
+ /* Identity translation: discard the copy, return the input */
+ Py_DECREF(result);
+ Py_INCREF(input_obj);
+ return input_obj;
+ }
+
+ /* Widen the table to ints so that -1 can mark deleted bytes */
+ for (i = 0; i < 256; i++)
+ trans_table[i] = Py_CHARMASK(table[i]);
+
+ for (i = 0; i < dellen; i++)
+ trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
+
+ /* changed is set when a byte is deleted or maps to a different
+ value; the continue skips it only for bytes copied unchanged */
+ for (i = inlen; --i >= 0; ) {
+ c = Py_CHARMASK(*input++);
+ if (trans_table[c] != -1)
+ if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
+ continue;
+ changed = 1;
+ }
+ if (!changed) {
+ Py_DECREF(result);
+ Py_INCREF(input_obj);
+ return input_obj;
+ }
+ /* Fix the size of the resulting string */
+ if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
+ return NULL;
+ return result;
+}
+
+
+/* What follows is used for implementing replace(). Perry Stoll. */
+
+/*
+  mymemfind
+
+  A strstr() work-alike for arbitrary blocks of memory.
+
+  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
+  bytes at PAT.  Returns the index into MEM of the match, or -1 when
+  there is none.  A pattern longer than MEM never matches, so -1 is
+  returned in that case too.
+*/
+static int
+mymemfind(mem, len, pat, pat_len)
+	char *mem;
+	int len;
+	char *pat;
+	int pat_len;
+{
+	register int pos;
+	/* a match cannot start within the last pat_len-1 bytes */
+	int last_start = len - pat_len;
+
+	for (pos = 0; pos <= last_start; pos++) {
+		if (mem[pos] != pat[0])
+			continue;
+		if (pat_len == 1 ||
+		    memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
+			return pos;
+	}
+	return -1;
+}
+
+/*
+  mymemcnt
+
+  Count the non-overlapping occurrences of PAT in MEM, scanning left
+  to right.  For example mem=1111 with pat=11 gives 2, and mem=11111
+  with pat=11 gives 2 as well.
+ */
+static int
+mymemcnt(mem, len, pat, pat_len)
+	char *mem;
+	int len;
+	char *pat;
+	int pat_len;
+{
+	register int at = 0;
+	int hits;
+
+	for (hits = 0; len >= 0; hits++) {
+		at = mymemfind(mem, len, pat, pat_len);
+		if (at == -1)
+			break;
+		/* resume the search right after this match */
+		mem += at + pat_len;
+		len -= at + pat_len;
+	}
+	return hits;
+}
+
+/*
+  mymemreplace
+
+  Return a string in which occurrences of PAT in memory STR are
+  replaced with SUB.
+
+  If STR is empty, if PAT is longer than STR, or if there are no
+  occurrences of PAT in STR, then the original string is returned.
+  Otherwise, a new string is allocated here with malloc() and returned;
+  the caller must free() it.
+
+  COUNT limits the number of replacements; 0 means replace all.
+
+  on return, out_len is:
+      the length of output string, or
+      -1 if the input string is returned, or
+      unchanged if an error occurs (no memory).
+
+  return value is:
+      the new string allocated locally, or
+      NULL if an error occurred.
+*/
+static char *
+mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
+	char *str;
+	int len;     /* input string */
+	char *pat;
+	int pat_len; /* pattern string to find */
+	char *sub;
+	int sub_len; /* substitution string */
+	int count;   /* number of replacements, 0 == all */
+	int *out_len;
+
+{
+	char *out_s;
+	char *new_s;
+	int nfound, offset, new_len;
+
+	if (len == 0 || pat_len > len)
+		goto return_same;
+
+	/* find length of output string */
+	nfound = mymemcnt(str, len, pat, pat_len);
+	if (count > 0)
+		nfound = nfound > count ? count : nfound;
+	if (nfound == 0)
+		goto return_same;
+	new_len = len + nfound*(sub_len - pat_len);
+
+	/* The result is empty when every byte is replaced by an empty
+	   SUB.  malloc(0) may return NULL, which the caller would take
+	   for out-of-memory, so always request at least one byte. */
+	new_s = (char *)malloc(new_len > 0 ? new_len : 1);
+	if (new_s == NULL) return NULL;
+
+	*out_len = new_len;
+	out_s = new_s;
+
+	while (len > 0) {
+		/* find index of next instance of pattern */
+		offset = mymemfind(str, len, pat, pat_len);
+		/* if not found,  break out of loop */
+		if (offset == -1) break;
+
+		/* copy non matching part of input string */
+		memcpy(new_s, str, offset); /* copy part of str before pat */
+		str += offset + pat_len; /* move str past pattern */
+		len -= offset + pat_len; /* reduce length of str remaining */
+
+		/* copy substitute into the output string */
+		new_s += offset; /* move new_s to dest for sub string */
+		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
+		new_s += sub_len; /* offset new_s past sub string */
+
+		/* break when we've done count replacements */
+		if (--count == 0) break;
+	}
+	/* copy any remaining values into output string */
+	if (len > 0)
+		memcpy(new_s, str, len);
+	return out_s;
+
+  return_same:
+	*out_len = -1;
+	return str;
+}
+
+
+static char replace__doc__[] =
+"S.replace (old, new[, maxsplit]) -> string\n\
+\n\
+Return a copy of string S with all occurrences of substring\n\
+old replaced by new. If the optional argument maxsplit is\n\
+given, only the first maxsplit occurrences are replaced.";
+
+/* Implements S.replace().  An empty pattern raises ValueError.  The
+   work is done by mymemreplace(); when it reports that nothing was
+   replaced, S itself is returned with a new reference. */
+static PyObject *
+string_replace(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *str = PyString_AS_STRING(self);
+	int len = PyString_GET_SIZE(self);
+	char *pat, *sub, *buf;
+	int pat_len, sub_len, out_len;
+	int count = 0;
+	PyObject *result;
+
+	if (!PyArg_ParseTuple(args, "t#t#|i",
+			      &pat, &pat_len, &sub, &sub_len, &count))
+		return NULL;
+	if (pat_len <= 0) {
+		PyErr_SetString(PyExc_ValueError, "empty pattern string");
+		return NULL;
+	}
+	buf = mymemreplace(str, len, pat, pat_len, sub, sub_len,
+			   count, &out_len);
+	if (buf == NULL) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+	if (out_len != -1) {
+		/* buf is a malloc'ed copy: wrap it, then release it */
+		result = PyString_FromStringAndSize(buf, out_len);
+		free(buf);
+		return result;
+	}
+	/* we're returning another reference to self */
+	result = (PyObject*)self;
+	Py_INCREF(result);
+	return result;
+}
+
+
+static char startswith__doc__[] =
+"S.startswith(prefix[, start[, end]]) -> int\n\
+\n\
+Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
+optional start, test S beginning at that position. With optional end, stop\n\
+comparing S at that position.";
+
+/* Implements S.startswith().  Returns 1 when the prefix occurs at
+   offset start and, if a non-negative end was given, fits in front of
+   it; returns 0 otherwise. */
+static PyObject *
+string_startswith(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *str = PyString_AS_STRING(self);
+	int len = PyString_GET_SIZE(self);
+	char *prefix;
+	int plen;
+	int start = 0;
+	int end = -1;
+
+	if (!PyArg_ParseTuple(args, "t#|ii", &prefix, &plen, &start, &end))
+		return NULL;
+
+	/* adopt Java semantics for index out of range.  it is legal for
+	 * offset to be == plen, but this only returns true if prefix is
+	 * the empty string.
+	 */
+	if (start < 0 || start+plen > len)
+		return PyInt_FromLong(0);
+
+	if (memcmp(str+start, prefix, plen) != 0)
+		return PyInt_FromLong(0);
+
+	/* did the match end after the specified end? */
+	if (end >= 0 && end - start < plen)
+		return PyInt_FromLong(0);
+
+	return PyInt_FromLong(1);
+}
+
+
+static char endswith__doc__[] =
+"S.endswith(suffix[, start[, end]]) -> int\n\
+\n\
+Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
+optional start, test S beginning at that position. With optional end, stop\n\
+comparing S at that position.";
+
+/* Implements S.endswith().  The comparison window ends at end when end
+   lies in 0..len, otherwise at the end of S, and never begins before
+   start.  Returns 1 when the suffix fills the tail of that window and
+   0 otherwise. */
+static PyObject *
+string_endswith(self, args)
+	PyStringObject *self;
+	PyObject *args;
+{
+	char *str = PyString_AS_STRING(self);
+	int len = PyString_GET_SIZE(self);
+	char *suffix;
+	int plen;
+	int start = 0;
+	int end = -1;
+	int lower, upper;
+	int matched;
+
+	if (!PyArg_ParseTuple(args, "t#|ii", &suffix, &plen, &start, &end))
+		return NULL;
+
+	if (start < 0 || start > len || plen > len)
+		return PyInt_FromLong(0);
+
+	/* upper: one past the last byte compared */
+	upper = len;
+	if (end >= 0 && end <= len)
+		upper = end;
+
+	/* lower: where the suffix would have to begin, but not before
+	   start */
+	lower = upper - plen;
+	if (lower <= start)
+		lower = start;
+
+	matched = (upper-lower >= plen && !memcmp(str+lower, suffix, plen));
+	return PyInt_FromLong(matched ? 1 : 0);
+}
+
+
+
+/* Method table for string objects, searched by string_getattr().
+   Every entry uses calling convention flag 1, i.e. the arguments
+   arrive as a tuple for PyArg_ParseTuple(). */
+static PyMethodDef
+string_methods[] = {
+	/* counterparts of the obsolete stropmodule functions */
+	{"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
+	{"count", (PyCFunction)string_count, 1, count__doc__},
+	{"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
+	{"find", (PyCFunction)string_find, 1, find__doc__},
+	{"index", (PyCFunction)string_index, 1, index__doc__},
+	{"join", (PyCFunction)string_join, 1, join__doc__},
+	{"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
+	{"lower", (PyCFunction)string_lower, 1, lower__doc__},
+	/* maketrans */
+	{"replace", (PyCFunction)string_replace, 1, replace__doc__},
+	{"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
+	{"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
+	{"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
+	{"split", (PyCFunction)string_split, 1, split__doc__},
+	{"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
+	{"strip", (PyCFunction)string_strip, 1, strip__doc__},
+	{"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
+	/* translate must carry its own docstring, not strip's */
+	{"translate", (PyCFunction)string_translate, 1, translate__doc__},
+	{"upper", (PyCFunction)string_upper, 1, upper__doc__},
+	/* TBD */
+/* {"ljust" (PyCFunction)string_ljust, 1, ljust__doc__}, */
+/* {"rjust" (PyCFunction)string_rjust, 1, rjust__doc__}, */
+/* {"center" (PyCFunction)string_center, 1, center__doc__}, */
+/* {"zfill" (PyCFunction)string_zfill, 1, zfill__doc__}, */
+/* {"expandtabs" (PyCFunction)string_expandtabs, 1, ljust__doc__}, */
+/* {"capwords" (PyCFunction)string_capwords, 1, capwords__doc__}, */
+	{NULL, NULL} /* sentinel */
+};
+
+/* Attribute lookup for string objects: resolves name against the
+   string_methods table through Py_FindMethod() and returns whatever
+   that call returns. */
+static PyObject *
+string_getattr(s, name)
+ PyStringObject *s;
+ char *name;
+{
+ return Py_FindMethod(string_methods, (PyObject*)s, name);
+}
+
+
PyTypeObject PyString_Type = {
PyObject_HEAD_INIT(&PyType_Type)
0,
@@ -533,7 +1576,7 @@ PyTypeObject PyString_Type = {
sizeof(char),
(destructor)string_dealloc, /*tp_dealloc*/
(printfunc)string_print, /*tp_print*/
- 0, /*tp_getattr*/
+ (getattrfunc)string_getattr, /*tp_getattr*/
0, /*tp_setattr*/
(cmpfunc)string_compare, /*tp_compare*/
(reprfunc)string_repr, /*tp_repr*/
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 9bb8784b0fd..c220d84100d 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -812,24 +812,6 @@ globals and locals. If only globals is given, locals defaults to it.";
static PyObject *
-builtin_float(self, args)
- PyObject *self;
- PyObject *args;
-{
- PyObject *v;
-
- if (!PyArg_ParseTuple(args, "O:float", &v))
- return NULL;
- return PyNumber_Float(v);
-}
-
-static char float_doc[] =
-"float(x) -> floating point number\n\
-\n\
-Convert a string or number to a floating point number, if possible.";
-
-
-static PyObject *
builtin_getattr(self, args)
PyObject *self;
PyObject *args;
@@ -1251,17 +1233,79 @@ builtin_int(self, args)
PyObject *args;
{
PyObject *v;
+ int base = -909; /* unlikely! */
- if (!PyArg_ParseTuple(args, "O:int", &v))
+ if (!PyArg_ParseTuple(args, "O|i:int", &v, &base))
return NULL;
- return PyNumber_Int(v);
+ if (base == -909)
+ return PyNumber_Int(v);
+ else if (!PyString_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't convert non-string with explicit base");
+ return NULL;
+ }
+ return PyInt_FromString(PyString_AS_STRING(v), NULL, base);
}
static char int_doc[] =
-"int(x) -> integer\n\
+"int(x[, base]) -> integer\n\
+\n\
+Convert a string or number to an integer, if possible. A floating point\n\
+argument will be truncated towards zero (this does not include a string\n\
+representation of a floating point number!) When converting a string, use\n\
+the optional base. It is an error to supply a base when converting a\n\
+non-string.";
+
+
+/* Implements the builtin long(x[, base]).  Without a base the argument
+   goes through PyNumber_Long(); with a base it must be a string, which
+   is then parsed by PyLong_FromString(). */
+static PyObject *
+builtin_long(self, args)
+	PyObject *self;
+	PyObject *args;
+{
+	PyObject *v;
+	int base = -909;		     /* unlikely! */
+
+	if (!PyArg_ParseTuple(args, "O|i:long", &v, &base))
+		return NULL;
+	if (base != -909) {
+		/* -909 is the "no base given" marker; anything else
+		   came from the caller */
+		if (!PyString_Check(v)) {
+			PyErr_SetString(PyExc_TypeError,
+				"can't convert non-string with explicit base");
+			return NULL;
+		}
+		return PyLong_FromString(PyString_AS_STRING(v), NULL, base);
+	}
+	return PyNumber_Long(v);
+}
+
+static char long_doc[] =
+"long(x) -> long integer\n\
+long(x, base) -> long integer\n\
+\n\
+Convert a string or number to a long integer, if possible. A floating\n\
+point argument will be truncated towards zero (this does not include a\n\
+string representation of a floating point number!) When converting a\n\
+string, use the given base. It is an error to supply a base when\n\
+converting a non-string.";
+
+
+/* Implements the builtin float(x): strings are parsed by
+   PyFloat_FromString(), everything else goes through
+   PyNumber_Float(). */
+static PyObject *
+builtin_float(self, args)
+	PyObject *self;
+	PyObject *args;
+{
+	PyObject *v;
+
+	if (!PyArg_ParseTuple(args, "O:float", &v))
+		return NULL;
+	return PyString_Check(v) ? PyFloat_FromString(v, NULL)
+				 : PyNumber_Float(v);
+}
+
+static char float_doc[] =
+"float(x) -> floating point number\n\
\n\
-Convert a string or number to an integer, if possible.\n\
-A floating point argument will be truncated towards zero.";
+Convert a string or number to a floating point number, if possible.";
static PyObject *
@@ -1352,25 +1396,6 @@ Return the dictionary containing the current scope's local variables.";
static PyObject *
-builtin_long(self, args)
- PyObject *self;
- PyObject *args;
-{
- PyObject *v;
-
- if (!PyArg_ParseTuple(args, "O:long", &v))
- return NULL;
- return PyNumber_Long(v);
-}
-
-static char long_doc[] =
-"long(x) -> long integer\n\
-\n\
-Convert a string or number to a long integer, if possible.\n\
-A floating point argument will be truncated towards zero.";
-
-
-static PyObject *
min_max(args, sign)
PyObject *args;
int sign;