Parsing strings and converting them to int or float numbers is a very common need. The first idea that comes to mind is to simply try to convert them:
In [1]:
from __future__ import print_function # ensure Python3 compatibility
def str2digit(s):
    """Convert ``s`` to a number if possible, else return ``None``.

    ``int`` is tried first so that integer-looking input ('7', '05') comes
    back as an ``int``; anything ``int`` rejects is then offered to ``float``
    ('.2', '-5E3', '+1.15').

    :param s: value to convert, typically a string; ``None`` is accepted and
        yields ``None``.
    :returns: an ``int``, a ``float``, or ``None`` when neither conversion
        accepts ``s``.
    """
    # Only conversion failures are treated as "not a number". A bare
    # ``except:`` would also swallow KeyboardInterrupt/SystemExit.
    conversion_errors = (ValueError, TypeError, OverflowError)
    for convert in (int, float):
        try:
            return convert(s)
        except conversion_errors:
            continue
    return None
# Input string -> value str2digit is expected to return for it
# (None means "not a number").
tests = {
    '+1.15': 1.15,
    '-0.73': -0.73,
    '-5E-3': -0.005,
    '-5E3': -5000.0,
    '.2': 0.2,
    '05': 5,
    '7': 7,
    '7m': None,
    'm7': None,
    'A': None,
}

# dict.items() exists on both Python 2 and Python 3, whereas iteritems()
# was removed in Python 3.
for test, result in tests.items():
    assert str2digit(test) == result, test
Ok. This works great! Let’s try now with regular expressions. The tricky part is to pass all the above tests.
In [2]:
import re
# Optional sign, then either digits with an optional fractional part or a bare
# fraction ('.2'), then an optional exponent. The number must be followed by
# whitespace or the end of the string, which is what rejects inputs like '7m'.
regex = re.compile(r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?(\s+|$)')

# dict.items() exists on both Python 2 and Python 3, whereas iteritems()
# was removed in Python 3.
for test, result in tests.items():
    m = regex.match(test)  # match() anchors at the start, so 'm7' fails
    res = m.group(0) if m else None
    # str2digit maps None (no match) to None, the expected value for
    # non-numeric inputs.
    assert str2digit(res) == result, test
Perfect! What if we need to deal with numbers and units, like “-1.3mm/s^2”?
Float numbers with units
In [3]:
# Verbose pattern with two named groups:
#   magnitude - the same signed int/float syntax as before, anchored at the
#               start of the string;
#   unit      - zero or more unit words, each optionally raised to a power
#               ('^2', '^-2') and chained with '*' or '/'. It may be empty.
regex = re.compile(r"""^(?P<magnitude>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)\s* # magnitude
(?P<unit>(\w+ # mm
(\^[+-]*\d*)* # ^2
([*/]\w+(\^[+-]*\d*)*)* # /sec^3
)*
)""", re.X)

# Input string -> (expected magnitude, expected unit). (None, None) means the
# input must not match at all.
tests = {
    '+1.15': (1.15, ''),
    '-0.73mm/s^2*K': (-0.73, 'mm/s^2*K'),
    '-5E-3ft': (-0.005, 'ft'),
    '-5.9E3in^+2': (-5900.0, 'in^+2'),
    '-5.9E3in^-2': (-5900.0, 'in^-2'),
    '-5E3in^2': (-5000.0, 'in^2'),
    '.2m': (0.2, 'm'),
    '05kg': (5, 'kg'),
    '7mm': (7, 'mm'),
    '-7m': (-7, 'm'),
    '-m7': (None, None),
}

# dict.items() exists on both Python 2 and Python 3, whereas iteritems()
# was removed in Python 3.
for test, (expmag, expunit) in tests.items():
    # Search once and branch on the result rather than searching twice and
    # relying on AttributeError for the no-match case.
    match = regex.search(test)
    if match is None:
        magnitude, unit = None, None
    else:
        magnitude, unit = match.group('magnitude'), match.group('unit')
    # str2digit maps None (no match) to None.
    assert str2digit(magnitude) == expmag, test
    assert unit == expunit, test