Python regex, numbers and units

Parsing strings and converting them to int or float numbers is a very common need. The first idea that comes to mind is simply to try the conversion:

In [1]:
from __future__ import print_function # ensure Python3 compatibility

def str2digit(s):
    """Return ``s`` converted to a number if possible, else ``None``.

    ``int(s)`` is tried first so that integer-looking input stays an
    ``int``; ``float(s)`` is the fallback.

    Args:
        s: the value to convert, typically a string such as ``'7'``
            or ``'-5E-3'``. ``None`` is accepted and yields ``None``.

    Returns:
        An ``int`` if ``int(s)`` succeeds, otherwise a ``float`` if
        ``float(s)`` succeeds, otherwise ``None``.
    """
    for convert in (int, float):
        try:
            return convert(s)
        # Catch only conversion failures: a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        except (TypeError, ValueError, OverflowError):
            continue
    return None

# Input string -> expected result of str2digit(); None marks input that
# must be rejected as "not a number".
tests = {
    '+1.15': 1.15,
    '-0.73': -0.73,
    '-5E-3': -0.005,
    '-5E3': -5000.0,
    '.2': 0.2,
    '05': 5,
    '7': 7,
    '7m': None,
    'm7': None,
    'A': None,
}

# dict.items() exists on both Python 2 and Python 3; iteritems() is
# Python 2 only and raises AttributeError on Python 3.
for test, result in tests.items():
    assert str2digit(test) == result


Ok. This works great! Let’s try now with regular expressions. The tricky part is to pass all the above tests.

In [2]:
import re
# Optional sign, a mantissa ("12", "12.", "12.5" or ".5"), an optional
# exponent, then either trailing whitespace or the end of the string.
# The end-of-string anchor must be an unescaped "$": "\$" would match a
# literal dollar sign, so plain inputs such as '7' would never match.
regex = re.compile(r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?(\s+|$)')

# dict.items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for test, result in tests.items():
    m = regex.match(test)
    # No match means the string is not a number: str2digit(None) is None.
    res = m.group(0) if m else None
    assert str2digit(res) == result


Perfect! What if we need to deal with numbers and units, like “-1.3mm/s^2”?

Float numbers with units

In [3]:
# Verbose pattern (re.X ignores whitespace and "#" comments inside it).
# A raw string keeps "\d", "\s", "\w"... as regex escapes; in a non-raw
# string they are invalid string escapes that newer Python warns about.
regex = re.compile(r"""^(?P<magnitude>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)\s*  # magnitude
                       (?P<unit>(\w+                          # mm
                                 (\^[+-]*\d*)*                # ^2
                                 ([*/]\w+(\^[+-]*\d*)*)*      # /sec^3
                                )*
                       )""", re.X)

# Input string -> (expected magnitude, expected unit);
# (None, None) marks input that the pattern must not match.
tests = {
    '+1.15': (1.15, ''),
    '-0.73mm/s^2*K': (-0.73, 'mm/s^2*K'),
    '-5E-3ft': (-0.005, 'ft'),
    '-5.9E3in^+2': (-5900.0, 'in^+2'),
    '-5.9E3in^-2': (-5900.0, 'in^-2'),
    '-5E3in^2': (-5000.0, 'in^2'),
    '.2m': (0.2, 'm'),
    '05kg': (5, 'kg'),
    '7mm': (7, 'mm'),
    '-7m': (-7, 'm'),
    '-m7': (None, None),
}

# dict.items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for test, (expmag, expunit) in tests.items():
    # Search once and reuse the match object instead of running the
    # regex twice and relying on AttributeError for the no-match case.
    m = regex.search(test)
    if m is None:
        magnitude, unit = None, None
    else:
        magnitude, unit = m.group('magnitude'), m.group('unit')
    assert str2digit(magnitude) == expmag
    assert unit == expunit