Python regex, numbers and units

Parsing strings and converting them to int or float numbers is a very common need. The first idea that comes to mind is to simply try the conversion:

In [1]:
from __future__ import print_function # ensure Python3 compatibility

def str2digit(s):
    """Convert ``s`` to a number if possible, else return ``None``.

    ``int(s)`` is tried first so that integer-looking input stays an
    ``int``; ``float(s)`` is the fallback.

    :param s: value to convert, typically a string. ``None`` is accepted
        and yields ``None``.
    :returns: an ``int``, otherwise a ``float``, otherwise ``None`` when
        neither conversion succeeds.
    """
    for convert in (int, float):
        try:
            return convert(s)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures are treated as "not a number":
            # ValueError for non-numeric text, TypeError for unsupported
            # types such as None, OverflowError for int(inf).
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            continue
    return None
        
# Maps each input string to the value str2digit is expected to return
# (None means "not a number").
tests = {'+1.15': 1.15,
         '-0.73': -0.73,
         '-5E-3': -0.005,
         '-5E3': -5000.0,
         '.2': 0.2,
         '05': 5,
         '7': 7,
         '7m': None,
         'm7': None,
         'A': None}

# dict.items() works on both Python 2 and Python 3; iteritems() is Python 2 only.
for test, result in tests.items():
    assert str2digit(test) == result

Ok. This works great! Let’s try now with regular expressions. The tricky part is to pass all the above tests.

In [2]:
import re
# Optional sign, then either digits with an optional fractional part or a
# bare fraction (".2"), then an optional exponent, and finally whitespace or
# end of string so that trailing garbage such as "7m" is rejected.
regex = re.compile(r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?(\s+|$)')

# dict.items() works on both Python 2 and Python 3; iteritems() is Python 2 only.
for test, result in tests.items():
    m = regex.match(test)
    # No match means the string is not a number: feed None to str2digit.
    res = m.group(0) if m else None
    assert str2digit(res) == result

Perfect! What if we need to deal with numbers and units, like “-1.3mm/s^2”?

float numbers with units

In [3]:
# Verbose-mode pattern: a leading number captured as "magnitude", followed by
# an optional unit expression (names, ^exponents, * and / separators) captured
# as "unit". The unit group matches the empty string when no unit is present.
regex = re.compile(r"""^(?P<magnitude>[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)\s*  # magnitude
                        (?P<unit>(\w+                                              # mm
                                 (\^[+-]*\d*)*                                     # ^2 
                                 ([*/]\w+(\^[+-]*\d*)*)*                           # /sec^3
                                 )*
                    )""", re.X)

# Maps each input string to the expected (magnitude, unit) pair;
# (None, None) means the string does not start with a number.
tests = {'+1.15': (1.15, ''),
         '-0.73mm/s^2*K': (-0.73, 'mm/s^2*K'),
         '-5E-3ft': (-0.005, 'ft'),
         '-5.9E3in^+2': (-5900.0, 'in^+2'),
         '-5.9E3in^-2': (-5900.0, 'in^-2'),
         '-5E3in^2': (-5000.0, 'in^2'),
         '.2m': (0.2, 'm'),
         '05kg': (5, 'kg'),
         '7mm': (7, 'mm'),
         '-7m': (-7, 'm'),
         '-m7': (None, None)}

# dict.items() works on both Python 2 and Python 3; iteritems() is Python 2 only.
for test, (expmag, expunit) in tests.items():
    # Search once and reuse the match object for both groups.
    m = regex.search(test)
    if m is None:
        magnitude, unit = None, None
    else:
        magnitude, unit = m.group('magnitude'), m.group('unit')
    assert str2digit(magnitude) == expmag
    assert unit == expunit

Tags cloud

Links and friends

social