Source code for Exscript.util.match

#
# Copyright (C) 2010-2017 Samuel Abels
# The MIT License (MIT)
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
Shorthands for regular expression matching.
"""
from __future__ import print_function, absolute_import
import re
from ..protocols import Protocol


def _first_match(string, compiled):
    match = compiled.search(string)
    if match is None and compiled.groups <= 1:
        return None
    elif match is None:
        return (None,) * compiled.groups
    elif compiled.groups == 0:
        return string
    elif compiled.groups == 1:
        return match.groups()[0]
    else:
        return match.groups()


def first_match(string, regex, flags=re.M):
    r"""
    Matches the given string against the given regex.

      - If no match is found and the regular expression has zero or one
        groups, this function returns None.

      - If no match is found and the regular expression has more than one
        group, this function returns a tuple of None. The number of elements
        in the tuple equals the number of groups in the regular expression.

      - If a match is found and the regular expression has no groups,
        the entire string is returned.

      - If a match is found and the regular expression has one group,
        the matching string from the group is returned.

      - If a match is found and the regular expression has multiple groups,
        a tuple containing the matching strings from the groups is returned.

    This behavior ensures that the following assignments can never fail::

       foo   = 'my test'
       match = first_match(foo, r'aaa')         # Returns None
       match = first_match(foo, r'\S+')         # Returns 'my test'
       match = first_match(foo, r'(aaa)')       # Returns None
       match = first_match(foo, r'(\S+)')       # Returns 'my'
       match = first_match(foo, r'(aaa) (\S+)') # Returns (None, None)
       match = first_match(foo, r'(\S+) (\S+)') # Returns ('my', 'test')

    :type  string: string|Exscript.protocols.Protocol
    :param string: The string that is matched, or a Protocol object.
    :type  regex: string
    :param regex: A regular expression.
    :type  flags: int
    :param flags: The flags for compiling the regex; e.g. re.I
    :rtype:  string|tuple
    :return: A match, or a tuple of matches.
    """
    # A Protocol object stands in for its "response" attribute.
    if isinstance(string, Protocol):
        string = string.response
    return _first_match(string, re.compile(regex, flags))


def any_match(string, regex, flags=re.M):
    r"""
    Matches the given string against the given regex, one line at a time.

      - If no match is found, this function returns an empty list.

      - If a match is found and the regular expression has no groups,
        a list of matching lines is returned.

      - If a match is found and the regular expression has one group,
        a list of matching strings is returned. Lines on which the group
        did not take part in the match are left out, so the list never
        contains None.

      - If a match is found and the regular expression has multiple groups,
        a list containing tuples of matching strings is returned. Groups
        that did not take part in a match are None in the tuple.

    This behavior ensures that the following can never fail::

        foo = '1 uno\n2 due'
        for m in any_match(foo, r'aaa'):   # Returns []
            print(m)

        for m in any_match(foo, r'\S+'):   # Returns ['1 uno', '2 due']
            print(m)

        for m in any_match(foo, r'(aaa)'): # Returns []
            print(m)

        for m in any_match(foo, r'(\S+)'): # Returns ['1', '2']
            print(m)

        for one, two in any_match(foo, r'(aaa) (\S+)'): # Returns []
            print(one, two)

        for one, two in any_match(foo, r'(\S+) (\S+)'): # Returns [('1', 'uno'), ('2', 'due')]
            print(one, two)

    :type  string: string|Exscript.protocols.Protocol
    :param string: The string that is matched, or a Protocol object.
    :type  regex: string
    :param regex: A regular expression.
    :type  flags: int
    :param flags: The flags for compiling the regex; e.g. re.I
    :rtype:  list[string|tuple]
    :return: A list of strings, or a list of tuples.
    """
    # A Protocol object stands in for its "response" attribute.
    if isinstance(string, Protocol):
        string = string.response
    compiled = re.compile(regex, flags)
    lines = string.split('\n')

    if compiled.groups <= 1:
        # With zero or one groups, _first_match() reports "nothing usable"
        # as None; such lines are left out of the result.
        hits = (_first_match(line, compiled) for line in lines)
        return [hit for hit in hits if hit is not None]

    # With several groups, decide on the match object itself rather than on
    # the content of the first group: an optional first group may be None
    # even though the line matched, and such a line must not be dropped.
    found = (compiled.search(line) for line in lines)
    return [match.groups() for match in found if match is not None]