From dfac4b4398d291a7d57d83a0bfb27ea380d8b447 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Thu, 1 Oct 2020 19:00:15 +0700 Subject: [PATCH 1/2] Add some type hints Add some type hints for python --- pycrfsuite/_dumpparser.py | 12 ++++++------ pycrfsuite/_logparser.py | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pycrfsuite/_dumpparser.py b/pycrfsuite/_dumpparser.py index 7f490fe..1d4f460 100644 --- a/pycrfsuite/_dumpparser.py +++ b/pycrfsuite/_dumpparser.py @@ -47,7 +47,7 @@ def __init__(self): self.state = None self.result = ParsedDump() - def feed(self, line): + def feed(self, line: str): # Strip initial ws and line terminator, but allow for ws at the end of feature names. line = line.lstrip().rstrip('\r\n') if not line: @@ -61,26 +61,26 @@ def feed(self, line): else: getattr(self, 'parse_%s' % self.state)(line) - def parse_FILEHEADER(self, line): + def parse_FILEHEADER(self, line: str): m = re.match(r"(\w+): (.*)", line) self.result.header[m.group(1)] = m.group(2) - def parse_LABELS(self, line): + def parse_LABELS(self, line: str): m = re.match(r"(\d+): (.*)", line) self.result.labels[m.group(2)] = m.group(1) - def parse_ATTRIBUTES(self, line): + def parse_ATTRIBUTES(self, line: str): m = re.match(r"(\d+): (.*)", line) self.result.attributes[m.group(2)] = m.group(1) - def parse_TRANSITIONS(self, line): + def parse_TRANSITIONS(self, line: str): m = re.match(r"\(\d+\) (.+) --> (.+): ([+-]?\d+\.\d+)", line) from_, to_ = m.group(1), m.group(2) assert from_ in self.result.labels assert to_ in self.result.labels self.result.transitions[(from_, to_)] = float(m.group(3)) - def parse_STATE_FEATURES(self, line): + def parse_STATE_FEATURES(self, line: str): m = re.match(r"\(\d+\) (.+) --> (.+): ([+-]?\d+\.\d+)", line) attr, label = m.group(1), m.group(2) assert attr in self.result.attributes diff --git a/pycrfsuite/_logparser.py b/pycrfsuite/_logparser.py index d41baee..e43c36c 100644 --- a/pycrfsuite/_logparser.py +++ b/pycrfsuite/_logparser.py @@ -22,7 +22,7 @@ def __init__(self): self.log = [] self.events = [] - def feed(self, line): + def feed(self, line: str): # if line != '\n': self.log.append(line) if self.state is None: @@ -43,11 +43,11 @@ def last_log(self): event, start, end = self.events[-1] return ''.join(self.log[start:end]) - def handle_STARTING(self, line): + def handle_STARTING(self, line: str): if line.startswith('Feature generation'): self.state = 'FEATGEN' - def handle_FEATGEN(self, line): + def handle_FEATGEN(self, line: str): if line in "0123456789.10": self.featgen_percent += 2 return 'featgen_progress' @@ -62,7 +62,7 @@ def handle_FEATGEN(self, line): self.state = 'AFTER_FEATGEN' return 'featgen_end' - def handle_AFTER_FEATGEN(self, line): + def handle_AFTER_FEATGEN(self, line: str): if self._iteration_head(line) is not None: self.state = 'ITERATION' self.handle_ITERATION(line) @@ -72,7 +72,7 @@ def handle_AFTER_FEATGEN(self, line): self.state = 'AFTER_ITERATION' return 'prepare_error' - def handle_ITERATION(self, line): + def handle_ITERATION(self, line: str): if self._iteration_head(line) is not None: self.last_iteration = { 'num': self._iteration_head(line), @@ -83,7 +83,7 @@ def handle_ITERATION(self, line): self.state = 'AFTER_ITERATION' return 'iteration' - def add_re(key, pattern, typ): + def add_re(key: str, pattern: str, typ: str): m = re.match(pattern, line) if m: self.last_iteration[key] = typ(m.group(1)) @@ -136,7 +136,7 @@ def add_re(key, pattern, typ): 'f1': None, }) - def handle_AFTER_ITERATION(self, line): + def handle_AFTER_ITERATION(self, line: str): if self._iteration_head(line) is not None: self.state = 'ITERATION' return self.handle_ITERATION(line) @@ -149,18 +149,18 @@ def handle_AFTER_ITERATION(self, line): self.state = 'STORING' return 'optimization_end' - def handle_STORING(self, line): + def handle_STORING(self, line: str): if line == '\n': return 'end' elif self._seconds(line): self.storing_seconds = self._seconds(line) - def _iteration_head(self, line): + def _iteration_head(self, line: str): m = re.match(r'\*{5} (?:Iteration|Epoch) #(\d+) \*{5}\n', line) if m: return int(m.group(1)) - def _seconds(self, line): + def _seconds(self, line: str): m = re.match(r'Seconds required: (\d+\.\d+)', line) if m: return float(m.group(1)) From 07f551b3b3eb836955121aff3e9b658c93eb92d9 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Fri, 2 Oct 2020 13:45:59 +0700 Subject: [PATCH 2/2] Change type hints to Type checking --- pycrfsuite/_dumpparser.py | 18 ++++++++++++------ pycrfsuite/_logparser.py | 30 ++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/pycrfsuite/_dumpparser.py b/pycrfsuite/_dumpparser.py index 1d4f460..232e036 100644 --- a/pycrfsuite/_dumpparser.py +++ b/pycrfsuite/_dumpparser.py @@ -47,7 +47,8 @@ def __init__(self): self.state = None self.result = ParsedDump() - def feed(self, line: str): + def feed(self, line): + # type: (str) -> None # Strip initial ws and line terminator, but allow for ws at the end of feature names. line = line.lstrip().rstrip('\r\n') if not line: @@ -61,26 +62,31 @@ def feed(self, line: str): else: getattr(self, 'parse_%s' % self.state)(line) - def parse_FILEHEADER(self, line: str): + def parse_FILEHEADER(self, line): + # type: (str) -> None m = re.match(r"(\w+): (.*)", line) self.result.header[m.group(1)] = m.group(2) - def parse_LABELS(self, line: str): + def parse_LABELS(self, line): + # type: (str) -> None m = re.match(r"(\d+): (.*)", line) self.result.labels[m.group(2)] = m.group(1) - def parse_ATTRIBUTES(self, line: str): + def parse_ATTRIBUTES(self, line): + # type: (str) -> None m = re.match(r"(\d+): (.*)", line) self.result.attributes[m.group(2)] = m.group(1) - def parse_TRANSITIONS(self, line: str): + def parse_TRANSITIONS(self, line): + # type: (str) -> None m = re.match(r"\(\d+\) (.+) --> (.+): ([+-]?\d+\.\d+)", line) from_, to_ = m.group(1), m.group(2) assert from_ in self.result.labels assert to_ in self.result.labels self.result.transitions[(from_, to_)] = float(m.group(3)) - def parse_STATE_FEATURES(self, line: str): + def parse_STATE_FEATURES(self, line): + # type: (str) -> None m = re.match(r"\(\d+\) (.+) --> (.+): ([+-]?\d+\.\d+)", line) attr, label = m.group(1), m.group(2) assert attr in self.result.attributes diff --git a/pycrfsuite/_logparser.py b/pycrfsuite/_logparser.py index e43c36c..7d05cb5 100644 --- a/pycrfsuite/_logparser.py +++ b/pycrfsuite/_logparser.py @@ -22,7 +22,8 @@ def __init__(self): self.log = [] self.events = [] - def feed(self, line: str): + def feed(self, line): + # type: (str) -> None # if line != '\n': self.log.append(line) if self.state is None: @@ -43,11 +44,13 @@ def last_log(self): event, start, end = self.events[-1] return ''.join(self.log[start:end]) - def handle_STARTING(self, line: str): + def handle_STARTING(self, line): + # type: (str) -> None if line.startswith('Feature generation'): self.state = 'FEATGEN' - def handle_FEATGEN(self, line: str): + def handle_FEATGEN(self, line): + # type: (str) -> str if line in "0123456789.10": self.featgen_percent += 2 return 'featgen_progress' @@ -62,7 +65,8 @@ def handle_FEATGEN(self, line: str): self.state = 'AFTER_FEATGEN' return 'featgen_end' - def handle_AFTER_FEATGEN(self, line: str): + def handle_AFTER_FEATGEN(self, line): + # type: (str) -> str if self._iteration_head(line) is not None: self.state = 'ITERATION' self.handle_ITERATION(line) @@ -72,7 +76,8 @@ def handle_AFTER_FEATGEN(self, line: str): self.state = 'AFTER_ITERATION' return 'prepare_error' - def handle_ITERATION(self, line: str): + def handle_ITERATION(self, line): + # type: (str) -> None if self._iteration_head(line) is not None: self.last_iteration = { 'num': self._iteration_head(line), @@ -83,7 +88,8 @@ def handle_ITERATION(self, line: str): self.state = 'AFTER_ITERATION' return 'iteration' - def add_re(key: str, pattern: str, typ: str): + def add_re(key, pattern, typ): + # type: (str,str,str) -> None m = re.match(pattern, line) if m: self.last_iteration[key] = typ(m.group(1)) @@ -136,7 +142,8 @@ def add_re(key: str, pattern: str, typ: str): 'f1': None, }) - def handle_AFTER_ITERATION(self, line: str): + def handle_AFTER_ITERATION(self, line): + # type: (str) -> None if self._iteration_head(line) is not None: self.state = 'ITERATION' return self.handle_ITERATION(line) @@ -149,18 +156,21 @@ def handle_AFTER_ITERATION(self, line: str): self.state = 'STORING' return 'optimization_end' - def handle_STORING(self, line: str): + def handle_STORING(self, line): + # type: (str) -> None if line == '\n': return 'end' elif self._seconds(line): self.storing_seconds = self._seconds(line) - def _iteration_head(self, line: str): + def _iteration_head(self, line): + # type: (str) -> None m = re.match(r'\*{5} (?:Iteration|Epoch) #(\d+) \*{5}\n', line) if m: return int(m.group(1)) - def _seconds(self, line: str): + def _seconds(self, line): + # type: (str) -> float m = re.match(r'Seconds required: (\d+\.\d+)', line) if m: return float(m.group(1))