Source code for k1lib.cli.grep

# AUTOGENERATED FILE! PLEASE DON'T EDIT
__all__ = ["grep", "grepTemplate"]
import re, k1lib
from k1lib.cli.init import BaseCli, Table, Row; import k1lib.cli as cli
from collections import deque; from typing import Iterator, Union, Callable, Any
# "Unbounded" sentinel: grep.till() stores it in `after`, and grep.__ror__ compares its running counter against it.
inf = float("inf")
class grep(BaseCli):
    # Streaming grep: emits elements matching a regex/predicate, optionally
    # with surrounding context and optionally grouped into per-hit sections.
    def __init__(self, pattern:Union[str, Callable[[Any], bool]], before:int=0, after:int=0, N:int=float("inf"), sep:bool=False, col:int=None):
        """Find lines that has the specified pattern.
Example::

    # returns ['d', 'd']
    "abcde12d34" | grep("d") | deref()
    # returns ['c', 'd', '2', 'd'], 2 sections of ['c', 'd'] and ['2', 'd']
    "abcde12d34" | grep("d", 1) | deref()
    # returns ['c', 'd']
    "abcde12d34" | grep("d", 1, N=1) | deref()
    # returns ['d', 'e', 'd', '3', '4'], 2 sections of ['d', 'e'] and ['d', '3', '4']
    # NOTE(review): with after=3 the implementation appears to also emit up to
    # 3 lines after the "e" that closes a section - confirm the expected output
    "abcde12d34" | grep("d", 0, 3).till("e") | deref()
    # returns [['0', '1', '2'], ['3', '1', '4']]
    "0123145" | grep("1", 2, 1, sep=True) | deref()

You can also separate out the sections::

    # returns [['c', 'd'], ['2', 'd']]
    "abcde12d34" | grep("d", 1, sep=True) | deref()
    # returns [['c', 'd']]
    "abcde12d34" | grep("d", 1, N=1, sep=True) | deref()
    # returns [['1', '2', '3'], ['1', '4', '5']]
    "0123145" | grep("1", sep=True).till() | deref()

You can also put in predicates instead of regex patterns::

    # returns ['d', 'd']
    "abcde12d34" | grep(lambda x: x == "d") | deref()
    # also returns ['d', 'd']
    "abcde12d34" | filt(lambda x: x == "d") | deref()
    # returns ['d', 'e', 'd', '3', '4']
    "abcde12d34" | grep(lambda x: x == "d").till(lambda x: x == "e") | deref()

The first scenario looks like a regular filter function, already implemented by
:class:`~k1lib.cli.filt.filt`, but :class:`grep` brings in more clustering features
for the price of reduced execution speed. So for simple scenarios it's advised that
you use :class:`~k1lib.cli.filt.filt`.

See also: :class:`~k1lib.cli.structural.groupBy`

Also, there's a `whole tutorial <../tutorials/cli.html>`_ devoted to just this cli

:param pattern: regex pattern to search for in a line, or a predicate
:param before: lines before the hit. Outputs independent lines
:param after: lines after the hit. Outputs independent lines
:param N: max sections to output
:param sep: whether to separate out the sections as lists. No hits
    means no sections are yielded
:param col: searches for pattern in a specific column"""
        super().__init__()
        if isinstance(pattern, str):
            # keep the bound `search` method so the hot loop calls it directly
            self._f = re.compile(pattern).search
        else:
            self._f = cli.op.solidify(pattern)
        self.before = before
        self.after = after
        self.col = col
        self.N = N
        self.sep = sep
        self.tillPattern = None
        self.tillAfter = None
        # default "till" predicate never fires, so sections close only via `after`
        self._tillF = lambda x: False

    def till(self, pattern:Union[str, Callable[[Any], bool]]=None):
        """Greps until some other pattern appear. Inclusive, so you might want to
trim the last line. Example::

    # returns ['5', '6', '7', '8'], includes last item
    range(10) | join("") | grep("5").till("8") | deref()
    # returns ['d', 'e', 'd', '3', '4']
    "abcde12d34" | grep("d").till("e") | deref()
    # returns ['d', 'e']
    "abcde12d34" | grep("d", N=1).till("e") | deref()

If initial pattern and till pattern are the same, then you don't have use this
method at all. Instead, do something like this::

    # returns ['1', '2', '3']
    "0123145" | grep("1", after=1e9, N=1) | deref()

:param pattern: regex pattern or predicate that closes a section. If None,
    the initial pattern is reused
:return: this same object, so calls can be chained"""
        if pattern is None:
            self._tillF = self._f
        elif isinstance(pattern, str):
            self._tillF = re.compile(pattern).search
        else:
            self._tillF = cli.op.solidify(pattern)
        # Remember the user's original `after` only once. A repeated till()
        # call would otherwise copy the `inf` written below into `tillAfter`,
        # and a section could then never be closed.
        if self.tillAfter is None:
            self.tillAfter = self.after
        self.after = inf  # section stays open until the till pattern hits
        return self

    def __ror__(self, it:Iterator[str]) -> Iterator[str]:
        """Lazily yields the matching lines (or lists of lines if ``sep`` is set)
from the incoming iterator."""
        self.sectionIdx = 0
        col = self.col; _f = self._f; _tillF = self._tillF
        if self.sep:
            # Run a non-separating clone over the input and cut its flat output
            # wherever the clone's section counter advances.
            section = []; seen = 0
            inner = self._clone(); inner.sep = False
            for line in (it | inner):
                if inner.sectionIdx > seen:  # a new section has just started
                    if section:
                        yield section
                    seen = inner.sectionIdx; section = []
                section.append(line)
            # Flush the last section, but only if there was at least one hit;
            # otherwise an input without hits would produce a bogus empty section.
            if section:
                yield section
            return
        queue = deque([], self.before)  # most recent `before` lines seen outside a section
        counter = 0  # remaining lines (including the hit itself) still to display
        # NOTE(review): relies on k1lib.RunOnce.done() being falsy on the first
        # call after revert() and truthy afterwards - confirm against k1lib.
        cRO = k1lib.RunOnce(); cRO.done()
        for line in it:
            if col is not None:
                line = list(line); elem = line[col]
            else:
                elem = line
            if _f(elem):  # new section
                self.sectionIdx += 1; counter = self.after + 1; cRO.revert()
                if self.sectionIdx > self.N:
                    return
                yield from queue; queue.clear(); yield line
            elif _tillF(elem) and counter == inf:  # closing section
                counter = self.tillAfter + 1; cRO.revert(); yield line
            if counter == 0:
                queue.append(line)  # saves recent past lines
            elif counter > 0:  # yielding "after" section
                # the hit/closing line was already yielded above; skip it once
                if cRO.done():
                    yield line
                counter -= 1

    def _clone(self):
        # Fresh grep with the same settings, including any till() configuration.
        answer = grep(self._f, self.before, self.after, self.N, self.sep, self.col)
        answer._tillF = self._tillF
        answer.tillAfter = self.tillAfter
        return answer
class grepTemplate(BaseCli):
    def __init__(self, pattern:str, template:str):
        """Searches over all lines, picks out the first regex match in each line,
expands it into the template and yields the result. Lines without a match
are skipped.

:param pattern: regex pattern, compiled once at construction
:param template: template passed to the match object's ``expand()``"""
        super().__init__()
        self.pattern = re.compile(pattern)
        self.template = template

    def __ror__(self, it:Iterator[str]):
        super().__ror__(it)
        for text in it:
            found = self.pattern.search(text)
            if found is not None:
                yield found.expand(self.template)