# AUTOGENERATED FILE! PLEASE DON'T EDIT
# Names exported by a star-import of this module
__all__ = ["grep", "grepTemplate"]
import re, k1lib
from k1lib.cli.init import BaseCli, Table, Row; import k1lib.cli as cli
from collections import deque; from typing import Iterator
# Positive infinity. grep.till() assigns it to `after` so a section has no
# line limit, and grep.__ror__ compares its counter against it.
inf = float("inf")
class grep(BaseCli):
    def __init__(self, pattern:str, before:int=0, after:int=0, N:int=float("inf"), sep:bool=False):
        """Find lines that match the specified pattern, optionally together
        with some context lines around each hit. Each hit starts a new
        "section". Example::

            # returns ['d', 'd']
            "abcde12d34" | grep("d") | deref()
            # returns ['c', 'd', '2', 'd'], 2 sections of ['c', 'd'] and ['2', 'd']
            "abcde12d34" | grep("d", 1) | deref()
            # returns ['c', 'd']
            "abcde12d34" | grep("d", 1, N=1) | deref()
            # returns ['d', 'e', '1', '2', 'd', '3', '4']. With .till(), `after`
            # counts the lines kept after the closing ("till") hit
            "abcde12d34" | grep("d", 0, 3).till("e") | deref()
            # returns [['0', '1', '2'], ['3', '1', '4']]
            "0123145" | grep("1", 2, 1, sep=True) | deref()

        You can also separate out the sections::

            # returns [['c', 'd'], ['2', 'd']]
            "abcde12d34" | grep("d", 1, sep=True) | deref()
            # returns [['c', 'd']]
            "abcde12d34" | grep("d", 1, N=1, sep=True) | deref()
            # returns [['1', '2', '3'], ['1', '4', '5']]
            "0123145" | grep("1", sep=True).till() | deref()

        :param pattern: regex pattern to search for in a line
        :param before: lines before the hit. Outputs independent lines
        :param after: lines after the hit. Outputs independent lines
        :param N: max sections to output
        :param sep: whether to separate out the sections as lists"""
        super().__init__()
        self.pattern = re.compile(pattern)
        self.before = before; self.after = after
        self.N = N; self.sep = sep
        # Both are only meaningful once .till() has been called
        self.tillPattern = None; self.tillAfter = None

    def till(self, pattern:str=None):
        """Greps until some other pattern appears. Inclusive, so you might want to
        trim the last line. Example::

            # returns ['5', '6', '7', '8'], includes last item
            range(10) | join("") | grep("5").till("8") | deref()
            # returns ['d', 'e', 'd', '3', '4']
            "abcde12d34" | grep("d").till("e") | deref()
            # returns ['d', 'e']
            "abcde12d34" | grep("d", N=1).till("e") | deref()

        If the initial pattern and the till pattern are the same, then you don't have
        to use this method at all. Instead, do something like this::

            # returns ['1', '2', '3']
            "0123145" | grep("1", after=1e9, N=1) | deref()

        :param pattern: regex that closes a section. If omitted (or identical to
            the main pattern), a section only ends when the next hit starts
        :return: this object, so the call can be chained"""
        if pattern == self.pattern.pattern: pattern = None
        # Remember the user's `after` only on the first call. A second call
        # would otherwise store the `inf` written below, and sections would
        # then never close.
        if self.tillPattern is None: self.tillAfter = self.after
        # "\ue000" is in unicode's private use area, so extremely unlikely that we
        # will actually run into it in normal text processing, because it's not text
        self.tillPattern = re.compile(pattern or "\ue000")
        # A section now stays open until the till pattern (or the next hit)
        self.after = inf; return self

    def __ror__(self, it:Iterator[str]) -> Iterator[str]:
        """Runs the grep over `it`.

        :param it: iterator of lines (strings)
        :return: generator of lines, or of lists of lines (one per section)
            if `sep` is True. In `sep` mode the last accumulated section is
            always yielded, so an input without any hit produces one empty list"""
        if not self.sep:
            for _, line in self._scan(it): yield line
            return
        # Group consecutive lines that carry the same section index. Neither
        # self.sep nor any other attribute is modified, so the object can be
        # reused and iterated several times.
        section = []; current = 0
        for sectionIdx, line in self._scan(it):
            if sectionIdx > current: # a new section has started, flush the old one
                if len(section) > 0: yield section
                current = sectionIdx; section = []
            section.append(line)
        yield section

    def _scan(self, it:Iterator[str]):
        """Core matching loop. Yields ``(sectionIdx, line)`` for every line that
        belongs to the output, where ``sectionIdx`` starts at 1 and increases
        by one at every hit of the main pattern."""
        tillPattern = self.tillPattern; sectionIdx = 0
        queue = deque([], self.before) # most recent lines that were not output
        counter = 0 # remaining lines after to display
        # NOTE(review): k1lib.RunOnce is used as a "skip once" latch. It looks
        # like done() returns False on the first call after revert() and True
        # afterwards, which stops the hit line from being output twice below.
        # Confirm against k1lib.
        cRO = k1lib.RunOnce(); cRO.done()
        for line in it:
            if self.pattern.search(line): # new section
                sectionIdx += 1; counter = self.after+1; cRO.revert()
                if sectionIdx > self.N: return
                for past in queue: yield sectionIdx, past
                queue.clear(); yield sectionIdx, line
            elif tillPattern is not None and tillPattern.search(line) and counter == inf: # closing section
                counter = self.tillAfter + 1; cRO.revert(); yield sectionIdx, line
            if counter == 0:
                queue.append(line) # saves recent past lines
            elif counter > 0: # yielding "after" section
                if cRO.done(): yield sectionIdx, line
                counter -= 1
class grepTemplate(BaseCli):
    def __init__(self, pattern:str, template:str):
        """Searches every line for the pattern and, for each line that has a
        match, yields the template expanded with that match. Lines without a
        match produce nothing.

        :param pattern: regex pattern to search for in a line
        :param template: template string passed to the match object's ``expand()``"""
        super().__init__()
        self.template = template
        self.pattern = re.compile(pattern)

    def __ror__(self, it:Iterator[str]):
        """Lazily yields one expanded template per matching line of `it`."""
        super().__ror__(it) # BaseCli hook, defined outside this file
        # First match (or None) for each incoming line, evaluated lazily
        hits = (self.pattern.search(line) for line in it)
        yield from (hit.expand(self.template) for hit in hits if hit is not None)