Source code for k1lib.bioinfo.cli.filt

# AUTOGENERATED FILE! PLEASE DON'T EDIT
"""
This is for functions that cut out specific parts of the table
"""
from typing import Callable, Union, List, overload, Iterator, Any, Set
from k1lib.bioinfo.cli.init import patchDefaultDelim, BaseCli, settings
import k1lib.bioinfo.cli as cli
# Public names exported by a star-import of this module.
# NOTE(review): "head" is exported here, but no `head` definition is visible
# in this view of the file -- confirm it is defined elsewhere in the module.
__all__ = ["filt", "isValue", "inside", "nonEmptyStream",
           "startswith", "endswith",
           "isNumeric", "inRange",
           "head", "nhead",
           "columns", "cut", "rows", "every", "intersection"]
class filt(BaseCli):
    """Generic line filter: keeps the lines for which a predicate is truthy."""

    def __init__(self, predicate:Callable[[str], bool], column:int=0, delim:str=None):
        """Keeps only the lines accepted by ``predicate``.

        :param predicate: callable returning a truthy value for lines to keep
        :param column: if an integer, the predicate receives
            ``line.split(delim)[column]``; if None, it receives the whole line
        :param delim: field delimiter, passed through ``patchDefaultDelim``
            (presumably falls back to a package default when None -- confirm)
        """
        self.predicate = predicate
        self.column = column
        self.delim = patchDefaultDelim(delim)

    def __ror__(self, it:Iterator[str]):
        """Lazily yields the lines of ``it`` that pass the predicate.

        In column mode, lines that do not have enough fields to contain
        ``self.column`` are dropped without calling the predicate.
        """
        if self.column is None:
            # Whole-line mode: the predicate sees each line untouched.
            for line in it:
                if self.predicate(line):
                    yield line
            return
        for line in it:
            fields = line.split(self.delim)
            if len(fields) > self.column and self.predicate(fields[self.column]):
                yield line

    def __invert__(self):
        """Returns a new ``filt`` that keeps exactly the lines this one rejects."""
        def negated(s):
            return not self.predicate(s)
        return filt(negated, self.column, self.delim)
class isValue(filt):
    """Filter that keeps lines whose selected column equals a fixed value."""

    def __init__(self, value, column:int=0, delim:str=None):
        """Keeps only the lines whose column compares equal (``==``) to ``value``.

        :param value: value the selected column is compared against
        :param column: column index handed to :class:`filt`
        :param delim: field delimiter handed to :class:`filt`
        """
        def equalsValue(field):
            return field == value
        super().__init__(equalsValue, column, delim)
class inside(filt):
    """Filter that keeps lines whose selected column is a member of a collection."""

    def __init__(self, values:Set[Any], column:int=0, delim:str=None):
        """Keeps only the lines whose column satisfies ``column in values``.

        :param values: container used for the membership test
        :param column: column index handed to :class:`filt`
        :param delim: field delimiter handed to :class:`filt`
        """
        def isMember(field):
            return field in values
        super().__init__(isMember, column, delim)
class nonEmptyStream(BaseCli):
    """Filters out streams that have no rows"""

    @staticmethod
    def _prepend(first, rest):
        """Yields ``first`` and then everything remaining in ``rest``.

        The two values arrive as arguments so that each returned generator
        owns its own pair. A closure over the caller's loop variables would
        see them rebound as soon as the caller moves on to the next stream.
        """
        yield first
        yield from rest

    def __ror__(self, streams:Iterator[Iterator[Any]]) -> Iterator[Iterator[Any]]:
        """Yields one generator per non-empty input stream.

        Each stream is probed by pulling its first element; empty streams are
        skipped. For the others, a generator that replays the probed element
        followed by the remaining ones is yielded. The yielded generators stay
        valid even if the caller collects several of them before consuming
        any (e.g. ``list(streams | nonEmptyStream())``).
        """
        for stream in streams:
            rest = iter(stream)
            # Keep the try minimal: only the probe may signal "empty".
            try:
                first = next(rest)
            except StopIteration:
                continue
            yield self._prepend(first, rest)
class startswith(filt):
    """Filter that keeps lines whose selected column begins with a prefix."""

    def __init__(self, s:str, column:int=0, delim:str=None):
        """Keeps only the lines whose column starts with ``s``.

        :param s: prefix passed to the column's ``startswith`` method
        :param column: column index handed to :class:`filt`
        :param delim: field delimiter handed to :class:`filt`
        """
        def hasPrefix(field):
            return field.startswith(s)
        super().__init__(hasPrefix, column, delim)
class endswith(filt):
    """Filter that keeps lines whose selected column ends with a suffix."""

    def __init__(self, s:str, column:int=0, delim:str=None):
        """Keeps only the lines whose column ends with ``s``.

        :param s: suffix passed to the column's ``endswith`` method
        :param column: column index handed to :class:`filt`
        :param delim: field delimiter handed to :class:`filt`
        """
        def hasSuffix(field):
            return field.endswith(s)
        super().__init__(hasSuffix, column, delim)
class isNumeric(BaseCli):
    """Filter that keeps lines whose selected field parses as a float."""

    def __init__(self, column:int=None, delim:str=None):
        """Filters out a line if that column is not a number.

        :param column: index of the field to test; if None, the whole line is
            tested
        :param delim: field delimiter, passed through ``patchDefaultDelim``
        """
        self.column = column
        self.delim = patchDefaultDelim(delim)

    def __ror__(self, it:Iterator[str]):
        """Lazily yields the lines whose tested value is accepted by ``float()``.

        In column mode, a line that is too short to contain the requested
        column is treated as non-numeric and dropped, instead of aborting the
        whole stream with an ``IndexError``.
        """
        column = self.column
        for line in it:
            try:
                candidate = line if column is None else line.split(self.delim)[column]
                float(candidate)
            except (ValueError, IndexError):
                # ValueError: not parseable; IndexError: column is missing.
                continue
            # Yield outside the try so exceptions thrown into this generator
            # by the consumer are never mistaken for a parse failure.
            yield line
class inRange(BaseCli):
    """Filter that keeps values falling inside the half-open range ``[min, max)``."""

    def __init__(self, min:float=None, max:float=None, column:int=None, delim:str=None):
        """Checks whether a column is in range or not.

        :param min: inclusive lower bound; None means negative infinity
        :param max: exclusive upper bound; None means positive infinity
        :param column: index of the field to test; if None, the stream's
            elements themselves are tested
        :param delim: field delimiter, passed through ``patchDefaultDelim``
        """
        self.min = float("-inf") if min is None else min
        self.max = float("inf") if max is None else max
        self.column = column
        self.delim = patchDefaultDelim(delim)

    def __ror__(self, it:Iterator[str]):
        """Lazily yields the elements that are ``>= min`` and ``< max``.

        Column mode converts the selected field with ``float()`` and yields
        the original line. Without a column, the elements are compared
        directly and yielded as-is; unless ``settings["strict"]`` is truthy
        the stream is first piped through ``cli.numeric()``.
        """
        if self.column is None:
            if not settings["strict"]:
                it = it | cli.numeric()
            for value in it:
                if value >= self.min and value < self.max:
                    yield value
            return
        for line in it:
            value = float(line.split(self.delim)[self.column])
            if value >= self.min and value < self.max:
                yield line
class nhead(BaseCli):
    """Drops the leading lines of a stream and passes the rest through."""

    def __init__(self, n:int=1):
        """Only outputs the lines after the first ``n``.

        Unlike ``rows()[n:]`` this streams the input instead of loading it
        into a list first.

        :param n: number of leading lines to discard
        """
        self.n = n

    def __ror__(self, it:Iterator[str]):
        """Lazily yields every line whose zero-based position is at least ``n``."""
        for position, line in enumerate(it):
            if position >= self.n:
                yield line
class columns(BaseCli):
    """Column selector for delimited lines; also available under the name ``cut``."""

    def __init__(self, *columns:Union[int, slice, List[int]], delim:str=None):
        """Cuts out specific columns, separated by ``delim``.

        :param columns: the column indexes to keep, given either as separate
            integers or as one list, tuple or slice
        :param delim: field delimiter, passed through ``patchDefaultDelim``
        """
        if len(columns) == 1:
            only = columns[0]
            # A lone list/tuple/slice argument is the selection itself.
            if isinstance(only, (list, tuple, slice)):
                columns = only
        self.columns = columns
        self.delim = patchDefaultDelim(delim)

    def __ror__(self, it:Iterator[str]):
        """Lazily yields each line reduced to the selected columns.

        Kept fields stay in their original left-to-right order and are
        re-joined with the delimiter. A slice selection is converted to
        concrete indexes once, using the field count of the first line.
        """
        wanted = self.columns
        if isinstance(wanted, int):
            wanted = {wanted}
        if isinstance(wanted, list):
            wanted = set(wanted)
        for lineNumber, line in enumerate(it):
            fields = line.split(self.delim)
            if lineNumber == 0 and isinstance(wanted, slice):
                wanted = set(range(len(fields))[wanted])
            yield self.delim.join(field for position, field in enumerate(fields) if position in wanted)

    def __getitem__(self, idx):
        """Returns a new selector for ``idx`` that reuses this one's delimiter."""
        return cut(idx, delim=self.delim)
cut = columns
class rows(BaseCli):
    """Row selector that picks lines of a stream by position."""

    def __init__(self, *rows):
        """Cuts out specific rows.

        Can do ``rows()[5:10]`` to select with a slice (rows 5 through 9).

        :param rows: the row indexes to output, given either as separate
            integers or as one list or tuple
        """
        if len(rows) == 1 and isinstance(rows[0], (list, tuple)):
            rows = rows[0]
        self.rows = rows

    def __getitem__(self, _slice):
        """Returns a new selector whose selection is ``_slice``."""
        answer = rows()
        answer.rows = _slice
        return answer

    def __ror__(self, it:Iterator[str]):
        """Yields the selected rows, in selection order.

        The whole input is loaded into a list first. A slice selection is
        resolved against the length of that list on every call and kept in a
        local, so ``self.rows`` is never overwritten and the same selector
        can be reused on inputs of different lengths.
        """
        lines = list(it)
        selection = self.rows
        if isinstance(selection, slice):
            selection = range(len(lines))[selection]
        for row in selection:
            yield lines[row]
class every(BaseCli):
    """Periodic sampler that keeps one line out of every ``length``."""

    def __init__(self, length:int, offset:int=0):
        """Get lines every ``length``, with the phase set by ``offset``.

        :param length: distance between two consecutive kept lines
        :param offset: a line at zero-based position ``i`` is kept when
            ``(i - offset) % length == 0``
        """
        self.length = length
        self.offset = offset

    def __ror__(self, it:Iterator[str]):
        """Lazily yields the lines whose position matches the period and offset."""
        for position, line in enumerate(it):
            if (position - self.offset) % self.length != 0:
                continue
            yield line
class intersection(BaseCli):
    """Returns the intersection of multiple streams. Example::

    [[1, 2, 3, 4, 5], [7, 2, 4, 6, 5]] | intersection() # will return set([2, 4, 5])
"""

    def __ror__(self, its:Iterator[Iterator[Any]]) -> Set[Any]:
        """Returns the set of elements present in every stream of ``its``.

        The first stream seeds the result and each later stream narrows it.
        When ``its`` contains no stream at all, None is returned.
        """
        streams = iter(its)
        missing = object()
        first = next(streams, missing)
        if first is missing:
            return None
        common = set(first)
        for stream in streams:
            common = common.intersection(stream)
        return common