Source code for k1lib.bioinfo.cli.utils

# AUTOGENERATED FILE! PLEASE DON'T EDIT
"""
This is for all short utilities that have the boilerplate feeling
"""
from k1lib.bioinfo.cli.init import patchDefaultDelim, BaseCli, settings, Table
import k1lib.bioinfo.cli as cli, numbers
from typing import overload, Iterator, Any, List, Set, Union
# Names exported by `from ... import *`. `shape` is an alias of `size`
# that is assigned right after the `size` class below.
__all__ = ["size", "shape", "item", "identity",
           "toStr", "to1Str", "toNumpy",
           "toList", "wrapList", "toSet", "toIter", "toRange",
           "equals", "reverse", "ignore",
           "toSum", "toAvg", "headerIdx", "dereference"]
class size(BaseCli):
    def __init__(self, idx=None):
        """Returns number of rows and columns in the input.

        :param idx: if idx is None return (rows, columns). If 0 or 1, then
            rows or columns"""
        self.idx = idx

    def __ror__(self, it:Iterator[str]):
        """Consumes ``it`` and measures it according to ``self.idx``.

        :param it: iterable of rows; any iterable is accepted, not only
            iterators
        :return: the row count if ``idx == 0``; the length of the first row
            if ``idx == 1``; otherwise a ``(rows, columns)`` tuple where
            ``columns`` is the length of the first row, or None if the input
            is empty or the first row has no length
        :raises StopIteration: if ``idx == 1`` and the input is empty"""
        if self.idx == 0:  # get rows only
            return sum(1 for _ in it)
        if self.idx == 1:  # get #columns only
            # iter() makes plain lists/tuples work too; next() alone only
            # accepts objects that already are iterators
            return len(next(iter(it)))
        rows = 0
        columns = None
        for rows, row in enumerate(it, 1):
            if rows == 1:  # only the first row determines the column count
                try:
                    columns = len(list(row))
                except (AttributeError, TypeError):
                    # list() raises TypeError for a non-iterable row (e.g. a
                    # bare number); report "unknown" instead of crashing
                    columns = None
        return rows, columns
# Alias: the same class is also reachable under the name `shape`.
shape = size
class item(BaseCli):
    """Returns the first row of the input."""

    def __ror__(self, it:Iterator[str]):
        """Fetches the first element of ``it``.

        :param it: any iterable
        :return: its first element
        :raises StopIteration: if ``it`` is empty"""
        rows = iter(it)
        return next(rows)
class identity(BaseCli):
    """Passes the input through untouched. Useful for multiple streams."""

    def __ror__(self, it:Iterator[Any]):
        """Hands back the very same object that was piped in.

        :param it: anything
        :return: ``it`` itself, not a copy"""
        return it
class toStr(BaseCli):
    def __init__(self):
        """Turns each incoming line (which may be just a number) into a string."""

    def __ror__(self, it:Iterator[str]):
        """Lazily yields ``str(line)`` for every line of ``it``.

        :param it: iterable of arbitrary objects
        :return: generator of strings"""
        yield from map(str, it)
class to1Str(BaseCli):
    def __init__(self, delim:str=None):
        """Joins every incoming string into one, separated by `delim`.

        :param delim: separator; handed to ``patchDefaultDelim`` before being
            stored (presumably that supplies a default when None is given --
            verify against its definition)"""
        self.delim = patchDefaultDelim(delim)

    def __ror__(self, it:Iterator[str]):
        """Yields a single string: all elements of ``it``, stringified via
        :class:`toStr` and joined with ``self.delim``.

        :param it: iterable of lines
        :return: generator producing exactly one string"""
        pieces = it | toStr()
        yield self.delim.join(pieces)
class toNumpy(BaseCli):
    """Collects the input into a numpy array."""

    def __ror__(self, it:Iterator[float]):
        """Materializes ``it`` as a list, then wraps it in ``numpy.array``.

        :param it: iterable of values
        :return: numpy array holding all values"""
        import numpy  # imported lazily, only when this cli is actually used
        values = [*it]
        return numpy.array(values)
class toList(BaseCli):
    """Collects the input into a list. :class:`list` would do the same, but
    this keeps the piping style."""

    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        """Exhausts ``it`` and returns its elements in order.

        :param it: any iterable
        :return: new list with every element of ``it``"""
        return [*it]
class wrapList(BaseCli):
    """Puts the input, as a whole, inside a one-element list."""

    def __ror__(self, it:Any) -> List[Any]:
        """Wraps ``it`` without iterating over it.

        :param it: any object
        :return: ``[it]``"""
        wrapped = []
        wrapped.append(it)
        return wrapped
class toSet(BaseCli):
    """Collects the input into a set. :class:`set` would do the same, but
    this keeps the piping style."""

    def __ror__(self, it:Iterator[Any]) -> Set[Any]:
        """Exhausts ``it`` and returns its distinct elements.

        :param it: iterable of hashable elements
        :return: new set built from ``it``"""
        return {*it}
class toIter(BaseCli):
    """Turns an object into an iterator. `iter()` would do the same, but
    this keeps the piping style."""

    def __ror__(self, it) -> Iterator[Any]:
        """Calls the built-in ``iter`` on the input.

        :param it: any iterable
        :return: iterator over ``it``"""
        iterator = iter(it)
        return iterator
class toRange(BaseCli):
    """Effectively returns iter(range(len(it)))."""

    def __ror__(self, it:Iterator[Any]) -> Iterator[int]:
        """Lazily yields 0, 1, 2, ... -- one index per element of ``it``.

        :param it: any iterable; its elements are consumed and discarded
        :return: generator of consecutive integers starting at 0"""
        position = 0
        for _ in it:
            yield position
            position += 1
class _EarlyExp(Exception):
    """Formerly raised inside :class:`equals` to leave its inner loop early.

    No longer raised there; kept only so existing references to the name keep
    working."""


class equals:
    """Checks if all incoming columns/streams are identical"""

    def __ror__(self, streams:Iterator[Iterator[str]]):
        """Compares the streams position by position.

        :param streams: iterable of iterables, walked in lockstep with
            ``zip`` (so comparison stops at the shortest stream)
        :return: generator yielding, for each position, True if every stream
            holds an element equal to the first stream's element, else False"""
        streams = list(streams)
        for row in zip(*streams):
            sampleElem = row[0]
            # any() short-circuits on the first mismatch, which is what the
            # previous raise/except dance achieved, without exception overhead
            yield not any(sampleElem != elem for elem in row)
class reverse(BaseCli):
    """Prints last line first, first line last"""

    def __ror__(self, it:Iterator[str]) -> Iterator[str]:
        """Buffers the whole input, then iterates over it backwards.

        :param it: any iterable; it is fully consumed into a list first
        :return: a ``reversed`` iterator over the buffered elements (an
            iterator, not a list -- wrap it in ``list`` if indexing is
            needed)"""
        return reversed(list(it))
class ignore(BaseCli):
    """Runs the whole pipeline and throws away everything it produces."""

    def __ror__(self, it:Iterator[Any]):
        """Exhausts ``it`` and discards every element.

        :param it: any iterable
        :return: None"""
        exhausted = object()  # unique sentinel, cannot collide with real data
        stream = iter(it)
        while next(stream, exhausted) is not exhausted:
            pass
class toSum(BaseCli):
    """Adds up a list of numbers."""

    def __ror__(self, it:Iterator[float]):
        """Accumulates every element of ``it`` onto an initial 0.

        :param it: iterable of numbers
        :return: the running total; 0 for empty input"""
        total = 0
        for value in it:
            total += value
        return total
class toAvg(BaseCli):
    """Computes the mean of a list of numbers."""

    def __ror__(self, it:Iterator[float]):
        """Sums and counts the elements of ``it``, then divides.

        :param it: iterable of numbers
        :return: total / count; ``nan`` for empty input when
            ``settings["strict"]`` is falsy
        :raises ZeroDivisionError: for empty input when ``settings["strict"]``
            is truthy"""
        total = 0
        count = 0
        for value in it:
            total += value
            count += 1
        lenient = not settings["strict"]
        if lenient and count == 0:
            return float("nan")
        return total / count
def headerIdx():
    """Takes the first line, and pairs each of its columns with an index.
    Handy for finding out which index a column has before cutting it out.
    Example::

        # returns [[0, 'a'], [1, 'b'], [2, 'c']]
        ["abc"] | headerIdx() | dereference()"""
    # built left to right: first row -> wrapped in a list -> transposed ->
    # id column inserted
    pipeline = item() | wrapList()
    pipeline = pipeline | cli.transpose()
    return pipeline | cli.insertIdColumn(True)
class dereference(BaseCli):
    """Recursively turns every iterator into a list. Only :class:`str` and
    :class:`numbers.Number` elements are left as they are. Example:

    .. code-block::

        iter(range(5)) # returns something like "<range_iterator at 0x7fa8c52ca870>"
        iter(range(5)) | dereference() # returns [0, 1, 2, 3, 4]
    """

    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        """Builds a list from ``it``, piping each non-string, non-number
        element back through this same object.

        :param it: any iterable, possibly nested
        :return: list of the (recursively dereferenced) elements"""
        resolved = []
        for elem in it:
            if isinstance(elem, (numbers.Number, str)):
                resolved.append(elem)  # leaf value, keep as is
            else:
                resolved.append(elem | self)  # recurse into nested iterable
        return resolved