# AUTOGENERATED FILE! PLEASE DON'T EDIT
"""
This is for all short utilities that have a boilerplate feeling
"""
from k1lib.bioinfo.cli.init import patchDefaultDelim, BaseCli, settings, Table
import k1lib.bioinfo.cli as cli, numbers, torch, numpy as np
from typing import overload, Iterator, Any, List, Set, Union
# Names exported by ``from <this module> import *``. Helpers that are defined
# below but not listed here (e.g. ``inv_dereference``) stay out of that export.
__all__ = ["size", "shape", "item", "identity",
"toStr", "to1Str", "toNumpy", "toTensor",
"toList", "wrapList", "toSet", "toIter", "toRange",
"equals", "reverse", "ignore",
"toSum", "toAvg", "toMax", "toMin",
"lengths", "headerIdx", "dereference"]
class size(BaseCli):
    def __init__(self, idx=None):
        """Returns number of rows and columns in the input.

        :param idx: if idx is None return (rows, columns). If 0 or 1, then rows
            or columns"""
        super().__init__(); self.idx = idx

    @staticmethod
    def _width(row):
        """Number of elements in a single row, or None if it can't be measured."""
        # len(list(...)) raises TypeError for non-iterable rows (e.g. a bare
        # number); AttributeError is still caught for backward compatibility.
        try: return len(list(row))
        except (TypeError, AttributeError): return None

    def __ror__(self, it:Iterator[str]):
        """Consumes the input and measures it.

        :return: row count if ``idx == 0``; column count of the first row if
            ``idx == 1`` (None when there is no first row or it can't be
            measured); otherwise the tuple ``(rows, columns)`` where columns
            is taken from the first row only"""
        super().__ror__(it)
        if self.idx == 0: # get rows only
            rows = 0
            for _ in it: rows += 1
            return rows
        if self.idx == 1: # get #columns only
            # iter() so that plain lists work too, not just iterators
            stream = iter(it)
            try: first = next(stream)
            except StopIteration: return None # empty input: no columns to report
            return self._width(first)
        rows = 0; columns = None
        for row in it:
            if rows == 0: columns = self._width(row) # only the first row decides
            rows += 1
        return rows, columns

# ``shape`` is just another name for :class:`size`
shape = size
class item(BaseCli):
    """Hands back the very first element (row) of the input."""
    def __ror__(self, it:Iterator[str]):
        # wrap in iter() so that plain sequences are accepted as well
        rows = iter(it)
        return next(rows)
class identity(BaseCli):
    """Pass-through cli: the input comes out exactly as it went in.
    Handy as a no-op branch when working with multiple streams"""
    def __ror__(self, it:Iterator[Any]):
        unchanged = it
        return unchanged
class toStr(BaseCli):
    """Lazily applies :class:`str` to every incoming line (which may
    be a number or any other object)."""
    def __ror__(self, it:Iterator[str]):
        yield from map(str, it)
class to1Str(BaseCli):
    def __init__(self, delim:str=None):
        """Joins every incoming element into a single string, placing
        `delim` between consecutive elements.

        :param delim: separator; passed through ``patchDefaultDelim`` first"""
        super().__init__()
        self.delim = patchDefaultDelim(delim)
    def __ror__(self, it:Iterator[str]):
        """Yields exactly one item: the joined string."""
        super().__ror__(it)
        pieces = it | toStr() # stringify each element before joining
        yield self.delim.join(pieces)
class toNumpy(BaseCli):
    """Collects the whole input and builds a numpy array out of it"""
    def __ror__(self, it:Iterator[float]):
        values = list(it) # materialize first, np.array can't take a generator
        return np.array(values)
class toTensor(BaseCli):
    """Collects the whole input and builds a :class:`torch.Tensor` out of it"""
    def __ror__(self, it):
        values = list(it)
        return torch.tensor(values)
class toList(BaseCli):
    """Materializes the input into a list. Plain :class:`list` does
    the same thing; this exists so pipelines keep the cli style"""
    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        return [*it]
class wrapList(BaseCli):
    """Puts the input, as a single element, inside a new list"""
    def __ror__(self, it:Any) -> List[Any]:
        wrapped = [it]
        return wrapped
class toSet(BaseCli):
    """Materializes the input into a set. Plain :class:`set` does
    the same thing; this exists so pipelines keep the cli style"""
    def __ror__(self, it:Iterator[Any]) -> Set[Any]:
        return {*it}
class toIter(BaseCli):
    """Turns the input object into an iterator. Plain `iter()` does
    the same thing; this exists so pipelines keep the cli style"""
    def __ror__(self, it) -> Iterator[Any]:
        iterator = iter(it)
        return iterator
class toRange(BaseCli):
    """Yields 0, 1, 2, ... - one index per incoming element, so
    effectively iter(range(len(it))) without needing a length"""
    def __ror__(self, it:Iterator[Any]) -> Iterator[int]:
        index = 0
        for _ in it:
            yield index
            index += 1
# No longer used by :class:`equals`; kept so that any code referring to the
# name keeps working.
class _EarlyExp(Exception): pass

class equals:
    """Checks if all incoming columns/streams are identical"""
    def __ror__(self, streams:Iterator[Iterator[str]]):
        """Walks all streams in lockstep and yields one bool per position.

        :param streams: iterable of streams to compare against each other
        :return: generator yielding True where every stream holds the same
            element as the first stream, False otherwise. Stops at the
            shortest stream (:func:`zip` semantics); yields nothing if there
            are no streams"""
        streams = list(streams)
        for row in zip(*streams):
            reference = row[0]
            # any() short-circuits on the first mismatch, no exception needed
            yield not any(reference != elem for elem in row)
class reverse(BaseCli):
    """Prints last line first, first line last"""
    def __ror__(self, it:Iterator[str]) -> Iterator[str]:
        """Reads the entire input, then iterates over it backwards.

        :return: a reverse iterator over the collected rows (not a list)"""
        rows = list(it) # everything has to be read before the last row is known
        return reversed(rows)
class ignore(BaseCli):
    """Drains the input so that every upstream step runs, then
    throws the results away (returns None)"""
    def __ror__(self, it:Iterator[Any]):
        for _discarded in it:
            continue
class toSum(BaseCli):
    """Adds up all incoming numbers, starting from 0"""
    def __ror__(self, it:Iterator[float]):
        total = 0
        for value in it:
            total += value
        return total
class toAvg(BaseCli):
    """Arithmetic mean of the incoming numbers. For an empty input,
    returns nan when ``settings["strict"]`` is falsy; otherwise the
    division by zero is left to raise"""
    def __ror__(self, it:Iterator[float]):
        total = 0; count = 0
        for value in it:
            total += value
            count += 1
        lenient = not settings["strict"]
        if lenient and count == 0: return float("nan")
        return total / count
class toMax(BaseCli):
    """Largest value among the incoming numbers"""
    def __ror__(self, it:Iterator[float]) -> float:
        largest = max(it)
        return largest
class toMin(BaseCli):
    """Smallest value among the incoming numbers"""
    def __ror__(self, it:Iterator[float]) -> float:
        smallest = min(it)
        return smallest
class lengths(BaseCli):
    """Lazily yields ``len(row)`` for every incoming row."""
    def __ror__(self, it:Iterator[List[Any]]) -> Iterator[int]:
        yield from map(len, it)
# Short aliases for the scalar/tensor types checked with isinstance() below
Number = numbers.Number
Tensor = torch.Tensor
NpNumber = np.number
class inv_dereference(BaseCli):
    def __init__(self, ignoreTensors=False):
        """Kinda the inverse to :class:`dereference`. Lazily walks the input
        and re-emits nested elements piped through this same cli, instead
        of collecting them into lists.

        :param ignoreTensors: if True, 0-dimensional :class:`torch.Tensor`
            elements are yielded as-is instead of being unwrapped with
            ``.item()``"""
        super().__init__(); self.ignoreTensors = ignoreTensors
    def __ror__(self, it:Iterator[Any]) -> Iterator[Any]:
        """Generator over the processed elements of ``it``.

        None, numbers and strings pass through unchanged; promises are
        resolved by calling them; everything else is piped into this cli
        again."""
        super().__ror__(it); ignoreTensors = self.ignoreTensors
        for e in it:
            if isinstance(e, cli.ctx.Promise): e = e()
            if e is None or isinstance(e, (Number, NpNumber, str)): yield e
            elif isinstance(e, Tensor):
                if not ignoreTensors and len(e.shape) == 0: yield e.item()
                else: yield e
            else:
                # Only the pipe itself is guarded. The yield stays outside the
                # try so that GeneratorExit (raised at the yield when the
                # consumer closes this generator) is never caught and answered
                # with another yield, which would be a RuntimeError.
                try: nested = e | self
                except Exception: nested = e
                yield nested
class dereference(BaseCli):
    def __init__(self, ignoreTensors=False, maxDepth=float("inf")):
        """Recursively converts any iterator into a list. Only :class:`str`,
        :class:`numbers.Number` are not converted. Example::

            # returns something like "<range_iterator at 0x7fa8c52ca870>"
            iter(range(5))
            # returns [0, 1, 2, 3, 4]
            iter(range(5)) | dereference()

        You can also specify a ``maxDepth``::

            # returns something like "<list_iterator at 0x7f810cf0fdc0>"
            iter([range(3)]) | dereference(maxDepth=0)
            # returns [range(3)]
            iter([range(3)]) | dereference(maxDepth=1)
            # returns [[0, 1, 2]]
            iter([range(3)]) | dereference(maxDepth=2)

        :param ignoreTensors: if True, then don't loop over :class:`torch.Tensor`
            internals
        :param maxDepth: how many nesting levels to convert. Anything deeper
            is returned untouched. Defaults to no limit

        .. warning::

            Can work well with PyTorch Tensors, but not Numpy's array as they screw things up
            with the __ror__ operator, so do torch.from_numpy(...) first."""
        super().__init__(); self.ignoreTensors = ignoreTensors
        self.maxDepth = maxDepth; self.depth = 0 # depth: current recursion level
    def __ror__(self, it:Iterator[Any]) -> List[Any]:
        """Eagerly converts ``it`` (and nested iterables) into lists.

        :return: list of processed elements, or ``it`` itself, untouched, once
            ``maxDepth`` levels have already been entered"""
        super().__ror__(it)
        if self.depth >= self.maxDepth: return it
        answer = []; ignoreTensors = self.ignoreTensors
        self.depth += 1
        try:
            for e in it:
                if isinstance(e, cli.ctx.Promise): e = e()
                if e is None or isinstance(e, (Number, NpNumber, str)):
                    answer.append(e)
                elif isinstance(e, Tensor):
                    if not ignoreTensors and len(e.shape) == 0:
                        answer.append(e.item())
                    else: answer.append(e)
                else:
                    # Elements that can't be dereferenced (e.g. not iterable)
                    # are kept as they are.
                    try: converted = e | self
                    except Exception: converted = e
                    answer.append(converted)
        finally:
            # Always undo the increment: a failure while iterating (including
            # a nested one swallowed just above) must not leave the depth
            # counter too high for later siblings or later uses of this object.
            self.depth -= 1
        return answer
    def __invert__(self) -> BaseCli:
        """Returns a :class:`~k1lib.bioinfo.cli.init.BaseCli` that makes
        everything an iterator."""
        return inv_dereference(self.ignoreTensors)