# Source code for k1lib.cli.init

# AUTOGENERATED FILE! PLEASE DON'T EDIT
from typing import List, Iterator, Any, NewType, TypeVar, Generic
import k1lib.cli as cli; from numbers import Number
import k1lib, itertools, copy, xml, warnings, traceback, sys; import numpy as np
import xml.etree.ElementTree
try: import torch; hasTorch = True
except: hasTorch = False; torch = k1lib.dep("torch")

# Names exported by ``from k1lib.cli.init import *``.
__all__ = ["BaseCli", "Table", "T", "fastF", "yieldT",
           "serial", "oneToMany", "mtmS"]
# Settings container of this module; it is registered on the library-wide
# ``k1lib.settings`` under the key "cli" a few lines below.
settings = k1lib.Settings()
# Separate settings group, attached to ``settings`` under the key "atomic".
atomic = k1lib.Settings()
settings.add("atomic", atomic, "classes/types that are considered atomic and specified cli tools should never try to iterate over them")
settings.add("defaultDelim", "\t", "default delimiter used in-between columns when creating tables. Defaulted to tab character.")
settings.add("defaultIndent", "  ", "default indent used for displaying nested structures")
settings.add("strict", False, "turning it on can help you debug stuff, but could also be a pain to work with")
settings.add("inf", float("inf"), "infinity definition for many clis. Here because you might want to temporarily not loop things infinitely")
k1lib.settings.add("cli", settings, "from k1lib.cli module")
# Unique marker object. Nothing in this part of the file reads it.
# NOTE(review): presumably compared by identity by other cli modules -- confirm.
yieldT = object()
def patchDefaultDelim(st:str):
    """Resolves which column delimiter to use.

:param st: delimiter picked by the caller, or None to fall back to
    ``settings.defaultDelim``
:return: ``st`` itself whenever it is not None (an empty string is kept as is)"""
    if st is not None: return st
    return settings.defaultDelim
def patchDefaultIndent(st:str):
    """Resolves which indent string to use.

:param st: indent picked by the caller, or None to fall back to
    ``settings.defaultIndent``
:return: ``st`` itself whenever it is not None (an empty string is kept as is)"""
    if st is not None: return st
    return settings.defaultIndent
# Type parameter of the generic ``Table`` class defined further down.
T = TypeVar("T")
"""Generic type variable"""
class _MetaType(type):
    def __getitem__(self, generic):
        d = {"__args__": generic, "_n": self._n, "__doc__": self.__doc__}
        return _MetaType(self._n, (), d)
    def __repr__(self):
        def main(self):
            def trueName(o):
                if isinstance(o, _MetaType): return main(o)
                try: return o.__name__
                except: return f"{o}"
            if hasattr(self, "__args__"):
                if isinstance(self.__args__, tuple):
                    return f"{self._n}[{', '.join([trueName(e) for e in self.__args__])}]"
                else: return f"{self._n}[{trueName(self.__args__)}]"
            return self._n
        return main(self)
def newTypeHint(name, docs=""):
    """Creates a type hint object that can be subscripted and still prints
nicely, including inside sphinx-generated docs. Built on a custom metaclass;
background reading on metaclasses:
https://stackoverflow.com/questions/100003/what-are-metaclasses-in-python

Example::

    Table = newTypeHint("Table", "some docs")
    Table[int] # prints out as "Table[int]"
    Table[Table[str], float] # prints out as "Table[Table[str], float]"

:param name: name shown in the repr
:param docs: docstring attached to the created type hint"""
    namespace = {"_n": name, "__doc__": docs}
    return _MetaType(name, (), namespace)
#Table = newTypeHint("Table", """Essentially just Iterator[List[T]]. This class is just here so that I can generate the docs with nicely formatted types like "Table[str]".""")
#Table = NewType("Table", List)
class Table(Generic[T]):
    """Stands for Iterator[List[T]]. Exists only so that generated docs can show
nicely formatted types such as "Table[str]"; it adds no behavior of its own."""
Table._name = "Table"
#Table.__module__ = "cli"
class Row(list):
    """Plain ``list`` subclass. Currently unused, kept as a hook for a
potential future feature."""
class BaseCli:
    """A base class for all the cli stuff. You can definitely create new cli tools that
have the same feel without extending from this class, but advanced stream operations
(like ``+``, ``&``, ``.all()``, ``|``) won't work.

At the moment, you don't have to call super().__init__() and super().__ror__(),
as __init__'s only job right now is to solidify any :class:`~k1lib.cli.modifier.op`
passed to it, and __ror__ does nothing."""
    def __init__(self, fs:list=None):
        """Not expected to be instantiated by the end user.

**fs param**

Expected to use it like this::

    class A(BaseCli):
        def __init__(self, f):
            fs = [f]; super().__init__(fs); self.f = fs[0]

Where ``f`` is some (potentially exotic) function. Every element of ``fs`` is passed
through :meth:`~k1lib.cli.modifier.op.solidify`, then written back into the very same
list object, which is why a list (and not a tuple) is required. See source code of
:class:`~k1lib.cli.filt.filt` for an example of why this is useful.

:param fs: list of functions to solidify in place. Defaults to None, meaning
    there is nothing to solidify
:raises AttributeError: if ``fs`` is a tuple"""
        if isinstance(fs, tuple): raise AttributeError("`fs` should not be a tuple. Use a list instead, so that new functions can be returned")
        if fs is None: return # no shared mutable default; nothing to solidify
        # NOTE(review): the `_k1_init_` prefixes look deliberate. `_k1_global_frame` merges
        # the locals of every live frame into the namespace that `fastF` evaluates string
        # expressions in, so short names here could leak into it. Confirm before renaming.
        _k1_init_l = []
        for _k1_init_f in fs: cli.op.solidify(_k1_init_f); _k1_init_l.append(_k1_init_f)
        fs.clear(); fs.extend(_k1_init_l)
    def hint(self, _hint:"cli.typehint.tBase"):
        """Specifies output type hint.

:param _hint: type hint to remember on this cli
:return: this cli, so that the call can be chained"""
        self._hint = _hint; return self
    @property
    def hasHint(self):
        """Whether :meth:`hint` has stored a hint that is not None."""
        return "_hint" in self.__dict__ and self._hint is not None
    def _typehint(self, inp:"cli.typehint.tBase"=None) -> "cli.typehint.tBase":
        """Output type hint of this cli: whatever was stored by :meth:`hint`
(even None), or ``tAny()`` if :meth:`hint` was never called. ``inp`` is
not used by this base implementation."""
        return cli.typehint.tAny() if "_hint" not in self.__dict__ else self._hint
    def __and__(self, cli:"BaseCli") -> "oneToMany":
        """Duplicates input stream to multiple joined clis.
Example::

    # returns [[5], [0, 1, 2, 3, 4]]
    range(5) | (shape() & iden()) | deref()

Kinda like :class:`~k1lib.cli.modifier.apply`. There're just multiple ways of doing
this. This I think, is more intuitive, and :class:`~k1lib.cli.modifier.apply` is more
for lambdas and columns mode. Performances are pretty much identical."""
        # an existing oneToMany on either side is copied and extended, not nested
        if isinstance(self, oneToMany): return self._copy()._after(cli)
        if isinstance(cli, oneToMany): return cli._copy()._before(self)
        return oneToMany(self, cli)
    def __add__(self, cli:"BaseCli") -> "mtmS":
        """Parallel pass multiple streams to multiple clis.
Example::

    # returns [8, 15]
    [2, 3] | ((op() * 4) + (op() * 5)) | deref()"""
        # an existing mtmS on either side is copied and extended, not nested
        if isinstance(self, mtmS): return self._copy()._after(cli)
        if isinstance(cli, mtmS): return cli._copy()._before(self)
        return mtmS(self, cli)
    def all(self, n:int=1) -> "BaseCli":
        """Applies this cli to all incoming streams.
Example::

    # returns (3,)
    torch.randn(3, 4) | toMean().all() | shape()
    # returns (3, 4)
    torch.randn(3, 4, 5) | toMean().all(2) | shape()

:param n: how many times should I chain ``.all()``? 0 returns this cli unchanged
:raises AttributeError: if ``n`` is negative"""
        if n < 0: raise AttributeError(f"Does not make sense for `n` to be \"{n}\"")
        s = self
        for i in range(n): s = cli.apply(s)
        return s
    def __or__(self, cli) -> "serial":
        """Joins clis end-to-end.
Example::

    c = apply(op() ** 2) | deref()
    # returns [0, 1, 4, 9, 16]
    range(5) | c"""
        # an existing serial on either side is copied and extended, not nested
        if isinstance(self, serial): return self._copy()._after(cli)
        if isinstance(cli, serial): return cli._copy()._before(self)
        return serial(self, cli)
    def __ror__(self, it):
        """Entry point of ``it | cli``. The base class does not process anything
and returns :data:`NotImplemented`; subclasses override this."""
        return NotImplemented
    def f(self):
        """Creates a normal function :math:`f(x)` which is equivalent to
``x | self``.

:return: function of 1 argument that forwards it to :meth:`__ror__`"""
        return lambda it: self.__ror__(it)
    def __lt__(self, it):
        """Backup pipe symbol `>`, purely for style, so that you can do something like
this::

    range(4) > file("a.txt")"""
        return self.__ror__(it)
    def __call__(self, it, *args):
        """Another way to do ``it | cli``. If multiple arguments are fed, then the
argument list is passed to cli instead of just the first element. Example::

    @applyS
    def f(it):
        return it
    f(2) # returns 2
    f(2, 3) # returns [2, 3]"""
        if len(args) == 0: return self.__ror__(it)
        else: return self.__ror__([it, *args])
    def __neg__(self):
        """Alias for __invert__, for clis that support inverting stuff."""
        return ~self
def _k1_init_frames():
    _k1_init_frames_count = 0
    try:
        while True:
            yield sys._getframe(_k1_init_frames_count) # `sys._getframe()` trick stolen from pd.DataFrame.query
            _k1_init_frames_count += 1
    except: pass
def _k1_global_frame():
    """Flattens the local variables of every frame yielded by
:func:`_k1_init_frames` into 1 dict. Used by :func:`fastF` as the namespace
in which string expressions are evaluated.

:return: merged ``f_locals`` of all frames, or an empty dict if anything goes
    wrong while collecting them"""
    try:
        _k1_init_frames_ans = {}
        # Frames are merged in reverse order of how they were yielded, so on a name
        # clash the frame yielded earlier (the more deeply nested one) wins.
        for _k1_init_frames_frame in reversed(list(_k1_init_frames())):
            _k1_init_frames_ans = {**_k1_init_frames_ans, **_k1_init_frames_frame.f_locals}
        return _k1_init_frames_ans
    except: return {}
def fastF(c, x=None):
    """Tries to figure out what's going on, is it a normal function, or an applyS,
or a BaseCli, etc., and return a really fast function for execution. Example::

    # both returns 16, fastF returns "lambda x: x**2", so it's really fast
    fastF(op()**2)(4)
    fastF(applyS(lambda x: x**2))(4)

At the moment, parameter ``x`` does nothing, but potentially in the future, you can
pass in an example input to the cli, so that this returns an optimized, C compiled
version.

:param c: string expression in terms of ``x``, :class:`~k1lib.cli.modifier.op`,
    :class:`~k1lib.cli.modifier.applyS`, any other :class:`BaseCli`, or a plain
    callable (returned untouched)
:param x: sample data for the cli"""
    # SECURITY: the string is handed to eval() together with the local variables of
    # every frame on the call stack. Never pass strings from an untrusted source here.
    if isinstance(c, str): return fastF(eval(f"lambda x: {c}", _k1_global_frame()))
    if isinstance(c, cli.op): return c.ab_fastF()
    if isinstance(c, cli.applyS):
        f = fastF(c.f)
        if len(c.args) == 0 and len(c.kwargs) == 0: return f
        # extra arguments given at call time are ignored; the ones stored on the applyS are used
        else: return lambda x, *args, **kwargs: f(x, *c.args, **c.kwargs)
    if isinstance(c, BaseCli): return c.__ror__
    return c
class serial(BaseCli):
    """Several clis executed one after another, each fed with the output of the
previous one. This is what ``a | b`` between 2 clis evaluates to."""
    def __init__(self, *clis:List[BaseCli]):
        """Merges clis into 1, feeding end to end. Used in chaining clis
together without a prime iterator. Meaning, without this, stuff like this
fails to run::

    [1, 2] | a() | b() # runs
    c = a() | b(); [1, 2] | c # doesn't run if this class doesn't exist"""
        fs = list(clis); super().__init__(fs); self.clis = fs; self._cache()
    def _cache(self):
        """Recomputes everything derived from ``self.clis``. Has to be called
again whenever ``self.clis`` changes. Returns self."""
        self._hasTrace = any(isinstance(c, cli.trace) for c in self.clis)
        self._cliCs = [fastF(c) for c in self.clis]; return self
    def _typehint(self, inp=None):
        """Threads the type hint through every cli in order. A falsy hint from
any of them is replaced with ``tAny()``."""
        for c in self.clis: inp = c._typehint(inp) or cli.typehint.tAny()
        return inp
    def __ror__(self, it:Iterator[Any]) -> Iterator[Any]:
        """Pipes ``it`` through every cli in order and returns the final result."""
        if self._hasTrace: # slower, but tracable
            for cli in self.clis: it = it | cli
        else: # faster, but not tracable
            for cli in self._cliCs: it = cli(it)
        return it
    def _before(self, c): self.clis = [c] + self.clis; return self._cache() # prepends c, in place
    def _after(self, c): self.clis = self.clis + [c]; return self._cache() # appends c, in place
    def _copy(self): return serial(*self.clis) # new serial over the same cli objects
# Types that oneToMany.__ror__ hands to every cli as is, instead of splitting them
# with itertools.tee. torch.Tensor is only listed when torch could be imported.
atomic.add("baseAnd", (Number, np.number, str, dict, bool, bytes, list, tuple, *([torch.Tensor] if hasTorch else []), np.ndarray, xml.etree.ElementTree.Element), "used by BaseCli.__and__")
def _iterable(it):
    try: iter(it); return True
    except: return False
class oneToMany(BaseCli):
    """Feeds the same input to several clis and yields 1 result per cli. This is
what ``a & b`` between 2 clis evaluates to."""
    def __init__(self, *clis:List[BaseCli]):
        """Duplicates 1 stream into multiple streams, each for a cli in the
list. Used in the "a & b" joining operator. See also: :meth:`BaseCli.__and__`"""
        fs = list(clis); super().__init__(fs); self.clis = fs; self._cache()
    def _typehint(self, inp=None):
        """Collects the output hint of every cli for the same input hint. A cli
whose ``_typehint`` raises contributes ``tAny()``."""
        ts = []
        for f in self.clis:
            try: ts.append(f._typehint(inp))
            except: ts.append(cli.typehint.tAny())
        return cli.typehint.tCollection(*ts).reduce()
    def __ror__(self, it:Iterator[Any]) -> Iterator[Iterator[Any]]:
        """Lazily yields ``cli(it)`` for every cli, in order."""
        if isinstance(it, atomic.baseAnd) or isinstance(it, k1lib.cli.splitSeek) or not _iterable(it):
            # atomic or non-iterable input: every cli gets the very same object
            for cli in self._cliCs: yield cli(it)
        else:
            # every cli gets its own independent iterator over the input
            its = itertools.tee(it, len(self.clis))
            for cli, it in zip(self._cliCs, its): yield cli(it)
    def _cache(self): self._cliCs = [fastF(c) for c in self.clis]; return self # rebuilds the fast callables
    def _before(self, c): self.clis = [c] + self.clis; return self._cache() # prepends c, in place
    def _after(self, c): self.clis = self.clis + [c]; return self._cache() # appends c, in place
    def _copy(self): return oneToMany(*self.clis) # new oneToMany over the same cli objects
class mtmS(BaseCli):
    """Pairs up incoming streams with clis, 1 stream per cli, and yields 1 result
per pair. This is what ``a + b`` between 2 clis evaluates to."""
    def __init__(self, *clis:List[BaseCli]):
        """Applies multiple streams to multiple clis independently. Used in
the "a + b" joining operator. See also: :meth:`BaseCli.__add__`.

Weird name is actually a shorthand for "many to many specific"."""
        fs = list(clis); super().__init__(fs=fs); self.clis = fs; self._cache()
    def _inpTypeHintExpand(self, t):
        """Splits the input hint into 1 hint per cli. Hints that are not one of
the collection-like hint types are replaced by ``tAny()`` for every cli."""
        n = len(self.clis)
        if isinstance(t, (cli.typehint.tCollection, *cli.typehint.tListIterSet, cli.typehint.tArrayTypes)): return t.expand(n)
        else: return [cli.typehint.tAny()]*n
    def _typehint(self, t=None):
        """Collects the output hint of every cli for its share of the input hint.
A cli whose ``_typehint`` raises contributes ``tAny()``."""
        outTs = []
        for c, t in zip(self.clis, self._inpTypeHintExpand(t)):
            try: outTs.append(c._typehint(t))
            except: outTs.append(cli.typehint.tAny())
        return cli.typehint.tCollection(*outTs).reduce()
    def _cache(self): self._cliCs = [fastF(c) for c in self.clis]; return self # rebuilds the fast callables
    def _before(self, c): self.clis = [c] + self.clis; return self._cache() # prepends c, in place
    def _after(self, c): self.clis = self.clis + [c]; return self._cache() # appends c, in place
    def __ror__(self, its:Iterator[Any]) -> Iterator[Any]:
        """Lazily yields ``cli(stream)`` for each (cli, stream) pair. Stops at
whichever of the 2 runs out first."""
        for cli, it in zip(self._cliCs, its): yield cli(it)
    @staticmethod
    def f(f, i:int, n:int=100):
        """Convenience method, so
that this::

    mtmS(iden(), op()**2, iden(), iden(), iden())
    # also the same as this btw:
    (iden() + op()**2 + iden() + iden() + iden())

is the same as this::

    mtmS.f(op()**2, 1, 5)

Example::

    # returns [5, 36, 7, 8, 9]
    range(5, 10) | mtmS.f(op()**2, 1, 5) | deref()

:param f: the cli/function to place at position ``i``
:param i: where should I put the function?
:param n: how many clis in total? Defaulted to 100"""
        return mtmS(*([cli.iden()]*i + [f] + [cli.iden()]*(n-i-1)))
    def _copy(self): return mtmS(*self.clis) # new mtmS over the same cli objects
def patchNumpy():
    """Patches numpy arrays and data types, so that piping like
this work::

    a = np.random.randn(3)
    a | shape() # returns (3,)

Does nothing if it has already run successfully. If patching is not possible
(for example `forbiddenfruit` is not installed), a warning is emitted instead of
an exception."""
    if getattr(np, "_k1_patched", False): return
    try:
        import forbiddenfruit, inspect
        def _guardOr(oldOr):
            """Wraps 1 specific original ``__or__``. Going through a factory gives
every patched type its own ``oldOr``, instead of all of them sharing whichever
one a loop variable happened to hold last."""
            def _newOr(self, v):
                if isinstance(v, BaseCli): return NotImplemented # lets Python fall back to v.__ror__
                try: return oldOr(self, v)
                except Exception: warnings.warn(traceback.format_exc())
            return _newOr
        forbiddenfruit.curse(np.ndarray, "__or__", _guardOr(np.ndarray.__or__))
        # all of numpy's non-integer numeric types. A set, because numpy exposes the same
        # class under several names and each class should only be wrapped once
        numericTypes = {x for x in list(vars(np).values()) if inspect.isclass(x) and issubclass(x, np.number) and not issubclass(x, np.integer)}
        for _type in numericTypes: forbiddenfruit.curse(_type, "__or__", _guardOr(_type.__or__))
        np._k1_patched = True
    except Exception as e: warnings.warn(f"Tried to patch __or__ operator of built-in type `np.ndarray` but can't because: {e}")
# The dict view classes are not reachable by name, so they are taken from sample
# views. The original ``__or__`` implementations are captured here, at import time,
# so that patchDict()'s replacement can still delegate to them.
dict_keys = type({"a": 3}.keys());   oldDKOr = dict_keys.__or__
dict_items = type({"a": 3}.items()); oldDIOr = dict_items.__or__
oldSetOr = set.__or__
def patchDict():
    """Patches dictionaries's items and keys, so that piping
works::

    d = {"a": 3, "b": 4}
    d.keys() | deref() # returns ["a", "b"]
    d.items() | deref() # returns [["a", 3], ["b", 4]]

Does nothing if it has already run successfully. If patching is not possible
(for example `forbiddenfruit` is not installed), a warning is emitted instead of
an exception."""
    # the "already patched" flag lives on the numpy module object
    if getattr(np, "_k1_dict_patched", False): return
    try:
        import forbiddenfruit, traceback
        def _newDOr(self, v):
            """Why is this so weird? For some reason, if you patch dict_keys, you will
            also patch dict_items. So, if you were to have 2 functions, one for each,
            then they will override each other. The way forward is to have 1 single
            function detect whether it's dict_keys or dict_items, and call the correct
            original function. So why are there 2 curses? Well cause I'm lazy to check
            for this behavior in multiple python versions, so just have 2 to make sure."""
            if isinstance(v, BaseCli): return NotImplemented # lets Python fall back to v.__ror__
            try:
                if isinstance(self, dict_keys): return oldDKOr(self, v)
                # dict_items receivers used to match no branch at all and got None back
                elif isinstance(self, dict_items): return oldDIOr(self, v)
                elif isinstance(self, dict):
                    if isinstance(v, dict_keys): return oldSetOr(set(self.keys()), set(v))
                    return oldDIOr(self, v)
                elif isinstance(self, set):
                    if isinstance(v, dict_keys): return oldSetOr(self, set(v))
                    return oldSetOr(self, v)
            except Exception:
                warnings.warn(f"{traceback.format_exc()}\nself: {self} {type(self)}, v: {v} {type(v)}")
            # failure, or a receiver type that is not handled above
            return NotImplemented
        forbiddenfruit.curse(dict_keys, "__or__", _newDOr)
        forbiddenfruit.curse(dict_items, "__or__", _newDOr)
        np._k1_dict_patched = True
    except Exception as e: warnings.warn(f"Tried to patch __or__ operator of built-in type `dict_keys` and `dict_items` but can't because: {e}")
def patchPandas():
    """Patches panda's :class:`pandas.core.series.Series` and
:class:`pandas.core.frame.DataFrame` so that piping works::

    pd.read_csv("a.csv")["col3"] | shape()

Does nothing if pandas can't be imported, or if this has already run successfully.
If patching is not possible (for example `forbiddenfruit` is not installed), a
warning is emitted instead of an exception."""
    try:
        import pandas as pd
    except Exception: return # pandas is optional
    if getattr(pd, "_k1_patched", False): return
    try:
        import forbiddenfruit
        def _guardOr(oldOr):
            """Wraps 1 specific original ``__or__``, so that each patched class
keeps delegating to its own implementation."""
            def _newOr(self, v):
                if isinstance(v, BaseCli): return NotImplemented # lets Python fall back to v.__ror__
                try: return oldOr(self, v)
                except Exception: warnings.warn(traceback.format_exc())
            return _newOr
        # the original must be the `__or__` method of each class, not the class itself
        for _cls in (pd.core.series.Series, pd.core.frame.DataFrame):
            forbiddenfruit.curse(_cls, "__or__", _guardOr(_cls.__or__))
        pd._k1_patched = True
    except Exception as e: warnings.warn(f"Tried to patch __or__ operator of `pd.core.series.Series` and `pd.core.frame.DataFrame` but can't because: {e}")