# AUTOGENERATED FILE! PLEASE DON'T EDIT
"""
.. module:: k1lib
"""
from typing import Callable, Iterator, Tuple, Union, Dict, Any, List
from k1lib import isNumeric; import k1lib, contextlib, warnings
import random, torch, math, sys, io, os, numpy as np
import matplotlib.pyplot as plt
# Names exported by a star-import of this module. Module-level helpers that are
# not listed here (e.g. ``yieldLowest``, ``join``, ``neg``) are not exported.
__all__ = ["Object", "Range", "Domain", "AutoIncrement", "Wrapper", "Every",
"RunOnce", "MaxDepth", "MovingAvg", "Absorber",
"Settings", "settings", "_settings", "UValue"]
class Object:
    """Convenience class that acts like :class:`~collections.defaultdict`. You
    can use it like a normal object::

        a = k1lib.Object()
        a.b = 3
        print(a.b) # outputs "3"

    ``__repr__()`` output is pretty nice too:

    .. code-block:: text

        <class '__main__.Object'>, with attrs:
        - b

    You can instantiate it from a dict::

        a = k1lib.Object.fromDict({"b": 3, "c": 4})
        print(a.c) # outputs "4"

    And you can specify a default value, just like defaultdict::

        a = k1lib.Object().withAutoDeclare(lambda: [])
        a.texts.extend(["factorio", "world of warcraft"])
        print(a.texts[0]) # outputs "factorio"

    .. warning::

        Default values only work with variables that don't start with an
        underscore "_".

    Treating it like defaultdict is okay too::

        a = k1lib.Object().withAutoDeclare(lambda: [])
        a["movies"].append("dune")
        print(a.movies[0]) # outputs "dune" """
    def __init__(self): self._defaultValueGenerator = None; self.repr = None
    @staticmethod
    def fromDict(_dict:Dict[str, Any]):
        """Creates an object with attributes from a dictionary"""
        answer = Object(); answer.__dict__.update(_dict); return answer
    @property
    def state(self) -> dict:
        """Essentially ``__dict__``, but only outputs the fields you
        defined. If your framework intentionally set some attributes, those
        will be reported too, so beware"""
        answer = dict(self.__dict__)
        # pop() instead of del, so a missing bookkeeping key can't raise KeyError
        answer.pop("_defaultValueGenerator", None); answer.pop("repr", None)
        return answer
    def withAutoDeclare(self, defaultValueGenerator):
        """Sets this Object up so that if a field doesn't
        exist, it will automatically create it with a
        default value.

        :param defaultValueGenerator: zero-argument callable producing the default
        :return: this object, so calls can be chained"""
        self._defaultValueGenerator = defaultValueGenerator; return self
    def __getitem__(self, idx): return getattr(self, idx)
    def __setitem__(self, idx, value): setattr(self, idx, value)
    def __iter__(self): yield from self.state.values()
    def __contains__(self, item:str): return item in self.__dict__
    def __getattr__(self, attr):
        """Only called when normal lookup fails. Auto declares the field if a
        default value generator was configured, else raises :class:`AttributeError`."""
        # private names are never auto declared
        if attr.startswith("_"): raise AttributeError(attr)
        if attr == "getdoc": raise AttributeError("This param is used internally in module `IPython.core.oinspect`, so you kinda have to set it specifically yourself instead of relying on auto declare")
        # read through __dict__ directly so a half-initialized instance can't recurse here
        generator = self.__dict__.get("_defaultValueGenerator")
        if generator is not None:
            value = self.__dict__[attr] = generator()
            return value
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")
    def __delitem__(self, key): del self.__dict__[key]
    def withRepr(self, _repr:str):
        """Specify output of ``__repr__()``. Legacy code. You can just
        monkey patch it instead."""
        self.repr = _repr; return self
    def __repr__(self):
        if self.repr: return self.repr
        attrs = "\n".join(f"- {k}" for k in self.state)
        return f"{type(self)}, with attrs:\n{attrs}"
# Shorthands for negative and positive infinity
ninf, inf = float("-inf"), float("inf")
class Range:
    """A range of numbers. It's just 2 numbers really: start and stop

    This is essentially a convenience class to provide a nice, clean
    abstraction and to eliminate errors. You can transform values::

        Range(10, 20).toUnit(13) # returns 0.3
        Range(10, 20).fromUnit(0.3) # returns 13
        Range(10, 20).toRange(Range(20, 10), 13) # returns 17

    You can also do random math operations on it::

        (Range(10, 20) * 2 + 3) == Range(23, 43) # returns True
        Range(10, 20) == ~Range(20, 10) # returns True"""
    def __init__(self, start=0, stop=None):
        """Creates a new Range.

        There are different ``__init__`` functions for many situations:

        - Range(2, 11.1): create range [2, 11.1]
        - Range(15.2): creates range [0, 15.2]
        - Range(Range(2, 3)): create range [2, 3]. This serves as sort of a catch-all
        - Range(slice(2, 5, 2)): creates range [2, 5]. Can also be a :class:`range`
        - Range(slice(2, -1), 10): creates range [2, 9]
        - Range([1, 2, 7, 5]): creates range [1, 5]. Can also be a tuple

        :raises AttributeError: if the argument combination is not understood
        """
        if isNumeric(start) and isNumeric(stop):
            self.start, self.stop = start, stop
        elif isNumeric(start) and stop is None:
            self.start, self.stop = 0, start
        elif stop is None and isinstance(start, (range, slice, Range)):
            self.start, self.stop = start.start, start.stop
        elif isNumeric(stop) and isinstance(start, slice):
            # resolve negative/missing slice bounds against a sequence of length `stop`
            r = range(stop)[start]; self.start, self.stop = r.start, r.stop
        elif isinstance(start, (list, tuple)):
            self.start, self.stop = start[0], start[-1]
        else: raise AttributeError(f"Don't understand {start} and {stop}")
        self.delta = self.stop - self.start
    def __getitem__(self, index):
        """0 for start, 1 for stop

        You can also pass in a :class:`slice` object, in which case, a range subset
        will be returned. Code kinda looks like this::

            range(start, stop)[index]

        :raises IndexError: for any other index"""
        if isinstance(index, slice):
            return Range(range(self.start, self.stop)[index])
        if index == 0: return self.start
        if index == 1: return self.stop
        raise IndexError(f"Can't get index {index} of range [{self.start}, {self.stop}]")
    def fixOrder(self) -> "Range":
        """If start greater than stop, switch the 2, else do nothing"""
        if self.start > self.stop:
            self.start, self.stop = self.stop, self.start
            # keep the cached width in sync, else toUnit()/fromUnit() use the old sign
            self.delta = self.stop - self.start
        return self
    def _common(self, x, f:Callable[[float], float]):
        """Applies ``f`` to a number, to every element of a list/tuple (returns a
        list), or to both ends of a range/slice/Range (returns a :class:`Range`)."""
        if isNumeric(x): return f(x)
        if isinstance(x, (list, tuple)):
            return [self._common(elem, f) for elem in x]
        if isinstance(x, (range, slice, Range)):
            # missing slice ends default to 0 and 1 respectively
            lo = x.start if x.start is not None else 0
            hi = x.stop if x.stop is not None else 1
            return Range(self._common(lo, f), self._common(hi, f))
        raise AttributeError(f"Doesn't understand {x}")
    def __iter__(self): yield self.start; yield self.stop
    def intIter(self, step:int=1) -> Iterator[int]:
        """Returns integers within this Range"""
        return range(int(self.start), int(self.stop), step)
    def toUnit(self, x):
        """Converts x from current range to [0, 1] range. Example::

            r = Range(2, 10)
            r.toUnit(5) # will return 0.375, as that is (5-2)/(10-2)

        You can actually pass in a lot in place of x::

            r = Range(0, 10)
            r.toUnit([5, 3, 6]) # will be [0.5, 0.3, 0.6]. Can also be a tuple
            r.toUnit(slice(5, 6)) # will be Range(0.5, 0.6). Can also be a range, or Range

        .. note::

            In the last case, if ``start`` is None, it gets defaulted to 0, and
            if ``end`` is None, it gets defaulted to 1

        If this range has zero width, every value maps to ``nan``.
        """
        def f(x):
            if self.delta == 0: return float("nan")
            return (x - self.start) / self.delta
        return self._common(x, f)
    def fromUnit(self, x):
        """Converts x from [0, 1] range to this range. Example::

            r = Range(0, 10)
            r.fromUnit(0.3) # will return 3

        x can be a lot of things, see :meth:`toUnit` for more"""
        return self._common(x, lambda x: x * self.delta + self.start)
    def toRange(self, _range:"Range", x):
        """Converts x from current range to another range. Example::

            r = Range(0, 10)
            r.toRange(Range(0, 100), 6) # will return 60

        x can be a lot of things, see :meth:`toUnit` for more."""
        target = Range(_range) # converted once, instead of once per element
        return self._common(x, lambda x: target.fromUnit(self.toUnit(x)))
    def fromRange(self, _range:"Range", x):
        """Reverse of :meth:`toRange`, effectively."""
        return Range(_range).toRange(self, x)
    @property
    def range_(self):
        """Returns a :class:`range` object with start and stop values
        rounded off"""
        return range(math.floor(self.start+0.001), math.floor(self.stop+0.001))
    @property
    def slice_(self):
        """Returns a :class:`slice` object with start and stop values
        rounded off"""
        return slice(math.floor(self.start+0.001), math.floor(self.stop+0.001))
    @staticmethod
    def proportionalSlice(r1, r2, r1Slice:slice) -> Tuple["Range", "Range"]:
        """Slices r1 and r2 proportionally. Best to explain using an
        example. Let's say you have 2 arrays created from a time-dependent
        procedure like this::

            a = []; b = []
            for t in range(100):
                if t % 3 == 0: a.append(t)
                if t % 5 == 0: b.append(1 - t)
            len(a), len(b) # returns (34, 20)

        a and b are of different lengths, but you want to plot both from 30%
        mark to 50% mark (for a, it's elements 10 -> 17, for b it's 6 -> 10),
        as they are time-dependent. As you can probably tell, to get the indicies
        10, 17, 6, 10 is messy. So, you can do something like this instead::

            r1, r2 = Range.proportionalSlice(Range(len(a)), Range(len(b)), slice(10, 17))

        This will return the Ranges [10, 17] and [5.88, 10]

        Then, you can plot both of them side by side like this::

            fig, axes = plt.subplots(ncols=2)
            axes[0].plot(r1.range_, a[r1.slice_])
            axes[1].plot(r2.range_, b[r2.slice_])
        """
        r1, r2 = Range(r1), Range(r2)
        ar1 = r1[r1Slice]; ar2 = r1.toRange(r2, ar1)
        return ar1, ar2
    def bound(self, rs:Union[range, slice]) -> Union[range, slice]:
        """If input range|slice's stop and start is missing, then use this
        range's start and stop instead."""
        # NOTE(review): `or` also treats an explicit 0 as "missing"; confirm whether
        # that is intended before switching these to `is None` checks
        start = rs.start or self.start
        stop = rs.stop or self.stop
        return type(rs)(start, stop)
    def copy(self): return Range(self.start, self.stop)
    def __str__(self): return f"[{self.start}, {self.stop}]"
    def __eq__(self, _range):
        """Compares both ends, allowing an absolute tolerance of 1e-9."""
        _range = Range(_range)
        return (_range.start == self.start or abs(_range.start - self.start) < 1e-9) and\
            (_range.stop == self.stop or abs(_range.stop - self.stop) < 1e-9)
    def __contains__(self, x:float): return self.start <= x < self.stop
    def __neg__(self): return Range(-self.start, -self.stop)
    def __invert__(self): return Range(self.stop, self.start)
    def __add__(self, num): return Range(self.start + num, self.stop + num)
    def __radd__(self, num): return self + num
    def __mul__(self, num): return Range(self.start * num, self.stop * num)
    def __rmul__(self, num): return self * num
    # previously returned `num * (1/num)`, which ignored the range entirely
    def __truediv__(self, num): return self * (1/num)
    def __rtruediv__(self, num):
        # a bare string can't be raised; TypeError is what callers already observed
        raise TypeError("Doesn't make sense to do this!")
    def __round__(self): return Range(round(self.start), round(self.stop))
    def __ceil__(self): return Range(math.ceil(self.start), math.ceil(self.stop))
    def __floor__(self): return Range(math.floor(self.start), math.floor(self.stop))
    def __repr__(self):
        return f"""A range of numbers: [{self.start}, {self.stop}]. Can do:
- r.toUnit(x): will convert x from range [{self.start}, {self.stop}] to [0, 1]
- r.fromUnit(x): will convert x from range [0, 1] to range [{self.start}, {self.stop}]
- r.toRange([a, b], x): will convert x from range [{self.start}, {self.stop}] to range [a, b]
- r[0], r[1], r.start, r.stop: get start and stop values of range
Note: for conversion methods, you can pass in"""
def yieldLowest(r1s:Iterator["Range"], r2s:Iterator["Range"]):
    """Merges 2 :class:`Range` streams into one, always emitting the pending
    element with the lower ``start`` first, so all (a + b) elements come out.
    Assumes that each stream:

    - Does not intersect with itself
    - Is sorted by start point already

    .. warning::

        This method will sometimes yield the same objects given by the Iterators.
        Make sure you copy each :class:`Range` if your use case requires"""
    def drain(head, rest):
        # flush the pending element, then whatever is left of its stream
        yield head
        yield from rest
    left, right = iter(r1s), iter(r2s)
    a = next(left, None)
    if a is None:
        yield from right
        return
    b = next(right, None)
    if b is None:
        yield from drain(a, left)
        return
    while True:
        # emit from the left stream for as long as it doesn't start later
        while a.start <= b.start:
            yield a
            a = next(left, None)
            if a is None:
                yield from drain(b, right)
                return
        # then from the right stream, under the mirrored condition
        while b.start <= a.start:
            yield b
            b = next(right, None)
            if b is None:
                yield from drain(a, left)
                return
def join(r1s:Iterator["Range"], r2s:Iterator["Range"]):
    """Joins 2 :class:`Range` generators, so that overlaps gets merged
    together. Both inputs are expected to be sorted by start point, as they are
    interleaved using :meth:`yieldLowest`.

    .. warning::

        This method will sometimes yield the same objects given by the Iterators.
        Make sure you copy each :class:`Range` if your use case requires"""
    merged = yieldLowest(r1s, r2s)
    current = next(merged, None)
    if current is None: return
    for upcoming in merged:
        if current.stop >= upcoming.start:
            # Build a fresh Range rather than mutating `.stop` on a copy, so the
            # derived `.delta` matches the widened bounds
            current = Range(current.start, max(current.stop, upcoming.stop))
        else:
            yield current
            current = upcoming
    yield current
def neg(rs:List["Range"]):
    """Yields the gaps around and between the given ranges, i.e. R - rs, where
    R is the set of real numbers."""
    remaining = iter(rs)
    first = next(remaining, None)
    if first is None:
        # nothing excluded: the complement is the whole real line
        yield Range(ninf, inf)
        return
    # leading gap, unless the first range already starts at -inf
    if ninf < first.start: yield Range(ninf, first.start)
    gapStart = first.stop
    for r in remaining:
        yield Range(gapStart, r.start)
        gapStart = r.stop
    # trailing gap, unless the last range already reaches +inf
    if gapStart < inf: yield Range(gapStart, inf)
class Domain:
    def __init__(self, *ranges, dontCheck:bool=False):
        """Creates a new domain.

        :param ranges: each element is a :class:`Range`, although any format will be fine as this selects for that
        :param dontCheck: don't sanitize inputs, intended to boost perf internally only

        A domain is just an array of :class:`Range` that represents what intervals on
        the real number line is chosen. Some examples::

            inf = float("inf") # shorthand for infinity
            Domain([5, 7.5], [2, 3]) # represents "[2, 3) U [5, 7.5)"
            Domain([2, 3.2], [3, 8]) # represents "[2, 8)" as overlaps are merged
            -Domain([2, 3]) # represents "(-inf, 2) U [3, inf)", so essentially R - d, with R being the set of real numbers
            -Domain([-inf, 3]) # represents "[3, inf)"
            Domain.fromInts(2, 3, 6) # represents "[2, 4) U [6, 7)"

        You can also do arithmetic on them, and check "in" operator::

            Domain([2, 3]) + Domain([4, 5]) # represents "[2, 3) U [4, 5)"
            Domain([2, 3]) + Domain([2.9, 5]) # represents "[2, 5)", also merges overlaps
            3 in Domain([2, 3]) # returns False
            2 in Domain([2, 3]) # returns True"""
        if dontCheck: self.ranges = list(ranges); return
        # convert all to Range type, fix its order, and sort based on .start
        ranges = [(r if isinstance(r, Range) else Range(r)).fixOrder() for r in ranges]
        ranges = sorted(ranges, key=lambda r: r.start)
        # merges overlapping segments
        self.ranges = list(join(ranges, []))
    @staticmethod
    def fromInts(*ints:int):
        """Returns a new :class:`Domain` which has ranges [i, i+1] for each
        int given."""
        return Domain(*(Range(i, i+1) for i in ints))
    def copy(self): return Domain(*(r.copy() for r in self.ranges))
    def intIter(self, step:int=1, start:int=0):
        """Yields ints in all ranges of this domain. If first range's domain
        is :math:`(-\\inf, a)`, then starts at the specified integer"""
        for r in self.ranges:
            # a range open towards -inf has no first integer, so fall back to `start`
            x = int(start) if r.start == -math.inf else int(r.start)
            while x < r.stop: yield x; x += step
    def __neg__(self): return Domain(*neg(self.ranges), dontCheck=True)
    def __add__(self, domain): return Domain(*(r.copy() for r in join(self.ranges, domain.ranges)), dontCheck=True)
    def __sub__(self, domain): return self + (-domain)
    def __eq__(self, domain):
        # let Python fall back to its default for foreign types instead of
        # crashing on a missing `.ranges`
        if not isinstance(domain, Domain): return NotImplemented
        return self.ranges == domain.ranges
    # str.join() needs strings; the ranges have to be converted first
    def __str__(self): return f"Domain: {', '.join(str(r) for r in self.ranges)}"
    def __contains__(self, x): return any(x in r for r in self.ranges)
    def __repr__(self):
        rs = '\n'.join(f"- {r}" for r in self.ranges)
        return f"""Domain:\n{rs}\n\nCan:
- 3 in d: check whether a number is in this domain or not
- d1 + d2: joins 2 domain
- -d: excludes the domain from R
- d1 - d2: same as d1 + (-d2)"""
class AutoIncrement:
    def __init__(self, initialValue:int=-1, n:int=float("inf"), prefix:str=None):
        """Creates a new AutoIncrement object. Every time the object is called
        it gets incremented by 1 automatically. Example::

            a = k1lib.AutoIncrement()
            a() # returns 0
            a() # returns 1
            a() # returns 2
            a.value # returns 2
            a.value # returns 2
            a() # returns 3

            a = AutoIncrement(n=3, prefix="cluster_")
            a() # returns "cluster_0"
            a() # returns "cluster_1"
            a() # returns "cluster_2"
            a() # returns "cluster_0"

        :param initialValue: counter value before the first call
        :param n: if specified, then will wrap around to 0 when hit this number
        :param prefix: if specified, will yield strings with specified prefix"""
        self.value = initialValue; self.n = n; self.prefix = prefix
    @staticmethod
    def random() -> "AutoIncrement":
        """Creates a new AutoIncrement object that has a random integer initial value"""
        # randint() needs integer bounds; the float literal 1e9 is rejected on newer Pythons
        return AutoIncrement(random.randint(0, 10**9))
    @property
    def value(self):
        """Get the value as-is, without auto incrementing it"""
        if self.prefix is None: return self._value
        return f"{self.prefix}{self._value}"
    @value.setter
    def value(self, value): self._value = value
    def __call__(self):
        """Increments internal counter, and return it."""
        self._value += 1
        if self._value >= self.n: self._value = 0
        return self.value
class Wrapper:
    value:Any
    """Internal value of this :class:`Wrapper`"""
    def __init__(self, value):
        """Holds on to a value, which is handed back when the wrapper is called.
        Example::

            a = k1lib.Wrapper(list(range(int(1e7))))
            # returns [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
            a()[:10]

        This exists just so that Jupyter Lab's contextual help won't automatically
        display the (possibly humongous) value. Could be useful if you want to pass a
        value by reference everywhere like this::

            o = k1lib.Wrapper(None)
            def f(obj):
                obj.value = 3
            f(o)
            o() # returns 3"""
        self.value = value
    def __call__(self):
        """Returns the wrapped value."""
        return self.value
class Every:
    def __init__(self, n):
        """Callable counter that reports True once every ``n`` calls, starting
        with the very first one.
        Example::

            e = k1lib.Every(4)
            e() # returns True
            e() # returns False
            e() # returns False
            e() # returns False
            e() # returns True"""
        self.n = n
        self.i = -1 # so that the first call lands on count 0
    def __call__(self) -> bool:
        """Advances the internal count, then reports :attr:`value`."""
        self.i += 1
        return self.value
    @property
    def value(self) -> bool:
        """Whether the current count is a multiple of ``n``."""
        return not (self.i % self.n)
class RunOnce:
    def __init__(self):
        """Flag that reads False on the first check and True on every later one.
        Example::

            r = k1lib.RunOnce()
            r.done() # returns False
            r.done() # returns True
            r.done() # returns True
            r.revert()
            r.done() # returns False
            r.done() # returns True
            r.done() # returns True

        May be useful in situations like::

            class A:
                def __init__(self):
                    self.ro = k1lib.RunOnce()
                def f(self, x):
                    if self.ro.done(): return 3 + x
                    return 5 + x
            a = A()
            a.f(4) # returns 9
            a.f(4) # returns 7"""
        self.value = False
    def done(self):
        """Whether this has been called once before."""
        previous, self.value = self.value, True
        return previous
    def __call__(self):
        """Alias of :meth:`done`."""
        return self.done()
    def revert(self):
        """Resets the flag, so the next check reads False again."""
        self.value = False
class MaxDepth:
    def __init__(self, maxDepth:int, depth:int=0):
        """Small helper for capping how deep a recursion/graph walk goes.
        Example::

            def f(d):
                print(d.depth)
                if d: f(d.enter())
            # prints "0\\n1\\n2\\n3"
            f(k1lib.MaxDepth(3))

        Of course, this might look unpleasant to the end user, so this is more
        likely for internal tools.

        :param maxDepth: depth at which the object turns falsy
        :param depth: current depth"""
        self.maxDepth = maxDepth
        self.depth = depth
    def enter(self) -> "MaxDepth":
        """Returns a new object, 1 level deeper. This one is left untouched."""
        deeper = self.depth + 1
        return MaxDepth(self.maxDepth, deeper)
    def __bool__(self):
        """True while the current depth is still below the maximum."""
        return self.maxDepth > self.depth
    def __call__(self):
        """Alias of :meth:`__bool__`."""
        return bool(self)
class MovingAvg:
    def __init__(self, initV:float=0, alpha=0.9, debias=False):
        """Exponential moving average over values fed in one at a time.
        Example::

            a = k1lib.MovingAvg(5)
            a(3).value # returns 4.8, because 0.9*5 + 0.1*3 = 4.8
            a(3).value # returns 4.62

        Difference between normal and debias modes::

            x = torch.linspace(0, 10, 100); y = torch.cos(x) | op().item().all() | deref()
            plt.plot(x, y);
            a = k1lib.MovingAvg(debias=False); plt.plot(x, y | apply(lambda y: a(y).value) | deref())
            a = k1lib.MovingAvg(debias=True); plt.plot(x, y | apply(lambda y: a(y).value) | deref())
            plt.legend(["Signal", "Normal", "Debiased"])

        .. image:: images/movingAvg.png

        As you can see, normal mode still has the influence of the initial value at
        0 and can't rise up fast, whereas the debias mode will ignore the initial
        value and immediately snaps to the first saved value.

        :param initV: initial value
        :param alpha: number in [0, 1]. Basically how much to keep old value?
        :param debias: whether to debias the initial value"""
        self.value = initV
        self.alpha = alpha
        self.debias = debias
        self.m = self.value # raw (not debiased) running average
        self.t = 0 # number of updates seen, only advanced in debias mode
    def __call__(self, value):
        """Folds a new value into the average. Returns this object, so you can
        read ``.value`` right away."""
        self.m = self.m * self.alpha + value * (1 - self.alpha)
        if not self.debias:
            self.value = self.m
            return self
        self.t += 1
        self.value = self.m / (1 - self.alpha**self.t)
        return self
    # arithmetic operates on the current average and returns plain numbers
    def __add__(self, o): return self.value + o
    def __radd__(self, o): return o + self.value
    def __sub__(self, o): return self.value - o
    def __rsub__(self, o): return o - self.value
    def __mul__(self, o): return self.value * o
    def __rmul__(self, o): return o * self.value
    def __truediv__(self, o): return self.value / o
    def __rtruediv__(self, o): return o / self.value
    def __repr__(self):
        return f"Moving average: {self.value}, alpha: {self.alpha}"
# Name of the attribute that switches Absorber between "really set attributes"
# (True) and "absorb attribute assignments as steps" (False)
sen = "_ab_sentinel"
class Absorber:
    """Creates an object that absorbes every operation done on it. Could be
    useful in some scenarios::

        ab = k1lib.Absorber()
        # absorbs all operations done on the object
        abs(ab[::3].sum(dim=1))
        t = torch.randn(5, 3, 3)
        # returns transformed tensor of size [2, 3]
        ab.ab_operate(t)

    Another::

        ab = Absorber()
        ab[2] = -50
        # returns [0, 1, -50, 3, 4]
        ab.ab_operate(list(range(5)))

    Because this object absorbs every operation done on it, you have to be gentle with
    it, as any unplanned disturbances might throw your code off. Best to create a new
    one on the fly, and pass them immediately to functions, because if you're in a
    notebook environment like Jupyter, it might poke at variables.

    For extended code example that utilizes this, check over :class:`k1lib.cli.modifier.op`
    source code."""
    def __init__(self, initDict:dict=None):
        """Creates a new Absorber.

        :param initDict: initial variables to set, as setattr operation is normally absorbed"""
        self._ab_sentinel = True
        self._ab_steps = []
        for k, v in (initDict or {}).items(): setattr(self, k, v)
        self._ab_sentinel = False
    def _ab_record(self, desc, step):
        """Appends one ``[description, function]`` step, and returns this object
        so that operations can be chained."""
        self._ab_steps.append([desc, step]); return self
    def ab_operate(self, x):
        """Special method to actually operate on an object and get the result. Not
        absorbed. Example::

            # returns 6
            (op() * 2).ab_operate(3)"""
        for desc, step in self._ab_steps: x = step(x)
        return x
    def ab_fastF(self):
        """Returns a function that operates on the input (just like :meth:`ab_operate`),
        but much faster, suitable for high performance tasks. Example::

            f = (k1lib.Absorber() * 2).ab_fastF()
            # returns 6
            f(3)"""
        s = self._ab_steps; l = len(s)
        # short chains are unrolled into a single lambda, to skip the loop in ab_operate()
        if l == 0: return lambda x: x
        if l == 1: return s[0][1]
        if l == 2:
            a, b = s[0][1], s[1][1]
            return lambda x: b(a(x))
        if l == 3:
            a, b, c = s[0][1], s[1][1], s[2][1]
            return lambda x: c(b(a(x)))
        if l == 4:
            a, b, c, d = s[0][1], s[1][1], s[2][1], s[3][1]
            return lambda x: d(c(b(a(x))))
        if l == 5:
            a, b, c, d, e = s[0][1], s[1][1], s[2][1], s[3][1], s[4][1]
            return lambda x: e(d(c(b(a(x)))))
        return self.ab_operate
    def __getattr__(self, idx):
        # names starting with "_" are never absorbed, so missing internals fail loudly
        if isinstance(idx, str) and idx.startswith("_"): raise AttributeError(idx)
        return self._ab_record(["__getattr__", idx], lambda x: getattr(x, idx))
    def __setattr__(self, k, v):
        """Only allows legit variable setting when '_ab_sentinel' is True. Absorbs
        operations if it's False."""
        if k == sen or self.__dict__[sen]: self.__dict__[k] = v
        else:
            def f(x): setattr(x, k, v); return x
            self._ab_record(["__setattr__", [k, v]], f)
        return self
    def __getitem__(self, idx): return self._ab_record(["__getitem__", idx], lambda x: x[idx])
    def __setitem__(self, k, v):
        def f(x): x[k] = v; return x
        return self._ab_record(["__setitem__", [k, v]], f)
    def __call__(self, *args, **kwargs):
        return self._ab_record(["__call__", [args, kwargs]], lambda x: x(*args, **kwargs))
    # NOTE: this records the step, but the builtin len() will still reject the
    # non-int return value. Use ab_len() instead
    def __len__(self): return self._ab_record(["__len__"], lambda x: len(x))
    def __add__(self, o): return self._ab_record(["__add__", o], lambda x: x+o)
    def __radd__(self, o): return self._ab_record(["__radd__", o], lambda x: o+x)
    def __sub__(self, o): return self._ab_record(["__sub__", o], lambda x: x-o)
    def __rsub__(self, o): return self._ab_record(["__rsub__", o], lambda x: o-x)
    def __mul__(self, o): return self._ab_record(["__mul__", o], lambda x: x*o)
    def __rmul__(self, o): return self._ab_record(["__rmul__", o], lambda x: o*x)
    def __matmul__(self, o): return self._ab_record(["__matmul__", o], lambda x: x@o)
    def __rmatmul__(self, o): return self._ab_record(["__rmatmul__", o], lambda x: o@x)
    def __truediv__(self, o): return self._ab_record(["__truediv__", o], lambda x: x/o)
    def __rtruediv__(self, o): return self._ab_record(["__rtruediv__", o], lambda x: o/x)
    def __floordiv__(self, o): return self._ab_record(["__floordiv__", o], lambda x: x//o)
    def __rfloordiv__(self, o): return self._ab_record(["__rfloordiv__", o], lambda x: o//x)
    def __mod__(self, o): return self._ab_record(["__mod__", o], lambda x: x%o)
    def __rmod__(self, o): return self._ab_record(["__rmod__", o], lambda x: o%x)
    def __pow__(self, o): return self._ab_record(["__pow__", o], lambda x: x**o)
    def __rpow__(self, o): return self._ab_record(["__rpow__", o], lambda x: o**x)
    def __lshift__(self, o): return self._ab_record(["__lshift__", o], lambda x: x<<o)
    def __rlshift__(self, o): return self._ab_record(["__rlshift__", o], lambda x: o<<x)
    def __rshift__(self, o): return self._ab_record(["__rshift__", o], lambda x: x>>o)
    def __rrshift__(self, o): return self._ab_record(["__rrshift__", o], lambda x: o>>x)
    def __and__(self, o): return self._ab_record(["__and__", o], lambda x: x&o)
    def __rand__(self, o): return self._ab_record(["__rand__", o], lambda x: o&x)
    def __xor__(self, o): return self._ab_record(["__xor__", o], lambda x: x^o)
    def __rxor__(self, o): return self._ab_record(["__rxor__", o], lambda x: o^x)
    def __or__(self, o): return self._ab_record(["__or__", o], lambda x: x|o)
    def __ror__(self, o): return self._ab_record(["__ror__", o], lambda x: o|x)
    def __lt__(self, o): return self._ab_record(["__lt__", o], lambda x: x<o)
    def __le__(self, o): return self._ab_record(["__le__", o], lambda x: x<=o)
    def __eq__(self, o): return self._ab_record(["__eq__", o], lambda x: x==o)
    def __ne__(self, o): return self._ab_record(["__ne__", o], lambda x: x!=o)
    def __gt__(self, o): return self._ab_record(["__gt__", o], lambda x: x>o)
    def __ge__(self, o): return self._ab_record(["__ge__", o], lambda x: x>=o)
    def __neg__(self): return self._ab_record(["__neg__"], lambda x: -x)
    def __pos__(self): return self._ab_record(["__pos__"], lambda x: +x)
    def __abs__(self): return self._ab_record(["__abs__"], lambda x: abs(x))
    def __invert__(self): return self._ab_record(["__invert__"], lambda x: ~x)
    def ab_int(self):
        """Replacement for ``int(ab)``, as that requires returning an actual :class:`int`."""
        return self._ab_record(["__int__"], lambda x: int(x))
    # Delegates to ab_int(). `self.int()` would have been absorbed as a getattr
    # step plus a call step. The builtin int() still rejects the non-int return
    def __int__(self): return self.ab_int()
    def ab_float(self):
        """Replacement for ``float(ab)``, as that requires returning an actual :class:`float`."""
        return self._ab_record(["__float__"], lambda x: float(x))
    # Delegates to ab_float(), for the same reason as __int__ above
    def __float__(self): return self.ab_float()
    def ab_str(self):
        """Replacement for ``str(ab)``, as that requires returning an actual :class:`str`."""
        return self._ab_record(["__str__"], lambda x: str(x))
    def ab_len(self):
        """Replacement for ``len(ab)``, as that requires returning an actual :class:`int`."""
        return self._ab_record(["__len__"], lambda x: len(x))
    def ab_contains(self, key):
        """Replacement for ``key in ab``, as that requires returning an actual :class:`bool`."""
        return self._ab_record(["__contains__", key], lambda x: key in x)
# Zero-width space, used by Settings.__repr__() as an in-band separator between
# the "name = value" part and the docs part of each line. Guaranteed (mostly)
# to not appear anywhere in the settings, so that it can be pretty printed
sep = "\u200b"
class Settings:
    def __init__(self, **kwargs):
        """Creates a new settings object. Basically fancy version of :class:`dict`.
        Example::

            s = k1lib.Settings(a=3, b="42")
            s.c = k1lib.Settings(d=8)
            s.a # returns 3
            s.b # returns "42"
            s.c.d # returns 8
            print(s) # prints nested settings nicely"""
        # while the sentinel is on, attribute writes don't fire callbacks
        self._setattr_sentinel = True
        for name, value in kwargs.items(): setattr(self, name, value)
        self._docs = dict(); self._cbs = dict()
        self._setattr_sentinel = False
    @contextlib.contextmanager
    def context(self, **kwargs):
        """Context manager to temporarily modify some settings. Applies
        to all sub-settings. Example::

            s = k1lib.Settings(a=3, b="42", c=k1lib.Settings(d=8))
            with s.context(a=4):
                s.c.d = 20
                s.a # returns 4
                s.c.d # returns 20
            s.a # returns 3
            s.c.d # returns 8

        :raises RuntimeError: if a keyword does not name an existing setting"""
        snapshot = dict(self.__dict__)
        for name in kwargs:
            if name not in snapshot:
                raise RuntimeError(f"'{name}' settings not found!")
        try:
            with contextlib.ExitStack() as stack:
                # every sub-settings object snapshots and restores itself too
                for _, child in self._subSettings():
                    stack.enter_context(child.context())
                for name, value in kwargs.items(): setattr(self, name, value)
                yield
        finally:
            for name, value in snapshot.items(): setattr(self, name, value)
    def add(self, k:str, v:Any, docs:str="", cb:Callable[["Settings", Any], None]=None) -> "Settings":
        """Long way to add a variable. Advantage of this is that you can slip in extra
        documentation for the variable. Example::

            s = k1lib.Settings()
            s.add("a", 3, "some docs")
            print(s) # displays the extra docs

        :param cb: callback that takes in (settings, new value) if any property changes"""
        setattr(self, k, v) # value goes in before the new callback is registered
        self._docs[k] = docs
        self._cbs[k] = cb
        return self
    def _docsOf(self, k:str):
        """Docs registered for setting ``k``, or an empty string if there's none."""
        if k in self._docs: return f"{self._docs[k]}"
        return ""
    def _subSettings(self) -> List[Tuple[str, "Settings"]]:
        """Public (name, value) pairs whose value is itself a :class:`Settings`."""
        return [(k, v) for k, v in self.__dict__.items() if isinstance(v, Settings) and not k.startswith("_")]
    def _simpleSettings(self) -> List[Tuple[str, Any]]:
        """Public (name, value) pairs whose value is not a :class:`Settings`."""
        return [(k, v) for k, v in self.__dict__.items() if not isinstance(v, Settings) and not k.startswith("_")]
    def __setattr__(self, k, v):
        """Stores the value, then fires the callback registered for ``k`` (if any),
        unless the sentinel is on."""
        self.__dict__[k] = v
        if k == "_setattr_sentinel" or self._setattr_sentinel: return
        callback = self._cbs.get(k)
        if callback is not None: callback(self, v)
    def __repr__(self):
        """Lists plain settings first, then nested settings, with docs aligned
        into a column."""
        names = [k for k in self.__dict__ if not k.startswith("_")]
        kSpace = max([1, *(names | k1lib.cli.lengths())])
        pieces = ["Settings:\n"]
        for k, v in self._simpleSettings():
            pieces.append(f"- {k.ljust(kSpace)} = {k1lib.limitChars(str(v), settings.displayCutoff)}{sep}{self._docsOf(k)}\n")
        for k, v in self._subSettings():
            sub = v.__repr__().split("\n")[1:-1] | k1lib.cli.tab(" ") | k1lib.cli.join("\n")
            pieces.append(f"- {k.ljust(kSpace)} = <Settings>{sep}{self._docsOf(k)}\n" + sub + "\n")
        s = "".join(pieces)
        return s.split("\n") | k1lib.cli.op().split(sep).all() | k1lib.cli.pretty(sep) | k1lib.cli.join("\n")
# Settings object holding a "test" group, with a toggle for bioinformatics cli tests
_settings = Settings().add("test", Settings().add("bio", True, "whether to test bioinformatics clis that involve strange command line tools like samtools and bwa"))
# Main settings object of the library
settings = Settings().add("displayCutoff", 50, "cutoff length when displaying a Settings object")
settings.add("svgScale", 0.7, "default svg scales for clis that displays graphviz graphs")
def _cb_wd(s, p):
    """Callback for the ``wd`` setting. If ``p`` is not None, expands it to an
    absolute path and changes directory using the saved, original ``os.chdir``.
    Then stores the value straight into ``s.__dict__``, which bypasses
    ``Settings.__setattr__`` and so doesn't fire this callback again."""
    if p != None: p = os.path.abspath(os.path.expanduser(p)); _oschdir(p)
    s.__dict__["wd"] = p
# Replacement for os.chdir: assigns settings.wd, which in turn triggers _cb_wd()
def oschdir(path): settings.wd = path
# Keep a handle on the real os.chdir, then monkeypatch os.chdir with the
# replacement above, copying the original docstring over
_oschdir = os.chdir; os.chdir = oschdir; os.chdir.__doc__ = _oschdir.__doc__
settings.add("wd", os.getcwd(), "default working directory, will get from `os.getcwd()`. Will update using `os.chdir()` automatically when changed", _cb_wd)
settings.add("cancelRun_newLine", True, "whether to add a new line character at the end of the cancel run/epoch/batch message")
startup = Settings().add("or_patch", True, "whether to remove __or__() method from numpy array and pandas data frame and series. This would make cli operations with them a lot more pleasant, but also means you have to convert numpy floats to normal floats before doing a bitwise or to it")
settings.add("startup", startup, "these settings have to be applied like this: `import k1lib; k1lib.settings.startup.or_patch = False; from k1lib.imports import *` to ensure that the values are set")
def sign(v):
    """Returns 1 for strictly positive ``v``, and -1 for everything else (0 included)."""
    if v > 0: return 1
    return -1
def roundOff(a, b):
    """Returns the midpoint of ``a`` and ``b``.

    The statements that used to follow the return (rounding the midpoint off to
    a decimal place derived from ``abs(a - m)``) could never execute, so they
    are removed. The result is unchanged."""
    return (a + b) / 2
def toPrecision(num, sig=1):
    """Rounds ``num`` to ``sig`` significant figures.

    :param num: number to round
    :param sig: how many significant figures to keep
    :return: the rounded number. Zero comes back as ``0``, and non-finite
        inputs (nan, +/-inf) are returned unchanged instead of raising"""
    if num == 0:
        return 0
    # floor(log10(.)) below raises on nan/inf, and there is nothing to round anyway
    if not math.isfinite(num):
        return num
    s = 1 if num > 0 else -1
    num = abs(num)
    # scale so that exactly `sig` digits sit in front of the decimal point
    fac = 10**(-math.floor(math.log10(num))+sig-1)
    return s*round(num*fac)/fac
def niceUS(mean, std):
    """Rounds a (mean, std) pair for display.

    ``std`` is rounded to 1 significant figure, or 2 when its leading digit
    is 1, and ``mean`` is rounded to the same decimal place.

    :param mean: central value
    :param std: standard deviation
    :return: tuple ``(mean, std)`` after rounding. The pair is returned
        untouched when ``std`` is below 1e-12, or when either number is
        nan/inf (those can't be rounded and used to raise here)"""
    if not (math.isfinite(mean) and math.isfinite(std)) or std < 1e-12:
        return mean, std
    # leading digit of std is 1 -> keep one more digit
    pres = 2 if std/10**math.floor(math.log10(std)) < 2 else 1
    std = toPrecision(std, pres)
    fac = 10**(-math.floor(math.log10(std))+pres-1)
    return round(mean*fac)/fac, std
def removeOutliers(t, fraction=0.01):
    """Sorts ``t`` and trims the extreme values on both ends.

    :param t: 1-d :class:`torch.Tensor`
    :param fraction: total fraction of elements to drop, half from each end
    :return: the sorted tensor without the ``int(len(t)*fraction/2)`` smallest
        and largest elements"""
    b = int(len(t)*fraction/2)
    ordered = t.sort().values
    # ordered[0:-0] is an empty slice, so when there is nothing to trim
    # return everything instead of nothing
    if b <= 0:
        return ordered
    return ordered[b:-b]
def _US(v):
    """Returns ``[mean, std]`` for ``v``. Anything that is not a
    :class:`UValue` is treated as an exact value, i.e. ``[v, 0]``."""
    if not isinstance(v, UValue):
        return [v, 0]
    return list(v)
class UValue:
    # Pool of pre-drawn standard normal samples, indexed as [_class, row, i].
    # Binary operations sample their left operand from _class 0 and their
    # right operand from _class 1 (see _op2), and the row is picked at random.
    _unit = torch.randn(2, 5, 100000)

    def __init__(self, mean=0, std=1):
        """Creates a new "uncertain value", which has a mean and a standard
        deviation. You can then do math operations on them as normal, and the
        propagation errors will be automatically calculated for you. Make sure to
        run the calculation multiple times as the mean and std values fluctuate by
        a little run-by-run. Example::

            # returns UValue(mean=4.7117, std=3.4736) object
            abs(k1lib.UValue() * 5 + 3)

        You can also instantiate from an existing list/numpy array/pytorch tensor::

            # returns UValue(mean=24.5, std=14.58) object
            k1lib.UValue.fromSeries(range(50))

        You can also do arbitrary complex math operations::

            # returns UValue(mean=0.5544, std=0.4871)
            (20 + k1lib.UValue()).f(np.sin)
            # same as above, but takes longer to run!
            (20 + k1lib.UValue()).f(math.sin)

        I suggest you to make your arbitrary function out of numpy's operations,
        as those are a fair bit faster than regular Python.

        If you have a list of :class:`UValue`, and want to plot them with error
        bars, then you can do something like this::

            x = np.linspace(0, 6)
            y = list(np.sin(x)*10) | apply(k1lib.UValue) | toList()
            plt.errorbar(x, *(y | transpose()));

        There are several caveats however:

        .. note::

            First is the problem of theoretically vs actually sampling a
            distribution. Let's see an example::

                # returns theoretical value UValue(mean=8000.0, std=1200.0) -> 8000.0 ± 1200.0
                k1lib.UValue(20) ** 3
                # prints out actual mean and std value of (8064.1030, 1204.3529)
                a = k1lib.UValue(20).sample() ** 3
                print(a.mean(), a.std())

            So far so good. However, let's create some uncertainty in "3"::

                # returns theoretical value UValue(mean=8000.0, std=23996.0) -> 10000.0 ± 20000.0
                k1lib.UValue(20) ** k1lib.UValue(3)
                # prints out actual mean and std value of (815302.8750, 27068828.), but is very unstable and changes a lot
                a = k1lib.UValue(20).sample() ** k1lib.UValue(3).sample()
                print(a.mean(), a.std())

            Woah, what happens here? The actual mean and std values are
            completely different from the theoretical values. This is
            mainly due to UValue(3) having some outlier values large enough
            to boost the result up multiple times. Even removing 1% of
            values on either end of the spectrum does not quite work. So,
            be careful when interpreting these uncertainty values, and in some
            cases the theoretical estimates from math are actually very
            unstable and will not be observed in real life.

        .. note::

            Then there's the problem of each complex operation, say ``(v*2+3)/5``
            will be done step by step, meaning ``a=v*2`` mean and std will be
            calculated first, then ignoring the calculated sample values and just
            go with the mean and std, sample a bunch of values from there and calculate
            ``a+3`` mean and std. Rinse and repeat. This means that these 2 statements
            may differ by a lot::

                # prints out (0.15867302766786406, 0.12413313456900205)
                x = np.linspace(-3, 3, 1000); sq = (abs(x)-0.5)**2; y = sq*np.exp(-sq)
                print(y.mean(), y.std())
                # returns UValue(mean=0.081577, std=0.32757) -> 0.1 ± 0.3
                x = k1lib.UValue(0, 1); sq = (abs(x)-0.5)**2; y = sq*(-sq).f(np.exp)

            Why this weird function? It converts from a single nice hump into multiple
            complex humps. Anyway, this serves to demonstrate that the result from the
            ``calculate -> get mean, std -> sample from new distribution -> calculate``
            process might be different from just calculating from start to end and then
            get the mean and std.

        .. note::

            Lastly, you might have problems when using the same UValue multiple times in
            an expression::

                a = UValue(10, 1)
                a * 2 # has mean 20, std 2
                a + a # has mean 20, std 1.4"""
        # store plain Python numbers, not 0-d tensors
        if isinstance(mean, torch.Tensor): mean = mean.item()
        if isinstance(std, torch.Tensor): std = std.item()
        self.mean = mean; self.std = std

    @staticmethod
    def _sample(mean, std, n=None, _class=0):
        """Scales a randomly picked row of the unit pool to ``mean``/``std``.
        ``n=None`` returns the whole row, otherwise only the first ``n``
        values of it."""
        rows = UValue._unit.shape[1]
        t = UValue._unit[_class, random.randint(0, rows - 1)]
        if n is not None: t = t[:n]
        return t * std + mean

    def sample(self, n=100, _class=0):
        """Gets a sample :class:`torch.Tensor` representative of this
        uncertain value. Example::

            # returns tensor([-5.1095,  3.3117, -2.5759,  ..., -2.5810, -1.8131,  1.8339])
            (k1lib.UValue() * 5).sample()

        :param n: number of values to return, or None for the whole
            pre-drawn row
        :param _class: which half of the unit pool to draw from"""
        return UValue._sample(*self, n, _class)

    @staticmethod
    def fromSeries(series, unbiased=True):
        """Creates a :class:`UValue` from a bunch of numbers

        :param series: can be a list of numbers, numpy array or PyTorch tensor
        :param unbiased: if True, Bessel’s correction will be used"""
        if isinstance(series, np.ndarray):
            series = torch.tensor(series)
        elif not isinstance(series, torch.Tensor):
            series = torch.tensor(list(series))
        series = series * 1.0 # promotes integer tensors to float, so mean()/std() work
        return UValue(series.mean(), series.std(unbiased=unbiased))

    @staticmethod
    def fromBounds(min_, max_):
        """Creates a :class:`UValue` from min and max values.
        Example::

            # returns UValue(mean=2.5, std=0.5)
            k1lib.UValue.fromBounds(2, 3)"""
        mid = (min_ + max_)/2
        return UValue(mid, abs(max_-mid))

    def __iter__(self):
        """Yields mean, then std, so that ``mean, std = uvalue`` works."""
        yield self.mean; yield self.std

    def _niceValue(self, v, _class=0):
        """Returns 2 separately drawn full-length samples of ``v``. Values
        that are not :class:`UValue` are sampled with std 0."""
        if isinstance(v, UValue): return [UValue._sample(*v, None, _class), UValue._sample(*v, None, _class)]
        return [UValue._sample(v, 0, None, _class), UValue._sample(v, 0, None, _class)]

    def _postProcess(self, c1, c2):
        """Turns 2 result samples into a single :class:`UValue`: replaces
        NaNs (with a warning), trims outliers and merges the 2 mean and 2 std
        estimates with :func:`roundOff`."""
        if c1.hasNan() or c2.hasNan():
            warnings.warn("Calculations has NaN values. They will be replaced with 0, which can affect accuracy of mean and std calculations")
            c1.clearNan(); c2.clearNan()
        c1 = removeOutliers(c1); c2 = removeOutliers(c2)
        return UValue(roundOff(c1.mean().item(), c2.mean().item()), roundOff(c1.std().item(), c2.std().item()))

    @property
    def exact(self):
        """Whether this UValue is exact or not"""
        return self.std == 0

    @staticmethod
    def _isValueExact(v):
        """True for exact :class:`UValue` and for plain values without a
        length. Anything that has a length is considered a series and is
        thus not exact."""
        if isinstance(v, UValue): return v.exact
        try: len(v)
        except TypeError: return True # no length -> single number
        return False

    @staticmethod
    def _value(v): # gets mean value
        """Gets the single number ``v`` stands for.

        :raises RuntimeError: if ``v`` has a length, i.e. is a series"""
        if isinstance(v, UValue): return v.mean
        try: len(v)
        except TypeError: return v # no length -> single number
        # raised outside of the try block, so that it actually reaches the caller
        raise RuntimeError("Can't convert a series into an exact value")

    def test(self, v):
        """Returns how many sigma a particular value is."""
        return (v-self.mean)/self.std

    def f(self, func):
        """Covered in :meth:`__init__` docs"""
        if self.exact: return UValue(func(self.mean), 0)
        a1, a2 = self._niceValue(self)
        try: return self._postProcess(func(a1), func(a2))
        except Exception:
            # func can't take a whole tensor, so apply it element by element
            # on a smaller sample instead
            g = lambda xs: torch.tensor([func(x) for x in xs[:10000]])
            return self._postProcess(g(a1), g(a2))

    def bounds(self):
        """Returns (mean-std, mean+std)"""
        return self.mean - self.std, self.mean + self.std

    def _op2(self, func, a, b):
        """Stochastic version of a binary operation: samples ``a`` and ``b``
        from different halves of the unit pool, applies ``func`` and
        summarizes the result. 2 exact operands short-circuit to an exact
        result."""
        if UValue._isValueExact(a) and UValue._isValueExact(b):
            return UValue(func(UValue._value(a), UValue._value(b)), 0)
        a1, a2 = self._niceValue(a, 0); b1, b2 = self._niceValue(b, 1)
        try: return self._postProcess(func(a1, b1), func(a2, b2))
        except Exception:
            # func can't take whole tensors, so fall back to element-wise on a smaller sample
            g = lambda xs, ys: torch.tensor([func(x, y).item() for x, y in zip(xs[:10000], ys[:10000])])
            return self._postProcess(g(a1, b1), g(a2, b2))

    @staticmethod
    def combine(*values, samples=1000):
        """Combines multiple UValues into 1.
        Example::

            a = k1lib.UValue(5, 1)
            b = k1lib.UValue(7, 1)
            # both returns 6.0 ± 1.4
            k1lib.UValue.combine(a, b)
            [a, b] | k1lib.UValue.combine()

        This will sample each UValue by default 1000 times, put them into a
        single series and get a UValue from that. Why not just take the
        average instead? Because the standard deviation will be less, and
        will not actually reflect the action of combining UValues together::

            # returns 6.0 ± 0.7, which is narrower than expected
            (a + b) / 2

        :param samples: how many times to sample each UValue"""
        if not values: # cli mode, forwards `samples` to the eventual call
            return ~k1lib.cli.aS(lambda *vs: UValue.combine(*vs, samples=samples))
        return UValue.fromSeries(torch.cat([v.sample(samples) for v in values]))

    # Arithmetic below uses closed-form first-order error propagation and
    # treats both operands as independent (see last note in __init__ docs).
    # A stochastic version of these would be self._op2(lambda a, b: a+b, v, self).
    def __add__(self, v):
        m1, s1 = _US(self); m2, s2 = _US(v)
        return UValue(m1+m2, math.sqrt(s1**2 + s2**2))

    def __radd__(self, v):
        m1, s1 = _US(self); m2, s2 = _US(v)
        return UValue(m1+m2, math.sqrt(s1**2 + s2**2))

    def __sub__(self, v):
        m1, s1 = _US(self); m2, s2 = _US(v)
        return UValue(m1-m2, math.sqrt(s1**2 + s2**2))

    def __rsub__(self, v):
        m1, s1 = _US(self); m2, s2 = _US(v)
        return UValue(m2-m1, math.sqrt(s1**2 + s2**2))

    def __mul__(self, v):
        m1, s1 = _US(self); m2, s2 = _US(v)
        return UValue(m1*m2, math.sqrt(m2**2*s1**2 + m1**2*s2**2))

    def __rmul__(self, v):
        m1, s1 = _US(self); m2, s2 = _US(v)
        return UValue(m1*m2, math.sqrt(m2**2*s1**2 + m1**2*s2**2))

    def __truediv__(self, v):
        m1, s1 = _US(self); m2, s2 = _US(v)
        return UValue(m1/m2, math.sqrt(1/m2**2*s1**2 + m1**2/m2**4*s2**2))

    def __rtruediv__(self, v):
        m1, s1 = _US(v); m2, s2 = _US(self)
        return UValue(m1/m2, math.sqrt(1/m2**2*s1**2 + m1**2/m2**4*s2**2))

    @staticmethod
    def _pow(base, exponent):
        """Shared implementation of ``base ** exponent``, where either side
        can be a :class:`UValue` or a plain number."""
        m1, s1 = _US(base); m2, s2 = _US(exponent); m = m1**m2
        baseTerm = (m2*m/m1)**2*s1**2
        # log(m1) only matters if the exponent is uncertain. Skipping it for
        # exact exponents lets negative bases through, like UValue(-2, 1)**2
        expTerm = (math.log(m1)*m)**2*s2**2 if s2 != 0 else 0
        return UValue(m, math.sqrt(baseTerm + expTerm))

    def __pow__(self, v): return UValue._pow(self, v)

    def __rpow__(self, v): return UValue._pow(v, self)

    def __abs__(self): return self.f(abs) # can't convert to pure math that makes sense

    def __neg__(self): return 0 - self

    def __repr__(self):
        mean, std = niceUS(self.mean, self.std)
        return f"UValue(mean={toPrecision(self.mean, 5)}, std={toPrecision(self.std, 5)}) -> {mean} ± {std}"

    def plot(self, name=None):
        """Quickly plots a histogram of the distribution.
        Possible to plot multiple histograms in 1 plot.

        :param name: optional label. If given, a legend is drawn too"""
        plt.hist(self.sample(None).numpy(), bins=100, alpha=0.7, label=name)
        if name is not None: plt.legend()