Source code for k1lib.cli.conv

# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
"""
This is for all short utilities that convert from 1 data type to another. They
might feel like they have different styles, as :class:`toFloat` converts object iterator to
float iterator, while :class:`toPIL` converts single image url to single PIL image,
whereas :class:`toSum` converts float iterator into a single float value.

The general convention is, if the intended operation sounds simple (convert to floats,
strings, types, ...), then most likely it will convert iterator to iterator, as you
can always use the function directly if you only want to apply it on 1 object.

If it sounds complicated (convert to PIL image, tensor, ...) then most likely it will
convert object to object. Lastly, there are some that just feel right to take in
an iterator and output a single object (like getting max, min, std, mean values)."""
__all__ = ["toTensor", "toRange", "toList",
           "toSum", "toProd", "toAvg", "toMean", "toStd", "toMax", "toMin", "toPIL", "toImg",
           "toRgb", "toRgba", "toGray", "toDict",
           "toFloat", "toInt", "toBytes", "toHtml", "toAscii", "toHash", "toCsv"]
import re, k1lib, math, os, numpy as np, io, base64, unicodedata
from k1lib.cli.init import BaseCli, T, yieldT; import k1lib.cli as cli
from k1lib.cli.typehint import *; import matplotlib as mpl; import matplotlib.pyplot as plt
from collections import deque, defaultdict; from typing import Iterator, Any, List, Set, Tuple, Dict, Callable, Union
settings = k1lib.settings.cli
try: import PIL; hasPIL = True
except: hasPIL = False
try: import torch; hasTorch = True
except: torch = k1lib.dep("torch"); hasTorch = False
try: import rdkit; hasRdkit = True
except: hasRdkit = False
try: import graphviz; hasGraphviz = True
except: hasGraphviz = False
try: import plotly; import plotly.express as px; hasPlotly = True
except: hasPlotly = False
class toTensor(BaseCli):
    def __init__(self, dtype=None):
        """Converts generator to :class:`torch.Tensor`. Essentially
        ``torch.tensor(list(it))``. Default dtype is float32.

        Also checks if input is a PIL Image. If yes, turn it into a
        :class:`torch.Tensor` and return.

        :param dtype: dtype of the returned tensor. Falls back to
            ``torch.float32`` when not given (or falsy)"""
        self.dtype = dtype or torch.float32
    def __ror__(self, it:Iterator[float]) -> "torch.Tensor":
        """Converts a PIL image, a numpy array or any iterable into a tensor
        of dtype ``self.dtype``."""
        # Only the optional import is guarded. Errors raised while actually
        # converting an image are no longer swallowed and replaced by a
        # confusing "not iterable" failure further down.
        try: from PIL import Image
        except ImportError: Image = None
        if Image is not None and isinstance(it, Image.Image):
            pic = it # conversion logic taken from torchvision's ToTensor transform
            mode_to_nptype = {'I': np.int32, 'I;16': np.int16, 'F': np.float32}
            img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
            if pic.mode == '1': img = 255 * img
            img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
            return img.permute((2, 0, 1)).contiguous().to(self.dtype) # HWC -> CHW
        if isinstance(it, np.ndarray): return torch.tensor(it).to(self.dtype)
        return torch.tensor(list(it)).to(self.dtype)
class toList(BaseCli): # kept around because some LLVM optimizations are built on top of this cli
    def __init__(self):
        """Materializes the incoming iterable into a list. Example::

            # returns [0, 1, 2, 3, 4]
            range(5) | toList()
            # returns [0, 1, 2, 3, 4]
            range(5) | aS(list)

        This cli is sort of outdated. It still works fine, but prefer
        ``aS(list)``. It's not removed to avoid breaking old projects."""
        super().__init__()
    def _typehint(self, inp):
        # list/iterator/set hints become a list of the same child type
        if isinstance(inp, tListIterSet): return tList(inp.child)
        # collections pass through untouched, everything else is a list of anything
        return inp if isinstance(inp, tCollection) else tList(tAny())
    def __ror__(self, it:Iterator[Any]) -> List[Any]: return [*it]
def _toRange(it): # _toRange for i, _ in enumerate(it): yield i # _toRange
class toRange(BaseCli):
    def __init__(self):
        """Returns iter(range(len(it))), effectively. Example::

            # returns [0, 1, 2]
            [3, 2, 5] | toRange() | deref()"""
        super().__init__()
    def __ror__(self, it:Iterator[Any]) -> Iterator[int]:
        # Sized inputs get a cheap range object; anything whose length can't
        # be taken is counted lazily while it is being consumed.
        try: n = len(it)
        except: return _toRange(it)
        return range(n)
# two consecutive toRange clis collapse into the first one
tOpt.addPass(lambda cs, ts, _: [cs[0]], [toRange, toRange])
settings.add("arrayTypes", (torch.Tensor, np.ndarray) if hasTorch else (np.ndarray,), "default array types used to accelerate clis")
def genericTypeHint(inp):
    """Type hint of a single element of ``inp``: the child type for
    list/iterator/set and array hints, the first child for collection hints,
    and ``tAny()`` for anything else. The first matching rule wins."""
    rules = ((tListIterSet, lambda h: h.child),
             (tCollection, lambda h: h.children[0]),
             (tArrayTypes, lambda h: h.child))
    for hintType, pick in rules:
        if isinstance(inp, hintType): return pick(inp)
    return tAny()
class toSum(BaseCli):
    def __init__(self):
        """Calculates the sum of list of numbers. Can pipe in
        :class:`torch.Tensor` or :class:`numpy.ndarray`. Example::

            # returns 45
            range(10) | toSum()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        # pick the array library matching the input, bail out if there is none
        if isinstance(it, np.ndarray): backend = np
        elif hasTorch and isinstance(it, torch.Tensor): backend = torch
        else: return NotImplemented
        # reduce over every axis from ``level`` onwards
        return backend.sum(it, tuple(range(level, len(it.shape))))
    def _typehint(self, inp): return genericTypeHint(inp)
    def __ror__(self, it:Iterator[float]):
        return it.sum() if isinstance(it, settings.arrayTypes) else sum(it)
class toProd(BaseCli):
    def __init__(self):
        """Calculates the product of a list of numbers. Can pipe in
        :class:`torch.Tensor` or :class:`numpy.ndarray`. Example::

            # returns 362880
            range(1,10) | toProd()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        nDims = len(it.shape)
        if isinstance(it, np.ndarray): return np.prod(it, tuple(range(level, nDims)))
        if hasTorch and isinstance(it, torch.Tensor):
            # torch.prod is applied one dimension at a time here, so dimension
            # ``level`` is folded once per trailing dimension
            for _ in range(nDims - level): it = torch.prod(it, level)
            return it
        return NotImplemented
    def _typehint(self, inp): return genericTypeHint(inp)
    def __ror__(self, it):
        return it.prod() if isinstance(it, settings.arrayTypes) else math.prod(it)
class toAvg(BaseCli):
    def __init__(self):
        """Calculates average of list of numbers. Can pipe in
        :class:`torch.Tensor` or :class:`numpy.ndarray`. Example::

            # returns 4.5
            range(10) | toAvg()
            # returns nan
            [] | toAvg()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        if isinstance(it, np.ndarray): backend = np
        elif hasTorch and isinstance(it, torch.Tensor): backend = torch
        else: return NotImplemented
        return backend.mean(it, tuple(range(level, len(it.shape))))
    def _typehint(self, inp):
        # every rule is evaluated; when several match, the last one wins
        elem = None
        rules = ((tListIterSet, lambda h: h.child),
                 (tCollection, lambda h: h.children[0]),
                 (tArrayTypes, lambda h: h.child))
        for hintType, pick in rules:
            if isinstance(inp, hintType): elem = pick(inp)
        if elem is None: return tAny()
        return float if elem == int else elem # averaging ints gives floats
    def __ror__(self, it:Iterator[float]):
        if isinstance(it, settings.arrayTypes): return it.mean()
        total = 0; count = 0
        for value in it:
            total += value; count += 1
        # empty input: nan in non-strict mode, otherwise the division below raises
        if count == 0 and not k1lib.settings.cli.strict: return float("nan")
        return total / count
# torchStd(it, ddof, dim=None): standard deviation of a torch tensor with
# ``ddof`` delta degrees of freedom, optionally reduced over ``dim`` only.
# Which implementation is bound depends on the installed PyTorch major version.
if hasTorch:
    torchVer = int(torch.__version__.split(".")[0])
    if torchVer >= 2:
        def torchStd(it, ddof, dim=None): return torch.std(it, dim, correction=ddof)
    else:
        def torchStd(it, ddof, dim=None):
            # this path only uses the ``unbiased`` flag, which covers ddof 0 and 1
            if ddof == 0: return torch.std(it, dim, unbiased=False)
            if ddof == 1: return torch.std(it, dim, unbiased=True)
            raise Exception(f"Please install PyTorch 2, as version 1 don't support correction factor of {ddof}")
else:
    # same signature as the real implementations, so a call with ``dim`` hits
    # the intended error message instead of a TypeError
    def torchStd(it, ddof, dim=None): raise Exception("PyTorch not installed")
class toStd(BaseCli):
    def __init__(self, ddof:int=0):
        """Calculates standard deviation of list of numbers. Can pipe in
        :class:`torch.Tensor` or :class:`numpy.ndarray` to be faster. Example::

            # returns 2.8722813232690143
            range(10) | toStd()
            # returns nan
            [] | toStd()

        :param ddof: "delta degree of freedom". The divisor used in calculations is ``N - ddof``"""
        self.ddof = ddof
    def _all_array_opt(self, it, level):
        n = len(it.shape); ddof = self.ddof; dim = tuple(range(level, n))
        if isinstance(it, np.ndarray): return np.std(it, ddof=ddof, axis=dim)
        elif hasTorch and isinstance(it, torch.Tensor): return torchStd(it, ddof, dim)
        return NotImplemented
    def __ror__(self, it):
        ddof = self.ddof
        if isinstance(it, settings.arrayTypes):
            if isinstance(it, np.ndarray): return np.std(it, ddof=ddof)
            elif hasTorch and isinstance(it, torch.Tensor): return torchStd(it, ddof)
        # generic iterables: materialize, then honor ddof just like the array
        # paths above (it used to be silently ignored here)
        return np.std(np.array(list(it)), ddof=ddof)
toMean = toAvg # alias: toMean is the very same class as toAvg
class toMax(BaseCli):
    def __init__(self):
        """Calculates the max of a bunch of numbers. Can pipe in
        :class:`torch.Tensor` or :class:`numpy.ndarray`. Example::

            # returns 6
            [2, 5, 6, 1, 2] | toMax()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        nDims = len(it.shape)
        if isinstance(it, np.ndarray): return np.max(it, tuple(range(level, nDims)))
        if hasTorch and isinstance(it, torch.Tensor):
            # torch.max(t, dim) returns (values, indices); keep the values and
            # fold dimension ``level`` once per trailing dimension
            for _ in range(nDims - level): it = torch.max(it, level)[0]
            return it
        return NotImplemented
    def __ror__(self, it:Iterator[float]) -> float:
        return it.max() if isinstance(it, settings.arrayTypes) else max(it)
class toMin(BaseCli):
    def __init__(self):
        """Calculates the min of a bunch of numbers. Can pipe in
        :class:`torch.Tensor` or :class:`numpy.ndarray`. Example::

            # returns 1
            [2, 5, 6, 1, 2] | toMin()"""
        super().__init__()
    def _all_array_opt(self, it, level):
        nDims = len(it.shape)
        if isinstance(it, np.ndarray): return np.min(it, tuple(range(level, nDims)))
        if hasTorch and isinstance(it, torch.Tensor):
            # torch.min(t, dim) returns (values, indices); keep the values and
            # fold dimension ``level`` once per trailing dimension
            for _ in range(nDims - level): it = torch.min(it, level)[0]
            return it
        return NotImplemented
    def __ror__(self, it:Iterator[float]) -> float:
        return it.min() if isinstance(it, settings.arrayTypes) else min(it)
settings.add("font", None, "default font file. Best to use .ttf files, used by toPIL()")
settings.add("chem", k1lib.Settings().add("imgSize", 200, "default image size used in toPIL() when drawing rdkit molecules"), "chemistry-related settings")
def cropToContentNp(ogIm, pad=10):
    """Crops away the uniform border around the content of an image array.

    Every pixel that differs from the maximum value of the image counts as
    content. Arrays with more than 2 dimensions are averaged over their first
    axis to locate the content, and are cropped along their 2nd and 3rd axes.

    :param ogIm: numpy array with 2 dimensions, or 3 with the leading axis preserved
    :param pad: number of extra pixels to keep around the content
    :return: cropped view of ``ogIm``, or ``ogIm`` itself if it has no content"""
    dim = len(ogIm.shape); im = ogIm
    if dim > 2: im = im.mean(0)
    coords = np.argwhere(im.max()-im)
    if len(coords) == 0: return ogIm # completely uniform image, nothing to crop to
    x_min, y_min = coords.min(axis=0); x_max, y_max = coords.max(axis=0)
    # clamp the start indices: a negative start would wrap around to the other
    # end of the array and produce an empty/wrong crop when content sits near the edge
    x0 = max(x_min-pad, 0); y0 = max(y_min-pad, 0)
    x1 = x_max+1+pad; y1 = y_max+1+pad
    return ogIm[x0:x1, y0:y1] if dim == 2 else ogIm[:, x0:x1, y0:y1]
def cropToContentPIL(im, pad=0):
    """Same as :func:`cropToContentNp`, but takes in and returns a PIL image."""
    im = im | toTensor(int) | cli.op().numpy() | cli.aS(cropToContentNp, pad)
    # multi-channel results are channel-first here, PIL wants channel-last
    return torch.from_numpy(im).permute(1, 2, 0) | toImg() if len(im.shape) > 2 else im | toImg()
class toPIL(BaseCli):
    def __init__(self, closeFig=True, crop=True):
        """Converts multiple data types into a PIL image. Example::

            # grabs first image in the current folder
            ls(".") | toPIL().all() | item()
            # converts from tensor/array to image
            torch.randn(100, 200) | toPIL()
            # grabs image, converts to byte stream, and converts back to image
            "abc.jpg" | toPIL() | toBytes() | toPIL()
            # converts paragraphs to image
            ["abc", "def"] | toPIL()
            # converts SMILES string to molecule, then to image
            "c1ccc(C)cc1" | toMol() | toImg()

        You can also save a matplotlib figure by piping in a :class:`matplotlib.figure.Figure` object::

            x = np.linspace(0, 4)
            plt.plot(x, x**2)
            plt.gcf() | toPIL()

        .. note::

            If you are working with image tensors, which typically have
            dimensions of (C, H, W), you have to permute it to PIL's (H, W, C)
            first before passing it into this cli.

            Also it's expected that your tensor image ranges from 0-255, and not
            0-1. Make sure you renormalize it

        :param closeFig: if input is a matplotlib figure, then closes the figure after generating the image
        :param crop: whether to crop white spaces around an image or not"""
        import PIL; self.PIL = PIL; self.closeFig = closeFig; self.crop = crop
    def _typehint(self, inp):
        return self.PIL.Image.Image
    def __ror__(self, path) -> "PIL.Image.Image":
        """Dispatches on the type of ``path``: file name, raw bytes, tensor/array,
        matplotlib figure, graphviz graph, rdkit molecule or list of strings.
        Returns ``NotImplemented`` for anything else."""
        if isinstance(path, str):
            return self.PIL.Image.open(os.path.expanduser(path))
        if isinstance(path, bytes):
            return self.PIL.Image.open(io.BytesIO(path))
        # optional dependencies are only touched behind their has* flag, so a
        # missing package can't break the branches further down
        if hasTorch and isinstance(path, torch.Tensor): path = path.numpy()
        if isinstance(path, np.ndarray):
            return self.PIL.Image.fromarray(path.astype("uint8"))
        if isinstance(path, mpl.figure.Figure):
            canvas = path.canvas; canvas.draw()
            if hasattr(canvas, "tostring_rgb"):
                img = self.PIL.Image.frombytes('RGB', canvas.get_width_height(), canvas.tostring_rgb())
            else: # newer matplotlib releases removed tostring_rgb(), read the rgba buffer instead
                img = self.PIL.Image.fromarray(np.asarray(canvas.buffer_rgba())).convert("RGB")
            if self.closeFig: plt.close(path)
            return img | (cli.aS(cropToContentPIL) if self.crop else cli.iden())
        if hasGraphviz and isinstance(path, graphviz.Digraph):
            import tempfile; a = tempfile.NamedTemporaryFile()
            path.render(a.name, format="jpeg");
            fn = f"{a.name}.jpeg"; im = fn | toImg()
            try: os.remove(fn) # best-effort cleanup of the rendered file
            except OSError: pass
            return im
        if hasRdkit and isinstance(path, rdkit.Chem.rdchem.Mol):
            sz = settings.chem.imgSize
            return self.__ror__(rdkit.Chem.Draw.MolsToGridImage([path], subImgSize=[sz, sz]).data) | (cli.aS(cropToContentPIL) if self.crop else cli.iden())
        path = path | cli.deref()
        if len(path) > 0 and isinstance(path[0], str):
            # list of strings: draw the lines onto a white canvas sized from the
            # number of lines and the longest line
            from PIL import ImageDraw, ImageFont
            h = path | cli.shape(0); w = path | cli.shape(0).all() | cli.aS(max)
            image = self.PIL.Image.new("L", ((w+1)*20, (h+1)*60), 255)
            font = ImageFont.truetype(settings.font, 18) if settings.font else None
            ImageDraw.Draw(image).text((20, 20), path | cli.join("\n"), 0, font=font)
            return np.array(image)/255 | (cli.aS(cropToContentNp) if self.crop else cli.iden()) | cli.op()*255 | toImg()
        return NotImplemented
toImg = toPIL # alias: toImg is the very same class as toPIL
class toRgb(BaseCli):
    def __init__(self):
        """Converts greyscale/rgb PIL image to rgb image. Example::

            # reads image file and converts it to rgb
            "a.png" | toPIL() | toRgb()"""
        import PIL; self.PIL = PIL
    def _typehint(self, inp): return inp
    def __ror__(self, i):
        if i.getbands() != ("R", "G", "B"):
            # paste the input onto a fresh RGB canvas of the same size
            canvas = self.PIL.Image.new("RGB", i.size)
            canvas.paste(i)
            return canvas
        return i # already rgb, hand it back untouched
class toRgba(BaseCli):
    def __init__(self):
        """Converts random PIL image to rgba image. Example::

            # reads image file and converts it to rgba
            "a.png" | toPIL() | toRgba()"""
        import PIL; self.PIL = PIL
    def _typehint(self, inp): return inp
    def __ror__(self, i):
        if i.getbands() != ("R", "G", "B", "A"):
            # paste the input onto a fresh RGBA canvas of the same size
            canvas = self.PIL.Image.new("RGBA", i.size)
            canvas.paste(i)
            return canvas
        return i # already rgba, hand it back untouched
class toGray(BaseCli):
    def __init__(self):
        """Converts random PIL image to a grayscale image. Example::

            # reads image file and converts it to grayscale
            "a.png" | toPIL() | toGray()"""
        import PIL; self.PIL = PIL
    def _typehint(self, inp): return inp
    def __ror__(self, i):
        # getbands() returns a tuple, so compare against the 1-tuple ("L",).
        # The previous ("L") is just the string "L" and never matched.
        if i.getbands() == ("L",): return i
        # ``import PIL`` alone does not load the ImageOps submodule
        from PIL import ImageOps
        return ImageOps.grayscale(i)
class toDict(BaseCli):
    def __init__(self, rows=True, f=None):
        """Converts 2 Iterators, 1 key, 1 value into a dictionary. Example::

            # returns {1: 3, 2: 4}
            [[1, 3], [2, 4]] | toDict()
            # returns {1: 3, 2: 4}
            [[1, 2], [3, 4]] | toDict(False)

        If ``rows`` is a string, then it will build a dictionary from key-value
        pairs delimited by this character. For example::

            ['gene_id "ENSG00000290825.1"',
             'transcript_id "ENST00000456328.2"',
             'gene_type "lncRNA"',
             'gene_name "DDX11L2"',
             'transcript_type "lncRNA"',
             'transcript_name "DDX11L2-202"',
             'level 2',
             'transcript_support_level "1"',
             'tag "basic"',
             'tag "Ensembl_canonical"',
             'havana_transcript "OTTHUMT00000362751.1"'] | toDict(" ")

        That returns::

            {'gene_id': '"ENSG00000290825.1"',
             'transcript_id': '"ENST00000456328.2"',
             'gene_type': '"lncRNA"',
             'gene_name': '"DDX11L2"',
             'transcript_type': '"lncRNA"',
             'transcript_name': '"DDX11L2-202"',
             'level': '2',
             'transcript_support_level': '"1"',
             'tag': '"Ensembl_canonical"',
             'havana_transcript': '"OTTHUMT00000362751.1"'}

        :param rows: if True, reads input in row by row, else reads in list of
            columns. If a string, reads in strings split at that delimiter
        :param f: if specified, return a defaultdict that uses this function as its generator"""
        self.rows = rows
        if f is not None: self.f = lambda d: defaultdict(f, d)
        else: self.f = lambda x: x
    def __ror__(self, it:Tuple[Iterator[T], Iterator[T]]) -> dict:
        r = self.rows; f = self.f
        if r:
            if isinstance(r, str):
                # Split every entry at the first occurrence of the delimiter:
                # the key is what comes before, the value everything after.
                # Uses the delimiter that was passed in (it used to be
                # hard-coded to " ") and applies ``f`` like the other modes.
                return f({_k:_v for _k, _, _v in (x.partition(r) for x in it)})
            return f({_k:_v for _k, _v in it})
        return f({_k:_v for _k, _v in zip(*it)})
def _toop(toOp, c, force, defaultValue): # _toop return cli.apply(toOp, c) | (cli.apply(lambda x: x or defaultValue, c) if force else cli.filt(cli.op() != None, c)) # _toop def _toFloat(e) -> Union[float, None]: # _toFloat try: return float(e) # _toFloat except: return None # _toFloat
class toFloat(BaseCli):
    def __init__(self, *columns, mode=2):
        """Converts every row into a float. Example::

            # returns [1, 3, -2.3]
            ["1", "3", "-2.3"] | toFloat() | deref()
            # returns [[1.0, 'a'], [2.3, 'b'], [8.0, 'c']]
            [["1", "a"], ["2.3", "b"], [8, "c"]] | toFloat(0) | deref()

        With weird rows::

            # returns [[1.0, 'a'], [8.0, 'c']]
            [["1", "a"], ["c", "b"], [8, "c"]] | toFloat(0) | deref()
            # returns [[1.0, 'a'], [0.0, 'b'], [8.0, 'c']]
            [["1", "a"], ["c", "b"], [8, "c"]] | toFloat(0, mode=1) | deref()

        This also works well with :class:`torch.Tensor` and :class:`numpy.ndarray`,
        as they will not be broken up into an iterator::

            # returns a numpy array, instead of an iterator
            np.array(range(10)) | toFloat()

        :param columns: if nothing, then will convert each row. If available, then convert all the specified columns
        :param mode: different conversion styles

            - 0: simple ``float()`` function, fastest, but will throw errors if it can't be parsed
            - 1: if there are errors, then replace it with zero
            - 2: if there are errors, then eliminate the row"""
        self.columns = columns; self.mode = mode
    def __ror__(self, it):
        columns = self.columns; mode = self.mode
        if len(columns) == 0:
            if isinstance(it, np.ndarray): return it.astype(float)
            # guard on hasTorch so a missing torch install can't break plain iterables
            if hasTorch and isinstance(it, torch.Tensor): return it.float()
            if mode == 0: return it | cli.apply(float)
            return it | _toop(_toFloat, None, mode == 1, 0.0)
        # NOTE(review): with columns given, mode 0 currently behaves like mode 2
        # (failed rows are dropped rather than raising) - confirm intended
        else: return it | cli.init.serial(*(_toop(_toFloat, c, mode == 1, 0.0) for c in columns))
def _toInt(e) -> Union[int, None]: # _toInt try: return int(float(e)) # _toInt except: return None # _toInt
class toInt(BaseCli):
    def __init__(self, *columns, mode=2):
        """Converts every row into an integer. Example::

            # returns [1, 3, -2]
            ["1", "3", "-2.3"] | toInt() | deref()

        :param columns: if nothing, then will convert each row. If available, then convert all the specified columns
        :param mode: different conversion styles

            - 0: simple ``int()`` function, fastest, but will throw errors if it can't be parsed
            - 1: if there are errors, then replace it with zero
            - 2: if there are errors, then eliminate the row

        See also: :meth:`toFloat`"""
        self.columns = columns; self.mode = mode
    def __ror__(self, it):
        columns = self.columns; mode = self.mode
        if len(columns) == 0:
            if isinstance(it, np.ndarray): return it.astype(int)
            # guard on hasTorch so a missing torch install can't break plain iterables
            if hasTorch and isinstance(it, torch.Tensor): return it.int()
            if mode == 0: return it | cli.apply(int)
            # the replacement value is the int 0, so the output stays all-int
            return it | _toop(_toInt, None, mode == 1, 0)
        else: return it | cli.init.serial(*(_toop(_toInt, c, mode == 1, 0) for c in columns))
class toBytes(BaseCli):
    def __init__(self, imgType="JPEG"):
        """Converts several object types to bytes. Example::

            # converts string to bytes
            "abc" | toBytes()
            # converts image to bytes, encoded in the chosen image format
            torch.randn(200, 100) | toImg() | toBytes()

        .. admonition:: Custom datatype

            It is possible to build objects that can interoperate with this cli,
            like this::

                class custom1:
                    def __init__(self, config=None): ...
                    def _toBytes(self): return b"abc"

                custom1() | toBytes() # returns b"abc"

            When called upon, :class:`toBytes` will detect that the input has the
            ``_toBytes`` method, and will execute that method. This means that you
            can return anything, not necessarily bytes, but to stay intuitive you
            should return either bytes or an iterator of bytes

        :param imgType: if input is an image then this is the image type. Can change to "PNG" or sth like that"""
        self.imgType = imgType
    def __ror__(self, it):
        # strings: default str.encode()
        if isinstance(it, str): return it.encode()
        # PIL images: convert to rgb, then encode in the requested image format
        if hasPIL and isinstance(it, PIL.Image.Image):
            stream = io.BytesIO()
            (it | toRgb()).save(stream, format=self.imgType)
            return stream.getvalue()
        # objects that know how to serialize themselves
        if hasattr(it, "_toBytes"): return it._toBytes()
        # everything else is serialized using dill
        import dill
        return dill.dumps(it)
class toHtml(BaseCli):
    def __init__(self):
        """Converts several object types to html. Example::

            # converts PIL image to html <img> tag
            torch.randn(200, 100) | toImg() | toHtml()

        PIL images become an ``<img>`` tag with the png data embedded, plotly
        figures are written out as html, and everything else falls back to
        ``_repr_html_()``, then to ``__repr__()``."""
        pass
    def __ror__(self, it):
        if hasPIL:
            if isinstance(it, PIL.Image.Image):
                it = it | toBytes(imgType="PNG") | cli.aS(base64.b64encode) | cli.op().decode()
                return f"<img src=\"data:image/png;base64, {it}\" />"
        if hasPlotly:
            if isinstance(it, plotly.graph_objs._figure.Figure):
                out = io.StringIO(); it.write_html(out); out.seek(0); return out.read()
        try: return it._repr_html_()
        # best-effort fallback, but KeyboardInterrupt/SystemExit are no longer swallowed
        except Exception: return it.__repr__()
# Optional rdkit integration: toMol/toSmiles are only defined (and exported via
# __all__) when every import below succeeds. Any failure skips the whole section.
try:
    from rdkit import Chem
    from rdkit.Chem import Draw
    from rdkit.Chem import AllChem
    from rdkit.Chem.Draw import IPythonConsole
    IPythonConsole.drawOptions.addAtomIndices = True # module-level side effect on rdkit's draw options
    __all__ = [*__all__, "toMol", "toSmiles"]
    def toMol():
        """Smiles to molecule. Returns a cli wrapping ``Chem.MolFromSmiles``.
        Example::

            "c1ccc(C)cc1" | toMol()"""
        return cli.aS(Chem.MolFromSmiles)
    def toSmiles():
        """Molecule to smiles. Returns a cli wrapping ``Chem.MolToSmiles``.
        Example::

            "c1ccc(C)cc1" | toMol() | toSmiles()"""
        return cli.aS(Chem.MolToSmiles)
except: pass
import unicodedata, hashlib
def toAscii():
    """Converts complex unicode text to its base ascii form. Example::

        "hà nội" | toAscii() # returns "ha noi"

    Characters are decomposed (NFKD), and everything that has no ascii
    representation is dropped. The result is a string.

    Taken from https://stackoverflow.com/questions/2365411/convert-unicode-to-ascii-without-errors-in-python"""
    # encode() yields bytes; decode back so the result is a str, as documented
    return cli.aS(lambda word: unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('ascii'))
def toHash() -> str:
    """Converts some string into some hash string. Example::

        "abc" | toHash() # returns 'gASVJAAAAAAAAABDILp4Fr+PAc/qQUFA3l2uIiOwA2Gjlhd6nLQQ/2HyABWtlC4='

    The piped-in object is formatted as a string, hashed with sha256, and the
    digest is passed through ``k1lib.encode``.

    Why not just use the builtin function ``hash("abc")``? Because it generates
    different hashes for different interpreter sessions, which breaks
    applications that need the hash value to stay constant forever."""
    def hashF(msg:str) -> str:
        digest = hashlib.sha256(f"{msg}".encode()).digest()
        return k1lib.encode(digest)
    return cli.aS(hashF)
import csv; pd = k1lib.dep("pandas") # toHash
class toCsv(BaseCli):
    def __init__(self, allSheets=False):
        """Converts a csv file name into a table. Example::

            "abc.csv" | toCsv() # returns table of values
            "def.xlsx" | toCsv() # returns table of values in the first sheet
            "def.xlsx" | toCsv(True) # returns List[Sheet name (str), table of values]

        :param allSheets: if input is an Excel sheet, whether to read in all
            sheets or just the first sheet. No effect if input is a normal csv file"""
        self.allSheets = allSheets
    def __ror__(self, fn:str):
        fn = os.path.expanduser(fn)
        if fn.endswith((".xls", ".xlsx")):
            if self.allSheets: return [[k, v.values] for k,v in pd.read_excel(fn, sheet_name=None).items()]
            return pd.read_excel(fn).values
        def gen():
            # The csv module wants files opened with newline="", otherwise
            # newlines embedded in quoted fields are not interpreted correctly.
            # The file is only opened once the generator is first iterated.
            with open(fn, newline="") as f: yield from csv.reader(f)
        return gen()