# AUTOGENERATED FILE! PLEASE DON'T EDIT
"""All tools related to cif file format that describes protein structures.
Expected to be used behind the "cif" module name, like this::
from k1lib.imports import *
cif.cat("abc.cif")
"""
import k1lib
import k1lib.cli as cli;
from k1lib.cli.init import BaseCli, yieldT
from typing import Iterator, List
from k1lib.cli.typehint import *
import k1lib._k1a as k1a
__all__ = ["tables", "records"]
def hasTable():
    """Builds a filter that keeps the blocks containing at least one line matching ``loop_``.

    Invert it (``~hasTable()``) to keep the blocks without any such line."""
    return cli.filt(cli.grep("loop_") | cli.shape(0) | (cli.op() > 0))
def toBlocks():
    """Builds a cli that reads the piped-in file and groups its lines into blocks.

    Blocks are delimited by the lines matching ``^#`` (presumably the ``#``
    separator lines of the cif format -- exact boundary handling is up to
    ``grep(..., sep=True).till()``)."""
    return cli.cat() | cli.grep("^#", sep=True).till()
def collect(l):
    """Merges ``;``-delimited fields of a token stream into single tokens.

    A token starting with ``;`` toggles "field" mode. While inside a field, tokens
    (including the opening one) are buffered instead of being yielded. When the
    closing ``;`` token arrives, the buffered tokens are concatenated, the leading
    ``;`` is dropped and the result is yielded as one token; the closing token
    itself is discarded. Tokens outside of a field pass through unchanged.

    A field that is still open when the input ends is silently dropped.

    :param l: iterable of string tokens
    :return: generator of string tokens"""
    inBlock = False; tmp = []
    for e in l:
        if e.startswith(";"):
            inBlock = not inBlock
            if not inBlock:
                yield "".join(tmp)[1:]  # [1:] strips the ";" of the opening token
                tmp = []  # reset, else the next field would also contain this one
                continue
        if inBlock: tmp.append(e)
        else: yield e
def tables(name=None, dikt=True):
    """Loads the tables of a cif file.

    Dictionary mode::

        # both return output below
        "1z7z.cif" | cif.tables() | op()["_audit_author"]
        "1z7z.cif" | cif.tables("_audit_author")

    Potential output::

        {'name': ("'Xiao, C.'",
          "'Bator-Kelly, C.M.'",
          "'Rieder, E.'",
          "'Chipman, P.R.'",
          "'Craig, A.'",
          "'Kuhn, R.J.'",
          "'Wimmer, E.'",
          "'Rossmann, M.G.'"),
         'pdbx_ordinal': ('1', '2', '3', '4', '5', '6', '7', '8')}

    Without ``name``, the result is a dictionary of ``table name -> dict()``, where the
    inner dictionary maps each column name to its elements. All columns should have the
    same number of elements.

    Table mode::

        # both return output below
        "1z7z.cif" | cif.tables("_audit_author", dikt=False)
        "1z7z.cif" | cif.tables(dikt=False) | op()["_audit_author"]

    Potential output::

        [['name', 'pdbx_ordinal'],
         ["'Xiao, C.'", '1'],
         ["'Bator-Kelly, C.M.'", '2'],
         ["'Rieder, E.'", '3'],
         ["'Chipman, P.R.'", '4'],
         ["'Craig, A.'", '5'],
         ["'Kuhn, R.J.'", '6'],
         ["'Wimmer, E.'", '7'],
         ["'Rossmann, M.G.'", '8']]

    Without ``name``, the result is a dictionary of ``table name -> List[List[str]]``, so
    each value is the table itself, header row first.

    :param name: if specified, returns only that table (or None if it can't be
        singled out), else returns every table
    :param dikt: whether each table is returned as a dict or as a plain table"""
    def inner(url):
        # table-bearing blocks, first 2 lines of each skipped, every line stripped
        blocks = url | toBlocks() | hasTable() | cli.apply(~cli.head(2) | cli.op().strip().all()) | cli.deref()

        # per block: the lines not starting with "_", split on spaces into one flat token stream
        splitLine = cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")
        tokenStreams = blocks | cli.apply(~cli.filt(cli.op().startswith("_")) | splitLine.all() | cli.joinStreams())

        # per block: the part after the "." of every line starting with "_"
        columnNames = blocks | cli.apply(cli.filt(cli.op().startswith("_")) | cli.op().split(".")[1].all()) | cli.toList().all()

        # per block: the part before the "." of the first remaining line
        tableNames = blocks | cli.op()[0].split(".")[0].all() | cli.deref()

        # final shaping of a table (header row + data rows)
        if dikt:
            shape = cli.transpose() | cli.apply(cli.item() & ~cli.head(1)) | cli.transpose() | cli.toDict(False)
        else:
            shape = cli.iden()

        if name is not None:
            select = cli.filt(cli.op() == name, 2)  # column 2 holds the table name
        else:
            select = cli.iden()

        def build(tokens, fields):
            # regroup the flat tokens into rows of len(fields), header row goes first
            return collect(tokens) | cli.batched(len(fields), True) | cli.insert(fields) | shape

        triples = [tokenStreams, columnNames, tableNames] | cli.transpose()
        parsed = triples | select | cli.cut(0, 1) | ~cli.apply(build)
        result = [tableNames, parsed] | cli.toDict(False)

        if name is None: return result
        if len(result) != 1: return None
        return result.values() | cli.item()
    return cli.aS(inner)
def records():
    """Loads the records of a cif file, i.e. the blocks that do not hold a table.

    Example::

        "1z7z.cif" | cif.records() | op()["_exptl"] | deref()

    Potential output::

        [['entry_id', '1Z7Z'],
         ['method', "'ELECTRON MICROSCOPY'"],
         ['crystals_number', '?']]

    Result is a dictionary of ``record name -> (n, 2) table``"""
    # one block -> flat token stream -> (key, value) pairs
    tokenize = cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")
    toPairs = ~cli.head(1) | cli.op().strip().all() | cli.apply(tokenize) | cli.joinStreams() | cli.aS(collect) | cli.batched(2)

    # record name: part before the "." of the very first key
    recordName = cli.item(2) | cli.op().split(".")[0]
    # table: same pairs, keys reduced to the part after the "."
    fieldRows = cli.apply(cli.op().split(".")[1], 0)
    each = toPairs | (recordName & fieldRows)

    # table-less blocks with more than 1 line
    recordBlocks = toBlocks() | ~hasTable() | cli.filt(cli.op().ab_len() > 1)
    return recordBlocks | each.all() | cli.toDict()