from k1lib.imports import *
from k1lib.bioinfo.cli import *
import pydicom


["abc", "de"] | applyMp(lambda x: len(x)) | deref()

[3, 2]


#base = "/home/kelvin/hdd/data/osic-pulmonary-fibrosis-progression"
base = "/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression"


ls(base)

['/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression/train',
 '/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression/test',
 '/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression/train.csv',
 '/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression/test.csv',
 '/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression/sample_submission.csv']


None | cmd(f"tree {base}") | headOut()

/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression
├── sample_submission.csv
├── test
│   ├── ID00419637202311204720264
│   │   ├── 10.dcm
│   │   ├── 11.dcm
│   │   ├── 12.dcm
│   │   ├── 13.dcm
│   │   ├── 14.dcm
│   │   ├── 15.dcm


tc = k1lib.Wrapper(cat(f"{base}/train.csv") | table(",") | deref())
tc() | display(3); tc() | shape()

Patient                     Weeks   FVC    Percent            Age   Sex    SmokingStatus   
ID00007637202177411956430   -4      2315   58.2536487166583   79    Male   Ex-smoker       
ID00007637202177411956430   5       2214   55.7121288374434   79    Male   Ex-smoker

(1550, 7)


x, y = tc() | cut(0) | count() | ~sort() | cut(0) | count() | sort(1) | permute(1, 0) | transpose() | deref()
plt.bar(x, y); plt.xlabel("Patient #scans"); plt.ylabel("Frequency");


tc() | unique(0) | shape(0)

177


plt.hist(tc() | cut(1) | ~head(1) | toFloat() | toList(), bins=30); plt.xlabel("Weeks"); plt.ylabel("Frequency");


tc() | toFloat(1) | inRange(max=0, column=1) | (display(None) & (shape(0) | stdout())) | ignore()

ID00007637202177411956430   -4.0   2315   58.2536487166583   79   Male     Ex-smoker      
ID00023637202179104603099   -3.0   1536   65.3061224489796   71   Female   Ex-smoker      
ID00051637202185848464638   -1.0   1697   81.6454173682945   73   Female   Ex-smoker      
ID00076637202199015035026   -4.0   2298   52.7499770452667   51   Male     Never smoked   
ID00086637202203494931510   -5.0   3367   117.628563443264   65   Female   Never smoked   
ID00093637202205278167493   -1.0   3695   84.9581532235813   69   Male     Ex-smoker      
ID00122637202216437668965   -4.0   2581   69.5012925463162   58   Male     Ex-smoker      
ID00133637202223847701934   -2.0   3195   92.8563124854685   83   Male     Never smoked   
ID00222637202259066229764   -1.0   2644   62.8326996197719   70   Male     Ex-smoker      
ID00233637202260580149633   -3.0   3829   100.784375658033   68   Male     Ex-smoker      
ID00340637202287399835821   -1.0   2345   59.0382678751259   68   Male     Ex-smoker      
11


sqrt2 = math.sqrt(2)
def metric(delta, sig):
    sig = np.clip(sig, 70, inf)
    delta = np.clip(np.abs(delta), -inf, 1000)
    return -sqrt2 * delta / sig - np.log(sqrt2 * sig)


x = torch.linspace(50, 200, 100)
for delta in list(range(0, 1200, 200)): plt.plot(x, metric(delta, x), label=f"{delta}");
plt.legend(); plt.xlabel("sigma"); plt.ylabel("metric (higher the better)");


plt.hist(tc() | cut(2) | toFloat() | toList(), bins=30); plt.xlabel("FVC"); plt.ylabel("Frequency");


plt.hist(tc() | cut(3) | toFloat() | toList(), bins=30); plt.xlabel("percent"); plt.ylabel("Frequency");


tc() | toFloat(3) | inRange(100, inf, 3) | (display(3) & (shape(0) | stdout())) | ignore()
195 / (tc() | shape(0)) * 100

ID00012637202177665765362   35   3759   103.076669957223   65   Male     Never smoked   
ID00020637202178344345685   18   2297   117.770713699754   66   Female   Never smoked   
ID00020637202178344345685   19   2145   109.977440525021   66   Female   Never smoked   
195

12.580645161290322


plt.hist(tc() | unique(0) | cut(4) | toFloat() | toList(), bins=30); plt.xlabel("Age"); plt.ylabel("Frequency");


x, y = tc() | cut(5) | ~head(1) | count() | permute(1, 0) | transpose()
plt.bar(x, y); plt.ylabel("Frequency");


x, y = tc() | cut(6) | ~head(1) | count() | permute(1, 0) | transpose()
plt.bar(x, y); plt.ylabel("Frequency");


ss = k1lib.Wrapper(cat(f"{base}/sample_submission.csv") | table(",") | expandE(lambda r: r.split("_"), 0) | deref())


ss() | display(3); ss() | shape()

Patient                     Week   FVC    Confidence   
ID00419637202311204720264   -12    2000   100          
ID00421637202311550012437   -12    2000   100

(731, 4)


plt.hist(ss() | cut(1) | toFloat() | toList(), bins=30);


ss() | (cut(2) & cut(3)) | (count() | display()).all() | ignore()

1     FVC    0%     
730   2000   100%   
1     Confidence   0%     
730   100          100%


dirs = os.listdir(f"{base}/train")
tc() | cut(0) | ~inSet(dirs) | headOut()
dirs | ~inSet(tc() | cut(0)) | headOut()

Patient


dirs | apply(lambda f: len(os.listdir(f"{base}/train/{f}"))) | toSum()

33026


def analyze(patientDir, returns=False):
    import os
    dcms = os.listdir(f"{base}/train/{patientDir}")
    nums = dcms | sortF(lambda r: float(r.split(".")[0])) | split(".", 0) | toInt() | deref()
    if returns: return nums
    else:
        r = nums | (toMin() & toMax()) | applyS(lambda x: range(next(x), next(x)+1)) | deref()
        if nums != r:
            print(patientDir, r | ~inSet(nums) | toList())


for patientDir in dirs: analyze(patientDir)

ID00186637202242472088675 [65, 98, 154, 161, 164, 201, 234, 285, 331, 351]
ID00068637202190879923934 [46, 117]
ID00202637202249376026949 [127]
ID00381637202299644114027 [36, 37, 38, 39, 40, 41, 42, 43, 44, 59, 81, 82, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 184, 192, 193, 194, 195, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 270, 271, 281, 282, 283, 284, 285, 286, 287, 288, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 404, 405, 406, 407, 408, 409, 410, 411]
ID00221637202258717315571 [35, 36, 327, 357, 358, 359, 360, 361, 362, 391, 392, 393, 394, 395, 397, 398, 399, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 531, 532, 533, 534, 535, 536, 537, 538, 540, 541, 542, 543]
ID00015637202177877247924 [3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254, 256, 258, 260, 262, 264, 266, 268, 270, 272, 274, 276, 278, 280, 282, 284, 286, 288, 290, 292, 294, 296, 298, 300, 302, 304, 306, 308, 310, 312, 314, 316, 318, 320, 322, 324, 326, 328, 330, 332, 334, 336, 338, 340, 342, 344, 346, 348, 350, 352, 354, 356, 358, 360, 362, 364, 366, 368, 370, 372, 374, 376, 378, 380, 382, 384, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 408, 410, 412, 414, 416, 418, 420, 422, 424, 426, 428, 430, 432, 434, 436, 438, 440, 442, 444, 446, 448, 450, 452, 454, 456, 458, 460, 462, 464, 466, 468, 470, 472, 474, 476, 478, 480, 482, 484, 486, 488, 490, 492, 494, 496, 498, 500, 502, 504, 506, 508, 510, 512, 514, 516, 518, 520, 522, 524, 526, 528, 530, 532, 534, 536, 538, 540, 542, 544, 546, 548, 550, 552, 554, 556, 558, 560, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 616, 618, 620, 622, 624, 626, 628, 630, 632, 634, 636, 638, 640, 642]
ID00342637202287526592911 [5]


%%time
fields = sorted(list(f"{base}/train" | ls() | applyMp(\
    ls() | apply(lambda dcmF: dir(pydicom.read_file(dcmF))) | breakIf(lambda e: e.startswith("_")).all() | union()\
) | union() | ~inSet(["PixelData"])))

CPU times: user 50.2 ms, sys: 83.1 ms, total: 133 ms
Wall time: 8.97 s


%%time
# patient stream, Iterator[dcmO]
_ps = k1lib.Wrapper(f"{base}/train" | ls() | applyMp(\
    ls() | sortF(lambda dcmF: float(dcmF.split("/")[-1].split(".")[0]))\
    | apply(lambda dcmF: pydicom.read_file(dcmF))\
    | apply(lambda dcmO: [getattr(dcmO, field, None) for field in fields]) | toList()\
) | joinStreams() | insertRow(*fields) | deref())

CPU times: user 7.35 s, sys: 229 ms, total: 7.58 s
Wall time: 10.6 s


# some filters
def ps(): return _ps() | apply(lambda r: "".join(r), 23) # patient name


ps() | display()

BitsAllocated   BitsStored   BodyPartExamined   Columns   ConvolutionKernel   DeidentificationMethod   DistanceSourceToDetector   DistanceSourceToPatient   FocalSpots   FrameOfReferenceUID                           GantryDetectorTilt   GeneratorPower   HighBit   ImageOrientationPatient                      ImagePositionPatient                               ImageType                                         InstanceNumber   KVP   LargestImagePixelValue   Manufacturer   ManufacturerModelName   Modality   PatientID                   PatientName                 PatientOrientation   PatientPosition   PatientSex   PhotometricInterpretation   PixelPaddingValue   PixelRepresentation   PixelSpacing                     PositionReferenceIndicator   RescaleIntercept   RescaleSlope   RescaleType   RevolutionTime   RotationDirection   Rows   SOPInstanceUID                                 SamplesPerPixel   SeriesInstanceUID                             SingleCollimationWidth   SliceLocation   SliceThickness   SmallestImagePixelValue   SpacingBetweenSlices   SpatialResolution   SpecificCharacterSet   SpiralPitchFactor   StudyID   StudyInstanceUID                              TableFeedPerRotation   TableHeight   TableSpeed   TotalCollimationWidth   WindowCenter         WindowCenterWidthExplanation   WindowWidth           XRayTubeCurrent   
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-63.5']    ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   1                120   3245                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.9463146288515712963633997841404523038     1                 2.25.47956916717451292264780760321974497731   None                     -63.5           1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   492               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-73.5']    ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   2                120   3340                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.65126428428746723320146291533300269472    1                 2.25.47956916717451292264780760321974497731   None                     -73.5           1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   466               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-83.5']    ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   3                120   3203                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.92410940641125329671602685303131497613    1                 2.25.47956916717451292264780760321974497731   None                     -83.5           1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   474               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-93.5']    ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   4                120   3088                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.79060946248985024985454773267933758323    1                 2.25.47956916717451292264780760321974497731   None                     -93.5           1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   485               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-103.5']   ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   5                120   3092                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.24054830837068092403015662083163705060    1                 2.25.47956916717451292264780760321974497731   None                     -103.5          1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   486               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-113.5']   ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   6                120   3134                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.37863222002173583735535776147245377513    1                 2.25.47956916717451292264780760321974497731   None                     -113.5          1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   474               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-123.5']   ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   7                120   2943                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.41137366387152427112337946392346330533    1                 2.25.47956916717451292264780760321974497731   None                     -123.5          1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   447               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-133.5']   ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   8                120   2943                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.146825428325898645450166070755582094649   1                 2.25.47956916717451292264780760321974497731   None                     -133.5          1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   388               
16              12           Chest              512       B70f                Table;                   1040                       570                       1.2          2.25.42475716911114150817601785542698928730   0                    34               11        ['1.0', '0.0', '0.0', '0.0', '1.0', '0.0']   ['-144.2099609375', '-307.2099609375', '-143.5']   ['ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI']   9                120   2952                     SIEMENS        Sensation 64            CT         ID00011637202177653955184   ID00011637202177653955184   None                 HFS                            MONOCHROME2                 None                0                     ['0.580078125', '0.580078125']                                -1024              1              None          None             CW                  512    2.25.125224569633841097600297661567635371876   1                 2.25.47956916717451292264780760321974497731   None                     -143.5          1                0                         None                   None                ISO_IR 100             None                          2.25.58893941013546715314915610943575006591   None                   159           None         None                    ['-500.0', '40.0']   ['WINDOW1', 'WINDOW2']         ['1500.0', '350.0']   314


%%time
singleValues = [i for i, field in enumerate(fields) if (ps() | cut(i) | ~isValue(None) | count() | ~sort() | shape(0) == 2)]
[[fields[fi], ps() | cut(fi) | ~head(1) | item()] for fi in singleValues] | display(None)

BitsAllocated                  16                       
BodyPartExamined               Chest                    
DeidentificationMethod         Table;                   
GantryDetectorTilt             0                        
Modality                       CT                       
PatientSex                                              
PhotometricInterpretation      MONOCHROME2              
RescaleSlope                   1                        
RotationDirection              CW                       
SamplesPerPixel                1                        
SmallestImagePixelValue        0                        
StudyID                                                 
WindowCenterWidthExplanation   ['WINDOW1', 'WINDOW2']   
CPU times: user 8.65 s, sys: 0 ns, total: 8.65 s
Wall time: 8.65 s


multiValue = range(len(fields)) | ~inSet(singleValues) | toList(); len(multiValue)

46


def explore(column):
    print(fields[column])
    ps() | ~head(1) | cut(column) | count() | ~sort() | display(None)


multiValue | batched(10, True) | deref()

[[1, 3, 4, 6, 7, 8, 9, 11, 12, 13],
 [14, 15, 16, 17, 18, 19, 20, 22, 23, 24],
 [25, 28, 29, 30, 31, 32, 34, 35, 37, 38],
 [40, 41, 42, 43, 45, 46, 47, 48, 50, 51],
 [52, 53, 54, 55, 57, 58]]


hists = [11, 14, 16, 18, 30, 37, 42, 52, 53, 58] # potential to be histogramable


def plotHist(column, cli=identity()):
    print(column, fields[column])
    plt.hist(ps() | ~head(1) | cut(column) | ~isValue(None) | cli | toFloat() | toList(), bins=30); plt.xlabel(fields[column])


def categorical(column, cli=identity()):
    plt.bar(*(ps() | ~head(1) | cut(column) | cli | count() | apply(lambda e: "None" if e is None else e, 1) | permute(1, 0) | transpose()));
    plt.xlabel(fields[column]); print(fields[column]); plt.xticks(rotation=90)


plotHist(1)

1 BitsStored


plotHist(3)

3 Columns


categorical(4, apply(lambda row: "".join(row)))

ConvolutionKernel


plotHist(6)

6 DistanceSourceToDetector


plotHist(7)

7 DistanceSourceToPatient


plotHist(8, ~filt(lambda r: type(r) == list))

8 FocalSpots


ps() | cut(9) | count() | ~sort() | headOut()

[1018, '2.25.103850621004807268762115331499439182923', '3%']
[825, '2.25.26067589961801356770842553401614363097', '2%']
[602, '2.25.10943608917813761096224961520231208680', '2%']
[577, '2.25.145280983379892090408836326254764157961', '2%']
[574, '2.25.71482738880009091674153143085572530828', '2%']
[524, None, '2%']
[521, '2.25.27392021258224904475429237145953835284', '2%']
[512, '2.25.93280772249744696158009885016953113343', '2%']
[498, '2.25.101620500601016521223324645841506278865', '2%']
[497, '2.25.51512508295801930592408267572786009206', '2%']


plotHist(11)

11 GeneratorPower


a = np.array(ps() | cut(11) | ~isValue(None) | toFloat() | toList())
plt.hist(a[a<2000], bins=30); plt.xlabel(fields[11]);


plotHist(12)

12 HighBit


categorical(13, ~isValue(None) | apply(lambda row: " ".join([str(round(e)) for e in row]))); plt.yscale("log");

ImageOrientationPatient


x, y, z = ps() | cut(14) | ~isValue(None) | toFloat(0, 1, 2) | transpose()
plt.hist(x, bins=30, alpha=0.5, label="x");
plt.hist(y, bins=30, alpha=0.5, label="y");
plt.hist(z, bins=100, alpha=0.5, label="z"); plt.legend(); fields[14]

'ImagePositionPatient'


ps() | cut(15) | count() | ~sort() | display(None); fields[15]

12915   ('ORIGINAL', 'PRIMARY', 'AXIAL')                                                                           39%   
9988    ('ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SPI')                                                            30%   
7133    ('ORIGINAL', 'PRIMARY', 'AXIAL', 'HELIX')                                                                  22%   
1245    ('ORIGINAL', 'SECONDARY', 'AXIAL')                                                                         4%    
549     ('ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM5 SEQ')                                                            2%    
394     ('DERIVED', 'SECONDARY', 'AXIAL', 'CT_SOM5 SPI')                                                           1%    
346     ('ORIGINAL', 'PRIMARY', 'AXIAL', 'CT_SOM7 SPI DUAL', 'STD', 'SNRG', 'DET_AB')                              1%    
253     ('ORIGINAL', 'PRIMARY', 'AXIAL', 'VOLUME')                                                                 1%    
64      ('DERIVED', 'PRIMARY', 'AXIAL', 'JP2K LOSSY 6:1')                                                          0%    
62      1                                                                                                          0%    
52      ('DERIVED', 'SECONDARY', 'REFORMATTED', 'AVERAGE')                                                         0%    
24      ('DERIVED', 'SECONDARY', 'OTHER', 'CSA MPR', '', 'CSAPARALLEL', 'AXIAL', 'CT_SOM5 SPI', 'CSA RESAMPLED')   0%    
1       ImageType                                                                                                  0%    
1       ('DERIVED', 'SECONDARY', 'REFORMATTED')                                                                    0%

'ImageType'


ps() | ~head(1) | cut(15) | count() | cut(1) | toSet() | union() | ~isValue("") | deref()

['STD',
 'DERIVED',
 'AVERAGE',
 'AXIAL',
 'OTHER',
 'SNRG',
 'JP2K LOSSY 6:1',
 'CSA RESAMPLED',
 'DET_AB',
 'HELIX',
 'CSA MPR',
 '1',
 'PRIMARY',
 'CT_SOM5 SEQ',
 'CT_SOM7 SPI DUAL',
 'SECONDARY',
 'REFORMATTED',
 'CT_SOM5 SPI',
 'ORIGINAL',
 'CSAPARALLEL',
 'VOLUME']


plotHist(16)

16 InstanceNumber


plotHist(17)

17 KVP


plotHist(18)

18 LargestImagePixelValue


categorical(19)

Manufacturer


categorical(20)

ManufacturerModelName


categorical(24, apply(lambda r: "".join(r) if isinstance(r, list) else r))

PatientOrientation


categorical(25)

PatientPosition


categorical(28, toStr()); plt.yscale("log");

PixelPaddingValue


plotHist(29)

29 PixelRepresentation


plt.hist(ps() | cut(30) | toFloat(0) | cut(0) | toList(), bins=30); plt.xlabel(fields[30]);


all(ps() | cut(30) | toFloat(0, 1) | transpose() | equals())

True


categorical(31)

PositionReferenceIndicator


plotHist(32)

32 RescaleIntercept


categorical(34)

RescaleType


plotHist(35)

35 RevolutionTime


plotHist(37)

37 Rows


ps() | cut(38) | ~isValue(None) | count() | ~sort() | display(4)

1   SOPInstanceUID                                0%   
1   2.25.9463146288515712963633997841404523038    0%   
1   2.25.65126428428746723320146291533300269472   0%   
1   2.25.92410940641125329671602685303131497613   0%


ps() | cut(40) | ~isValue(None) | count() | ~sort() | display(4)

1018   1.3.6.1.4.1.19291.2.1.2.1162211771921352226130345608929   3%   
825    2.25.67300783861445963103644197317110142934               2%   
602    1.3.6.1.4.1.19291.2.1.2.1162211771921352226130349723959   2%   
577    2.25.168335002352647095587144455728050514290              2%


plotHist(41)

41 SingleCollimationWidth


plotHist(42)

42 SliceLocation


plotHist(43)

43 SliceThickness


plotHist(45)

45 SpacingBetweenSlices


a = np.array(ps() | cut(45) | ~isValue(None) | toFloat() | toList())
plt.hist(a[(a>-3) * (a<3)], bins=30); plt.xlabel(fields[45])

Text(0.5, 0, 'SpacingBetweenSlices')


plotHist(46)

46 SpatialResolution


categorical(47, apply(lambda r: " - ".join(r) if isinstance(r, list) else r))

SpecificCharacterSet


plotHist(48)

48 SpiralPitchFactor


ps() | cut(50) | ~isValue(None) | count() | ~sort() | headOut()

[1018, '2.25.128099341455079131013013265801613167022', '3%']
[825, '2.25.44730338876702058263099469144293477993', '2%']
[602, '2.25.12038526814304474367356176974755630418', '2%']
[577, '2.25.168009088382578703118904593371568532454', '2%']
[574, '2.25.27570211803454260334890711816742630716', '2%']
[521, '2.25.21250775107781698674952036145409229511', '2%']
[512, '2.25.51524201997550974060815639537176759519', '2%']
[498, '2.25.138385002285841551528805699638475741461', '2%']
[497, '2.25.263916267626722293569390930007441210', '2%']
[493, '2.25.51234585641398128930095037592177078611', '1%']


ps() | cut(50) | table(".") | cut(0, 1) | stitch(".") | toFloat() | count() | display()

33026   2.25   100%


plotHist(51)

51 TableFeedPerRotation


plotHist(52)

52 TableHeight


plotHist(53)

53 TableSpeed


plotHist(54)

54 TotalCollimationWidth


categorical(55, instanceOf([str, float]) | toInt() | toStr()); plt.yscale("log");

WindowCenter


plotHist(57, instanceOf([float, str]));

57 WindowWidth


plotHist(58)

58 XRayTubeCurrent


fields | insertIdColumn() | display(None)

0    BitsAllocated                  
1    BitsStored                     
2    BodyPartExamined               
3    Columns                        
4    ConvolutionKernel              
5    DeidentificationMethod         
6    DistanceSourceToDetector       
7    DistanceSourceToPatient        
8    FocalSpots                     
9    FrameOfReferenceUID            
10   GantryDetectorTilt             
11   GeneratorPower                 
12   HighBit                        
13   ImageOrientationPatient        
14   ImagePositionPatient           
15   ImageType                      
16   InstanceNumber                 
17   KVP                            
18   LargestImagePixelValue         
19   Manufacturer                   
20   ManufacturerModelName          
21   Modality                       
22   PatientID                      
23   PatientName                    
24   PatientOrientation             
25   PatientPosition                
26   PatientSex                     
27   PhotometricInterpretation      
28   PixelPaddingValue              
29   PixelRepresentation            
30   PixelSpacing                   
31   PositionReferenceIndicator     
32   RescaleIntercept               
33   RescaleSlope                   
34   RescaleType                    
35   RevolutionTime                 
36   RotationDirection              
37   Rows                           
38   SOPInstanceUID                 
39   SamplesPerPixel                
40   SeriesInstanceUID              
41   SingleCollimationWidth         
42   SliceLocation                  
43   SliceThickness                 
44   SmallestImagePixelValue        
45   SpacingBetweenSlices           
46   SpatialResolution              
47   SpecificCharacterSet           
48   SpiralPitchFactor              
49   StudyID                        
50   StudyInstanceUID               
51   TableFeedPerRotation           
52   TableHeight                    
53   TableSpeed                     
54   TotalCollimationWidth          
55   WindowCenter                   
56   WindowCenterWidthExplanation   
57   WindowWidth                    
58   XRayTubeCurrent

OSIC¶

Train.csv¶

Patient #scans distribution¶

Weeks distribution¶

Metric¶

FVC?¶

Percent¶

Age¶

Sex¶

Smoking status¶

Sample_submission.csv¶

Train/¶