From b7e66250f4564dabe15101f0214d8399130d0102 Mon Sep 17 00:00:00 2001 From: Beliy Nikita <beliy.nikita@outlook.com> Date: Mon, 23 Mar 2020 15:23:42 +0100 Subject: [PATCH] added README --- example1/resources/Appariement.xlsx | Bin 5471 -> 5425 bytes example1/resources/README.md | 327 +++++++++++++++++++ example1/resources/dataset_description.json | 5 + example1/resources/map/bidsmap.yaml | 2 +- example1/resources/map/bidsmap_noPlugin.yaml | 2 +- example1/resources/plugins/bidsify_plugin.py | 212 ++++++++---- example1/resources/plugins/definitions.py | 20 +- example1/resources/plugins/process_plugin.py | 120 ++++--- example1/resources/plugins/rename_plugin.py | 1 + 9 files changed, 576 insertions(+), 113 deletions(-) create mode 100644 example1/resources/README.md create mode 100644 example1/resources/dataset_description.json diff --git a/example1/resources/Appariement.xlsx b/example1/resources/Appariement.xlsx index 29036a340230af04b8190f9691673a48c4f9b02d..1417168e898a376032013809371d3dee434519f8 100644 GIT binary patch delta 2351 zcmZuzcU05a5)P3Pq(~riq)X_a1QqlkAV^0-3sMA8N+1$is6yzSP=0EB8e|bQ0s(0T z5CLfxsz?n&AYcI$v$DpB5?yfL@wuKi|J-}#&fGKeotbZDL9;<q$jNROHw*-UKtP7* z*ajg<u3g(F0OvyB9mLFCuJXIt%ViyG_y9tN%v#ru_rFKbuY>r))U95Wb9^z>mIx=5 zy`1u7y|?Qh#TPKUaE1@cW7$Q2c*xxg%gHdjpxiq}mkvb6Uuw-deQq{*xbyMY2W3a= z_zmdziYl>c{OG=D9d^zE;RH9gJqgjNrkjU>iF(iUs~X;ccucbF;BE@TZ}{YsEclzK zn9iOnb&yXhs*1T|{)OLSzR!r!^X6Q#E#J45B@v2TZ(g%5U2;0-n?KP>lWZfJ#~OF- z6@ILI|Er7fu@ny2$&L?lr-~Ih2Lgdiz#!1S@^;!*{uHV0TO39Y@BvHW2O}#F!{$d( zPVbJStoPXr#Gu|V9$?^xHnFH-N8U58(43Nt`KQyPm$*G7Yz%4w8EG|@Uk|f)6X3r1 zR}(M%bs<`}lcQl@<aPZo7yBEqy1ItCNMEiO-?0`6;JN+<>+#t)2=5S6(3K~6b~QEK z+@51UuyGV(L1M_|s<<!V!S2qFqEs0rSTHiOK(s@H+^r~hGEJ_e=aBr!=JyZX&oB0U z&g*w02%t&f_9Yn~El=1oM%9da#MsW}`(E2!Hl5YD0q*t8J@{FAka_DgQSTkU;Bch= zHEz~xnyQkBS;JBU$&rH{+Z=aNLM-xiA2~ChsC>5F=;bM-s!Z%GKCaiMQIUtY=B3;D zt!wML(j7bpQ{oaC3PYzt7taA^SpuYisaqCrgX|-;Bm9|(6xB?{QS9c`u_?1O7guDJ zb4qx@p-(#Jj<N(SwRngWS0&y3x2o+I^2I+1Z<QBZV3sv(8J&Px=k+{GIK8@F=Qh@6 
zilsbno~Q|NpDO6k!~+|$)oQDnu#SDsQEowuL<q&pJIJ81eAxZuR;bK@S+<=Km3eJy zi1pI6CR8nj?M3m(?Bli4r!gPKo~@D&%o^aK`|pn1`<%)J+)i6m?8AVY`8hR^n|EN< zmiNY6MN3~Dvr%rk%`a^u>U}}V{qfM4p5?-#R9LR@)it?Je3gm%i9&L4G~)Sj=_`xI zm0GG}m>$~5asn~;{062My*n!p4BwOfO$ctBOg2~d&U9H;iFTc#j4Z`SIHuLw)}Qc} z>U(Hzk)1QB>mQ!ex~80!of9gHyZ{TJTOLM+3Hk>sk<?q}kDkcRX{U!U6Kj%IYGcN{ z)*D-9FU=d_TV_GGtM*oPE67)+I$}P?M@aHUV4%_O3N9Aj$v=V%tQN?X%d2aTa3oc% zDL)!<!a*ICTX-WjHfQEpQ;7?{%;Wfue7ZNWgH@OOnAYtf^!-l{A)Np6=*hlJGm>Xj zxzX7Gm5{6RC^tz_BW&N>FuCm4@sbQdx`QMGu{4EO9HCgu#kyfT*c_J0$LaQ!+E|(w zv$};2PtP&u#80G4c$7ab1<Xw@dmc=XhU|aVp2(om@()gfml`-}29ESoO3)5&oxC<y z2cMaa&72RU0PLgOsU&hG1T86FXtmt5a<lExy^mkRd7?`j%gYEo-K}jY*@i&4jEVO^ zGk(O??lCf2M!to}yjAeDh?wTCh1zG`Ec#c_27v|Qw-dgdAy8$pP(3g`3t*#hwYVE! z8ZP;ClIKh!%aA=W{dQUTR*M27x^PqC<qemvbKa2_rXqD3dA?oZPe%)$xK}6gTV2*B zn8}}R^ya`tY-tjHHQd+RX@<;;)F{-x)2XPU>YHd9U9_pxkK@37KrlCIm`E%}WKt!Z z`DWfsTu}gCCbZ+-hSjuf3TCr4n%^^~;t;Pt(S>=7*73D8{0{A8YuJb%;Rb>33jIbq zO)TD{;2pkgLhu9JKQ04868|vpvamjWw&H*TT1{jL!7t1cQPbErpLX?_?Hu~+C+=cu zSsN{RYIL*+>lmX+5CJ~aso?spxg6uq&vmLWId*NzmgcL?qzCB)l2S=w>oRtXFDF=0 z{6CA%R>C=F9MH~u^#|Ne#hj8(Y8H)>Wlt1UBL93hiRo54FBPgm5+U*WWC`Glx1iHj zTY9dEtYy8Vk8g_O)7XB!s!(q7bR%1xO!pAMxf~kQ_Tk;ypYc>Z*GRm<upM%@D=65j zVXeYbm-=X!%!ry4UQdc>(tmlV(^M&7sIh+>qvjL1p+guCjaic2L1*O7r3vOX<)(1` zhK>^e6;Z{X?9qoMvr*bf3-<+-W-%(rUO-jN6wx{PUcWkYx|xsnJ3Z{-#V0Q|bvcfs z=)Y)YhTZZMpjOvfv|kmZGi$qcU-9ka(FgF<<8@l}W|`fQ+*X0Fd~^lHYBNzUWa`zs zc}%R2sM(uQbFz?YeG;(PlGtFqFwdI93BjQut)KC)4WA(hWLK_@JQZ@1U+^P~NCKr# z3LQh025XvCaPi3Ob-0o7Edff2JhVq}K!IEuhX@}}fLgJ;pN`God?o_YCl`)YF!PD8 z^h4BfU7B1shqSsgcnGyRQC}oK-v`4fh!s?_=v_#WY~9<kg_Hco-g<n4)~svT*4|8f zCAKhQ?Zb*eSoj+UecWsCG4K(g3nlbM@|#K0MKtNr8B!*iYOAK5><4_T#3t<3kEYfW zIVcTa^%<^@b8r6rUjp3M17%Y0BK>#4zc_#UdhFGn9ngo$UK+S;1D^l;By6gH;{GW> zX)#No?b3AmIW&P#F++tN&9C*mGc5lnhk-6J_>To*RZP~WjrYeOjm1WW`eU)7x^d?* zPIg>i80hD|;k0B!=<(WajTv<iXuCUpk<XAH&Qa0vn1HDN;|iRC$#M~ZB$y0jC#Ib$ z0?J_$Tx5U>Q&8Md{hA~6m!CnPi9G=8zeXW<VFiZUK%lmR5c`)=AQY&FiUT&{BD`~= HJ1>6&x9ABY delta 2365 
zcmaJ@cU05a68;g2v>+wasI<@mB1kuj0wO&Usx(1eDJmhM8bt|6d7*?BaOoW>B4xn@ z(I5y5OA!lFLMOO@C<#p>2(sXw<8$91Z~nRW%(-*Vd~?71%{0KL;b4?CI|l^d=H><< zkzoyBaW?k-m88U`jb{LUR$9dg@f|27vZ2wt+vu`kl?DWnqSO`z?R~T9yC<HzjFuV_ zmH%oan(+_>v4@_rz00{U$oR$sNh}>8M%CZ5y*fIE=um?{62728=!ZUfaugf%j91G( z$HOEquk7Sz&H|x!j?M0YU0ovG*mgjqQdbQoS|e?I{QVg*d<`Dz%`;q2nOC~n(Rjll ztmTD79rxx;r+oaIE5vVMd!zDGzfT0GxH8J!#~{+m)+14{IL*FR@2b1hS3z~z4BTx@ z&RMZZ@2dGb+rg&{6Lvu;>qFegvWfl~004YI0PrvDsD0-TlK=pc69|g8c9_!_Y-QGA zw@x?Ya|l<$9L~ycaHqIsWmi*G$VVQg9<M8du|JP+Pe(%}R|-~Sy>H@|x<`(e>uh~f zs+bZ8Hg?hcT<W<P-b`>cNqG5gZ>#Bx?xvxvU7AcU?=PRd>Q|n>`j+!a5-%p%^_HDI zHfuXO<F9Li1pN*JVMAv+9;B@@XOO<qqZg%YU>5J0F<2xW*KM3^F$?9Z*x4Jsbj|9@ zodG&JneSQ>)+Q}x<(!$->?_5w4tAEkiNZWZEvnwZl#}aXyzUpFzQXe1KBZ0Ip4-uW zMdQafT}y5pss?wY!eP%mWLv|L8~o+mil+;+;Lp(rw!i=t;l~EY@z37FT+=|&j%#Rz zo!sSxB=sn;%#`y^w80IIJpYpfY^(z<E5mP>ppvX`{!=K+-Lk!B&brm7Ni%f@g06Cu zjYM?a*b<N{*p4m9yl3eQ7Z0|nvKnMviKT+RcAuRN_g2Fv32~;EXf(AlH-auyFM1S< zlZTod-G)?ebQNUcEofk;v=Fg`DdDQt>XH>hj<-j3-*}jsliO-OcRp{Cv!XfLWRs^q z($zwkMh&X!ky{GR&rJMwk8NzKYIs`1le1-mV{c}GJij)1x0UzE>SHx?fk2ZzIIjvk z7H;pYJ;mFzHVsR2y%?7S<58aqeRy|%F-5Na@R9eX=<|>9B3_OPkcuvQ?N8uCIb3rd z<{`<j9HjnuxJTW|R%9L#fowOjo9Vp#x)58kOF0?SnTZ%OG6|3U<GLQhXBIYE1*mpr z8*dyp4IyMvgm%w%-hyiCn|h2A=!Pc}bziIoT@z&Io5i`3=Om8F)t+jkGWs_kXpKAh zk}3JLDH=XAxDi~Gn{$5&A87qK^ioF(386)hPb`wJDEBwboEZI3r3(vEZt?qS5SCL- zlrTUJndwKVf$3l7$WNF0SuKd^T*jRX`3+NH^WiK>-$Q7fFLbihVw;&Xlb{tX;a*&* zkM@>_=S-GXgX8l=R!f$<i*LO#(<k1fnd!&6;H5g_MG4`~q4VLB`Ap=^)`tUkFo<>P z$9`73ObKewaEz$CLpOtxrC!n=`Em(G1VzvY)wqFT>{Og7qwvaj%Gt3JVtuu*3xADl z>HD%&R{62#RDLeEX1!<Nwp_~WNM@VhRKIZ&>4LL`>4L`KuK`FTRx=G2pbFCJnU=%1 z%Fbjh2^oAM#+8y5n<LDIxOZ#0=8+0#BoB9dtBfdGyP;V8j#+)uuugTz4npiGonNJ9 zXl}K_{TN5ah0P{~*Ui@wKI=k;UGjz1%cKgJ<SjS%wU@hPITEEyjgp>sZL_Hhr%Ga$ z6D#<|S#9Flib_VmwF)D5(*t{iqT2Ddk#;*fr4|vsrn(o7otXWz#j{hx`;}`g!`Wm~ zkT(_0>Dl~d2>PIE;+EwMa3TAhLO7|j2IgWbW)vqISiH3+oc;hcY;ye&MstY%74~G5 zy4*pFQHXz{V=V5*T)D0$6}TRNh<-IcaO&&f19j5PpnUV;005T2cXi_ZuQWGyas2{1 
z(B?)?9umj*O)gmMAGK~0(7^W=WvDE3nB*z87s~`VSxg*P-pgnn++607K_h}2cWVj@ zrM;wUR5k@ko3-+xe^{`czg<2_mdA7i)6(UNu~rhyB6)L*tCL=<Ba7^&?@|+NuvPMi z&iWGVo}(^l)oM>`Y)-pHJt4X;Rajc%)`G}0sh(QHD18}y&T4)o8T^)f?dBJ+%C)s_ zuiMhevg@}`M}o~{Hu4Jk+DkQ}$*)H~-wn?#AB;Q`bEyfE<?umoTkAz1Q*z9)rsd)N zL}J+b<sHA=h9@fOVqH63;ud8|u4G)^W4pi72d$4(EPUF=9F)h-{_;9=U!)`1{#U$7 z^usVb=7kI-V!F-`hba!Fq>>fK>4P@+D`~&$adr1PE3-9iGtJ$Xwbz&Lh_`kaq}sBo zlfy3M`-eomjVa>R#>BjHigNCA-66;=WQ)}VXhRkO0c2lp9<&$AhaqcTqH>7G$}B7E zagO0I%3aAABhBA9{v$wdX-L=t(6QjTGx!=%o={MVC{8iUx0x^LWfmbh?OsZn&yA0# zjj;SW%CDzpwJlc*N1~k=OHrN%!SD}<xoX{P>5Eec1F3N`a?BC~X4$TG?}WZdk+PnA zl)w%TZdakS-TAfeNMSUJ4?AHs*U@`u^&WS#Ps>=`Zbs4EPzfc;k+PLzaON<k34dRv zKtOhm_vQqLles#t{Oijzq>a<JB3uVh*=b3gSA~6PC+`-;ymnus=}`F>5$>ZsjE5N? zE>4GbPClO8{0ME9_K`BKq;BZ&Z&dBD@V#CKHSe~^F4^zbJMsTEOp*OpX^auN`VS#L z3ZDb-Qx)~2*^&%JbfgbFKNEbv$^K-&kkUkyz7HQyMWAepq!E#0ZU+%`%j6gC`(I*I z834dPod*CV9st1kp9l`Q<?kJ=jzjyStl2;i;KwwB@~OT@nb{9P?5EE6uTUz<79z!c yaQy+LNP!SBwqX(hBF%H)`i_`X4-o~i3zE7a(%+N%0D-b)kT^vZxp)t-0)GSftSck{ diff --git a/example1/resources/README.md b/example1/resources/README.md new file mode 100644 index 0000000..7de8b90 --- /dev/null +++ b/example1/resources/README.md @@ -0,0 +1,327 @@ +# Bidscoin Example 1 + +## Introduction + +This dataset is an purely fictional, designed to demonstrate the core +features of `bidscoin` bidsifier tool. + +The structure of dataset is modelled of real-life dataset, currently unpublished. +Several simplifications has been applied, conserving only the MRI images structure, +general participants book-keeping, naming schema and few auxiliary files. + +MRI recordings are stored in Nifti format with an additional json file containing +the dump of Dicom header, created by [hmri toolbox](https://hmri-group.github.io/hMRI-toolbox/) +for [SPM12](https://www.fil.ion.ucl.ac.uk/spm/software/spm12/). +All `.nii` images are replaced by an empty file, and any personal information is removed +from json files. 
+ +## Experiment description + +The experiment is designed to study the effect of fatigue on memory performance. + +5 participants are separated into pairs with matched sex, age and years of education. +First persons of pairs are used for study (patient group), while paired persons are used for control. + +During experiment, each participant is scanned 3 times (sessions), and for each session they are asked to perform either a memory or a stroop task: + +- **HCL** with memory task performed after a tiring task (High Cognitive Load) + - In addition to functional and structural scans, a diffusion scan is present +- **LCL** with memory task performed without tiring task (Low Cognitive Load) + - Session contains structural and functional MRI scans +- **STROOP** with a standard stroop task + - session contains only multi parametric mapping MRI (MPM) + +The order in which each scan is performed may vary from participant to participant. + +## Original dataset structure + +The original data is stored in `source` directory. Data corresponding to each participant +is stored in `source/<participant id>` sub-folder, where `<participant id>` is the code of +participant padded with `0`. + +Inside each participant's sub-folder, 3 folders of session data are placed. The folder names +don't have a direct correspondence with session, but represent a code applied by a scanner, +in form `sXYZ`. + +The image data is stored directly in session sub-folder `nii`. +For **LCL** and **HCL** sessions, task and assessment are stored in `inp` sub-folder. + +Tiring task, and stroop task data are not present in dataset. + +### Memory task description + +The task consists of a classic n-back working memory update task. +A set of letters is presented to participant. Each letter is presented during `1.7s`, +followed by `0.5s` fixation cross presentation. Participant is asked to remember +if such letter was present in the last, 2 cards ago or 3 cards ago (1back, 2back, 3back). 
+A participant response ("c" for correct, "n" for non-correct) is registered alongside the +expected response. +A full task consists of 18 blocks of 1,2,3-back tasks, with 16 presented letters in each block. + +Task results are formatted following [bids](https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/05-task-events.html), +and stored in `source/<subject>/<session>/nii/FCsepNBack.tsv` file. + +### Assessment description + +Each task is followed by visual analogue assessment (VAS) questionnaire, where participant is +asked to estimate his psychological state from bad (0) to good (100). +In particular the following estimations are requested: + +- **Motivation** +- **Hapiness** +- **Fatigue** +- **Openness** +- **Stress** +- **Anxiety** +- **Effort** + +The results are formatted following [bids](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#phenotypic-and-assessment-data), +and stored in `source/<subject>/<session>/nii/VAS.tsv` + +### MRI scanning sessions + +#### LCL + +During the LCL session, the next acquisitions are taken: + +- localisation (protocol: `localizer`) +- a short fMRI sequence with inverted phase-encoding direction +(protocol: `cmrr_mbep2d_bold_mb2_invertpe`) +- a fMRI sequence during nBack task execution (protocol: `cmrr_mbep2d_bold_mb2_task_nfat`) +- a short fMRI sequence with inverted phase-encoding direction +(protocol: `cmrr_mbep2d_bold_mb2_invertpe`) +- a fMRI sequence without task execution -- in resting state (protocol: `cmrr_mbep2d_bold_mb2_rest`) +- a magnitude encoded fieldmap sequence (protocol: `gre_field_mapping`) +- a phase-difference fieldmap sequence (protocol: `gre_field_mapping`) +- a FLAIR sequence (protocol: `t1_mpr_sag_p2_iso`) +- a T2 weighted sequence (protocol: `t2_spc_da-fl_sag_p2_iso`) + +#### HCL + +During HCL session, the next acquisitions are taken: + +- localisation (protocol: `localizer`) +- a short fMRI sequence with inverted phase-encoding direction 
+(protocol: `cmrr_mbep2d_bold_mb2_invertpe`) +- a fMRI sequence during nBack task execution (protocol: `cmrr_mbep2d_bold_mb2_task_fat`) +- a short fMRI sequence with inverted phase-encoding direction +(protocol: `cmrr_mbep2d_bold_mb2_invertpe`) +- a fMRI sequence without task execution -- in resting state (protocol: `cmrr_mbep2d_bold_mb2_rest`) +- a magnitude encoded fieldmap sequence (protocol: `gre_field_mapping`) +- a phase-difference fieldmap sequence (protocol: `gre_field_mapping`) +- a diffusion sequence with inverted gradient direction (protocol: `cmrr_mbep2d_diff_NODDI_invertpe`) +- a diffusion sequence with normal gradient direction (protocol: `cmrr_mbep2d_diff_NODDI`) +- a diffusion sequence without RF-pulse (protocol: `cmrr_mbep2d_diff_NODDI_noise`) + + +#### STROOP + +During STROOP session, the next acquisitions are taken: + +- localisation (protocol: `localizer`) +- head-localised fieldmap for PD weighted sMRI (protocol: `al_mtflash3d_sensArray`) +- body-localised fieldmap for PD weighted sMRI (protocol: `al_mtflash3d_sensBody`) +- magnitude-encoded PD weighted structural MRI (protocol: `al_mtflash3d_PDw`) +- phase-encoded PD weighted structural MRI (protocol: `al_mtflash3d_PDw`) +- head-localised fieldmap for T1 weighted sMRI (protocol: `al_mtflash3d_sensArray`) +- body-localised fieldmap for T1 weighted sMRI (protocol: `al_mtflash3d_sensBody`) +- magnitude-encoded T1 weighted structural MRI (protocol: `al_mtflash3d_T1w`) +- phase-encoded T1 weighted structural MRI (protocol: `al_mtflash3d_T1w`) +- head-localised fieldmap for MT weighted sMRI (protocol: `al_mtflash3d_sensArray`) +- body-localised fieldmap for MT weighted sMRI (protocol: `al_mtflash3d_sensBody`) +- magnitude-encoded MT weighted structural MRI (protocol: `al_mtflash3d_MTw`) +- phase-encoded MT weighted structural MRI (protocol: `al_mtflash3d_MTw`) +- a B1 mapping with RF flip-angle relaxation (protocol: `al_B1mapping`) +- a magnitude encoded fieldmap sequence (protocol: 
`gre_field_mapping`) +- a phase-difference fieldmap sequence (protocol: `gre_field_mapping`) + +### Additional files + +All non-data files corresponding to dataset are stored in `resources` subfolder + +#### Participants bookkeeping `Appariement.xlsx` + +`Appariement.xlsx` is an excel table containing the list of participants with key +demographic data. + +Columns are, in order: + +- **Patient**: Id of participant, padded with `0` +- **Sex**: Sex of participant, either `M` (male) or `F` (female) +- **Age**: Age of participant, in years +- **Education**: Years of education +- **1**: Name of the first scanned session (session *OUT* signifies dropped-out participant) +- **2**: Name of the second scanned session +- **3**: Name of the third scanned session +- **Control**: Id of paired participant, padded with `0` +- **Sex**: Sex of paired participant, either `M` (male) or `F` (female) +- **Age**: Age of paired participant, in years +- **Education**: Years of education +- **1**: Name of the first scanned session (session *OUT* signifies dropped-out participant) +- **2**: Name of the second scanned session +- **3**: Name of the third scanned session + +#### Sidecar json files + +Prepared json files to use as [descriptions](https://bids-specification.readthedocs.io/en/stable/02-common-principles.html#tabular-files) +for bidsified `.tsv` files: + +- `participants.json` is a sidecar json file for `participants.tsv` file, containing list +of participants together with demographic information + - alternative files `participants_add.json` and `participants_remove.json` are used for +demonstration of participant table manipulations by `bidscoin` +- `FCsepNBack.json` is sidecar json file for task table +- `VAS.json` is sidecar json file for VAS + + +#### bval and bvec files + +`bval` and `bvec` files used to accompany [diffusion data](https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/01-magnetic-resonance-imaging-data.html#diffusion-imaging-data) +are 
placed in `resources/diffusion` folder. They are common to all diffusion images used +in this dataset. + +#### Bidsmap files + +Generated bidsmap files, that can be used to bidsify this dataset are placed in `resources/map` directory: + +- `bidsmap.yaml` must be used together with plugins +- `bidsmap_noPlugin.yaml` can be used without plugins + +These files can be used with `-b` option directly, or copied into `bids/code/bidscoin` directory. + +#### Plugins + +The plugins are stored in `resources/plugins` directory, and contains commented example of additional data management provided by `bidscoin` infrastructure. + +- `definitions.py` contains some common functions used by plugin and list of sessions and protocols used to check dataset validity +- `rename_plugin.py` retrieves the demographic data and sessions names from `Appariement.xlsx`bookkeeping file +- `process_plugin.py` contains some example of intermediate data processing, namely merging functional and diffusion 3D images into 4D images, it also shows example of subject demographic data modification +- `bidsify_plugin.py` contains examples of recording metadata modification in order to facilitate recordings identification + +#### Dataset description files +[The dataset description](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#dataset-description) +consists of two files: + +- `dataset_description.json`, a minimal example of json file describing dataset +- `README.md`, this file + + +## How to run example + +Dataset bidsification is composed of two steps: data preparation and data bidsification. +An optional data-processing step can be inserted between preparation and bidsification. + +A one-time step of bidsmap creation may be necessary. + +### Data preparation + +In this step, a generic user-defined dataset is organized in a standardized way. 
+ +To run data preparation, it will be enough to run from `example1` directory + +``` +python3 bidscoin.py prepare --part-template resources/participants.json --recfolder nii=MRI --plugin resources/plugins/rename_plugin.py source/ renamed/ +``` + +The options `--part-template resources/participants.json` will tell bidscoin to use participant json file as template for `participants.tsv` file. +The column `participant_id` will be filled automatically, while other columns will be filled +by default by `n/a`, unless they are set in plugin: + +``` +session.sub_values["sex"] = "M" +``` + +Without `--part-template` option the only column in participants file will be `participant_id`. + +Option `--recfolder nii=MRI` will tell `bidscoin` that image files are MRI and stored in `nii` folder. +Without this option `bidscoin` will be unable to find image files. + +Option `--plugin resources/plugins/rename_plugin.py` will tell bidscoin to load corresponding plugin. + +Parameters `source/` and `renamed/` tell bidscoin where to search for source dataset and where to place prepared dataset. + +After the execution of preparation, the `renamed` folder should contain folders and files: + +- **code/bidscoin**, with log files of the last execution of preparation step +- **participants.tsv** and **participants.json** files with formatted and filled participant list, all columns for all subjects must be filled except `handiness`, which should contain only `n/a` +- **sub-00X** folders for subjects 1-4 + - **ses-HCL** sub-folders with bidsified session name (either `ses-LCL`, if run with plugin, or `ses-s01905` if run without plugin) + - **auxiliary** folder with task and VAS tables and json (only if run with plugin) + - **MRI** subfolder containing MRI data + - **00x-<seq_name>** folders with original image data organised by sequences + +This is prepared dataset, and can be modified freely on condition that the general structure is conserved. 
+For example the participant table can be corrected if contain wrong or missing values. + +Running bidscoin with all options can be tedious. To streamline the experience, the majority of options can be saved in configuration file by running + +``` +python3 bidscoin.py -c conf.yamel --conf-save prepare <options> source/ renamed/ +``` + +This will create a local `conf.yamel` file with passed options. +To load the configuration: + +``` +python3 bidscoin.py -c conf.yamel prepare source/ renamed/ +``` + +Passing other options and using switch `--conf-save` will update configuration file. + + +### Bidsmap creation + +Bidsmap is created/tested with `map` command: + +``` +python3 bidscoin.py map --plugin resources/plugins/bidsify_plugin.py --template bidsmap_template.yaml renamed/ bids/ +``` + +The option `--plugin resources/plugins/bidsify_plugin.py` will load correspondent plugin (the used plugin is the same as for bidsification to ensure that all modifications needed to +identify scans are applied). + +The option `--template bidsmap_template.yaml` tells which template will be used. The template +reads the common metatdata and tries to guess the modality. This is based on protocol names and can vary from institute to institute. +The `bidsmap_template.yaml` works with example dataset, but for real data a different template may be needed. + +The parameters `renamed/` and `bids/` tells where prepared dataset is stored and where the bidsified dataset will be placed. + +First execution of `map` usually results into huge amount of warnings and occasional errors. +These warnings and errors must be corrected. The details of various warnings and corrections to apply can be found in `bidscoin` documentation. + +The working bidsmap can be found in `resources/map` directory. +If placed in `bids/code/bidscoin/` directory, the `map` should not produce any warnings. 
+ + + +### Process step + +The process step is an optional step, which allows limited data manipulation before bidsification. +Without plugins, it just verifies that all data is identifiable, and files with same bids name +do not exist in bids dataset. +So it can be used as check before bidsification. + +With plugins, it can be used for data manipulation, and metadata completion. +For example `resources/plugins/process_plugin.py` fills the `handiness` column, and merges +fMRI and diffusion images in single 4D image. + +``` +python3 bidscoin.py process --plugin resources/plugins/process_plugin.py renamed/ bids/ +``` + +After running, the column `handiness` must be filled and fMRI files +(for ex. in `renamed/sub-002/ses-LCL/MRI/004-cmrr_mbep2d_bold_mb2_task_nfat/`) +must be merged in one file. + +This step can be easily replaced by any custom script and/or pipeline. The only advantage +is some `bids` and `bidscoin` specific checks and recording identification. + + +### Bidsification step + +The final step is bidsification, it is run with `bidsify` command: + +``` +python3 bidscoin.py bidsify --plugin resources/plugins/bidsify_plugin.py renamed/ bids/ +``` + diff --git a/example1/resources/dataset_description.json b/example1/resources/dataset_description.json new file mode 100644 index 0000000..5a60c8c --- /dev/null +++ b/example1/resources/dataset_description.json @@ -0,0 +1,5 @@ +{ + "Name": "Bidscoin Example 1", + "BIDSVersion": "1.2.0", + "License": "PD" +} diff --git a/example1/resources/map/bidsmap.yaml b/example1/resources/map/bidsmap.yaml index 7171f5f..89395f4 100644 --- a/example1/resources/map/bidsmap.yaml +++ b/example1/resources/map/bidsmap.yaml @@ -1,5 +1,5 @@ Options: - version: 2.3.1 + version: 2.3.0 bidsignore: [] PlugIns: path: resources/plugins/bidsify_plugin.py diff --git a/example1/resources/map/bidsmap_noPlugin.yaml b/example1/resources/map/bidsmap_noPlugin.yaml index 2bf758f..af46be7 100644 --- a/example1/resources/map/bidsmap_noPlugin.yaml +++ 
b/example1/resources/map/bidsmap_noPlugin.yaml @@ -1,5 +1,5 @@ Options: - version: 2.3.1 + version: 2.3.o bidsignore: [] PlugIns: path: ~ diff --git a/example1/resources/plugins/bidsify_plugin.py b/example1/resources/plugins/bidsify_plugin.py index 6703e2b..916ec57 100644 --- a/example1/resources/plugins/bidsify_plugin.py +++ b/example1/resources/plugins/bidsify_plugin.py @@ -5,19 +5,61 @@ import random from definitions import checkSeries + +# defining logger this way will prefix plugin messages +# with plugin name logger = logging.getLogger(__name__) -# global variables -rawfolder = "" -bidsfolder = "" +############################# +# global bidscoin variables # +############################# + +# Folder with prepared dataset +preparedfolder = None +# folder with bidsified dataset +bidsfolder = None +# switch if is a dry-run (test run) dry_run = False -participants_table = None -rec_path = "" -countSeries = {} + +##################### +# Session variables # +##################### + +# Some sequences within session (namely fMRI and MPM structural) follows same +# protocol, thus it is impossible to identify them only using +# metadata +# we will identify them by order they appear in session + +# list of sequences in order of acquisition in current session +seq_list = list() + + +##################### +# Sequence variable # +##################### + +# The index of current sequence, corresponds to order in the sequence list +seq_index = -1 + +# Identified tag for fMRI and MPM MRI +# This tag will override "SeriesDescription" DICOM tag +IntendedFor = "" def InitEP(source: str, destination: str, dry: bool) -> int: + """ + Initialisation of plugin + + 1. 
Saves source/destination folders and dry_run switch + + Parameters + ---------- + source: str + path to source dataset + destination: + path to prepared dataset + """ global rawfolder global bidsfolder global dry_run @@ -28,31 +70,81 @@ def InitEP(source: str, destination: str, dry: bool) -> int: def SubjectEP(scan): + """ + Subject modification + """ + + #################### + # Subject renaming # + #################### + + # This will demonstrate the subject renaming + # namely increasing the id by 1 sub_id = int(scan.subject[4:]) scan.subject = "sub-{:03d}".format(sub_id + 1) + # changing also in participant.tsv file + if scan.sub_values["paired"]: + pair_id = int(scan.sub_values["paired"][4:]) + scan.sub_values["paired"] = "sub-{:03d}".format(pair_id + 1) + + ################################# + # Subject metadata manipulation # + ################################# + + # these modifications will appear only if corresponding + # columns are declared in participants.json + # they will not allow to add/remove columns + + # to modify the columns, use --part-template cli option + + # this will remove information on sex of subject, from bidsified dataset, + # but not corresponding columns scan.sub_values["sex"] = None + + # this will fill new column "random" + # if this column is in participant.json, it will be shown + # in bidsified participant.tsv scan.sub_values["random"] = random.random() def SessionEP(scan): - global series - global sid - sub = scan.subject - ses = scan.session - # path = os.path.join(rawfolder, - # sub, ses, - # "MRI") + """ + Session files modification + + 1. Stores the list of sequences in session + 2. Checks the sequences + 3. 
Copies HCL and LCL task and KSS/VAS files + to bidsified dataset + """ + + ###################################### + # Initialisation of sesion variables # + ###################################### + # retrieving list of sequences and puttintg them into list + global seq_list + global seq_index path = os.path.join(scan.in_path, "MRI") - series = sorted(os.listdir(path)) - series = [s.split("-", 1)[1] for s in series] - sid = -1 - checkSeries(path, sub, ses, False) - # copytng behevioral data + seq_list = sorted(os.listdir(path)) + seq_list = [s.split("-", 1)[1] for s in seq_list] + seq_index = -1 + + ################################# + # Checking sequences in session # + ################################# + checkSeries(path, scan.subject, scan.session, False) + + ############################################# + # Checking for existance of auxiliary files # + ############################################# + + # all the copy instructions must be protected by + # if not dry_run + aux_input = os.path.join(scan.in_path, "auxiliary") - if ses in ("ses-LCL", "ses-HCL"): + if scan.session in ("ses-LCL", "ses-HCL"): if not os.path.isdir(aux_input): logger.error("Session {}/{} do not contain auxiliary folder" - .format(sub, ses)) + .format(scan.subject, scan.session)) raise FileNotFoundError("folder {} not found" .format(aux_input)) beh = os.path.join(scan.in_path, "beh") @@ -63,88 +155,96 @@ def SessionEP(scan): ("VAS.tsv", "task-rest_beh.tsv"), ("VAS.json", "task-rest_beh.json")): source = "{}/{}".format(aux_input, old) - dest = "{}/{}_{}_{}".format(beh, sub, ses, new) + dest = "{}/{}_{}_{}".format(beh, scan.subject, scan.session, new) if not os.path.isfile(source): if dry_run: logger.error("{}/{}: File {} not found" - .format(sub, ses, source)) + .format(scan.subject, scan.session, source)) else: logger.critical("{}/{}: File {} not found" - .format(sub, ses, source)) + .format(scan.subject, + scan.session, + source)) raise FileNotFoundError(source) if os.path.isfile(dest): 
logger.warning("{}/{}: File {} already exists" - .format(sub, ses, dest)) + .format(scan.subject, scan.session, dest)) if not dry_run: shutil.copy2(source, dest) -series = list() -sid = -1 -Intended = "" - - def SequenceEP(recording): - global series - global sid - global Intended - Intended = "" - sid += 1 - recid = series[sid] + """ + Sequence identification + """ + global seq_index + global IntendedFor + IntendedFor = "" + seq_index += 1 + recid = seq_list[seq_index] + + # checking if current sequence corresponds in correct place in list if recid != recording.recId(): logger.warning("{}: Id mismatch folder {}" .format(recording.recIdentity(False), recid)) + # The inverted fMRI are taken just before normal fMRI + # looking into the following sequence will identify + # the current one if recid == "cmrr_mbep2d_bold_mb2_invertpe": - mod = series[sid + 1] + mod = seq_list[seq_index + 1] if mod.endswith("cmrr_mbep2d_bold_mb2_task_fat"): - Intended = "nBack" + IntendedFor = "nBack" elif mod.endswith("cmrr_mbep2d_bold_mb2_task_nfat"): - Intended = "nBack" + IntendedFor = "nBack" elif mod.endswith("cmrr_mbep2d_bold_mb2_rest"): - Intended = "rest" + IntendedFor = "rest" else: - Intended = "invalid" + IntendedFor = "invalid" logger.warning("{}: Unknown session {}" .format(recording.recIdentity(), mod)) + # fmap images are taken for HCL, LCL and MPM (STROOP) + # sessions elif recid == "gre_field_mapping": if recording.sesId() in ("ses-HCL", "ses-LCL"): - Intended = "HCL/LCL" + IntendedFor = "HCL/LCL" elif recording.sesId() == "ses-STROOP": - Intended = "STROOP" + IntendedFor = "STROOP" else: logger.warning("{}: Unknown session {}" .format(recording.recIdentity(), recording.sesId())) - Intended = "invalid" + IntendedFor = "invalid" + # fmaps sesnsBody and sesnArray are taken just before + # structural PD , T1 and MT. 
Looking into next sequences + # will allow the identification elif recid == "al_mtflash3d_sensArray": - det = series[sid + 2] + det = seq_list[seq_index + 2] if det.endswith("al_mtflash3d_PDw"): - Intended = "PDw" + IntendedFor = "PDw" elif det.endswith("al_mtflash3d_T1w"): - Intended = "T1w" - recording.setAttribute("Intended", "T1w") + IntendedFor = "T1w" elif det.endswith("al_mtflash3d_MTw"): - Intended = "MTw" + IntendedFor = "MTw" else: logger.warning("{}: Unable determine modality" .format(recording.recIdentity())) - Intended = "invalid" + IntendedFor = "invalid" elif recid == "al_mtflash3d_sensBody": - det = series[sid + 1] + det = seq_list[seq_index + 1] if det.endswith("al_mtflash3d_PDw"): - Intended = "PDw" + IntendedFor = "PDw" elif det.endswith("al_mtflash3d_T1w"): - Intended = "T1w" + IntendedFor = "T1w" elif det.endswith("al_mtflash3d_MTw"): - Intended = "MTw" + IntendedFor = "MTw" else: logger.warning("{}: Unable determine modality" .format(recording.recIdentity())) - Intended = "invalid" + IntendedFor = "invalid" def RecordingEP(recording): - if Intended != "": - recording.setAttribute("SeriesDescription", Intended) + if IntendedFor != "": + recording.setAttribute("SeriesDescription", IntendedFor) diff --git a/example1/resources/plugins/definitions.py b/example1/resources/plugins/definitions.py index 7b30193..ba0e37b 100644 --- a/example1/resources/plugins/definitions.py +++ b/example1/resources/plugins/definitions.py @@ -6,7 +6,7 @@ import os # and appear with this file-name logger = logging.getLogger(__name__) -# path to the root folder of plugin +# path to the root folder of plugin # (bidscoin_example/example1/resources) # usefull to retrieve auxiliary files plugin_root = os.path.normpath( @@ -17,9 +17,9 @@ plugin_root = os.path.normpath( # of scans Series = { "ses-LCL": ('localizer', - 'cmrr_mbep2d_bold_mb2_invertpe', - 'cmrr_mbep2d_bold_mb2_task_nfat', - 'cmrr_mbep2d_bold_mb2_invertpe', + 'cmrr_mbep2d_bold_mb2_invertpe', + 
'cmrr_mbep2d_bold_mb2_task_nfat', + 'cmrr_mbep2d_bold_mb2_invertpe', 'cmrr_mbep2d_bold_mb2_rest', 'gre_field_mapping', 'gre_field_mapping', @@ -68,7 +68,7 @@ def checkSeries(path: str, subject: str, session: str, critical: bool) -> bool: """ - Retrieve list of series from path and checks + Retrieve list of series from path and checks its compatibility with defined list Parameters: @@ -80,7 +80,7 @@ def checkSeries(path: str, session: str Name of session to check critical: bool - If True, mismatches will creeate exceptions + If True, mismatches will creeate exceptions and critical level log entries """ if session not in Series: @@ -89,7 +89,7 @@ def checkSeries(path: str, return False passed = True series = sorted(os.listdir(path)) - series = [s.split("-",1)[1] for s in series] + series = [s.split("-", 1)[1] for s in series] for ind, s in enumerate(series): if s not in Series[session]: msg = "{}/{}: Invalid serie {}".format(subject, session, s) @@ -139,9 +139,9 @@ def checkSeries(path: str, return passed -def reportError(msg: str, critical: bool, error: type=ValueError) -> None: +def reportError(msg: str, critical: bool, error: type = ValueError) -> None: """ - reports error. + reports error. 
If critical, an exception of type error will raise Parametres: @@ -155,6 +155,6 @@ def reportError(msg: str, critical: bool, error: type=ValueError) -> None: """ if critical: logger.critical(msg) - raise exception(msg) + raise Exception(msg) else: logger.error(msg) diff --git a/example1/resources/plugins/process_plugin.py b/example1/resources/plugins/process_plugin.py index ec84388..f8a42f5 100644 --- a/example1/resources/plugins/process_plugin.py +++ b/example1/resources/plugins/process_plugin.py @@ -5,6 +5,9 @@ import random from definitions import checkSeries, plugin_root + +# defining logger this way will prefix plugin messages +# with plugin name logger = logging.getLogger(__name__) ############################# @@ -23,19 +26,25 @@ dry_run = False # Session variables # ##################### -# list of sequences in current session -# used to identify fMRI and MPM MRI images -series = list() +# Some sequences within session (namely fMRI and MPM structural) follows same +# protocol, thus it is impossible to identify them only using +# metadata +# we will identify them by order they appear in session + +# list of sequences in order of acquisition in current session +seq_list = list() + ##################### # Sequence variable # ##################### -# The id of current sequence -sid = -1 +# The index of current sequence, corresponds to order in the sequence list +seq_index = -1 # Identified tag for fMRI and MPM MRI -Intended = "" +# This tag will override "SeriesDescription" DICOM tag +IntendedFor = "" def InitEP(source: str, destination: str, dry: bool) -> int: @@ -87,25 +96,27 @@ def SessionEP(scan): ###################################### # Initialisation of sesion variables # ###################################### - global series - global sid - sub = scan.subject - ses = scan.session + # retrieving list of sequences and puttintg them into list + global seq_list + global seq_index path = os.path.join(scan.in_path, "MRI") - series = sorted(os.listdir(path)) - 
series = [s.split("-", 1)[1] for s in series] - sid = -1 - checkSeries(path, sub, ses, False) + seq_list = sorted(os.listdir(path)) + seq_list = [s.split("-", 1)[1] for s in seq_list] + seq_index = -1 + ################################# + # Checking sequences in session # + ################################# + checkSeries(path, scan.subject, scan.session, False) ############################################# # Checking for existance of auxiliary files # ############################################# - aux_input = os.path.join(session.in_path, "auxiliary") - if ses in ("ses-LCL", "ses-HCL"): + aux_input = os.path.join(scan.in_path, "auxiliary") + if scan.session in ("ses-LCL", "ses-HCL"): if not os.path.isdir(aux_input): logger.error("Session {}/{} do not contain auxiliary folder" - .format(sub, ses)) + .format(scan.subject, scan.session)) return -1 for old, new in (("FCsepNBack.tsv", "task-rest_events.tsv"), ("FCsepNBack.json", "task-rest_events.json"), @@ -114,76 +125,89 @@ def SessionEP(scan): source = "{}/{}".format(aux_input, old) if not os.path.isfile(source): logger.error("{}/{}: File {} not found" - .format(sub, ses, source)) + .format(scan.subject, scan.session, source)) def SequenceEP(recording): """ Sequence identification """ - global series - global sid - global Intended - Intended = "" - sid += 1 - recid = series[sid] + + global seq_index + global IntendedFor + IntendedFor = "" + seq_index += 1 + recid = seq_list[seq_index] + + # checking if current sequence corresponds in correct place in list if recid != recording.recId(): logger.warning("{}: Id mismatch folder {}" .format(recording.recIdentity(False), recid)) + + # The inverted fMRI are taken just before normal fMRI + # looking into the following sequence will identify + # the current one if recid == "cmrr_mbep2d_bold_mb2_invertpe": - mod = series[sid + 1] + mod = seq_list[seq_index + 1] if mod.endswith("cmrr_mbep2d_bold_mb2_task_fat"): - Intended = "nBack" + IntendedFor = "nBack" elif 
mod.endswith("cmrr_mbep2d_bold_mb2_task_nfat"): - Intended = "nBack" + IntendedFor = "nBack" elif mod.endswith("cmrr_mbep2d_bold_mb2_rest"): - Intended = "rest" + IntendedFor = "rest" else: - Intended = "invalid" + IntendedFor = "invalid" logger.warning("{}: Unknown session {}" .format(recording.recIdentity(), mod)) + # fmap images are taken for HCL, LCL and MPM (STROOP) + # sessions elif recid == "gre_field_mapping": if recording.sesId() in ("ses-HCL", "ses-LCL"): - Intended = "HCL/LCL" + IntendedFor = "HCL/LCL" elif recording.sesId() == "ses-STROOP": - Intended = "STROOP" + IntendedFor = "STROOP" else: logger.warning("{}: Unknown session {}" .format(recording.recIdentity(), recording.sesId())) - Intended = "invalid" + IntendedFor = "invalid" + # fmaps sesnsBody and sesnArray are taken just before + # structural PD , T1 and MT. Looking into next sequences + # will allow the identification elif recid == "al_mtflash3d_sensArray": - det = series[sid + 2] + det = seq_list[seq_index + 2] if det.endswith("al_mtflash3d_PDw"): - Intended = "PDw" + IntendedFor = "PDw" elif det.endswith("al_mtflash3d_T1w"): - Intended = "T1w" - recording.setAttribute("Intended", "T1w") + IntendedFor = "T1w" elif det.endswith("al_mtflash3d_MTw"): - Intended = "MTw" + IntendedFor = "MTw" else: logger.warning("{}: Unable determine modality" .format(recording.recIdentity())) - Intended = "invalid" + IntendedFor = "invalid" elif recid == "al_mtflash3d_sensBody": - det = series[sid + 1] + det = seq_list[seq_index + 1] if det.endswith("al_mtflash3d_PDw"): - Intended = "PDw" + IntendedFor = "PDw" elif det.endswith("al_mtflash3d_T1w"): - Intended = "T1w" + IntendedFor = "T1w" elif det.endswith("al_mtflash3d_MTw"): - Intended = "MTw" + IntendedFor = "MTw" else: logger.warning("{}: Unable determine modality" .format(recording.recIdentity())) - Intended = "invalid" + IntendedFor = "invalid" def RecordingEP(recording): - if Intended != "": - recording.setAttribute("SeriesDescription", Intended) + """ + 
Setting "SeriesDescription" tag for given recording. + """ + if IntendedFor != "": + recording.setAttribute("SeriesDescription", IntendedFor) def SequenceEndEP(outfolder, recording): @@ -192,6 +216,7 @@ def SequenceEndEP(outfolder, recording): """ modality = recording.Modality() + # only for fMRI and diffusion images if modality not in ("func", "dwi"): return @@ -201,8 +226,11 @@ def SequenceEndEP(outfolder, recording): .format(recording.recIdentity(index=False), modality)) first_file = os.path.join(outfolder, recording.files[0]) + # "convertion" is just copy of first file in sequence + # in real application a real external tool should be used shutil.copy2(first_file, f4D + ".nii") first_file = os.path.splitext(first_file)[0] + ".json" + # copying the first file json to allow the identification shutil.copy2(first_file, f4D + ".json") # copying fake bval and bvec values @@ -219,6 +247,8 @@ def SequenceEndEP(outfolder, recording): "NODDI.bvec"), os.path.join(outfolder, "4D.bvec")) + + # Removing now obsolete files for f_nii in recording.files: f_nii = os.path.join(outfolder, f_nii) f_json = os.path.splitext(f_nii)[0] + ".json" diff --git a/example1/resources/plugins/rename_plugin.py b/example1/resources/plugins/rename_plugin.py index 7264928..e0be9bf 100644 --- a/example1/resources/plugins/rename_plugin.py +++ b/example1/resources/plugins/rename_plugin.py @@ -25,6 +25,7 @@ preparedfolder = None # switch if is a dry-run (test run) dry_run = False + ########################### # global plugin variables # ########################### -- GitLab