diff --git a/example1/resources/Appariement.xlsx b/example1/resources/Appariement.xlsx index 5c3c88a613e7a47fc3e84301ccf2230783fcbdfa..29036a340230af04b8190f9691673a48c4f9b02d 100644 Binary files a/example1/resources/Appariement.xlsx and b/example1/resources/Appariement.xlsx differ diff --git a/example1/resources/participants_add.json b/example1/resources/participants_add.json new file mode 100644 index 0000000000000000000000000000000000000000..5c5bae2dfd2cb5ba0269fe96d938815ec0b94214 --- /dev/null +++ b/example1/resources/participants_add.json @@ -0,0 +1,73 @@ +{ + "participant_id":{ + "LongName":"Participant Id", + "Description":"label identifying a particular subject" + }, + "age":{ + "LongName":"Age", + "Description":"Age of a subject", + "Units":"year" + }, + "sex":{ + "LongName":"Sex", + "Description":"Sex of a subject", + "Levels":{ + "F":"Female", + "M":"Male" + } + }, + "education":{ + "LongName":"Education level", + "Description":"Education level" + }, + "group":{ + "LongName":"group", + "Description":"Group subject belongs", + "Levels":{ + "patient":"patient", + "control":"control" + } + }, + "handiness":{ + "LongName":"Handiness of subject", + "Levels":{ + "r":"right-handed", + "l":"left-handed" + } + }, + "paired":{ + "LongName":"Paired Id", + "Description":"Subject Id paired with this subject" + }, + "ses_1":{ + "LongName":"First session", + "Description":"Id of the first session taken by subject", + "Levels":{ + "ses-LCL":"Low charge level", + "ses-HCL":"High charge level", + "ses-STROOP":"Multiparametric scan" + } + }, + "ses_2":{ + "LongName":"Second session", + "Description":"Id of the second session taken by subject", + "Levels":{ + "ses-LCL":"Low charge level", + "ses-HCL":"High charge level", + "ses-STROOP":"Multiparametric scan" + } + }, + "ses_3":{ + "LongName":"Third session", + "Description":"Id of the third session taken by subject", + "Levels":{ + "ses-LCL":"Low charge level", + "ses-HCL":"High charge level", +
"ses-STROOP":"Multiparametric scan" + } + }, + "random":{ + "LongName":"Random number between 0 and 1", + "Description":"Test of adding columns in process/bidsify step" + } +} diff --git a/example1/resources/participants_remove.json b/example1/resources/participants_remove.json new file mode 100644 index 0000000000000000000000000000000000000000..317371dfd2113fc9ec649ea2589d88e20bce0acf --- /dev/null +++ b/example1/resources/participants_remove.json @@ -0,0 +1,64 @@ +{ + "participant_id":{ + "LongName":"Participant Id", + "Description":"label identifying a particular subject" + }, + "sex":{ + "LongName":"Sex", + "Description":"Sex of a subject", + "Levels":{ + "F":"Female", + "M":"Male" + } + }, + "education":{ + "LongName":"Education level", + "Description":"Education level" + }, + "group":{ + "LongName":"group", + "Description":"Group subject belongs", + "Levels":{ + "patient":"patient", + "control":"control" + } + }, + "handiness":{ + "LongName":"Handiness of subject", + "Levels":{ + "r":"right-handed", + "l":"left-handed" + } + }, + "paired":{ + "LongName":"Paired Id", + "Description":"Subject Id paired with this subject" + }, + "ses_1":{ + "LongName":"First session", + "Description":"Id of the first session taken by subject", + "Levels":{ + "ses-LCL":"Low charge level", + "ses-HCL":"High charge level", + "ses-STROOP":"Multiparametric scan" + } + }, + "ses_2":{ + "LongName":"Second session", + "Description":"Id of the second session taken by subject", + "Levels":{ + "ses-LCL":"Low charge level", + "ses-HCL":"High charge level", + "ses-STROOP":"Multiparametric scan" + } + }, + "ses_3":{ + "LongName":"Third session", + "Description":"Id of the third session taken by subject", + "Levels":{ + "ses-LCL":"Low charge level", + "ses-HCL":"High charge level", + "ses-STROOP":"Multiparametric scan" + } + } +} diff --git a/example1/resources/plugins/bidsify_plugin.py b/example1/resources/plugins/bidsify_plugin.py index
7cfafc451fe787128c9018093e94a702658f7b25..6703e2be5f2e30af1941b1d18fc52515ecf4cf42 100644 --- a/example1/resources/plugins/bidsify_plugin.py +++ b/example1/resources/plugins/bidsify_plugin.py @@ -1,6 +1,7 @@ import os import shutil import logging +import random from definitions import checkSeries @@ -26,27 +27,35 @@ def InitEP(source: str, destination: str, dry: bool) -> int: dry_run = dry +def SubjectEP(scan): + sub_id = int(scan.subject[4:]) + scan.subject = "sub-{:03d}".format(sub_id + 1) + scan.sub_values["sex"] = None + scan.sub_values["random"] = random.random() + + def SessionEP(scan): global series global sid sub = scan.subject ses = scan.session - path = os.path.join(rawfolder, - sub, ses, - "MRI") + # path = os.path.join(rawfolder, + # sub, ses, + # "MRI") + path = os.path.join(scan.in_path, "MRI") series = sorted(os.listdir(path)) - series = [s.split("-",1)[1] for s in series] + series = [s.split("-", 1)[1] for s in series] sid = -1 checkSeries(path, sub, ses, False) # copytng behevioral data - aux_input = os.path.join(rawfolder, sub, ses, "auxiliary") + aux_input = os.path.join(scan.in_path, "auxiliary") if ses in ("ses-LCL", "ses-HCL"): if not os.path.isdir(aux_input): logger.error("Session {}/{} do not contain auxiliary folder" .format(sub, ses)) raise FileNotFoundError("folder {} not found" .format(aux_input)) - beh = os.path.join(bidsfolder, sub, ses, "beh") + beh = os.path.join(scan.in_path, "beh") if not dry_run: os.makedirs(beh, exist_ok=True) for old, new in (("FCsepNBack.tsv", "task-rest_events.tsv"), @@ -81,7 +90,7 @@ def SequenceEP(recording): global Intended Intended = "" sid += 1 - recid = series[sid] + recid = series[sid] if recid != recording.recId(): logger.warning("{}: Id mismatch folder {}" .format(recording.recIdentity(False), diff --git a/example1/resources/plugins/rename_plugin.py b/example1/resources/plugins/rename_plugin.py index bafe39b2bbc9481b68186974568774d69bae5f59..726492895ee2a9dbb0d20b6f7e88e7163bc93a1a 100644 --- 
a/example1/resources/plugins/rename_plugin.py +++ b/example1/resources/plugins/rename_plugin.py @@ -3,126 +3,229 @@ import pandas import logging import shutil -from tools import tools -from bidsMeta import BIDSfieldLibrary +from tools import tools from bids import BidsSession -from definitions import Series, checkSeries +from definitions import Series, checkSeries, plugin_root + +# defining logger this way will prefix plugin messages +# with plugin name logger = logging.getLogger(__name__) -# global variables -rawfolder = "" -bidsfolder = "" -dry_run = False +############################# +# global bidscoin variables # +############################# -resources = os.path.join(os.path.dirname(__file__), "..") +# Folder with source dataset +rawfolder = None +# folder with prepared dataset +preparedfolder = None +# switch if is a dry-run (test run) +dry_run = False -df_subjects = None +########################### +# global plugin variables # +########################### + +# map of individual sessions +# key: source folder session (s01234) +# value: bidsified session (ses-HCL) scans_map = {} # scale to convert ms in log-files to seconds time_scale = 1e-3 -# list of subjects exel-file columns -excel_col_list = {'Patient' : 'pat', - 'S_A_E' : "pat_sae", +# subject black-list +# subject folders in this list will be skipped +# by plugin +sub_black_list = [] + +# subject xls table columns and their renaming +excel_col_list = {"Patient": "pat", + "Sex": "pat_sex", + "Age": "pat_age", + "Education": "pat_edu", 1: "pat_1", 2: "pat_2", 3: "pat_3", - 'Control' : "cnt", - 'S_A_E.1': "cnt_sae", - '1.1': "cnt_1", '2.1': "cnt_2", '3.1': "cnt_3", + 'Control': "cnt", + "Sex.1": "cnt_sex", + "Age.1": "cnt_age", + "Education.1": "cnt_edu", + "1.1": "cnt_1", "2.1": "cnt_2", "3.1": "cnt_3" } -# sub_columns = BIDSfieldLibrary() + +# columns prefixes for patient and control subjects +# 0 == False == Control +# 1 == True == Patient +sub_prefix = ["cnt", "pat"] + +# pandas dataframe with list
of subjects +df_subjects = None def InitEP(source: str, destination: str, dry: bool, - subjects: str="") -> int: + subjects: str = "") -> int: + """ + Initialisation of plugin + + 1. Saves source/destination folders and dry_run switch + 2. Loads subjects xls table + + Parameters + ---------- + source: str + path to source dataset + destination: str + path to prepared dataset + subjects: str + path to subjects xls file; if empty, Appariement.xlsx + is looked up in the plugin_root folder + """ + global rawfolder - global bidsfolder + global preparefolder global dry_run - global subject_file rawfolder = source - bidsfolder = destination + preparefolder = destination dry_run = dry + + ######################### + # Loading subjects list # + ######################### if subjects: subject_file = subjects else: - subject_file = os.path.join(resources, "Appariement.xlsx") - logger.info(subject_file) - + subject_file = os.path.join(plugin_root, "Appariement.xlsx") if not os.path.isfile(subject_file): raise FileNotFoundError("Subject file '{}' not found" .format(subject_file)) - # creating df for subjects + # creating dataframe for subjects global df_subjects df_subjects = pandas.read_excel(subject_file, sheet_name=0, header=0, - usecols=[0,1,2,3,4,5,6,7,8,9,10]) - df_subjects.rename(index=str, columns=excel_col_list,inplace=True) + usecols="A:N" + ) + df_subjects.rename(index=str, columns=excel_col_list, inplace=True) df_subjects = df_subjects[df_subjects['pat'].notnull() | df_subjects['cnt'].notnull()] def SubjectEP(session: BidsSession) -> int: - sub_id = int(session.subject) - index = df_subjects.loc[df_subjects["pat"] == sub_id].index - status = 0 + """ + Subject determination and initialisation + + 1. Checks if subject not in black list + 2. Loads demographics from subject table + 3.
Creates session parsing dictionary + + Parameters + ---------- + session: BidsSession + + Returns + ------- + int: + if 0, plugin successful + if > 0, plugin failed, an exception will be raised + if < 0, plugin failed, and subject will be skipped + """ + + ################################# + # Skipping if in the black list # + ################################# + if session.subject in sub_black_list: + logger.info("Subject '{}' is in black_list" + .format(session.subject)) + return -1 + + ################################# + # Retrieving subject from table # + ################################# + try: + # in case the folder name in source dataset + # cannot be converted to integer + sub_id = int(session.subject) + except ValueError as e: + logger.error("Subject {}: Can't determine subject Id for: {}" + .format(session.subject, e)) + return -1 + + # storing bidsified subject id into session object + # optional, but can be easily retrieved + session.sub_values["participant_id"] = "sub-" + session.subject + # looking for subject in dataframe prefix = "pat" + index = df_subjects.loc[df_subjects[prefix] == sub_id].index + # storing participant group in session + session.sub_values["group"] = "patient" + if len(index) == 0: # Subject not in patient list, looking in control - index = df_subjects.loc[df_subjects["cnt"] == sub_id].index + prefix = "cnt" + index = df_subjects.loc[df_subjects[prefix] == sub_id].index + session.sub_values["group"] = "control" if len(index) == 0: raise KeyError("Subject {} not found in table" .format(sub_id)) - status = 1 - prefix = "cnt" + if len(index) > 1: + logger.warning("Subject {}: several column entries present" + .format(sub_id)) index = index[0] # retrieving demographics - # <sex>_<age>_<education> - line = df_subjects.loc[index, prefix + "_sae"].split("_") - sex = line[0] - age = int(line[1]) - education = int(line[2]) - session.sub_values["participant_id"] = "sub-" + session.subject - session.sub_values["sex"] = sex -
session.sub_values["age"] = age - session.sub_values["education"] = education + sex = df_subjects.loc[index, prefix + "_sex"] + age = df_subjects.loc[index, prefix + "_age"] + education = df_subjects.loc[index, prefix + "_edu"] + + # session initialised values are Null + # fill them only if they are retrieved from table + if pandas.notna(sex): + session.sub_values["sex"] = sex + if pandas.notna(age): + session.sub_values["age"] = float(age) + if pandas.notna(education): + session.sub_values["education"] = float(education) # looking for pairing - if status == 0: - session.sub_values["group"] = "patient" - session.sub_values["paired"] = "sub-{:03}".format(int(df_subjects - .loc[index, "cnt"])) - else: - session.sub_values["group"] = "control" - session.sub_values["paired"] = "sub-{:03}".format(int(df_subjects - .loc[index, "pat"])) + paired = df_subjects.loc[index, sub_prefix[prefix == "cnt"]] + if pandas.notna(paired): + session.sub_values["paired"] = "sub-{:03}".format(int(paired)) # looking for order of sessions - global scans_map - scans_map = {} + scans_map.clear() scans_order = sorted([os.path.basename(s) for s in tools.lsdirs(os.path.join(rawfolder, session.subject), "s*") ]) + # looping over session defined in columns for ind, s in enumerate(("_1", "_2", "_3")): v = "ses-" + str(df_subjects.loc[index, prefix + s]).strip() ses = "ses" + s if v == "ses-nan": + # Session not defined in table, but existing + # in source dataset session.sub_values[ses] = "" logger.warning("Subject {}({}): missing {} value" .format(session.sub_values["participant_id"], session.sub_values["group"], ses) ) + elif v == "ses-OUT": + # participant left study + logger.warning("Subject {}({}): seems to be abandoned study" + .format(session.sub_values["participant_id"], + session.sub_values["group"], + ses) + ) + return -1 elif v not in Series: + # invalid session name logger.critical("Subject {}({}): Invalid {}: {}" .format(session.sub_values["participant_id"], 
session.sub_values["group"], @@ -132,10 +235,13 @@ def SubjectEP(session: BidsSession) -> int: raise KeyError("Invalid {}: {}" .format(ses, v)) else: + # session retrieved, storing values session.sub_values[ses] = v - scans_map[scans_order[ind]] = session.sub_values[ses] + scans_map[scans_order[ind]] = v # checking if all scans are identifyable + # if not, additional scans will be stored + # with original names for scan in scans_order: if scan not in scans_map: logger.error("Subject {}({}): Can't identify session {}" @@ -144,45 +250,68 @@ def SubjectEP(session: BidsSession) -> int: scan)) scans_map[scan] = scan + # optional, the sub- prefix added automatically + # if not present session.subject = "sub-" + session.subject - return 0 def SessionEP(session: BidsSession) -> int: - # retrieving correct session name + """ + 1. Set-up session name + + Parameters + ---------- + session: BidsSession + """ + # Setting session name from map session.session = scans_map[session.session] - return 0 def SessionEndEP(session: BidsSession): - path = os.path.join(bidsfolder, + """ + 1. Checks the series in the prepared folder + 2. Extract KSS/VAS data from kss_dict to tsv file + 3.
Parses in-scan nBack and KSS/VAS log files + """ + # path contains the destination folder, where + # all data files are placed + path = os.path.join(preparefolder, session.getPath(True)) out_path = os.path.join(path, "MRI") - checkSeries(out_path, - session.subject, session.session, - False) - # parcing log files + # checking if session contains correct series + if not dry_run: + checkSeries(out_path, + session.subject, session.session, + False) + + ############################################ + # Retrieving in-scan task and KSS/VAS data # + ############################################ if session.session == "ses-STROOP": return 0 + # where tsv files are + inp_dir = os.path.join(session.in_path, "inp") + # where tsv files should be + aux_dir = os.path.join(path, "auxiliary") + if not os.path.isdir(inp_dir): + raise NotADirectoryError(inp_dir) + + # do not copy if we are in dry mode + if not dry_run: + os.makedirs(aux_dir, exist_ok=True) + # just copy file, in real life application + # you may parse files + for file in ("FCsepNBack.tsv", "VAS.tsv"): + file = os.path.join(inp_dir, file) + if not os.path.isfile(file): + raise FileNotFoundError(file) + shutil.copy2(file, aux_dir) - logs = os.path.join(session.in_path, "inp") - aux_d = os.path.join(path, "auxiliary") - if not os.path.isdir(logs): - raise NotADirectoryError(logs) - - os.makedirs(aux_d, exist_ok=True) - for file in ("FCsepNBack.tsv", "VAS.tsv"): - file = os.path.join(logs, file) - if not os.path.isfile(file): - raise FileNotFoundError(file) - shutil.copy2(file, aux_d) - - for file in ("FCsepNBack.json", "VAS.json"): - file = os.path.join(resources, file) - if not os.path.isfile(file): - raise FileNotFoundError(file) - shutil.copy2(file, aux_d) - - return 0 + # copying the corresponding json files + for file in ("FCsepNBack.json", "VAS.json"): + file = os.path.join(plugin_root, file) + if not os.path.isfile(file): + raise FileNotFoundError(file) + shutil.copy2(file, aux_dir)