adapted prepare

f7acb04e · nbeliy · d37f682d · f7acb04e · f7acb04e · f7acb04e
Commit f7acb04e authored 5 years ago by nbeliy
--- a/example1/resources/Appariement.xlsx
+++ b/example1/resources/Appariement.xlsx
--- a/example1/resources/participants_add.json
+++ b/example1/resources/participants_add.json
+{
+  "participant_id":{
+    "LongName":"Participant Id",
+    "Description":"label identifying a particular subject"
+  },
+  "age":{
+    "LongName":"Age",
+    "Description":"Age of a subject",
+    "Units":"year"
+  },
+  "sex":{
+    "LongName":"Sex",
+    "Description":"Sex of a subject",
+    "Levels":{
+      "F":"Female",
+      "M":"Male"
+    }
+  },
+  "education":{
+    "LongName":"Education level",
+    "Description":"Education level"
+  },
+  "group":{
+    "LongName":"group",
+    "Description":"Group subject belongs",
+    "Levels":{
+      "patient":"patient",
+      "control":"control"
+    }
+  },
+  "handiness":{
+    "LongName":"Handiness of subject",
+    "Levels":{
+      "r":"right-handed",
+      "l":"left-handed"
+    }
+  },
+  "paired":{
+    "LongName":"Paired Id",
+    "Description":"Subject Id paired with this subject"
+  },
+  "ses_1":{
+    "LongName":"First session",
+    "Description":"Id of the first session taken by subject",
+    "Levels":{
+      "ses-LCL":"Low charge level",
+      "ses-HCL":"High charge level",
+      "ses-STROOP":"Multiparametric scan"
+    }
+  },
+  "ses_2":{
+    "LongName":"Second session",
+    "Description":"Id of the second session taken by subject",
+    "Levels":{
+      "ses-LCL":"Low charge level",
+      "ses-HCL":"High charge level",
+      "ses-STROOP":"Multiparametric scan"
+    }
+  },
+  "ses_3":{
+    "LongName":"Third session",
+    "Description":"Id of the third session taken by subject",
+    "Levels":{
+      "ses-LCL":"Low charge level",
+      "ses-HCL":"High charge level",
+      "ses-STROOP":"Multiparametric scan"
+    }
+  },
+  "random":{
+    "LongName":"Random number between 0 and 1",
+    "Description":"Test of adding columns in process/bidsify step"
+  }
+}
--- a/example1/resources/participants_remove.json
+++ b/example1/resources/participants_remove.json
+{
+  "participant_id":{
+    "LongName":"Participant Id",
+    "Description":"label identifying a particular subject"
+  },
+  "sex":{
+    "LongName":"Sex",
+    "Description":"Sex of a subject",
+    "Levels":{
+      "F":"Female",
+      "M":"Male"
+    }
+  },
+  "education":{
+    "LongName":"Education level",
+    "Description":"Education level"
+  },
+  "group":{
+    "LongName":"group",
+    "Description":"Group subject belongs",
+    "Levels":{
+      "patient":"patient",
+      "control":"control"
+    }
+  },
+  "handiness":{
+    "LongName":"Handiness of subject",
+    "Levels":{
+      "r":"right-handed",
+      "l":"left-handed"
+    }
+  },
+  "paired":{
+    "LongName":"Paired Id",
+    "Description":"Subject Id paired with this subject"
+  },
+  "ses_1":{
+    "LongName":"First session",
+    "Description":"Id of the first session taken by subject",
+    "Levels":{
+      "ses-LCL":"Low charge level",
+      "ses-HCL":"High charge level",
+      "ses-STROOP":"Multiparametric scan"
+    }
+  },
+  "ses_2":{
+    "LongName":"Second session",
+    "Description":"Id of the second session taken by subject",
+    "Levels":{
+      "ses-LCL":"Low charge level",
+      "ses-HCL":"High charge level",
+      "ses-STROOP":"Multiparametric scan"
+    }
+  },
+  "ses_3":{
+    "LongName":"Third session",
+    "Description":"Id of the third session taken by subject",
+    "Levels":{
+      "ses-LCL":"Low charge level",
+      "ses-HCL":"High charge level",
+      "ses-STROOP":"Multiparametric scan"
+    }
+  }
+}
--- a/example1/resources/plugins/bidsify_plugin.py
+++ b/example1/resources/plugins/bidsify_plugin.py
 import os
 import shutil
 import logging
+import random

 from definitions import checkSeries

@@ -26,27 +27,35 @@ def InitEP(source: str, destination: str, dry: bool) -> int:
    dry_run = dry


+def SubjectEP(scan):
+    sub_id = int(scan.subject[4:])
+    scan.subject = "sub-{:03d}".format(sub_id + 1)
+    scan.sub_values["sex"] = None
+    scan.sub_values["random"] = random.random()
+
+
 def SessionEP(scan):
    global series
    global sid
    sub = scan.subject
    ses = scan.session
-    path = os.path.join(rawfolder,
-                        sub, ses,
-                        "MRI")
+    # path = os.path.join(rawfolder,
+    #                     sub, ses,
+    #                     "MRI")
+    path = os.path.join(scan.in_path, "MRI")
    series = sorted(os.listdir(path))
-    series = [s.split("-",1)[1] for s in series]
+    series = [s.split("-", 1)[1] for s in series]
    sid = -1
    checkSeries(path, sub, ses, False)
    # copying behavioral data
-    aux_input = os.path.join(rawfolder, sub, ses, "auxiliary")
+    aux_input = os.path.join(scan.in_path, "auxiliary")
    if ses in ("ses-LCL", "ses-HCL"):
        if not os.path.isdir(aux_input):
            logger.error("Session {}/{} do not contain auxiliary folder"
                         .format(sub, ses))
            raise FileNotFoundError("folder {} not found"
                                    .format(aux_input))
-        beh = os.path.join(bidsfolder, sub, ses, "beh")
+        beh = os.path.join(scan.in_path, "beh")
        if not dry_run:
            os.makedirs(beh, exist_ok=True)
        for old, new in (("FCsepNBack.tsv", "task-rest_events.tsv"),
@@ -81,7 +90,7 @@ def SequenceEP(recording):
    global Intended
    Intended = ""
    sid += 1
-    recid = series[sid] 
+    recid = series[sid]
    if recid != recording.recId():
        logger.warning("{}: Id mismatch folder {}"
                       .format(recording.recIdentity(False),

--- a/example1/resources/plugins/rename_plugin.py
+++ b/example1/resources/plugins/rename_plugin.py
@@ -3,126 +3,229 @@ import pandas
 import logging
 import shutil

-from tools import tools 
-from bidsMeta import BIDSfieldLibrary
+from tools import tools
 from bids import BidsSession

-from definitions import Series, checkSeries
+from definitions import Series, checkSeries, plugin_root

+
+# defining logger this way will prefix plugin messages
+# with plugin name
 logger = logging.getLogger(__name__)

-# global variables
-rawfolder = ""
-bidsfolder = ""
-dry_run = False

+#############################
+# global bidscoin variables #
+#############################

-resources = os.path.join(os.path.dirname(__file__), "..")
+# Folder with source dataset
+rawfolder = None
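+# folder with prepared dataset
+# NOTE(review): InitEP and SessionEndEP use the name `preparefolder`,
+# not `preparedfolder` -- confirm which spelling is intended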
+preparedfolder = None
+# switch if is a dry-run (test run)
+dry_run = False

-df_subjects = None
+###########################
+# global plugin variables #
+###########################
+
+# map of individual sessions
+#   key: source folder session (s01234)
+#   value: bidsified session (ses-HCL)
 scans_map = {}

 # scale to convert ms in log-files to seconds
 time_scale = 1e-3

-# list of subjects exel-file columns 
-excel_col_list = {'Patient' : 'pat',
-                  'S_A_E' : "pat_sae",
+# subject black-list
+#   subject folders in this list will be skipped
+#   by plugin
+sub_black_list = []
+
+# subject xls table columns and their renaming
+excel_col_list = {"Patient": "pat",
+                  "Sex": "pat_sex",
+                  "Age": "pat_age",
+                  "Education": "pat_edu",
                  1: "pat_1", 2: "pat_2", 3: "pat_3",
-                  'Control' : "cnt",
-                  'S_A_E.1': "cnt_sae",
-                  '1.1': "cnt_1", '2.1': "cnt_2", '3.1': "cnt_3",
+                  'Control': "cnt",
+                  "Sex.1": "cnt_sex",
+                  "Age.1": "cnt_age",
+                  "Education.1": "cnt_edu",
+                  "1.1": "cnt_1", "2.1": "cnt_2", "3.1": "cnt_3"
                  }

-# sub_columns = BIDSfieldLibrary()
+
+# columns prefixes for patient and control subjects
+#   0 == False == Control
+#   1 == True == Patient
+sub_prefix = ["cnt", "pat"]
+
+# pandas dataframe with list of subjects
+df_subjects = None


 def InitEP(source: str, destination: str,
           dry: bool,
-           subjects: str="") -> int:
+           subjects: str = "") -> int:
+    """
+    Initialisation of plugin
+
+    1. Saves source/destination folders and dry_run switch
+    2. Loads subjects xls table
+
+    Parameters
+    ----------
+    source: str
+        path to source dataset
+    destination: str
+        path to prepared dataset
+    subjects: str
+        path to subjects xls file; if empty, "Appariement.xlsx"
+        in the plugin_root folder is used
+    """
+
    global rawfolder
-    global bidsfolder
+    global preparefolder
    global dry_run
-    global subject_file

    rawfolder = source
-    bidsfolder = destination
+    preparefolder = destination
    dry_run = dry
+
+    #########################
+    # Loading subjects list #
+    #########################
    if subjects:
        subject_file = subjects
    else:
-        subject_file = os.path.join(resources, "Appariement.xlsx")
-    logger.info(subject_file)
-
+        subject_file = os.path.join(plugin_root, "Appariement.xlsx")
    if not os.path.isfile(subject_file):
        raise FileNotFoundError("Subject file '{}' not found"
                                .format(subject_file))

-    # creating df for subjects
+    # creating dataframe for subjects
    global df_subjects
    df_subjects = pandas.read_excel(subject_file,
                                    sheet_name=0, header=0,
-                                    usecols=[0,1,2,3,4,5,6,7,8,9,10])
-    df_subjects.rename(index=str, columns=excel_col_list,inplace=True)
+                                    usecols="A:N"
+                                    )
+    df_subjects.rename(index=str, columns=excel_col_list, inplace=True)
    df_subjects = df_subjects[df_subjects['pat'].notnull()
                              | df_subjects['cnt'].notnull()]


 def SubjectEP(session: BidsSession) -> int:
-    sub_id = int(session.subject)
-    index = df_subjects.loc[df_subjects["pat"] == sub_id].index 
-    status = 0
+    """
+    Subject determination and initialisation
+
+    1. Checks that subject is not in the black list
+    2. Loads demographics from subject table
+    3. Creates session parsing dictionary
+
+    Parameters
+    ----------
+    session: BidsSession
+
+    Returns
+    -------
+    int:
+        if 0, plugin successful
+        if > 0, plugin failed, an exception will be raised
+        if < 0, plugin failed, and subject will be skipped
+    """
+
+    #################################
+    # Skipping if in the black list #
+    #################################
+    if session.subject in sub_black_list:
+        logger.info("Subject '{}' is in black_list"
+                    .format(session.subject))
+        return -1
+
+    #################################
+    # Retrieving subject from table #
+    #################################
+    try:
+        # in case if folder name in source dataset
+        # cannot be converted to integer
+        sub_id = int(session.subject)
+    except ValueError as e:
+        logger.error("Subject {}: Can't determine subject Id for: {}"
+                     .format(session.subject, e))
+        return -1
+
+    # storing bidsified subject id into session object
+    # optional, but can be easily retrieved
+    session.sub_values["participant_id"] = "sub-" + session.subject
+    # looking for subject in dataframe
    prefix = "pat"
+    index = df_subjects.loc[df_subjects[prefix] == sub_id].index
+    # storing participant group in session
+    session.sub_values["group"] = "patient"
+
    if len(index) == 0:
        # Subject not in patient list, looking in control
-        index = df_subjects.loc[df_subjects["cnt"] == sub_id].index
+        prefix = "cnt"
+        index = df_subjects.loc[df_subjects[prefix] == sub_id].index
+        session.sub_values["group"] = "control"
        if len(index) == 0:
            raise KeyError("Subject {} not found in table"
                           .format(sub_id))
-        status = 1
-        prefix = "cnt"
+    if len(index) > 1:
+        logger.warning("Subject {}: several column entries present"
+                       .format(sub_id))
    index = index[0]

    # retrieving demographics
-    # <sex>_<age>_<education>
-    line = df_subjects.loc[index, prefix + "_sae"].split("_")
-    sex = line[0]
-    age = int(line[1])
-    education = int(line[2])
-    session.sub_values["participant_id"] = "sub-" + session.subject
-    session.sub_values["sex"] = sex
-    session.sub_values["age"] = age
-    session.sub_values["education"] = education
+    sex = df_subjects.loc[index, prefix + "_sex"]
+    age = df_subjects.loc[index, prefix + "_age"]
+    education = df_subjects.loc[index, prefix + "_edu"]
+
+    # session initialised values are Null
+    # fill them only if they are retrieved from table
+    if pandas.notna(sex):
+        session.sub_values["sex"] = sex
+    if pandas.notna(age):
+        session.sub_values["age"] = float(age)
+    if pandas.notna(education):
+        session.sub_values["education"] = float(education)

    # looking for pairing
-    if status == 0:
-        session.sub_values["group"] = "patient"
-        session.sub_values["paired"] = "sub-{:03}".format(int(df_subjects
-                                                  .loc[index, "cnt"]))
-    else:
-        session.sub_values["group"] = "control"
-        session.sub_values["paired"] = "sub-{:03}".format(int(df_subjects
-                                                  .loc[index, "pat"]))
+    paired = df_subjects.loc[index, sub_prefix[prefix == "cnt"]]
+    if pandas.notna(paired):
+        session.sub_values["paired"] = "sub-{:03}".format(int(paired))

    # looking for order of sessions
-    global scans_map
-    scans_map = {}
+    scans_map.clear()
    scans_order = sorted([os.path.basename(s) for s in
                          tools.lsdirs(os.path.join(rawfolder,
                                                    session.subject),
                                       "s*")
                          ])
+    # looping over session defined in columns
    for ind, s in enumerate(("_1", "_2", "_3")):
        v = "ses-" + str(df_subjects.loc[index, prefix + s]).strip()
        ses = "ses" + s
        if v == "ses-nan":
+            # Session not defined in table, but existing
+            # in source dataset
            session.sub_values[ses] = ""
            logger.warning("Subject {}({}): missing {} value"
                           .format(session.sub_values["participant_id"],
                                   session.sub_values["group"],
                                   ses)
                           )
+        elif v == "ses-OUT":
+            # participant left study
+            logger.warning("Subject {}({}): seems to be abandoned study"
+                           .format(session.sub_values["participant_id"],
+                                   session.sub_values["group"],
+                                   ses)
+                           )
+            return -1
        elif v not in Series:
+            # invalid session name
            logger.critical("Subject {}({}): Invalid {}: {}"
                            .format(session.sub_values["participant_id"],
                                    session.sub_values["group"],
@@ -132,10 +235,13 @@ def SubjectEP(session: BidsSession) -> int:
            raise KeyError("Invalid {}: {}"
                           .format(ses, v))
        else:
+            # session retrieved, storing values
            session.sub_values[ses] = v
-            scans_map[scans_order[ind]] = session.sub_values[ses]
+            scans_map[scans_order[ind]] = v

    # checking if all scans are identifiable
+    # if not, additional scans will be stored
+    # with original names
    for scan in scans_order:
        if scan not in scans_map:
            logger.error("Subject {}({}): Can't identify session {}"
@@ -144,45 +250,68 @@ def SubjectEP(session: BidsSession) -> int:
                                 scan))
            scans_map[scan] = scan

+    # optional, the sub- prefix is added automatically
+    # if not present
    session.subject = "sub-" + session.subject
-    return 0


 def SessionEP(session: BidsSession) -> int:
-    # retrieving correct session name
+    """
+    1. Set-up session name
+
+    Parameters
+    ----------
+    session: BidsSession
+    """
+    # Setting session name from map
    session.session = scans_map[session.session]
-    return 0


 def SessionEndEP(session: BidsSession):
-    path = os.path.join(bidsfolder,
+    """
+    1. Checks the series in the prepared folder
+    2. Extract KSS/VAS data from kss_dict to tsv file
+    3. Parses in-scan nBack and KSS/VAS log files
+    """
+    # path contain destination folder, where
+    # all data files are placed
+    path = os.path.join(preparefolder,
                        session.getPath(True))
    out_path = os.path.join(path,
                            "MRI")
-    checkSeries(out_path,
-                session.subject, session.session,
-                False)

-    # parcing log files
+    # checking if session contains correct series
+    if not dry_run:
+        checkSeries(out_path,
+                    session.subject, session.session,
+                    False)
+
+    ############################################
+    # Retrieving in-scan task and KSS/VAS data #
+    ############################################
    if session.session == "ses-STROOP":
        return 0
+    # where tsv files are
+    inp_dir = os.path.join(session.in_path, "inp")
+    # where tsv files should be
+    aux_dir = os.path.join(path, "auxiliary")
+    if not os.path.isdir(inp_dir):
+        raise NotADirectoryError(inp_dir)
+
+    # do not copy if we are in dry mode
+    if not dry_run:
+        os.makedirs(aux_dir, exist_ok=True)
+        # just copy the files; in a real-life application
+        # you may parse them
+        for file in ("FCsepNBack.tsv", "VAS.tsv"):
+            file = os.path.join(inp_dir, file)
+            if not os.path.isfile(file):
+                raise FileNotFoundError(file)
+            shutil.copy2(file, aux_dir)

-    logs = os.path.join(session.in_path, "inp")
-    aux_d = os.path.join(path, "auxiliary")
-    if not os.path.isdir(logs):
-        raise NotADirectoryError(logs)
-
-    os.makedirs(aux_d, exist_ok=True)
-    for file in ("FCsepNBack.tsv", "VAS.tsv"):
-        file = os.path.join(logs, file)
-        if not os.path.isfile(file):
-            raise FileNotFoundError(file)
-        shutil.copy2(file, aux_d)
-
-    for file in ("FCsepNBack.json", "VAS.json"):
-        file = os.path.join(resources, file)
-        if not os.path.isfile(file):
-            raise FileNotFoundError(file)
-        shutil.copy2(file, aux_d)
-
-    return 0
+        # copying corresponding json files
+        for file in ("FCsepNBack.json", "VAS.json"):
+            file = os.path.join(plugin_root, file)
+            if not os.path.isfile(file):
+                raise FileNotFoundError(file)
+            shutil.copy2(file, aux_dir)