Skip to content
Snippets Groups Projects
Commit f7acb04e authored by nbeliy's avatar nbeliy
Browse files

adapted prepare

parent d37f682d
No related branches found
No related tags found
No related merge requests found
No preview for this file type
{
"participant_id":{
"LongName":"Participant Id",
"Description":"label identifying a particular subject"
},
"age":{
"LongName":"Age",
"Description":"Age of a subject",
"Units":"year"
},
"sex":{
"LongName":"Sex",
"Description":"Sex of a subject",
"Levels":{
"F":"Female",
"M":"Male"
}
},
"education":{
"LongName":"Education level",
"Description":"Education level"
},
"group":{
"LongName":"group",
"Description":"Group the subject belongs to",
"Levels":{
"patient":"patient",
"control":"control"
}
},
"handiness":{
"LongName":"Handedness of subject",
"Levels":{
"r":"right-handed",
"l":"left-handed"
}
},
"paired":{
"LongName":"Paired Id",
"Description":"Subject Id paired with this subject"
},
"ses_1":{
"LongName":"First session",
"Description":"Id of the first session taken by subject",
"Levels":{
"ses-LCL":"Low charge level",
"ses-HCL":"High charge level",
"ses-STROOP":"Multiparametric scan"
}
},
"ses_2":{
"LongName":"Second session",
"Description":"Id of the second session taken by subject",
"Levels":{
"ses-LCL":"Low charge level",
"ses-HCL":"High charge level",
"ses-STROOP":"Multiparametric scan"
}
},
"ses_3":{
"LongName":"Third session",
"Description":"Id of the third session taken by subject",
"Levels":{
"ses-LCL":"Low charge level",
"ses-HCL":"High charge level",
"ses-STROOP":"Multiparametric scan"
}
},
"random":{
"LongName":"Random number between 0 and 1",
"Description":"Test of adding columns in process/bidsify step"
}
}
{
"participant_id":{
"LongName":"Participant Id",
"Description":"label identifying a particular subject"
},
"sex":{
"LongName":"Sex",
"Description":"Sex of a subject",
"Levels":{
"F":"Female",
"M":"Male"
}
},
"education":{
"LongName":"Education level",
"Description":"Education level"
},
"group":{
"LongName":"group",
"Description":"Group the subject belongs to",
"Levels":{
"patient":"patient",
"control":"control"
}
},
"handiness":{
"LongName":"Handedness of subject",
"Levels":{
"r":"right-handed",
"l":"left-handed"
}
},
"paired":{
"LongName":"Paired Id",
"Description":"Subject Id paired with this subject"
},
"ses_1":{
"LongName":"First session",
"Description":"Id of the first session taken by subject",
"Levels":{
"ses-LCL":"Low charge level",
"ses-HCL":"High charge level",
"ses-STROOP":"Multiparametric scan"
}
},
"ses_2":{
"LongName":"Second session",
"Description":"Id of the second session taken by subject",
"Levels":{
"ses-LCL":"Low charge level",
"ses-HCL":"High charge level",
"ses-STROOP":"Multiparametric scan"
}
},
"ses_3":{
"LongName":"Third session",
"Description":"Id of the third session taken by subject",
"Levels":{
"ses-LCL":"Low charge level",
"ses-HCL":"High charge level",
"ses-STROOP":"Multiparametric scan"
}
}
}
import os
import shutil
import logging
import random
from definitions import checkSeries
......@@ -26,27 +27,35 @@ def InitEP(source: str, destination: str, dry: bool) -> int:
dry_run = dry
def SubjectEP(scan):
sub_id = int(scan.subject[4:])
scan.subject = "sub-{:03d}".format(sub_id + 1)
scan.sub_values["sex"] = None
scan.sub_values["random"] = random.random()
def SessionEP(scan):
global series
global sid
sub = scan.subject
ses = scan.session
path = os.path.join(rawfolder,
sub, ses,
"MRI")
# path = os.path.join(rawfolder,
# sub, ses,
# "MRI")
path = os.path.join(scan.in_path, "MRI")
series = sorted(os.listdir(path))
series = [s.split("-",1)[1] for s in series]
series = [s.split("-", 1)[1] for s in series]
sid = -1
checkSeries(path, sub, ses, False)
# copying behavioral data
aux_input = os.path.join(rawfolder, sub, ses, "auxiliary")
aux_input = os.path.join(scan.in_path, "auxiliary")
if ses in ("ses-LCL", "ses-HCL"):
if not os.path.isdir(aux_input):
logger.error("Session {}/{} do not contain auxiliary folder"
.format(sub, ses))
raise FileNotFoundError("folder {} not found"
.format(aux_input))
beh = os.path.join(bidsfolder, sub, ses, "beh")
beh = os.path.join(scan.in_path, "beh")
if not dry_run:
os.makedirs(beh, exist_ok=True)
for old, new in (("FCsepNBack.tsv", "task-rest_events.tsv"),
......@@ -81,7 +90,7 @@ def SequenceEP(recording):
global Intended
Intended = ""
sid += 1
recid = series[sid]
recid = series[sid]
if recid != recording.recId():
logger.warning("{}: Id mismatch folder {}"
.format(recording.recIdentity(False),
......
......@@ -3,126 +3,229 @@ import pandas
import logging
import shutil
from tools import tools
from bidsMeta import BIDSfieldLibrary
from tools import tools
from bids import BidsSession
from definitions import Series, checkSeries
from definitions import Series, checkSeries, plugin_root
# defining logger this way will prefix plugin messages
# with plugin name
logger = logging.getLogger(__name__)
# global variables
rawfolder = ""
bidsfolder = ""
dry_run = False
#############################
# global bidscoin variables #
#############################
resources = os.path.join(os.path.dirname(__file__), "..")
# Folder with source dataset
rawfolder = None
# folder with prepared dataset
preparedfolder = None
# switch indicating a dry-run (test run)
dry_run = False
df_subjects = None
###########################
# global plugin variables #
###########################
# map of individual sessions
# key: source folder session (s01234)
# value: bidsified session (ses-HCL)
scans_map = {}
# scale to convert ms in log-files to seconds
time_scale = 1e-3
# list of subjects Excel-file columns
excel_col_list = {'Patient' : 'pat',
'S_A_E' : "pat_sae",
# subject black-list
# subject folders in this list will be skipped
# by plugin
sub_black_list = []
# subject xls table columns and their renaming
excel_col_list = {"Patient": "pat",
"Sex": "pat_sex",
"Age": "pat_age",
"Education": "pat_edu",
1: "pat_1", 2: "pat_2", 3: "pat_3",
'Control' : "cnt",
'S_A_E.1': "cnt_sae",
'1.1': "cnt_1", '2.1': "cnt_2", '3.1': "cnt_3",
'Control': "cnt",
"Sex.1": "cnt_sex",
"Age.1": "cnt_age",
"Education.1": "cnt_edu",
"1.1": "cnt_1", "2.1": "cnt_2", "3.1": "cnt_3"
}
# sub_columns = BIDSfieldLibrary()
# columns prefixes for patient and control subjects
# 0 == False == Control
# 1 == True == Patient
sub_prefix = ["cnt", "pat"]
# pandas dataframe with list of subjects
df_subjects = None
def InitEP(source: str, destination: str,
dry: bool,
subjects: str="") -> int:
subjects: str = "") -> int:
"""
Initialisation of plugin
1. Saves source/destination folders and dry_run switch
2. Loads subjects xls table
Parameters
----------
source: str
path to source dataset
destination:
path to prepared dataset
subjects: str
path to subjects xls file; if empty, "Appariement.xlsx"
is looked up under plugin_root
"""
global rawfolder
global bidsfolder
global preparefolder
global dry_run
global subject_file
rawfolder = source
bidsfolder = destination
preparefolder = destination
dry_run = dry
#########################
# Loading subjects list #
#########################
if subjects:
subject_file = subjects
else:
subject_file = os.path.join(resources, "Appariement.xlsx")
logger.info(subject_file)
subject_file = os.path.join(plugin_root, "Appariement.xlsx")
if not os.path.isfile(subject_file):
raise FileNotFoundError("Subject file '{}' not found"
.format(subject_file))
# creating df for subjects
# creating dataframe for subjects
global df_subjects
df_subjects = pandas.read_excel(subject_file,
sheet_name=0, header=0,
usecols=[0,1,2,3,4,5,6,7,8,9,10])
df_subjects.rename(index=str, columns=excel_col_list,inplace=True)
usecols="A:N"
)
df_subjects.rename(index=str, columns=excel_col_list, inplace=True)
df_subjects = df_subjects[df_subjects['pat'].notnull()
| df_subjects['cnt'].notnull()]
def SubjectEP(session: BidsSession) -> int:
sub_id = int(session.subject)
index = df_subjects.loc[df_subjects["pat"] == sub_id].index
status = 0
"""
Subject determination and initialisation
1. Checks that subject is not in the black list
2. Loads demographics from subject table
3. Creates session parsing dictionary
Parameters
----------
session: BidsSession
Returns
-------
int:
if 0, plugin successful
if > 0, plugin failed, an exception will be raised
if < 0, plugin failed, and subject will be skipped
"""
#################################
# Skipping if in the black list #
#################################
if session.subject in sub_black_list:
logger.info("Subject '{}' is in black_list"
.format(session.subject))
return -1
#################################
# Retrieving subject from table #
#################################
try:
# in case if folder name in source dataset
# cannot be converted to integer
sub_id = int(session.subject)
except ValueError as e:
logger.error("Subject {}: Can't determine subject Id for: {}"
.format(session.subject, e))
return -1
# storing bidsified subject id into session object
# optional, but can be easily retrieved
session.sub_values["participant_id"] = "sub-" + session.subject
# looking for subject in dataframe
prefix = "pat"
index = df_subjects.loc[df_subjects[prefix] == sub_id].index
# storing participant group in session
session.sub_values["group"] = "patient"
if len(index) == 0:
# Subject not in patient list, looking in control
index = df_subjects.loc[df_subjects["cnt"] == sub_id].index
prefix = "cnt"
index = df_subjects.loc[df_subjects[prefix] == sub_id].index
session.sub_values["group"] = "control"
if len(index) == 0:
raise KeyError("Subject {} not found in table"
.format(sub_id))
status = 1
prefix = "cnt"
if len(index) > 1:
logger.warning("Subject {}: several column entries present"
.format(sub_id))
index = index[0]
# retrieving demographics
# <sex>_<age>_<education>
line = df_subjects.loc[index, prefix + "_sae"].split("_")
sex = line[0]
age = int(line[1])
education = int(line[2])
session.sub_values["participant_id"] = "sub-" + session.subject
session.sub_values["sex"] = sex
session.sub_values["age"] = age
session.sub_values["education"] = education
sex = df_subjects.loc[index, prefix + "_sex"]
age = df_subjects.loc[index, prefix + "_age"]
education = df_subjects.loc[index, prefix + "_edu"]
# session initialised values are Null
# fill them only if they are retrieved from table
if pandas.notna(sex):
session.sub_values["sex"] = sex
if pandas.notna(age):
session.sub_values["age"] = float(age)
if pandas.notna(education):
session.sub_values["education"] = float(education)
# looking for pairing
if status == 0:
session.sub_values["group"] = "patient"
session.sub_values["paired"] = "sub-{:03}".format(int(df_subjects
.loc[index, "cnt"]))
else:
session.sub_values["group"] = "control"
session.sub_values["paired"] = "sub-{:03}".format(int(df_subjects
.loc[index, "pat"]))
paired = df_subjects.loc[index, sub_prefix[prefix == "cnt"]]
if pandas.notna(paired):
session.sub_values["paired"] = "sub-{:03}".format(int(paired))
# looking for order of sessions
global scans_map
scans_map = {}
scans_map.clear()
scans_order = sorted([os.path.basename(s) for s in
tools.lsdirs(os.path.join(rawfolder,
session.subject),
"s*")
])
# looping over session defined in columns
for ind, s in enumerate(("_1", "_2", "_3")):
v = "ses-" + str(df_subjects.loc[index, prefix + s]).strip()
ses = "ses" + s
if v == "ses-nan":
# Session not defined in table, but existing
# in source dataset
session.sub_values[ses] = ""
logger.warning("Subject {}({}): missing {} value"
.format(session.sub_values["participant_id"],
session.sub_values["group"],
ses)
)
elif v == "ses-OUT":
# participant left study
logger.warning("Subject {}({}): seems to be abandoned study"
.format(session.sub_values["participant_id"],
session.sub_values["group"],
ses)
)
return -1
elif v not in Series:
# invalid session name
logger.critical("Subject {}({}): Invalid {}: {}"
.format(session.sub_values["participant_id"],
session.sub_values["group"],
......@@ -132,10 +235,13 @@ def SubjectEP(session: BidsSession) -> int:
raise KeyError("Invalid {}: {}"
.format(ses, v))
else:
# session retrieved, storing values
session.sub_values[ses] = v
scans_map[scans_order[ind]] = session.sub_values[ses]
scans_map[scans_order[ind]] = v
# checking if all scans are identifiable
# if not, additional scans will be stored
# with original names
for scan in scans_order:
if scan not in scans_map:
logger.error("Subject {}({}): Can't identify session {}"
......@@ -144,45 +250,68 @@ def SubjectEP(session: BidsSession) -> int:
scan))
scans_map[scan] = scan
# optional, the sub- prefix is added automatically
# if not present
session.subject = "sub-" + session.subject
return 0
def SessionEP(session: BidsSession) -> int:
# retrieving correct session name
"""
1. Set-up session name
Parameters
----------
session: BidsSession
"""
# Setting session name from map
session.session = scans_map[session.session]
return 0
def SessionEndEP(session: BidsSession):
path = os.path.join(bidsfolder,
"""
1. Checks the series in the prepared folder
2. Extract KSS/VAS data from kss_dict to tsv file
3. Parses in-scan nBack and KSS/VAS log files
"""
# path contains the destination folder, where
# all data files are placed
path = os.path.join(preparefolder,
session.getPath(True))
out_path = os.path.join(path,
"MRI")
checkSeries(out_path,
session.subject, session.session,
False)
# parsing log files
# checking if session contains correct series
if not dry_run:
checkSeries(out_path,
session.subject, session.session,
False)
############################################
# Retrieving in-scan task and KSS/VAS data #
############################################
if session.session == "ses-STROOP":
return 0
# where tsv files are
inp_dir = os.path.join(session.in_path, "inp")
# where tsv files should be
aux_dir = os.path.join(path, "auxiliary")
if not os.path.isdir(inp_dir):
raise NotADirectoryError(inp_dir)
# do not copy if we are in dry mode
if not dry_run:
os.makedirs(aux_dir, exist_ok=True)
# just copy the files; in a real-life application
# you may parse the files
for file in ("FCsepNBack.tsv", "VAS.tsv"):
file = os.path.join(inp_dir, file)
if not os.path.isfile(file):
raise FileNotFoundError(file)
shutil.copy2(file, aux_dir)
logs = os.path.join(session.in_path, "inp")
aux_d = os.path.join(path, "auxiliary")
if not os.path.isdir(logs):
raise NotADirectoryError(logs)
os.makedirs(aux_d, exist_ok=True)
for file in ("FCsepNBack.tsv", "VAS.tsv"):
file = os.path.join(logs, file)
if not os.path.isfile(file):
raise FileNotFoundError(file)
shutil.copy2(file, aux_d)
for file in ("FCsepNBack.json", "VAS.json"):
file = os.path.join(resources, file)
if not os.path.isfile(file):
raise FileNotFoundError(file)
shutil.copy2(file, aux_d)
return 0
# copying corresponding json files
for file in ("FCsepNBack.json", "VAS.json"):
file = os.path.join(plugin_root, file)
if not os.path.isfile(file):
raise FileNotFoundError(file)
shutil.copy2(file, aux_dir)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment