diff --git a/nmriprep/argprep/argprep.py b/nmriprep/argprep/argprep.py
index 8967d67..5cf57a9 100644
--- a/nmriprep/argprep/argprep.py
+++ b/nmriprep/argprep/argprep.py
@@ -1,167 +1,176 @@
+from collections import defaultdict
+
 import nibabel as nb
 import numpy as np
 
-from ..image import convert_nef_to_grey, read_tiff, save_slice
+from ..image import convert_nef_to_grey, save_slice
 from ..parser import get_argprep_parser
 from ..plotting import plot_curve, plot_mosaic, plot_single_slice
-from ..utils import find_files, inverse_rodbard, rodbard
+from ..utils import find_files, inverse_rodbard, parse_kv, rodbard
 from .calibration import calibrate_standard
+from .fieldprep import find_fields
 
 
-def main():
-    args = get_argprep_parser().parse_args()
-    src_dir = args.source_directory.absolute()
-    out_dir = src_dir.parent / 'preproc' if not args.output else args.output
-    verbose = args.save_intermediate
+def subject_workflow(sub_files, src_dir, out_dir, args, verbose):
+    sub_id = sub_files[0].parent.stem
+    sub_dir = out_dir / sub_id
+    sub_dir.mkdir(exist_ok=True, parents=True)
+    fig_dir = sub_dir / 'figures'
+    fig_dir.mkdir(exist_ok=True)
 
     # attempt to find flat field info
     flatfield_correction = {}
-    if args.dark_field and args.flat_field:
-        flatfield_correction['dark'] = read_tiff(args.dark_field)
-        flatfield_correction['flat'] = read_tiff(args.flat_field)
-    else:
-        # try searching for a preprocessed darkfield
-        latest_ff = find_files(out_dir.glob('*flatfield.tiff'))
-        latest_df = find_files(out_dir.glob('*darkfield.tiff'))
-        if latest_ff and latest_df:
-            flatfield_correction['dark'] = read_tiff(latest_df[-1])
-            flatfield_correction['flat'] = read_tiff(latest_ff[-1])
-        else:
-            print('Skipping flat field correction...')
-            flatfield_correction = None
-
-    # identify subjects for pipeline
-    all_subject_ids = set(
-        [
-            part.split('-')[1]
-            for fpath in src_dir.glob('*.nef')
-            for part in fpath.stem.split('_')
-            if 'subj' in part
-        ]
+    flatfield_correction['dark'] = find_fields(
+        args.dark_field, sub_dir.glob('*darkfield.tif*')
+    )
+    flatfield_correction['flat'] = find_fields(
+        args.flat_field, sub_dir.glob('*flatfield.tif*')
+    )
+    if any(v is None for v in flatfield_correction.values()):
+        print('Skipping flat field correction...')
+        flatfield_correction = None
+
+    # calibrate standards to transform GV to radioactivity
+    popt, std_rad, std_gv, std_stem = calibrate_standard(
+        sub_files,
+        src_dir,
+        flatfield_correction=flatfield_correction,
+        out_dir=sub_dir if verbose else None,
     )
-    if args.subject_id:
-        print(f'Processing subjects: {args.subject_id}')
-        subjects_to_process = [
-            subj for subj in all_subject_ids if subj in args.subject_id
-        ]
-    else:
-        print('Processing all subjects')
-        subjects_to_process = all_subject_ids
-
-    for sub_id in subjects_to_process:
-        sub_dir = out_dir / sub_id
-        sub_dir.mkdir(exist_ok=True, parents=True)
-        fig_dir = sub_dir / 'figures'
-        fig_dir.mkdir(exist_ok=True)
-
-        # calibrate standards to transform GV to radioactivity
-        popt, std_rad, std_gv, std_stem = calibrate_standard(
-            sub_id,
-            src_dir,
-            args.standard_type,
-            flatfield_correction=flatfield_correction,
-            out_dir=sub_dir if verbose else None,
-        )
+    if verbose:
+        plot_curve(std_rad, std_gv, rodbard(std_rad, *popt), sub_dir, std_stem)
 
-        if verbose:
-            plot_curve(std_rad, std_gv, rodbard(std_rad, *popt), sub_dir, std_stem)
+    print(f'success! fitting data with parameters: {popt}')
 
-        print(f'success! fitting data with parameters: {popt}')
 
+    # find subject files
+    slide_files = [fname for fname in sub_files if 'slide' in fname.stem]
+    if len(slide_files) < 1:
+        raise FileNotFoundError(f'No slide files found in {sub_files[0].parent}')
 
-        # find subject files
-        slide_files = find_files(src_dir.glob(f'*subj-{sub_id}*slide*.nef'))
-        if len(slide_files) < 1:
-            raise FileNotFoundError(f'No slide files found for {sub_id} in {src_dir}')
+    # convert nefs to grey value
+    data_gv = np.stack(
+        [
+            convert_nef_to_grey(
+                fname,
+                flatfield_correction=flatfield_correction,
+                crop_row=args.crop_height,
+                crop_col=args.crop_width,
+                invert=True,
+            )
+            for fname in slide_files
+        ],
+        axis=2,
+    )
 
-        # convert nefs to grey value
-        data_gv = np.stack(
+    if args.rotate > 0:
+        data_gv = np.rot90(data_gv, k=args.rotate, axes=(0, 1))
+
+    # calibrate slice data
+    print('Converting slide data to radioactivity')
+    data_radioactivity = inverse_rodbard(data_gv, *popt)
+    # clip "negative" values
+    data_radioactivity[np.isnan(data_radioactivity)] = 0
+    print('Success! Generating output...')
+
+    if args.save_nii:
+        # write out nii image (e.g. for Jim)
+        # find information on number of slides from last file name
+        last_section_parts = slide_files[-1].stem.split('_')
+        max_slide = int(
             [
-                convert_nef_to_grey(
-                    fname,
-                    flatfield_correction=flatfield_correction,
-                    crop_row=args.crop_height,
-                    crop_col=args.crop_width,
-                    invert=True,
-                )
-                for fname in slide_files
-            ],
-            axis=2,
+                last_section_parts[idx].split('-')[-1]
+                for idx, val in enumerate(last_section_parts)
+                if 'slide' in val
+            ][0]
         )
 
-        if args.rotate > 0:
-            data_gv = np.rot90(data_gv, k=args.rotate, axes=(0, 1))
-
-        # calibrate slice data
-        print('Converting slide data to radioactivity')
-        data_radioactivity = inverse_rodbard(data_gv, *popt)
-        # clip "negative" values
-        data_radioactivity[np.isnan(data_radioactivity)] = 0
-        print('Success! Generating output...')
-
-        out_stem = std_stem.split('_standard')[0]
-
-        if args.save_nii:
-            # write out nii image (e.g. for Jim)
-            # find information on number of slides from last file name
-            last_section_parts = slide_files[-1].stem.split('_')
-            max_slide = int(
-                [
-                    last_section_parts[idx].split('-')[-1]
-                    for idx, val in enumerate(last_section_parts)
-                    if 'slide' in val
-                ][0]
+        # iterate through slides and collect sections to concatenate
+        for slide_no in range(max_slide):
+            slide_val = str(slide_no + 1).zfill(2)
+            slide_sections = {
+                fname: idx
+                for idx, fname in enumerate(slide_files)
+                if f'slide-{slide_val}' in fname.stem
+            }
+
+            nb.Nifti1Image(
+                data_radioactivity[..., sorted(slide_sections.values())],
+                affine=None,
+            ).to_filename(
+                sub_dir / f'{std_stem}_slide-{slide_val}_desc-preproc_ARG.nii.gz'
             )
 
-            # iterate through slides and collect sections to concatenate
-            for slide_no in range(max_slide):
-                slide_val = str(slide_no + 1).zfill(2)
-                slide_sections = {
-                    fname: idx
-                    for idx, fname in enumerate(slide_files)
-                    if f'slide-{str(slide_val).zfill(2)}' in str(fname.stem)
-                }
-
-                nb.Nifti1Image(
-                    data_radioactivity[..., sorted(slide_sections.values())],
-                    affine=None,
-                ).to_filename(
-                    sub_dir / f'{out_stem}_slide-{slide_val}_desc-preproc_ARG.nii.gz'
-                )
+    if args.save_tif:
+        print('Saving individual files...')
+        for idx, fname in enumerate(slide_files):
+            print(f'{fname.stem}')
+            save_slice(
+                data_radioactivity[..., idx],
+                sub_dir / f'{fname.stem}_desc-preproc_ARG.tif',
+            )
 
-        if args.save_tif:
-            print('Saving individual files...')
-            for idx, fname in enumerate(slide_files):
-                print(f'{fname.stem}')
-                save_slice(
+            if verbose:
+                # plot image
+                plot_single_slice(
                     data_radioactivity[..., idx],
-                    sub_dir / f'{fname.stem}_desc-preproc_ARG.tif',
+                    fig_dir / f'{fname.stem}_desc-preproc_ARG.png',
                 )
 
-            if verbose:
-                # plot image
-                plot_single_slice(
-                    data_radioactivity[..., idx],
-                    fig_dir / f'{fname.stem}_desc-preproc_ARG.png',
-                )
-
-                # plot fits
-                plot_curve(
-                    std_rad=std_rad,
-                    std_gv=std_gv,
-                    fitted_gv=rodbard(std_rad, *popt),
-                    out_dir=fig_dir,
-                    out_stem=fname.stem,
-                    data_rad=data_radioactivity[..., idx].ravel(),
-                    data_gv=data_gv[..., idx].ravel(),
-                )
-    if args.mosaic_slices:
-        sliced = (
-            data_radioactivity
-            if -1 in args.mosaic_slices
-            else data_radioactivity[..., args.mosaic_slices]
-        )
-        plot_mosaic(sliced, out_dir / f'{out_stem}_desc-preproc_ARG.png')
+                # plot fits
+                plot_curve(
+                    std_rad=std_rad,
+                    std_gv=std_gv,
+                    fitted_gv=rodbard(std_rad, *popt),
+                    out_dir=fig_dir,
+                    out_stem=fname.stem,
+                    data_rad=data_radioactivity[..., idx].ravel(),
+                    data_gv=data_gv[..., idx].ravel(),
+                )
+    if args.mosaic_slices:
+        sliced = (
+            data_radioactivity
+            if -1 in args.mosaic_slices
+            else data_radioactivity[..., args.mosaic_slices]
+        )
+        plot_mosaic(sliced, out_dir / f'{std_stem}_desc-preproc_ARG.png')
+
+
+def main():
+    args = get_argprep_parser().parse_args()
+    src_dir = args.source_directory.absolute()
+    out_dir = src_dir.parent / 'preproc' if not args.output else args.output
+    verbose = args.save_intermediate
+
+    # identify subjects for pipeline
+    fnames = find_files(src_dir.rglob('*.nef'))
+    subdirs = defaultdict(list)
+    for p in fnames:
+        subdirs[p.parent].append(p)
+    all_subject_directories = list(subdirs)
+
+    if args.subject_id:
+        print(f'Processing subjects: {args.subject_id}')
+        subjects_to_process = [
+            subj
+            for subj in all_subject_directories
+            if subj.stem.replace('sub-', '') in args.subject_id
+        ]
+    else:
+        print('Processing all subjects')
+        subjects_to_process = all_subject_directories
+
+    for subject_dir in subjects_to_process:
+        sub_files = subdirs[subject_dir]
+        if 'film' in sub_files[0].stem:
+            subfilms = defaultdict(list)
+            for fname in sub_files:
+                subfilms[parse_kv(fname.stem)['film']].append(fname)
+            for film_idx in subfilms:
+                subject_workflow(subfilms[film_idx], src_dir, out_dir, args, verbose)
+        else:
+            subject_workflow(sub_files, src_dir, out_dir, args, verbose)
 
 
 if __name__ == '__main__':
     main()
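Note: the reworked discovery step above groups raw .nef files by their parent directory and, when a 'film' key is present in the stem, by film. It relies on key-value filename stems that parse_kv can split; a hypothetical example, with path and values invented for illustration:

    # sourcedata/sub-01/sub-01_film-02_slide-03_section-01.nef
    parse_kv('sub-01_film-02_slide-03_section-01')
    # -> {'sub': '01', 'film': '02', 'slide': '03', 'section': '01'}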
diff --git a/nmriprep/argprep/calibration.py b/nmriprep/argprep/calibration.py
index cfa73df..b4cf042 100644
--- a/nmriprep/argprep/calibration.py
+++ b/nmriprep/argprep/calibration.py
@@ -6,13 +6,26 @@
 from ..image import convert_nef_to_grey
 from ..plotting import plot_roi
-from ..utils import find_files, rodbard
+from ..utils import parse_kv, rodbard
 
 
 def get_image_patch(center_coord, square_apothem: int = 100):
     return slice(center_coord - square_apothem, center_coord + square_apothem)
 
 
+def get_dataset_standard(source_dir):
+    if not (source_dir / 'dataset_description.json').exists():
+        raise FileNotFoundError(
+            f'Dataset description file missing from {source_dir.resolve()}'
+        )
+    with (source_dir / 'dataset_description.json').open('r') as f:
+        standard_info = json.load(f)['standard']
+    range_type = (
+        standard_info['range'] if 'range' in standard_info else 'default'
+    )
+    return (standard_info['isotope'], range_type)
+
+
 def get_standard_value(array, medfilt_radius=40, square_size=900, roi_fig_name=None):
     import statistics
 
@@ -48,7 +61,7 @@
     for peak1, peak2 in zip(peak_loc[:-1], peak_loc[1:]):
         troughs = trough_loc[np.logical_and(trough_loc > peak1, trough_loc < peak2)]
         thresholds.append(
-            min(troughs) if troughs else statistics.mean([peak1, peak2])
+            min(troughs) if len(troughs) > 0 else statistics.mean([peak1, peak2])
         )
 
     regions = np.digitize(gray_medfilt, bins=thresholds)
@@ -99,19 +112,37 @@
-def calibrate_standard(
-    sub_id, src_dir, standard_type, flatfield_correction=None, out_dir=None
-):
+def calibrate_standard(sub_files, src_dir, flatfield_correction=None, out_dir=None):
     from scipy.optimize import curve_fit
 
     from .. import data
 
     # load standard information
+    isotope, range_type = get_dataset_standard(src_dir)
     with importlib.resources.open_text(data, 'standards.json') as f:
-        standard_vals = pd.read_json(f)[standard_type].dropna()
-
-    standard_files = find_files(src_dir.glob(f'*subj-{sub_id}*standard*.nef'))
-    assert len(standard_files) == len(standard_vals)
+        standard_vals = pd.Series(json.load(f)[isotope][range_type])
+
+    standard_files = [
+        fname for fname in sub_files if f'standard-{isotope}' in fname.stem
+    ]
+    if len(standard_files) < 1:
+        raise FileNotFoundError(
+            f'No {isotope} standard files found in {sub_files[0].parent}'
+        )
+    if len(standard_files) != len(standard_vals):
+        # filter by range type in case two sets of standards were imaged
+        standard_files = [
+            fname for fname in standard_files if f'range-{range_type}' in fname.stem
+        ]
+        if len(standard_files) != len(standard_vals):
+            raise RuntimeError(
+                f'expected {len(standard_vals)} standard files, found {len(standard_files)}'
+            )
+    out_stem = '_'.join(
+        f'{k}-{v}'
+        for k, v in parse_kv(standard_files[0].stem).items()
+        if 'standard' not in k
+    )
 
     standards_df = pd.DataFrame(
         {
@@ -142,7 +173,7 @@
     X = standards_df['radioactivity (uCi/g)']
     y = standards_df['median grey']
     # https://www.myassays.com/four-parameter-logistic-regression.html
-    print(f'fitting Rodbard curve for {standard_files[0].stem[:-3]}')
+    print(f'fitting Rodbard curve for {out_stem}')
     popt, _ = curve_fit(
         rodbard,
         X,
         y,
@@ -153,9 +184,8 @@
     )
 
     # save for calibrating slice images
-    out_stem = standard_files[0].stem[:-3]
     if out_dir:
-        standards_df.to_json(f'{out_dir / out_stem}_standards.json')
+        standards_df.to_json(out_dir / f'{out_stem}_standards.json')
         with (out_dir / f'{out_stem}_calibration.json').open(mode='w') as f:
             json.dump(dict(zip(['min', 'slope', 'ED50', 'max'], popt)), f)
     return popt, X, y, out_stem
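get_dataset_standard() makes a dataset_description.json at the root of the source directory mandatory. A minimal sketch of that file, assuming only the keys read above: 'isotope' must match a top-level key of standards.json and 'range' one of its sub-keys ('range' may be omitted where a 'default' entry exists, as for C14):

    {
        "standard": {
            "isotope": "H3",
            "range": "narrow"
        }
    }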
diff --git a/nmriprep/argprep/fieldprep.py b/nmriprep/argprep/fieldprep.py
new file mode 100644
index 0000000..b305e36
--- /dev/null
+++ b/nmriprep/argprep/fieldprep.py
@@ -0,0 +1,56 @@
+from collections import defaultdict
+from pathlib import Path
+
+import numpy as np
+
+from ..image import convert_nef_to_grey, read_tiff, save_slice
+from ..parser import get_fieldprep_parser
+from ..utils import find_files, parse_kv
+
+
+def find_fields(user=None, search_map=None):
+    # prefer an explicit user-supplied path; otherwise use the newest saved field
+    if user is not None:
+        return read_tiff(user)
+    found = find_files(search_map)
+    return read_tiff(found[-1]) if found else None
+
+
+def fieldprep():
+    args = get_fieldprep_parser().parse_args()
+
+    if args.dark_field:
+        raise NotImplementedError('Dark field processing coming soon...')
+
+    if args.flat_field:
+        ff_dir = args.flat_field
+        fnames = find_files(ff_dir.rglob('*flatfield*.nef'))
+        if len(fnames) < 1:
+            raise FileNotFoundError(f'No flat field files found in {ff_dir.absolute()}')
+
+        subdirs = defaultdict(list)
+        for p in fnames:
+            subdirs[p.parent].append(p)
+
+        for subdir in subdirs:
+            sub_files = subdirs[subdir]
+            fname_parts = parse_kv(sub_files[0].stem)
+            out_stem = '_'.join(
+                f'{k}-{v}' for k, v in fname_parts.items() if 'flatfield' not in k
+            )
+            out_dir = (
+                Path(str(subdir).replace('sourcedata', 'preproc')).resolve()
+                if not args.output
+                else args.output
+            )
+            out_dir.mkdir(parents=True, exist_ok=True)
+
+            data = np.median(
+                np.stack(
+                    [convert_nef_to_grey(fname) for fname in sub_files],
+                    axis=2,
+                ),
+                axis=2,
+            )
+            save_slice(data, out_dir / f'{out_stem}_flatfield.tif')
+    return
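fieldprep collapses repeated flat-field exposures into a single median reference image per acquisition directory, which argprep's find_fields() can later pick up through its '*flatfield.tif*' glob. A hypothetical invocation, with the directory layout assumed for illustration:

    fieldprep --flat-field sourcedata
    # writes preproc/<subdir>/<stem>_flatfield.tif for every directory
    # under sourcedata that contains *flatfield*.nef exposures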
diff --git a/nmriprep/data/standards.json b/nmriprep/data/standards.json
index 7c10819..99b8449 100644
--- a/nmriprep/data/standards.json
+++ b/nmriprep/data/standards.json
@@ -1 +1,40 @@
-{"C14": {"background": 0.0, "1": 39.0, "2": 78.0, "3": 159.0, "4": 292.0, "5": 446.0, "6": 656.0, "7": 869.0, "8": 1098.0}, "H3": {"background": 0.0, "1": 2.0, "2": 3.7, "3": 8.0, "4": 16.6, "5": 36.6, "6": 63.1, "7": 138.1, "8": 243.1, "9": 489.1}}
\ No newline at end of file
+{
+    "C14": {
+        "default": {
+            "background": 0.0,
+            "1": 39.0,
+            "2": 78.0,
+            "3": 159.0,
+            "4": 292.0,
+            "5": 446.0,
+            "6": 656.0,
+            "7": 869.0,
+            "8": 1098.0
+        }
+    },
+    "H3": {
+        "wide": {
+            "background": 0.0,
+            "1": 2.0,
+            "2": 3.7,
+            "3": 8.0,
+            "4": 16.6,
+            "5": 36.6,
+            "6": 63.1,
+            "7": 138.1,
+            "8": 243.1,
+            "9": 489.1
+        },
+        "narrow": {
+            "background": 0.0,
+            "1": 3.0,
+            "2": 4.7,
+            "3": 8.7,
+            "4": 23.6,
+            "5": 33.5,
+            "6": 40.2,
+            "7": 66.7,
+            "8": 109.4
+        }
+    }
+}
\ No newline at end of file
diff --git a/nmriprep/measure.py b/nmriprep/measure.py
index 19cd2b5..2b034ad 100644
--- a/nmriprep/measure.py
+++ b/nmriprep/measure.py
@@ -62,7 +62,9 @@ def roi_extract():
     merge_keys = [col for col in summary_df.columns if 'values' not in col]
     for region in args.norm_regions:
         array_col = f'{region}_values'
-        main_df[array_col] = normalise_by_region(main_df, region)
+        main_df[array_col] = normalise_by_region(
+            main_df, region, measure=args.norm_measure
+        )
         summary_df = summary_df.merge(
             summarise_vals(
                 main_df[merge_keys + [array_col]],
diff --git a/nmriprep/parser.py b/nmriprep/parser.py
index 1a85efb..4a32f81 100644
--- a/nmriprep/parser.py
+++ b/nmriprep/parser.py
@@ -9,9 +9,6 @@ def get_argprep_parser():
         description='Convert .nef image to .nii with uCi/g voxels',
         formatter_class=ArgumentDefaultsHelpFormatter,
     )
-    parser.add_argument(
-        'standard_type', help='radioisotope of standards', choices=['H3', 'C14']
-    )
     parser.add_argument('source_directory', help='raw data directory', type=Path)
     parser.add_argument('--flat-field', help='Path to flat field image', type=Path)
     parser.add_argument('--dark-field', help='Path to dark field image', type=Path)
@@ -48,6 +45,25 @@ def get_argprep_parser():
     return parser
 
 
+def get_fieldprep_parser():
+    """Build parser object."""
+
+    parser = ArgumentParser(
+        description='Convert dark/flatfield .nef images for processing with argprep',
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        '--flat-field', help='Path to directory containing flat field image', type=Path
+    )
+    parser.add_argument(
+        '--dark-field', help='Path to directory containing dark field image', type=Path
+    )
+    parser.add_argument(
+        '--output', help='Optionally specify output directory', type=Path
+    )
+    return parser
+
+
 def get_roiextract_parser():
     """Build parser object."""
 
@@ -75,6 +91,13 @@ def get_roiextract_parser():
         nargs='*',
         type=str,
     )
+    parser.add_argument(
+        '--norm-measure',
+        help='summary statistic for normalisation',
+        choices=['mean', 'median'],
+        default='median',
+        type=str,
+    )
     parser.add_argument(
         '--output', help='name of output .json file', default='roi_values', type=str
     )
diff --git a/nmriprep/utils.py b/nmriprep/utils.py
index d85af53..4d5308b 100644
--- a/nmriprep/utils.py
+++ b/nmriprep/utils.py
@@ -68,16 +68,23 @@ def inverse_rodbard(y, min_, slope, ed50, max_):
     return ed50 * (((min_ - max_) / (y - max_)) - 1.0) ** (1.0 / slope)
 
 
-def normalise_by_region(df, region):
+def normalise_by_region(df, region, measure='median'):
+    summary_measure = {'mean': np.mean, 'median': np.median}[measure]
     region_df = df.query((f'region == "{region}"'))
-    if region_df.duplicated(['subj', 'slide', 'section']).any():
-        raise ValueError('region_df has duplicate keys!')
-    region_df[f'median_{region}_values'] = df['values'].apply(np.median)
+    if region_df.duplicated(['sub', 'slide', 'section']).any():
+        # hemisphere duplicates are allowed, but those are the only exception
+        if not region_df.duplicated(['sub', 'slide', 'section', 'hemi']).any():
+            region_df = region_df.groupby(
+                ['sub', 'slide', 'section'], as_index=False
+            ).agg({'values': lambda arrs: np.concatenate(arrs.values)})
+        else:
+            raise ValueError('region_df has duplicate keys!')
+    region_df[f'{measure}_{region}_values'] = region_df['values'].apply(summary_measure)
     out = df.merge(
         region_df,
-        on=['subj', 'slide', 'section'],
+        on=['sub', 'slide', 'section'],
         how='left',
         suffixes=('', '_r'),
         validate='m:1',
     )
-    return out['values'] / out[f'median_{region}_values']
+    return out['values'] / out[f'{measure}_{region}_values']
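The hemisphere branch added to normalise_by_region pools left/right value arrays before summarising. A small worked example on invented data:

    import numpy as np
    import pandas as pd

    # one cerebellum row per hemisphere for the same sub/slide/section
    df = pd.DataFrame(
        {
            'sub': ['01', '01'],
            'slide': ['01', '01'],
            'section': ['01', '01'],
            'hemi': ['L', 'R'],
            'region': ['cerebellum', 'cerebellum'],
            'values': [np.array([1.0, 2.0]), np.array([3.0, 5.0])],
        }
    )
    # normalise_by_region(df, 'cerebellum') concatenates the two arrays,
    # takes their median (2.5) and divides each row's values by it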
diff --git a/pyproject.toml b/pyproject.toml
index 5e6b66c..0d17c58 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ Homepage = "https://github.com/brainkcl/nmriprep"
 
 [project.scripts]
 argprep = "nmriprep.argprep.argprep:main"
+fieldprep = "nmriprep.argprep.fieldprep:fieldprep"
 roi_extract = "nmriprep.measure:roi_extract"
 
 [tool.hatch.build]
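With the standard_type positional dropped from get_argprep_parser, the isotope now comes from the dataset rather than the command line, so an existing call changes along these lines (paths hypothetical):

    argprep H3 sourcedata        # before
    argprep sourcedata           # after: isotope and range are read from
                                 # sourcedata/dataset_description.json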