diff --git a/morph_utils/ccf.py b/morph_utils/ccf.py index 898840e..46c7523 100644 --- a/morph_utils/ccf.py +++ b/morph_utils/ccf.py @@ -13,6 +13,8 @@ import matplotlib.pyplot as plt from morph_utils.query import get_id_by_name, get_structures, query_pinning_info_cell_locator from morph_utils.measurements import get_node_spacing +from morph_utils.modifications import resample_morphology + NAME_MAP_FILE = files('morph_utils') / 'data/ccf_structure_name_map.json' with open(NAME_MAP_FILE, "r") as fn: @@ -289,7 +291,8 @@ def get_ccf_structure(voxel, name_map=None, annotation=None, coordinate_to_voxel def projection_matrix_for_swc(input_swc_file, mask_method = "tip_and_branch", tip_count = False, annotation=None, annotation_path = None, volume_shape=(1320, 800, 1140), - resolution=10, node_type_list=[2]): + resolution=10, node_type_list=[2], + resample_spacing=None): """ Given a swc file, quantify the projection matrix. That is the amount of axon in each structure. This function assumes there is equivalent internode spacing (i.e. the input swc file should be resampled prior to running this code). @@ -307,6 +310,8 @@ def projection_matrix_for_swc(input_swc_file, mask_method = "tip_and_branch", volume_shape (tuple, optional): the size in voxels of the ccf atlas (annotation volume). Defaults to (1320, 800, 1140). resolution (int, optional): resolution (um/pixel) of the annotation volume node_type_list (list of ints): node type to extract projection data for, typically axon (2) + resample_spacing (float or None): if not None, will resample the input morphology to the designated + internode spacing Returns: filename (str) @@ -340,7 +345,10 @@ def projection_matrix_for_swc(input_swc_file, mask_method = "tip_and_branch", z_midline = z_size / 2 morph = morphology_from_swc(input_swc_file) - morph = move_soma_to_left_hemisphere(morph, resolution, volume_shape, z_midline) + morph = move_soma_to_left_hemisphere(morph, resolution, volume_shape, z_midline) + if resample_spacing is not None: + morph = resample_morphology(morph, resample_spacing) + spacing = get_node_spacing(morph)[0] morph_df = pd.DataFrame(morph.nodes()) @@ -349,6 +357,10 @@ def projection_matrix_for_swc(input_swc_file, mask_method = "tip_and_branch", morph_df = morph_df[morph_df['type'].isin(node_type_list)] # annotate each node + if morph_df.empty: + print("Its empty") + return input_swc_file, {} + morph_df['ccf_structure'] = morph_df.apply(lambda rw: full_name_to_abbrev_dict[get_ccf_structure( np.array([rw.x, rw.y, rw.z]) , name_map, annotation, True)], axis=1) # roll up fiber tracts diff --git a/morph_utils/executable_scripts/projection_matrix_for_single_cell.py b/morph_utils/executable_scripts/projection_matrix_for_single_cell.py new file mode 100644 index 0000000..ae1a844 --- /dev/null +++ b/morph_utils/executable_scripts/projection_matrix_for_single_cell.py @@ -0,0 +1,111 @@ +import os +from tqdm import tqdm +import pandas as pd +import argschema as ags +from morph_utils.ccf import projection_matrix_for_swc + +class IO_Schema(ags.ArgSchema): + input_swc_file = ags.fields.InputFile(description='directory with micron resolution ccf registered files') + output_projection_csv = ags.fields.OutputFile(description="output projection csv") + projection_threshold = ags.fields.Int(default=0) + normalize_proj_mat = ags.fields.Boolean(default=True) + mask_method = ags.fields.Str(default="tip_and_branch",description = " 'tip_and_branch', 'branch', 'tip', or 'tip_or_branch' ") + tip_count = ags.fields.Boolean(default=False, description="when true, this will measure a matrix of number of tips instead of number of nodes") + annotation_path = ags.fields.Str(default="",description = "Optional. Path to annotation .nrrd file. Defaults to 10um ccf atlas") + resolution = ags.fields.Int(default=10, description="Optional. ccf resolution (micron/pixel") + volume_shape = ags.fields.List(ags.fields.Int, default=[1320, 800, 1140], description = "Optional. Size of input annotation") + resample_spacing = ags.fields.Float(allow_none=True, default=None, description = 'internode spacing to resample input morphology with') + + +def normalize_projection_columns_per_cell(input_df, projection_column_identifiers=['ipsi', 'contra']): + """ + :param input_df: input projection df + :param projection_column_identifiers: list of identifiers for projection columns. i.e. strings that identify projection columns from metadata columns + :return: normalized projection matrix + """ + proj_cols = [c for c in input_df.columns if any([ider in c for ider in projection_column_identifiers])] + input_df[proj_cols] = input_df[proj_cols].fillna(0) + + res = input_df[proj_cols].T / input_df[proj_cols].sum(axis=1) + input_df[proj_cols] = res.T + + return input_df + + +def main(input_swc_file, + output_projection_csv, + resolution, + projection_threshold, + normalize_proj_mat, + mask_method, + tip_count, + annotation_path, + volume_shape, + resample_spacing, + **kwargs): + + if annotation_path == "": + annotation_path = None + + results = [] + res = projection_matrix_for_swc(input_swc_file=input_swc_file, + tip_count = tip_count, + mask_method = mask_method, + annotation=None, + annotation_path = annotation_path, + volume_shape=volume_shape, + resolution=resolution, + resample_spacing=resample_spacing) + results = [res] + + output_projection_csv = output_projection_csv.replace(".csv", f"_{mask_method}.csv") + projection_records = {} + # branch_and_tip_projection_records = {} + for res in results: + fn = os.path.abspath(res[0]) + proj_records = res[1] + # brnch_tip_records = res[1] + + projection_records[fn] = proj_records + # branch_and_tip_projection_records[fn] = brnch_tip_records + + proj_df = pd.DataFrame(projection_records).T.fillna(0) + # proj_df_mask = pd.DataFrame(branch_and_tip_projection_records).T.fillna(0) + + proj_df.to_csv(output_projection_csv) + # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) + + if projection_threshold != 0: + output_projection_csv = output_projection_csv.replace(".csv", + "{}thresh.csv".format(projection_threshold)) + # output_projection_csv_tip_branch_mask = output_projection_csv_tip_branch_mask.replace(".csv", + # "{}thresh.csv".format( + # projection_threshold)) + + proj_df_arr = proj_df.values + proj_df_arr[proj_df_arr < projection_threshold] = 0 + proj_df = pd.DataFrame(proj_df_arr, columns=proj_df.columns, index=proj_df.index) + proj_df.to_csv(output_projection_csv) + + # proj_df_mask_arr = proj_df_mask.values + # proj_df_mask_arr[proj_df_mask_arr < projection_threshold] = 0 + # proj_df_mask = pd.DataFrame(proj_df_mask_arr, columns=proj_df_mask.columns, index=proj_df_mask.index) + # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) + + if normalize_proj_mat: + output_projection_csv = output_projection_csv.replace(".csv", "_norm.csv") + # output_projection_csv_tip_branch_mask = output_projection_csv_tip_branch_mask.replace(".csv", "_norm.csv") + + proj_df = normalize_projection_columns_per_cell(proj_df) + proj_df.to_csv(output_projection_csv) + + # proj_df_mask = normalize_projection_columns_per_cell(proj_df_mask) + # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) + +def console_script(): + module = ags.ArgSchemaParser(schema_type=IO_Schema) + main(**module.args) + +if __name__ == "__main__": + module = ags.ArgSchemaParser(schema_type=IO_Schema) + main(**module.args) diff --git a/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py b/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py index bc8b554..8992f40 100644 --- a/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py +++ b/morph_utils/executable_scripts/projection_matrix_from_swc_directory.py @@ -2,11 +2,14 @@ from tqdm import tqdm import pandas as pd import argschema as ags +import time +import subprocess from morph_utils.ccf import projection_matrix_for_swc + class IO_Schema(ags.ArgSchema): ccf_swc_directory = ags.fields.InputDir(description='directory with micron resolution ccf registered files') - output_projection_csv = ags.fields.OutputFile(description="output projection csv") + output_directory = ags.fields.OutputDir(description="output directory") projection_threshold = ags.fields.Int(default=0) normalize_proj_mat = ags.fields.Boolean(default=True) mask_method = ags.fields.Str(default="tip_and_branch",description = " 'tip_and_branch', 'branch', 'tip', or 'tip_or_branch' ") @@ -14,7 +17,12 @@ class IO_Schema(ags.ArgSchema): annotation_path = ags.fields.Str(default="",description = "Optional. Path to annotation .nrrd file. Defaults to 10um ccf atlas") resolution = ags.fields.Int(default=10, description="Optional. ccf resolution (micron/pixel") volume_shape = ags.fields.List(ags.fields.Int, default=[1320, 800, 1140], description = "Optional. Size of input annotation") + resample_spacing = ags.fields.Float(allow_none=True, default=None, description = 'internode spacing to resample input morphology with') + run_host = ags.fields.String(default='local',description='either ["local" or "hpc"]. Will run either locally or submit jobs to the HPC') + virtual_env_name = ags.fields.Str(default='skeleton_keys_4',description='Name of virtual conda env to activate on hpc. not needed if running local') + output_projection_csv = ags.fields.OutputFile(allow_none=True,default=None, description="output projection csv, when running local only") + def normalize_projection_columns_per_cell(input_df, projection_column_identifiers=['ipsi', 'contra']): """ :param input_df: input projection df @@ -31,7 +39,7 @@ def normalize_projection_columns_per_cell(input_df, projection_column_identifier def main(ccf_swc_directory, - output_projection_csv, + output_directory, resolution, projection_threshold, normalize_proj_mat, @@ -39,69 +47,160 @@ def main(ccf_swc_directory, tip_count, annotation_path, volume_shape, + resample_spacing, + run_host, + virtual_env_name, + output_projection_csv, **kwargs): + if run_host not in ['local','hpc']: + raise ValueError(f"Invalid run_host parameter entered ({run_host})") + if annotation_path == "": annotation_path = None + if run_host == 'hpc': + + output_directory = os.path.abspath(output_directory) + single_sp_proj_dir = os.path.join(output_directory,"SingleCellProjections") + job_dir = os.path.join(output_directory,"JobDir") + for dd in [single_sp_proj_dir, job_dir]: + if not os.path.exists(dd): + os.mkdir(dd) + results = [] for swc_fn in tqdm([f for f in os.listdir(ccf_swc_directory) if ".swc" in f]): - swc_pth = os.path.join(ccf_swc_directory, swc_fn) - - res = projection_matrix_for_swc(input_swc_file=swc_pth, - tip_count = tip_count, - mask_method = mask_method, - annotation=None, - annotation_path = annotation_path, - volume_shape=volume_shape, - resolution=resolution) - results.append(res) + swc_pth = os.path.abspath(os.path.join(ccf_swc_directory, swc_fn)) - output_projection_csv = output_projection_csv.replace(".csv", f"_{mask_method}.csv") - projection_records = {} - # branch_and_tip_projection_records = {} - for res in results: - fn = os.path.abspath(res[0]) - proj_records = res[1] - # brnch_tip_records = res[1] - - projection_records[fn] = proj_records - # branch_and_tip_projection_records[fn] = brnch_tip_records - - proj_df = pd.DataFrame(projection_records).T.fillna(0) - # proj_df_mask = pd.DataFrame(branch_and_tip_projection_records).T.fillna(0) - - proj_df.to_csv(output_projection_csv) - # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) - - if projection_threshold != 0: - output_projection_csv = output_projection_csv.replace(".csv", - "{}thresh.csv".format(projection_threshold)) - # output_projection_csv_tip_branch_mask = output_projection_csv_tip_branch_mask.replace(".csv", - # "{}thresh.csv".format( - # projection_threshold)) - - proj_df_arr = proj_df.values - proj_df_arr[proj_df_arr < projection_threshold] = 0 - proj_df = pd.DataFrame(proj_df_arr, columns=proj_df.columns, index=proj_df.index) - proj_df.to_csv(output_projection_csv) + if run_host=='local': + res = projection_matrix_for_swc(input_swc_file=swc_pth, + tip_count = tip_count, + mask_method = mask_method, + annotation=None, + annotation_path = annotation_path, + volume_shape=volume_shape, + resolution=resolution, + resample_spacing= resample_spacing) + results.append(res) + + else: + + output_projection_csv = os.path.join(single_sp_proj_dir, swc_fn.replace(".swc",".csv")) + + if not os.path.exists(output_projection_csv): + + job_file = os.path.join(job_dir,swc_fn.replace(".swc",".sh")) + log_file = os.path.join(job_dir,swc_fn.replace(".swc",".log")) + + command = "morph_utils_extract_projection_matrix_single_cell " + command = command+ f" --input_swc_file '{swc_pth}'" + command = command+ f" --output_projection_csv {output_projection_csv}" + command = command+ f" --projection_threshold {projection_threshold}" + command = command+ f" --normalize_proj_mat {normalize_proj_mat}" + command = command+ f" --mask_method {mask_method}" + command = command+ f" --tip_count {tip_count}" + command = command+ f" --annotation_path {annotation_path}" + command = command+ f" --resolution {resolution}" + # command = command+ f" --volume_shape {volume_shape}" + command = command+ f" --resample_spacing {resample_spacing}" + + + activate_command = f"source activate {virtual_env_name}" + command_list = [activate_command, command] + + slurm_kwargs = { + "--job-name": f"{swc_fn}", + "--mail-type": "NONE", + "--cpus-per-task": "1", + "--nodes": "1", + "--kill-on-invalid-dep": "yes", + "--mem": "24gb", + "--time": "1:00:00", + "--partition": "celltypes", + "--output": log_file + } + + dag_node = { + "job_file":job_file, + "slurm_kwargs":slurm_kwargs, + "slurm_commands":command_list + } + + job_file = dag_node["job_file"] + slurm_kwargs = dag_node["slurm_kwargs"] + command_list = dag_node["slurm_commands"] + + job_string_list = [f"#SBATCH {k}={v}" for k, v in slurm_kwargs.items()] + job_string_list = job_string_list + command_list + job_string_list = ["#!/bin/bash"] + job_string_list + + if os.path.exists(job_file): + os.remove(job_file) + + with open(job_file, 'w') as job_f: + for val in job_string_list: + job_f.write(val) + job_f.write('\n') + + + command = "sbatch {}".format(job_file) + command_list = command.split(" ") + result = subprocess.run(command_list, stdout=subprocess.PIPE) + std_out = result.stdout.decode('utf-8') + + job_id = std_out.split("Submitted batch job ")[-1].replace("\n", "") + time.sleep(0.1) + + if results != []: + + output_projection_csv = output_projection_csv.replace(".csv", f"_{mask_method}.csv") + projection_records = {} + # branch_and_tip_projection_records = {} + for res in results: + fn = os.path.abspath(res[0]) + proj_records = res[1] + # brnch_tip_records = res[1] - # proj_df_mask_arr = proj_df_mask.values - # proj_df_mask_arr[proj_df_mask_arr < projection_threshold] = 0 - # proj_df_mask = pd.DataFrame(proj_df_mask_arr, columns=proj_df_mask.columns, index=proj_df_mask.index) - # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) + projection_records[fn] = proj_records + # branch_and_tip_projection_records[fn] = brnch_tip_records - if normalize_proj_mat: - output_projection_csv = output_projection_csv.replace(".csv", "_norm.csv") - # output_projection_csv_tip_branch_mask = output_projection_csv_tip_branch_mask.replace(".csv", "_norm.csv") + proj_df = pd.DataFrame(projection_records).T.fillna(0) + # proj_df_mask = pd.DataFrame(branch_and_tip_projection_records).T.fillna(0) - proj_df = normalize_projection_columns_per_cell(proj_df) proj_df.to_csv(output_projection_csv) - - # proj_df_mask = normalize_projection_columns_per_cell(proj_df_mask) # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) + if projection_threshold != 0: + output_projection_csv = output_projection_csv.replace(".csv", + "{}thresh.csv".format(projection_threshold)) + # output_projection_csv_tip_branch_mask = output_projection_csv_tip_branch_mask.replace(".csv", + # "{}thresh.csv".format( + # projection_threshold)) + + proj_df_arr = proj_df.values + proj_df_arr[proj_df_arr < projection_threshold] = 0 + proj_df = pd.DataFrame(proj_df_arr, columns=proj_df.columns, index=proj_df.index) + proj_df.to_csv(output_projection_csv) + + # proj_df_mask_arr = proj_df_mask.values + # proj_df_mask_arr[proj_df_mask_arr < projection_threshold] = 0 + # proj_df_mask = pd.DataFrame(proj_df_mask_arr, columns=proj_df_mask.columns, index=proj_df_mask.index) + # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) + + if normalize_proj_mat: + output_projection_csv = output_projection_csv.replace(".csv", "_norm.csv") + # output_projection_csv_tip_branch_mask = output_projection_csv_tip_branch_mask.replace(".csv", "_norm.csv") + + proj_df = normalize_projection_columns_per_cell(proj_df) + proj_df.to_csv(output_projection_csv) + + # proj_df_mask = normalize_projection_columns_per_cell(proj_df_mask) + # proj_df_mask.to_csv(output_projection_csv_tip_branch_mask) + + + + def console_script(): module = ags.ArgSchemaParser(schema_type=IO_Schema) main(**module.args) diff --git a/morph_utils/executable_scripts/validate_swc_dir.py b/morph_utils/executable_scripts/validate_swc_dir.py index 59116c2..e8edbbc 100644 --- a/morph_utils/executable_scripts/validate_swc_dir.py +++ b/morph_utils/executable_scripts/validate_swc_dir.py @@ -4,17 +4,18 @@ from multiprocessing import Pool import pandas as pd from morph_utils.validation import ivscc_validate_morph - +import json class IO_Schema(ags.ArgSchema): swc_input_directory = ags.fields.InputDir(description='directory with swc files to validate') report_csv = ags.fields.OutputFile(description='directory with micron resolution ccf registered files') + marker_file_output_dir = ags.fields.OutputDir(default=None,description='directory to save marker files identifying the issues in the swc file',allow_none=True) soma_child_distance_threshold = ags.fields.Int(default=50, descreiption='max distance a somas child may be from soma') use_multiprocessing = ags.fields.Bool(default=True, description="weather to use cpu multiprocessing or not") -def main(swc_input_directory, soma_child_distance_threshold, report_csv, use_multiprocessing, +def main(swc_input_directory, soma_child_distance_threshold, report_csv, marker_file_output_dir, use_multiprocessing, **kwargs): print("Soma-Child Distance Threshold: {}".format(soma_child_distance_threshold)) print("Use Multiprocessing: {}".format(use_multiprocessing)) @@ -30,29 +31,56 @@ def main(swc_input_directory, soma_child_distance_threshold, report_csv, use_mul p = Pool() reslist = p.starmap(ivscc_validate_morph, parallel_inputs) - res_df = pd.DataFrame.from_records(reslist) - # res_df['error_list'] = res_df['error_list'].apply(lambda x: ast.literal_eval(x)) - max_num_errs = res_df['error_list'].apply(lambda x: len(x)).max() - err_cols = ["Error_{}".format(i) for i in range(max_num_errs)] + # TODO cleanup the returned logic of ivscc_validate_morph and parsing of this + csv_records = [] + for res in reslist: + if res['errors']!=[]: + + input_path = res['file_name'] + input_file = os.path.basename(input_path) + + + for error_dict in res['errors']: + this_error_message = error_dict['error'] + nodes_impacted = error_dict['Nodes'] + for no in nodes_impacted: + + this_res = {"file_name":input_file, + "file_path":input_path, + 'error':this_error_message, + "node":no + } + csv_records.append(this_res) + + # ofile = os.path.join(marker_file_output_dir, input_file.replace(".swc",'.json')) + # with open(ofile, "w") as f: + # json.dump(res,f) + res_df = pd.DataFrame.from_records(csv_records) + res_df.to_csv(report_csv) + + # res_df = pd.DataFrame.from_records(reslist) + # # res_df['error_list'] = res_df['error_list'].apply(lambda x: ast.literal_eval(x)) + # max_num_errs = res_df['error_list'].apply(lambda x: len(x)).max() + # err_cols = ["Error_{}".format(i) for i in range(max_num_errs)] - records = [] - for idx, row in res_df.iterrows(): + # records = [] + # for idx, row in res_df.iterrows(): - sp_dict = {"full_swc_path": row.file_name, - "swc_file_name": os.path.basename(row.file_name)} - for ec in err_cols: - sp_dict[ec] = None + # sp_dict = {"full_swc_path": row.file_name, + # "swc_file_name": os.path.basename(row.file_name)} + # for ec in err_cols: + # sp_dict[ec] = None - err_lst = row.error_list - for ct, sp_error in enumerate(err_lst): - sp_dict['Error_{}'.format(ct)] = sp_error + # err_lst = row.error_list + # for ct, sp_error in enumerate(err_lst): + # sp_dict['Error_{}'.format(ct)] = sp_error - if len(err_lst) != 0: - records.append(sp_dict) + # if len(err_lst) != 0: + # records.append(sp_dict) - final_qc_df = pd.DataFrame.from_records(records) + # final_qc_df = pd.DataFrame.from_records(records) - final_qc_df.to_csv(report_csv) + # final_qc_df.to_csv(report_csv) def console_script(): diff --git a/morph_utils/validation.py b/morph_utils/validation.py index 3dd8799..fad8e58 100644 --- a/morph_utils/validation.py +++ b/morph_utils/validation.py @@ -27,6 +27,12 @@ def ivscc_validate_morph(input_swc, distance_threshold=50, expected_types=[1, 2, :param expected_types: expected node types :return: error_list: list of all errors encountered; list """ + + meta_records = {"errors":[]} + node_list = [] + axon_origin_node_list = [] + root_node_list = [] + morph = morphology_from_swc(input_swc) nodes_to_qc = morph.nodes() @@ -37,10 +43,13 @@ def ivscc_validate_morph(input_swc, distance_threshold=50, expected_types=[1, 2, number_of_roots = 0 number_nodes_per_type = defaultdict(int) number_of_nodes_at_each_coord = defaultdict(int) - + node_coord_to_id = {} soma = morph.get_soma() if soma is None: error_list.append("No Soma Found") + node_list.append({}) + error_dict = {'error':"No Soma Found", "Nodes":[]} + meta_records['errors'].append(error_dict) else: soma_coord = (soma['x'], soma['y'], soma['z']) @@ -51,13 +60,19 @@ def ivscc_validate_morph(input_swc, distance_threshold=50, expected_types=[1, 2, number_nodes_per_type[cur_node_type] += 1 number_of_nodes_at_each_coord[cur_node_coord] += 1 - + node_coord_to_id[cur_node_coord] = no['id'] + if parent_id != -1: if parent_id in all_node_ids: if cur_node_type not in expected_types: error_list.append("Unexpected Node Type Found: {}".format(cur_node_type)) + node_list.append(no) + error_dict = {'error':"Unexpected Node Type Found: {}".format(cur_node_type), + "Nodes":[no]} + meta_records['errors'].append(error_dict) + # check that its compartment parent_node = morph.node_by_id(parent_id) parent_type = parent_node['type'] @@ -68,62 +83,140 @@ def ivscc_validate_morph(input_swc, distance_threshold=50, expected_types=[1, 2, # Otherwise, this is unacceptable UNLESS it's axon stem from basasl if (cur_node_type == 2) & (parent_type == 3): axon_origins += 1 + axon_origin_node_list.append(no) else: error_list.append( "Node type {} has parent node of type {}".format(cur_node_type, parent_type)) + node_list.append(no) + error_dict = { + 'error':"Node type {} has parent node of type {}".format(cur_node_type, parent_type), + "Nodes":[no] + } + meta_records['errors'].append(error_dict) + else: # This node is a child of the soma if cur_node_type == 2: axon_origins += 1 + axon_origin_node_list.append(no) # Make sure it's not a furcation node cur_node_children = morph.get_children(no) if len(cur_node_children) > 1: error_list.append("Node {} is an immediate child of the soma and branches".format(no['id'])) - + node_list.append(no) + error_dict = { + 'error':"Node {} is an immediate child of the soma and branches".format(no['id']), + "Nodes":[no] + } + meta_records['errors'].append(error_dict) + + # And make sure it's not too far away from the soma if soma: dist = euclidean(cur_node_coord, soma_coord) if dist > distance_threshold: error_list.append("Soma child node {} is {} distance from soma".format(no['id'], dist)) + node_list.append(no) + error_dict = { + 'error':"Soma child node {} is {} distance from soma".format(no['id'], dist), + "Nodes":[no] + } + meta_records['errors'].append(error_dict) + else: error_list.append( "Node {}, Type {}, Parent ID {} Not In Morphology".format(no['id'], no['type'], parent_id)) - + node_list.append(no) + + error_dict = { + 'error':"Node {}, Type {}, Parent ID {} Not In Morphology".format(no['id'], no['type'], parent_id), + "Nodes":[no] + } + meta_records['errors'].append(error_dict) + else: number_of_roots += 1 + root_node_list.append(no) if axon_origins > 1: error_list.append("Multiple Axon Origins ({} found)".format(axon_origins)) - + [node_list.append(i) for i in axon_origin_node_list] + error_dict = { + 'error':"Multiple Axon Origins ({} found)".format(axon_origins), + "Nodes":axon_origin_node_list + } + meta_records['errors'].append(error_dict) + + if number_of_roots > 1: error_list.append("Multiple Root Nodes ({} found)".format(number_of_roots)) + [node_list.append(i) for i in root_node_list] + + error_dict = { + 'error':"Multiple Root Nodes ({} found)".format(number_of_roots), + "Nodes":root_node_list + } + meta_records['errors'].append(error_dict) + duplicate_coords = [k for k, v in number_of_nodes_at_each_coord.items() if v != 1] + duplicate_nodes = [morph.node_by_id(node_coord_to_id[c]) for c in duplicate_coords] num_duplicate_coords = len(duplicate_coords) if num_duplicate_coords != 0: error_list.append("Nodes With Identical X,Y,Z Coordinates Found ({} found)".format(num_duplicate_coords)) - + error_dict = { + 'error':"Nodes With Identical X,Y,Z Coordinates Found ({} found)".format(num_duplicate_coords), + "Nodes":duplicate_nodes + } + meta_records['errors'].append(error_dict) + + + for dup_coord in duplicate_coords: + dup_no_id = node_coord_to_id[dup_coord] + this_no = morph.node_by_id(dup_no_id) + node_list.append(this_no) + # Loop Check has_loops, confidence = check_for_loops(morph) if has_loops: error_list.append("Loop Found In Morphology") + error_dict = { + 'error':"Loop Found In Morphology", + "Nodes":[] + } + meta_records['errors'].append(error_dict) + else: if confidence == "Ambiguous": error_list.append("Unable to check for loops due to missing root") + error_dict = { + 'error':"Unable to check for loops due to missing root", + "Nodes":[] + } + meta_records['errors'].append(error_dict) # Soma ID Check: if (soma is not None) and (soma['id'] != 1): error_list.append("Soma Node ID Is Not 1") - + node_list.append(soma) + error_dict = { + 'error':"Soma ID Is Not 1", + "Nodes":[soma] + } + meta_records['errors'].append(error_dict) + + + meta_records['file_name'] = os.path.abspath(input_swc) + error_list = list(set(error_list)) res_dict = {"file_name": os.path.abspath(input_swc), "error_list": error_list} - return res_dict + return meta_records def check_for_loops(morphology): @@ -245,6 +338,7 @@ def morphology_parent_node_qc(morph, types_to_check=[2, 3, 4]): nodes_to_qc = [n for n in morph.nodes() if n['type'] in types_to_check] all_node_ids = [n['id'] for n in morph.nodes()] error_list = [] + axon_origin_nodes = [] axon_origins = 0 for no in nodes_to_qc: parent_id = no['parent'] @@ -260,6 +354,8 @@ def morphology_parent_node_qc(morph, types_to_check=[2, 3, 4]): # Otherwise, this is unacceptable UNLESS it's axon stem from basasl if (cur_node_type == 2) & (parent_type == 3): axon_origins += 1 + axon_origin_nodes.append(no) + else: error_list.append("Node type {} has parent node of type {}".format(cur_node_type, parent_type)) diff --git a/setup.cfg b/setup.cfg index fec0758..9b73ca8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,5 +25,6 @@ console_scripts = morph_utils_sort_morphology_ids = morph_utils.executable_scripts.sort_morphology_ids:console_script morph_utils_full_morph_soma_correction = morph_utils.executable_scripts.full_morphology_soma_correction:console_script morph_utils_extract_projection_matrix = morph_utils.executable_scripts.projection_matrix_from_swc_directory:console_script + morph_utils_extract_projection_matrix_single_cell = morph_utils.executable_scripts.projection_matrix_for_single_cell:console_script morph_utils_move_somas_left_hemisphere = morph_utils.executable_scripts.move_somas_to_left_hemisphere_swc_directory:console_script - morph_utils_local_crop_ccf_swcs = morph_utils.executable_scripts.local_crop_ccf_swc_directory:console_script \ No newline at end of file + morph_utils_local_crop_ccf_swcs = morph_utils.executable_scripts.local_crop_ccf_swc_directory:console_script