Source code for neuralib.atlas.data

from __future__ import annotations

from pathlib import Path
from typing import Literal, overload

import numpy as np
import polars as pl
from brainglobe_atlasapi import BrainGlobeAtlas
from neuralib.io import load_json, save_json
from neuralib.io.core import ATLAS_CACHE_DIRECTORY
from neuralib.typing import PathLike
from neuralib.util.deprecation import deprecated_func
from neuralib.util.tqdm import download_with_tqdm
from neuralib.util.utils import ensure_dir
from neuralib.util.verbose import print_load, print_save
from tqdm import tqdm

# Public API of this module (the names exported by ``from ... import *``).
__all__ = [
    'ATLAS_NAME',
    'load_bg_structure_tree',
    'load_bg_volumes',
    'get_children',
    'get_annotation_ids',
    'get_leaf_in_annotation',
    'build_annotation_leaf_map',
    # Allen resources (``get_dorsal_cortex`` is marked as deprecated in this module)
    'get_dorsal_cortex'
]

# Atlas identifiers accepted by the ``atlas_name`` arguments of this module.
# Each value is handed unchanged to ``BrainGlobeAtlas(...)``.
ATLAS_NAME = Literal[
    'allen_mouse_10um',
    'allen_mouse_25um',
    'allen_mouse_50um',
    'allen_mouse_100um',
    'kim_mouse_10um',
    'kim_mouse_25um',
    'kim_mouse_50um',
    'kim_mouse_100um',
    'perens_lsfm_mouse_20um',
    'perens_stereotaxic_mouse_mri_25um',
    'princeton_mouse_20um',
]
"""Atlas Name From BrainGlobeAtlas"""


def load_bg_structure_tree(atlas_name: ATLAS_NAME = 'allen_mouse_10um', *,
                           check_latest: bool = True,
                           paired: bool = False) -> pl.DataFrame:
    """
    Load the structure table of a `brainglobe_atlasapi` atlas as a dataframe

    The table is read from ``structures.csv`` in the atlas root directory and
    extended with a ``parent_acronym`` column.

    :param atlas_name: :attr:`~neuralib.atlas.data.ATLAS_NAME`
    :param check_latest: Forwarded to ``BrainGlobeAtlas`` (check the latest atlas version)
    :param paired: If True, return only the ``acronym`` & ``parent_acronym`` fields
    :return: Structure dataframe
    """
    atlas = BrainGlobeAtlas(atlas_name, check_latest=check_latest)  # pyright: ignore[reportArgumentType]
    structures = pl.read_csv(atlas.root_dir / 'structures.csv')
    structures = structures.with_columns(pl.col('parent_structure_id').cast(pl.Int64))

    # id -> acronym lookup, renamed so it can be joined on the parent id
    parent_lut = structures.select([
        pl.col("id").alias("parent_structure_id"),
        pl.col("acronym").alias("parent_acronym"),
    ])
    structures = structures.join(parent_lut, on="parent_structure_id", how="left")

    if not paired:
        return structures

    # self-join on parent id; the parent acronym shows up as ``acronym_right``
    id_table = structures.select(pl.col('acronym'), pl.col('id'), pl.col('parent_structure_id'))
    linked = id_table.join(id_table, left_on='parent_structure_id', right_on='id')
    return linked.select(pl.col('acronym'), pl.col('acronym_right').alias('parent_acronym'))
def load_bg_volumes(atlas_name: ATLAS_NAME = 'allen_mouse_10um',
                    cached_file: PathLike | None = None,
                    force: bool = False) -> pl.DataFrame:
    """
    Load structure tree dataframe with volume for each region ``volume_mm3`` ::

        ┌─────────┬─────┬─────────────────┬─────────────────┬────────────────┬────────────────┬────────────┐
        │ acronym ┆ id  ┆ name            ┆ structure_id_pa ┆ parent_structu ┆ parent_acronym ┆ volume_mm3 │
        │ ---     ┆ --- ┆ ---             ┆ th              ┆ re_id          ┆ ---            ┆ ---        │
        │ str     ┆ i64 ┆ str             ┆ ---             ┆ ---            ┆ str            ┆ f64        │
        │         ┆     ┆                 ┆ str             ┆ i64            ┆                ┆            │
        ╞═════════╪═════╪═════════════════╪═════════════════╪════════════════╪════════════════╪════════════╡
        │ VI      ┆ 653 ┆ Abducens        ┆ /997/8/343/1065 ┆ 370            ┆ MY-mot         ┆ 0.030332   │
        │         ┆     ┆ nucleus         ┆ /354/370/653/   ┆                ┆                ┆            │
        │ AOB     ┆ 151 ┆ Accessory       ┆ /997/8/567/688/ ┆ 698            ┆ OLF            ┆ 0.652032   │
        │         ┆     ┆ olfactory bulb  ┆ 695/698/151/    ┆                ┆                ┆            │
        │ …       ┆ …   ┆ …               ┆ …               ┆ …              ┆ …              ┆ …          │
        │ von     ┆ 949 ┆ vomeronasal     ┆ /997/1009/967/9 ┆ 967            ┆ cm             ┆ 0.013428   │
        │         ┆     ┆ nerve           ┆ 49/             ┆                ┆                ┆            │
        └─────────┴─────┴─────────────────┴─────────────────┴────────────────┴────────────────┴────────────┘

    A region without any leaf in the annotation leaf map gets ``volume_mm3 = -1``.

    :param atlas_name: :attr:`~neuralib.atlas.data.ATLAS_NAME`
    :param cached_file: Cached csv file path. If None, ``{atlas_name}_bg_volumes.csv``
        inside the atlas cache directory is used
    :param force: Force overwrite the cached file
    :return: Structure dataframe with an additional ``volume_mm3`` column
    """
    if cached_file is None:
        cached_file = ensure_dir(ATLAS_CACHE_DIRECTORY) / f'{atlas_name}_bg_volumes.csv'
    else:
        cached_file = Path(cached_file)

    if cached_file.exists() and not force:
        print_load(cached_file)
        return pl.read_csv(cached_file)

    df = load_bg_structure_tree(atlas_name)
    lut = build_annotation_leaf_map(atlas_name)
    bg = BrainGlobeAtlas(atlas_name)  # pyright: ignore[reportArgumentType]

    # Only the first axis resolution is used, i.e. voxels are treated as cubes.
    # NOTE(review): presumably the resolution is in micrometers (hence / 1000 -> mm) - confirm
    voxel_volume_mm3 = (bg.resolution[0] / 1000) ** 3

    # Count the voxels of every annotation id in ONE pass over the volume.
    # Scanning the whole volume with ``np.isin`` for each region separately
    # repeats this work once per region and allocates a full-size mask each time.
    annotation_ids, voxel_counts = np.unique(bg.annotation, return_counts=True)
    count_by_id = dict(zip(annotation_ids.tolist(), voxel_counts.tolist()))

    volumes = []
    for region_id in tqdm(df['id'].to_list(), desc=f"calculating volumes and save cache ({atlas_name})"):
        leaf_ids = lut.get(region_id, [])
        if not leaf_ids:
            volumes.append(-1.0)  # if not found (float keeps the column dtype homogeneous)
            continue

        # ``set`` so that a repeated leaf id is counted once, as a membership mask would do
        n_voxels = sum(count_by_id.get(leaf, 0) for leaf in set(leaf_ids))
        volumes.append(n_voxels * voxel_volume_mm3)

    ret = df.with_columns(pl.Series(name='volume_mm3', values=volumes))
    ret.write_csv(cached_file)
    print_save(cached_file)

    return ret
@overload
def get_children(parent: int, *,
                 dataframe: bool = False,
                 atlas_name: ATLAS_NAME = 'allen_mouse_10um') -> list[int] | pl.DataFrame: ...


@overload
def get_children(parent: str, *,
                 dataframe: bool = False,
                 atlas_name: ATLAS_NAME = 'allen_mouse_10um') -> list[str] | pl.DataFrame: ...


def get_children(parent: int | str, *,
                 dataframe: bool = False,
                 atlas_name: ATLAS_NAME = 'allen_mouse_10um') -> list[int] | list[str] | pl.DataFrame:
    """
    Get the direct children of a brain region

    The children are given in the same form as ``parent``: ids for an id,
    acronyms for an acronym.

    :param parent: Region id or region acronym
    :param dataframe: If True, return the matching rows of the structure dataframe
        instead of a list
    :param atlas_name: :attr:`~neuralib.atlas.data.ATLAS_NAME`
    :return: List of children {id, acronym}, or a dataframe
    """
    tree = load_bg_structure_tree(atlas_name=atlas_name)
    return _get_children(tree, parent, dataframe)
def _get_children(df: 'pl.DataFrame',
                  parent: 'int | str',
                  dataframe: bool) -> 'list[int] | list[str] | pl.DataFrame':
    """
    Select the rows of ``df`` whose parent is ``parent``

    :param df: Structure dataframe with the ``id``, ``acronym``, ``parent_structure_id``
        and ``parent_acronym`` columns
    :param parent: Parent region id (int or numpy integer) or parent region acronym (str)
    :param dataframe: If True, return the filtered dataframe, otherwise a list of
        ``id`` (id given) or ``acronym`` (acronym given)
    :return: Children as a list or as a dataframe
    :raises TypeError: If ``parent`` is neither an integer nor a string
    """
    # ids taken from a numpy array are ``np.integer``, which is not an ``int`` subclass
    if isinstance(parent, np.integer):
        parent = int(parent)

    if isinstance(parent, int):
        ret = df.filter(pl.col('parent_structure_id') == parent)
        field = 'id'
    elif isinstance(parent, str):
        ret = df.filter(pl.col('parent_acronym') == parent)
        field = 'acronym'
    else:
        raise TypeError(f'parent must be an int (id) or a str (acronym), got {type(parent).__name__}')

    if not dataframe:
        return ret[field].to_list()

    return ret


# ============= #
# BG Annotation #
# ============= #
def get_annotation_ids(atlas_name: ATLAS_NAME = 'allen_mouse_10um',
                       check_latest: bool = True) -> np.ndarray:
    """
    Get the unique ids found in the annotation volume of an atlas

    :param atlas_name: :attr:`~neuralib.atlas.data.ATLAS_NAME`
    :param check_latest: Forwarded to ``BrainGlobeAtlas`` (check the latest atlas version)
    :return: Array of unique annotation ids
    """
    atlas = BrainGlobeAtlas(atlas_name, check_latest=check_latest)  # pyright: ignore[reportArgumentType]
    return np.unique(atlas.annotation)
def get_leaf_in_annotation(region: int | str, *,
                           name: bool = False,
                           cached_file: PathLike | None = None,
                           atlas_name: ATLAS_NAME = 'allen_mouse_10um') -> list[int] | list[str]:
    """
    Get the annotation leaves {id, acronym} that belong to a region {id, acronym}

    :param region: Region id or region acronym
    :param name: If True, return acronyms, otherwise return ids
    :param cached_file: Cached json for the annotation leaf map
    :param atlas_name: :attr:`~neuralib.atlas.data.ATLAS_NAME`
    :return: List of annotation {id, acronym}
    :raises RuntimeError: If an acronym does not match exactly one structure
    :raises ValueError: If the region id is not a key of the annotation leaf map
    """
    tree = load_bg_structure_tree(atlas_name=atlas_name)

    # acronym -> id
    if isinstance(region, str):
        matched = tree.filter(pl.col('acronym') == region)['id'].to_list()
        if len(matched) != 1:
            raise RuntimeError(f"The region {region} is not a valid acronym")
        region = int(matched[0])

    leaf_map = build_annotation_leaf_map(atlas_name=atlas_name, cached_file=cached_file)
    try:
        leaves = leaf_map[region]
    except KeyError:
        raise ValueError(f'Invalid region: {region}')

    if not name:
        return leaves

    # id -> acronym
    return tree.filter(pl.col('id').is_in(leaves))['acronym'].to_list()
def build_annotation_leaf_map(atlas_name: ATLAS_NAME = 'allen_mouse_10um', *,
                              cached_file: PathLike | None = None,
                              force: bool = False) -> dict[int, list[int]]:
    """
    Get all region id (key) and list of annotation id (values)

    A region whose id is present in the annotation volume maps to itself.
    Any other region maps to the concatenated leaves of its children.
    The result is cached as json.

    .. note::

        The annotation ids are read from ``atlas_name``. A cache file computed for
        another atlas than the default one with the annotation of the default
        atlas is stale and should be rebuilt with ``force=True``.

    :param atlas_name: :attr:`~neuralib.atlas.data.ATLAS_NAME`
    :param cached_file: Cached json file path. If None, ``{atlas_name}_annotation_leaf.json``
        inside the atlas cache directory is used
    :param force: Force re-compute the cached file
    :return: Dict of region id to list of annotation id
    :raises ValueError: If ``cached_file`` does not have the ``.json`` suffix
    """
    if cached_file is None:
        cached_file = ensure_dir(ATLAS_CACHE_DIRECTORY) / f'{atlas_name}_annotation_leaf.json'
    else:
        cached_file = Path(cached_file)
        if cached_file.suffix != '.json':
            raise ValueError('not a json file')

    if cached_file.exists() and not force:
        data = load_json(cached_file, verbose=False)
        # json keys are always strings, restore the integer ids
        return {int(k): v for k, v in data.items()}

    tree = load_bg_structure_tree(atlas_name)
    id_to_children = _build_id_to_children_map(tree)
    # use the annotation of the REQUESTED atlas (not of the default one);
    # ``tolist`` turns the numpy scalars into plain python int
    annotation_ids = set(get_annotation_ids(atlas_name).tolist())

    leaf_map: dict[int, list[int]] = {}

    def collect(rid):
        """Return (and memoize in ``leaf_map``) the annotation leaves below ``rid``"""
        if rid in leaf_map:
            return leaf_map[rid]

        if rid in annotation_ids:
            # NOTE(review): an annotated region is taken as a leaf, its children
            # are not visited even if they are annotated too - confirm this is intended
            leaf_map[rid] = [rid]
        else:
            result = []
            for child in id_to_children.get(rid, []):
                result.extend(collect(child))
            leaf_map[rid] = result

        return leaf_map[rid]

    for rid in tree['id'].to_list():
        collect(rid)

    save_json(cached_file, leaf_map)

    return leaf_map
def _build_id_to_children_map(tree: pl.DataFrame) -> dict[int, list[int]]:
    """
    Map every ``parent_structure_id`` of the structure dataframe to the list of
    ``id`` that have it as parent

    :param tree: Structure dataframe with the ``id`` and ``parent_structure_id`` columns
    :return: Dict of parent id to list of children id
    """
    children_per_parent = (
        tree.select(['id', 'parent_structure_id'])
        .group_by('parent_structure_id', maintain_order=False)
        .agg(pl.col('id'))
    )
    parent_ids = children_per_parent['parent_structure_id'].to_list()
    children_ids = children_per_parent['id'].to_list()
    return dict(zip(parent_ids, children_ids))


# =============== #
# Allen Resources #
# =============== #
@deprecated_func(new='neuralib.imaging.widefield.get_dorsal_ccf_file()')
def get_dorsal_cortex(output_dir: Path | None = None) -> Path:
    """
    Get the example dorsal projection annotation svg file, downloading it
    if it is not in the output directory yet

    .. seealso::

        https://community.brain-map.org/t/aligning-dorsal-projection-of-mouse-common-coordinate-framework-with-wide-field-images-of-mouse-brain/140/2

    :param output_dir: Output directory for caching. If None, the atlas cache directory is used
    :return: Output file path
    """
    directory = ensure_dir(ATLAS_CACHE_DIRECTORY) if output_dir is None else output_dir
    target = directory / 'cortical_map_top_down.svg'

    # already cached, nothing to download
    if target.exists():
        return target

    url = 'http://connectivity.brain-map.org/assets/cortical_map_top_down.svg'
    payload = download_with_tqdm(url)
    target.write_bytes(payload.getvalue())
    print_save(target, verb='DOWNLOAD')

    return target