Source code for neuralib.atlas.cellatlas.core

import io
from pathlib import Path

import polars as pl
from neuralib.atlas.data import load_bg_structure_tree
from neuralib.io.core import ATLAS_CACHE_DIRECTORY
from neuralib.typing import PathLike
from neuralib.util.utils import ensure_dir
from neuralib.util.verbose import print_save

__all__ = ['load_cellatlas']


def load_cellatlas(file: PathLike | None = None, *,
                   with_cell_type: bool = False,
                   with_detail: bool = False,
                   with_total_neurons: bool = True,
                   with_acronym: bool = True,
                   reload: bool = False) -> pl.DataFrame:
    """
    Load the cell atlas table (cell densities and volume per brain area) as a dataframe.

    The table is read from a cached csv file. If the cache does not exist, or ``reload``
    is set, it is downloaded first. The ``Brain region`` column is renamed to ``name``.

    .. seealso::

        `Rodarie D et al., (2022) <https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1010739#sec047>`_

    :param file: Cached csv filepath. If None, ``cellatlas.csv`` under the atlas cache directory is used
    :param with_cell_type: Keep all columns. If False (default), only ``name``, ``Neuron [mm^-3]``
        and ``Volumes [mm^3]`` are selected
    :param with_detail: Keep every brain area. If False (default), rows whose ``name`` contains
        ``,``, ``/`` or ``(`` are removed
    :param with_total_neurons: Add an ``n_neurons`` field (density times volume, cast to Int64) and
        drop the ``Neuron [mm^-3]`` field, defaults to True
    :param with_acronym: Join the ``acronym`` field from the structure tree data on ``name``, defaults to True
    :param reload: Re-download the csv file even if the cache exists
    :return: DataFrame
    """
    csv_path = ensure_dir(ATLAS_CACHE_DIRECTORY) / 'cellatlas.csv' if file is None else Path(file)

    # download when the cache is missing or a refresh is explicitly requested
    need_download = not csv_path.exists() or reload
    raw = _request(csv_path) if need_download else pl.read_csv(csv_path)
    table = raw.rename({'Brain region': 'name'})

    if not with_cell_type:
        table = table.select('name', 'Neuron [mm^-3]', 'Volumes [mm^3]')

    if not with_detail:
        # regex patterns marking the area names to be excluded
        area_name = pl.col('name')
        for outlier in (',', '/', r'\('):
            table = table.filter(~(area_name.str.contains(outlier)))

    if with_total_neurons:
        density = pl.col('Neuron [mm^-3]')
        volume = pl.col('Volumes [mm^3]')
        total = (density * volume).alias('n_neurons').cast(pl.Int64)
        table = table.with_columns(total).drop('Neuron [mm^-3]')

    if with_acronym:
        acronyms = load_bg_structure_tree().select('name', 'acronym').sort('name')
        table = table.join(acronyms, on='name')

    return table
def _request(output: Path) -> pl.DataFrame:
    """
    Download the cell atlas table from the paper source and cache it as csv.

    The ``Densities BBCAv1`` sheet of the supplementary excel file is read,
    written to ``output`` as csv, and returned.

    :param output: Destination csv filepath. Missing parent directories are created
    :return: DataFrame as read from the downloaded excel sheet
    :raises RuntimeError: If the server does not respond with HTTP 200
    """
    import requests

    url = 'https://journals.plos.org/ploscompbiol/article/file?type=supplementary&id=10.1371/journal.pcbi.1010739.s011'

    # requests has no default timeout; without one a stalled connection blocks forever
    resp = requests.get(url, timeout=60)
    if resp.status_code != 200:
        raise RuntimeError(f'download cellatlas FAIL (HTTP {resp.status_code})')

    df = pl.read_excel(io.BytesIO(resp.content), sheet_name='Densities BBCAv1')

    # a user-supplied cache path may point into a directory that does not exist yet
    output.parent.mkdir(parents=True, exist_ok=True)
    df.write_csv(output)
    print_save(output, verb='DOWNLOAD')

    return df