Spatial Decomposition

Calling cell-type compositions for spot-based spatial transcriptomics data

11 datasets · 9 methods · 2 control methods · 1 metric

Task info Method info Metric info Dataset info Results

Spatial decomposition (also often referred to as spatial deconvolution) is applicable to spatial transcriptomics data where the transcription profile of each capture location (spot, voxel, bead, etc.) does not share a bijective relationship with the cells in the tissue, i.e., multiple cells may contribute to the same capture location. The task of spatial decomposition then refers to estimating the composition of cell types/states that are present at each capture location. The cell type/state estimates are presented as proportion values, representing the proportion of the cells at each capture location that belong to a given cell type.

We distinguish between reference-based decomposition and de novo decomposition, where the former leverages external data (e.g., scRNA-seq or scNuc-seq) to guide the inference process, while the latter only works with the spatial data. We require that all datasets have an associated reference single-cell dataset, but methods are free to ignore this information. Due to the lack of real datasets with the necessary ground truth, this task makes use of a simulated dataset generated by creating cell aggregates by sampling from a Dirichlet distribution. The ground-truth dataset consists of the spatial expression matrix, XY coordinates of the spots, true cell-type proportions for each spot, and the reference single-cell data (from which the cell aggregates were simulated).

Summary

// IDs that are listed in the info tables AND occur at least once in `results`,
// kept in info-table order. Each cell builds its lookup Set once instead of
// re-mapping `results` for every candidate id.
poss_dataset_ids = (() => {
  const seen = new Set(results.map(r => r.dataset_id))
  return dataset_info.map(d => d.dataset_id).filter(id => seen.has(id))
})()
poss_method_ids = (() => {
  const seen = new Set(results.map(r => r.method_id))
  return method_info.map(d => d.method_id).filter(id => seen.has(id))
})()
// A metric counts as present when it is a key of any result's `scaled_scores`.
poss_metric_ids = (() => {
  const seen = new Set(results.flatMap(r => Object.keys(r.scaled_scores)))
  return metric_info.map(d => d.metric_id).filter(id => seen.has(id))
})()

// Long-format scores: one {method_id, dataset_id, metric_id, score} record per
// entry of a result's `scaled_scores`, restricted to the methods, metrics and
// datasets currently selected in the filter inputs.
results_long = results
  .flatMap(({method_id, dataset_id, scaled_scores}) =>
    Object.entries(scaled_scores).map(([metric_id, score]) => ({method_id, dataset_id, metric_id, score}))
  )
  .filter(row =>
    method_ids.includes(row.method_id) &&
    metric_ids.includes(row.metric_id) &&
    dataset_ids.includes(row.dataset_id)
  )

// One flat record per result: the method/dataset identifiers followed by
// whatever fields `d.resources` carries.
// Uses map rather than flatMap: the callback always returns a single object,
// never an array, so there is nothing to flatten.
results_resources = results.map(d => ({
  method_id: d.method_id,
  dataset_id: d.dataset_id,
  ...d.resources
}))

/**
 * Format a duration in seconds as a short label with a single unit.
 *
 * Values are floored to whole seconds, minutes, hours or days. Anything that
 * is not below one week falls through to ">7d"; that includes NaN, because
 * every `<` comparison against NaN is false.
 *
 * @param {number} time - duration in seconds
 * @returns {string} one of "0s", "<1s", "Ns", "Nm", "Nh", "Nd" or ">7d"
 */
function label_time(time) {
  // [exclusive upper bound in seconds, formatter], checked in ascending order.
  const steps = [
    [1e-5, () => "0s"],
    [1, () => "<1s"],
    [60, (t) => `${Math.floor(t)}s`],
    [3600, (t) => `${Math.floor(t / 60)}m`],
    [3600 * 24, (t) => `${Math.floor(t / 3600)}h`],
    [3600 * 24 * 7, (t) => `${Math.floor(t / 3600 / 24)}d`],
  ];
  for (const [limit, format] of steps) {
    if (time < limit) return format(time);
  }
  return ">7d";
}

/**
 * Format a size given in megabytes as a short label (M, G or T suffix).
 *
 * @param {number} x_mb - size in megabytes
 * @param {boolean} [include_mb=true] - when false, every value below 1e3 MB
 *   is reported as "<1G" instead of being given in megabytes
 * @returns {string} "<1M", "NM", "<1G", "NG", "NT", or ">1P" for anything not
 *   below 1e9 MB (NaN also ends up here, since its comparisons are all false)
 */
function label_memory(x_mb, include_mb = true) {
  if (x_mb < 1e3) {
    if (!include_mb) return "<1G";
    return x_mb < 1 ? "<1M" : `${Math.round(x_mb)}M`;
  }
  if (x_mb < 1e6) return `${Math.round(x_mb / 1e3)}G`;
  if (x_mb < 1e9) return `${Math.round(x_mb / 1e6)}T`;
  return ">1P";
}

/**
 * Mean of the `score` fields of a list of records.
 *
 * Each score is clamped to [0, 1] first; a missing or non-numeric score
 * counts as 0, so it lowers the mean rather than being skipped.
 *
 * @param {Array<{score: number}>} obj - records carrying a `score` field
 * @returns {number|undefined} whatever `d3.mean` returns for the clamped scores
 */
function aggregate_scores(obj) {
  const clamp01 = (s) => Math.min(1, Math.max(0, s));
  // The global isNaN is also true for undefined, so missing scores land in the 0 branch.
  const usable = obj.map(({score}) => (isNaN(score) ? 0 : clamp01(score)));
  return d3.mean(usable);
}

/**
 * Mean of the entries of `x` after dropping everything the global `isNaN`
 * flags (NaN, undefined, non-numeric strings).
 *
 * @param {Array} x - values to average
 * @returns {number|undefined} whatever `d3.mean` returns for the kept values
 */
function mean_na_rm(x) {
  const present = x.filter((value) => isNaN(value) === false);
  return d3.mean(present);
}

/**
 * Turn a list of row objects into an object of column arrays.
 *
 * The columns are the union of the keys of all rows, in first-seen order, so
 * the result no longer depends on the first row happening to carry every key.
 * A row that lacks a key contributes `undefined` at its position. An empty
 * list yields `{}` instead of throwing.
 *
 * @param {Array<Object>} list - row objects
 * @returns {Object<string, Array>} one array per key, each of length `list.length`
 */
function transpose_list_of_objects(list) {
  const keys = new Set(list.flatMap((row) => Object.keys(row)));
  return Object.fromEntries([...keys].map((key) => [key, list.map((row) => row[key])]));
}

// Overall score per method: aggregate_scores applied to all of a method's
// rows in results_long, across every dataset and metric.
overall = d3.rollups(results_long, aggregate_scores, row => row.method_id)
  .map(([method_id, mean_score]) => ({method_id, mean_score}))

// Per-method scores broken down by dataset: each row is
// {method_id, dataset_<dataset_id>: aggregated score, ...}.
per_dataset = d3.rollups(
  results_long,
  rows => Object.fromEntries(
    d3.rollups(rows, aggregate_scores, row => row.dataset_id)
      .map(([dataset_id, score]) => ["dataset_" + dataset_id, score])
  ),
  row => row.method_id
).map(([method_id, by_dataset]) => ({method_id, ...by_dataset}))

// Per-method scores broken down by metric: each row is
// {method_id, metric_<metric_id>: aggregated score, ...}.
per_metric = d3.rollups(
  results_long,
  rows => Object.fromEntries(
    d3.rollups(rows, aggregate_scores, row => row.metric_id)
      .map(([metric_id, score]) => ["metric_" + metric_id, score])
  ),
  row => row.method_id
).map(([method_id, by_metric]) => ({method_id, ...by_metric}))

// Per-method summary of run outcomes and resource usage, one row per method.
// The *_log fields hold -log10 of the mean and the *_str fields hold the
// space-padded human-readable label of the same mean.
resources = d3.groups(results_resources, d => d.method_id)
  .map(([method_id, values]) => {
    // Fraction of runs per outcome. Exit code 137 is counted as out-of-memory
    // and 143 as timeout; any other positive exit code is a generic error and
    // error_pct_ok is the remainder.
    const error_pct_oom = d3.mean(values, d => d.exit_code === 137)
    const error_pct_timeout = d3.mean(values, d => d.exit_code === 143)
    const error_pct_error = d3.mean(values, d => d.exit_code > 0) - error_pct_oom - error_pct_timeout
    const error_pct_ok = 1 - error_pct_oom - error_pct_timeout - error_pct_error
    // Each mean is computed once here and reused for the raw, log and label fields.
    const mean_peak_memory_mb = mean_na_rm(values.map(d => d.peak_memory_mb))
    const mean_disk_read_mb = mean_na_rm(values.map(d => d.disk_read_mb))
    const mean_disk_write_mb = mean_na_rm(values.map(d => d.disk_write_mb))
    const mean_duration_sec = mean_na_rm(values.map(d => d.duration_sec))
    return ({
      method_id,
      error_pct_error,
      error_pct_oom,
      error_pct_timeout,
      error_pct_ok,
      // Order matches palettes.error_reason.names:
      // memory limit exceeded, time limit exceeded, execution error, no error.
      error_reason: [error_pct_oom, error_pct_timeout, error_pct_error, error_pct_ok],
      mean_cpu_pct: mean_na_rm(values.map(d => d.cpu_pct)),
      mean_peak_memory_mb,
      mean_peak_memory_log: -Math.log10(mean_peak_memory_mb),
      mean_peak_memory_str: " " + label_memory(mean_peak_memory_mb) + " ",
      mean_disk_read_mb,
      mean_disk_read_log: -Math.log10(mean_disk_read_mb),
      mean_disk_read_str: " " + label_memory(mean_disk_read_mb) + " ",
      mean_disk_write_mb,
      mean_disk_write_log: -Math.log10(mean_disk_write_mb),
      mean_disk_write_str: " " + label_memory(mean_disk_write_mb) + " ",
      mean_duration_sec,
      mean_duration_log: -Math.log10(mean_duration_sec),
      mean_duration_str: " " + label_time(mean_duration_sec) + " "
    })
  })

// One row per displayed method, merging the overall score with the
// per-dataset, per-metric and resource summaries; best mean score first.
summary_all = method_info
  .filter(d => show_con || !d.is_baseline)
  .filter(d => method_ids.includes(d.method_id))
  // A selected method can have no rows left in results_long (e.g. every dataset
  // it ran on is deselected). It then has no entry in `overall`, and reading
  // `.mean_score` off the missing entry used to throw, so such methods are skipped.
  .filter(d => overall.some(o => o.method_id === d.method_id))
  .map(method => {
    const {method_id, method_name} = method
    const {mean_score} = overall.find(d => d.method_id === method_id)
    const datasets = per_dataset.find(d => d.method_id === method_id)
    const metrics = per_metric.find(d => d.method_id === method_id)
    // May be undefined; spreading undefined adds no fields.
    const resources_ = resources.find(d => d.method_id === method_id)
    return {method_id, method_name, mean_score, ...datasets, ...metrics, ...resources_}
  })
  .sort((a, b) => b.mean_score - a.mean_score)

// Column definitions for the heatmap, in display order: method name, overall
// score, error pie, one column per selected dataset and per selected metric
// (each sorted by display name), then the resource columns.
// Keep every key present on the first entry (the mapper below adds `options`
// to it): transpose_list_of_objects reads its column set from that entry.
column_info = [
  {id: "method_name", name: "Name", label: null, group: "method", geom: "text", palette: null},
  {id: "mean_score", name: "Score", group: "overall", geom: "bar", palette: "overall"},
  {id: "error_reason", name: "Error reason", group: "overall", geom: "pie", palette: "error_reason"},
  ...dataset_info
    .filter(ds => dataset_ids.includes(ds.dataset_id))
    .map(ds => ({id: "dataset_" + ds.dataset_id, name: ds.dataset_name, group: "dataset", geom: "funkyrect", palette: "dataset"}))
    .sort((left, right) => left.name.localeCompare(right.name)),
  ...metric_info
    .filter(mt => metric_ids.includes(mt.metric_id))
    .map(mt => ({id: "metric_" + mt.metric_id, name: mt.metric_name, group: "metric", geom: "funkyrect", palette: "metric"}))
    .sort((left, right) => left.name.localeCompare(right.name)),
  {id: "mean_cpu_pct", name: "%CPU", group: "resources", geom: "funkyrect", palette: "resources"},
  {id: "mean_peak_memory_log", name: "Peak memory", label: "mean_peak_memory_str", group: "resources", geom: "rect", palette: "resources"},
  {id: "mean_disk_read_log", name: "Disk read", label: "mean_disk_read_str", group: "resources", geom: "rect", palette: "resources"},
  {id: "mean_disk_write_log", name: "Disk write", label: "mean_disk_write_str", group: "resources", geom: "rect", palette: "resources"},
  {id: "mean_duration_log", name: "Duration", label: "mean_duration_str", group: "resources", geom: "rect", palette: "resources"}
].map(col => {
  // Layout options for specific columns; all other columns pass through untouched.
  const options =
    col.id === "method_name" ? {width: 15, hjust: 0}
    : col.id === "is_baseline" ? {width: 1}
    : col.geom === "bar" ? {width: 4}
    : null
  return options === null ? col : {...col, options}
})

// Column groups for the heatmap header, in display order, written as
// [group, palette, level1 label]. The dataset and metric headers are left
// blank when the corresponding info table lists fewer than three entries.
// NOTE(review): no column in column_info is assigned to the "error_reason"
// group (the error pie sits in "overall") — confirm whether this entry is needed.
column_groups = [
  ["method", null, ""],
  ["overall", "overall", "Overall"],
  ["error_reason", "error_reason", "Error reason"],
  ["dataset", "dataset", dataset_info.length >= 3 ? "Datasets" : ""],
  ["metric", "metric", metric_info.length >= 3 ? "Metrics" : ""],
  ["resources", "resources", "Resources"]
].map(([group, palette, level1]) => ({group, palette, level1}))

// Palette per column group. The object literal is parenthesised so it is read
// as an expression rather than a block.
// error_reason: colors[i] belongs to names[i], in the same order as the
// `error_reason` array built in `resources` (OOM, timeout, error, ok).
palettes = ({
  overall: "Greys",
  dataset: "Blues",
  metric: "Reds",
  resources: "YlOrBr",
  error_reason: {
    colors: ["#8DD3C7", "#FFFFB3", "#BEBADA", "#FFFFFF"],
    names: ["Memory limit exceeded", "Time limit exceeded", "Execution error", "No error"]
  }
})

// Render the summary figure. The row-wise tables (summary_all, column_info,
// column_groups) are converted to objects of column arrays before being
// handed to funkyheatmap.
// NOTE(review): the two empty arrays are presumably the row-info and
// row-groups slots of the funkyheatmapjs signature — confirm against the
// library documentation for the pinned version.
funkyheatmap(
    transpose_list_of_objects(summary_all),
    transpose_list_of_objects(column_info),
    [],
    transpose_list_of_objects(column_groups),
    [],
    palettes,
    {
        fontSize: 14,
        rowHeight: 26,
        rootStyle: 'max-width: none',
        // Driven by the "Color by rank" toggle.
        colorByRank: color_by_rank,
        // Colours come from CSS custom properties (var(--bs-...)) rather than fixed values.
        theme: {
            oddRowBackground: 'var(--bs-body-bg)',
            evenRowBackground: 'var(--bs-button-hover)',
            textColor: 'var(--bs-body-color)',
            strokeColor: 'var(--bs-body-color)',
            headerColor: 'var(--bs-body-color)',
            hoverColor: 'var(--bs-body-color)'
        }
    },
    // Driven by the "Minmax column" toggle.
    scale_column
);

Figure 1: Overview of the results per method. This figure shows the mean of the scaled scores (group Overall), the mean scores per dataset (group Dataset) and the mean scores per metric (group Metric).

Display settings

// Display toggles for the summary figure:
//   color_by_rank - passed to funkyheatmap as the `colorByRank` option (on by default)
//   scale_column  - passed to funkyheatmap as its last argument (off by default)
//   show_con      - when off, methods flagged `is_baseline` are left out of summary_all (on by default)
viewof color_by_rank = Inputs.toggle({label: "Color by rank:", value: true})
viewof scale_column = Inputs.toggle({label: "Minmax column:", value: false})
viewof show_con = Inputs.toggle({label: "Show control methods:", value: true})

Filter datasets

// Dataset filter. The options are the dataset_info entries whose id is in
// poss_dataset_ids; each option is labelled with its dataset_name and
// contributes its dataset_id to the selection.
// NOTE(review): the initial `value` lists every id in dataset_info, including
// ids that are not offered as options — presumably those are ignored by
// Inputs.checkbox; confirm.
viewof dataset_ids = Inputs.checkbox(
  dataset_info.filter(d => poss_dataset_ids.includes(d.dataset_id)),
  {
    keyof: d => d.dataset_name,
    valueof: d => d.dataset_id,
    value: dataset_info.map(d => d.dataset_id),
    label: "Datasets:"
  }
)

Filter methods

// Method filter. The options are the method_info entries whose id is in
// poss_method_ids; each option is labelled with its method_name and
// contributes its method_id to the selection.
// NOTE(review): the initial `value` lists every id in method_info, including
// ids that are not offered as options — presumably those are ignored by
// Inputs.checkbox; confirm.
viewof method_ids = Inputs.checkbox(
  method_info.filter(d => poss_method_ids.includes(d.method_id)),
  {
    keyof: d => d.method_name,
    valueof: d => d.method_id,
    value: method_info.map(d => d.method_id),
    label: "Methods:"
  }
)

Filter metrics

// Metric filter. The options are the metric_info entries whose id is in
// poss_metric_ids; each option is labelled with its metric_name and
// contributes its metric_id to the selection.
// NOTE(review): the initial `value` lists every id in metric_info, including
// ids that are not offered as options — presumably those are ignored by
// Inputs.checkbox; confirm.
viewof metric_ids = Inputs.checkbox(
  metric_info.filter(d => poss_metric_ids.includes(d.metric_id)),
  {
    keyof: d => d.metric_name,
    valueof: d => d.metric_id,
    value: metric_info.map(d => d.metric_id),
    label: "Metrics:"
  }
)

// Loads d3 v7, publishes it and `_` on `window`, then dynamically imports
// funkyheatmapjs 0.2.5 from unpkg and keeps the module's default export.
// NOTE(review): `_` is not defined in this section — presumably supplied by
// the notebook runtime (e.g. lodash from the Observable standard library); confirm.
// NOTE(review): the globals are presumably set because funkyheatmapjs looks
// them up on `window` — confirm against the library.
funkyheatmap = (await require('d3@7').then(d3 => {
  window.d3 = d3;
  window._ = _;
  return import('https://unpkg.com/funkyheatmapjs@0.2.5');
})).default;

Results

Results table of the scores per method, dataset and metric (after scaling). Use the filters to make a custom subselection of methods and datasets. The “Overall mean” dataset is the mean value across all datasets.

Dataset info

Show

GTEX v9

Single-nucleus cross-tissue molecular reference maps to decipher disease gene function (Eraslan et al. 2022).

Understanding the function of genes and their regulation in tissue homeostasis and disease requires knowing the cellular context in which genes are expressed in tissues across the body. Single cell genomics allows the generation of detailed cellular atlases in human tissues, but most efforts are focused on single tissue types. Here, we establish a framework for profiling multiple tissues across the human body at single-cell resolution using single nucleus RNA-Seq (snRNA-seq), and apply it to 8 diverse, archived, frozen tissue types (three donors per tissue). We apply four snRNA-seq methods to each of 25 samples from 16 donors, generating a cross-tissue atlas of 209,126 nuclei profiles, and benchmark them vs. scRNA-seq of comparable fresh tissues. We use a conditional variational autoencoder (cVAE) to integrate an atlas across tissues, donors, and laboratory methods. We highlight shared and tissue-specific features of tissue-resident immune cells, identifying tissue-restricted and non-restricted resident myeloid populations. These include a cross-tissue conserved dichotomy between LYVE1- and HLA class II-expressing macrophages, and the broad presence of LAM-like macrophages across healthy tissues that is also observed in disease. For rare, monogenic muscle diseases, we identify cell types that likely underlie the neuromuscular, metabolic, and immune components of these diseases, and biological processes involved in their pathology. For common complex diseases and traits analyzed by GWAS, we identify the cell types and gene modules that potentially underlie disease mechanisms. The experimental and analytical frameworks we describe will enable the generation of large-scale studies of how cellular and molecular processes vary across individuals and populations.

Diabetic Kidney Disease

Multimodal single cell sequencing implicates chromatin accessibility and genetic background in diabetic kidney disease progression (Wilson et al. 2022).

Multimodal single cell sequencing is a powerful tool for interrogating cell-specific changes in transcription and chromatin accessibility. We performed single nucleus RNA (snRNA-seq) and assay for transposase accessible chromatin sequencing (snATAC-seq) on human kidney cortex from donors with and without diabetic kidney disease (DKD) to identify altered signaling pathways and transcription factors associated with DKD. Both snRNA-seq and snATAC-seq had an increased proportion of VCAM1+ injured proximal tubule cells (PT_VCAM1) in DKD samples. PT_VCAM1 has a pro-inflammatory expression signature and transcription factor motif enrichment implicated NFkB signaling. We used stratified linkage disequilibrium score regression to partition heritability of kidney-function-related traits using publicly-available GWAS summary statistics. Cell-specific PT_VCAM1 peaks were enriched for heritability of chronic kidney disease (CKD), suggesting that genetic background may regulate chromatin accessibility and DKD progression. snATAC-seq found cell-specific differentially accessible regions (DAR) throughout the nephron that change accessibility in DKD and these regions were enriched for glucocorticoid receptor (GR) motifs. Changes in chromatin accessibility were associated with decreased expression of insulin receptor, increased gluconeogenesis, and decreased expression of the GR cytosolic chaperone, FKBP5, in the diabetic proximal tubule. Cleavage under targets and release using nuclease (CUT&RUN) profiling of GR binding in bulk kidney cortex and an in vitro model of the proximal tubule (RPTEC) showed that DAR co-localize with GR binding sites. CRISPRi silencing of GR response elements (GRE) in the FKBP5 gene body reduced FKBP5 expression in RPTEC, suggesting that reduced FKBP5 chromatin accessibility in DKD may alter cellular response to GR. 
We developed an open-source tool for single cell allele specific analysis (SALSA) to model the effect of genetic background on gene expression. Heterozygous germline single nucleotide variants (SNV) in proximal tubule ATAC peaks were associated with allele-specific chromatin accessibility and differential expression of target genes within cis-coaccessibility networks. Partitioned heritability of proximal tubule ATAC peaks with a predicted allele-specific effect was enriched for eGFR, suggesting that genetic background may modify DKD progression in a cell-specific manner.

Human immune

Human immune cells dataset from the scIB benchmarks (Luecken et al. 2021).

Human immune cells from peripheral blood and bone marrow taken from 5 datasets comprising 10 batches across technologies (10X, Smart-seq2).

Human Lung Cell Atlas

An integrated cell atlas of the human lung in health and disease (core) (Sikkema et al. 2023).

The integrated Human Lung Cell Atlas (HLCA) represents the first large-scale, integrated single-cell reference atlas of the human lung. It consists of over 2 million cells from the respiratory tract of 486 individuals, and includes 49 different datasets. It is split into the HLCA core, and the extended or full HLCA. The HLCA core includes data of healthy lung tissue from 107 individuals, and includes manual cell type annotations based on consensus across 6 independent experts, as well as demographic, biological and technical metadata.

Human pancreas

Human pancreas cells dataset from the scIB benchmarks (Luecken et al. 2021).

Human pancreatic islet scRNA-seq data from 6 datasets across technologies (CEL-seq, CEL-seq2, Smart-seq2, inDrop, Fluidigm C1, and SMARTER-seq).

Mouse Pancreatic Islet Atlas

Mouse pancreatic islet scRNA-seq atlas across sexes, ages, and stress conditions including diabetes (Hrovatin et al. 2023).

To better understand pancreatic β-cell heterogeneity we generated a mouse pancreatic islet atlas capturing a wide range of biological conditions. The atlas contains scRNA-seq datasets of over 300,000 mouse pancreatic islet cells, of which more than 100,000 are β-cells, from nine datasets with 56 samples, including two previously unpublished datasets. The samples vary in sex, age (ranging from embryonic to aged), chemical stress, and disease status (including T1D NOD model development and two T2D models, mSTZ and db/db) together with different diabetes treatments. Additional information about data fields is available in anndata uns field ‘field_descriptions’ and on https://github.com/theislab/mm_pancreas_atlas_rep/blob/main/resources/cellxgene.md.

HypoMap

A unified single cell gene expression atlas of the murine hypothalamus (Steuernagel et al. 2022).

The hypothalamus plays a key role in coordinating fundamental body functions. Despite recent progress in single-cell technologies, a unified catalogue and molecular characterization of the heterogeneous cell types and, specifically, neuronal subtypes in this brain region are still lacking. Here we present an integrated reference atlas “HypoMap” of the murine hypothalamus consisting of 384,925 cells, with the ability to incorporate new additional experiments. We validate HypoMap by comparing data collected from SmartSeq2 and bulk RNA sequencing of selected neuronal cell types with different degrees of cellular heterogeneity.

CeNGEN

Complete Gene Expression Map of an Entire Nervous System (Hammarlund et al. 2018).

100k FACS-isolated C. elegans neurons from 17 experiments sequenced on 10x Genomics.

Immune Cell Atlas

Cross-tissue immune cell analysis reveals tissue-specific features in humans (Domínguez Conde et al. 2022).

Despite their crucial role in health and disease, our knowledge of immune cells within human tissues remains limited. We surveyed the immune compartment of 16 tissues from 12 adult donors by single-cell RNA sequencing and VDJ sequencing generating a dataset of ~360,000 cells. To systematically resolve immune cell heterogeneity across tissues, we developed CellTypist, a machine learning tool for rapid and precise cell type annotation. Using this approach, combined with detailed curation, we determined the tissue distribution of finely phenotyped immune cell types, revealing hitherto unappreciated tissue-specific features and clonal architecture of T and B cells. Our multitissue approach lays the foundation for identifying highly resolved immune cell types by leveraging a common reference dataset, tissue-integrated expression analysis, and antigen receptor sequencing.

Zebrafish embryonic cells

Single-cell mRNA sequencing of zebrafish embryonic cells (Wagner et al. 2018).

90k cells from zebrafish embryos throughout the first day of development, with and without a knockout of chordin, an important developmental gene.

Tabula Sapiens

A multiple-organ, single-cell transcriptomic atlas of humans (Jones et al. 2022).

Tabula Sapiens is a benchmark, first-draft human cell atlas of nearly 500,000 cells from 24 organs of 15 normal human subjects. This work is the product of the Tabula Sapiens Consortium. Taking the organs from the same individual controls for genetic background, age, environment, and epigenetic effects and allows detailed analysis and comparison of cell types that are shared between tissues. Our work creates a detailed portrait of cell types as well as their distribution and variation in gene expression across tissues and within the endothelial, epithelial, stromal and immune compartments.

Method info

Show

Cell2Location

Cell2location uses a Bayesian model to resolve cell types in spatial transcriptomic data and create comprehensive cellular maps of diverse tissues (Kleshchevnikov et al. 2022). Links: Docs.

Cell2location is a decomposition method based on Negative Binomial regression that is able to account for batch effects in estimating the single-cell gene expression signature used for the spatial decomposition step. Note that when batch information is unavailable for this task, we can use either a hard-coded reference, or a negative-binomial learned reference without batch labels. The parameter alpha refers to the detection efficiency prior.

DestVI

DestVI is a probabilistic method for multi-resolution analysis for spatial transcriptomics that explicitly models continuous variation within cell types (Lopez et al. 2022). Links: Docs.

Deconvolution of Spatial Transcriptomics profiles using Variational Inference (DestVI) is a spatial decomposition method that leverages a conditional generative model of spatial transcriptomics down to the sub-cell-type variation level, which is then used to decompose the cell-type proportions determining the spatial organization of a tissue.

NMFreg

NMFreg reconstructs gene expression as a weighted combination of cell type signatures defined by scRNA-seq (Rodriques et al. 2019). Links: Docs.

Non-Negative Matrix Factorization regression (NMFreg) is a decomposition method that reconstructs expression of each spatial location as a weighted combination of cell-type signatures defined by scRNA-seq. It was originally developed for Slide-seq data. This is a re-implementation from https://github.com/tudaga/NMFreg_tutorial.

NNLS

NNLS is a decomposition method based on Non-Negative Least Square Regression (Aliee and Theis 2021). Links: Docs.

Non-Negative Least Squares (NNLS) is a convex optimization problem with convex constraints. It was used by the AutoGeneS method to infer cellular proportions by solving a multi-objective optimization problem.

RCTD

RCTD learns cell type profiles from scRNA-seq to decompose cell type mixtures while correcting for differences across sequencing technologies (Cable et al. 2021). Links: Docs.

RCTD (Robust Cell Type Decomposition) is a decomposition method that uses signatures learnt from single-cell data to decompose spatial expression of tissues. It is able to use a platform effect normalization step, which normalizes the scRNA-seq cell type profiles to match the platform effects of the spatial transcriptomics dataset.

Seurat

Seurat method that is based on Canonical Correlation Analysis (CCA) (Stuart et al. 2019). Links: Docs.

This method applies the ‘anchor’-based integration workflow introduced in Seurat v3, which enables the probabilistic transfer of annotations from a reference to a query set. First, mutual nearest neighbors (anchors) are identified from the reference scRNA-seq and query spatial datasets. Then, annotations are transferred from the single-cell reference data to the spatial data along with prediction scores for each spot.

Stereoscope

Stereoscope is a decomposition method based on Negative Binomial regression (Andersson et al. 2020). Links: Docs.

Stereoscope is a decomposition method based on Negative Binomial regression. It is similar in scope and implementation to cell2location but less flexible in incorporating additional covariates such as batch effects and other types of experimental design annotations.

Tangram

Tangram maps single-cell gene expression data onto spatial gene expression data by fitting gene expression on shared genes (Biancalani et al. 2021). Links: Docs.

Tangram is a method to map gene expression signatures from scRNA-seq data to spatial data. It performs the cell type mapping by learning a similarity matrix between single-cell and spatial locations based on gene expression profiles.

NMF

NMF reconstructs gene expression as a weighted combination of cell type signatures defined by scRNA-seq (Cichocki and Phan 2009). Links: Docs.

NMF is a decomposition method based on Non-negative Matrix Factorization (NMF) that reconstructs expression of each spatial location as a weighted combination of cell-type signatures defined by scRNA-seq. It is a simpler baseline than NMFreg as it only performs the NMF step based on mean expression signatures of cell types, returning the weights loading of the NMF as (normalized) cell type proportions, without the regression step.

Control method info

Show

Random Proportions

Negative control method that randomly assigns celltype proportions from a Dirichlet distribution

A negative control method with random assignment of predicted celltype proportions from a Dirichlet distribution.

True Proportions

Positive control method that assigns celltype proportions from the ground truth

A positive control method with perfect assignment of predicted celltype proportions from the ground truth.

Metric info

Show

R2

R2 represents the proportion of variance in the true proportions which is explained by the predicted proportions (Miles 2005).

R2, or the “coefficient of determination”, reports the fraction of the true proportion values’ variance that can be explained by the predicted proportion values. The best score, and upper bound, is 1.0. There is no fixed lower bound for the metric. The uniform/non-weighted average across all cell types/states is used to summarise performance. By default, cases resulting in a score of NaN (perfect predictions) or -Inf (imperfect predictions) are replaced with 1.0 (perfect predictions) or 0.0 (imperfect predictions) respectively.

Quality control results

Show

Category	Name	Value	Condition	Severity
Raw results	Method 'destvi' %missing	0.7575758	pct_missing <= .1	✗✗✗
Raw results	Method 'rctd' %missing	0.6969697	pct_missing <= .1	✗✗✗
Raw results	Metric 'r2' %missing	0.6776860	pct_missing <= .1	✗✗✗
Raw data	Number of results	121.0000000	len(results) == len(method_info) * len(metric_info) * len(dataset_info)	✗✗✗
Raw results	Method 'cell2location' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'nmfreg' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'nnls' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'random_proportions' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'seurat' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'stereoscope' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'tangram' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'true_proportions' %missing	0.6666667	pct_missing <= .1	✗✗✗
Raw results	Method 'vanillanmf' %missing	0.6666667	pct_missing <= .1	✗✗✗
Scaling	Worst score cell2location r2	-3.4575000	worst_score >= -1	✗✗✗
Scaling	Worst score nnls r2	-3.3901000	worst_score >= -1	✗✗✗
Scaling	Worst score seurat r2	-3.0585000	worst_score >= -1	✗✗✗
Scaling	Worst score nmfreg r2	-2.2265000	worst_score >= -1	✗✗
Raw results	Dataset 'cellxgene_census/tabula_sapiens' %missing	0.1818182	pct_missing <= .1	✗
Raw results	Dataset 'cellxgene_census/tabula_sapiens' %missing	0.1818182	pct_missing <= .1	✗
Raw results	Dataset 'cellxgene_census/tabula_sapiens' %missing	0.1818182	pct_missing <= .1	✗

Normalisation visualisation

Show

Authors

Aliee, Hananeh, and Fabian J. Theis. 2021. “AutoGeneS: Automatic Gene Selection Using Multi-Objective Optimization for RNA-Seq Deconvolution.” Cell Systems 12 (7): 706–715.e4. https://doi.org/10.1016/j.cels.2021.05.006.

Andersson, Alma, Joseph Bergenstråhle, Michaela Asp, Ludvig Bergenstråhle, Aleksandra Jurek, José Fernández Navarro, and Joakim Lundeberg. 2020. “Single-Cell and Spatial Transcriptomics Enables Probabilistic Inference of Cell Type Topography.” Communications Biology 3 (1). https://doi.org/10.1038/s42003-020-01247-y.

Biancalani, Tommaso, Gabriele Scalia, Lorenzo Buffoni, Raghav Avasthi, Ziqing Lu, Aman Sanger, Neriman Tokcan, et al. 2021. “Deep Learning and Alignment of Spatially Resolved Single-Cell Transcriptomes with Tangram.” Nature Methods 18 (11): 1352–62. https://doi.org/10.1038/s41592-021-01264-7.

Cable, Dylan M., Evan Murray, Luli S. Zou, Aleksandrina Goeva, Evan Z. Macosko, Fei Chen, and Rafael A. Irizarry. 2021. “Robust Decomposition of Cell Type Mixtures in Spatial Transcriptomics.” Nature Biotechnology 40 (4): 517–26. https://doi.org/10.1038/s41587-021-00830-w.

Cichocki, Andrzej, and Anh-Huy Phan. 2009. “Fast Local Algorithms for Large Scale Nonnegative Matrix and Tensor Factorizations.” IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences E92-a (3): 708–21. https://doi.org/10.1587/transfun.e92.a.708.

Domínguez Conde, C., C. Xu, L. B. Jarvis, D. B. Rainbow, S. B. Wells, T. Gomes, S. K. Howlett, et al. 2022. “Cross-Tissue Immune Cell Analysis Reveals Tissue-Specific Features in Humans.” Science 376 (6594). https://doi.org/10.1126/science.abl5197.

Eraslan, Gökcen, Eugene Drokhlyansky, Shankara Anand, Evgenij Fiskin, Ayshwarya Subramanian, Michal Slyper, Jiali Wang, et al. 2022. “Single-Nucleus Cross-Tissue Molecular Reference Maps Toward Understanding Disease Gene Function.” Science 376 (6594). https://doi.org/10.1126/science.abl4290.

Hammarlund, Marc, Oliver Hobert, David M. Miller, and Nenad Sestan. 2018. “The CeNGEN Project: The Complete Gene Expression Map of an Entire Nervous System.” Neuron 99 (3): 430–33. https://doi.org/10.1016/j.neuron.2018.07.042.

Hrovatin, Karin, Aimée Bastidas-Ponce, Mostafa Bakhti, Luke Zappia, Maren Büttner, Ciro Sallino, Michael Sterr, et al. 2023. “Delineating Mouse β-Cell Identity During Lifetime and in Diabetes with a Single Cell Atlas.” bioRxiv. https://doi.org/10.1101/2022.12.22.521557.

Jones, Robert C., Jim Karkanias, Mark A. Krasnow, Angela Oliveira Pisco, Stephen R. Quake, Julia Salzman, Nir Yosef, et al. 2022. “The Tabula Sapiens: A Multiple-Organ, Single-Cell Transcriptomic Atlas of Humans.” Science 376 (6594). https://doi.org/10.1126/science.abl4896.

Kleshchevnikov, Vitalii, Artem Shmatko, Emma Dann, Alexander Aivazidis, Hamish W. King, Tong Li, Rasa Elmentaite, et al. 2022. “Cell2location Maps Fine-Grained Cell Types in Spatial Transcriptomics.” Nature Biotechnology 40 (5): 661–71. https://doi.org/10.1038/s41587-021-01139-4.

Lopez, Romain, Baoguo Li, Hadas Keren-Shaul, Pierre Boyeau, Merav Kedmi, David Pilzer, Adam Jelinski, et al. 2022. “DestVI Identifies Continuums of Cell Types in Spatial Transcriptomics Data.” Nature Biotechnology 40 (9): 1360–69. https://doi.org/10.1038/s41587-022-01272-8.

Luecken, Malte D., M. Büttner, K. Chaichoompu, A. Danese, M. Interlandi, M. F. Mueller, D. C. Strobl, et al. 2021. “Benchmarking Atlas-Level Data Integration in Single-Cell Genomics.” Nature Methods 19 (1): 41–50. https://doi.org/10.1038/s41592-021-01336-8.

Miles, Jeremy. 2005. “Encyclopedia of Statistics in Behavioral Science.” In. John Wiley & Sons, Ltd. https://doi.org/10.1002/0470013192.bsa526.

Rodriques, Samuel G., Robert R. Stickels, Aleksandrina Goeva, Carly A. Martin, Evan Murray, Charles R. Vanderburg, Joshua Welch, Linlin M. Chen, Fei Chen, and Evan Z. Macosko. 2019. “Slide-Seq: A Scalable Technology for Measuring Genome-Wide Expression at High Spatial Resolution.” Science 363 (6434): 1463–67. https://doi.org/10.1126/science.aaw1219.

Sikkema, Lisa, Ciro Ramírez-Suástegui, Daniel C. Strobl, Tessa E. Gillett, Luke Zappia, Elo Madissoon, Nikolay S. Markov, et al. 2023. “An Integrated Cell Atlas of the Lung in Health and Disease.” Nature Medicine 29 (6): 1563–77. https://doi.org/10.1038/s41591-023-02327-2.

Steuernagel, Lukas, Brian Y. H. Lam, Paul Klemm, Georgina K. C. Dowsett, Corinna A. Bauder, John A. Tadross, Tamara Sotelo Hitschfeld, et al. 2022. “HypoMap—a Unified Single-Cell Gene Expression Atlas of the Murine Hypothalamus.” Nature Metabolism 4 (10): 1402–19. https://doi.org/10.1038/s42255-022-00657-y.

Stuart, T., A. Butler, P. Hoffman, C. Hafemeister, E. Papalexi, W. M. Mauck, Y. Hao, M. Stoeckius, P. Smibert, and R. Satija. 2019. “Comprehensive Integration of Single-Cell Data.” Cell 177 (7): 1888–1902.e21. https://doi.org/10.1016/j.cell.2019.05.031.

Wagner, Daniel E., Caleb Weinreb, Zach M. Collins, James A. Briggs, Sean G. Megason, and Allon M. Klein. 2018. “Single-Cell Mapping of Gene Expression Landscapes and Lineage in the Zebrafish Embryo.” Science 360 (6392): 981–87. https://doi.org/10.1126/science.aar4362.

Wilson, Parker C., Yoshiharu Muto, Haojia Wu, Anil Karihaloo, Sushrut S. Waikar, and Benjamin D. Humphreys. 2022. “Multimodal Single Cell Sequencing Implicates Chromatin Accessibility and Genetic Background in Diabetic Kidney Disease Progression.” Nature Communications 13 (1). https://doi.org/10.1038/s41467-022-32972-z.