From 647a9ed6a67ea13ad68dd3f2bdf24cdf18bfe9ce Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla
Date: Sun, 9 Oct 2022 09:36:06 -0700
Subject: [PATCH] Support Summarized Experiment derivatives (RDS file) (#145)
* feat: Load any single-cell dataset stored as a `SummarizedExperiment` or `SingleCellExperiment` derivative
* docs: update documentation
Co-authored-by: LTLA
---
README.md | 2 +-
package.json | 4 +--
src/components/Analysis/index.js | 55 +++++++++++++++++++++++++++++---
3 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
index 80efc982..f3eb69f2 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Our client-side approach has a number of advantages:
## For users
-If you have a Matrix Market (`.mtx`) file or HDF5 (tenx V3 or AnnData representation stored as h5ad), you're ready to go.
+If you have a Matrix Market (`.mtx`) file, an HDF5 file (10X V3 or `AnnData` representation stored as h5ad), or a `SummarizedExperiment` (or a derivative such as `SingleCellExperiment`) stored as an RDS file, you're ready to go.
1. Launch the application by clicking [**here**](https://www.jkanche.com/kana/).
2. Select the Matrix Market file (this may be Gzip-compressed).
diff --git a/package.json b/package.json
index a728eb7d..828404bb 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
{
"name": "kana",
"description": "Single cell data analysis in the browser",
- "version": "2.3.1",
+ "version": "2.4.0",
"private": true,
"author": {
"name": "Jayaram Kancherla",
@@ -32,7 +32,7 @@
"@testing-library/jest-dom": "^5.16.5",
"@testing-library/react": "^11.1.0",
"@testing-library/user-event": "^12.1.10",
- "bakana": "^0.5.0",
+ "bakana": "^0.6.0",
"d3": "^7.1.1",
"epiviz.gl": "^1.0.2",
"epiviz.scatter.gl": "^0.0.5",
diff --git a/src/components/Analysis/index.js b/src/components/Analysis/index.js
index cb7bed2b..e3d9dd26 100644
--- a/src/components/Analysis/index.js
+++ b/src/components/Analysis/index.js
@@ -46,6 +46,7 @@ const AnalysisDialog = ({
genes: "Choose feature/gene file",
annotations: "Choose barcode/annotation file",
file: "Choose file...",
+ rds: "Choose file..."
});
let [stmpInputFiles, ssetTmpInputFiles] = useState({
@@ -218,9 +219,19 @@ const AnalysisDialog = ({
) {
all_valid = false;
}
+
+ if (
+ x?.rds && !(
+ inputText[ix]?.rds.toLowerCase().endsWith("rds")
+ )
+ ) {
+ all_valid = false;
+ }
if (x.format === "MatrixMarket") {
if (!x.mtx) all_valid = false;
+ } else if (x.format === "SummarizedExperiment") {
+ if (!x.rds) all_valid = false;
} else {
if (!x.h5) all_valid = false;
}
@@ -323,11 +334,20 @@ const AnalysisDialog = ({
}
if (!x.h5 && (sinputText?.file !== "Choose file...")) all_valid = false;
+ } else if (
+ x.format === "SummarizedExperiment") {
+ if (x?.rds && !(
+ sinputText?.rds.toLowerCase().endsWith("rds")
+ )
+ ) {
+ all_valid = false;
+ }
+
+ if (!x.rds && (sinputText?.file !== "Choose file...")) all_valid = false;
}
// setTmpInputValid(all_valid);
ssetTmpInputValid(all_valid);
-
}
}
}, [stmpInputFiles]);
@@ -1103,7 +1123,7 @@ const AnalysisDialog = ({
}}
defaultSelectedTabId={newImportFormat}
>
-
} />
-
} />
+
+
+
+ } />
@@ -1490,6 +1527,8 @@ const AnalysisDialog = ({
if (row.annotations) {
tname += ` annotations: ${row.annotations.name} `;
}
+ } else if (row["format"] === "SummarizedExperiment") {
+ tname += ` file: ${row.rds.name} `;
} else {
tname += ` file: ${row.h5.name} `;
}
@@ -1948,7 +1987,8 @@ const AnalysisDialog = ({
Matrix Market - *.mtx or *.mtx.gz
features or genes, *.tsv or *.tsv.gz
-
HDF5 (10x or h5ad) - *.h5 or *.hdf5 or *.h5ad
+
HDF5 (10X or H5AD) - *.h5 or *.hdf5 or *.h5ad
+
RDS - *.rds
Note: Names of dataset must be unique!
@@ -1977,6 +2017,13 @@ const AnalysisDialog = ({
We will also try to guess which field in the obs annotation contains gene symbols.
+
+ A SummarizedExperiment object saved in the RDS (*.rds) format.
+ We support any SummarizedExperiment subclass containing a dense or sparse count matrix
+ (identified as any assay with a name starting with "counts" or, if none exists, the first assay).
+ For a SingleCellExperiment, any alternative experiment with a name starting with "hto", "adt" or "antibody" is assumed to represent CITE-seq data.
+
+
Batch correction: you can now import more than one file to integrate and analyze datasets.
If you only import a single dataset, specify the annotation column that contains the batch information.