Tonsil
This report includes stats and diagnostic plots of the Nanostring CosMx Spatial Molecular Imager (SMI) data. It was generated in the CosMx training session with two Tonsil samples. The QC metrics and workflows were adopted from the paper (https://doi.org/10.1038/s41467-023-43458-x) where the Authors analyzed 10X Genomics Xenium In Situ data.
# Do not run
baseDir <- "WORKING_DIRECTORY" # where the CosMx folders/files are located
setwd(baseDir)
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(ggpubr))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(ggridges))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(RColorBrewer))
if (file.exists(file.path(outDir, "01_rawFlats.CosMx.RDS"))) {
rawFlatL <- readRDS(file.path(outDir, "01_rawFlats.CosMx.RDS"))
} else {
rawFlatL <- lapply(seq_along(slideOrder), function(idx) {
slidePath <- file.path(baseDir, paste0("FlatFiles_", projectTag), slideOrder[idx])
# LoadNanostring function was imported from the Seurat package
slideObj <- LoadNanostring(data.dir = slidePath, fov = slideOrder[idx])
return(slideObj)
})
names(rawFlatL) <- slideOrder
saveRDS(rawFlatL, file.path(outDir, "01_rawFlats.CosMx.RDS"))
}
rawFlatL
## $Tonsil_1
## An object of class Seurat
## 1207 features across 215312 samples within 1 assay
## Active assay: Nanostring (1207 features, 0 variable features)
## 1 layer present: counts
## 1 spatial field of view present: Tonsil_1
##
## $Tonsil_2
## An object of class Seurat
## 1207 features across 218421 samples within 1 assay
## Active assay: Nanostring (1207 features, 0 variable features)
## 1 layer present: counts
## 1 spatial field of view present: Tonsil_2
if (file.exists(file.path(outDir, "02_metadata.CosMx.RDS"))) {
metadataL <- readRDS(file.path(outDir, "02_metadata.CosMx.RDS"))
} else {
metadataL <- lapply(seq_along(slideOrder), function(idx) {
metadataPath <- file.path(baseDir, paste0("FlatFiles_", projectTag), slideOrder[idx], paste0(slideOrder[idx], "_metadata_file.csv.gz"))
metadata <- read.csv(metadataPath, header=T)
return(metadata)
})
names(metadataL) <- slideOrder
saveRDS(metadataL, file.path(outDir, "02_metadata.CosMx.RDS"))
}
metadataDf <- do.call("rbind", metadataL)
metadataDf <- metadataDf[order(metadataDf$fov, metadataDf$cell_id, metadataDf$slide_ID),]
metadataDf$fov <- factor(metadataDf$fov, levels=unique(metadataDf$fov))
metadataDf$Run_Tissue_name <- factor(metadataDf$Run_Tissue_name, levels=names(slideCols))
mSubDf <- metadataDf[,c("nCount_RNA", "nFeature_RNA", "Area", "fov", "Run_Tissue_name")]
fovstatsL <- lapply(seq_along(metadataL), function(idx) {
metadata <- metadataL[[idx]][,c("nCount_RNA", "nFeature_RNA", "Area", "fov", "Run_Tissue_name")]
fovstats <- c()
for (fov in unique(metadata$fov)) {
buff <- metadata[which(metadata$fov == fov),]
ncells <- nrow(buff)
ncount <- sum(buff$nCount_RNA) / ncells
nfeature <- sum(buff$nFeature_RNA) / ncells
area <- sum(buff$Area) / ncells
fovstats <- rbind(fovstats, c(idx, fov, ncells, ncount, nfeature, area))
}
return(fovstats)
})
fovstatsDf <- do.call("rbind", fovstatsL)
fovstatsDf <- apply(fovstatsDf, 2, as.numeric)
fovstatsDf <- as.data.frame(fovstatsDf)
colnames(fovstatsDf) <- c("Slide", "FOV", "nCells", "nCount/cell", "nFeature/cell", "Area/cell")
fovstatsDf$Slide <- factor(fovstatsDf$Slide, levels=seq_along(metadataL), labels=names(slideCols))
ridgePlot(df = mSubDf, x = "nCount_RNA", y = "fov", by = "Run_Tissue_name", cols = slideCols)
ridgePlot(df = mSubDf, x = "nFeature_RNA", y = "fov", by = "Run_Tissue_name", cols = slideCols)
ridgePlot(df = mSubDf, x = "Area", y = "fov", by = "Run_Tissue_name", cols = slideCols)
perFovPlot(df = fovstatsDf, x = "Slide", y = "nCount/cell", label = "nCount / cell", cols = slideCols)
perFovPlot(df = fovstatsDf, x = "Slide", y = "nFeature/cell", label = "nFeature / cell", cols = slideCols)
perFovPlot(df = fovstatsDf, x = "Slide", y = "Area/cell", label = "Area / cell", cols = slideCols)
perFovPlot(df = fovstatsDf, x = "Slide", y = "nCells", label = "Number of cells", cols = slideCols)
rsdL <- lapply(seq_along(rawFlatL), function(idx) {
slidename <- names(rawFlatL)[idx]
slide <- rawFlatL[[idx]]
mat <- t(as.matrix(slide@assays$Nanostring$counts)) # TO-DOs: optimization
columnids <- data.frame(
fov = sapply(str_split(rownames(mat), "_"), "[[", 2),
cellid = sapply(str_split(rownames(mat), "_"), "[[", 1)
)
fovMat <- c()
for (fov in sort(unique(columnids$fov))) {
idx <- which(columnids$fov == fov)
sumMat <- apply(mat[idx, ], 2, sum)
fovMat <- rbind(fovMat, sumMat)
}
rownames(fovMat) <- sort(unique(columnids$fov))
negIdx <- which(str_detect(colnames(fovMat), "^Neg"))
if (length(negIdx) > 0) {
fovMat <- fovMat[,-negIdx]
}
sysIdx <- which(str_detect(colnames(fovMat), "^Sys"))
if (length(sysIdx) > 0) {
fovMat <- fovMat[,-sysIdx]
}
chisq <- chisq.test(fovMat)
residual <- chisq$residuals
resSD <- apply(residual, 1, sd)
return(data.frame(Slide = slidename, FOV = names(resSD), RSD = as.numeric(resSD)))
})
names(rsdL) <- names(slideCols)
residualSD <- do.call("rbind", rsdL)
residualSD$Slide <- factor(residualSD$Slide, levels=names(slideCols))
perFovPlot(df = residualSD, x = "Slide", y = "RSD", label = "Residual SD", cols = slideCols)
lowRsdFovs <- lapply(seq_along(rsdL), function(idx) {
slidename <- names(rsdL)[idx]
rsd <- rsdL[[idx]]
return(rsd$FOV[which(rsd$RSD < quantile(rsd$RSD, rsdPerc))])
})
ecdfL <- lapply(seq_along(rsdL), function(idx) {
slide <- names(rsdL)[idx]
tmpDf <- mSubDf[which(mSubDf$Run_Tissue_name == slide),]
fovCols <- rep("grey60", length(sort(unique(tmpDf$fov))))
names(fovCols) <- sort(unique(tmpDf$fov))
fovCols[which(names(fovCols) %in% lowRsdFovs[[idx]])] <- "red"
ggCount <- ecdfPlot(df = tmpDf, x = "nCount_RNA", y = "fov", cols = fovCols, label = slide)
return(ggCount)
})
print(ecdfL)
ecdfL <- lapply(seq_along(rsdL), function(idx) {
slide <- names(rsdL)[idx]
tmpDf <- mSubDf[which(mSubDf$Run_Tissue_name == slide),]
fovCols <- rep("grey60", length(sort(unique(tmpDf$fov))))
names(fovCols) <- sort(unique(tmpDf$fov))
fovCols[which(names(fovCols) %in% lowRsdFovs[[idx]])] <- "red"
ggFeature <- ecdfPlot(df = tmpDf, x = "nFeature_RNA", y = "fov", cols = fovCols, label = slide)
return(ggFeature)
})
print(ecdfL)
ecdfL <- lapply(seq_along(rsdL), function(idx) {
slide <- names(rsdL)[idx]
tmpDf <- mSubDf[which(mSubDf$Run_Tissue_name == slide),]
fovCols <- rep("grey60", length(sort(unique(tmpDf$fov))))
names(fovCols) <- sort(unique(tmpDf$fov))
fovCols[which(names(fovCols) %in% lowRsdFovs[[idx]])] <- "red"
ggArea <- ecdfPlot(df = tmpDf, x = "Area", y = "fov", cols = fovCols, label = slide)
return(ggArea)
})
print(ecdfL)
objFileName <- list.files(file.path(baseDir, paste0("SeuratObjectAfterQC_", projectTag)), pattern = ".RDS", full.names = T)
seuratObjfile <- readRDS(objFileName)
sMetadata <- seuratObjfile@meta.data
seuratObjfile
## An object of class Seurat
## 2207 features across 433733 samples within 4 assays
## Active assay: RNA (1000 features, 0 variable features)
## 2 layers present: counts, data
## 3 other assays present: falsecode, negprobes, RNA_normalized_086d7349.c603.4808.9ae6.52c504d0ac3b_1
## 2 dimensional reductions calculated: approximatepca_94c48860.6aee.4a27.8b15.53e99c25f2cd_1, approximateumap_f77f0d93.45a0.4469.ba27.1f6939e6c1cd_1
qcFlagsFOV <- sMetadata[which(sMetadata$qcFlagsFOV == "Fail"),]
qcFlagsCellCounts <- sMetadata[which(sMetadata$qcFlagsCellCounts == "Fail"),]
qcFlagsCellPropNeg <- sMetadata[which(sMetadata$qcFlagsCellPropNeg == "Fail"),]
qcFlagsCellArea <- sMetadata[which(sMetadata$qcFlagsCellArea == "Fail"),]
# qcFlagsRNACounts <- sMetadata[which(sMetadata$qcFlagsRNACounts == "Fail"),]
# qcFlagsCellComplex <- sMetadata[which(sMetadata$qcFlagsCellComplex == "Fail"),]
failMetadata <- sMetadata[Reduce(union, list(
which(sMetadata$qcFlagsRNACounts == "Fail"),
which(sMetadata$qcFlagsCellCounts == "Fail"),
which(sMetadata$qcFlagsCellPropNeg == "Fail"),
which(sMetadata$qcFlagsCellComplex == "Fail"),
which(sMetadata$qcFlagsCellArea == "Fail"),
which(sMetadata$qcFlagsFOV == "Fail"))),
]
qcDfL <- lapply(seq_along(rawFlatL), function(idx) {
slidename <- names(rawFlatL)[idx]
metadata <- metadataL[[idx]]
metadata$Filter <- "Included"
metadata$Filter[which(metadata$cell %in% rownames(failMetadata))] <- "Excluded"
qcDf <- data.frame(
id = metadata$cell,
Slide = slidename,
FOV = as.numeric(metadata$fov),
CellId = as.numeric(metadata$cell_ID),
nCount = as.numeric(metadata$nCount_RNA),
nFeature = as.numeric(metadata$nFeature_RNA),
Area = as.numeric(metadata$Area),
Group = metadata$Filter
)
return(qcDf)
})
qcDf <- do.call("rbind", qcDfL)
qcDf <- qcDf[order(qcDf$FOV, qcDf$CellId, qcDf$Slide),]
qcDf$FOV <- factor(qcDf$FOV, levels=sort(unique(qcDf$FOV)))
qcDf$Group <- factor(qcDf$Group, levels=c("Included", "Excluded"))
for (slide in names(slideCols)) {
subQcDf <- qcDf[which(qcDf$Slide == slide),]
subDfInc <- subQcDf[which(subQcDf$Group == "Included"),]
subDfExc <- subQcDf[which(subQcDf$Group == "Excluded"),]
subQcDf <- densityColors(df = subQcDf, cols = heatCols, option = 1)
par(mfrow=c(1, 2))
plot(subQcDf$nCount, subQcDf$nFeature, col = subQcDf$Col, xlab = "nCount", ylab = "nFeature", pch=20, xlim = c(0, max(qcDf$nCount)), ylim = c(0, max(qcDf$nFeature)), main=slide)
plot(subDfInc$nCount, subDfInc$nFeature, col = "grey60", xlab = "nCount", ylab = "nFeature", pch=20, xlim = c(0, max(qcDf$nCount)), ylim = c(0, max(qcDf$nFeature)), main="")
points(subDfExc$nCount, subDfExc$nFeature, col = "red", pch=20)
legend("topleft", legend=c("Included", "Excluded"), fill=c("grey60", "red"), bty="n")
}
for (slide in names(slideCols)) {
subQcDf <- qcDf[which(qcDf$Slide == slide),]
subDfInc <- subQcDf[which(subQcDf$Group == "Included"),]
subDfExc <- subQcDf[which(subQcDf$Group == "Excluded"),]
subQcDf <- densityColors(df = subQcDf, cols = heatCols, option = 1)
par(mfrow=c(1, 2))
plot(log10(subQcDf$nCount+1), log10(subQcDf$nFeature+1), col = subQcDf$Col, xlab = "nCount, log10", ylab = "nFeature, log10", pch=20, xlim = c(0, max(log10(qcDf$nCount+1))), ylim = c(0, max(log10(qcDf$nFeature+1))), main=slide)
plot(log10(subDfInc$nCount+1), log10(subDfInc$nFeature+1), col = "grey60", xlab = "nCount, log10", ylab = "nFeature, log10", pch=20, xlim = c(0, max(log10(qcDf$nCount+1))), ylim = c(0, max(log10(qcDf$nFeature+1))), main="")
points(log10(subDfExc$nCount+1), log10(subDfExc$nFeature+1), col = "red", pch=20)
legend("topleft", legend=c("Included", "Excluded"), fill=c("grey60", "red"), bty="n")
}
for (slide in names(slideCols)) {
subQcDf <- qcDf[which(qcDf$Slide == slide),]
subDfInc <- subQcDf[which(subQcDf$Group == "Included"),]
subDfExc <- subQcDf[which(subQcDf$Group == "Excluded"),]
subQcDf <- densityColors(df = subQcDf, cols = heatCols, option = 2)
par(mfrow=c(1, 2))
plot(subQcDf$nCount, subQcDf$Area, col = subQcDf$Col, xlab = "nCount", ylab = "Area", pch=20, xlim = c(0, max(qcDf$nCount)), ylim = c(min(qcDf$Area, na.rm=F), max(qcDf$Area)), main=slide)
plot(subDfInc$nCount, subDfInc$Area, col = "grey60", xlab = "nCount", ylab = "Area", pch=20, xlim = c(0, max(qcDf$nCount)), ylim = c(min(qcDf$Area, na.rm=F), max(qcDf$Area)), main="")
points(subDfExc$nCount, subDfExc$Area, col = "red", pch=20)
legend("topleft", legend=c("Included", "Excluded"), fill=c("grey60", "red"), bty="n")
}
for (slide in names(slideCols)) {
subQcDf <- qcDf[which(qcDf$Slide == slide),]
subDfInc <- subQcDf[which(subQcDf$Group == "Included"),]
subDfExc <- subQcDf[which(subQcDf$Group == "Excluded"),]
subQcDf <- densityColors(df = subQcDf, cols = heatCols, option = 2)
par(mfrow=c(1, 2))
plot(log10(subQcDf$nCount+1), log10(subQcDf$Area+1), col = subQcDf$Col, xlab = "nCount, log10", ylab = "Area, log10", pch=20, xlim = c(0, max(log10(qcDf$nCount+1))), ylim = c(min(log10(qcDf$Area+1), na.rm=F), max(log10(qcDf$Area+1))), main=slide)
plot(log10(subDfInc$nCount+1), log10(subDfInc$Area+1), col = "grey60", xlab = "nCount, log10", ylab = "Area, log10", pch=20, xlim = c(0, max(log10(qcDf$nCount+1))), ylim = c(min(log10(qcDf$Area+1), na.rm=F), max(log10(qcDf$Area+1))), main="")
points(log10(subDfExc$nCount+1), log10(subDfExc$Area+1), col = "red", pch=20)
legend("topleft", legend=c("Included", "Excluded"), fill=c("grey60", "red"), bty="n")
}
for (slide in names(slideCols)) {
subQcDf <- qcDf[which(qcDf$Slide == slide),]
subDfInc <- subQcDf[which(subQcDf$Group == "Included"),]
subDfExc <- subQcDf[which(subQcDf$Group == "Excluded"),]
subQcDf <- densityColors(df = subQcDf, cols = heatCols, option = 3)
par(mfrow=c(1, 2))
plot(subQcDf$nFeature, subQcDf$Area, col = subQcDf$Col, xlab = "nFeature", ylab = "Area", pch=20, xlim = c(0, max(qcDf$nFeature)), ylim = c(min(qcDf$Area, na.rm=F), max(qcDf$Area)), main=slide)
plot(subDfInc$nFeature, subDfInc$Area, col = "grey60", xlab = "nFeature", ylab = "Area", pch=20, xlim = c(0, max(qcDf$nFeature)), ylim = c(min(qcDf$Area, na.rm=F), max(qcDf$Area)), main="")
points(subDfExc$nFeature, subDfExc$Area, col = "red", pch=20)
legend("topleft", legend=c("Included", "Excluded"), fill=c("grey60", "red"), bty="n")
}
for (slide in names(slideCols)) {
subQcDf <- qcDf[which(qcDf$Slide == slide),]
subDfInc <- subQcDf[which(subQcDf$Group == "Included"),]
subDfExc <- subQcDf[which(subQcDf$Group == "Excluded"),]
subQcDf <- densityColors(df = subQcDf, cols = heatCols, option = 3)
par(mfrow=c(1, 2))
plot(log10(subQcDf$nFeature+1), log10(subQcDf$Area+1), col = subQcDf$Col, xlab = "nFeature, log10", ylab = "Area, log10", pch=20, xlim = c(0, max(log10(qcDf$nFeature+1))), ylim = c(min(log10(qcDf$Area+1), na.rm=F), max(log10(qcDf$Area+1))), main=slide)
plot(log10(subDfInc$nFeature+1), log10(subDfInc$Area+1), col = "grey60", xlab = "nFeature, log10", ylab = "Area, log10", pch=20, xlim = c(0, max(log10(qcDf$nFeature+1))), ylim = c(min(log10(qcDf$Area+1), na.rm=F), max(log10(qcDf$Area+1))), main="")
points(log10(subDfExc$nFeature+1), log10(subDfExc$Area+1), col = "red", pch=20)
legend("topleft", legend=c("Included", "Excluded"), fill=c("grey60", "red"), bty="n")
}