1 Introduction

This report includes stats and diagnostic plots of the Human Brain Nanostring GeoMx Digital Spatial (DSP) data.

Nanostring offers a set of R packages that enables comprehensive QC/analysis of the GeoMx DSP data and, visit the GeoMxTools vignettes for more information. Nanostring generously provided Spatial Organ Atlas (SOA) and the Human Brain dataset was used to generate the following results using an App from Docker Hub that was built to compile a set of packages to improve reproducibility and replicability.

2 Prerequisite

2.1 Environment

Software: R (v4.3.1)
Pakcages:

NanoStringNCTools (v1.8.0)
GeoMxWorkflows (v1.6.0)
GeomxTools (v3.4.0)
stringr (v1.5.1)
yaml (v2.3.7)
dplyr (v1.1.3)
scales (v1.2.1)
ggplot2 (v3.4.4)
ggrepel (v0.9.4)
ggsankey (v0.0.99999)
plotly (v4.10.4)
reshape2 (v1.4.4)
DescTools (v0.99.50)

2.2 GeoMx DSP Data

Sample, N=5:

Run date	Slide (Individual)	Sample	Autopsy date	Sex	Age
APR 30, 2021	hu_brain_001	81074A	JUN 19, 2017	M	90
JUN 21, 2021	hu_brain_002	81077A	JUL 05, 2017	M	70
JUN 21, 2021	hu_brain_003	81073A	APR 21, 2017	M	72
SEP 01, 2021	hu_brain_004a	532229A (Cortex)	SEP 04, 2020	M	75
SEP 01, 2021	hu_brain_004b	81084A (Hippocampus)	SEP 04, 2020	M	75

Ethnicity: Caucasian
Diagnosis: Non-disease normal tissue
Sample type: FFPE
Segment, N=5: Neuron ◼, Iba1 ◼, GFAP ◼, Neuropil ◼, Full ◼
Files: Annotation.xlsx, DCC files, and PKC file are under the Annot, DCC, and PKC folders, respectivley

WORKING_DIRECTORY
├── Annot
│   └── Annotation.xlsx
├── DCC
│   ├── DSP-*-?-???.dcc
└── PKC
    └── *.pkc

3 Preprocessing/QC

3.1 Preparation

Set working directory to load files

# Do not run
baseDir <- "WORKING_DIRECTORY" # where the Annot/DCC/PKC/Settings folders are located
setwd(baseDir)

library(NanoStringNCTools)
library(GeoMxWorkflows)
library(GeomxTools)
library(stringr)
library(yaml)
library(dplyr)
library(scales)
library(plotly)
library(ggplot2)
library(ggrepel)
library(ggsankey)
library(reshape2)
library(DescTools)

3.2 Compile a GeoMx object (Step 1)

Load DCC/PKC/Annotation files and export to a GeoMx object, e.g., geomxSet.RDS

dccFiles <- dir(file.path(baseDir, "DCC"), pattern = ".dcc$", full.names = TRUE, recursive = TRUE)
pkcFile <- dir(file.path(baseDir, "PKC"), pattern = ".pkc$", full.names = TRUE, recursive = TRUE)
annotFile <- dir(file.path(baseDir, "Annot"), pattern = ".xlsx$", full.names = TRUE, recursive = TRUE)
initSet <- readNanoStringGeoMxSet(
        dccFiles = dccFiles,
        pkcFiles = pkcFile,
        phenoDataFile = annotFile,
        phenoDataSheet = "Annotation",
        phenoDataDccColName = "FileName",
        protocolDataColNames = c("ROI", "AOI"),
        experimentDataColNames = c("Panel")
)

dim(initSet)

## Features  Samples 
##    18815      252

head(assayData(initSet)$exprs[, c(1:2)])

##            DSP-1009880000092-G-A02.dcc DSP-1009880000092-G-A03.dcc
## RTS0020877                          10                          23
## RTS0020878                           4                           4
## RTS0020879                           9                          10
## RTS0020880                           9                           9
## RTS0020881                          20                           5
## RTS0020882                           0                           4

3.3 Assign a pseudo value (Step 2)

Shift any expression counts with a value of zero (0) to one (1) to enable in downstream transformations

gSet <- shiftCountsOne(initSet, useDALogic = TRUE)
head(assayData(gSet)$exprs[, c(1:2)])

##            DSP-1009880000092-G-A02.dcc DSP-1009880000092-G-A03.dcc
## RTS0020877                          10                          23
## RTS0020878                           4                           4
## RTS0020879                           9                          10
## RTS0020880                           9                           9
## RTS0020881                          20                           5
## RTS0020882                           1                           4

3.4 Segment QC (Step 3)

Assess sequencing quality and adequate tissue sampling for every ROI/AOI segment
- Any segments below the dotted line, i.e., recommended thresholds, will be filtered out
- Row: Slide, Column: Segment

gSet <- setSegmentQCFlags(gSet, qcCutoffs = segmentQcParams)
qcMat <- sData(gSet)
qcMat$Segment <- factor(qcMat$Segment, levels = segmentOrder)

segmentQcResults <- protocolData(gSet)[["QCFlags"]]
flagColumns <- colnames(segmentQcResults)
qcSummary <- data.frame(Pass = colSums(!segmentQcResults[, flagColumns]), Warning = colSums(segmentQcResults[, flagColumns]))
segmentQcResults$QCStatus <- apply(segmentQcResults, 1L, function(x) {
        ifelse(sum(x) == 0L, "PASS", "WARNING")
})
qcSummary["TOTAL FLAGS", ] <- c(sum(segmentQcResults[, "QCStatus"] == "PASS"), sum(segmentQcResults[, "QCStatus"] == "WARNING"))
qcSummary[, "TOTAL"] <- apply(qcSummary, 1, sum)
qcSummary

##               Pass Warning TOTAL
## LowReads       252       0   252
## LowTrimmed     252       0   252
## LowStitched    251       1   252
## LowAligned     250       2   252
## LowSaturation  252       0   252
## LowNegatives   252       0   252
## LowNuclei      241      11   252
## LowArea        252       0   252
## TOTAL FLAGS    239      13   252

gSet <- gSet[, segmentQcResults$QCStatus == "PASS"]
dim(gSet)

## Features  Samples 
##    18815      239

3.4.1 Trimmed

histQC(qcMat, "Trimmed (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_trimmedThre, cols = segmentCols)

3.4.2 Stitched

histQC(qcMat, "Stitched (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_stitchedThre, cols = segmentCols)

3.4.3 Aligned

histQC(qcMat, "Aligned (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_alignedThre, cols = segmentCols)

3.4.4 Saturated

histQC(qcMat, "Saturated (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_saturatedThre, cols = segmentCols)

3.4.5 Area

histQC(qcMat, "area", segmentQC_colBy, segmentQC_rowBy, segmentQC_areaThre, "log10", "AOI Area (log10)", cols = segmentCols)

3.4.6 Nuclei count

histQC(qcMat, "nuclei", segmentQC_colBy, segmentQC_rowBy, segmentQC_nucleiThre, "log10", "AOI nuclei count", cols = segmentCols)

3.5 Background signal (Step 4)

Calculate negative count which is the geometric mean of the several unique negative probes in the GeoMx panel that do not target mRNA and establish the background count level per segment

negCol <- "NegGeoMean"

negativeGeoMeans <- esBy(
        negativeControlSubset(gSet),
        GROUP = "Module",
        FUN = function(x) {
                assayDataApply(x, MARGIN = 2, FUN = ngeoMean, elt = "exprs")
        }
)
protocolData(gSet)[[negCol]] <- negativeGeoMeans
pData(gSet)[, negCol] <- sData(gSet)[[negCol]]

backgrounMat <- sData(gSet)
backgrounMat <- backgrounMat[, c("NegGeoMean", segmentQC_colBy, segmentQC_rowBy)]
backgrounMat$Segment <- factor(backgrounMat$Segment, levels = segmentOrder)
histQC(backgrounMat, "NegGeoMean", segmentQC_colBy, segmentQC_rowBy, 2, "log10", "GeoMean(negative probes)", cols = segmentCols)

3.6 Probe-level QC (Step 5)

Remove low-performing probes. In short, this QC is an outlier removal process, whereby probes are either removed entirely from the study (global) or from specific segments (local)

gSet <- setBioProbeQCFlags(gSet, qcCutoffs = probeQcParams, removeLocalOutliers = TRUE)
probeQcResults <- fData(gSet)[["QCFlags"]]

qcDf <- data.frame(
        Passed = sum(rowSums(probeQcResults[, -1]) == 0),
        Global = sum(probeQcResults$GlobalGrubbsOutlier),
        Local = sum(rowSums(probeQcResults[, -2:-1]) > 0 & !probeQcResults$GlobalGrubbsOutlier),
        TOTAL = nrow(probeQcResults)
)
qcDf

##   Passed Global Local TOTAL
## 1  18805      1     9 18815

probeQcPassed <- subset(
        gSet,
        fData(gSet)[["QCFlags"]][, c("LowProbeRatio")] == FALSE &
                fData(gSet)[["QCFlags"]][, c("GlobalGrubbsOutlier")] == FALSE
)
gSet <- probeQcPassed
dim(gSet)

## Features  Samples 
##    18814      239

3.7 Gene-level aggregation (Step 6)

Generate a gene-level count matrix where the count for any gene with multiple probes per segment is calculated as the geometric mean of those probes

newSet <- aggregateCounts(gSet)
newSet <- subset(newSet, fData(newSet)$TargetName != "NegProbe-WTX")
dim(newSet)

## Features  Samples 
##    18676      239

3.8 Segment QC (Step 7)

3.8.1 Check the Area size and Nuclei count

Relationship between Area and Nuclei count and their distributions

lowLvqcDf <- data.frame(
        Slide = pData(newSet)$Slide,
        Sample = pData(newSet)$Library,
        Segment = pData(newSet)$Segment,
        Area = pData(newSet)$area,
        Nuclei = pData(newSet)$nuclei
)
lowLvqcDf$Slide <- factor(lowLvqcDf$Slide, levels=slideOrder)
lowLvqcDf$Segment <- factor(lowLvqcDf$Segment, levels=segmentOrder)

scatterPlot <- ggplot(data = lowLvqcDf, aes(x = Area, y = Nuclei, color = Segment, label = Sample, label2 = Slide)) +
        geom_point() +
        scale_color_manual(values = segmentCols) +
        labs(
                title = "",
                x = "Area", 
                y = "#Nuclei"
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
                text = element_text(size = 12)
        ) + 
        scale_x_continuous(trans = "log10") + 
        scale_y_continuous(trans = "log10")

ggplotly(scatterPlot)

3.8.1.1 Area size

ggplot(data = lowLvqcDf, aes(x = Segment, y = Area, fill = Segment)) +
        geom_violin(position = dodge, size = 0) +
        geom_boxplot(width = 0.1, position = dodge, fill="white") +
        scale_fill_manual(values = segmentCols) +
        facet_grid( ~ Slide) +
        labs(
                title = "",
                x = "", 
                y = "Area"
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0),
                legend.position = "none", 
                text = element_text(size = 12)
        ) + 
        scale_y_continuous(trans = "log10")

3.8.1.2 Nuclei count

ggplot(data = lowLvqcDf, aes(x = Segment, y = Nuclei, fill = Segment)) +
        geom_violin(position = dodge, size = 0) +
        geom_boxplot(width = 0.1, position = dodge, fill="white") +
        scale_fill_manual(values = segmentCols) +
        facet_grid( ~ Slide) +
        labs(
                title = "",
                x = "", 
                y = "#Nuclei"
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0),
                legend.position = "none", 
                text = element_text(size = 12)
        ) + 
        scale_y_continuous(trans = "log10")

3.8.2 Calculate LOQ per segment

Determine the limit of quantification (LOQ) per segment where the LOQ is calculated based on the distribution of negative control probes and is intended to approximate the quantifiable limit of gene expression per segment

loqDf <- data.frame(row.names = colnames(newSet))
module <- gsub(".pkc", "", annotation(gSet))
varNames <- paste0(c("NegGeoMean_", "NegGeoSD_"), module)
if (all(varNames[1:2] %in% colnames(pData(newSet)))) {
        loqDf[, module] <- pData(newSet)[, varNames[1]] * (pData(newSet)[, varNames[2]]^loqCutoff)
}

statDf <- data.frame(
        Slide = pData(newSet)$Slide,
        Sample = pData(newSet)$Sample,
        Library = protocolData(newSet)$AOI,
        Segment = pData(newSet)$Segment,
        LOQ = loqDf[, 1]
)
statShort <- dcast(statDf, Sample ~ Segment, fun.aggregate = mean, value.var = "LOQ")
perSample <- statShort[, c(2:ncol(statShort))]
rownames(perSample) <- as.character(statShort[, 1])
statShort <- data.frame(
        Sample = rownames(perSample),
        LOQmean = as.matrix(apply(perSample, 1, mean, na.rm = TRUE))
)
statShort <- statShort[order(statShort$LOQmean), ]
statDf$Sample <- factor(statDf$Sample, levels = statShort$Sample)
statDf$Segment <- factor(statDf$Segment, levels = segmentOrder)

3.8.2.1 Per Segment

ggplot(data = statDf, aes(x = Segment, y = log10(LOQ), fill = Segment)) +
        geom_violin(position = dodge, size = 0) +
        geom_boxplot(width = 0.1, position = dodge, fill = "white") +
        scale_fill_manual(values = segmentCols) +
        labs(
                title = "",
                x = "Segment",
                y = "LOQ, log10"
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
                legend.position = "none",
                text = element_text(size = 12)
        ) +
        geom_hline(aes(yintercept = log10(loqMin)), lty = 2, col = "grey50")

3.8.2.2 Per Slide and Segment

ggplot(data = statDf, aes(x = Segment, y = log10(LOQ), fill = Segment)) +
        geom_violin(position = dodge, size = 0) +
        geom_boxplot(width = 0.1, position = dodge, fill = "white") +
        scale_fill_manual(values = segmentCols) +
        facet_grid(~Slide) +
        labs(
                title = "",
                x = "Segment",
                y = "LOQ, log10"
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0),
                legend.position = "none",
                text = element_text(size = 12)
        ) +
        geom_hline(aes(yintercept = log10(loqMin)), lty = 2, col = "grey50")

3.8.2.3 Per Sample

ggplot(data = statDf, aes(x = Sample, y = log10(LOQ), group = Segment)) +
        geom_line(aes(color = Segment), lwd = 0.5) +
        geom_point(aes(color = Segment)) +
        scale_color_manual(values = segmentCols) +
        labs(
                title = "",
                x = "",
                y = "LOQ, log10"
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0),
                # legend.position = "none",
                text = element_text(size = 12)
        ) +
        scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) + 
        geom_hline(aes(yintercept = log10(loqMin)), lty = 2, col = "grey50")

loqDf[loqDf < loqMin] <- loqMin
pData(newSet)$LOQ <- loqDf

loqMat <- t(esApply(newSet, MARGIN = 1, FUN = function(x) {
        x > LOQ[, module]
}))
loqMat <- loqMat[fData(newSet)$TargetName, ] # Ordering

3.8.3 Segment gene detection

Filter out segments with exceptionally low signal that have a small fraction of panel genes detected above the LOQ relative to the other segments in the study

pData(newSet)$GenesDetected <- colSums(loqMat, na.rm = TRUE)
pData(newSet)$GeneDetectionRate <- pData(newSet)$GenesDetected / nrow(newSet)
pData(newSet)$DetectionThreshold <- cut(pData(newSet)$GeneDetectionRate, breaks = geneDetectionRateBins, labels = geneDetectionRateBinLabels)

rateMat <- pData(newSet)
rateMat$Segment <- factor(rateMat$Segment, levels = segmentOrder)

ggplot(rateMat, aes(x = DetectionThreshold)) +
        geom_bar(aes(fill = Segment)) +
        geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
        scale_fill_manual(values = segmentCols) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0),
                text = element_text(size = 12)
        ) +
        scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
        labs(
                x = "Gene Detection Rate",
                y = "Number of Segments/Samples",
                fill = "Segment"
        ) +
        facet_grid(as.formula(paste("~", segmentQC_rowBy)))

statDf <- data.frame(
        Sample = protocolData(newSet)$AOI,
        Segment = pData(newSet)$Segment,
        GenesDetected = colSums(loqMat, na.rm = TRUE),
        Nuclei = pData(newSet)$nuclei
)
statDf$Segment <- factor(statDf$Segment, segmentOrder)
newSet <- newSet[, pData(newSet)$GeneDetectionRate >= geneDetectionRateThre]
dim(newSet)

## Features  Samples 
##    18676      143

3.8.3.1 Neuron

segment <- segmentOrder[1]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
        geom_bar(stat = "identity") +
        scale_fill_manual(values = segmentCols[segment]) +
        theme_minimal() +
        labs(
                title = "",
                x = "Sample"
        ) +
        coord_flip() +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
                legend.position = "none"
        ) +
        scale_x_discrete(guide = guide_axis(check.overlap = TRUE))

3.8.3.2 Iba1

segment <- segmentOrder[2]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
        geom_bar(stat = "identity") +
        scale_fill_manual(values = segmentCols[segment]) +
        theme_minimal() +
        labs(
                title = "",
                x = "Sample"
        ) +
        coord_flip() +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
                legend.position = "none"
        ) +
        scale_x_discrete(guide = guide_axis(check.overlap = TRUE))

3.8.3.3 GFAP

segment <- segmentOrder[3]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
        geom_bar(stat = "identity") +
        scale_fill_manual(values = segmentCols[segment]) +
        theme_minimal() +
        labs(
                title = "",
                x = "Sample"
        ) +
        coord_flip() +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
                legend.position = "none"
        ) +
        scale_x_discrete(guide = guide_axis(check.overlap = TRUE))

3.8.3.4 Neuropil

segment <- segmentOrder[4]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
        geom_bar(stat = "identity") +
        scale_fill_manual(values = segmentCols[segment]) +
        theme_minimal() +
        labs(
                title = "",
                x = "Sample"
        ) +
        coord_flip() +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
                legend.position = "none"
        ) +
        scale_x_discrete(guide = guide_axis(check.overlap = TRUE))

3.8.3.5 Full

segment <- segmentOrder[5]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
        geom_bar(stat = "identity") +
        scale_fill_manual(values = segmentCols[segment]) +
        theme_minimal() +
        labs(
                title = "",
                x = "Sample"
        ) +
        coord_flip() +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
                legend.position = "none"
        ) +
        scale_x_discrete(guide = guide_axis(check.overlap = TRUE))

3.8.4 Gene detection rate

Determine the detection rate for genes across the study where individual genes are detected to varying degrees in the segments. In other words, we will calculate the total number of genes detected in different percentages of segments to filter out low detected genes

loqMat <- loqMat[, colnames(newSet)]
fData(newSet)$DetectedSegments <- rowSums(loqMat, na.rm = TRUE)
fData(newSet)$DetectionRate <- fData(newSet)$DetectedSegments / nrow(pData(newSet))

geneDetectionRateDf <- data.frame(
        Gene = rownames(newSet),
        Number = fData(newSet)$DetectedSegments,
        DetectionRate = percent(fData(newSet)$DetectionRate)
)
geneDetectionRateDf <- geneDetectionRateDf[order(geneDetectionRateDf$Number, geneDetectionRateDf$DetectionRate, geneDetectionRateDf$Gene), ]

finalSet <- newSet[fData(newSet)$DetectionRate >= geneDetectionRateThre, ]
dim(finalSet)

## Features  Samples 
##     9711      143

3.9 Normalization (Step 8)

Upper-quartile (Q3) normalization method estimates a normalization factor per segment to bring the segment data distributions together

finalSet <- normalize(finalSet, norm_method = "quant", desiredQuantile = .75, toElt = "q_norm")
annot <- pData(finalSet)

segmentIdx <- c()
for (segment in segmentOrder) segmentIdx <- c(segmentIdx, which(annot$Segment == segment))

rawMat <- assayData(finalSet)$exprs
colnames(rawMat) <- annot$Library
rawDf <- melt(rawMat)
colnames(rawDf) <- c("Gene", "Sample", "Expression")
rawDf$Segment <- rep(annot$Segment, each=nrow(rawMat))
rawDf$Segment <- factor(rawDf$Segment, levels = segmentOrder)

normMat <- assayData(finalSet)$q_norm
colnames(normMat) <- annot$Library
normDf <- melt(normMat)
colnames(normDf) <- c("Gene", "Sample", "Expression")
normDf$Segment <- rep(annot$Segment, each=nrow(rawMat))
normDf$Segment <- factor(normDf$Segment, levels = segmentOrder)

3.9.1 Per Segment

boxplot(log10(Expression) ~ Segment, data = rawDf, col = segmentCols, pch = 20, ylab = "Raw count, log10", xlab = "Segment")

boxplot(log10(Expression) ~ Segment, data = normDf, col = segmentCols, pch = 20, ylab = "Upper-quartile norm, log10", xlab = "Segment")

3.9.2 Per Sample

rawMat <- rawMat[, segmentIdx]
boxplot(log10(rawMat), col = segmentCols[annot$Segment[segmentIdx]], pch = 20, names = NA, xlab = "Sample", ylab = "Raw count, log10")

normMat <- normMat[, segmentIdx]
boxplot(log10(normMat), col = segmentCols[annot$Segment[segmentIdx]], pch = 20, names = NA, xlab = "Sample", ylab = "Upper-quartile norm, log10")

3.10 QC Summary

The number of features, i.e., probe or gene, and samples

Dataset	#Samples	#Features
Initial	252	18815
After QC (analysis-ready)	143	9711

Study design after QC

countDf <- pData(finalSet) %>% make_long(Slide, Segment, Region)
ggplot(countDf, aes(x = x, next_x = next_x, node = node, next_node = next_node, fill = factor(node), label = node)) +
        geom_sankey(flow.alpha = .6, node.color = "gray30") +
        geom_sankey_label(size = 3, color = "black", fill = "white") +
        scale_fill_viridis_d(option = "A", alpha = 0.95) +
        theme_sankey(base_size = 18) +
        labs(
                x = NULL, 
                y = NULL
        ) +
        theme_bw() +
        theme(
                axis.line = element_blank(),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
                axis.ticks.x = element_blank(),
                axis.text.y = element_blank(),
                axis.ticks.y = element_blank(),
                legend.position = "none", 
                text = element_text(size = 12)
        )

QC threshold values used in this report

Step	Filter name	Threshold	Description
SegmentQC	minSegmentReads	1000	Minimum number of reads
	percentTrimmed	80	Minimum % of reads trimmed
	percentStitched	80	Minimum % of reads stitched
	percentAligned	75	Minimum % of reads aligned
	percentSaturation	50	Minimum sequencing saturation (%)
	minNuclei	20	Minimum number of nuclei estimated
	minArea	1000	Minimum segment area
ProbeQC	minProbeRatio	0.1	Geometric mean of a given probe / geometric mean of all probe
	percentFailGrubbs	20	An outlier according to the Grubb’s test (%)
LOQ	loqCutoff	2	LOQ cut off value
	loqMin	2	LOQ minimum value
GeneQC	geneDetectionRateThre	0.05	Minimum gene detection rate

4 Analysis

4.1 Principal Component Analysis

Dimensionality reduction while preserving relevant information

normMat <- assayData(finalSet)$q_norm
pcaRes <- prcomp(t(normMat), scale = TRUE)
print(paste0("PC1-4 explained: ", paste0(round(summary(pcaRes)$importance[2, c(1:4)] * 100, 1), "%", collapse = ", ")))

## [1] "PC1-4 explained: 17.1%, 10.8%, 4.4%, 3.3%"

pcaDf <- data.frame(
        Sample = annot$Library,
        X = pcaRes$x[, 1],
        Y = pcaRes$x[, 2],
        Slide = annot$Slide,
        Segment = annot$Segment,
        Region = annot$Region
)
pcaDf$Slide <- factor(pcaDf$Slide, levels = slideOrder)
pcaDf$Segment <- factor(pcaDf$Segment, levels = segmentOrder)
pcaDf$Region <- factor(pcaDf$Region)

4.1.1 Slide

pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Slide, label = Sample)) +
        geom_point(size = 2, shape = 20) +
        scale_color_manual(values = slideCols) +
        labs(
                x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
                y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
                text = element_text(size = 12)
        )
ggplotly(pcaPlot)

4.1.2 Segment

pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Segment, label = Sample)) +
        geom_point(size = 2, shape = 20) +
        scale_color_manual(values = segmentCols) +
        labs(
                x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
                y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
                text = element_text(size = 12)
        )
ggplotly(pcaPlot)

4.1.3 Region

pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Region, label = Sample)) +
        geom_point(size = 2, shape = 20) +
        scale_color_manual(values = regionCols) +
        labs(
                x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
                y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
        ) +
        theme_bw() +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank(),
                axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
                text = element_text(size = 12)
        )
ggplotly(pcaPlot)

4.2 Differential Expression Analysis

Identify differential expressed genes between Cortex and Hippocampus in Iba1 (immune)

annot <- pData(finalSet)
exprMat <- assayData(finalSet)$q_norm

iba1Idx <- which(annot$Segment == "Iba1")
cortexIdx <- which(annot$Region == "Cortex")
hippoIdx <- which(annot$Region == "Hippocampus")

statDf <- data.frame(
        Gene = fData(finalSet)$TargetName,
        Log2FC = apply(exprMat, 1, log2fc, idx1 = intersect(iba1Idx, cortexIdx), idx2 = intersect(iba1Idx, hippoIdx)),
        P = apply(exprMat, 1, ttest, idx1 = intersect(iba1Idx, cortexIdx), idx2 = intersect(iba1Idx, hippoIdx))
)
statDf$FDR <- p.adjust(statDf$P, method="fdr")
statDf$DEG <- "Not"
statDf$DEG[statDf$Log2FC > 1 & statDf$P < 0.05] <- "Up"
statDf$DEG[statDf$Log2FC < -1 & statDf$P < 0.05] <- "Down"
statDf$DEG <- factor(statDf$DEG, levels=c("Up", "Not", "Down"))
statDf$Label <- statDf$Gene
statDf$Label[statDf$DEG == "Not"] <- NA
table(statDf$DEG)

## 
##   Up  Not Down 
##   69 9602   40

 ggplot(data = statDf, aes(x = Log2FC, y = -log10(P), col = DEG, label = Label)) +
        geom_point(cex = 3) +
        theme_minimal() +
        geom_text_repel(max.overlaps = 10) +
        geom_vline(xintercept = c(-1, 1), col = "grey40", lty = 2) +
        geom_hline(yintercept = -log10(0.05), col = "grey40", lty = 2) +
        scale_color_manual(
                name = "DEG", values = c("#AE3C32", "#F9FBCB", "#46639C"),
                labels = c("Up-reg. in Cortex", "Not significant", "Down-reg. in Cortex")
        ) +
        labs(
                title = "Cortex vs. Hippocampus in Iba1",
                subtitle = paste0("n(Cortex) = ", length(intersect(iba1Idx, cortexIdx)), "; n(Hippocampus) = ", length(intersect(iba1Idx, hippoIdx))),
                x = "Log2-Fold change",
                y = "-Log10(nominal P)"
        ) +
        theme(
                axis.line = element_line(colour = "black"),
                panel.grid.major = element_blank(),
                panel.grid.minor = element_blank(),
                panel.border = element_blank(),
                panel.background = element_blank()
        )

Hands-on Bioinformatics Workshop (HBW), Series 1

Spatial Transcriptomics Data Analysis

by Applied Spatial Omics Centre at University of Calgary