Preprocessing/QC
Preparation
- Set working directory to load files
# Do not run
baseDir <- "WORKING_DIRECTORY" # where the Annot/DCC/PKC/Settings folders are located
setwd(baseDir)
library(NanoStringNCTools)
library(GeoMxWorkflows)
library(GeomxTools)
library(stringr)
library(yaml)
library(dplyr)
library(scales)
library(plotly)
library(ggplot2)
library(ggrepel)
library(ggsankey)
library(reshape2)
library(DescTools)
Compile a GeoMx
object (Step 1)
- Load DCC/PKC/Annotation files and export to a GeoMx object,
e.g., geomxSet.RDS
dccFiles <- dir(file.path(baseDir, "DCC"), pattern = ".dcc$", full.names = TRUE, recursive = TRUE)
pkcFile <- dir(file.path(baseDir, "PKC"), pattern = ".pkc$", full.names = TRUE, recursive = TRUE)
annotFile <- dir(file.path(baseDir, "Annot"), pattern = ".xlsx$", full.names = TRUE, recursive = TRUE)
initSet <- readNanoStringGeoMxSet(
dccFiles = dccFiles,
pkcFiles = pkcFile,
phenoDataFile = annotFile,
phenoDataSheet = "Annotation",
phenoDataDccColName = "FileName",
protocolDataColNames = c("ROI", "AOI"),
experimentDataColNames = c("Panel")
)
dim(initSet)
## Features Samples
## 18815 252
head(assayData(initSet)$exprs[, c(1:2)])
## DSP-1009880000092-G-A02.dcc DSP-1009880000092-G-A03.dcc
## RTS0020877 10 23
## RTS0020878 4 4
## RTS0020879 9 10
## RTS0020880 9 9
## RTS0020881 20 5
## RTS0020882 0 4
Assign a pseudo value
(Step 2)
- Shift any expression counts with a value of zero (0) to one
(1) to enable in downstream transformations
gSet <- shiftCountsOne(initSet, useDALogic = TRUE)
head(assayData(gSet)$exprs[, c(1:2)])
## DSP-1009880000092-G-A02.dcc DSP-1009880000092-G-A03.dcc
## RTS0020877 10 23
## RTS0020878 4 4
## RTS0020879 9 10
## RTS0020880 9 9
## RTS0020881 20 5
## RTS0020882 1 4
Segment QC (Step 3)
- Assess sequencing quality and adequate tissue sampling for
every ROI/AOI segment
- Any segments below the dotted line, i.e., recommended thresholds,
will be filtered out
- Row: Slide, Column: Segment
gSet <- setSegmentQCFlags(gSet, qcCutoffs = segmentQcParams)
qcMat <- sData(gSet)
qcMat$Segment <- factor(qcMat$Segment, levels = segmentOrder)
segmentQcResults <- protocolData(gSet)[["QCFlags"]]
flagColumns <- colnames(segmentQcResults)
qcSummary <- data.frame(Pass = colSums(!segmentQcResults[, flagColumns]), Warning = colSums(segmentQcResults[, flagColumns]))
segmentQcResults$QCStatus <- apply(segmentQcResults, 1L, function(x) {
ifelse(sum(x) == 0L, "PASS", "WARNING")
})
qcSummary["TOTAL FLAGS", ] <- c(sum(segmentQcResults[, "QCStatus"] == "PASS"), sum(segmentQcResults[, "QCStatus"] == "WARNING"))
qcSummary[, "TOTAL"] <- apply(qcSummary, 1, sum)
qcSummary
## Pass Warning TOTAL
## LowReads 252 0 252
## LowTrimmed 252 0 252
## LowStitched 251 1 252
## LowAligned 250 2 252
## LowSaturation 252 0 252
## LowNegatives 252 0 252
## LowNuclei 241 11 252
## LowArea 252 0 252
## TOTAL FLAGS 239 13 252
gSet <- gSet[, segmentQcResults$QCStatus == "PASS"]
dim(gSet)
## Features Samples
## 18815 239
Trimmed
histQC(qcMat, "Trimmed (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_trimmedThre, cols = segmentCols)
Stitched
histQC(qcMat, "Stitched (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_stitchedThre, cols = segmentCols)
Aligned
histQC(qcMat, "Aligned (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_alignedThre, cols = segmentCols)
Saturated
histQC(qcMat, "Saturated (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_saturatedThre, cols = segmentCols)
Area
histQC(qcMat, "area", segmentQC_colBy, segmentQC_rowBy, segmentQC_areaThre, "log10", "AOI Area (log10)", cols = segmentCols)
Nuclei count
histQC(qcMat, "nuclei", segmentQC_colBy, segmentQC_rowBy, segmentQC_nucleiThre, "log10", "AOI nuclei count", cols = segmentCols)
Background signal
(Step 4)
- Calculate negative count which is the geometric mean of the
several unique negative probes in the GeoMx panel that do not target
mRNA and establish the background count level per segment
negCol <- "NegGeoMean"
negativeGeoMeans <- esBy(
negativeControlSubset(gSet),
GROUP = "Module",
FUN = function(x) {
assayDataApply(x, MARGIN = 2, FUN = ngeoMean, elt = "exprs")
}
)
protocolData(gSet)[[negCol]] <- negativeGeoMeans
pData(gSet)[, negCol] <- sData(gSet)[[negCol]]
backgrounMat <- sData(gSet)
backgrounMat <- backgrounMat[, c("NegGeoMean", segmentQC_colBy, segmentQC_rowBy)]
backgrounMat$Segment <- factor(backgrounMat$Segment, levels = segmentOrder)
histQC(backgrounMat, "NegGeoMean", segmentQC_colBy, segmentQC_rowBy, 2, "log10", "GeoMean(negative probes)", cols = segmentCols)
Probe-level QC (Step
5)
- Remove low-performing probes. In short, this QC is an
outlier removal process, whereby probes are either removed entirely from
the study (global) or from specific segments (local)
gSet <- setBioProbeQCFlags(gSet, qcCutoffs = probeQcParams, removeLocalOutliers = TRUE)
probeQcResults <- fData(gSet)[["QCFlags"]]
qcDf <- data.frame(
Passed = sum(rowSums(probeQcResults[, -1]) == 0),
Global = sum(probeQcResults$GlobalGrubbsOutlier),
Local = sum(rowSums(probeQcResults[, -2:-1]) > 0 & !probeQcResults$GlobalGrubbsOutlier),
TOTAL = nrow(probeQcResults)
)
qcDf
## Passed Global Local TOTAL
## 1 18805 1 9 18815
probeQcPassed <- subset(
gSet,
fData(gSet)[["QCFlags"]][, c("LowProbeRatio")] == FALSE &
fData(gSet)[["QCFlags"]][, c("GlobalGrubbsOutlier")] == FALSE
)
gSet <- probeQcPassed
dim(gSet)
## Features Samples
## 18814 239
Gene-level
aggregation (Step 6)
- Generate a gene-level count matrix where the count for any
gene with multiple probes per segment is calculated as the geometric
mean of those probes
newSet <- aggregateCounts(gSet)
newSet <- subset(newSet, fData(newSet)$TargetName != "NegProbe-WTX")
dim(newSet)
## Features Samples
## 18676 239
Segment QC (Step
7)
Check the Area size and Nuclei count
- Relationship between Area and Nuclei count and their
distributions
lowLvqcDf <- data.frame(
Slide = pData(newSet)$Slide,
Sample = pData(newSet)$Library,
Segment = pData(newSet)$Segment,
Area = pData(newSet)$area,
Nuclei = pData(newSet)$nuclei
)
lowLvqcDf$Slide <- factor(lowLvqcDf$Slide, levels=slideOrder)
lowLvqcDf$Segment <- factor(lowLvqcDf$Segment, levels=segmentOrder)
scatterPlot <- ggplot(data = lowLvqcDf, aes(x = Area, y = Nuclei, color = Segment, label = Sample, label2 = Slide)) +
geom_point() +
scale_color_manual(values = segmentCols) +
labs(
title = "",
x = "Area",
y = "#Nuclei"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10")
ggplotly(scatterPlot)
Area size
ggplot(data = lowLvqcDf, aes(x = Segment, y = Area, fill = Segment)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill="white") +
scale_fill_manual(values = segmentCols) +
facet_grid( ~ Slide) +
labs(
title = "",
x = "",
y = "Area"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0),
legend.position = "none",
text = element_text(size = 12)
) +
scale_y_continuous(trans = "log10")
Nuclei count
ggplot(data = lowLvqcDf, aes(x = Segment, y = Nuclei, fill = Segment)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill="white") +
scale_fill_manual(values = segmentCols) +
facet_grid( ~ Slide) +
labs(
title = "",
x = "",
y = "#Nuclei"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0),
legend.position = "none",
text = element_text(size = 12)
) +
scale_y_continuous(trans = "log10")
Calculate LOQ per segment
- Determine the limit of quantification (LOQ) per segment
where the LOQ is calculated based on the distribution of negative
control probes and is intended to approximate the quantifiable limit of
gene expression per segment
loqDf <- data.frame(row.names = colnames(newSet))
module <- gsub(".pkc", "", annotation(gSet))
varNames <- paste0(c("NegGeoMean_", "NegGeoSD_"), module)
if (all(varNames[1:2] %in% colnames(pData(newSet)))) {
loqDf[, module] <- pData(newSet)[, varNames[1]] * (pData(newSet)[, varNames[2]]^loqCutoff)
}
statDf <- data.frame(
Slide = pData(newSet)$Slide,
Sample = pData(newSet)$Sample,
Library = protocolData(newSet)$AOI,
Segment = pData(newSet)$Segment,
LOQ = loqDf[, 1]
)
statShort <- dcast(statDf, Sample ~ Segment, fun.aggregate = mean, value.var = "LOQ")
perSample <- statShort[, c(2:ncol(statShort))]
rownames(perSample) <- as.character(statShort[, 1])
statShort <- data.frame(
Sample = rownames(perSample),
LOQmean = as.matrix(apply(perSample, 1, mean, na.rm = TRUE))
)
statShort <- statShort[order(statShort$LOQmean), ]
statDf$Sample <- factor(statDf$Sample, levels = statShort$Sample)
statDf$Segment <- factor(statDf$Segment, levels = segmentOrder)
Per Segment
ggplot(data = statDf, aes(x = Segment, y = log10(LOQ), fill = Segment)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill = "white") +
scale_fill_manual(values = segmentCols) +
labs(
title = "",
x = "Segment",
y = "LOQ, log10"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
legend.position = "none",
text = element_text(size = 12)
) +
geom_hline(aes(yintercept = log10(loqMin)), lty = 2, col = "grey50")
Per Slide and
Segment
ggplot(data = statDf, aes(x = Segment, y = log10(LOQ), fill = Segment)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill = "white") +
scale_fill_manual(values = segmentCols) +
facet_grid(~Slide) +
labs(
title = "",
x = "Segment",
y = "LOQ, log10"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0),
legend.position = "none",
text = element_text(size = 12)
) +
geom_hline(aes(yintercept = log10(loqMin)), lty = 2, col = "grey50")
Per Sample
ggplot(data = statDf, aes(x = Sample, y = log10(LOQ), group = Segment)) +
geom_line(aes(color = Segment), lwd = 0.5) +
geom_point(aes(color = Segment)) +
scale_color_manual(values = segmentCols) +
labs(
title = "",
x = "",
y = "LOQ, log10"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0),
# legend.position = "none",
text = element_text(size = 12)
) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
geom_hline(aes(yintercept = log10(loqMin)), lty = 2, col = "grey50")
loqDf[loqDf < loqMin] <- loqMin
pData(newSet)$LOQ <- loqDf
loqMat <- t(esApply(newSet, MARGIN = 1, FUN = function(x) {
x > LOQ[, module]
}))
loqMat <- loqMat[fData(newSet)$TargetName, ] # Ordering
Segment gene detection
- Filter out segments with exceptionally low signal that have
a small fraction of panel genes detected above the LOQ relative to the
other segments in the study
pData(newSet)$GenesDetected <- colSums(loqMat, na.rm = TRUE)
pData(newSet)$GeneDetectionRate <- pData(newSet)$GenesDetected / nrow(newSet)
pData(newSet)$DetectionThreshold <- cut(pData(newSet)$GeneDetectionRate, breaks = geneDetectionRateBins, labels = geneDetectionRateBinLabels)
rateMat <- pData(newSet)
rateMat$Segment <- factor(rateMat$Segment, levels = segmentOrder)
ggplot(rateMat, aes(x = DetectionThreshold)) +
geom_bar(aes(fill = Segment)) +
geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
scale_fill_manual(values = segmentCols) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0),
text = element_text(size = 12)
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
labs(
x = "Gene Detection Rate",
y = "Number of Segments/Samples",
fill = "Segment"
) +
facet_grid(as.formula(paste("~", segmentQC_rowBy)))
statDf <- data.frame(
Sample = protocolData(newSet)$AOI,
Segment = pData(newSet)$Segment,
GenesDetected = colSums(loqMat, na.rm = TRUE),
Nuclei = pData(newSet)$nuclei
)
statDf$Segment <- factor(statDf$Segment, segmentOrder)
newSet <- newSet[, pData(newSet)$GeneDetectionRate >= geneDetectionRateThre]
dim(newSet)
## Features Samples
## 18676 143
Neuron
segment <- segmentOrder[1]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = segmentCols[segment]) +
theme_minimal() +
labs(
title = "",
x = "Sample"
) +
coord_flip() +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
legend.position = "none"
) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
Iba1
segment <- segmentOrder[2]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = segmentCols[segment]) +
theme_minimal() +
labs(
title = "",
x = "Sample"
) +
coord_flip() +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
legend.position = "none"
) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
GFAP
segment <- segmentOrder[3]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = segmentCols[segment]) +
theme_minimal() +
labs(
title = "",
x = "Sample"
) +
coord_flip() +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
legend.position = "none"
) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
Neuropil
segment <- segmentOrder[4]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = segmentCols[segment]) +
theme_minimal() +
labs(
title = "",
x = "Sample"
) +
coord_flip() +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
legend.position = "none"
) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
Full
segment <- segmentOrder[5]
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$Sample <- factor(tmpDf$Sample, levels = tmpDf$Sample)
ggplot(tmpDf, aes(x = Sample, y = GenesDetected, fill = Segment)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = segmentCols[segment]) +
theme_minimal() +
labs(
title = "",
x = "Sample"
) +
coord_flip() +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
legend.position = "none"
) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
Gene detection
rate
- Determine the detection rate for genes across the study
where individual genes are detected to varying degrees in the segments.
In other words, we will calculate the total number of genes detected in
different percentages of segments to filter out low detected
genes
loqMat <- loqMat[, colnames(newSet)]
fData(newSet)$DetectedSegments <- rowSums(loqMat, na.rm = TRUE)
fData(newSet)$DetectionRate <- fData(newSet)$DetectedSegments / nrow(pData(newSet))
geneDetectionRateDf <- data.frame(
Gene = rownames(newSet),
Number = fData(newSet)$DetectedSegments,
DetectionRate = percent(fData(newSet)$DetectionRate)
)
geneDetectionRateDf <- geneDetectionRateDf[order(geneDetectionRateDf$Number, geneDetectionRateDf$DetectionRate, geneDetectionRateDf$Gene), ]
finalSet <- newSet[fData(newSet)$DetectionRate >= geneDetectionRateThre, ]
dim(finalSet)
## Features Samples
## 9711 143
Normalization (Step 8)
- Upper-quartile (Q3) normalization method estimates a
normalization factor per segment to bring the segment data distributions
together
finalSet <- normalize(finalSet, norm_method = "quant", desiredQuantile = .75, toElt = "q_norm")
annot <- pData(finalSet)
segmentIdx <- c()
for (segment in segmentOrder) segmentIdx <- c(segmentIdx, which(annot$Segment == segment))
rawMat <- assayData(finalSet)$exprs
colnames(rawMat) <- annot$Library
rawDf <- melt(rawMat)
colnames(rawDf) <- c("Gene", "Sample", "Expression")
rawDf$Segment <- rep(annot$Segment, each=nrow(rawMat))
rawDf$Segment <- factor(rawDf$Segment, levels = segmentOrder)
normMat <- assayData(finalSet)$q_norm
colnames(normMat) <- annot$Library
normDf <- melt(normMat)
colnames(normDf) <- c("Gene", "Sample", "Expression")
normDf$Segment <- rep(annot$Segment, each=nrow(rawMat))
normDf$Segment <- factor(normDf$Segment, levels = segmentOrder)
Per Segment
boxplot(log10(Expression) ~ Segment, data = rawDf, col = segmentCols, pch = 20, ylab = "Raw count, log10", xlab = "Segment")
boxplot(log10(Expression) ~ Segment, data = normDf, col = segmentCols, pch = 20, ylab = "Upper-quartile norm, log10", xlab = "Segment")
Per Sample
rawMat <- rawMat[, segmentIdx]
boxplot(log10(rawMat), col = segmentCols[annot$Segment[segmentIdx]], pch = 20, names = NA, xlab = "Sample", ylab = "Raw count, log10")
normMat <- normMat[, segmentIdx]
boxplot(log10(normMat), col = segmentCols[annot$Segment[segmentIdx]], pch = 20, names = NA, xlab = "Sample", ylab = "Upper-quartile norm, log10")
QC Summary
- The number of features, i.e., probe or gene, and
samples
Initial |
252 |
18815 |
After QC (analysis-ready) |
143 |
9711 |
countDf <- pData(finalSet) %>% make_long(Slide, Segment, Region)
ggplot(countDf, aes(x = x, next_x = next_x, node = node, next_node = next_node, fill = factor(node), label = node)) +
geom_sankey(flow.alpha = .6, node.color = "gray30") +
geom_sankey_label(size = 3, color = "black", fill = "white") +
scale_fill_viridis_d(option = "A", alpha = 0.95) +
theme_sankey(base_size = 18) +
labs(
x = NULL,
y = NULL
) +
theme_bw() +
theme(
axis.line = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none",
text = element_text(size = 12)
)
- QC threshold values used in this report
SegmentQC |
minSegmentReads |
1000 |
Minimum number of reads |
|
percentTrimmed |
80 |
Minimum % of reads trimmed |
|
percentStitched |
80 |
Minimum % of reads stitched |
|
percentAligned |
75 |
Minimum % of reads aligned |
|
percentSaturation |
50 |
Minimum sequencing saturation (%) |
|
minNuclei |
20 |
Minimum number of nuclei estimated |
|
minArea |
1000 |
Minimum segment area |
ProbeQC |
minProbeRatio |
0.1 |
Geometric mean of a given probe / geometric mean of all
probe |
|
percentFailGrubbs |
20 |
An outlier according to the Grubb’s test (%) |
LOQ |
loqCutoff |
2 |
LOQ cut off value |
|
loqMin |
2 |
LOQ minimum value |
GeneQC |
geneDetectionRateThre |
0.05 |
Minimum gene detection rate |
Analysis
Principal Component Analysis
- Dimensionality reduction while preserving relevant
information
normMat <- assayData(finalSet)$q_norm
pcaRes <- prcomp(t(normMat), scale = TRUE)
print(paste0("PC1-4 explained: ", paste0(round(summary(pcaRes)$importance[2, c(1:4)] * 100, 1), "%", collapse = ", ")))
## [1] "PC1-4 explained: 17.1%, 10.8%, 4.4%, 3.3%"
pcaDf <- data.frame(
Sample = annot$Library,
X = pcaRes$x[, 1],
Y = pcaRes$x[, 2],
Slide = annot$Slide,
Segment = annot$Segment,
Region = annot$Region
)
pcaDf$Slide <- factor(pcaDf$Slide, levels = slideOrder)
pcaDf$Segment <- factor(pcaDf$Segment, levels = segmentOrder)
pcaDf$Region <- factor(pcaDf$Region)
Slide
pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Slide, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = slideCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
Segment
pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Segment, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = segmentCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
Region
pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Region, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = regionCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
Differential
Expression Analysis
- Identify differential expressed genes between Cortex and Hippocampus
in Iba1 (immune)
annot <- pData(finalSet)
exprMat <- assayData(finalSet)$q_norm
iba1Idx <- which(annot$Segment == "Iba1")
cortexIdx <- which(annot$Region == "Cortex")
hippoIdx <- which(annot$Region == "Hippocampus")
statDf <- data.frame(
Gene = fData(finalSet)$TargetName,
Log2FC = apply(exprMat, 1, log2fc, idx1 = intersect(iba1Idx, cortexIdx), idx2 = intersect(iba1Idx, hippoIdx)),
P = apply(exprMat, 1, ttest, idx1 = intersect(iba1Idx, cortexIdx), idx2 = intersect(iba1Idx, hippoIdx))
)
statDf$FDR <- p.adjust(statDf$P, method="fdr")
statDf$DEG <- "Not"
statDf$DEG[statDf$Log2FC > 1 & statDf$P < 0.05] <- "Up"
statDf$DEG[statDf$Log2FC < -1 & statDf$P < 0.05] <- "Down"
statDf$DEG <- factor(statDf$DEG, levels=c("Up", "Not", "Down"))
statDf$Label <- statDf$Gene
statDf$Label[statDf$DEG == "Not"] <- NA
table(statDf$DEG)
##
## Up Not Down
## 69 9602 40
ggplot(data = statDf, aes(x = Log2FC, y = -log10(P), col = DEG, label = Label)) +
geom_point(cex = 3) +
theme_minimal() +
geom_text_repel(max.overlaps = 10) +
geom_vline(xintercept = c(-1, 1), col = "grey40", lty = 2) +
geom_hline(yintercept = -log10(0.05), col = "grey40", lty = 2) +
scale_color_manual(
name = "DEG", values = c("#AE3C32", "#F9FBCB", "#46639C"),
labels = c("Up-reg. in Cortex", "Not significant", "Down-reg. in Cortex")
) +
labs(
title = "Cortex vs. Hippocampus in Iba1",
subtitle = paste0("n(Cortex) = ", length(intersect(iba1Idx, cortexIdx)), "; n(Hippocampus) = ", length(intersect(iba1Idx, hippoIdx))),
x = "Log2-Fold change",
y = "-Log10(nominal P)"
) +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank()
)