Preprocessing/QC
Preparation
- Set working directory to load files
# Do not run
baseDir <- "WORKING_DIRECTORY" # where the Annot/DCC/PKC/Settings folders are located
setwd(baseDir)
library(NanoStringNCTools)
library(GeoMxWorkflows)
library(GeomxTools)
library(stringr)
library(dplyr)
library(scales)
library(ggplot2)
library(ggpubr)
library(ggsankey)
library(plotly)
library(reshape2)
library(DESeq2)
library(DescTools)
Compile a GeoMx
object (Step 1)
- Load DCC/PKC/Annotation files and export to a GeoMx object,
e.g., geomxSet.RDS
dccFiles <- dir(file.path(baseDir, "DCC"), pattern = ".dcc$", full.names = TRUE, recursive = TRUE)
pkcFile <- dir(file.path(baseDir, "PKC"), pattern = ".pkc$", full.names = TRUE, recursive = TRUE)
annotFile <- dir(file.path(baseDir, "Annot"), pattern = ".xlsx$", full.names = TRUE, recursive = TRUE)
initSet <- readNanoStringGeoMxSet(
dccFiles = dccFiles,
pkcFiles = pkcFile,
phenoDataFile = annotFile,
phenoDataSheet = "Female",
phenoDataDccColName = "FileName",
protocolDataColNames = c("ROI", "AOI"),
experimentDataColNames = c("Panel")
)
dim(initSet)
## Features Samples
## 20175 46
assayData(initSet)$exprs[c(1:5), c(40:41)]
## DSP-1001660023394-A-H06.dcc DSP-1001660023394-A-H07.dcc
## RTS0060887 0 17
## RTS0060888 1 11
## RTS0060889 0 25
## RTS0060890 2 34
## RTS0060891 0 21
Assign a pseudo value
(Step 2)
- Shift any expression counts with a value of zero (0) to one
(1) to enable in downstream transformations
gSet <- shiftCountsOne(initSet, useDALogic = TRUE)
assayData(gSet)$exprs[c(1:5), c(40:41)]
## DSP-1001660023394-A-H06.dcc DSP-1001660023394-A-H07.dcc
## RTS0060887 1 17
## RTS0060888 1 11
## RTS0060889 1 25
## RTS0060890 2 34
## RTS0060891 1 21
Segment QC (Step
3)
- Assess sequencing quality and adequate tissue sampling for
every ROI/AOI segment
- Any segments below the dotted line, i.e., recommended thresholds,
will be filtered out
- Row: Model, Column: Segment
module <- gsub(".pkc", "", annotation(gSet))
gSet <- setSegmentQCFlags(gSet, qcCutoffs = segmentQcParams)
qcMat <- sData(gSet)
qcMat$Segment <- factor(qcMat$Segment, levels = segmentOrder)
qcMat$Model <- factor(qcMat$Model, levels = modelOrder)
suppressWarnings(histQC(qcMat, "Trimmed (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_trimmedThre, cols = modelCols))

suppressWarnings(histQC(qcMat, "Stitched (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_stitchedThre, cols = modelCols))

suppressWarnings(histQC(qcMat, "Aligned (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_alignedThre, cols = modelCols))

suppressWarnings(histQC(qcMat, "Saturated (%)", segmentQC_colBy, segmentQC_rowBy, segmentQC_saturatedThre, cols = modelCols))

suppressWarnings(histQC(qcMat, "area", segmentQC_colBy, segmentQC_rowBy, segmentQC_areaThre, "log10", "AOI Area (log10)", cols = modelCols))

suppressWarnings(histQC(qcMat, "nuclei", segmentQC_colBy, segmentQC_rowBy, segmentQC_nucleiThre, "log10", "AOI nuclei count", cols = modelCols))

segmentQcResults <- protocolData(gSet)[["QCFlags"]]
flagColumns <- colnames(segmentQcResults)
qcSummary <- data.frame(Pass = colSums(!segmentQcResults[, flagColumns]), Warning = colSums(segmentQcResults[, flagColumns]))
segmentQcResults$QCStatus <- apply(segmentQcResults, 1L, function(x) {
ifelse(sum(x) == 0L, "PASS", "WARNING")
})
qcSummary["TOTAL FLAGS", ] <- c(sum(segmentQcResults[, "QCStatus"] == "PASS"), sum(segmentQcResults[, "QCStatus"] == "WARNING"))
qcSummary[, "TOTAL"] <- apply(qcSummary, 1, sum)
qcSummary
## Pass Warning TOTAL
## LowReads 46 0 46
## LowTrimmed 46 0 46
## LowStitched 46 0 46
## LowAligned 46 0 46
## LowSaturation 46 0 46
## LowNegatives 46 0 46
## LowNuclei 46 0 46
## LowArea 45 1 46
## TOTAL FLAGS 45 1 46
gSet <- gSet[, segmentQcResults$QCStatus == "PASS"]
dim(gSet)
## Features Samples
## 20175 45
Background signal
(Step 4)
- Calculate negative count which is the geometric mean of the
several unique negative probes in the GeoMx panel that do not target
mRNA and establish the background count level per segment
negCol <- "NegGeoMean"
negativeGeoMeans <- esBy(
negativeControlSubset(gSet),
GROUP = "Module",
FUN = function(x) {
assayDataApply(x, MARGIN = 2, FUN = ngeoMean, elt = "exprs")
}
)
protocolData(gSet)[[negCol]] <- negativeGeoMeans
pData(gSet)[, negCol] <- sData(gSet)[[negCol]]
backgrounMat <- sData(gSet)
backgrounMat <- backgrounMat[, c("NegGeoMean", segmentQC_colBy, segmentQC_rowBy)]
backgrounMat$Segment <- factor(backgrounMat$Segment, levels = segmentOrder)
backgrounMat$Model <- factor(backgrounMat$Model, levels = modelOrder)
suppressWarnings(histQC(backgrounMat, "NegGeoMean", segmentQC_colBy, segmentQC_rowBy, loqCutoff, "log10", "GeoMean(negative probes)", cols = modelCols))

Probe-level QC (Step
5)
- Remove low-performing probes. In short, this QC is an
outlier removal process, whereby probes are either removed entirely from
the study (global) or from specific segments (local)
gSet <- setBioProbeQCFlags(gSet, qcCutoffs = probeQcParams, removeLocalOutliers = FALSE)
probeQcResults <- fData(gSet)[["QCFlags"]]
qcDf <- data.frame(
Global_outlier = length(which(fData(gSet)[["QCFlags"]][, c("GlobalGrubbsOutlier")] == TRUE)),
Local_outlier = length(which(fData(gSet)[["QCFlags"]][, c("LowProbeRatio")] == TRUE))
)
qcDf
## Global_outlier Local_outlier
## 1 0 0
probeQcPassed <- subset(
gSet,
fData(gSet)[["QCFlags"]][, c("LowProbeRatio")] == FALSE &
fData(gSet)[["QCFlags"]][, c("GlobalGrubbsOutlier")] == FALSE
)
gSet <- probeQcPassed
dim(gSet)
## Features Samples
## 20175 45
Gene-level
aggregation (Step 6)
- Generate a gene-level count matrix where the count for any
gene with multiple probes per segment is calculated as the geometric
mean of those probes
newSet <- aggregateCounts(gSet)
newSet <- subset(newSet, fData(newSet)$TargetName != "NegProbe-WTX") # Remove negative probe in downstream analysis
dim(newSet)
## Features Samples
## 19962 45
Segment QC (Step
7)
Relationship
between Area and Number of Nuclei
- Check the AOI area and the number of nuclei
estimated
lowLvqcDf <- data.frame(
Group = pData(newSet)$Group,
Model = pData(newSet)$Model,
Library = pData(newSet)$Library,
Segment = pData(newSet)$Segment,
Area = pData(newSet)$area,
Nuclei = pData(newSet)$nuclei
)
lowLvqcDf$Group <- factor(lowLvqcDf$Group, levels=groupOrder)
lowLvqcDf$Model <- factor(lowLvqcDf$Model, levels=modelOrder)
lowLvqcDf$Segment <- factor(lowLvqcDf$Segment, levels=segmentOrder)
dodge <- position_dodge(width = 0.5)
suppressWarnings({
ggplot(data = lowLvqcDf, aes(x = Model, y = Area, fill = Model)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill="white") +
scale_fill_manual(values = modelCols) +
labs(
title = "",
x = "",
y = "Area"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
legend.position = "none",
text = element_text(size = 12)
) +
scale_y_continuous(trans = "log10")
})

suppressWarnings({
ggplot(data = lowLvqcDf, aes(x = Model, y = Nuclei, fill = Model)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill="white") +
scale_fill_manual(values = modelCols) +
labs(
title = "",
x = "",
y = "#Nuclei"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
legend.position = "none",
text = element_text(size = 12)
) +
scale_y_continuous(trans = "log10")
})

suppressWarnings({
scatterPlot <- ggplot(data = lowLvqcDf, aes(x = Area, y = Nuclei, color = Group, label = Group, label2 = Library)) +
geom_point() +
scale_color_manual(values = groupCols) +
labs(
title = "",
x = "Area",
y = "#Nuclei"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10")
})
ggplotly(scatterPlot)
suppressWarnings({
scatterPlot <- ggplot(data = lowLvqcDf, aes(x = Area, y = Nuclei, color = Model, label = Model, label2 = Library)) +
geom_point() +
scale_color_manual(values = modelCols) +
labs(
title = "",
x = "Area",
y = "#Nuclei"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10")
})
ggplotly(scatterPlot)
suppressWarnings({
scatterPlot <- ggplot(data = lowLvqcDf, aes(x = Area, y = Nuclei, color = Segment, label = Segment, label2 = Library)) +
geom_point() +
scale_color_manual(values = segmentCols) +
labs(
title = "",
x = "Area",
y = "#Nuclei"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
) +
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10")
})
ggplotly(scatterPlot)
Calculate LOQ
(limit of quantification)
- Determine the LOQ where the LOQ is calculated based on the
distribution of negative control probes and is intended to approximate
the quantifiable limit of gene expression per segment
loqDf <- data.frame(row.names = colnames(newSet))
varNames <- paste0(c("NegGeoMean_", "NegGeoSD_"), module)
if (all(varNames[1:2] %in% colnames(pData(newSet)))) {
loqDf[, module] <- pData(newSet)[, varNames[1]] * (pData(newSet)[, varNames[2]]^loqCutoff)
}
statDf <- data.frame(
Model = pData(newSet)$Model,
Library = protocolData(newSet)$AOI,
Segment = pData(newSet)$Segment,
LOQ = loqDf[, 1]
)
statDf <- statDf[order(statDf$LOQ), ]
statDf$Model <- factor(statDf$Model, levels=modelOrder)
statDf$Segment <- factor(statDf$Segment, levels = segmentOrder)
statDf$Library <- factor(statDf$Library, levels = statDf$Library)
dodge <- position_dodge(width = 0.5)
suppressWarnings({
ggplot(data = statDf, aes(x = Model, y = log10(LOQ), fill = Model)) +
geom_violin(position = dodge, size = 0) +
geom_boxplot(width = 0.1, position = dodge, fill = "white") +
scale_fill_manual(values = modelCols) +
labs(
title = "",
x = "",
y = "LOQ, log10"
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
legend.position = "none",
text = element_text(size = 12)
) +
geom_hline(aes(yintercept = log10(loqMin)), lty = 2, col = "grey50")
})

loqDf[loqDf < loqMin] <- loqMin
pData(newSet)$LOQ <- loqDf
loqMat <- t(esApply(newSet, MARGIN = 1, FUN = function(x) {
x > LOQ[, module]
}))
loqMat <- loqMat[fData(newSet)$TargetName, ] # Ordering
Segment gene
detection
- Filter out segments with exceptionally low signal that have
a small fraction of panel genes detected above the LOQ relative to the
other segments in the study
pData(newSet)$GenesDetected <- colSums(loqMat, na.rm = TRUE)
pData(newSet)$GeneDetectionRate <- pData(newSet)$GenesDetected / nrow(newSet)
pData(newSet)$DetectionThreshold <- cut(pData(newSet)$GeneDetectionRate, breaks = geneDetectionRateBins, labels = geneDetectionRateBinLabels)
rateMat <- pData(newSet)
rateMat$Model <- factor(rateMat$Model, levels = modelOrder)
rateMat$Segment <- factor(rateMat$Segment, levels = segmentOrder)
suppressWarnings({
ggplot(rateMat, aes(x = DetectionThreshold)) +
geom_bar(aes(fill = Segment)) +
geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
scale_fill_manual(values = segmentCols) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0, hjust = 0),
text = element_text(size = 12),
legend.position = "none"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
labs(
x = "Gene Detection Rate",
y = "Number of Segments"
) +
facet_grid(as.formula(paste("~", segmentQC_colBy)))
})

statDf <- data.frame(
Model = pData(newSet)$Model,
AOI = protocolData(newSet)$AOI,
Segment = pData(newSet)$Segment,
GenesDetected = colSums(loqMat, na.rm = TRUE),
Nuclei = pData(newSet)$nuclei
)
statDf$Model <- factor(statDf$Model, modelOrder)
statDf$Segment <- factor(statDf$Segment, segmentOrder)
for (segment in segmentOrder) {
tmpDf <- statDf[which(statDf$Segment == segment), ]
tmpDf <- tmpDf[order(tmpDf$GenesDetected, decreasing = F), ]
tmpDf$AOI <- factor(tmpDf$AOI, levels = tmpDf$AOI)
barplot <- ggplot(tmpDf, aes(x = AOI, y = GenesDetected, fill = Model)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = modelCols) +
theme_minimal() +
labs(
title = "",
x = "AOI"
) +
coord_flip() +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0),
legend.position = "none"
) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE))
print(barplot)
rm(barplot)
}




newSet <- newSet[, pData(newSet)$GeneDetectionRate >= geneDetectionRateThre]
dim(newSet)
## Features Samples
## 19962 26
Gene detection
rate
- Determine the detection rate for genes across the study
where individual genes are detected to varying degrees in the segments.
In other words, we will calculate the total number of genes detected in
different percentages of segments to filter out low detected
genes
loqMat <- loqMat[, colnames(newSet)]
fData(newSet)$DetectedSegments <- rowSums(loqMat, na.rm = TRUE)
fData(newSet)$DetectionRate <- fData(newSet)$DetectedSegments / nrow(pData(newSet))
geneDetectionRateDf <- data.frame(
Gene = rownames(newSet),
Number = fData(newSet)$DetectedSegments,
DetectionRate = percent(fData(newSet)$DetectionRate)
)
geneDetectionRateDf <- geneDetectionRateDf[order(geneDetectionRateDf$Number, geneDetectionRateDf$DetectionRate, geneDetectionRateDf$Gene), ]
finalSet <- newSet[fData(newSet)$DetectionRate >= geneDetectionRateThre, ]
dim(finalSet)
## Features Samples
## 7276 26
Normalization (Step
8)
annot <- pData(finalSet)
segmentIdx <- c()
for (segment in segmentOrder) segmentIdx <- c(segmentIdx, which(annot$Segment == segment))
rawMat <- assayData(finalSet)$exprs
colnames(rawMat) <- annot$Library
rawDf <- melt(rawMat)
colnames(rawDf) <- c("Gene", "Slide", "Expression")
rawDf$Segment <- rep(annot$Segment, each=nrow(rawMat))
rawDf$Segment <- factor(rawDf$Segment, levels = segmentOrder)
boxplot(log10(rawMat[, segmentIdx] + 1), col = segmentCols[annot$Segment][segmentIdx], names = sapply(str_split(colnames(rawMat), "-"), "[[", 2)[segmentIdx], pch = 20, xlab = "", ylab = "Raw count, log10", las = 3)

- Apply the variance stabilizing transformation (VST) method
via DESeq2 for normalization
coldata <- annot[, c("Group", "Segment")]
coldata$Group <- factor(coldata$Group, levels = groupOrder)
rownames(coldata) <- annot$Library
threeGenes <- c("D830030K20Rik", "LOC118568634", "Gm10406") # decimal point introduces when probes map to a gene
for (gene in threeGenes) {
exprOne <- rawMat[which(rownames(rawMat) == gene),]
exprOne <- round(exprOne)
rawMat[which(rownames(rawMat) == gene),] <- exprOne
} # DESeq2 takes only integers
dds <- DESeqDataSetFromMatrix(countData = rawMat, colData = coldata, design = ~Group)
vsd <- varianceStabilizingTransformation(dds, fitType = "mean") # VST
vstMat <- assay(vsd)
colnames(vstMat) <- rownames(annot)
assayDataElement(finalSet, "vst", validate=TRUE) <- vstMat
saveRDS(finalSet, file.path(outDir, "02_finalSet.GeoMx.RDS"))
normMat <- assayData(finalSet)$vst
colnames(normMat) <- annot$Library
normDf <- melt(normMat)
colnames(normDf) <- c("Gene", "Slide", "Expression")
normDf$Segment <- rep(annot$Segment, each=nrow(rawMat))
normDf$Segment <- factor(normDf$Segment, levels = segmentOrder)
boxplot(log10(normMat[, segmentIdx] + 1), col = segmentCols[annot$Segment][segmentIdx], names = sapply(str_split(colnames(rawMat), "-"), "[[", 2)[segmentIdx], pch = 20, xlab = "", ylab = "Upper-quartile norm, log10", las=3)

Summarize the
information content
- Principal Component Analysis
annot <- pData(finalSet)
exprMat <- assayData(finalSet)$vst
pcaRes <- prcomp(t(exprMat), scale = TRUE)
pcaDf <- data.frame(
Sample = annot$Library,
X = pcaRes$x[, 1],
Y = pcaRes$x[, 2],
Group = annot$Group,
Model = annot$Model,
Segment = annot$Segment
)
pcaDf$Group <- factor(pcaDf$Group, levels = groupOrder)
pcaDf$Model <- factor(pcaDf$Model, levels = modelOrder)
pcaDf$Segment <- factor(pcaDf$Segment, levels = segmentOrder)
beforeMat <- assayData(finalSet)$exprs
beforePcaRes <- prcomp(t(beforeMat), scale = TRUE)
beforePcaDf <- data.frame(
Sample = annot$Library,
X = beforePcaRes$x[, 1],
Y = beforePcaRes$x[, 2],
Group = annot$Group,
Model = annot$Model,
Segment = annot$Segment
)
beforePcaDf$Group <- factor(beforePcaDf$Group, levels = groupOrder)
beforePcaDf$Model <- factor(beforePcaDf$Model, levels = modelOrder)
beforePcaDf$Segment <- factor(beforePcaDf$Segment, levels = segmentOrder)
Group
After VST
suppressWarnings({
pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Group, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = groupCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
})
Before VST
suppressWarnings({
pcaPlot <- ggplot(data = beforePcaDf, aes(x = X, y = Y, color = Group, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = groupCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
})
Model
After VST
suppressWarnings({
pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Model, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = modelCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
})
Before VST
suppressWarnings({
pcaPlot <- ggplot(data = beforePcaDf, aes(x = X, y = Y, color = Model, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = modelCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
})
Segment
After VST
suppressWarnings({
pcaPlot <- ggplot(data = pcaDf, aes(x = X, y = Y, color = Segment, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = segmentCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
})
Before VST
suppressWarnings({
pcaPlot <- ggplot(data = beforePcaDf, aes(x = X, y = Y, color = Segment, label = Sample)) +
geom_point(size = 2, shape = 20) +
scale_color_manual(values = segmentCols) +
labs(
x = paste0("PC1 (", round(summary(pcaRes)$importance[2, c(1)] * 100, 1), "%)"),
y = paste0("PC2 (", round(summary(pcaRes)$importance[2, c(2)] * 100, 1), "%)")
) +
theme_bw() +
theme(
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
text = element_text(size = 12)
)
ggplotly(pcaPlot)
})
QC Summary
- The number of features, i.e., probe or gene, and
samples
| Initial |
46 |
20175 |
| After QC (analysis-ready) |
26 |
7276 |
countDf <- pData(finalSet) %>% make_long(Model, Segment)
ggplot(countDf, aes(x = x, next_x = next_x, node = node, next_node = next_node, fill = factor(node), label = node)) +
geom_sankey(flow.alpha = .6, node.color = "gray30") +
geom_sankey_label(size = 3, color = "black", fill = "white") +
scale_fill_viridis_d(option = "A", alpha = 0.95) +
theme_sankey(base_size = 18) +
labs(
x = NULL,
y = NULL
) +
theme_bw() +
theme(
axis.line = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none",
text = element_text(size = 12)
)
