In this tutorial, we use the pbj
R
package to model sex differences age-related changes in gray matter in the cross-sectional Nathan Kline Institute Rockland Sample (NKI-RS). The data set includes 683 participants ages 6-85.
The outcome images are voxel-based morphometry (VBM).
The intensity of each voxel is a point-wise estimate of gray matter volume Ashburner (2009Ashburner, John. 2009. “Computational Anatomy with the SPM Software.” Magnetic Resonance Imaging 27 (8): 1163–74.).
The analyses in this tutorial were presented in our paper REF.
Using this package and the tools available through R
and Rstudio allows users to perform a fully reproducible group level analysis within R and produce aesthetically attractive html reports.
knitr::knit_hooks$set(GPs=function(before, options, envir){
if (before){
cex=1.5
# graphical parameters
fgcol = 'white'
bgcol = 'black'
par(mgp=c(0.9,.7,0), lwd=1.5, lend=2,
cex.lab=cex, cex.axis=0.8*cex, cex.main=cex,
mar=c(0.5,0.5,0.5,0), bty='l', oma=c(0,0,0,0), bg=bgcol, fg=fgcol, col.axis=fgcol, col.lab=fgcol, col.main = fgcol, col.sub=fgcol)
}})
knitr::opts_chunk$set(echo = TRUE, GPs=TRUE, cache=FALSE, cache.lazy=FALSE, eval=TRUE)
path = Sys.getenv('PATH')
path = Sys.setenv('PATH'=paste(path, '/home/rstudio/.local/bin', sep=':'))
pbj
packageThe latest stable version of the pbj
package can be installed from github by specifying the master
branch. Currently, the longitudinal
branch includes some formatting changes and has edits to accommodate longitudinal data.
# install the latest versions of the packages to perform these analyses.
devtools::install_github('simonvandekar/pbj', ref='longitudinal')
#devtools::install_github('statimagcoll/NIsim')
In this section we walk through setting up the data for the group level image analysis. This first chunk of code loads the libraries needed to perform the group image analysis.
The RNifti
package is used to read and write Nifti images into R
.
The pbj
package includes our functions needed to perform group level neuroimage analysis.
The papayaWidget
package is used to embed interactive visualizations into this html document.
The ns
package includes functions to fit natural cubic splines.
### LIBRARIES ###
library(pbj) # pbj package
library(RNifti) # Nifti I/O. Loaded with pbj
library(fslr) # for susan smoothing from FSL
## Loading required package: oro.nifti
## oro.nifti 0.11.0
##
## Attaching package: 'oro.nifti'
## The following object is masked from 'package:RNifti':
##
## origin
## Loading required package: neurobase
library(papayaWidget) # image viewer
library(splines) # ns
library(magrittr) # %>%
library(callr) # used to run bootstraps in the background
After loading the libraries, we are ready to setup the files that we will need for the analysis. To perform the analysis, we need the following data
.nii.gz
VBM images, assumed to be spatially registered to template space.The data file can be loaded into R
from a .csv
file.
# sets path for data file
dbdatafile = '/media/disk2/pbj/data/rockland/demographic/RocklandBehavioral.csv'
# load in data file
dat = read.csv(dbdatafile, stringsAsFactors = TRUE)
At this stage, non-image analysis tools can be used to explore the data set and make sure there are no missing files or values. Here, we subset the data set to variables that we need for the analysis (the image paths, age, and sex), and then display tables of the output.
The last line using str
gives a summary of the structure of the data set.
# subset the data to relevant variables
dat = dat[,c('AnonymizedID', 'IDandSession', 'files', 'files2mm', 'files2mmsm8', 'age', 'sex')]
# View the top of the data file
# row.names, digits, and booktabs are optional arguments for displaying the
knitr::kable(head(dat[,c('AnonymizedID', 'IDandSession', 'age', 'sex')]), row.names = FALSE, digits=3, booktabs=T, caption='Covariates for first 10 participants.')
Table 1: Covariates for first 10 participants.
AnonymizedID | IDandSession | age | sex |
---|---|---|---|
A00008326 | A00008326_V2 | 59.337 | Female |
A00008399 | A00008399_V2 | 23.389 | Male |
A00010893 | A00010893_V2 | 28.910 | Male |
A00013809 | A00013809_V2 | 61.956 | Female |
A00018030 | A00018030_V2 | 11.674 | Female |
A00019903 | A00019903_V2 | 83.184 | Male |
knitr::kable(head(dat[,'files2mm', drop=FALSE]), row.names = FALSE, digits=3, booktabs=T, caption='Nifti image paths for first 10 participants.')
Table 1: Nifti image paths for first 10 participants.
files2mm |
---|
/media/disk2/pbj/data/rockland/neuroimaging/A00008326/A00008326_V2/NDW_ROCKLAND-x-A00008326-x-A00008326_V2-x-cat12_ndw_v2-x-5623a858-bd5c-4f2f-9754-8db208d94b07/GRAY_MNORM/mwp1t1_2mm.nii.gz |
/media/disk2/pbj/data/rockland/neuroimaging/A00008399/A00008399_V2/NDW_ROCKLAND-x-A00008399-x-A00008399_V2-x-cat12_ndw_v2-x-39c52c5c-c63b-4e29-9403-165f58b4e080/GRAY_MNORM/mwp1t1_2mm.nii.gz |
/media/disk2/pbj/data/rockland/neuroimaging/A00010893/A00010893_V2/NDW_ROCKLAND-x-A00010893-x-A00010893_V2-x-cat12_ndw_v2-x-fe66b52e-bcd2-4556-a809-e8a77ae93667/GRAY_MNORM/mwp1t1_2mm.nii.gz |
/media/disk2/pbj/data/rockland/neuroimaging/A00013809/A00013809_V2/NDW_ROCKLAND-x-A00013809-x-A00013809_V2-x-cat12_ndw_v2-x-9f647603-cfab-4785-b37d-690eb1f85982/GRAY_MNORM/mwp1t1_2mm.nii.gz |
/media/disk2/pbj/data/rockland/neuroimaging/A00018030/A00018030_V2/NDW_ROCKLAND-x-A00018030-x-A00018030_V2-x-cat12_ndw_v2-x-8463cbc8-2a47-4b30-abf7-7fc637ed2706/GRAY_MNORM/mwp1t1_2mm.nii.gz |
/media/disk2/pbj/data/rockland/neuroimaging/A00019903/A00019903_V2/NDW_ROCKLAND-x-A00019903-x-A00019903_V2-x-cat12_ndw_v2-x-58401f59-faad-45ff-b1ba-5382c7b08366/GRAY_MNORM/mwp1t1_2mm.nii.gz |
The mask image identifies which voxels in the image to include in the analysis. The commented code shows how the mask file was made, as the intersection of all voxels that are above zero in all of the participants’ images.
# mask file (created May 12, 2021)
maskfile = "/media/disk2/pbj/data/rockland/neuroimaging/overlap_mask_2mm.nii.gz"
## CREATE MASK
# if(!file.exists(maskfile)){
# imgs = readNifti(dat$files)
# mask = imgs[[1]]
# mask[,,] = 0
# imgs = apply(simplify2array(imgs), 1:3, function(v) sum(v>0))
# mask[sum(imgs)==nrow(dat)] = 1
# writeNifti(mask, file=maskfile)
# }
The template image is included at this stage to make visualization seamless later on. In this case, all of the VBM data are in the MNI template space.
templatefile = '/usr/local/fsl/data/standard/MNI152_T1_2mm_brain.nii.gz'
# reads in image and creates lightbox view.
image(readNifti(templatefile))
In addition to the data, we set the following parameters for the analysis.
The ncores
variable specifies the number of cores to use when running parallel processes.
The datafile
variable specifies where to save the results after running the boostraps.
The bootstrap procedure can take some time to run so we save the results so that it doesn’t have to be rerun when compiling this document.
The last line below sets the seed for the RNG so that the results are reproducible.
# number of cores for parallel things
ncores = 16
# location to save results
pbjdatafile = '/media/disk2/pbj/pbj_ftest/nkirs_bootstrap_results.rdata'
permdatafile = '/media/disk2/pbj/pbj_ftest/nkirs_permutation_results.rdata'
# seed for randomization
set.seed(555)
The input VBM data are at 1mm isotropic resolution and unsmoothed. It is common to downsample and apply smoothing before group level analysis to speed computation, accommodate small regional differences in registration, and reduce noise.
This code downsamples the images to 2mm isotropic resolution and applies smoothing at 8mm FWHM. It first checks whether the downsampled, smoothed output files exist; if not, it downsamples and applies smoothing using functions from the fslr
package.
The second to last line creates an interactive viewer embedded into the html document using papayaWidget
ot visualize the effect of the smoothing in the first participant.
The last line sets the files variable to point to the downsampled smoothed data.
if(!all(file.exists(dat$files2mmsm8))){
## CREATE DOWNSAMPLED DATA
invisible(mcmapply(flirt, infile=dat$files, outfile=dat$files2mm, opts = '-applyxfm', MoreArgs=list(reffile=templatefile, retimg=FALSE), mc.cores = ncores ))
## SMOOTH DOWNSAMPLED DATA
# convert FWHM to sigma
sigma = 8/2.355
invisible(mcmapply(susan, file=dat$files2mm, outfile=dat$files2mmsm4, MoreArgs=list(sigma=sigma, dimg='3', n_usans='0'), mc.cores=ncores))
# sigma = 8: using 8mm smoothing
}
# view files
# papaya(c(template, dat$files2mm[1], dat$files2mmsm4[1]))
# set downsampled smoothed files as the main file
papaya(c(templatefile, dat$files2mm[1], dat$files2mmsm8[1]))
In order to study sex differences in the mean adult life-span curve of gray matter volume we fit the model \[ Y_i(v) = \alpha_0(v) + \alpha_1(v)\text{sex}_i + f_1(\text{age}_i, v) + \text{sex}_i \times f_2(\text{age}_i, v) + E_i(v), \] where \(Y_i(v)\) denotes the VBM image for participant \(i\) and voxel location \(v\). \(\text{sex}_i\) is an indicator variable that is equal to 1 if participant \(i\) is a male. We choose to fit \(f_1\) and \(f_2\) with natural cubic splines on 4 degrees of freedom. The unknown parameters \(\alpha_0(v)\), \(\alpha_1(v)\), \(f_1\), and \(f_2\) quantify the association between each variable and the VBM image at location \(v\).
To study sex differences in the life-span curve, we test the nonlinear age by sex interaction which corresponds to parameters that control the shape of the function \(f_2\). The null hypothesis is that \(f_2(\text{age}_i) = 0\) for all values of \(\text{age}_i\) and all voxel locations \(v\), versus the alternative hypothesis that this is not true, \[\begin{align*} H_0: & f_2(\text{age}, v) = 0 \\ H_1: & f_2(\text{age}) \ne 0. \end{align*}\] Because \(f_2\), is modeled with 4 degrees of freedom the test for \(H_0\) is an F-test at each voxel location, \(v\), on 4 degrees of freedom. This model specification is used to test the nonlinear age by sex interaction. The null hypothesis at each location is that males and females follow the same developmental trajectory.
pbj
uses the full/reduced model specification to test variables in the data set.
The full model, lmfull
, specifies a formula that includes all model terms.
The star indicates to add all main effects and their interactions between the two variables.
The ns
function comes from the splines
package and is used to specify natural cubic splines to model the nonlinear effect of age.
There is no left side to the equation (the dependent/outcome variable) because it is determined by the imaging variables.
The reduced model, lmred
specifies to include main effects of sex and the nonlinear age effect, but not their interaction.
Comparing lmfull
to lmred
will test the terms of the full model that are related to the interaction.
The third line subsets the data to rows that have no missing variables (complete case analysis).
dat$files = dat$files2mmsm8
# Testing the interaction between nonlinear age x sex
lmfull = ~ ns(age, df=4) * sex
lmred = ~ sex + ns(age, df = 4)
dat = dat[apply(!is.na(dat[,all.vars(as.formula(lmfull))]), 1, all), ]
# add a demographic summary table using tangram
To test the parameters corresponding to the \(f_2\), we use a robust test statistic.
The model is fit using the lmPBJ
function and computes the coefficients, the robust test statistic, and objects required to perform inference in the next step.
The robust test statistic ensures that the standard errors are consistently estimated, even if homoskedasticity is violated, i.e., it does not assume the spatial covariance of the image for each subject is the same.
Unlike the parametric Gaussian random field methods implemented in SPM, the methods used in pbj
yield cluster extent p-values that are valid regardless of data smoothness, or cluster-forming threshold (CFT) and perform well in small samples.
pbj
also supports the permutation procedure similar to what is implemented in FSL’s randomise
REF.
The first argument to lmPBJ
is the character vector of outcome images.
The two subsequent arguments are the full and reduced formulas.
The mask
argument takes a niftiImage object or character vector to identify which voxels to include in the analysis.
template
is an optional argument that can be specified to make visualization easier after the model is fit.
data
is a data frame including the variables referenced by the two model formulas.
The last three arguments are default values and adjust how the statistics are computed from the analysis.
robust
is a logical specifying whether robust standard errors should be used, which makes the test statistics valid even under some types of model misspecification.HC3
is a small sample adjustment to the robust standard error estimates that improves coverage performance (it makes the test statistics appear closer to a normal distribution).transform
uses an inverse CDF transformation to make the test statistics appear closer to a normal distribution in small sample sizes. This is a sensible transformation if the null hypothesis is true, but might not be a good idea if the null is false.# lmPBJ -- fits the model and computes the statistical image for the covariate of interest
## using robust test statistics (`robust = TRUE`)
robustStatMap <- lmPBJ(dat$files, form=lmfull, formred=lmred, mask=maskfile, template = templatefile, data=dat, robust = TRUE, HC3 = TRUE, transform = 'none')
The result from this command is an object of class statMap
that contains the following objects:
stat
is a vector the test statistics values for all voxels in the mask.coef
is a matrix of the parameters tested for all voxels in the mask.sqrtSigma
is a list of objects that contain everything required to perform statistical inference in the next step. It includes settings that the user chose when running the lmPBJ
command and features of the test statistic, such as the degrees of freedom and residual degrees of freedom.formulas
is a list of the formulas used to run lmPBJ
.data
is the data set covariates used to fit the model.When multiple parameters are being tested the test statistic is computed as a chi-squared statistical image. We can visualize the image to get an idea of what the results look like before computing p-values for topological features, such as cluster extent p-values.
The image
command can be used to create static images at this stage to create figures for papers or quickly visualize the results.
Because the test statistic is approximately chi-square distributed, we can compute uncorrected image thresholds for visualizing the results.
If we want to view where the uncorrected p-value \(p<0.01\), then we can use the command qchisq(0.01, df=robustStatMap$sqrtSigma$df, lower.tail=FALSE)=
to obtain an uncorrected threshold for the image.
For convenience, this can be done with the image
function by setting the argument pCFT=0.01
.
Running image
without any arguments defaults to a lightbox view.
Alternatively, the user can specify another plane to visualize, or a set of specific slices.
image(robustStatMap, pCFT=0.01)
image(robustStatMap, pCFT=0.01, plane = 'sagittal')
The base graphics function layout
can be used to visualize multiple images in one figure in a custom layout.
# custom layout visualizing inferior frontal cluster
layout(matrix(1:6, byrow=TRUE, nrow=1)); image(robustStatMap, pCFT=0.01, plane = 'axial', slice=25:30)
Papaya can be used to interactively visualize the statistical image.
To do this, we first have to write the results to a nifti image on the hard drive. The first two lines of code create a temporary directory to write the image.
We then use write.statMap
function to write the image to disk.
The final line visualizes the statistical image overlayed on the template using Papaya.
We can use the qchisq
function as above to threshold the statistical image.
# Visualize the chi square image
statmapFolder = tempfile() # generate a random file folder name
dir.create(statmapFolder) # create the directory
files = write.statMap(robustStatMap, outdir=statmapFolder) # write the statistical image
## Writing stat and coef images.
## Writing sqrtSigma object.
papaya(c(templatefile, files$stat )) # view the statistical image
After computing the parametric or robust statistical image for the test of the nonlinear interaction, we can perform different types of inference. Here, we perform inference two ways, using local maxima and cluster extents.
We suggest two methods for estimating the null distribution of the test statistic.
The first is the permutation procedure (similar to randomise
by FSL), which has been shown to work well in previous literature (and in our paper; REFS).
The second is a Rademacher wild bootstrap procedure that we showed to work well in small samples.
Note that, if using the permutation procedure, the test statistic image is no longer robust to heteroskedasticity, and p-values may not maintain the type 1 error rate.
That said, in most cases, we have found in simulations that the permutation procedure works very well at controlling the type 1 error rate.
For a paper, you only need to choose one of the two options. The results should be similar between the two methods. Naturally, we recommend the bootstrap procedure.
We first set some parameters for the analysis:
* cft
is the cluster forming threshold. This line converges the p-value thresholds to the chi-squared scale.
* nboot
is the number of bootstraps or permutations to run. These are used to compute adjusted and unadjusted p-values.
# cluster extent inference
## cluster forming threshold
cft = qchisq(c(0.01, 0.001), df = robustStatMap$sqrtSigma$df, lower.tail = FALSE)
# number of bootstraps (or permutations) to run
nboot=1000
There are many different options for performing inference for neuroimaging data and the pbjInference
function can accommodate a wide range of possibilities.
Below are several examples of ways to run pbjInference
. It is best to include any inference methods you want to consider, so that you don’t have to rerun the bootstrap or permutation procedure each time.
Here are some key arguments to the pbjInference
:
* statMap
is the object returned by lmPBJ.
* statistic
is a function used to compute common topological features of the image.
* nboot
is the number of bootstraps to perform.
* method
controls what resampling procedure to use.
* ...
arguments passed to the statistic
function.
* runMode
controls what output is returned. Default is “cdf”, which returns the cumulative distribution functions used to compute adjusted and unadjusted p-values for each topological feature.
The mmeStat
function is the default statistic
function for pbjInference
and includes options to run inference on the most common types of topological features, including local maxima, cluster mass inference, and cluster extent inference.
To perform cluster extent inference (the default), the mmeStat
function requires that the user specify a vector cft
, which are the thresholds to use to form contiguous clusters in the statistic image. This is passed to pbjInference
by the user.
The final command saves the results in an Rdata file for future use.
In the code below, we use the pbjInferenceBG
command to run the boostrapping in the background. This uses the r_bg
function from the callr
package to run the command in the background, so that we can continue to use this session.
# NEED TO EDIT THIS STILL.
if(!file.exists(pbjdatafile)){
rcallResPBJ <- pbjInferenceBG(robustStatMap, rdata_rds=pbjdatafile, nboot = nboot, method='wild', mask = robustStatMap$mask, cft = cft)
}
if(!file.exists(permdatafile)){
rcallResPerm <- pbjInferenceBG(robustStatMap, rdata_rds=permdatafile, nboot = nboot, method='permutation', mask = robustStatMap$mask, cft = cft)
}
Below is the standard called to pbjInference
that runs in this session (not in the background).
This run will perform inference on local maxima and cluster mass statistics.
# computes cluster mass inference, and inference for maxima. Does not compute CEI
maximaCEIstatMap <- pbjInference(robustStatMap, nboot = nboot, method='wild', mask = robustStatMap$mask, cft = cft, max=TRUE, CMI=TRUE, CEI=FALSE)
The following code loads in the results of the call to pbjInferenceBG
. The resulting object is the statMap
object from the call to lmPBJ
, with the inference results added.
if(file.exists(permdatafile)){
load(permdatafile)
permStatMap <- statMap
}
if(file.exists(pbjdatafile)){
load(pbjdatafile)
robustStatMap <- statMap
}
The table.statMap
function can be used to visualize the statistical results for each topological feature.
The default is to present results for cluster extent inference using the first cft
that was provided by the user.
The table is sorted largest to smallest by the topological features, in this case, the size of the cluster.
The two final columns are the unadjusted and adjusted p-values for each cluster.
Recall that these results are for the test of whether there is an age by sex interaction.
It looks like the first cluster adjusted p-value is “significant” by conventional standards.
We can use the write and visualization methods to export the results or make figures for publications.
permTable = table.statMap(permStatMap)
robustTable = table.statMap(robustStatMap)
# This creates a table using the larger cluster forming threshold specified above.
stringentTable = table.statMap(robustStatMap, cft=cft[2])
knitr::kable(permTable[1:5,], row.names = FALSE,
digits=3, booktabs=T, caption = "Cluster summary table for permutation test results of the age by sex interaction.")
Table 2: Cluster summary table for permutation test results of the age by sex interaction.
cluster ID | Cluster Extent | Centroid (vox) | Unadjusted p-value | FWER p-value |
---|---|---|---|---|
1 | 375 | 44, 88, 28 | 0.000 | 0.050 |
2 | 183 | 29, 33, 24 | 0.001 | 0.268 |
3 | 142 | 52, 39, 10 | 0.002 | 0.373 |
4 | 140 | 36, 27, 12 | 0.002 | 0.383 |
5 | 134 | 40, 58, 42 | 0.002 | 0.409 |
knitr::kable(robustTable[1:5,], row.names = FALSE,
digits=3, booktabs=T, caption = "Cluster summary table for wild bootstrap test results of the age by sex interaction.")
Table 2: Cluster summary table for wild bootstrap test results of the age by sex interaction.
cluster ID | Cluster Extent | Centroid (vox) | Unadjusted p-value | FWER p-value |
---|---|---|---|---|
1 | 375 | 44, 88, 28 | 0.000 | 0.053 |
2 | 183 | 29, 33, 24 | 0.001 | 0.273 |
3 | 142 | 52, 39, 10 | 0.002 | 0.418 |
4 | 140 | 36, 27, 12 | 0.002 | 0.427 |
5 | 134 | 40, 58, 42 | 0.002 | 0.445 |
pbjOutdir = tempdir()
result = write.statMap(robustStatMap, pbjOutdir)
# visualizes template with statistical map and adjusted p-values overlayed
papaya(c(templatefile, result$stat, result$CEI1[1]))
#papaya(c(templatefile, result$stat, result$CMI1[1]))
#image(robustStatMap, plane = 'axial', cft=cft[1], alpha=0.06, roi=1)
rang = range(robustStatMap$stat)
layoutMat = cbind(matrix(1:6, nrow=2, byrow=TRUE) %x% matrix(1, nrow=3, ncol=3) , 7)
layout(layoutMat)
image(robustStatMap, plane = 'axial', cft=cft[1], alpha=0.06, slice=26:31, clusterID = FALSE)
#mtext('Probability', side=3, outer = TRUE, cex=1*cex, font=2)
# these margins adjust the height and width of the color bar bottom,left,top,right
fgcol='white'
par(mar=c(8,4,8,0.5), mgp=c(3,0.6,0), fg=fgcol, col.axis=fgcol, col.lab=fgcol, col.main = fgcol, col.sub=fgcol)
colorBar(pbj:::redyellow(64), min=cft[1], max=rang[2], nticks=4, ylab = 'Chi-sq')
plotResults = function(statMap, emForm=NULL, method='CEI', roiInds=NULL, ind=1, data=NULL){
pbjObj = statMap$pbj
if(is.null(pbjObj)) stop('Must run pbjInference to plot results.')
st = table.statMap(statMap, method, cft)
ind = inferenceIndex(statMap$pbj$obsStat, method=method, cft=cft)
rois = pbjObj$ROIs[[ind]]
obsstat = pbjObj$obsStat[[ind]]
data = statMap$data
# load data
imgs = RNifti::readNifti(statMap$images)
# fit model on average data
full = as.formula(statMap$formulas$full)
red = as.formula(statMap$formulas$reduced)
fullT = terms(full)
redT = terms(red)
# formula elements to condition on
term = attr(fullT, 'term.labels')[!attr(fullT, 'term.labels') %in% attr(redT, 'term.labels') ]
condVars = sapply(all.vars(full), function(x) grepl(x, term) )
condVars = names(condVars)[condVars]
robust = statMap$sqrtSigma$robust
data = get_all_vars(full, data=data)
for(roiInd in roiInds){
data$y = sapply(imgs, function(img) mean(img[ rois==roiInd]) )
fullModel = lm(update.formula(full, y ~ .), data=data)
#redModel = lm(update.formula(red, y ~ .), data=data)
# emmeans argument using condVars
# known warning about as.numeric on characters
suppressWarnings(ats <- apply(data[,condVars], 2, function(x) sort(unique(ifelse(is.na(as.numeric(x)), x, as.numeric(x) ) ) ) ))
emg = emmeans::ref_grid(fullModel, at=ats, data=data)
plotdf = as.data.frame(emmeans::emmeans(emg, ~ age | sex))
cex=1.5
# graphical parameters
fgcol = 'white'
bgcol = 'black'
par(mgp=c(1.7,.7,0), lwd=1.5, lend=2,
cex.lab=cex, cex.axis=0.8*cex, cex.main=1*cex,
mar=c(3,3,2.2,0), bty='l', oma=c(0,0,0,0), bg=bgcol, fg=fgcol, col.axis=fgcol, col.lab=fgcol, col.main = fgcol, col.sub=fgcol)
# plot predictions with raw data
cols=c('#fb9a99', '#a6cee3', '#e31a1c', '#1f78b4')
plot(data[,'age'], data[,'y'], col=cols[2+as.numeric(factor(data[,'sex']))], pch=16, ylab='Gray matter volume (AU)', xlab='Age (Years)')
by(plotdf, plotdf$sex, function(df){
polygon(c(df$age, rev(df$age)), c(df$lower.CL, rev(df$upper.CL)), col=scales::alpha(cols[as.numeric(df$sex)], alpha=0.8), border=NA)
points(df$age, df$emmean, type='l', col=cols[as.numeric(df$sex)+2])
} )
}
}
plotResults(robustStatMap, roiInds = 1:4)