--- title: Getting Started with HDF5 Neuroimaging Data author: fmristore Package date: '`r Sys.Date()`' output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Getting Started with HDF5 Neuroimaging Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} params: family: red css: albers.css resource_files: - albers.css - albers.js includes: in_header: |- --- ```{r setup, include = FALSE} if (requireNamespace("ggplot2", quietly = TRUE) && requireNamespace("albersdown", quietly = TRUE)) ggplot2::theme_set(albersdown::theme_albers(params$family)) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, warning = FALSE ) library(fmristore) library(neuroim2) ``` ## Why Use HDF5 for Neuroimaging? Working with large fMRI datasets can quickly exhaust your computer's memory. The `fmristore` package solves this problem by storing your data in HDF5 format, which: - **Saves memory**: Data stays on disk and is only loaded when you access it - **Works seamlessly**: Behaves just like regular R arrays - **Scales effortlessly**: Handle datasets larger than your RAM ## Quick Start: 3D Brain Volumes Let's start with a simple example - storing a single brain volume: ```{r quick-3d} # Create a small brain volume (5x5x3 voxels) brain_data <- array(rnorm(75), dim = c(5, 5, 3)) brain_vol <- NeuroVol(brain_data, NeuroSpace(c(5, 5, 3))) # Save to HDF5 format h5_file <- tempfile(fileext = ".h5") h5_brain <- as_h5(brain_vol, file = h5_file) # Access works just like a regular array! print(h5_brain[1:2, 1:2, 1]) # Don't forget to close when done close(h5_brain) ``` ## Working with fMRI Time Series (4D Data) Most fMRI data includes a time dimension. Here's how to handle 4D data efficiently: ```{r fmri-timeseries, fig.width=8} # Create fMRI data: 10x10x5 brain, 20 time points fmri_data <- array(rnorm(10 * 10 * 5 * 20), dim = c(10, 10, 5, 20)) fmri_vec <- NeuroVec(fmri_data, NeuroSpace(c(10, 10, 5, 20))) # Convert to HDF5 h5_file <- tempfile(fileext = ".h5") h5_fmri <- as_h5(fmri_vec, file = h5_file) # Extract time series for a single voxel voxel_timeseries <- series(h5_fmri, i = 5, j = 5, k = 3) plot(voxel_timeseries, type = "l", main = "fMRI Time Series for Voxel [5,5,3]", xlab = "Time", ylab = "Signal" ) # Extract a single time point (3D volume) volume_t10 <- h5_fmri[, , , 10] print(paste( "Volume at time 10 has dimensions:", paste(dim(volume_t10), collapse = "x") )) close(h5_fmri) ``` ## Loading Existing HDF5 Files If you have HDF5 files created by `fmristore` or compatible tools: ```{r load-existing} # First, let's create an example file example_data <- NeuroVol( array(1:27, dim = c(3, 3, 3)), NeuroSpace(c(3, 3, 3)) ) h5_file <- tempfile(fileext = ".h5") temp_h5 <- as_h5(example_data, file = h5_file) close(temp_h5) # Now load it back h5_brain <- H5NeuroVol(file_name = h5_file) # Work with it naturally print(h5_brain[, , 2]) # Get slice 2 print(dim(h5_brain)) # Check dimensions close(h5_brain) unlink(h5_file) ``` ## Best Practices ### 1. Always Close Your Files ```{r close-example, eval=FALSE} h5_data <- as_h5(my_data, file = "output.h5") # ... do your work ... close(h5_data) # Essential! ``` ### 2. Use Chunk Sizes Wisely For better performance with large datasets: ```{r chunk-example, eval=FALSE} # Optimize for accessing full volumes at each time point h5_data <- as_h5(my_4d_data, file = "output.h5", chunk_dims = c(64, 64, 40, 1) ) ``` ### 3. Memory vs Speed Trade-off - **Small frequent access**: HDF5 is perfect - **Whole dataset operations**: Consider loading into memory if it fits ## Common Use Cases ### Preprocessing Large Datasets ```{r preprocess-example, fig.width=8} # Create example data large_fmri <- NeuroVec( array(rnorm(50 * 50 * 30 * 100), c(50, 50, 30, 100)), NeuroSpace(c(50, 50, 30, 100)) ) h5_file <- tempfile(fileext = ".h5") h5_data <- as_h5(large_fmri, file = h5_file) # Process one volume at a time (memory efficient) n_timepoints <- dim(h5_data)[4] means <- numeric(n_timepoints) for (t in 1:n_timepoints) { volume_t <- h5_data[, , , t] means[t] <- mean(volume_t) } plot(means, type = "l", main = "Mean Brain Activity Over Time", xlab = "Time", ylab = "Mean Signal" ) close(h5_data) unlink(h5_file) ``` ### Region of Interest Analysis ```{r roi-example, fig.width=8} # Create example data with a specific ROI brain_data <- array(rnorm(20 * 20 * 10 * 50), c(20, 20, 10, 50)) roi_mask <- array(FALSE, c(20, 20, 10)) roi_mask[8:12, 8:12, 4:6] <- TRUE # Define ROI fmri_vec <- NeuroVec(brain_data, NeuroSpace(c(20, 20, 10, 50))) h5_file <- tempfile(fileext = ".h5") h5_fmri <- as_h5(fmri_vec, file = h5_file) # Extract mean time series from ROI roi_indices <- which(roi_mask, arr.ind = TRUE) roi_timeseries <- matrix(0, nrow = nrow(roi_indices), ncol = 50) for (i in 1:nrow(roi_indices)) { roi_timeseries[i, ] <- series(h5_fmri, i = roi_indices[i, 1], j = roi_indices[i, 2], k = roi_indices[i, 3] ) } mean_roi <- colMeans(roi_timeseries) plot(mean_roi, type = "l", main = "Average ROI Time Series", xlab = "Time", ylab = "Signal" ) close(h5_fmri) unlink(h5_file) ``` ## Summary The `fmristore` package makes working with large neuroimaging datasets simple: 1. **Convert** your data to HDF5 with `as_h5()` 2. **Access** it like regular R arrays 3. **Close** when done No more memory errors, no complicated code - just efficient neuroimaging data analysis! ```{r cleanup, include=FALSE} # Clean up any remaining temp files temp_files <- list.files(tempdir(), pattern = "\\.h5$", full.names = TRUE) file.remove(temp_files) ```