---
title: Writing Custom Storage Backends
author: fmridataset Team
date: '`r Sys.Date()`'
output:
rmarkdown::html_vignette:
toc: yes
toc_depth: 2
vignette: >
%\VignetteIndexEntry{Writing Custom Storage Backends}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
params:
family: red
preset: homage
css: albers.css
resource_files:
- albers.css
- albers.js
includes:
in_header: |-
---
```{r setup, include=FALSE}
if (requireNamespace("ggplot2", quietly = TRUE) && requireNamespace("albersdown", quietly = TRUE)) {
ggplot2::theme_set(
albersdown::theme_albers(family = params$family, preset = params$preset)
)
}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
eval = FALSE
)
```
Write a custom backend when your data lives in a format that no built-in backend supports — flat CSV, a proprietary binary format, a remote database, or anything else. A backend is a plain S3 list with six methods; once those methods exist, your format gains full access to chunking, study-level operations, and every other fmridataset feature. This vignette walks through the contract, a complete working example, and how to register and test your backend.
## The six-method contract
Every backend must implement exactly these six S3 generics, declared in `R/storage_backend.R`.
### `backend_open(backend)`
```{r sig-open}
backend_open.my_backend <- function(backend) {
...
}
```
Acquire resources (file handles, connections). Must be idempotent: if the backend is already open, return it unchanged.
### `backend_close(backend)`
```{r sig-close}
backend_close.my_backend <- function(backend) {
...
}
```
Release all resources and set `is_open <- FALSE`. Must be idempotent and return `invisible(backend)`.
### `backend_get_dims(backend)`
```{r sig-dims}
backend_get_dims.my_backend <- function(backend) {
...
}
```
Return `list(spatial = c(nx, ny, nz), time = n_timepoints)` — a named list with a length-3 numeric `spatial` vector and a single positive integer `time`.
### `backend_get_mask(backend)`
```{r sig-mask}
backend_get_mask.my_backend <- function(backend) {
...
}
```
Return a logical vector of length `prod(spatial)` with no `NA` values and at least one `TRUE`.
### `backend_get_data(backend, rows = NULL, cols = NULL)`
```{r sig-data}
backend_get_data.my_backend <- function(backend, rows = NULL, cols = NULL) {
...
}
```
Return a matrix in **timepoints x voxels** orientation. `rows` and `cols` are 1-based integer vectors; `NULL` means "all". Always use `drop = FALSE` when subsetting.
### `backend_get_metadata(backend)`
```{r sig-metadata}
backend_get_metadata.my_backend <- function(backend) {
...
}
```
Return a list — empty is fine. Include an `affine` matrix and `voxel_dims` when your format has them.
## Minimal working example: CSV backend
The following ~40-line implementation is a complete, functional backend for CSV files where rows are timepoints and columns are voxels.
```{r csv-backend-complete}
# Constructor
csv_backend <- function(data_file) {
if (!file.exists(data_file)) stop("File not found: ", data_file)
backend <- list(
data_file = data_file,
data_cache = NULL,
is_open = FALSE
)
class(backend) <- c("csv_backend", "storage_backend")
backend
}
# Open: read the CSV once and cache it
backend_open.csv_backend <- function(backend) {
if (backend$is_open) return(backend)
backend$data_cache <- as.matrix(read.csv(backend$data_file, check.names = FALSE))
backend$is_open <- TRUE
backend
}
# Close: drop the cache
backend_close.csv_backend <- function(backend) {
backend$data_cache <- NULL
backend$is_open <- FALSE
invisible(backend)
}
# Dims: rows = timepoints, cols = voxels; report as flat spatial volume
backend_get_dims.csv_backend <- function(backend) {
stopifnot(backend$is_open)
list(spatial = c(ncol(backend$data_cache), 1L, 1L),
time = nrow(backend$data_cache))
}
# Mask: all voxels valid
backend_get_mask.csv_backend <- function(backend) {
stopifnot(backend$is_open)
rep(TRUE, ncol(backend$data_cache))
}
# Data: return timepoints x voxels, with optional subsetting
backend_get_data.csv_backend <- function(backend, rows = NULL, cols = NULL) {
stopifnot(backend$is_open)
d <- backend$data_cache
if (!is.null(rows)) d <- d[rows, , drop = FALSE]
if (!is.null(cols)) d <- d[, cols, drop = FALSE]
d
}
# Metadata: minimal
backend_get_metadata.csv_backend <- function(backend) {
stopifnot(backend$is_open)
list(source_file = backend$data_file)
}
```
Open, query, and close the backend by calling the generics directly:
```{r csv-usage}
b <- csv_backend("my_data.csv")
b <- backend_open(b)
dims <- backend_get_dims(b) # spatial 500 x 1 x 1, time 200
mask <- backend_get_mask(b) # logical vector, length 500
d <- backend_get_data(b, rows = 1:20, cols = 1:50) # 20 x 50 matrix
b <- backend_close(b)
```
## Registering your backend
`register_backend()` adds your factory function to the package-level registry. After registration, `create_backend()` constructs instances by name.
```{r register}
register_backend(
name = "csv",
factory = csv_backend,
description = "CSV file backend (rows = timepoints, cols = voxels)"
)
```
Create an instance through the registry:
```{r create}
b <- create_backend("csv", data_file = "my_data.csv")
```
`create_backend()` calls `validate_backend()` automatically unless you pass `validate = FALSE`. To see everything currently registered:
```{r list-backends}
list_backend_names() # character vector of registered names
get_backend_registry() # full registration details for all backends
```
## Validation
`validate_backend()` checks the standard contract: class inheritance, method presence, dims structure, and mask constraints. Call it on an open backend instance.
```{r validate}
b <- backend_open(csv_backend("my_data.csv"))
validate_backend(b) # returns TRUE or throws a descriptive error
backend_close(b)
```
The validator calls `backend_get_dims()` and `backend_get_mask()` directly, so the backend must be open when you invoke it.
## Testing
Use `testthat` to verify all six methods and the subsetting contract. The backend should be open for the data-access tests.
```{r tests}
library(testthat)
# Write a small fixture
tmp <- tempfile(fileext = ".csv")
write.csv(matrix(seq_len(200), nrow = 20, ncol = 10), tmp, row.names = FALSE)
test_that("csv_backend satisfies the storage contract", {
b <- backend_open(csv_backend(tmp))
on.exit(backend_close(b))
dims <- backend_get_dims(b)
expect_equal(dims$spatial, c(10, 1L, 1L))
expect_equal(dims$time, 20L)
mask <- backend_get_mask(b)
expect_true(is.logical(mask))
expect_equal(length(mask), prod(dims$spatial))
expect_false(anyNA(mask))
d_full <- backend_get_data(b)
expect_equal(dim(d_full), c(20L, 10L))
d_sub <- backend_get_data(b, rows = 1:5, cols = 1:3)
expect_equal(dim(d_sub), c(5L, 3L))
expect_true(validate_backend(b))
})
unlink(tmp)
```