--- title: Writing Custom Storage Backends author: fmridataset Team date: '`r Sys.Date()`' output: rmarkdown::html_vignette: toc: yes toc_depth: 2 vignette: > %\VignetteIndexEntry{Writing Custom Storage Backends} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} params: family: red preset: homage css: albers.css resource_files: - albers.css - albers.js includes: in_header: |- --- ```{r setup, include=FALSE} if (requireNamespace("ggplot2", quietly = TRUE) && requireNamespace("albersdown", quietly = TRUE)) { ggplot2::theme_set( albersdown::theme_albers(family = params$family, preset = params$preset) ) } knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE ) ``` Write a custom backend when your data lives in a format that no built-in backend supports — flat CSV, a proprietary binary format, a remote database, or anything else. A backend is a plain S3 list with six methods; once those methods exist, your format gains full access to chunking, study-level operations, and every other fmridataset feature. This vignette walks through the contract, a complete working example, and how to register and test your backend. ## The six-method contract Every backend must implement exactly these six S3 generics, declared in `R/storage_backend.R`. ### `backend_open(backend)` ```{r sig-open} backend_open.my_backend <- function(backend) { ... } ``` Acquire resources (file handles, connections). Must be idempotent: if the backend is already open, return it unchanged. ### `backend_close(backend)` ```{r sig-close} backend_close.my_backend <- function(backend) { ... } ``` Release all resources and set `is_open <- FALSE`. Must be idempotent and return `invisible(backend)`. ### `backend_get_dims(backend)` ```{r sig-dims} backend_get_dims.my_backend <- function(backend) { ... } ``` Return `list(spatial = c(nx, ny, nz), time = n_timepoints)` — a named list with a length-3 numeric `spatial` vector and a single positive integer `time`. ### `backend_get_mask(backend)` ```{r sig-mask} backend_get_mask.my_backend <- function(backend) { ... } ``` Return a logical vector of length `prod(spatial)` with no `NA` values and at least one `TRUE`. ### `backend_get_data(backend, rows = NULL, cols = NULL)` ```{r sig-data} backend_get_data.my_backend <- function(backend, rows = NULL, cols = NULL) { ... } ``` Return a matrix in **timepoints x voxels** orientation. `rows` and `cols` are 1-based integer vectors; `NULL` means "all". Always use `drop = FALSE` when subsetting. ### `backend_get_metadata(backend)` ```{r sig-metadata} backend_get_metadata.my_backend <- function(backend) { ... } ``` Return a list — empty is fine. Include an `affine` matrix and `voxel_dims` when your format has them. ## Minimal working example: CSV backend The following ~40-line implementation is a complete, functional backend for CSV files where rows are timepoints and columns are voxels. ```{r csv-backend-complete} # Constructor csv_backend <- function(data_file) { if (!file.exists(data_file)) stop("File not found: ", data_file) backend <- list( data_file = data_file, data_cache = NULL, is_open = FALSE ) class(backend) <- c("csv_backend", "storage_backend") backend } # Open: read the CSV once and cache it backend_open.csv_backend <- function(backend) { if (backend$is_open) return(backend) backend$data_cache <- as.matrix(read.csv(backend$data_file, check.names = FALSE)) backend$is_open <- TRUE backend } # Close: drop the cache backend_close.csv_backend <- function(backend) { backend$data_cache <- NULL backend$is_open <- FALSE invisible(backend) } # Dims: rows = timepoints, cols = voxels; report as flat spatial volume backend_get_dims.csv_backend <- function(backend) { stopifnot(backend$is_open) list(spatial = c(ncol(backend$data_cache), 1L, 1L), time = nrow(backend$data_cache)) } # Mask: all voxels valid backend_get_mask.csv_backend <- function(backend) { stopifnot(backend$is_open) rep(TRUE, ncol(backend$data_cache)) } # Data: return timepoints x voxels, with optional subsetting backend_get_data.csv_backend <- function(backend, rows = NULL, cols = NULL) { stopifnot(backend$is_open) d <- backend$data_cache if (!is.null(rows)) d <- d[rows, , drop = FALSE] if (!is.null(cols)) d <- d[, cols, drop = FALSE] d } # Metadata: minimal backend_get_metadata.csv_backend <- function(backend) { stopifnot(backend$is_open) list(source_file = backend$data_file) } ``` Open, query, and close the backend by calling the generics directly: ```{r csv-usage} b <- csv_backend("my_data.csv") b <- backend_open(b) dims <- backend_get_dims(b) # spatial 500 x 1 x 1, time 200 mask <- backend_get_mask(b) # logical vector, length 500 d <- backend_get_data(b, rows = 1:20, cols = 1:50) # 20 x 50 matrix b <- backend_close(b) ``` ## Registering your backend `register_backend()` adds your factory function to the package-level registry. After registration, `create_backend()` constructs instances by name. ```{r register} register_backend( name = "csv", factory = csv_backend, description = "CSV file backend (rows = timepoints, cols = voxels)" ) ``` Create an instance through the registry: ```{r create} b <- create_backend("csv", data_file = "my_data.csv") ``` `create_backend()` calls `validate_backend()` automatically unless you pass `validate = FALSE`. To see everything currently registered: ```{r list-backends} list_backend_names() # character vector of registered names get_backend_registry() # full registration details for all backends ``` ## Validation `validate_backend()` checks the standard contract: class inheritance, method presence, dims structure, and mask constraints. Call it on an open backend instance. ```{r validate} b <- backend_open(csv_backend("my_data.csv")) validate_backend(b) # returns TRUE or throws a descriptive error backend_close(b) ``` The validator calls `backend_get_dims()` and `backend_get_mask()` directly, so the backend must be open when you invoke it. ## Testing Use `testthat` to verify all six methods and the subsetting contract. The backend should be open for the data-access tests. ```{r tests} library(testthat) # Write a small fixture tmp <- tempfile(fileext = ".csv") write.csv(matrix(seq_len(200), nrow = 20, ncol = 10), tmp, row.names = FALSE) test_that("csv_backend satisfies the storage contract", { b <- backend_open(csv_backend(tmp)) on.exit(backend_close(b)) dims <- backend_get_dims(b) expect_equal(dims$spatial, c(10, 1L, 1L)) expect_equal(dims$time, 20L) mask <- backend_get_mask(b) expect_true(is.logical(mask)) expect_equal(length(mask), prod(dims$spatial)) expect_false(anyNA(mask)) d_full <- backend_get_data(b) expect_equal(dim(d_full), c(20L, 10L)) d_sub <- backend_get_data(b, rows = 1:5, cols = 1:3) expect_equal(dim(d_sub), c(5L, 3L)) expect_true(validate_backend(b)) }) unlink(tmp) ```