Excel Helpers

Code Properties

  • Language: R
  • Packages: readxl, openxlsx, writexl, tidyxl, cellranger

Overview

Collection of utility functions and package references for reading, writing, and processing Excel files in R.

Sources

Packages

PackagePurpose
openxlsxRead/write Excel workbooks with formatting
readxlFast Excel file reading
writexlSimple Excel file writing
tidyxlRead untidy Excel files with cell metadata
cellrangerCell range handling utilities
unpivotrUnpivot complex Excel structures
XLConnectFull Excel integration (Java-based)
flexlsxFlexible Excel export
rioGeneral purpose import/export
janitorData cleaning utilities

Code

General Purpose Excel Reader

#' Read Excel File
#'
#' @param xl_file Path to Excel file
#' @param xl_sheet Sheet name or index
#' @param num_rows Number of rows to read (0 for all)
#' @param ... Additional arguments passed to readxl::read_excel
#'
#' @return A tibble with Excel data
#' @export
read_xl <- function(
  xl_file,
  xl_sheet,
  num_rows,
  ...
) {
  tryCatch({
    readxl::read_excel(
      xl_file,
      sheet = xl_sheet,
      n_max = if (num_rows > 0) num_rows else Inf,
      col_names = FALSE,
      col_types = "text",
      .name_repair = "minimal"
    )
  }, error = function(e) {
    cli::cli_abort("Error reading excel file: {xl_file}: {e$message}")
  })
}

Search Cells for Data Start

#' Find Data Start Position in Excel
#'
#' @param xl_file Path to Excel file
#' @param xl_sheet Sheet name
#' @param search_term Term to search for
#'
#' @return Named list with row and column of found cell
#' @export
find_data_start <- function(xl_file, xl_sheet, search_term) {
  cells <- tidyxl::xlsx_cells(xl_file, sheets = xl_sheet)
  
  match_cell <- cells |>
    dplyr::filter(stringr::str_detect(character, search_term)) |>
    dplyr::slice(1)
  
  if (nrow(match_cell) == 0) {
    cli::cli_abort("Search term '{search_term}' not found in sheet '{xl_sheet}'")
  }
  
  list(
    row = match_cell$row,
    col = match_cell$col,
    address = match_cell$address
  )
}

Convert Cell References

#' Convert Excel Cell Reference to Row/Column
#'
#' @param cell_ref Excel cell reference (e.g., "A1", "B15")
#'
#' @return Named list with row and column numbers
#' @export
parse_cell_ref <- function(cell_ref) {
  parsed <- cellranger::as.cell_addr(cell_ref)
  list(
    row = parsed$row,
    col = parsed$col
  )
}
 
#' Create Cell Range from Corners
#'
#' @param start_cell Starting cell reference
#' @param end_cell Ending cell reference
#'
#' @return Cell range string
#' @export
make_cell_range <- function(start_cell, end_cell) {
  cellranger::cell_limits(
    ul = cellranger::as.cell_addr(start_cell),
    lr = cellranger::as.cell_addr(end_cell)
  ) |>
    cellranger::as.range()
}

Usage

# read entire sheet as text
data <- read_xl("data.xlsx", "Sheet1", num_rows = 0)
 
# find where data starts
start_pos <- find_data_start("data.xlsx", "Sheet1", "Column Header")
 
# read from specific range
range_data <- readxl::read_excel(
  "data.xlsx",
  sheet = "Sheet1",
  range = cellranger::cell_limits(c(start_pos$row, 1), c(NA, NA))
)

Appendix

Note created on 2024-06-23 and last modified on 2024-12-13.

See Also


(c) No Clocks, LLC | 2024