Interactive Data Dictionary Wizard GUI with R
Code Properties
- Language: R
- Packages: shiny, miniUI
Overview
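An interactive Shiny gadget that walks through a data frame and collects dataset-level metadata (title, creator, source, publisher, description, keywords, license) plus a description, mapped name, and unit/format for every column. On completion it returns the collected metadata as a list, optionally together with a roxygen2 documentation skeleton and a Dublin Core, ISO 19115 (simplified), or DataCite representation.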
Code
library(shiny)
library(miniUI)
#' Launch an interactive gadget for building a data dictionary
#'
#' Opens a Shiny gadget in a dialog viewer. The gadget shows a form for
#' dataset-level metadata followed by one group of inputs (description,
#' mapped name, unit/format) for every column of `df`.
#'
#' @param df A data frame whose columns are to be documented.
#' @param dataset_name Optional string. Pre-fills the "Dataset Title" field
#'   and is used as the object name in the roxygen2 skeleton; when `NULL` the
#'   title starts empty and the skeleton uses `"dataset"`.
#'
#' @return The value handed to `stopApp()`: on "Done", a list with elements
#'   `column_metadata` (data frame, one row per column of `df`),
#'   `dataset_metadata` (list), `roxygen_doc` (string, or `NULL` when the
#'   checkbox is off) and `standard_metadata` (list, or `NULL` when the
#'   standard is "none"); on "Cancel", `NULL`.
create_data_dictionary <- function(df, dataset_name = NULL) {
  # Fail before the gadget opens rather than after the user filled in the form.
  if (!is.data.frame(df)) {
    stop("`df` must be a data frame.", call. = FALSE)
  }

  # Local null-default helper: base R only provides `%||%` from 4.4.0, so the
  # function does not rely on it being available.
  or_default <- function(value, default) {
    if (is.null(value)) default else value
  }

  col_names <- names(df)

  ui <- miniPage(
    gadgetTitleBar("Data Dictionary Creator"),
    miniContentPanel(
      padding = 15,
      scrollable = TRUE,

      # Dataset-level metadata section
      h3("Dataset Metadata"),
      hr(),
      textInput(
        "dataset_title", "Dataset Title:",
        value = or_default(dataset_name, "")
      ),
      textInput(
        "dataset_creator", "Creator/Author:",
        placeholder = "Your Name"
      ),
      textInput(
        "dataset_source", "Source URL:",
        placeholder = "https://..."
      ),
      textInput(
        "dataset_publisher", "Publisher:",
        placeholder = "Organization name"
      ),
      textAreaInput(
        "dataset_description", "Dataset Description:",
        placeholder = "Describe the dataset purpose and contents",
        rows = 3
      ),
      textInput(
        "dataset_subject", "Subject/Keywords:",
        placeholder = "Comma-separated keywords"
      ),
      textInput(
        "dataset_rights", "Rights/License:",
        placeholder = "e.g., CC-BY-4.0, MIT"
      ),
      selectInput(
        "metadata_standard", "Metadata Standard:",
        choices = c(
          "Dublin Core" = "dc",
          "ISO 19115" = "iso",
          "DataCite" = "datacite",
          "None" = "none"
        ),
        selected = "dc"
      ),
      checkboxInput("generate_roxygen", "Generate roxygen2 documentation", TRUE),
      hr(),

      # Column-level metadata section: input ids are suffixed with the column
      # position (desc_1, mapped_1, unit_1, ...), not the column name.
      h3("Column Metadata"),
      hr(),
      lapply(seq_along(col_names), function(i) {
        col_name <- col_names[i]
        col_type <- class(df[[col_name]])[1]
        tagList(
          h5(strong(paste0("Column ", i, ": ", col_name, " (", col_type, ")"))),
          textInput(
            inputId = paste0("desc_", i),
            label = "Description:",
            placeholder = "Enter column description"
          ),
          textInput(
            inputId = paste0("mapped_", i),
            label = "Mapped Name:",
            value = col_name,
            placeholder = "Enter mapped column name"
          ),
          textInput(
            inputId = paste0("unit_", i),
            label = "Unit/Format:",
            placeholder = "e.g., USD, meters, YYYY-MM-DD"
          ),
          hr()
        )
      })
    )
  )

  server <- function(input, output, session) {
    # Read the per-column text inputs sharing `prefix`, substituting
    # `fallback[i]` for any input that is unset. vapply() keeps the result a
    # character vector even when `df` has no columns.
    read_column_inputs <- function(prefix, fallback) {
      vapply(
        seq_along(col_names),
        function(i) or_default(input[[paste0(prefix, i)]], fallback[i]),
        character(1)
      )
    }

    observeEvent(input$done, {
      blank <- rep("", length(col_names))

      # Collect column metadata
      column_metadata <- data.frame(
        original_name = col_names,
        mapped_name = read_column_inputs("mapped_", col_names),
        description = read_column_inputs("desc_", blank),
        unit = read_column_inputs("unit_", blank),
        data_type = vapply(df, function(x) class(x)[1], character(1)),
        stringsAsFactors = FALSE
      )

      # Collect dataset-level metadata
      dataset_metadata <- list(
        title = or_default(input$dataset_title, ""),
        creator = or_default(input$dataset_creator, ""),
        source = or_default(input$dataset_source, ""),
        publisher = or_default(input$dataset_publisher, ""),
        description = or_default(input$dataset_description, ""),
        subject = or_default(input$dataset_subject, ""),
        rights = or_default(input$dataset_rights, ""),
        standard = input$metadata_standard,
        date_created = Sys.Date(),
        n_rows = nrow(df),
        n_cols = ncol(df)
      )

      # Generate roxygen2 documentation if requested
      roxygen_doc <- NULL
      if (isTRUE(input$generate_roxygen)) {
        roxygen_doc <- generate_roxygen_skeleton(
          dataset_name = or_default(dataset_name, "dataset"),
          dataset_metadata = dataset_metadata,
          column_metadata = column_metadata
        )
      }

      # Generate standard metadata if requested
      standard_metadata <- NULL
      if (input$metadata_standard != "none") {
        standard_metadata <- generate_standard_metadata(
          standard = input$metadata_standard,
          dataset_metadata = dataset_metadata,
          column_metadata = column_metadata
        )
      }

      stopApp(list(
        column_metadata = column_metadata,
        dataset_metadata = dataset_metadata,
        roxygen_doc = roxygen_doc,
        standard_metadata = standard_metadata
      ))
    })

    observeEvent(input$cancel, {
      stopApp(NULL)
    })
  }

  runGadget(
    ui,
    server,
    viewer = dialogViewer("Data Dictionary", width = 700, height = 900)
  )
}
#' Helper: generate a roxygen2 skeleton for a documented dataset
#'
#' Builds the text of a roxygen2 comment block (title, description, `@format`
#' with a `\describe{}` list of columns, `@source`, `@examples`) followed by
#' the quoted dataset name.
#'
#' @param dataset_name String used in the `@examples` calls and as the quoted
#'   object name on the last line.
#' @param dataset_metadata List providing `title`, `description`, `n_rows`,
#'   `n_cols` and `source`.
#' @param column_metadata Data frame providing `original_name`, `description`
#'   and `unit`, one row per column.
#'
#' @return A single string with lines separated by "\n".
generate_roxygen_skeleton <- function(dataset_name, dataset_metadata,
                                      column_metadata) {
  # Split text on line breaks and prefix every resulting line with the roxygen
  # marker. A comment block ends at the first line lacking the marker, so
  # multi-line input and the \describe{} list must be prefixed line by line.
  to_roxygen_lines <- function(text) {
    pieces <- unlist(strsplit(as.character(text), "\r?\n"), use.names = FALSE)
    if (length(pieces) == 0L) {
      pieces <- ""
    }
    trimws(paste("#'", pieces), which = "right")
  }

  # Append " (unit)" to a description only when a unit was supplied.
  has_unit <- !is.na(column_metadata$unit) & nzchar(column_metadata$unit)
  unit_suffix <- ifelse(has_unit, paste0(" (", column_metadata$unit, ")"), "")

  # Format column descriptions as definition list entries
  items <- sprintf(
    "  \\item{%s}{%s%s}",
    column_metadata$original_name,
    column_metadata$description,
    unit_suffix
  )

  doc_lines <- c(
    to_roxygen_lines(dataset_metadata$title),
    "#'",
    to_roxygen_lines(dataset_metadata$description),
    "#'",
    sprintf(
      "#' @format A data frame with %d rows and %d variables:",
      dataset_metadata$n_rows,
      dataset_metadata$n_cols
    ),
    to_roxygen_lines(c("\\describe{", items, "}")),
    "#'",
    to_roxygen_lines(paste("@source", dataset_metadata$source)),
    "#'",
    "#' @examples",
    sprintf("#' data(%s)", dataset_name),
    sprintf("#' head(%s)", dataset_name),
    # The quoted name is the object the comment block documents.
    sprintf("\"%s\"", dataset_name)
  )

  paste(doc_lines, collapse = "\n")
}
#' Helper: generate metadata in a standard format
#'
#' Re-arranges the collected metadata into a nested list whose keys follow
#' one of the supported metadata standards.
#'
#' @param standard One of `"dc"` (Dublin Core), `"iso"` (simplified
#'   ISO 19115) or `"datacite"` (DataCite).
#' @param dataset_metadata List providing `title`, `creator`, `source`,
#'   `publisher`, `description`, `subject`, `rights` and `date_created`.
#' @param column_metadata Data frame providing `original_name`,
#'   `description`, `data_type` and `unit`; only used for `"iso"`.
#'
#' @return A list in the layout of the requested standard. An unsupported
#'   `standard` raises an error naming the accepted values.
generate_standard_metadata <- function(standard, dataset_metadata,
                                       column_metadata) {
  if (!is.character(standard) || length(standard) != 1L || is.na(standard)) {
    stop("`standard` must be a single string.", call. = FALSE)
  }

  switch(
    standard,
    # Dublin Core format
    dc = list(
      "dc:title" = dataset_metadata$title,
      "dc:creator" = dataset_metadata$creator,
      "dc:source" = dataset_metadata$source,
      "dc:publisher" = dataset_metadata$publisher,
      "dc:description" = dataset_metadata$description,
      "dc:subject" = dataset_metadata$subject,
      "dc:rights" = dataset_metadata$rights,
      "dc:date" = as.character(dataset_metadata$date_created),
      "dc:type" = "Dataset",
      "dc:format" = "application/r-rds"
    ),
    # ISO 19115 simplified format
    iso = list(
      identificationInfo = list(
        citation = list(
          title = dataset_metadata$title,
          date = dataset_metadata$date_created
        ),
        abstract = dataset_metadata$description,
        purpose = dataset_metadata$subject
      ),
      distributionInfo = list(
        distributor = dataset_metadata$publisher,
        transferOptions = list(
          onLine = dataset_metadata$source
        )
      ),
      contentInfo = list(
        attributes = column_metadata[
          , c("original_name", "description", "data_type", "unit")
        ]
      )
    ),
    # DataCite format
    datacite = list(
      identifier = list(
        identifierType = "URL",
        value = dataset_metadata$source
      ),
      creators = list(list(creatorName = dataset_metadata$creator)),
      titles = list(list(title = dataset_metadata$title)),
      publisher = dataset_metadata$publisher,
      publicationYear = format(dataset_metadata$date_created, "%Y"),
      resourceType = list(resourceTypeGeneral = "Dataset"),
      descriptions = list(list(
        description = dataset_metadata$description,
        descriptionType = "Abstract"
      )),
      rightsList = list(list(rights = dataset_metadata$rights))
    ),
    # Unnamed last argument is the switch() default: reject anything else
    # explicitly instead of falling through with no result.
    stop(
      sprintf(
        "Unsupported metadata standard \"%s\"; expected one of \"dc\", \"iso\", \"datacite\".",
        standard
      ),
      call. = FALSE
    )
  )
}
# Usage
How to use this code:
create_data_dictionary(mtcars)
This, in turn, launches the interactive GUI for crafting the data dictionary:
Dataset Metadata:

Column Metadata:

Example output:
$column_metadata
original_name mapped_name description unit data_type
mpg mpg miles_per_gallon Miles per Gallon numeric
cyl cyl cyl numeric
disp disp disp numeric
hp hp hp numeric
drat drat drat numeric
wt wt wt numeric
qsec qsec qsec numeric
vs vs vs numeric
am am am numeric
gear gear gear numeric
carb carb carb numeric
$dataset_metadata
$dataset_metadata$title
[1] "mtcars"
$dataset_metadata$creator
[1] "Jimmy Briggs <jimmy.briggs@noclocks.dev>"
$dataset_metadata$source
[1] "https://example.com"
$dataset_metadata$publisher
[1] ""
$dataset_metadata$description
[1] "Amazing Cars"
$dataset_metadata$subject
[1] ""
$dataset_metadata$rights
[1] ""
$dataset_metadata$standard
[1] "dc"
$dataset_metadata$date_created
[1] "2025-12-28"
$dataset_metadata$n_rows
[1] 32
$dataset_metadata$n_cols
[1] 11
$roxygen_doc
[1] "#' mtcars\n#'\n#' Amazing Cars\n#'\n#' @format A data frame with 32 rows and 11 variables:\n\\describe{\n \\item{mpg}{Miles per Gallon}\n \\item{cyl}{}\n \\item{disp}{}\n \\item{hp}{}\n \\item{drat}{}\n \\item{wt}{}\n \\item{qsec}{}\n \\item{vs}{}\n \\item{am}{}\n \\item{gear}{}\n \\item{carb}{}\n}\n#'\n#' @source https://example.com\n#'\n#' @examples\n#' data(dataset)\n#' head(dataset)\n\"dataset\""
$standard_metadata
$standard_metadata$`dc:title`
[1] "mtcars"
$standard_metadata$`dc:creator`
[1] "Jimmy Briggs <jimmy.briggs@noclocks.dev>"
$standard_metadata$`dc:source`
[1] "https://example.com"
$standard_metadata$`dc:publisher`
[1] ""
$standard_metadata$`dc:description`
[1] "Amazing Cars"
$standard_metadata$`dc:subject`
[1] ""
$standard_metadata$`dc:rights`
[1] ""
$standard_metadata$`dc:date`
[1] "2025-12-28"
$standard_metadata$`dc:type`
[1] "Dataset"
$standard_metadata$`dc:format`
[1] "application/r-rds"
Notes
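Clicking "Done" returns the list shown above; clicking "Cancel" returns NULL. Unchecking "Generate roxygen2 documentation" leaves `roxygen_doc` as NULL, and choosing "None" as the metadata standard leaves `standard_metadata` as NULL.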
Appendix
Note created on 2025-12-28 and last modified on 2025-12-28.
See Also
Backlinks
(c) No Clocks, LLC | 2025