# --- Environment setup ---
# NOTE(review): clearing the workspace is convenient for a self-contained
# tutorial run, but it deletes every object the user already had defined.
rm(list = ls()) # remove previously stored objects from the workspace
options(scipen = 999) # strongly prefer fixed over scientific notation when printing
# One-time install of the packages used in this tutorial ({glue} is called as
# glue::glue(); {paws.storage} is only needed for the optional S3 fallback):
# install.packages(c("arrow", "dplyr", "ggplot2", "glue", "magick", "paws.storage", "tidyr"))
library(arrow)
library(dplyr)
library(tidyr)
library(ggplot2)
library(magick)
# library(paws) # Only needed if using S3 rather than https to retrieve image
The following is a short tutorial showing how MERMAID images and their associated annotations can be accessed from the S3 bucket using R code, with a final step of visualizing them together.
Setting up the environment
Accessing MERMAID annotations
To access and work with MERMAID open data (including images and annotations) you will need to open the `mermaid_confirmed_annotations.parquet` file with a library such as `arrow`. The following creates an R data frame from the parquet file.
# Location of the annotations file. Two equivalent routes are available:
#   * S3 URI (uncomment to use):
# annotations_path_s3 <- "s3://coral-reef-training/mermaid/mermaid_confirmed_annotations.parquet"
#   * public HTTPS URL (used below):
annotations_path_https <- "https://coral-reef-training.s3.us-east-1.amazonaws.com/mermaid/mermaid_confirmed_annotations.parquet"

# Read the full annotations table (as S3 or https).
# Each row corresponds to one annotated point for an image (25 per image).
# df_annotations_s3 <- arrow::read_parquet(annotations_path_s3)
df_annotations_https <- arrow::read_parquet(annotations_path_https)

# A per-image table: keep the image-level columns and drop the duplicate rows
# that arise from having several annotated points per image.
df_images <- df_annotations_https %>%
  select(image_id, region_id, region_name) %>%
  distinct()

# Print a one-line summary of what was loaded.
glue::glue("Loaded {nrow(df_annotations_https)} annotations across {nrow(df_images)} images from {length(unique(df_images$region_id))} unique geographic realms.")
Loaded 50000 annotations across 2000 images from 2 unique geographic realms.
Fetching an image (function)
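The following function loads an image as a `magick` image object (the equivalent of `get_image_s3` in Python). It first tries the public HTTPS URL of the object and, if that fails, can optionally fall back to fetching it from S3 with the `paws.storage` package.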
#' Fetch a MERMAID image as a magick image object
#'
#' Builds the object key `mermaid/<image_id>.png` (or
#' `mermaid/<image_id>_thumbnail.png`), tries to read it over public HTTPS,
#' and optionally falls back to an S3 `get_object` call via {paws.storage}.
#'
#' @param image_id Identifier of the image; interpolated into the object key.
#' @param bucket Name of the S3 bucket holding the images.
#' @param region AWS region of the bucket. It is only used to build the HTTPS
#'   URL; the S3 fallback client is created with its default configuration.
#' @param thumbnail If `TRUE`, fetch the `_thumbnail.png` variant instead of
#'   the full image.
#' @param use_s3_fallback If `TRUE`, try {paws.storage} when the HTTPS read
#'   fails; if `FALSE`, an HTTPS failure is an error.
#' @return The value of `magick::image_read()` for the fetched image.
#' @details Signals an error when the HTTPS read fails and either the fallback
#'   is disabled, {paws.storage} is not installed, or the S3 request fails.
get_image <- function(image_id,
                      bucket = "coral-reef-training",
                      region = "us-east-1",
                      thumbnail = FALSE,
                      use_s3_fallback = TRUE) {
  # 1) Try public HTTPS (works if the object is public)
  key <- if (thumbnail) {
    sprintf("mermaid/%s_thumbnail.png", image_id)
  } else {
    sprintf("mermaid/%s.png", image_id)
  }
  https_url <- sprintf("https://%s.s3.%s.amazonaws.com/%s", bucket, region, key)

  # Any read error is mapped to NULL so the fallback path below can run.
  img <- tryCatch(
    magick::image_read(https_url),
    error = function(e) NULL
  )
  if (!is.null(img)) {
    return(img)
  }

  # 2) Optional fallback to paws.storage (needs AWS creds)
  if (use_s3_fallback) {
    if (!requireNamespace("paws.storage", quietly = TRUE)) {
      stop("Public HTTPS failed and {paws.storage} is not installed for S3 fallback.")
    }
    s3 <- paws.storage::s3()
    obj <- tryCatch(
      s3$get_object(Bucket = bucket, Key = key),
      error = function(e) NULL
    )
    if (is.null(obj)) {
      stop("Could not fetch image via HTTPS or S3. If the object isn’t public, configure AWS credentials.")
    }
    return(magick::image_read(obj$Body))
  } else {
    stop("Could not fetch image via HTTPS. If the object isn’t public, enable S3 fallback and configure AWS credentials.")
  }
}
Get an image and its annotations
Once you have the annotations parquet file, you can extract an image using the above function and the associated annotations for that image from the dataframe as follows:
# Choose an index (integer) between 1 and nrow(df_images).
idx <- 1
stopifnot(idx >= 1, idx <= nrow(df_images))

# If you already have an image id you can assign it to `image_id_iter`
# directly instead of looking it up by index on the following line:
image_id_iter <- df_images$image_id[idx]

# Download the full-size image for the chosen id.
img <- get_image(image_id_iter, thumbnail = FALSE)

# Keep only the annotation rows that belong to the chosen image.
annotations <- df_annotations_https %>%
  filter(image_id == image_id_iter)
Plot the image with annotations
After getting an example image with its associated annotations you can visualize those as follows:
# --- Tidy annotation fields ---
# Points without a growth form get the explicit label "None" so that they
# still receive a shape and a legend entry.
annotations <- annotations %>%
  mutate(growth_form = ifelse(test = is.na(growth_form_name),
                              yes = "None",
                              no = growth_form_name))

# --- PLOT ---
# Draw the image as the plot background and overlay one point per annotation
# at its (col, row) position; colour encodes the benthic attribute and shape
# encodes the growth form.
p <- magick::image_ggplot(img) +
  geom_point(data = annotations,
             aes(x = col, y = row,
                 color = benthic_attribute_name,
                 shape = growth_form),
             size = 4) +
  scale_color_discrete(name = "Benthic attribute") +
  scale_shape_discrete(name = "Growth form") +
  theme_void()

# Print the plot.
p