This article contains case-studies illustrating the benefits of implementing workflows with CoRC. Example code to get started can be found in the tutorial articles on model entitiy management, task management and model building.

Initial Setup

library(tidyverse)
library(parallel)
library(CoRC)

# helper to run tasks in parallel on all cores
mapInParallel <- function(data, fun, ..., .export = character(), .prep = {}) {
  cl <- makeCluster(detectCores())
  clusterExport(cl = cl, .export)
  clusterCall(cl = cl, fun = eval, .prep, env = .GlobalEnv)
  result <- parLapplyLB(cl = cl, X = data, fun = as_mapper(fun), ..., chunk.size = 50)
  stopCluster(cl)
  result
}

3D Trajectory Plot of a Calcium Model

This example loads the Kummer2000 - Oscillations in Calcium Signalling model. The model has 3 species which oscillate. These oscialltions can be visualized as a trajectory through a 3D space. The example does this once in a deterministic and once in a stochatic fashion.

loadSBML(biomodels_url(id=329))
#> # A COPASI model reference:
#> Model name: "Kummer2000 - Oscillations in Calcium Signalling"
#> Number of compartments: 1
#> Number of species: 3
#> Number of reactions: 8

# Run 24 sec (2 Periods)
setTimeCourseSettings(duration = 24, intervals = 10000)

timeseries <- list(
  deterministic = runTimeCourse()$result,
  stochastic = runTimeCourse(method = list(method = "directMethod", use_random_seed = TRUE, random_seed = 1))$result
)

# Create the same plot for both timeseries
plots <- map(
  timeseries,
  plotly::plot_ly,
  type = "scatter3d",
  mode = "lines",
  x = ~ `G-alpha`,
  y = ~ activePLC,
  z = ~ Calcium,
  color = ~ Time
)

unloadModel()

plots$deterministic
plots$stochastic

Statistics of Repeated Stochastic Simulations

This implements an example from the Condor-COPASI paper. The example illustrates advantages of parallel processing.

# Run 1000 stochastic time series possibly in parallel
loadModel("https://raw.githubusercontent.com/copasi/condor-copasi/master/examples/stochastic_test.cps")
#> # A COPASI model reference:
#> Model name: "Kummer calcium model"
#> Number of compartments: 1
#> Number of species: 3
#> Number of reactions: 8
setTimeCourseSettings(method = list(method = "directMethod", use_random_seed = TRUE))

model_string <- saveModelToString()

# timeseries <- 1:1000 %>% map(~ runTimeCourse(method = list(random_seed = .x))$result)
timeseries <-
  # Defines parallel evaluation:
  mapInParallel(
    # export the model to the workers
    .export = "model_string",
    # prepare worker for the task
    .prep = quote({
      library(CoRC)
      loadModelFromString(model_string)
    }),
    # iteration data (1000 random seeds)
    1:1000,
    # iteration function using the seed values
    function(seed) runTimeCourse(method = list(random_seed = seed))$result
  )

# Combine all results and reshape the data
plotdata <-
  timeseries %>%
  bind_rows() %>%
  group_by(Time) %>%
  # calculate mean and sd for all time points
  summarise(across(everything(), list(mean = mean, sd = sd)), .groups = "drop") %>%
  # gather all values so the column `name` identifies "a_mean", "b_sd" etc.
  pivot_longer(-Time) %>%
  # split up information on species (a,b,c) and type of value (mean, sd)
  separate(name, c("species", "type"), "_") %>%
  pivot_wider(names_from = type, values_from = value)

print(plotdata, n = 6)
#> # A tibble: 2,403 × 4
#>    Time species  mean    sd
#>   <dbl> <chr>   <dbl> <dbl>
#> 1  0    a        8.00 0    
#> 2  0    b        8.00 0    
#> 3  0    c        8.00 0    
#> 4  0.05 a        7.06 0.249
#> 5  0.05 b        8.12 0.117
#> 6  0.05 c        5.60 0.442
#> # … with 2,397 more rows

plot <-
  ggplot(data = plotdata, aes(x = Time, y = mean, group = species, tt_sd = sd)) +
  geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, fill = species), alpha = 1 / 4) +
  geom_line(aes(color = species)) +
  guides(fill = "none") +
  labs(
    x = paste0("Time (", getTimeUnit(), ")"),
    y = paste0("Concentration (", getQuantityUnit(), ")"),
    color = "Species"
  )

unloadModel()

plotly::ggplotly(plot, tooltip = c("group", "x", "y", "tt_sd"))

Parameter Scan

2D Scan Over the Cartesian Product of Two Species Concentration Vectors

This implements an example from the Mendes2009 paper on COPASI use cases.

loadSBML(biomodels_url(id=68))
#> # A COPASI model reference:
#> Model name: "Curien2003_MetThr_synthesis"
#> Number of compartments: 1
#> Number of species: 9
#> Number of reactions: 3
setSteadyStateSettings(calculate_jacobian = FALSE, perform_stability_analysis = FALSE)

# Cartesian product of the input values
scan <- expand_grid(
  cysteine = 0.3 * 10 ^ seq(0, 3, length.out = 6),
  adomed = seq(0, 100, length.out = 51)
)

print(scan, n = 6)
#> # A tibble: 306 × 2
#>   cysteine adomed
#>      <dbl>  <dbl>
#> 1      0.3      0
#> 2      0.3      2
#> 3      0.3      4
#> 4      0.3      6
#> 5      0.3      8
#> 6      0.3     10
#> # … with 300 more rows

scan <-
  scan %>%
  rowwise() %>%
  mutate(
    # Calculate steady state fluxes for every row
    ss_fluxes = {
      setSpecies("Cysteine", initial_concentration = cysteine)
      setSpecies("adenosyl", initial_concentration = adomed)
      ss <- runSteadyState()
      stopifnot(ss$result == "found")
      list(ss$reactions$flux)
    },
    # The second flux is CGS
    CGS = ss_fluxes[2],
    # The third flux is TS
    TS = ss_fluxes[3]
  )

plot <-
  ggplot(data = scan, aes(x = adomed, group = cysteine)) +
  geom_point(aes(y = CGS, color = "CGS")) +
  geom_point(aes(y = TS, color = "TS")) +
  geom_line(aes(y = CGS, color = "CGS")) +
  geom_line(aes(y = TS, color = "TS")) +
  labs(
    x = paste0("Adomed (", getQuantityUnit(), ")"),
    y = paste0("Flux (", getQuantityUnit(), " / ", getTimeUnit(), ")"),
    color = "Species"
  )

unloadModel()

plotly::ggplotly(plot)

2D Scan over Random Concentrations of Two Species

This implements an example from the Mendes2009 paper on COPASI use cases. It is in many ways similar to the previous example but is written to run parallelized.

loadSBML(biomodels_url(id=68))
#> # A COPASI model reference:
#> Model name: "Curien2003_MetThr_synthesis"
#> Number of compartments: 1
#> Number of species: 9
#> Number of reactions: 3
setSteadyStateSettings(calculate_jacobian = FALSE, perform_stability_analysis = FALSE)

model_string <- saveModelToString()

# 10000 repeats of steady state task with random cysteine and adomed
scan <-
  # Defines parallel evaluation:
  mapInParallel(
    # export the model to the workers
    .export = "model_string",
    # prepare worker for the task
    .prep = quote({
      library(CoRC)
      loadModelFromString(model_string)
    }),
    # iteration data (10000 random seeds)
    1:10000,
    # iteration function using the seed values
    function(seed) {
      set.seed(seed)
      cysteine <- 0.3 * 10 ^ runif(1L, min = 0, max = 3)
      adomed <- runif(1L, min = 0, max = 100)
      setSpecies(
        key = c("Cysteine", "adenosyl"),
        initial_concentration = c(cysteine, adomed)
      )
      ss <- runSteadyState()
      stopifnot(ss$result == "found")
      list(
        cysteine = cysteine,
        adomed = adomed,
        CGS = ss$reactions$flux[2],
        TS = ss$reactions$flux[3]
      )
    }
  )

# Combine all results and reshape the data
plotdata <-
  scan %>%
  bind_rows() %>%
  pivot_longer(c(CGS, TS), names_to = "reaction", values_to = "flux")

plot <-
  ggplot(data = plotdata, aes(x = adomed, y = flux, group = reaction, tt_cys = cysteine)) +
  geom_point(aes(color = reaction), alpha = 1 / 10, size = 3 / 4) +
  labs(
    x = paste0("Adomed (", getQuantityUnit(), ")"),
    y = paste0("Flux (", getQuantityUnit(), " / ", getTimeUnit(), ")"),
    color = "Species"
  )

unloadModel()

plotly::ggplotly(plot, tooltip = c("tt_cys", "x", "y"))

Parameter Estimation

This implements an example from the Mendes2009 paper on COPASI use cases.

loadSBML(biomodels_url(id=10))
#> # A COPASI model reference:
#> Model name: "Kholodenko2000 - Ultrasensitivity and negative feedback bring oscillations in MAPK cascade"
#> Number of compartments: 1
#> Number of species: 8
#> Number of reactions: 10

# get timeseries for the record
data_before <- runTimeCourse(duration = 1000, dt = 1)$result

# read experimental data
data_experimental <-
  read_tsv("data/MAPKdata.txt", show_col_types = FALSE) %>%
  rename(Time = time, `Mos-P` = "MAPKKK-P", `Erk2-P` = "MAPK-P")

# define the experiments for COPASI
fit_experiments <- defineExperiments(
  data = data_experimental,
  type = c("time", "dependent", "dependent"),
  mapping = c(NA, "{[Mos-P]}", "{[Erk2-P]}"),
  weight_method = "mean_square"
)

# find all reaction rates, which are reaction parameters named .V*
v_params <- parameter(regex("\\.V\\d+$"))
v_params
#> [1] "(MAPKKK activation).V1"             "(MAPKKK inactivation).V2"          
#> [3] "(dephosphorylation of MAPKK-PP).V5" "(dephosphorylation of MAPKK-P).V6" 
#> [5] "(dephosphorylation of MAPK-PP).V9"  "(dephosphorylation of MAPK-P).V10"

# define the parameters for COPASI, with start values and bounds
fit_parameters <- map(v_params, function(param) {
  val <- getParameters(param)$value
  defineParameterEstimationParameter(
    ref = parameter(param, "Value"),
    start_value = val,
    lower_bound = val * 0.1,
    upper_bound = val * 1.9)
})

result <-
  runParameterEstimation(
    parameters = fit_parameters,
    experiments = fit_experiments,
    method = list(
      method = "LevenbergMarquardt",
      log_verbosity = 2
    ),
    update_model = TRUE
  )

# get timeseries for the record
data_after <- runTimeCourse(duration = 1000, dt = 1)$result

plots <- list(
  `Erk2-P` =
    ggplot(mapping = aes(x = Time, y = `Erk2-P`)) +
    geom_point(data = data_experimental, aes(color = "experimental")) +
    geom_line(data = data_before, aes(color = "before")) +
    geom_line(data = data_after, aes(color = "after")) +
    scale_color_manual(values = c(before = "red", after = "green", experimental = "black")) +
    labs(
      x = paste0("Time (", getTimeUnit(), ")"),
      y = paste0("Erk2-P (", getQuantityUnit(), ")"),
      color = "Series"
    ),
  `Mos-P` =
    ggplot(mapping = aes(x = Time, y = `Mos-P`)) +
    geom_point(data = data_experimental, aes(color = "experimental")) +
    geom_line(data = data_before, aes(color = "before")) +
    geom_line(data = data_after, aes(color = "after")) +
    scale_color_manual(values = c(before = "red", after = "green", experimental = "black")) +
    labs(
      x = paste0("Time (", getTimeUnit(), ")"),
      y = paste0("Mos-P (", getQuantityUnit(), ")"),
      color = "Series"
    )
)

unloadModel()

result$fitted_values
#> # A tibble: 2 × 5
#>   fitted_value objective_value root_mean_square error_mean error_mean_std_devi…¹
#>   <chr>                  <dbl>            <dbl>      <dbl>                 <dbl>
#> 1 [Mos-P]               0.0406           0.0637   -0.0137                 0.0622
#> 2 [Erk2-P]              0.0192           0.0438    0.00386                0.0436
#> # … with abbreviated variable name ¹​error_mean_std_deviation
result$parameters
#> # A tibble: 6 × 8
#>   parameter               lower…¹ start…² value upper…³ std_d…⁴ coeff…⁵ gradient
#>   <chr>                     <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
#> 1 (MAPKKK activation).V1    0.25     2.5  2.41    4.75  0.139      5.78  0.00535
#> 2 (MAPKKK inactivation).…   0.025    0.25 0.243   0.475 0.00827    3.41  0.129  
#> 3 (dephosphorylation of …   0.075    0.75 0.729   1.42  0.104     14.3   0.0145 
#> 4 (dephosphorylation of …   0.075    0.75 1.42    1.42  0.490     34.4  -0.0300 
#> 5 (dephosphorylation of …   0.05     0.5  0.768   0.95  0.0822    10.7  -0.0164 
#> 6 (dephosphorylation of …   0.05     0.5  0.781   0.95  0.104     13.3  -0.0115 
#> # … with abbreviated variable names ¹​lower_bound, ²​start_value, ³​upper_bound,
#> #   ⁴​std_deviation, ⁵​coeff_of_variation
str_trunc(result$protocol, width = 800, side = "center")
#> [1] "2023-03-12T23:40:45: Levenberg-Marquardt algorithm started\nFor more information about this method see: http://copasi.org/Support/User_Manual/Methods/Optimization_Methods/Levenberg_-_Marquardt/\n\n2023-03-12T23:40:45: niter=0, f=0.133375, fbest=0.362073\nposition: x[0]=2.5002 x[1]=0.250512 x[2]=0.79622 x[3]=1.27724 x[4]=0.510028 x[5]=0.530554 \n\n2023-03-12T23:40:45: niter=1, f=0.101663, fbest=0.133375...\n\n2023-03-12T23:40:45: niter=63, f=0.0597327, fbest=0.0597327\nposition: x[0]=2.41151 x[1]=0.242629 x[2]=0.728742 x[3]=1.425 x[4]=0.767878 x[5]=0.780763 \n\n2023-03-12T23:40:45: Iteration 63: Lambda reached maximal value. Terminating.\n\n2023-03-12T23:40:45: Algorithm reached the edge of the parameter domain 90 times.\n\n2023-03-12T23:40:45: Algorithm finished.\nTerminated after 64 of 2000 iterations.\n\n"

plotly::ggplotly(plots$`Erk2-P`)
plotly::ggplotly(plots$`Mos-P`)