This article contains case studies illustrating the benefits of implementing workflows with CoRC. Example code to get started can be found in the tutorial articles on model entity management, task management and model building.
library(tidyverse)
library(parallel)
library(CoRC)
# helper to run tasks in parallel on all cores
mapInParallel <- function(data, fun, ..., .export = character(), .prep = {}) {
  # spawn one worker process per available core
  cl <- makeCluster(detectCores())
  # make the objects named in .export available on the workers
  clusterExport(cl = cl, .export)
  # evaluate the quoted .prep expression on every worker (e.g. load packages, load the model)
  clusterCall(cl = cl, fun = eval, .prep, env = .GlobalEnv)
  # load-balanced parallel lapply over the data
  result <- parLapplyLB(cl = cl, X = data, fun = as_mapper(fun), ..., chunk.size = 50)
  stopCluster(cl)
  result
}
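As a minimal sketch of the calling convention (a hypothetical toy task, not part of the case studies below), the helper can be called with just the iteration data and a function; `.export` and `.prep` are only needed when the workers require extra objects or setup:
# toy example: square the numbers 1 to 100 on all cores
squares <- mapInParallel(
  1:100,
  function(x) x ^ 2
)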
This example loads the Kummer2000 - Oscillations in Calcium Signalling model. The model has 3 species which oscillate. These oscillations can be visualized as a trajectory through 3D space. The example does this once in a deterministic and once in a stochastic fashion.
loadSBML(biomodels_url(id = 329))
#> # A COPASI model reference:
#> Model name: "Kummer2000 - Oscillations in Calcium Signalling"
#> Number of compartments: 1
#> Number of species: 3
#> Number of reactions: 8
# Run 24 sec (2 Periods)
setTimeCourseSettings(duration = 24, intervals = 10000)
timeseries <- list(
  deterministic = runTimeCourse()$result,
  stochastic = runTimeCourse(method = list(method = "directMethod", use_random_seed = TRUE, random_seed = 1))$result
)
# Create the same plot for both timeseries
plots <- map(
  timeseries,
  plotly::plot_ly,
  type = "scatter3d",
  mode = "lines",
  x = ~ `G-alpha`,
  y = ~ activePLC,
  z = ~ Calcium,
  color = ~ Time
)
unloadModel()
plots$deterministic
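The stochastic trajectory can be displayed analogously:
plots$stochastic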
This implements an example from the Condor-COPASI paper and illustrates the advantages of parallel processing.
# Run 1000 stochastic time series possibly in parallel
loadModel("https://raw.githubusercontent.com/copasi/condor-copasi/master/examples/stochastic_test.cps")
#> # A COPASI model reference:
#> Model name: "Kummer calcium model"
#> Number of compartments: 1
#> Number of species: 3
#> Number of reactions: 8
setTimeCourseSettings(method = list(method = "directMethod", use_random_seed = TRUE))
# serialize the model so each parallel worker can load its own independent copy
model_string <- saveModelToString()
# serial equivalent for reference:
# timeseries <- 1:1000 %>% map(~ runTimeCourse(method = list(random_seed = .x))$result)
timeseries <-
  # Defines parallel evaluation:
  mapInParallel(
    # export the model to the workers
    .export = "model_string",
    # prepare worker for the task
    .prep = quote({
      library(CoRC)
      loadModelFromString(model_string)
    }),
    # iteration data (1000 random seeds)
    1:1000,
    # iteration function using the seed values
    function(seed) runTimeCourse(method = list(random_seed = seed))$result
  )
# Combine all results and reshape the data
plotdata <-
  timeseries %>%
  bind_rows() %>%
  group_by(Time) %>%
  # calculate mean and sd for all time points
  summarise(across(everything(), list(mean = mean, sd = sd)), .groups = "drop") %>%
  # gather all values so the column `name` identifies "a_mean", "b_sd" etc.
  pivot_longer(-Time) %>%
  # split up information on species (a, b, c) and type of value (mean, sd)
  separate(name, c("species", "type"), "_") %>%
  pivot_wider(names_from = type, values_from = value)
print(plotdata, n = 6)
#> # A tibble: 2,403 × 4
#> Time species mean sd
#> <dbl> <chr> <dbl> <dbl>
#> 1 0 a 8.00 0
#> 2 0 b 8.00 0
#> 3 0 c 8.00 0
#> 4 0.05 a 7.06 0.249
#> 5 0.05 b 8.12 0.117
#> 6 0.05 c 5.60 0.442
#> # … with 2,397 more rows
plot <-
  # tt_sd is an extra aesthetic used only for the plotly tooltip further below
  ggplot(data = plotdata, aes(x = Time, y = mean, group = species, tt_sd = sd)) +
  geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, fill = species), alpha = 1 / 4) +
  geom_line(aes(color = species)) +
  guides(fill = "none") +
  labs(
    x = paste0("Time (", getTimeUnit(), ")"),
    y = paste0("Concentration (", getQuantityUnit(), ")"),
    color = "Species"
  )
unloadModel()
plotly::ggplotly(plot, tooltip = c("group", "x", "y", "tt_sd"))
This implements an example from the Mendes2009 paper on COPASI use cases.
loadSBML(biomodels_url(id = 68))
#> # A COPASI model reference:
#> Model name: "Curien2003_MetThr_synthesis"
#> Number of compartments: 1
#> Number of species: 9
#> Number of reactions: 3
setSteadyStateSettings(calculate_jacobian = FALSE, perform_stability_analysis = FALSE)
# Cartesian product of the input values
scan <- expand_grid(
  cysteine = 0.3 * 10 ^ seq(0, 3, length.out = 6),
  adomed = seq(0, 100, length.out = 51)
)
print(scan, n = 6)
#> # A tibble: 306 × 2
#> cysteine adomed
#> <dbl> <dbl>
#> 1 0.3 0
#> 2 0.3 2
#> 3 0.3 4
#> 4 0.3 6
#> 5 0.3 8
#> 6 0.3 10
#> # … with 300 more rows
scan <-
  scan %>%
  rowwise() %>%
  mutate(
    # Calculate steady state fluxes for every row
    ss_fluxes = {
      setSpecies("Cysteine", initial_concentration = cysteine)
      setSpecies("adenosyl", initial_concentration = adomed)
      ss <- runSteadyState()
      stopifnot(ss$result == "found")
      list(ss$reactions$flux)
    },
    # The second flux is CGS
    CGS = ss_fluxes[2],
    # The third flux is TS
    TS = ss_fluxes[3]
  )
plot <-
  ggplot(data = scan, aes(x = adomed, group = cysteine)) +
  geom_point(aes(y = CGS, color = "CGS")) +
  geom_point(aes(y = TS, color = "TS")) +
  geom_line(aes(y = CGS, color = "CGS")) +
  geom_line(aes(y = TS, color = "TS")) +
  labs(
    x = paste0("Adomed (", getQuantityUnit(), ")"),
    y = paste0("Flux (", getQuantityUnit(), " / ", getTimeUnit(), ")"),
    color = "Reaction"
  )
unloadModel()
plotly::ggplotly(plot)
This implements an example from the Mendes2009 paper on COPASI use cases. It is similar in many ways to the previous example but is written to run in parallel.
loadSBML(biomodels_url(id = 68))
#> # A COPASI model reference:
#> Model name: "Curien2003_MetThr_synthesis"
#> Number of compartments: 1
#> Number of species: 9
#> Number of reactions: 3
setSteadyStateSettings(calculate_jacobian = FALSE, perform_stability_analysis = FALSE)
# serialize the model so each parallel worker can load its own independent copy
model_string <- saveModelToString()
# 10000 repeats of steady state task with random cysteine and adomed
scan <-
  # Defines parallel evaluation:
  mapInParallel(
    # export the model to the workers
    .export = "model_string",
    # prepare worker for the task
    .prep = quote({
      library(CoRC)
      loadModelFromString(model_string)
    }),
    # iteration data (10000 random seeds)
    1:10000,
    # iteration function using the seed values
    function(seed) {
      set.seed(seed)
      cysteine <- 0.3 * 10 ^ runif(1L, min = 0, max = 3)
      adomed <- runif(1L, min = 0, max = 100)
      setSpecies(
        key = c("Cysteine", "adenosyl"),
        initial_concentration = c(cysteine, adomed)
      )
      ss <- runSteadyState()
      stopifnot(ss$result == "found")
      list(
        cysteine = cysteine,
        adomed = adomed,
        CGS = ss$reactions$flux[2],
        TS = ss$reactions$flux[3]
      )
    }
  )
# Combine all results and reshape the data
plotdata <-
  scan %>%
  bind_rows() %>%
  pivot_longer(c(CGS, TS), names_to = "reaction", values_to = "flux")
plot <-
  # tt_cys is an extra aesthetic used only for the plotly tooltip further below
  ggplot(data = plotdata, aes(x = adomed, y = flux, group = reaction, tt_cys = cysteine)) +
  geom_point(aes(color = reaction), alpha = 1 / 10, size = 3 / 4) +
  labs(
    x = paste0("Adomed (", getQuantityUnit(), ")"),
    y = paste0("Flux (", getQuantityUnit(), " / ", getTimeUnit(), ")"),
    color = "Reaction"
  )
unloadModel()
plotly::ggplotly(plot, tooltip = c("tt_cys", "x", "y"))
This implements an example from the Mendes2009 paper on COPASI use cases.
loadSBML(biomodels_url(id = 10))
#> # A COPASI model reference:
#> Model name: "Kholodenko2000 - Ultrasensitivity and negative feedback bring oscillations in MAPK cascade"
#> Number of compartments: 1
#> Number of species: 8
#> Number of reactions: 10
# get timeseries for the record
data_before <- runTimeCourse(duration = 1000, dt = 1)$result
# read experimental data
data_experimental <-
  read_tsv("data/MAPKdata.txt", show_col_types = FALSE) %>%
  rename(Time = time, `Mos-P` = "MAPKKK-P", `Erk2-P` = "MAPK-P")
# define the experiments for COPASI
fit_experiments <- defineExperiments(
  data = data_experimental,
  type = c("time", "dependent", "dependent"),
  mapping = c(NA, "{[Mos-P]}", "{[Erk2-P]}"),
  weight_method = "mean_square"
)
# find all reaction rates, which are reaction parameters named .V*
v_params <- parameter(regex("\\.V\\d+$"))
v_params
#> [1] "(MAPKKK activation).V1" "(MAPKKK inactivation).V2"
#> [3] "(dephosphorylation of MAPKK-PP).V5" "(dephosphorylation of MAPKK-P).V6"
#> [5] "(dephosphorylation of MAPK-PP).V9" "(dephosphorylation of MAPK-P).V10"
# define the parameters for COPASI, with start values and bounds
fit_parameters <- map(v_params, function(param) {
  val <- getParameters(param)$value
  defineParameterEstimationParameter(
    ref = parameter(param, "Value"),
    start_value = val,
    lower_bound = val * 0.1,
    upper_bound = val * 1.9
  )
})
result <-
  runParameterEstimation(
    parameters = fit_parameters,
    experiments = fit_experiments,
    method = list(
      method = "LevenbergMarquardt",
      log_verbosity = 2
    ),
    update_model = TRUE
  )
# get timeseries for the record
data_after <- runTimeCourse(duration = 1000, dt = 1)$result
plots <- list(
  `Erk2-P` =
    ggplot(mapping = aes(x = Time, y = `Erk2-P`)) +
    geom_point(data = data_experimental, aes(color = "experimental")) +
    geom_line(data = data_before, aes(color = "before")) +
    geom_line(data = data_after, aes(color = "after")) +
    scale_color_manual(values = c(before = "red", after = "green", experimental = "black")) +
    labs(
      x = paste0("Time (", getTimeUnit(), ")"),
      y = paste0("Erk2-P (", getQuantityUnit(), ")"),
      color = "Series"
    ),
  `Mos-P` =
    ggplot(mapping = aes(x = Time, y = `Mos-P`)) +
    geom_point(data = data_experimental, aes(color = "experimental")) +
    geom_line(data = data_before, aes(color = "before")) +
    geom_line(data = data_after, aes(color = "after")) +
    scale_color_manual(values = c(before = "red", after = "green", experimental = "black")) +
    labs(
      x = paste0("Time (", getTimeUnit(), ")"),
      y = paste0("Mos-P (", getQuantityUnit(), ")"),
      color = "Series"
    )
)
unloadModel()
result$fitted_values
#> # A tibble: 2 × 5
#> fitted_value objective_value root_mean_square error_mean error_mean_std_devi…¹
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 [Mos-P] 0.0406 0.0637 -0.0137 0.0622
#> 2 [Erk2-P] 0.0192 0.0438 0.00386 0.0436
#> # … with abbreviated variable name ¹error_mean_std_deviation
result$parameters
#> # A tibble: 6 × 8
#> parameter lower…¹ start…² value upper…³ std_d…⁴ coeff…⁵ gradient
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 (MAPKKK activation).V1 0.25 2.5 2.41 4.75 0.139 5.78 0.00535
#> 2 (MAPKKK inactivation).… 0.025 0.25 0.243 0.475 0.00827 3.41 0.129
#> 3 (dephosphorylation of … 0.075 0.75 0.729 1.42 0.104 14.3 0.0145
#> 4 (dephosphorylation of … 0.075 0.75 1.42 1.42 0.490 34.4 -0.0300
#> 5 (dephosphorylation of … 0.05 0.5 0.768 0.95 0.0822 10.7 -0.0164
#> 6 (dephosphorylation of … 0.05 0.5 0.781 0.95 0.104 13.3 -0.0115
#> # … with abbreviated variable names ¹lower_bound, ²start_value, ³upper_bound,
#> # ⁴std_deviation, ⁵coeff_of_variation
str_trunc(result$protocol, width = 800, side = "center")
#> [1] "2023-03-12T23:40:45: Levenberg-Marquardt algorithm started\nFor more information about this method see: http://copasi.org/Support/User_Manual/Methods/Optimization_Methods/Levenberg_-_Marquardt/\n\n2023-03-12T23:40:45: niter=0, f=0.133375, fbest=0.362073\nposition: x[0]=2.5002 x[1]=0.250512 x[2]=0.79622 x[3]=1.27724 x[4]=0.510028 x[5]=0.530554 \n\n2023-03-12T23:40:45: niter=1, f=0.101663, fbest=0.133375...\n\n2023-03-12T23:40:45: niter=63, f=0.0597327, fbest=0.0597327\nposition: x[0]=2.41151 x[1]=0.242629 x[2]=0.728742 x[3]=1.425 x[4]=0.767878 x[5]=0.780763 \n\n2023-03-12T23:40:45: Iteration 63: Lambda reached maximal value. Terminating.\n\n2023-03-12T23:40:45: Algorithm reached the edge of the parameter domain 90 times.\n\n2023-03-12T23:40:45: Algorithm finished.\nTerminated after 64 of 2000 iterations.\n\n"
plotly::ggplotly(plots$`Erk2-P`)
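The corresponding Mos-P fit can be displayed analogously:
plotly::ggplotly(plots$`Mos-P`)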