IPD Analysis of EQ-5D and UMSARS IV data
Please see code in collapsed sections above output by pressing “Show”.
We load R packages and read the EQ-5D data.
# Set working directory
setwd("c:/Users/togn/OneDrive - H. Lundbeck A S/University of Sheffield/Part III Cost-effectiveness modelling MSA/Modelling in R reduced")
# Load required packages
if (!requireNamespace("haven", quietly = TRUE)) install.packages("haven")
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
if (!requireNamespace("eq5d", quietly = TRUE)) install.packages("eq5d")
if (!requireNamespace("knitr", quietly = TRUE)) install.packages("knitr")
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
if (!requireNamespace("msm", quietly = TRUE)) install.packages("msm")
if (!requireNamespace("purrr", quietly = TRUE)) install.packages("purrr")
if (!requireNamespace("reshape2", quietly = TRUE)) install.packages("reshape2")
if (!requireNamespace("kableExtra", quietly = TRUE)) install.packages("kableExtra")
if (!requireNamespace("tibble", quietly = TRUE)) install.packages("tibble")
# Retreive packages
library(haven)
library(dplyr)
library(eq5d)
library(knitr)
library(tidyverse)
library(msm)
library(purrr)
library(reshape2)
library(kableExtra)
library(tibble)
# Read datasets with EQ-5D data
Read_EQ5D_0 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_0.sas7bdat")
Read_EQ5D_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_6.sas7bdat")
Read_EQ5D_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_12.sas7bdat")
Read_EQ5D_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_18.sas7bdat")
Read_EQ5D_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_24.sas7bdat")Sorting datasets and calculation of index scores using UK preference weights for EQ-5D-3L with R package eq5d.
# Combine files into one dataset
EQ5D_dataset <- bind_rows(Read_EQ5D_0, Read_EQ5D_6, Read_EQ5D_12, Read_EQ5D_18, Read_EQ5D_24)
# Remove column 1, 10 and 11 from the dataset (eq5d_id, eq5d6_id, eq5d12_id)
EQ5D_dataset <- select(EQ5D_dataset, -c("eq5d_id", "eq5d6_id", "eq5d12_id"))
# Add empty columns for index (score) and Mean Index Score (utility)
EQ5D_dataset <- cbind(EQ5D_dataset, Index = NA, Mean_Index_Score = NA)
# Rename columns to align with naming convention in r package
EQ5D_dataset <- rename(EQ5D_dataset, MO = mobility, SC = self_care, UA = usual_activities, PD = pain_or_discomfort, AD = anxiety_or_depression)
# Add +1 to the columns MO, SC, UA, PD, AD
EQ5D_dataset <- mutate(EQ5D_dataset, MO = MO + 1, SC = SC + 1, UA = UA + 1, PD = PD + 1, AD = AD + 1)
# Insert sequence of scores to column named Index
EQ5D_dataset$Index <- paste(EQ5D_dataset$MO, EQ5D_dataset$SC, EQ5D_dataset$UA, EQ5D_dataset$PD, EQ5D_dataset$AD, sep = "")
# Make sure only complete cases are used for MO, SC, UA, PD, AD
EQ5D_dataset <- EQ5D_dataset[complete.cases(EQ5D_dataset$MO, EQ5D_dataset$SC, EQ5D_dataset$UA, EQ5D_dataset$PD, EQ5D_dataset$AD), ]
# Calculate mean index scores from UK preference weights
EQ5D_dataset <- EQ5D_dataset %>% mutate(Mean_Index_Score = eq5d(EQ5D_dataset$Index, country = "UK", version = "3L", type = "TTO", ignore = TRUE))We calculate mean index scores for EQ-5D data and create a utility table. We also provide code for fitting a beta distribution for probabilistic sensitivity analysis.
Loading the UMSARS Part IV datasets
Read_UMSARSIV_0 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_0.sas7bdat")
Read_UMSARSIV_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_6.sas7bdat")
Read_UMSARSIV_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_12.sas7bdat")
Read_UMSARSIV_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_18.sas7bdat")
Read_UMSARSIV_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_24.sas7bdat")
## Combine files into one dataset
UMSARSIV <- bind_rows(Read_UMSARSIV_0, Read_UMSARSIV_6, Read_UMSARSIV_12, Read_UMSARSIV_18, Read_UMSARSIV_24)
## Remove all columns except for 2, 35 and 36 (UMSARS IDs)
UMSARSIV <- select(UMSARSIV, -c("umsars_id", "umsars6_id", "umsars12_id"))
## Rename global_disability_scale to UMSARS_IV
UMSARSIV <- rename(UMSARSIV, UMSARS_IV = global_disability_scale)
## Remove incomplete cases
UMSARSIV <- UMSARSIV[complete.cases(UMSARSIV$UMSARS_IV), ]
## Combine datasets
Combined_DataSet <- merge(UMSARSIV, EQ5D_dataset, by = c("pat_code", "month"))
## Reduce dataset to only include pat_code, month, UMSARS_IV and Mean_Index_Score
Combined_DataSet <- select(Combined_DataSet, pat_code, month, UMSARS_IV, Mean_Index_Score)Create table wit mean index scores and estimate alpha and beta for probabilistic sensitivity analysis.
# Create table with the dataset by UMSARS_IV with mean of Mean_Index_Score, number of observations, and standard deviation. Not displayed
Table_Combined_Dataset <- Combined_DataSet %>%
group_by(UMSARS_IV) %>%
summarise(Mean_Index_Score = mean(Mean_Index_Score), N = n()) %>%
mutate(Standard_Error = sd(Mean_Index_Score) / sqrt(N))
# Add alpha and beta for a beta distribution, and standard deviation
Table_Combined_Dataset <- Table_Combined_Dataset %>%
mutate(alpha = Mean_Index_Score * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1),
beta = (1 - Mean_Index_Score) * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1))
# Display the Table_Combined_Dataset
kable(Table_Combined_Dataset, caption = "UMSARS IV utility table with alpha and beta values")| UMSARS_IV | Mean_Index_Score | N | Standard_Error | alpha | beta |
|---|---|---|---|---|---|
| 1 | 0.6051667 | 12 | 0.0897995 | 17.32632 | 11.30434 |
| 2 | 0.5744667 | 75 | 0.0359198 | 108.26736 | 80.19851 |
| 3 | 0.4272593 | 81 | 0.0345638 | 87.09074 | 116.74507 |
| 4 | 0.1435197 | 152 | 0.0252315 | 27.56769 | 164.51524 |
| 5 | -0.1255000 | 60 | 0.0401595 | 11.11696 | -99.69834 |
We provide the same table, but for modified UMSARS IV data where items 1 + 2 are combined into one group.
## Create modified UMSARS
UMSARSIV_mod <- UMSARSIV %>% mutate(UMSARS_IV = ifelse(UMSARS_IV == 1, UMSARS_IV + 1, UMSARS_IV)) %>%
mutate(UMSARS_IV = ifelse(UMSARS_IV == 2, "1 + 2", as.character(UMSARS_IV)))
## Combine datasets
Combined_DataSet_mod <- merge(UMSARSIV_mod, EQ5D_dataset, by = c("pat_code", "month"))
## Reduce dataset to only include pat_code, month, UMSARS_IV and Mean_Index_Score
Combined_DataSet_mod <- select(Combined_DataSet_mod, pat_code, month, UMSARS_IV, Mean_Index_Score)
# Create table with the dataset by UMSARS_IV with mean of Mean_Index_Score, number of observations, and standard error
Table_Combined_Dataset_mod <- Combined_DataSet_mod %>%
group_by(UMSARS_IV) %>%
summarise(Mean_Index_Score = mean(Mean_Index_Score), N = n()) %>%
mutate(Standard_Error = sd(Mean_Index_Score) / sqrt(N))
Table_Combined_Dataset_mod <- Table_Combined_Dataset_mod %>%
mutate(alpha = Mean_Index_Score * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1),
beta = (1 - Mean_Index_Score) * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1))
# Display the Table_Combined_Dataset
kable(Table_Combined_Dataset_mod, caption = "modified UMSARS IV utility table with alpha and beta values")| UMSARS_IV | Mean_Index_Score | N | Standard_Error | alpha | beta |
|---|---|---|---|---|---|
| 1 + 2 | 0.5787011 | 87 | 0.0334286 | 125.68045 | 91.49633 |
| 3 | 0.4272593 | 81 | 0.0346446 | 86.68328 | 116.19888 |
| 4 | 0.1435197 | 152 | 0.0252904 | 27.43868 | 163.74532 |
| 5 | -0.1255000 | 60 | 0.0402534 | 11.06579 | -99.23942 |
Loading the UMSARS Part IV data and survival data to allow for calculations of transition probabilities.
### Load UMSARS IV data
UMSARSIV_0 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_0.sas7bdat")
UMSARSIV_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_6.sas7bdat")
UMSARSIV_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_12.sas7bdat")
UMSARSIV_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_18.sas7bdat")
UMSARSIV_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_24.sas7bdat")
## Load datasets with death. Note that all are alive at baseline, which is why this dataset is not loaded
Survival_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_6.sas7bdat")
Survival_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_12.sas7bdat")
Surival_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_18.sas7bdat")
Survival_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_24.sas7bdat")Combining all datasets. We start by combining the UMSARS IV data into one dataset and then combine it with the survival data.
# Load UMSARS IV datasets
UMSARSIV <- bind_rows(UMSARSIV_0, UMSARSIV_6, UMSARSIV_12, UMSARSIV_18, UMSARSIV_24) %>%
rename(`UMSARS IV` = global_disability_scale)
UMSARSIV <- UMSARSIV %>%
mutate(`UMSARS IV` = ifelse(`UMSARS IV`, as.character(`UMSARS IV`)))
# Reduce dataset to contain pat_code, month, and UMSARS IV
UMSARSIV <- UMSARSIV %>%
select(pat_code, month, `UMSARS IV`)
# Remove incomplete data
UMSARSIV <- UMSARSIV %>%
filter(!is.na(`UMSARS IV`))
# Combine files for Survival into one dataset
## Ensure date_of_data_entry columns have the same data type
Survival_6$date_of_data_entry <- as.POSIXct(Survival_6$date_of_data_entry)
Survival_12$date_of_data_entry <- as.POSIXct(Survival_12$date_of_data_entry)
Surival_18$date_of_data_entry <- as.POSIXct(Surival_18$date_of_data_entry)
Survival_24$date_of_data_entry <- as.POSIXct(Survival_24$date_of_data_entry)
## Reducing the dataset to only the relevant columns
Survival <- bind_rows(Survival_6, Survival_12, Surival_18, Survival_24) %>%
select(pat_code, month, vital_status_alive)
## Removing invalid Numbers
Survival <- Survival %>%
filter(vital_status_alive %in% c(1, 2)) %>%
rename ( Death = vital_status_alive)
# Combine the two datasets
## Sorting by variables
UMSARSIV <- UMSARSIV %>%
select(pat_code, month, `UMSARS IV`)
Survival <- Survival %>%
select(pat_code, month, Death)
# Putting them together so that death becomes the number 6 for UMSARS IV, which allows for a transition matrix to be calculated
# Combine UMSARSIV and Survival datasets by pat_code and month
Matrix_dataset <- UMSARSIV %>%
full_join(Survival, by = c("pat_code", "month")) %>%
arrange(pat_code, month)
# Assign UMSARS IV value of 6 for Death cases
Matrix_dataset <- Matrix_dataset %>%
mutate(`UMSARS IV` = ifelse(is.na(`UMSARS IV`) & Death == 2, 6, `UMSARS IV`))
# Remove rows with missing UMSARS IV values
Matrix_dataset <- Matrix_dataset %>%
filter(!is.na(`UMSARS IV`))
# Remove column Death
Matrix_dataset <- Matrix_dataset %>%
select(-Death)This section examines transition probabilities using non-parametric count method.
We use a non-parametric count method and time points are baseline to 24 months. Note that death has the value 6 in the table. Subjects are contributing with multiple measurements. We allow all transitions to assess the effect of backwards transitions, which clinically are not feasible, but may be caused by day-to-day variability in the UMSARS IV scores.
### Setting up the dataset
Trans_Matrix_dataset <- Matrix_dataset %>%
select(pat_code, month, `UMSARS IV`) %>%
arrange(pat_code, month) %>%
group_by(pat_code) %>%
mutate(month_diff = lead(month) - month,
`UMSARS IV NEXT` = lead(`UMSARS IV`)) %>%
filter(month_diff == 6) %>%
select(-month_diff) %>%
ungroup()
## Allowing backwards transitions
Trans_Matrix <- Trans_Matrix_dataset %>%
select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
table() %>%
prop.table(1) %>%
round(2)
### Print transition matrix allowing backwards transitions
kable(Trans_Matrix, caption = "Transition Matrix allowing backwards transitions. Baseline to 24 months.", row.names = TRUE)| 1 | 2 | 3 | 4 | 5 | 6 | |
|---|---|---|---|---|---|---|
| 1 | 0.25 | 0.50 | 0.25 | 0.00 | 0.00 | 0.00 |
| 2 | 0.05 | 0.46 | 0.31 | 0.16 | 0.02 | 0.00 |
| 3 | 0.01 | 0.07 | 0.41 | 0.48 | 0.00 | 0.03 |
| 4 | 0.00 | 0.01 | 0.07 | 0.61 | 0.19 | 0.13 |
| 5 | 0.00 | 0.00 | 0.00 | 0.04 | 0.74 | 0.21 |
| 6 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 |
To illustrate this point, we therefore create a matrix with highest and lowest transition probabilities for the modified UMSARS with forward transition probabilities at six-monthly intervals: 0 to 6, 6 to 12, 12 to 18, and 18 to 24 months using a non-parametric count method.This could indicate insufficient sample size, as this is a rare disease. Moreover, there is a short follow-up of 2 years, which complicates extrapolation. We need to adjust for this issue and use of multistate modelling will provide more reliable results.
# Create dataset for modified UMSARSIV
UMSARSIV_mod <- UMSARSIV %>%
mutate(`UMSARS IV` = ifelse(`UMSARS IV` %in% c(1, 2), "1 + 2", as.character(`UMSARS IV`)))
UMSARSIV_mod <- UMSARSIV_mod %>% select(pat_code, month, `UMSARS IV`)
UMSARSIV_mod <- UMSARSIV_mod %>% filter(!is.na(`UMSARS IV`))
# Combine with survival data
Matrix_dataset_mod <- UMSARSIV_mod %>%
full_join(Survival, by = c("pat_code", "month")) %>%
arrange(pat_code, month)
Matrix_dataset_mod <- Matrix_dataset_mod %>%
mutate(`UMSARS IV` = ifelse(is.na(`UMSARS IV`) & Death == 2, 6, `UMSARS IV`))
Matrix_dataset_mod <- Matrix_dataset_mod %>%
filter(!is.na(`UMSARS IV`)) %>%
select(-Death)
# Prepare transition dataset
Trans_Matrix_dataset_mod <- Matrix_dataset_mod %>%
select(pat_code, month, `UMSARS IV`) %>%
arrange(pat_code, month) %>%
group_by(pat_code) %>%
mutate(month_diff = lead(month) - month,
`UMSARS IV NEXT` = lead(`UMSARS IV`)) %>%
filter(month_diff == 6) %>%
select(-month_diff) %>%
ungroup()
# Clean for missing data
Trans_Matrix_dataset_mod <- Trans_Matrix_dataset_mod %>%
filter(!is.na(`UMSARS IV`) & !is.na(`UMSARS IV NEXT`))
# Create transition matrix allowing backwards transitions
Trans_Matrix_mod <- Trans_Matrix_dataset_mod %>%
select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
table() %>%
prop.table(1) %>%
round(2)
# Create transition matrix NOT allowing backwards transitions
Trans_Matrix_for_mod <- Trans_Matrix_dataset_mod %>%
mutate(`UMSARS IV` = ifelse(`UMSARS IV` > `UMSARS IV NEXT`, `UMSARS IV NEXT`, `UMSARS IV`))
# Forward transition matrices for different time periods
Trans_Matrix_Forward_mod_0to6 <- Trans_Matrix_for_mod %>%
filter(month >= 0 & month <= 6) %>%
filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
table() %>%
prop.table(1) %>%
round(2)
Trans_Matrix_Forward_mod_6to12 <- Trans_Matrix_for_mod %>%
filter(month >= 6 & month <= 12) %>%
filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
table() %>%
prop.table(1) %>%
round(2)
Trans_Matrix_Forward_mod_12to18 <- Trans_Matrix_for_mod %>%
filter(month >= 12 & month <= 18) %>%
filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
table() %>%
prop.table(1) %>%
round(2)
Trans_Matrix_Forward_mod_18to24 <- Trans_Matrix_for_mod %>%
filter(month >= 18 & month <= 24) %>%
filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
table() %>%
prop.table(1) %>%
round(2)
# Matrix with highest and lowest transition probabilities for the modified UMSARS with forward transition probabilities at six-monthly intervals: 0 to 6, 6 to 12, 12 to 18, and 18 to 24 months.
df0to6 <- as.data.frame(Trans_Matrix_Forward_mod_0to6)
df0to6$Period <- "0 to 6 months"
df6to12 <- as.data.frame(Trans_Matrix_Forward_mod_6to12)
df6to12$Period <- "6 to 12 months"
df12to18 <- as.data.frame(Trans_Matrix_Forward_mod_12to18)
df12to18$Period <- "12 to 18 months"
df18to24 <- as.data.frame(Trans_Matrix_Forward_mod_18to24)
df18to24$Period <- "18 to 24 months"
df_all <- rbind(df0to6, df6to12, df12to18, df18to24)
# Find the highest and lowest transition probabilities for each possible transition
df_summary <- df_all %>%
dplyr::group_by(UMSARS.IV, UMSARS.IV.NEXT) %>%
dplyr::summarise(
lowest_freq = min(Freq),
highest_freq = max(Freq),
.groups = "drop"
)
# Prepare a vector of states
states <- c("1 + 2", "3", "4", "5", "6")
# Create a 5×5 matrix of strings, with row/col names
lowest_highest_matrix <- matrix(
"",
nrow = length(states),
ncol = length(states),
dimnames = list(states, states)
)
# Fill the matrix with "lowest-highest"
for (from_state in states) {
for (to_state in states) {
# Find the row in df_summary that matches from_state/to_state
row_data <- df_summary %>%
dplyr::filter(UMSARS.IV == from_state, UMSARS.IV.NEXT == to_state)
if (nrow(row_data) == 1) {
# Format each cell as "lowest-highest"
lowest_val <- round(row_data$lowest_freq, 2)
highest_val <- round(row_data$highest_freq, 2)
lowest_highest_matrix[from_state, to_state] <- paste0(lowest_val, "-", highest_val)
} else {
# No transitions for this pair
lowest_highest_matrix[from_state, to_state] <- "NA"
}
}
}
# Print
kable(lowest_highest_matrix, caption = "Lowest and highest transition probabilities", row.names = TRUE)| 1 + 2 | 3 | 4 | 5 | 6 | |
|---|---|---|---|---|---|
| 1 + 2 | 0.55-0.8 | 0.2-0.29 | 0-0.16 | 0-0.02 | 0-0 |
| 3 | 0-0 | 0.42-0.89 | 0.11-0.54 | 0-0 | 0-0.06 |
| 4 | 0-0 | 0-0 | 0.63-0.7 | 0.2-0.26 | 0.05-0.17 |
| 5 | 0-0 | 0-0 | 0-0 | 0.55-0.88 | 0.12-0.45 |
| 6 | 0-0 | 0-0 | 0-0 | 0-0 | 1-1 |
In section we conduct multistate modelling to overcome the issues outlined in the previous section.
Therefore, a multistate model will be used to calculate transition probabilities over time. The R package msm, which will be used, use maximum-likelihood estimation for multistate Markov models. Although, the msm package was designed for continuous time models, it can be used for discrete time assuming that there is a continunous process underlying the data (Jackson 2024. msm: Multi-State Markov for Panel Data in R). I argue that this is the case for disease progression data (natural history of disease) captured at 6-monthly intervals.
It follows that the data need to be organised as a minimum by time of observation, observed state, if patient ID is not available. We have all three variables available. We will only calculate the transition probabilities for the modified UMSARS IV data with only forward transitions.
First, we start by modifying the dataset for the msm package and produce a frequency table.
# We will need to modify the Matrix_dataset_mod to make it fit for the msm package
MSM_dataset <- Matrix_dataset_mod %>%
rename(subject = pat_code, state = `UMSARS IV`) %>%
# Create a dataset with only forward transitions while keeping all subjects
group_by(subject) %>%
arrange(subject, month) %>%
mutate(state = accumulate(state, ~ max(.x, .y))) %>%
ungroup()
# Create frequency table
Freq_table <- statetable.msm(state, subject, data = MSM_dataset)
kable(Freq_table, caption = "Frequency table for the modified UMSARS IV data", row.names = TRUE)| 1 + 2 | 3 | 4 | 5 | 6 | |
|---|---|---|---|---|---|
| 1 + 2 | 38 | 21 | 11 | 2 | 1 |
| 3 | 0 | 37 | 29 | 0 | 2 |
| 4 | 0 | 0 | 96 | 23 | 17 |
| 5 | 0 | 0 | 0 | 44 | 11 |
| 6 | 0 | 0 | 0 | 0 | 2 |
# Sanity check that we are not loosing subjects
# nrow(unique(MSM_dataset$subject)) == nrow(unique(Matrix_dataset_mod$pat_code))Second, we have specified a transition intensity matrix Q, which reflects the clinically possible transitions (instantaneous transitions), which is a snapshot of the Markov process. Outside the R environment. We have specified a medically realistic Q matrix. For example, patients follow logical disease progression from UMSARS 1 + 2 (mostly or not completely independent) to UMSARS 3 (more dependent) and NOT from UMSARS 1 + 2 to UMSARS 4 (totally dependent) without passing through UMSARS 3. Also, this is a chronic progressive disease with NO disease modifiable treatments. Resultingly, patients can not improve.
However, in R, we also specify a Q matrix to illustrate the possible transitions. Zeros in the Q matrix illustrates that if time intervals are sufficiently short - we do not expect transitions through these states, but where transitions are possible we specify an initial value for transition intensity. However, for the zeros, if time intervals are long, then patients can still transition through these states. What looks like 1 + 2 to 4 in the transition matrix, is in theory 1 + 2 to 3 to 4 within the time interval t. So for the cost-effectiveness model we use 1 month as cycle length to limit this issue.
# We now specify a Q matrix in R, which is a matrix of transition intensities. Where q = 0, which also include a zero in R, but were transitions are possible we specify an initial value for transition intensity.
Q <- rbind( c(0, 0.25, 0, 0, 0.25),
c(0, 0, 0.25, 0, 0.25),
c(0, 0, 0, 0.25, 0.25),
c(0, 0, 0, 0, 0.5),
c(0, 0, 0, 0, 0))
# Specifying initial values. In order to run this function we need to rename the state variables to 1, 2, 3, 4, 5. The new state names represent 1 = UMSARS 1 + 2, 2 = UMSARS 3, 3 = UMSARS 4, 4 = UMSARS 5, 5 = UMSARS 6
MSM_dataset <- MSM_dataset %>%
mutate(state = recode(state, "1 + 2" = 1, "3" = 2, "4" = 3, "5" = 4, "6" = 5)) %>%
mutate(month = recode(month, "0" = 0, "6" = 0.5, "12" = 1, "18" = 1.5, "24" = 2)) %>%
rename(years = month)
# We then remove subjects with only one complete observation.
MSM_dataset <- MSM_dataset %>%
group_by(subject) %>%
mutate(n_obs = n()) %>%
ungroup() %>%
filter(n_obs > 1) %>%
select(-n_obs)
Q.crude <- crudeinits.msm(state ~ years, subject, data = MSM_dataset, qmatrix = Q)
# Running the multistate model. Note the warning for subject 275, this is because the subject is indeed moving to an absorbing state (5 = Death)
cav_msm <- msm(state ~ years, subject = subject, data = MSM_dataset, qmatrix = Q)
# Extract six-monthly transition probabilities by the extractor function. Note that although transition 1 to 3 in the Q matrix is specified as 0, the transition probability is not 0, because within the t = time it may be possible to transition from 1 to 2 to 3.
cycle_length <- 1/12
MSM_matrix <- pmatrix.msm(cav_msm, t = cycle_length)
# Define original state labels
state_labels <- c("mUMSARS 1 + 2", "3", "4", "5", "6 (Death)")
# Assign row and column names
rownames(MSM_matrix) <- state_labels
colnames(MSM_matrix) <- state_labels
# Convert the matrix to a data frame while preserving column names
df_MSM_matrix <- as.data.frame.matrix(MSM_matrix, check.names = FALSE)
# Save the matrix to a csv file
write.csv(df_MSM_matrix, "C:/Users/togn/OneDrive - H. Lundbeck A S/University of Sheffield/Part III Cost-effectiveness modelling MSA/Modelling in R/Data/transitions.csv", row.names = TRUE)
# Add 'From' column from row names
df_MSM_matrix <- rownames_to_column(df_MSM_matrix, var = "From")
# Round numeric columns for better readability. Skip step for now.
#msm_df <- msm_df %>%
# mutate(across(where(is.numeric), ~ round(.x, 4)))
# Display the table
kable(df_MSM_matrix,,
caption = "Multistate model transition matrix for the modified UMSARS IV data at cycle length: 1 month",
align = "c",
escape = TRUE) %>%
kable_styling(bootstrap_options = c("striped", "hover"),
full_width = TRUE,
position = "center")| From | mUMSARS 1 + 2 | 3 | 4 | 5 | 6 (Death) |
|---|---|---|---|---|---|
| mUMSARS 1 + 2 | 0.9089867 | 0.0861926 | 0.0047373 | 0.0000506 | 0.0000328 |
| 3 | 0.0000000 | 0.8975501 | 0.0998177 | 0.0016039 | 0.0010283 |
| 4 | 0.0000000 | 0.0000000 | 0.9499918 | 0.0303415 | 0.0196667 |
| 5 | 0.0000000 | 0.0000000 | 0.0000000 | 0.9686710 | 0.0313290 |
| 6 (Death) | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 | 1.0000000 |
This simple plot shows the observed vs. expected state occupation, state 1 = UMSARS 1 + 2, state 2 = UMSARS 3, state 3 = UMSARS 4, state 4 = UMSARS 5, and state 5 = UMSARS 6 (Death). It is clear we only have data up to 2 years.
Lastly, we want to generate standard errors. Note that values are not generated for impossible transitions in the Q matrix.
# Standard errors for probabilistic sensitivity analysis. The process is complicated and require bootstrapping, which means a dataset is created by resampling pairs of consecutive stays a 1000 times. The last piece of code then calculates the bootstrap standard error as the empirical standard deviation of the 1000 (5 x 5 matrices) estimates, and a 95% bootstrap confidence interval.
# Bootstrapping. Note bootstrapping is computational heavy.
q.list <- boot.msm(cav_msm, stat = function (x) {qmatrix.msm(x)$estimates})
# Standard errors
q.array <- array(unlist(q.list), dim=c(5,5,1000))
SE_table <- apply(q.array, c(1,2), sd, na.rm = TRUE)
rownames(SE_table) <- state_labels
colnames(SE_table) <- state_labels
SE_table_df <- as.data.frame(SE_table)
SE_table_df <- cbind(From = rownames(SE_table_df), SE_table_df)
# Print standard errors
kable(
SE_table_df,
caption = "Standard Errors",
row.names = FALSE,
digits = 4
)| From | mUMSARS 1 + 2 | 3 | 4 | 5 | 6 (Death) |
|---|---|---|---|---|---|
| mUMSARS 1 + 2 | 0.2378 | 0.2391 | 0.0000 | 0.0000 | 0.0159 |
| 3 | 0.0000 | 0.2098 | 0.2114 | 0.0000 | 0.0307 |
| 4 | 0.0000 | 0.0000 | 0.0927 | 0.0778 | 0.0658 |
| 5 | 0.0000 | 0.0000 | 0.0000 | 0.1236 | 0.1236 |
| 6 (Death) | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 |