IPD Analysis of EQ-5D and UMSARS IV data

Please see code in collapsed sections above output by pressing “Show”.

1 Part 1: Preparation of EQ-5D data

1.1 Loading of data and R packages

We load R packages and read the EQ-5D data.

# Set working directory

setwd("c:/Users/togn/OneDrive - H. Lundbeck A S/University of Sheffield/Part III Cost-effectiveness modelling MSA/Modelling in R reduced")

# Load required packages
if (!requireNamespace("haven", quietly = TRUE)) install.packages("haven")
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
if (!requireNamespace("eq5d", quietly = TRUE)) install.packages("eq5d")
if (!requireNamespace("knitr", quietly = TRUE)) install.packages("knitr")
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
if (!requireNamespace("msm", quietly = TRUE)) install.packages("msm")
if (!requireNamespace("purrr", quietly = TRUE)) install.packages("purrr")
if (!requireNamespace("reshape2", quietly = TRUE)) install.packages("reshape2")
if (!requireNamespace("kableExtra", quietly = TRUE)) install.packages("kableExtra")
if (!requireNamespace("tibble", quietly = TRUE)) install.packages("tibble")

# Retreive packages

library(haven)
library(dplyr)
library(eq5d)
library(knitr)
library(tidyverse)
library(msm)
library(purrr)
library(reshape2)
library(kableExtra)
library(tibble)


# Read datasets with EQ-5D data

Read_EQ5D_0 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_0.sas7bdat")
Read_EQ5D_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_6.sas7bdat")
Read_EQ5D_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_12.sas7bdat")
Read_EQ5D_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_18.sas7bdat")
Read_EQ5D_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/eq5d_24.sas7bdat")

1.2 Combine data

Sorting datasets and calculation of index scores using UK preference weights for EQ-5D-3L with R package eq5d.

# Combine files into one dataset

EQ5D_dataset <- bind_rows(Read_EQ5D_0, Read_EQ5D_6, Read_EQ5D_12, Read_EQ5D_18, Read_EQ5D_24)


# Remove column 1, 10 and 11 from the dataset (eq5d_id, eq5d6_id, eq5d12_id)

EQ5D_dataset <- select(EQ5D_dataset, -c("eq5d_id", "eq5d6_id", "eq5d12_id"))


# Add empty columns for index (score) and Mean Index Score (utility)


EQ5D_dataset <- cbind(EQ5D_dataset, Index = NA, Mean_Index_Score = NA)

# Rename columns to align with naming convention in r package 

EQ5D_dataset <- rename(EQ5D_dataset, MO = mobility, SC = self_care, UA = usual_activities, PD = pain_or_discomfort, AD = anxiety_or_depression)


# Add +1 to the columns MO, SC, UA, PD, AD

EQ5D_dataset <- mutate(EQ5D_dataset, MO = MO + 1, SC = SC + 1, UA = UA + 1, PD = PD + 1, AD = AD + 1)


# Insert sequence of scores to column named Index

EQ5D_dataset$Index <- paste(EQ5D_dataset$MO, EQ5D_dataset$SC, EQ5D_dataset$UA, EQ5D_dataset$PD, EQ5D_dataset$AD, sep = "")


# Make sure only complete cases are used for MO, SC, UA, PD, AD

EQ5D_dataset <- EQ5D_dataset[complete.cases(EQ5D_dataset$MO, EQ5D_dataset$SC, EQ5D_dataset$UA, EQ5D_dataset$PD, EQ5D_dataset$AD), ]

# Calculate mean index scores from UK preference weights

EQ5D_dataset <- EQ5D_dataset %>% mutate(Mean_Index_Score = eq5d(EQ5D_dataset$Index, country = "UK", version = "3L", type = "TTO", ignore = TRUE))

2 Part 2: Results EQ-5D

We calculate mean index scores for EQ-5D data and create a utility table. We also provide code for fitting a beta distribution for probabilistic sensitivity analysis.

2.1 Preparation

Loading the UMSARS Part IV datasets

Read_UMSARSIV_0 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_0.sas7bdat")
Read_UMSARSIV_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_6.sas7bdat")
Read_UMSARSIV_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_12.sas7bdat")
Read_UMSARSIV_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_18.sas7bdat")
Read_UMSARSIV_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_24.sas7bdat")

## Combine files into one dataset


UMSARSIV <- bind_rows(Read_UMSARSIV_0, Read_UMSARSIV_6, Read_UMSARSIV_12, Read_UMSARSIV_18, Read_UMSARSIV_24)


## Remove all columns except for 2, 35 and 36 (UMSARS IDs)


UMSARSIV <- select(UMSARSIV, -c("umsars_id", "umsars6_id", "umsars12_id"))


## Rename global_disability_scale to UMSARS_IV


UMSARSIV <- rename(UMSARSIV, UMSARS_IV = global_disability_scale)


## Remove incomplete cases


UMSARSIV <- UMSARSIV[complete.cases(UMSARSIV$UMSARS_IV), ]  


## Combine datasets 


Combined_DataSet <- merge(UMSARSIV, EQ5D_dataset, by = c("pat_code", "month"))  


## Reduce dataset to only include pat_code, month, UMSARS_IV and Mean_Index_Score


Combined_DataSet <- select(Combined_DataSet, pat_code, month, UMSARS_IV, Mean_Index_Score)

2.2 UMSARS Part IV: Utility table

Create table wit mean index scores and estimate alpha and beta for probabilistic sensitivity analysis.

# Create table with the dataset by UMSARS_IV with mean of Mean_Index_Score, number of observations, and standard deviation. Not displayed
Table_Combined_Dataset <- Combined_DataSet %>%
group_by(UMSARS_IV) %>%
summarise(Mean_Index_Score = mean(Mean_Index_Score), N = n()) %>%
mutate(Standard_Error = sd(Mean_Index_Score) / sqrt(N))


# Add alpha and beta for a beta distribution, and standard deviation

Table_Combined_Dataset <- Table_Combined_Dataset %>%
  mutate(alpha = Mean_Index_Score * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1),
         beta = (1 - Mean_Index_Score) * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1))

# Display the Table_Combined_Dataset
kable(Table_Combined_Dataset, caption = "UMSARS IV utility table with alpha and beta values")

UMSARS IV utility table with alpha and beta values
UMSARS_IV	Mean_Index_Score	N	Standard_Error	alpha	beta
1	0.6051667	12	0.0897995	17.32632	11.30434
2	0.5744667	75	0.0359198	108.26736	80.19851
3	0.4272593	81	0.0345638	87.09074	116.74507
4	0.1435197	152	0.0252315	27.56769	164.51524
5	-0.1255000	60	0.0401595	11.11696	-99.69834

# Save as csv file

# write.csv(Table_Combined_Dataset, "C:/Users/togn/OneDrive - H. Lundbeck A S/University of Sheffield/Part III Cost-effectiveness modelling MSA/Modelling in R/utilities.csv", row.names = TRUE)

2.3 mUMSARS Part IV: Utility table

We provide the same table, but for modified UMSARS IV data where items 1 + 2 are combined into one group.

## Create modified UMSARS 


UMSARSIV_mod <- UMSARSIV %>% mutate(UMSARS_IV = ifelse(UMSARS_IV == 1, UMSARS_IV + 1, UMSARS_IV)) %>% 
  mutate(UMSARS_IV = ifelse(UMSARS_IV == 2, "1 + 2", as.character(UMSARS_IV)))


## Combine datasets 


Combined_DataSet_mod <- merge(UMSARSIV_mod, EQ5D_dataset, by = c("pat_code", "month"))  


## Reduce dataset to only include pat_code, month, UMSARS_IV and Mean_Index_Score


Combined_DataSet_mod <- select(Combined_DataSet_mod, pat_code, month, UMSARS_IV, Mean_Index_Score)

# Create table with the dataset by UMSARS_IV with mean of Mean_Index_Score, number of observations, and standard error

Table_Combined_Dataset_mod <- Combined_DataSet_mod %>%
group_by(UMSARS_IV) %>%
summarise(Mean_Index_Score = mean(Mean_Index_Score), N = n()) %>%
mutate(Standard_Error = sd(Mean_Index_Score) / sqrt(N))

Table_Combined_Dataset_mod <- Table_Combined_Dataset_mod %>%
  mutate(alpha = Mean_Index_Score * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1),
         beta = (1 - Mean_Index_Score) * (Mean_Index_Score * (1 - Mean_Index_Score) / Standard_Error^2 - 1))

# Display the Table_Combined_Dataset
kable(Table_Combined_Dataset_mod, caption = "modified UMSARS IV utility table with alpha and beta values")

modified UMSARS IV utility table with alpha and beta values
UMSARS_IV	Mean_Index_Score	N	Standard_Error	alpha	beta
1 + 2	0.5787011	87	0.0334286	125.68045	91.49633
3	0.4272593	81	0.0346446	86.68328	116.19888
4	0.1435197	152	0.0252904	27.43868	163.74532
5	-0.1255000	60	0.0402534	11.06579	-99.23942

# Save as csv file
write.csv(Table_Combined_Dataset_mod, "C:/Users/togn/OneDrive - H. Lundbeck A S/University of Sheffield/Part III Cost-effectiveness modelling MSA/Modelling in R/utilities.csv", row.names = TRUE)

3 Part 3: Preparation of UMSARS Part IV and survival data

3.1 Preparation

Loading the UMSARS Part IV data and survival data to allow for calculations of transition probabilities.

### Load UMSARS IV data

UMSARSIV_0 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_0.sas7bdat")

UMSARSIV_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_6.sas7bdat")

UMSARSIV_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_12.sas7bdat")

UMSARSIV_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_18.sas7bdat")

UMSARSIV_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/umsars_24.sas7bdat")

## Load datasets with death. Note that all are alive at baseline, which is why this dataset is not loaded

Survival_6 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_6.sas7bdat")
Survival_12 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_12.sas7bdat")
Surival_18 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_18.sas7bdat")
Survival_24 <- read_sas("B:/Data/External_Studies/European MSA registry/CrudeSASData/personal_24.sas7bdat")

3.2 Create dataset for analysis

Combining all datasets. We start by combining the UMSARS IV data into one dataset and then combine it with the survival data.

# Load UMSARS IV datasets

UMSARSIV <- bind_rows(UMSARSIV_0, UMSARSIV_6, UMSARSIV_12, UMSARSIV_18, UMSARSIV_24) %>%
  rename(`UMSARS IV` = global_disability_scale)


UMSARSIV <- UMSARSIV %>%
  mutate(`UMSARS IV` = ifelse(`UMSARS IV`, as.character(`UMSARS IV`)))

# Reduce dataset to contain pat_code, month, and UMSARS IV


UMSARSIV <- UMSARSIV %>%
      select(pat_code, month, `UMSARS IV`)


# Remove incomplete data

UMSARSIV <- UMSARSIV %>%
        filter(!is.na(`UMSARS IV`))


# Combine files for Survival into one dataset

## Ensure date_of_data_entry columns have the same data type

Survival_6$date_of_data_entry <- as.POSIXct(Survival_6$date_of_data_entry) 
Survival_12$date_of_data_entry <- as.POSIXct(Survival_12$date_of_data_entry)
Surival_18$date_of_data_entry <- as.POSIXct(Surival_18$date_of_data_entry)
Survival_24$date_of_data_entry <- as.POSIXct(Survival_24$date_of_data_entry)

## Reducing the dataset to only the relevant columns

Survival <- bind_rows(Survival_6, Survival_12, Surival_18, Survival_24) %>%
  select(pat_code, month, vital_status_alive)


## Removing invalid Numbers 


Survival <- Survival %>% 
  filter(vital_status_alive %in% c(1, 2)) %>%
  rename ( Death = vital_status_alive)


# Combine the two datasets

## Sorting by variables

UMSARSIV <- UMSARSIV %>%
  select(pat_code, month, `UMSARS IV`)

Survival <- Survival %>%
  select(pat_code, month, Death)

# Putting them together so that death becomes the number 6 for UMSARS IV, which allows for a transition matrix to be calculated

# Combine UMSARSIV and Survival datasets by pat_code and month
Matrix_dataset <- UMSARSIV %>%
  full_join(Survival, by = c("pat_code", "month")) %>%
  arrange(pat_code, month)

# Assign UMSARS IV value of 6 for Death cases
Matrix_dataset <- Matrix_dataset %>%
  mutate(`UMSARS IV` = ifelse(is.na(`UMSARS IV`) & Death == 2, 6, `UMSARS IV`))

# Remove rows with missing UMSARS IV values
Matrix_dataset <- Matrix_dataset %>%
  filter(!is.na(`UMSARS IV`))
# Remove column Death
Matrix_dataset <- Matrix_dataset %>%
  select(-Death)

4 Part 4: Non-parametric count method: UMSARS Part IV and survival

This section examines transition probabilities using non-parametric count method.

4.1 Exploration of backwards transitions

We use a non-parametric count method and time points are baseline to 24 months. Note that death has the value 6 in the table. Subjects are contributing with multiple measurements. We allow all transitions to assess the effect of backwards transitions, which clinically are not feasible, but may be caused by day-to-day variability in the UMSARS IV scores.

### Setting up the dataset

Trans_Matrix_dataset <- Matrix_dataset %>%
  select(pat_code, month, `UMSARS IV`) %>%
  arrange(pat_code, month) %>%
  group_by(pat_code) %>%
  mutate(month_diff = lead(month) - month, 
         `UMSARS IV NEXT` = lead(`UMSARS IV`)) %>%
  filter(month_diff == 6) %>%
  select(-month_diff) %>%
  ungroup()

## Allowing backwards transitions

Trans_Matrix <- Trans_Matrix_dataset %>% 
  select(`UMSARS IV`, `UMSARS IV NEXT`) %>% 
  table() %>%
  prop.table(1) %>%
  round(2) 

### Print transition matrix allowing backwards transitions

kable(Trans_Matrix, caption = "Transition Matrix allowing backwards transitions. Baseline to 24 months.", row.names = TRUE)

Transition Matrix allowing backwards transitions. Baseline to 24 months.
	1	2	3	4	5	6
1	0.25	0.50	0.25	0.00	0.00	0.00
2	0.05	0.46	0.31	0.16	0.02	0.00
3	0.01	0.07	0.41	0.48	0.00	0.03
4	0.00	0.01	0.07	0.61	0.19	0.13
5	0.00	0.00	0.00	0.04	0.74	0.21
6	0.00	0.00	0.00	0.00	0.00	1.00

4.2 Illustrating that transitions are not constant over time

To illustrate this point, we therefore create a matrix with highest and lowest transition probabilities for the modified UMSARS with forward transition probabilities at six-monthly intervals: 0 to 6, 6 to 12, 12 to 18, and 18 to 24 months using a non-parametric count method.This could indicate insufficient sample size, as this is a rare disease. Moreover, there is a short follow-up of 2 years, which complicates extrapolation. We need to adjust for this issue and use of multistate modelling will provide more reliable results.

# Create dataset for modified UMSARSIV
UMSARSIV_mod <- UMSARSIV %>%
  mutate(`UMSARS IV` = ifelse(`UMSARS IV` %in% c(1, 2), "1 + 2", as.character(`UMSARS IV`)))
UMSARSIV_mod <- UMSARSIV_mod %>% select(pat_code, month, `UMSARS IV`)
UMSARSIV_mod <- UMSARSIV_mod %>% filter(!is.na(`UMSARS IV`))

# Combine with survival data
Matrix_dataset_mod <- UMSARSIV_mod %>% 
  full_join(Survival, by = c("pat_code", "month")) %>% 
  arrange(pat_code, month)
Matrix_dataset_mod <- Matrix_dataset_mod %>% 
  mutate(`UMSARS IV` = ifelse(is.na(`UMSARS IV`) & Death == 2, 6, `UMSARS IV`))
Matrix_dataset_mod <- Matrix_dataset_mod %>%
  filter(!is.na(`UMSARS IV`)) %>%
  select(-Death)

# Prepare transition dataset
Trans_Matrix_dataset_mod <- Matrix_dataset_mod %>%
  select(pat_code, month, `UMSARS IV`) %>%
  arrange(pat_code, month) %>%
  group_by(pat_code) %>%
  mutate(month_diff = lead(month) - month, 
         `UMSARS IV NEXT` = lead(`UMSARS IV`)) %>%
  filter(month_diff == 6) %>%
  select(-month_diff) %>%
  ungroup()

# Clean for missing data
Trans_Matrix_dataset_mod <- Trans_Matrix_dataset_mod %>%
  filter(!is.na(`UMSARS IV`) & !is.na(`UMSARS IV NEXT`))

# Create transition matrix allowing backwards transitions
Trans_Matrix_mod <- Trans_Matrix_dataset_mod %>%
  select(`UMSARS IV`, `UMSARS IV NEXT`) %>% 
  table() %>%
  prop.table(1) %>%
  round(2)

# Create transition matrix NOT allowing backwards transitions
Trans_Matrix_for_mod <- Trans_Matrix_dataset_mod %>%
  mutate(`UMSARS IV` = ifelse(`UMSARS IV` > `UMSARS IV NEXT`, `UMSARS IV NEXT`, `UMSARS IV`))

# Forward transition matrices for different time periods
Trans_Matrix_Forward_mod_0to6 <- Trans_Matrix_for_mod %>%
  filter(month >= 0 & month <= 6) %>%
  filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
  select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
  table() %>%
  prop.table(1) %>%
  round(2)

Trans_Matrix_Forward_mod_6to12 <- Trans_Matrix_for_mod %>%
  filter(month >= 6 & month <= 12) %>%
  filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
  select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
  table() %>%
  prop.table(1) %>%
  round(2)

Trans_Matrix_Forward_mod_12to18 <- Trans_Matrix_for_mod %>%
  filter(month >= 12 & month <= 18) %>%
  filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
  select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
  table() %>%
  prop.table(1) %>%
  round(2)

Trans_Matrix_Forward_mod_18to24 <- Trans_Matrix_for_mod %>%
  filter(month >= 18 & month <= 24) %>%
  filter(`UMSARS IV NEXT` >= `UMSARS IV`) %>%
  select(`UMSARS IV`, `UMSARS IV NEXT`) %>%
  table() %>%
  prop.table(1) %>%
  round(2)

# Matrix with highest and lowest transition probabilities for the modified UMSARS with forward transition probabilities at six-monthly intervals: 0 to 6, 6 to 12, 12 to 18, and 18 to 24 months.

df0to6 <- as.data.frame(Trans_Matrix_Forward_mod_0to6)
df0to6$Period <- "0 to 6 months"

df6to12 <- as.data.frame(Trans_Matrix_Forward_mod_6to12)
df6to12$Period <- "6 to 12 months"

df12to18 <- as.data.frame(Trans_Matrix_Forward_mod_12to18)
df12to18$Period <- "12 to 18 months"

df18to24 <- as.data.frame(Trans_Matrix_Forward_mod_18to24)
df18to24$Period <- "18 to 24 months"

df_all <- rbind(df0to6, df6to12, df12to18, df18to24)

# Find the highest and lowest transition probabilities for each possible transition

df_summary <- df_all %>%
  dplyr::group_by(UMSARS.IV, UMSARS.IV.NEXT) %>%
  dplyr::summarise(
    lowest_freq  = min(Freq),
    highest_freq = max(Freq),
    .groups = "drop"
  )

# Prepare a vector of states
states <- c("1 + 2", "3", "4", "5", "6")

# Create a 5×5 matrix of strings, with row/col names
lowest_highest_matrix <- matrix(
  "", 
  nrow = length(states), 
  ncol = length(states),
  dimnames = list(states, states)
)

# Fill the matrix with "lowest-highest"
for (from_state in states) {
  for (to_state in states) {
    
    # Find the row in df_summary that matches from_state/to_state
    row_data <- df_summary %>%
      dplyr::filter(UMSARS.IV == from_state, UMSARS.IV.NEXT == to_state)
    
    if (nrow(row_data) == 1) {
      # Format each cell as "lowest-highest"
      lowest_val  <- round(row_data$lowest_freq, 2)
      highest_val <- round(row_data$highest_freq, 2)
      lowest_highest_matrix[from_state, to_state] <- paste0(lowest_val, "-", highest_val)
    } else {
      # No transitions for this pair
      lowest_highest_matrix[from_state, to_state] <- "NA"
    }
  }
}

# Print 

kable(lowest_highest_matrix, caption = "Lowest and highest transition probabilities", row.names = TRUE)

Lowest and highest transition probabilities
	1 + 2	3	4	5	6
1 + 2	0.55-0.8	0.2-0.29	0-0.16	0-0.02	0-0
3	0-0	0.42-0.89	0.11-0.54	0-0	0-0.06
4	0-0	0-0	0.63-0.7	0.2-0.26	0.05-0.17
5	0-0	0-0	0-0	0.55-0.88	0.12-0.45
6	0-0	0-0	0-0	0-0	1-1

5 Part 5: Multistate model approach

In section we conduct multistate modelling to overcome the issues outlined in the previous section.

5.1 Multistate modelling for mUMSARS IV with forward transitions

Therefore, a multistate model will be used to calculate transition probabilities over time. The R package msm, which will be used, use maximum-likelihood estimation for multistate Markov models. Although, the msm package was designed for continuous time models, it can be used for discrete time assuming that there is a continunous process underlying the data (Jackson 2024. msm: Multi-State Markov for Panel Data in R). I argue that this is the case for disease progression data (natural history of disease) captured at 6-monthly intervals.

It follows that the data need to be organised as a minimum by time of observation, observed state, if patient ID is not available. We have all three variables available. We will only calculate the transition probabilities for the modified UMSARS IV data with only forward transitions.

First, we start by modifying the dataset for the msm package and produce a frequency table.

# We will need to modify the Matrix_dataset_mod to make it fit for the msm package


MSM_dataset <- Matrix_dataset_mod %>%
  rename(subject = pat_code, state = `UMSARS IV`) %>%

# Create a dataset with only forward transitions while keeping all subjects

group_by(subject) %>%
  arrange(subject, month) %>%
  mutate(state = accumulate(state, ~ max(.x, .y))) %>%
  ungroup()

# Create frequency table 

Freq_table <- statetable.msm(state, subject, data = MSM_dataset)

kable(Freq_table, caption = "Frequency table for the modified UMSARS IV data", row.names = TRUE)

Frequency table for the modified UMSARS IV data
	1 + 2	3	4	5	6
1 + 2	38	21	11	2	1
3	0	37	29	0	2
4	0	0	96	23	17
5	0	0	0	44	11
6	0	0	0	0	2

# Sanity check that we are not loosing subjects
 # nrow(unique(MSM_dataset$subject)) == nrow(unique(Matrix_dataset_mod$pat_code))

Second, we have specified a transition intensity matrix Q, which reflects the clinically possible transitions (instantaneous transitions), which is a snapshot of the Markov process. Outside the R environment. We have specified a medically realistic Q matrix. For example, patients follow logical disease progression from UMSARS 1 + 2 (mostly or not completely independent) to UMSARS 3 (more dependent) and NOT from UMSARS 1 + 2 to UMSARS 4 (totally dependent) without passing through UMSARS 3. Also, this is a chronic progressive disease with NO disease modifiable treatments. Resultingly, patients can not improve.

# We have specified a matrix Q (outside R environment). 

include_graphics("Q_image.JPG")

However, in R, we also specify a Q matrix to illustrate the possible transitions. Zeros in the Q matrix illustrates that if time intervals are sufficiently short - we do not expect transitions through these states, but where transitions are possible we specify an initial value for transition intensity. However, for the zeros, if time intervals are long, then patients can still transition through these states. What looks like 1 + 2 to 4 in the transition matrix, is in theory 1 + 2 to 3 to 4 within the time interval t. So for the cost-effectiveness model we use 1 month as cycle length to limit this issue.

# We now specify a Q matrix in R, which is a matrix of transition intensities. Where q = 0, which also include a zero in R, but were transitions are possible we specify an initial value for transition intensity.

Q <- rbind( c(0, 0.25, 0, 0, 0.25),
            c(0, 0, 0.25, 0, 0.25),
            c(0, 0, 0, 0.25, 0.25),
            c(0, 0, 0, 0, 0.5),
            c(0, 0, 0, 0, 0))


# Specifying initial values. In order to run this function we need to rename the state variables to 1, 2, 3, 4, 5. The new state names represent 1 = UMSARS 1 + 2, 2 = UMSARS 3, 3 = UMSARS 4, 4 = UMSARS 5, 5 = UMSARS 6

 MSM_dataset <- MSM_dataset %>%
  mutate(state = recode(state, "1 + 2" = 1, "3" = 2, "4" = 3, "5" = 4, "6" = 5)) %>%
           mutate(month = recode(month, "0" = 0, "6" = 0.5, "12" = 1, "18" = 1.5, "24" = 2)) %>%
                    rename(years = month)
         

 # We then remove subjects with only one complete observation. 
 
 MSM_dataset <- MSM_dataset %>%
  group_by(subject) %>%
  mutate(n_obs = n()) %>%
  ungroup() %>%
  filter(n_obs > 1) %>%
  select(-n_obs)
 
Q.crude <- crudeinits.msm(state ~ years, subject, data = MSM_dataset, qmatrix = Q)

# Running the multistate model. Note the warning for subject 275, this is because the subject is indeed moving to an absorbing state (5 = Death) 

cav_msm <- msm(state ~ years, subject = subject, data = MSM_dataset, qmatrix = Q)

# Extract six-monthly transition probabilities by the extractor function. Note that although transition 1 to 3 in the Q matrix is specified as 0, the transition probability is not 0, because within the t = time it may be possible to transition from 1 to 2 to 3.

cycle_length <- 1/12

MSM_matrix <- pmatrix.msm(cav_msm, t = cycle_length)

# Define original state labels
state_labels <- c("mUMSARS 1 + 2", "3", "4", "5", "6 (Death)")

# Assign row and column names
rownames(MSM_matrix) <- state_labels
colnames(MSM_matrix) <- state_labels


# Convert the matrix to a data frame while preserving column names
df_MSM_matrix <- as.data.frame.matrix(MSM_matrix, check.names = FALSE)

# Save the matrix to a csv file

write.csv(df_MSM_matrix, "C:/Users/togn/OneDrive - H. Lundbeck A S/University of Sheffield/Part III Cost-effectiveness modelling MSA/Modelling in R/Data/transitions.csv", row.names = TRUE)

# Add 'From' column from row names
df_MSM_matrix <- rownames_to_column(df_MSM_matrix, var = "From")

# Round numeric columns for better readability. Skip step for now. 
#msm_df <- msm_df %>%
 # mutate(across(where(is.numeric), ~ round(.x, 4)))


# Display the table
kable(df_MSM_matrix,, 
      caption = "Multistate model transition matrix for the modified UMSARS IV data at cycle length: 1 month",
      align = "c",
      escape = TRUE) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), 
                full_width = TRUE,
                position = "center")

Multistate model transition matrix for the modified UMSARS IV data at cycle length: 1 month
From	mUMSARS 1 + 2	3	4	5	6 (Death)
mUMSARS 1 + 2	0.9089867	0.0861926	0.0047373	0.0000506	0.0000328
3	0.0000000	0.8975501	0.0998177	0.0016039	0.0010283
4	0.0000000	0.0000000	0.9499918	0.0303415	0.0196667
5	0.0000000	0.0000000	0.0000000	0.9686710	0.0313290
6 (Death)	0.0000000	0.0000000	0.0000000	0.0000000	1.0000000

This simple plot shows the observed vs. expected state occupation, state 1 = UMSARS 1 + 2, state 2 = UMSARS 3, state 3 = UMSARS 4, state 4 = UMSARS 5, and state 5 = UMSARS 6 (Death). It is clear we only have data up to 2 years.

# Observed vs. expected state occupation

plot.prevalence.msm(cav_msm, mintime = 0, maxtime = 10)

Lastly, we want to generate standard errors. Note that values are not generated for impossible transitions in the Q matrix.

# Standard errors for probabilistic sensitivity analysis. The process is complicated and require bootstrapping, which means a dataset is created by resampling pairs of consecutive stays a 1000 times. The last piece of code then calculates the bootstrap standard error as the empirical standard deviation of the 1000 (5 x 5 matrices) estimates, and a 95% bootstrap confidence interval.

# Bootstrapping. Note bootstrapping is computational heavy.  

q.list <- boot.msm(cav_msm, stat = function (x) {qmatrix.msm(x)$estimates})

# Standard errors

q.array <- array(unlist(q.list), dim=c(5,5,1000))
SE_table <- apply(q.array, c(1,2), sd, na.rm = TRUE)

rownames(SE_table) <- state_labels
colnames(SE_table) <- state_labels

SE_table_df <- as.data.frame(SE_table)
SE_table_df <- cbind(From = rownames(SE_table_df), SE_table_df)

# Print standard errors

kable(
  SE_table_df,
  caption = "Standard Errors",
  row.names = FALSE,
  digits = 4
)

Standard Errors
From	mUMSARS 1 + 2	3	4	5	6 (Death)
mUMSARS 1 + 2	0.2378	0.2391	0.0000	0.0000	0.0159
3	0.0000	0.2098	0.2114	0.0000	0.0307
4	0.0000	0.0000	0.0927	0.0778	0.0658
5	0.0000	0.0000	0.0000	0.1236	0.1236
6 (Death)	0.0000	0.0000	0.0000	0.0000	0.0000

DATA ANALYSIS FOR MARKOV MODEL

Tobias

2025-08-13