--- title: "APC_Analyst&Analyst" output: html_document date: "2023-08-27" --- #1.R包的安装与加载 ```{r} library(data.table) library(scales) library(readxl) library(tidyverse) ``` #2.数据加载(疾病数据、人口学数据、order文件) ```{R} ##2.1 疾病数据(多个文件rbind) ###方法1:个性化方法(读取每个文件,然后rbind) #data_1 <- read.csv("./data/CHD_1.csv") #data_2 <- read.csv("./data/CHD_2.csv") #data_all <- rbind(data_1,data_2) ###方法2:普适性方法(读取整个文件夹,利用循环) dirname_1 <- dir("./data/疾病数据/") # dir是directory(目录)的缩写 file_1 <- paste0("./data/疾病数据/",dirname_1) # data_all <- as.data.frame(matrix(nrow=0,ncol=16)) for (a in file_1) { data <- fread(a) #fread函数用于快速读取大型文件(相较于read.csv更快),fread全称是fast read data_all <- rbind(data_all,data) } ##2.2 人口学数据 dirname_2 <- dir('./data/GBD_Population/') #注意文件路径 file_2 <- paste0('./data/GBD_Population/',dirname_2)#注意文件路径 ##2.3 order文件 order <- read_xlsx('./data/orders.xlsx', sheet = 3) ``` #3.数据清洗 ##3.1 数据清洗_疾病数据(个性化) ```{R} ###根据非常“6+1”,针对空间、人间的性别、人间的年龄、measure、metric,注意:时间不用限定 data_select <- data_all %>% filter(location_name == "Global", #个性化 sex_name %in% c("Both","Male","Female"), age_name %in% c('<5 years','5-9 years','10-14 years','15-19 years','20-24 years','25-29 years', '30-34 years', '35-39 years', '40-44 years','45-49 years', '50-54 years', '55-59 years', '60-64 years', '65-69 years'), #个性化 measure_name == "Deaths", #个性化 metric_name == "Number") ## 修改年龄名称,用于与polulation数据对应:普适 data_select$age_name <- str_split(data_select$age_name, ' ', simplify = T)[,1] data_select$age_name <- str_replace(data_select$age_name, '-',' to ') data_select$age_name[which(data_select$age_name == '<5')] <- 'Under 5' ``` ##3.2 数据清洗_人口学数据(基本普适) ```{R} var_name <- c("location_id","location_name","sex_name","year_id","age_group_id","age_group_name","val") population_all <- as.data.frame(matrix(nrow=0,ncol=length(var_name))) names(population_all)=var_name for (a in file_2) { data <- fread(a) %>% dplyr::select(var_name) population_all <- rbind(population_all,data) } ``` ##3.3 数据清洗_order文件(个性化) ```{R} order_data_all <- order %>% filter(subregion %in% c('Global')) #个性化 region <- order_data_all$Country location_ID <- order_data_all$location_id ``` #4.基于我们提供的函数完成APC建模(基本普适) ```{R} ##4.1 加载我们写好的函数 source('APC_function.R') ## 按照间隔5年求每年均值,针对国家和全球 Both <- made_country_data(data_select,sex_name = c('Male', 'Female', 'Both'), output = T, output_path = './output/自动/', measure = 'Deaths', order_list = 'Global', population_all = population_all, location_ID = 1, start_year = 1990, end_year=2019, current_year=2019) ``` #5.可视化 ##5.1 Local drift ```{R} ###5.1.1 按照间隔5年求均值 Both_apc <- as.data.frame(matrix(ncol = 7)) for (sex in names(Both)){ data <- Both[[sex]] apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'LocalDrifts') apc <- as.data.frame(apc) apc$region <- 'Global' #个性化 apc$sex <- sex apc$measure <- 'Deaths' #个性化 colnames(Both_apc) <- colnames(apc) Both_apc <- rbind(Both_apc, apc) } ###5.1.2 画图数据整理(普适) merged_Both_apc <- as.data.frame(matrix(ncol = 7)) colnames(merged_Both_apc) <- colnames(apc) merged_Both_apc <- rbind(merged_Both_apc, Both_apc) merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Age),] merged_Both_apc_selected <- merged_Both_apc merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex) ###5.1.3 可视化(基本普适) plot_localdrift<-ggplot(merged_Both_apc_selected,aes(x = Age,y = `Mean Percent Change per Calendar Year`,color=sex,group=sex))+ geom_point(size=0.8)+ geom_line(cex=0.5)+ geom_hline(yintercept = 0 ,lty=2,lwd=0.5,col="Black")+ geom_vline(xintercept = 20 ,lty=2,lwd=0.5,col="Black")+ geom_ribbon(aes(ymin =CILo, ymax =CIHi,fill=sex),alpha=0.2, linetype = 0) + #带有面积填充的区间图 scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_x_continuous(breaks = seq(0, 70, by = 10),limits = c(0,70))+ scale_y_continuous(breaks = seq(-4, 1, by = 1),limits = c(-4, 1))+ ylab('Annual change of mortality (% per year)')+ xlab('Age at death (years)')+ theme_bw()+ #黑边白底 theme(#panel.background = element_rect(fill = "transparent"), #面板中的矩形元素 #panel.grid.major = element_blank(),#面板中的粗网格 #panel.grid.minor = element_blank(),#面板中的细网格 panel.border = element_rect(size=1,fill = 'transparent'),#面板的边框 axis.line = element_line(color = "black",linewidth = ), axis.ticks.x = element_line(color = "black",size = 0.5), axis.ticks.y = element_line(color = "black",size = 0.5), axis.text = element_text(color='black',size = 6), axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5), axis.title = element_text(color='black',size = 8), legend.position = 'right', legend.key.size = unit(0.5, "cm"), legend.title = element_blank(), legend.text = element_text(color='black',size = 8), plot.title = element_text(hjust = 0.5,size = 8)) #ggsave(plot_localdrift, file ='./output/plot_localdrift.pdf',units = 'cm', height = 6, width = 10) ``` ##5.2 Age effect ```{R} ###5.2.1 按照间隔5年求均值 Both_apc <- as.data.frame(matrix(ncol = 7)) for (sex in names(Both)){ data <- Both[[sex]] apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'LongAge') apc <- as.data.frame(apc) apc$region <- 'Global' apc$sex <- sex apc$measure <- 'Deaths' colnames(Both_apc) <- colnames(apc) Both_apc <- rbind(Both_apc, apc) } ###5.2.2 画图数据整理(普适) merged_Both_apc <- as.data.frame(matrix(ncol = 7)) colnames(merged_Both_apc) <- colnames(apc) merged_Both_apc <- rbind(merged_Both_apc, Both_apc) merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Age),] merged_Both_apc_selected <- merged_Both_apc merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex) ###5.2.3 可视化(基本普适) plot_age_effect<-ggplot(merged_Both_apc_selected,aes(x = Age,y = Rate, color=sex,group=sex))+ geom_ribbon(aes(ymin =CILo, ymax =CIHi,fill=sex),alpha=0.2, linetype = 0) + geom_line(cex=0.3)+ geom_point(size=0.5)+ scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_x_continuous(breaks = seq(0, 70, by = 10),limits = c(0,70))+ scale_y_continuous( trans='log10',breaks = trans_breaks("log10", function(x) 10^x), label = trans_format("log10", math_format(10^.x)), limits = c(NA, 100) )+ ylab('Mortality rate (per 100,000 person-years)')+ xlab('Age')+ annotation_logticks(sides = 'l')+ theme_minimal(10)+ theme(legend.position = 'right', panel.border = element_rect(size=1,fill = 'transparent'), axis.text = element_text(color='black',size = 6), axis.title = element_text(color='black',size = 8), axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5), axis.ticks.x = element_line(color = "black",size = 0.5), axis.ticks.y = element_line(color = "black",size = 0.5), legend.key.size = unit(0.3, "cm"), legend.title = element_blank(), legend.text = element_text(color='black',size = 8), plot.title = element_text(hjust = 0.5,size = 8)) #ggsave(plot_age_effect, file ='./output/plot_age_effect.pdf',units = 'cm', height = 6, width = 10) ``` ##5.3 Period effect ```{R} ###5.3.1 按照间隔5年求均值 Both_apc <- as.data.frame(matrix(ncol = 7)) for (sex in names(Both)){ data <- Both[[sex]] apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'PeriodRR') #PeriodRR对应时期效应 apc <- as.data.frame(apc) apc$region <- 'Global' apc$sex <- sex apc$measure <- 'deaths' colnames(Both_apc) <- colnames(apc) Both_apc <- rbind(Both_apc, apc) } ###5.3.2 画图数据整理(普适) merged_Both_apc <- as.data.frame(matrix(ncol = 7)) colnames(merged_Both_apc) <- colnames(apc) merged_Both_apc <- rbind(merged_Both_apc, Both_apc) merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Period),] merged_Both_apc_selected <- merged_Both_apc merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex) ###5.3.3 可视化(基本普适) plot_period_effect<-ggplot(merged_Both_apc_selected,aes(x = Period,y = `Rate Ratio`, color=sex, group=sex))+ geom_ribbon(aes(ymin =`CI Lo`,ymax =`CI Hi`,fill=sex),alpha=0.2, linetype = 0) + geom_line(cex=0.3)+ geom_point(size=0.5)+ geom_hline(yintercept = 1 ,lty=2,lwd=0.5,col="Black")+ geom_vline(xintercept = 1992.5 ,lty=2,lwd=0.5,col="Black")+ scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_x_continuous(breaks = seq(1990, 2020, by = 5),limits = c(1990,2020))+ scale_y_continuous(breaks = seq(0.5, 1.1, by = 0.1),limits = c(0.5, 1.1))+ ylab('Mortality rate ratio')+ xlab('Period (year)')+ theme_minimal(10)+ theme(legend.position = 'right', panel.border = element_rect(size=1,fill = 'transparent'), axis.text = element_text(color='black',size = 6), axis.title = element_text(color='black',size = 8), axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5), axis.ticks.x = element_line(color = "black",size = 0.5), axis.ticks.y = element_line(color = "black",size = 0.5), legend.key.size = unit(0.3, "cm"), legend.title = element_blank(), legend.text = element_text(color='black',size = 8), plot.title = element_text(hjust = 0.5,size = 8)) #ggsave(plot_period_effect, file ='./output//plot_period_effect.pdf',units = 'cm', height = 6, width = 10) ``` ##5.4 Cohort effect ```{R} ###5.3.1 按照间隔5年求均值 Both_apc <- as.data.frame(matrix(ncol = 7)) for (sex in names(Both)){ data <- Both[[sex]] apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'CohortRR') #CohortRR对应队列效应 apc <- as.data.frame(apc) apc$region <- 'Global' apc$sex <- sex apc$measure <- 'deaths' colnames(Both_apc) <- colnames(apc) Both_apc <- rbind(Both_apc, apc) } ###5.3.2 画图数据整理(普适) merged_Both_apc <- as.data.frame(matrix(ncol = 7)) colnames(merged_Both_apc) <- colnames(apc) merged_Both_apc <- rbind(merged_Both_apc, Both_apc) merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Cohort),] #Cohort对应队列效应 merged_Both_apc_selected <- merged_Both_apc merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex) ###5.3.3 可视化(基本普适) plot_cohort_effect<-ggplot(merged_Both_apc_selected,aes(x = Cohort,y = `Rate Ratio`, #Cohort对应队列效应 color=sex,group=sex))+ geom_hline(yintercept = 1 ,lty=2,lwd=0.5,col="Black")+ geom_vline(xintercept = 1960 ,lty=2,lwd=0.5,col="Black")+ geom_ribbon(aes(ymin =`CILo`, ymax =`CIHi`,fill=sex),alpha=0.2, linetype = 0) + geom_line(cex=0.3)+ geom_point(size=0.5)+ scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色 scale_x_continuous(breaks = seq(1920, 2020, by = 10),limits = c(1920, 2020))+ scale_y_continuous(breaks = seq(0, 3, by = 0.5),limits = c(0, 3))+ ylab('Mortality rate ratio')+ xlab('Birth Cohort')+ theme_minimal(10)+ theme(legend.position = 'right', panel.border = element_rect(size=1,fill = 'transparent'), axis.text = element_text(color='black',size = 6), axis.title = element_text(color='black',size = 8), axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5), axis.ticks.x = element_line(color = "black",size = 0.5), axis.ticks.y = element_line(color = "black",size = 0.5), legend.key.size = unit(0.3, "cm"), legend.title = element_blank(), legend.text = element_text(color='black',size = 8), plot.title = element_text(hjust = 0.5,size = 8)) #ggsave(plot_cohort_effect, file ='./output//plot_cohort_effect.pdf',units = 'cm', height = 6, width = 10) ```