---
title: "APC_Analyst&Analyst"
output: html_document
date: "2023-08-27"
---
#1.R包的安装与加载
```{r}
library(data.table)
library(scales)
library(readxl)
library(tidyverse)
```
#2.数据加载(疾病数据、人口学数据、order文件)
```{R}
##2.1 疾病数据(多个文件rbind)
###方法1:个性化方法(读取每个文件,然后rbind)
#data_1 <- read.csv("./data/CHD_1.csv")
#data_2 <- read.csv("./data/CHD_2.csv")
#data_all <- rbind(data_1,data_2)
###方法2:普适性方法(读取整个文件夹,利用循环)
dirname_1 <- dir("./data/疾病数据/") # dir是directory(目录)的缩写
file_1 <- paste0("./data/疾病数据/",dirname_1) #
data_all <- as.data.frame(matrix(nrow=0,ncol=16))
for (a in file_1) {
data <- fread(a) #fread函数用于快速读取大型文件(相较于read.csv更快),fread全称是fast read
data_all <- rbind(data_all,data)
}
##2.2 人口学数据
dirname_2 <- dir('./data/GBD_Population/') #注意文件路径
file_2 <- paste0('./data/GBD_Population/',dirname_2)#注意文件路径
##2.3 order文件
order <- read_xlsx('./data/orders.xlsx', sheet = 3)
```
#3.数据清洗
##3.1 数据清洗_疾病数据(个性化)
```{R}
###根据非常“6+1”,针对空间、人间的性别、人间的年龄、measure、metric,注意:时间不用限定
data_select <- data_all %>%
filter(location_name == "Global", #个性化
sex_name %in% c("Both","Male","Female"),
age_name %in% c('<5 years','5-9 years','10-14 years','15-19 years','20-24 years','25-29 years', '30-34 years', '35-39 years', '40-44 years','45-49 years', '50-54 years', '55-59 years', '60-64 years', '65-69 years'), #个性化
measure_name == "Deaths", #个性化
metric_name == "Number")
## 修改年龄名称,用于与polulation数据对应:普适
data_select$age_name <- str_split(data_select$age_name, ' ', simplify = T)[,1]
data_select$age_name <- str_replace(data_select$age_name, '-',' to ')
data_select$age_name[which(data_select$age_name == '<5')] <- 'Under 5'
```
##3.2 数据清洗_人口学数据(基本普适)
```{R}
var_name <- c("location_id","location_name","sex_name","year_id","age_group_id","age_group_name","val")
population_all <- as.data.frame(matrix(nrow=0,ncol=length(var_name)))
names(population_all)=var_name
for (a in file_2) {
data <- fread(a) %>% dplyr::select(var_name)
population_all <- rbind(population_all,data)
}
```
##3.3 数据清洗_order文件(个性化)
```{R}
order_data_all <- order %>%
filter(subregion %in% c('Global')) #个性化
region <- order_data_all$Country
location_ID <- order_data_all$location_id
```
#4.基于我们提供的函数完成APC建模(基本普适)
```{R}
##4.1 加载我们写好的函数
source('APC_function.R')
## 按照间隔5年求每年均值,针对国家和全球
Both <- made_country_data(data_select,sex_name = c('Male', 'Female', 'Both'),
output = T, output_path = './output/自动/',
measure = 'Deaths',
order_list = 'Global',
population_all = population_all,
location_ID = 1,
start_year = 1990, end_year=2019, current_year=2019)
```
#5.可视化
##5.1 Local drift
```{R}
###5.1.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
data <- Both[[sex]]
apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'LocalDrifts')
apc <- as.data.frame(apc)
apc$region <- 'Global' #个性化
apc$sex <- sex
apc$measure <- 'Deaths' #个性化
colnames(Both_apc) <- colnames(apc)
Both_apc <- rbind(Both_apc, apc)
}
###5.1.2 画图数据整理(普适)
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Age),]
merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)
###5.1.3 可视化(基本普适)
plot_localdrift<-ggplot(merged_Both_apc_selected,aes(x = Age,y = `Mean Percent Change per Calendar Year`,color=sex,group=sex))+
geom_point(size=0.8)+
geom_line(cex=0.5)+
geom_hline(yintercept = 0 ,lty=2,lwd=0.5,col="Black")+
geom_vline(xintercept = 20 ,lty=2,lwd=0.5,col="Black")+
geom_ribbon(aes(ymin =CILo,
ymax =CIHi,fill=sex),alpha=0.2, linetype = 0) + #带有面积填充的区间图
scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_x_continuous(breaks = seq(0, 70, by = 10),limits = c(0,70))+
scale_y_continuous(breaks = seq(-4, 1, by = 1),limits = c(-4, 1))+
ylab('Annual change of mortality (% per year)')+
xlab('Age at death (years)')+
theme_bw()+ #黑边白底
theme(#panel.background = element_rect(fill = "transparent"), #面板中的矩形元素
#panel.grid.major = element_blank(),#面板中的粗网格
#panel.grid.minor = element_blank(),#面板中的细网格
panel.border = element_rect(size=1,fill = 'transparent'),#面板的边框
axis.line = element_line(color = "black",linewidth = ),
axis.ticks.x = element_line(color = "black",size = 0.5),
axis.ticks.y = element_line(color = "black",size = 0.5),
axis.text = element_text(color='black',size = 6),
axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
axis.title = element_text(color='black',size = 8),
legend.position = 'right',
legend.key.size = unit(0.5, "cm"),
legend.title = element_blank(),
legend.text = element_text(color='black',size = 8),
plot.title = element_text(hjust = 0.5,size = 8))
#ggsave(plot_localdrift, file ='./output/plot_localdrift.pdf',units = 'cm', height = 6, width = 10)
```
##5.2 Age effect
```{R}
###5.2.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
data <- Both[[sex]]
apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'LongAge')
apc <- as.data.frame(apc)
apc$region <- 'Global'
apc$sex <- sex
apc$measure <- 'Deaths'
colnames(Both_apc) <- colnames(apc)
Both_apc <- rbind(Both_apc, apc)
}
###5.2.2 画图数据整理(普适)
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Age),]
merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)
###5.2.3 可视化(基本普适)
plot_age_effect<-ggplot(merged_Both_apc_selected,aes(x = Age,y = Rate,
color=sex,group=sex))+
geom_ribbon(aes(ymin =CILo,
ymax =CIHi,fill=sex),alpha=0.2, linetype = 0) +
geom_line(cex=0.3)+
geom_point(size=0.5)+
scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_x_continuous(breaks = seq(0, 70, by = 10),limits = c(0,70))+
scale_y_continuous( trans='log10',breaks = trans_breaks("log10", function(x) 10^x),
label = trans_format("log10", math_format(10^.x)),
limits = c(NA, 100)
)+
ylab('Mortality rate (per 100,000 person-years)')+
xlab('Age')+
annotation_logticks(sides = 'l')+
theme_minimal(10)+
theme(legend.position = 'right',
panel.border = element_rect(size=1,fill = 'transparent'),
axis.text = element_text(color='black',size = 6),
axis.title = element_text(color='black',size = 8),
axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
axis.ticks.x = element_line(color = "black",size = 0.5),
axis.ticks.y = element_line(color = "black",size = 0.5),
legend.key.size = unit(0.3, "cm"),
legend.title = element_blank(),
legend.text = element_text(color='black',size = 8),
plot.title = element_text(hjust = 0.5,size = 8))
#ggsave(plot_age_effect, file ='./output/plot_age_effect.pdf',units = 'cm', height = 6, width = 10)
```
##5.3 Period effect
```{R}
###5.3.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
data <- Both[[sex]]
apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'PeriodRR') #PeriodRR对应时期效应
apc <- as.data.frame(apc)
apc$region <- 'Global'
apc$sex <- sex
apc$measure <- 'deaths'
colnames(Both_apc) <- colnames(apc)
Both_apc <- rbind(Both_apc, apc)
}
###5.3.2 画图数据整理(普适)
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Period),]
merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)
###5.3.3 可视化(基本普适)
plot_period_effect<-ggplot(merged_Both_apc_selected,aes(x = Period,y = `Rate Ratio`,
color=sex,
group=sex))+
geom_ribbon(aes(ymin =`CI Lo`,ymax =`CI Hi`,fill=sex),alpha=0.2, linetype = 0) +
geom_line(cex=0.3)+
geom_point(size=0.5)+
geom_hline(yintercept = 1 ,lty=2,lwd=0.5,col="Black")+
geom_vline(xintercept = 1992.5 ,lty=2,lwd=0.5,col="Black")+
scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_x_continuous(breaks = seq(1990, 2020, by = 5),limits = c(1990,2020))+
scale_y_continuous(breaks = seq(0.5, 1.1, by = 0.1),limits = c(0.5, 1.1))+
ylab('Mortality rate ratio')+
xlab('Period (year)')+
theme_minimal(10)+
theme(legend.position = 'right',
panel.border = element_rect(size=1,fill = 'transparent'),
axis.text = element_text(color='black',size = 6),
axis.title = element_text(color='black',size = 8),
axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
axis.ticks.x = element_line(color = "black",size = 0.5),
axis.ticks.y = element_line(color = "black",size = 0.5),
legend.key.size = unit(0.3, "cm"),
legend.title = element_blank(),
legend.text = element_text(color='black',size = 8),
plot.title = element_text(hjust = 0.5,size = 8))
#ggsave(plot_period_effect, file ='./output//plot_period_effect.pdf',units = 'cm', height = 6, width = 10)
```
##5.4 Cohort effect
```{R}
###5.3.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
data <- Both[[sex]]
apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'CohortRR') #CohortRR对应队列效应
apc <- as.data.frame(apc)
apc$region <- 'Global'
apc$sex <- sex
apc$measure <- 'deaths'
colnames(Both_apc) <- colnames(apc)
Both_apc <- rbind(Both_apc, apc)
}
###5.3.2 画图数据整理(普适)
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Cohort),] #Cohort对应队列效应
merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)
###5.3.3 可视化(基本普适)
plot_cohort_effect<-ggplot(merged_Both_apc_selected,aes(x = Cohort,y = `Rate Ratio`, #Cohort对应队列效应
color=sex,group=sex))+
geom_hline(yintercept = 1 ,lty=2,lwd=0.5,col="Black")+
geom_vline(xintercept = 1960 ,lty=2,lwd=0.5,col="Black")+
geom_ribbon(aes(ymin =`CILo`,
ymax =`CIHi`,fill=sex),alpha=0.2, linetype = 0) +
geom_line(cex=0.3)+
geom_point(size=0.5)+
scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
scale_x_continuous(breaks = seq(1920, 2020, by = 10),limits = c(1920, 2020))+
scale_y_continuous(breaks = seq(0, 3, by = 0.5),limits = c(0, 3))+
ylab('Mortality rate ratio')+
xlab('Birth Cohort')+
theme_minimal(10)+
theme(legend.position = 'right',
panel.border = element_rect(size=1,fill = 'transparent'),
axis.text = element_text(color='black',size = 6),
axis.title = element_text(color='black',size = 8),
axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
axis.ticks.x = element_line(color = "black",size = 0.5),
axis.ticks.y = element_line(color = "black",size = 0.5),
legend.key.size = unit(0.3, "cm"),
legend.title = element_blank(),
legend.text = element_text(color='black',size = 8),
plot.title = element_text(hjust = 0.5,size = 8))
#ggsave(plot_cohort_effect, file ='./output//plot_cohort_effect.pdf',units = 'cm', height = 6, width = 10)
```