---
title: "APC_Analyst&Analyst"
output: html_document
date: "2023-08-27"
---

#1.R包的安装与加载
```{r}
library(data.table)
library(scales)
library(readxl)
library(tidyverse)
```

#2.数据加载(疾病数据、人口学数据、order文件）
```{R}
##2.1 疾病数据（多个文件rbind）
###方法1：个性化方法（读取每个文件，然后rbind)
#data_1 <- read.csv("./data/CHD_1.csv")
#data_2 <- read.csv("./data/CHD_2.csv")
#data_all <- rbind(data_1,data_2)

###方法2：普适性方法（读取整个文件夹，利用循环)
dirname_1 <- dir("./data/疾病数据/") # dir是directory（目录）的缩写
file_1 <- paste0("./data/疾病数据/",dirname_1) #

data_all  <-  as.data.frame(matrix(nrow=0,ncol=16))
for (a in file_1) {
  data <- fread(a) #fread函数用于快速读取大型文件(相较于read.csv更快)，fread全称是fast read
  data_all <- rbind(data_all,data)
  }

##2.2 人口学数据
dirname_2 <- dir('./data/GBD_Population/') #注意文件路径
file_2 <- paste0('./data/GBD_Population/',dirname_2)#注意文件路径

##2.3 order文件
order <- read_xlsx('./data/orders.xlsx', sheet = 3)
```


#3.数据清洗
##3.1 数据清洗_疾病数据（个性化）
```{R}
###根据非常“6+1”，针对空间、人间的性别、人间的年龄、measure、metric，注意：时间不用限定
data_select <- data_all %>%
  filter(location_name == "Global", #个性化
         sex_name %in% c("Both","Male","Female"),
         age_name %in% c('<5 years','5-9 years','10-14 years','15-19 years','20-24 years','25-29 years', '30-34 years', '35-39 years', '40-44 years','45-49 years', '50-54 years', '55-59 years', '60-64 years', '65-69 years'), #个性化
         measure_name == "Deaths", #个性化
         metric_name == "Number")

## 修改年龄名称，用于与polulation数据对应：普适
data_select$age_name <- str_split(data_select$age_name, ' ', simplify = T)[,1]
data_select$age_name <- str_replace(data_select$age_name, '-',' to ')
data_select$age_name[which(data_select$age_name == '<5')] <- 'Under 5'
```

##3.2 数据清洗_人口学数据（基本普适）
```{R}
var_name <- c("location_id","location_name","sex_name","year_id","age_group_id","age_group_name","val")
population_all  <-  as.data.frame(matrix(nrow=0,ncol=length(var_name)))
names(population_all)=var_name
for (a in file_2) {
  data <- fread(a) %>% dplyr::select(var_name)
  population_all <- rbind(population_all,data)
  }
```

##3.3 数据清洗_order文件（个性化）
```{R}
order_data_all <- order %>%
    filter(subregion %in% c('Global')) #个性化
region <- order_data_all$Country
location_ID <- order_data_all$location_id
```

#4.基于我们提供的函数完成APC建模（基本普适）
```{R}
##4.1 加载我们写好的函数
source('APC_function.R')

## 按照间隔5年求每年均值，针对国家和全球
Both <- made_country_data(data_select,sex_name = c('Male', 'Female', 'Both'),
                          output = T, output_path = './output/自动/',
                          measure = 'Deaths',
                          order_list = 'Global',
                          population_all = population_all,
                          location_ID = 1,
                          start_year = 1990, end_year=2019, current_year=2019)
```

#5.可视化
##5.1 Local drift
```{R}
###5.1.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
  data <- Both[[sex]]
  apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'LocalDrifts')
  apc <- as.data.frame(apc)
  apc$region <- 'Global' #个性化
  apc$sex <- sex
  apc$measure <- 'Deaths' #个性化
  colnames(Both_apc) <- colnames(apc)
  Both_apc <- rbind(Both_apc, apc)
}

###5.1.2 画图数据整理（普适）
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Age),]
    

merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)


###5.1.3 可视化（基本普适）
plot_localdrift<-ggplot(merged_Both_apc_selected,aes(x = Age,y = `Mean Percent Change per Calendar Year`,color=sex,group=sex))+
  geom_point(size=0.8)+
  geom_line(cex=0.5)+
  geom_hline(yintercept = 0 ,lty=2,lwd=0.5,col="Black")+
  geom_vline(xintercept = 20 ,lty=2,lwd=0.5,col="Black")+
  geom_ribbon(aes(ymin =CILo,
                  ymax =CIHi,fill=sex),alpha=0.2, linetype = 0) + #带有面积填充的区间图
  scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_x_continuous(breaks = seq(0, 70, by = 10),limits = c(0,70))+
  scale_y_continuous(breaks = seq(-4, 1, by = 1),limits = c(-4, 1))+
  ylab('Annual change of mortality (% per year)')+
  xlab('Age at death (years)')+
  theme_bw()+ #黑边白底
  theme(#panel.background = element_rect(fill = "transparent"), #面板中的矩形元素
        #panel.grid.major = element_blank(),#面板中的粗网格
        #panel.grid.minor = element_blank(),#面板中的细网格
        panel.border = element_rect(size=1,fill = 'transparent'),#面板的边框
        axis.line = element_line(color = "black",linewidth = ),
        axis.ticks.x = element_line(color = "black",size = 0.5),
        axis.ticks.y = element_line(color = "black",size = 0.5),
        axis.text = element_text(color='black',size = 6),
        axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
        axis.title = element_text(color='black',size = 8),
        legend.position = 'right',
        legend.key.size = unit(0.5, "cm"),      
        legend.title = element_blank(),
        legend.text = element_text(color='black',size = 8),
        plot.title = element_text(hjust = 0.5,size = 8))
#ggsave(plot_localdrift, file ='./output/plot_localdrift.pdf',units = 'cm', height = 6, width = 10)
```

##5.2 Age effect
```{R}
###5.2.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
  data <- Both[[sex]]
  apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'LongAge')
  apc <- as.data.frame(apc)
  apc$region <- 'Global'
  apc$sex <- sex
  apc$measure <- 'Deaths'
  colnames(Both_apc) <- colnames(apc)
  Both_apc <- rbind(Both_apc, apc)
}

###5.2.2 画图数据整理（普适）
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Age),]
    

merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)

###5.2.3 可视化（基本普适）
plot_age_effect<-ggplot(merged_Both_apc_selected,aes(x = Age,y = Rate,
                                                      color=sex,group=sex))+
  geom_ribbon(aes(ymin =CILo,
                  ymax =CIHi,fill=sex),alpha=0.2, linetype = 0) +
  geom_line(cex=0.3)+
  geom_point(size=0.5)+
  scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_x_continuous(breaks = seq(0, 70, by = 10),limits = c(0,70))+
  scale_y_continuous( trans='log10',breaks = trans_breaks("log10", function(x) 10^x),
                      label = trans_format("log10", math_format(10^.x)),
                      limits = c(NA, 100)
                      )+
  ylab('Mortality rate (per 100,000 person-years)')+
  xlab('Age')+
  annotation_logticks(sides = 'l')+
  theme_minimal(10)+
  theme(legend.position = 'right',
        panel.border = element_rect(size=1,fill = 'transparent'),
        axis.text = element_text(color='black',size = 6),
        axis.title = element_text(color='black',size = 8),
        axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
        axis.ticks.x = element_line(color = "black",size = 0.5),
        axis.ticks.y = element_line(color = "black",size = 0.5),
        legend.key.size = unit(0.3, "cm"),     
        legend.title = element_blank(),
        legend.text = element_text(color='black',size = 8),
        plot.title = element_text(hjust = 0.5,size = 8))

#ggsave(plot_age_effect, file ='./output/plot_age_effect.pdf',units = 'cm', height = 6, width = 10)
```


##5.3 Period effect
```{R}
###5.3.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
  data <- Both[[sex]]
  apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'PeriodRR') #PeriodRR对应时期效应
  apc <- as.data.frame(apc)
  apc$region <- 'Global'
  apc$sex <- sex
  apc$measure <- 'deaths'
  colnames(Both_apc) <- colnames(apc)
  Both_apc <- rbind(Both_apc, apc)
  }

###5.3.2 画图数据整理（普适）
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Period),] 
    

merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)


###5.3.3 可视化（基本普适）
plot_period_effect<-ggplot(merged_Both_apc_selected,aes(x = Period,y = `Rate Ratio`,
                                             color=sex,
                                             group=sex))+
  geom_ribbon(aes(ymin =`CI Lo`,ymax =`CI Hi`,fill=sex),alpha=0.2, linetype = 0) +
  geom_line(cex=0.3)+
  geom_point(size=0.5)+
  geom_hline(yintercept = 1 ,lty=2,lwd=0.5,col="Black")+
  geom_vline(xintercept = 1992.5 ,lty=2,lwd=0.5,col="Black")+
  scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_x_continuous(breaks = seq(1990, 2020, by = 5),limits = c(1990,2020))+
  scale_y_continuous(breaks = seq(0.5, 1.1, by = 0.1),limits = c(0.5, 1.1))+
  ylab('Mortality rate ratio')+
  xlab('Period (year)')+
  theme_minimal(10)+
  theme(legend.position = 'right',
        panel.border = element_rect(size=1,fill = 'transparent'),
        axis.text = element_text(color='black',size = 6),
        axis.title = element_text(color='black',size = 8),
        axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
        axis.ticks.x = element_line(color = "black",size = 0.5),
        axis.ticks.y = element_line(color = "black",size = 0.5),
        legend.key.size = unit(0.3, "cm"),      
        legend.title = element_blank(),
        legend.text = element_text(color='black',size = 8),
        plot.title = element_text(hjust = 0.5,size = 8))

#ggsave(plot_period_effect, file ='./output//plot_period_effect.pdf',units = 'cm', height = 6, width = 10)
```

##5.4 Cohort effect
```{R}
###5.3.1 按照间隔5年求均值
Both_apc <- as.data.frame(matrix(ncol = 7))
for (sex in names(Both)){
  data <- Both[[sex]]
  apc <- made_country_apc(data,start_year = 1990,start_age = '0', interval_year = 5,'CohortRR') #CohortRR对应队列效应
  apc <- as.data.frame(apc)
  apc$region <- 'Global'
  apc$sex <- sex
  apc$measure <- 'deaths'
  colnames(Both_apc) <- colnames(apc)
  Both_apc <- rbind(Both_apc, apc)
  }

###5.3.2 画图数据整理（普适）
merged_Both_apc <- as.data.frame(matrix(ncol = 7))
colnames(merged_Both_apc) <- colnames(apc)
merged_Both_apc <- rbind(merged_Both_apc, Both_apc)
merged_Both_apc <- merged_Both_apc[!is.na(merged_Both_apc$Cohort),] #Cohort对应队列效应
    

merged_Both_apc_selected <- merged_Both_apc
merged_Both_apc_selected$sex <- str_to_title(merged_Both_apc_selected$sex)

###5.3.3 可视化（基本普适）
plot_cohort_effect<-ggplot(merged_Both_apc_selected,aes(x = Cohort,y = `Rate Ratio`, #Cohort对应队列效应
                                         color=sex,group=sex))+
  geom_hline(yintercept = 1 ,lty=2,lwd=0.5,col="Black")+
  geom_vline(xintercept = 1960 ,lty=2,lwd=0.5,col="Black")+ 
  geom_ribbon(aes(ymin =`CILo`,
                  ymax =`CIHi`,fill=sex),alpha=0.2, linetype = 0) +
  geom_line(cex=0.3)+
  geom_point(size=0.5)+
  scale_color_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_fill_manual(values = c("Both" = "#7D7D7D", "Male" = "#0070C0", "Female" = "#EBC101"))+ #手动设置颜色
  scale_x_continuous(breaks = seq(1920, 2020, by = 10),limits = c(1920, 2020))+
  scale_y_continuous(breaks = seq(0, 3, by = 0.5),limits = c(0, 3))+
  ylab('Mortality rate ratio')+
  xlab('Birth Cohort')+
  theme_minimal(10)+
  theme(legend.position = 'right',
        panel.border = element_rect(size=1,fill = 'transparent'),
        axis.text = element_text(color='black',size = 6),
        axis.title = element_text(color='black',size = 8),
        axis.text.x = element_text(angle = 0,vjust = 0.5,hjust = 0.5),
        axis.ticks.x = element_line(color = "black",size = 0.5),
        axis.ticks.y = element_line(color = "black",size = 0.5),
        legend.key.size = unit(0.3, "cm"),
        legend.title = element_blank(),
        legend.text = element_text(color='black',size = 8),
        plot.title = element_text(hjust = 0.5,size = 8))

#ggsave(plot_cohort_effect, file ='./output//plot_cohort_effect.pdf',units = 'cm', height = 6, width = 10)

```