---
title: "Correlation_SDI_国家层面"
output: html_document
date: "2023-08-7"
---
#1.安装并加载相关R包
```{r}
#install.packages("readxl")
#install.packages("tidyverse")
#install.packages("ggplot2")
library(readxl)
library(tidyverse)
library(ggplot2)
```
#2.加载数据
```{r}
##获取SDI_value文件(这个文件是location × year的SDI矩阵)
SDI_value <- readxl::read_xlsx('./data/SDI_value_GBD2019_1990_2019_modified.XLSX',sheet=1) #读入xlsx文件的第二个sheet,跳过第一行
##获取204个国家的COPD疾病负担数据(我们直接利用TableS4的数据)
Global_21Regions_204counties_1990to2019 <- read.csv('./data/Global_21Regions_204counties_1990to2019.csv', header=T)
Global_21Regions_204counties_1990to2019[Global_21Regions_204counties_1990to2019$location_name == "C么te d'Ivoire","location_name"] = "Côte d'Ivoire"
##获取204个国家的list文件(我们直接利用TableS4的数据)
list_global_21regions_204countries <- read_xlsx("./data/list_global_21regions_204countries.xlsx", sheet=1)
```
#3.数据清洗(即需要整理出3列数据,分别是204个国家的location list,SDI值,疾病负担指标DALYs ASR)
```{r}
##3.1 制作第一列数据:204个国家的location list
list_204countries <- list_global_21regions_204countries %>%
filter(!subregion %in% c("Global","region21")) %>% #匹配符号,常常与向量一起使用
rename("Location" = "Country")
##3.2 制作第二列数据:204个国家的在2019年的SDI数据
SDI_value_204countries <- SDI_value %>%
select("Location","2019") %>%
filter(Location %in% list_204countries$Location) %>%
rename("SDI_2019"="2019")
##3.3 制作第三列数据:204个国家,2019年,年龄标准化的DALYs rate
Countries204_DALYs_ASR <- Global_21Regions_204counties_1990to2019 %>%
select("location_name","year","sex_name","age_name","measure_name","metric_name","val") %>%
filter(location_name %in% list_204countries$Location,
year == "2019",
sex_name == "Both",
age_name == "Age-standardized",
measure_name == "DALYs (Disability-Adjusted Life Years)",
metric_name == "Rate") %>%
rename("DALYs_ASR_2019" = "val",
"Location" = "location_name") #列名重命名
##3.4 将上述数据框合并(基于location)
Countries204_DALYs_ASR_SDI_1 <- left_join(SDI_value_204countries,Countries204_DALYs_ASR, by = 'Location')
Countries204_DALYs_ASR_SDI_2 <- left_join(Countries204_DALYs_ASR_SDI_1, list_204countries, by = 'Location') %>%
mutate(SDI_2019 = as.numeric(str_replace(SDI_2019,'\\·','.')))
#因为用于相关性分析的变量需要是数值型变量,而不能是字符串型。明确变量类型用class()函数
```
#4.计算相关系数并可视化
```{R}
##4.1 计算相关系数
cor_test <- cor.test(Countries204_DALYs_ASR_SDI_2[,"SDI_2019",drop = T], Countries204_DALYs_ASR_SDI_2[, "DALYs_ASR_2019", drop = TRUE])
cor_r <- cor_test$estimate
cor_int<- cor_test$conf.int
cor_p <- cor_test$p.value
##4.2 基于ggplot函数进行可视化
Corr_204countries_SDI <- ggplot(Countries204_DALYs_ASR_SDI_2, # 数据框,用于绘图
aes(Countries204_DALYs_ASR_SDI_2[,"SDI_2019",drop = T], # x轴变量,从数据框中获取"SDI"列
Countries204_DALYs_ASR_SDI_2[,"DALYs_ASR_2019",drop = T])) + # y轴变量,从数据框中获取"DALYs_ASR"列
geom_point(aes(color = subregion),size = 0.5)+ #添加图形几何图层以及该图层中颜色/形状映射
scale_shape_manual(values = 1:22) + #手动设置点的形状,一共22种
geom_smooth(colour='black',stat = "smooth",method='loess',se=F,span=0.5) + #添加平滑曲线的图形几何图层
geom_text(x = min(Countries204_DALYs_ASR_SDI_2$SDI_2019 + 0.25), #添加文本标签的图层,设置该文本的位置(x和y)、内容(label)、大小
y = max(Countries204_DALYs_ASR_SDI_2[,"DALYs_ASR_2019",drop = T])*0.8,
label = paste("R =", round(cor_r, 2),"(", round(cor_int[1],2), "to", round(cor_int[2],2), ")","\n", "p =", ifelse(cor_p<0.001,"< 0.001",round(cor_p,2))),
hjust = 1, vjust = 0,
size = 4)+
geom_text(aes(label = Location,color = subregion), # 将Location列的值作为标签
hjust = 0.7, vjust = 0,
size = 1) +
ylab(paste0("DALYs_ASR",'\n (100,000 persons)'))+ # 设置x和y轴的标签
xlab("Socio-demographic index")+
theme_bw(base_size = 8)+ #设置主题样式和基本字体大小,theme_bw 函数用于设置图表的主题样式为黑白风格。通过 base_size = 14 参数,设置图表的基本字体大小为 14
theme(panel.background = element_rect(fill = "transparent"), # 设置绘图区域的背景为透明
plot.background = element_rect(fill = "transparent"), # 设置整个绘图的背景为透明
legend.position = 'top', # 设置legend的位置和大小
legend.key.size = unit(0.3, "cm"),
legend.title = element_blank(),
axis.text.x = element_text(face = "bold",size = 8), # 设置坐标轴文本的外观和大小
axis.text.y = element_text(face = "bold",size = 8),
axis.title.x = element_text(face = "bold",size = 10),# 设置坐标轴标题的外观和大小
axis.title.y = element_text(face = "bold",size = 10))
```
#5.文件导出
```{R}
ggsave(Corr_204countries_SDI, file = './output/DALYs_rate_204countries_SDI.pdf', width = 8, height = 4)
```