--- title: "Correlation_SDI_国家层面" output: html_document date: "2023-08-7" --- #1.安装并加载相关R包 ```{r} #install.packages("readxl") #install.packages("tidyverse") #install.packages("ggplot2") library(readxl) library(tidyverse) library(ggplot2) ``` #2.加载数据 ```{r} ##获取SDI_value文件(这个文件是location × year的SDI矩阵) SDI_value <- readxl::read_xlsx('./data/SDI_value_GBD2019_1990_2019_modified.XLSX',sheet=1) #读入xlsx文件的第二个sheet,跳过第一行 ##获取204个国家的COPD疾病负担数据(我们直接利用TableS4的数据) Global_21Regions_204counties_1990to2019 <- read.csv('./data/Global_21Regions_204counties_1990to2019.csv', header=T) Global_21Regions_204counties_1990to2019[Global_21Regions_204counties_1990to2019$location_name == "C么te d'Ivoire","location_name"] = "Côte d'Ivoire" ##获取204个国家的list文件(我们直接利用TableS4的数据) list_global_21regions_204countries <- read_xlsx("./data/list_global_21regions_204countries.xlsx", sheet=1) ``` #3.数据清洗(即需要整理出3列数据,分别是204个国家的location list,SDI值,疾病负担指标DALYs ASR) ```{r} ##3.1 制作第一列数据:204个国家的location list list_204countries <- list_global_21regions_204countries %>% filter(!subregion %in% c("Global","region21")) %>% #匹配符号,常常与向量一起使用 rename("Location" = "Country") ##3.2 制作第二列数据:204个国家的在2019年的SDI数据 SDI_value_204countries <- SDI_value %>% select("Location","2019") %>% filter(Location %in% list_204countries$Location) %>% rename("SDI_2019"="2019") ##3.3 制作第三列数据:204个国家,2019年,年龄标准化的DALYs rate Countries204_DALYs_ASR <- Global_21Regions_204counties_1990to2019 %>% select("location_name","year","sex_name","age_name","measure_name","metric_name","val") %>% filter(location_name %in% list_204countries$Location, year == "2019", sex_name == "Both", age_name == "Age-standardized", measure_name == "DALYs (Disability-Adjusted Life Years)", metric_name == "Rate") %>% rename("DALYs_ASR_2019" = "val", "Location" = "location_name") #列名重命名 ##3.4 将上述数据框合并(基于location) Countries204_DALYs_ASR_SDI_1 <- left_join(SDI_value_204countries,Countries204_DALYs_ASR, by = 'Location') Countries204_DALYs_ASR_SDI_2 <- left_join(Countries204_DALYs_ASR_SDI_1, list_204countries, by = 'Location') %>% mutate(SDI_2019 = as.numeric(str_replace(SDI_2019,'\\·','.'))) #因为用于相关性分析的变量需要是数值型变量,而不能是字符串型。明确变量类型用class()函数 ``` #4.计算相关系数并可视化 ```{R} ##4.1 计算相关系数 cor_test <- cor.test(Countries204_DALYs_ASR_SDI_2[,"SDI_2019",drop = T], Countries204_DALYs_ASR_SDI_2[, "DALYs_ASR_2019", drop = TRUE]) cor_r <- cor_test$estimate cor_int<- cor_test$conf.int cor_p <- cor_test$p.value ##4.2 基于ggplot函数进行可视化 Corr_204countries_SDI <- ggplot(Countries204_DALYs_ASR_SDI_2, # 数据框,用于绘图 aes(Countries204_DALYs_ASR_SDI_2[,"SDI_2019",drop = T], # x轴变量,从数据框中获取"SDI"列 Countries204_DALYs_ASR_SDI_2[,"DALYs_ASR_2019",drop = T])) + # y轴变量,从数据框中获取"DALYs_ASR"列 geom_point(aes(color = subregion),size = 0.5)+ #添加图形几何图层以及该图层中颜色/形状映射 scale_shape_manual(values = 1:22) + #手动设置点的形状,一共22种 geom_smooth(colour='black',stat = "smooth",method='loess',se=F,span=0.5) + #添加平滑曲线的图形几何图层 geom_text(x = min(Countries204_DALYs_ASR_SDI_2$SDI_2019 + 0.25), #添加文本标签的图层,设置该文本的位置(x和y)、内容(label)、大小 y = max(Countries204_DALYs_ASR_SDI_2[,"DALYs_ASR_2019",drop = T])*0.8, label = paste("R =", round(cor_r, 2),"(", round(cor_int[1],2), "to", round(cor_int[2],2), ")","\n", "p =", ifelse(cor_p<0.001,"< 0.001",round(cor_p,2))), hjust = 1, vjust = 0, size = 4)+ geom_text(aes(label = Location,color = subregion), # 将Location列的值作为标签 hjust = 0.7, vjust = 0, size = 1) + ylab(paste0("DALYs_ASR",'\n (100,000 persons)'))+ # 设置x和y轴的标签 xlab("Socio-demographic index")+ theme_bw(base_size = 8)+ #设置主题样式和基本字体大小,theme_bw 函数用于设置图表的主题样式为黑白风格。通过 base_size = 14 参数,设置图表的基本字体大小为 14 theme(panel.background = element_rect(fill = "transparent"), # 设置绘图区域的背景为透明 plot.background = element_rect(fill = "transparent"), # 设置整个绘图的背景为透明 legend.position = 'top', # 设置legend的位置和大小 legend.key.size = unit(0.3, "cm"), legend.title = element_blank(), axis.text.x = element_text(face = "bold",size = 8), # 设置坐标轴文本的外观和大小 axis.text.y = element_text(face = "bold",size = 8), axis.title.x = element_text(face = "bold",size = 10),# 设置坐标轴标题的外观和大小 axis.title.y = element_text(face = "bold",size = 10)) ``` #5.文件导出 ```{R} ggsave(Corr_204countries_SDI, file = './output/DALYs_rate_204countries_SDI.pdf', width = 8, height = 4) ```