--- title: "Correlation_SDI_地区层面" output: html_document date: "2023-08-7" --- #1.安装并加载相关R包 ```{r} #install.packages("tidyverse") #install.packages("readxl") #install.packages("ggplot2") library(readxl) library(tidyverse) library(ggplot2) ``` #2.加载数据(主要有三个:1.全球和21个地区从1990到2019年的SDI数据;2.全球和21个地区在2019年的DALYs ASR;3.全球和21个地区的list) ```{r} ##获取SDI_value文件(这个文件是location × year的SDI矩阵) SDI_value <- readxl::read_xlsx('./data/SDI_value_GBD2019_1990_2019.XLSX',sheet=2,skip=1) #读入xlsx文件的第二个sheet,跳过第一行 ##获取Global和21regions的COPD疾病负担数据 Global_21Regions_1990to2019 <- read.csv('./data/Global_21Regions_1990to2019.csv', header=T) ##获取Global和21regions的list文件 order_globalandregions <- read.csv("./data/order_globalandregions.csv", header = F) ``` #3.数据清洗(即需要整理出3列数据,分别是Global和21个region的location_year pair,SDI值,疾病负担指标如ASR) ```{r} ##3.1 制作第一列数据:Global和21个regions的"location_year"组合,一共22*30=660行,在整理第二列和第三列数据的时候,整理 ##3.2 制作第二列数据:Global和21个regions在1990-2019年的SDI数据 SDI_value_Global_21Regions <- SDI_value %>% filter(Location %in% order_globalandregions$V1) SDI_value_Global_21Regions_long <- pivot_longer(SDI_value_Global_21Regions, # 输入的数据框(wide format) cols = colnames(SDI_value_Global_21Regions)[-1], # 指定要进行转换的列,排除第一列 names_to = 'Year', # 转换后的列名将存储在名为'Year'的新列中 values_to = 'SDI') %>% # 转换后的数值将存储在名为'SDI'的新列中 mutate(ID = paste0(Location,"_", Year)) #用于后续数据框合并 ##3.3 制作第三列数据:全球和21个地区,1990-2019年,年龄标准化的DALYs rate Global_21regions_DALYs_ASR <- Global_21Regions_1990to2019 %>% select("location_name","year","sex_name","age_name","measure_name","metric_name","val") %>% filter(location_name %in% order_globalandregions$V1, sex_name == "Both", age_name == "Age-standardized", measure_name == "DALYs (Disability-Adjusted Life Years)", metric_name == "Rate") %>% rename("DALYs_ASR" = "val") %>% #列名重命名 mutate(DALYs_ASR = round(DALYs_ASR/1000,5), ID = paste0(location_name,"_",year)) #用于后续数据框合并 ##3.4 将上述数据框合并(基于location_time组合) Global_21regions_DALYs_ASR_LocationandYear <- left_join(SDI_value_Global_21Regions_long,Global_21regions_DALYs_ASR, by = 'ID') %>% mutate(SDI = as.numeric(str_replace(SDI,'\\·','.'))) #因为用于相关性分析的变量需要是数值型变量,而不能是字符串型。明确变量类型用class()函数 ``` #4.计算相关系数并可视化 ```{R} ##4.1 计算相关系数(包括置信区间和p值) cor_test <- cor.test(Global_21regions_DALYs_ASR_LocationandYear[,"SDI",drop = T], Global_21regions_DALYs_ASR_LocationandYear[, "DALYs_ASR", drop = TRUE]) cor_r <- cor_test$estimate cor_int<- cor_test$conf.int cor_p <- cor_test$p.value ##4.2 基于ggplot函数进行可视化(数据、映射【x轴 + y轴】、几何图形【点 + 线 + 文本】、坐标轴信息、主题) Corr_Global_Regions_SDI <- ggplot(Global_21regions_DALYs_ASR_LocationandYear, # 数据框,用于绘图 aes(Global_21regions_DALYs_ASR_LocationandYear[,"SDI",drop = T], # x轴变量,从数据框中获取"SDI"列 Global_21regions_DALYs_ASR_LocationandYear[,"DALYs_ASR",drop = T])) + # y轴变量,从数据框中获取"DALYs_ASR"列 geom_point(aes(color = Location, shape= Location))+ #添加图形几何图层以及该图层中颜色/形状映射 scale_shape_manual(values = 1:22) + #手动设置点的形状,一共22种 geom_smooth(colour='black',stat = "smooth",method='loess',se=F,span=0.5) + #添加平滑曲线的图形几何图层 geom_text(x = min(Global_21regions_DALYs_ASR_LocationandYear$SDI + 0.2), #添加文本标签的图层,设置该文本的位置(x和y)、内容(label)、大小 y = max(Global_21regions_DALYs_ASR_LocationandYear[,"DALYs_ASR",drop = T])*0.8, label = paste("R =", round(cor_r, 2),"(", round(cor_int[1],2), "to", round(cor_int[2],2), ")","\n", "p =", ifelse(cor_p<0.001,"< 0.001",round(cor_p,2))), hjust = 1, vjust = 0, size = 4)+ ylab(paste0("DALYs_ASR",'\n (100,000 persons)'))+ # 设置x和y轴的标签 xlab("Sociodemographic index")+ theme_bw(base_size = 8)+ #设置主题样式和基本字体大小,theme_bw 函数用于设置图表的主题样式为黑白风格。通过 base_size = 14 参数,设置图表的基本字体大小为 14 theme(panel.background = element_rect(fill = "transparent"), # 设置绘图区域的背景为透明 plot.background = element_rect(fill = "transparent"), # 设置整个绘图的背景为透明 legend.position = 'top', # 设置legend的位置和大小 legend.key.size = unit(0.3, "cm"), legend.title = element_blank(), axis.text.x = element_text(face = "bold",size = 8), # 设置坐标轴文本的外观和大小 axis.text.y = element_text(face = "bold",size = 8), axis.title.x = element_text(face = "bold",size = 10),# 设置坐标轴标题的外观和大小 axis.title.y = element_text(face = "bold",size = 10)) Corr_Global_Regions_SDI ``` #5.文件储存 ```{R} ggsave(Corr_Global_Regions_SDI, file = './output/DALYs_rate_22region_SDI.pdf', width = 7, height = 4) ```