服务热线
178 0020 3020
R语言之词云图
# Task 1: build a word-frequency table (`words`) from article abstracts in MySQL.
library(RMySQL)

# Close every open MySQL connection known to the driver.
# Call this before each task so connections left over from earlier runs
# do not accumulate.
killDbConnections <- function() {
  all_cons <- dbListConnections(MySQL())
  print(all_cons)
  for (con in all_cons) {
    dbDisconnect(con)
  }
  print(paste(length(all_cons), " connections killed."))
}

# Turn a character vector of texts into one flat vector of lower-case tokens.
# Punctuation is stripped, the text is split on single spaces, and NA texts
# and empty tokens (produced by consecutive spaces) are dropped.
tokenize_abstracts <- function(texts) {
  texts <- texts[!is.na(texts)]
  cleaned <- gsub("[[:punct:]]", "", tolower(texts))
  # This odd character was observed in the fetched content; treat it as a space.
  # NOTE(review): it looks like a mis-decoded non-breaking space - confirm.
  cleaned <- gsub("聽", " ", cleaned)
  tokens <- as.character(
    unlist(strsplit(cleaned, split = " ", fixed = TRUE), use.names = FALSE)
  )
  tokens[nzchar(tokens)]
}

# Count tokens in a single vectorised pass.
# Returns a data frame with columns `word` and `freq`; rows are ordered by the
# first appearance of each word, as the original row-by-row accumulation was.
count_words <- function(tokens) {
  vocab <- unique(tokens)
  data.frame(
    word = vocab,
    freq = as.numeric(tabulate(match(tokens, vocab), nbins = length(vocab))),
    stringsAsFactors = FALSE
  )
}

# Fetch all finished articles in chunks and return the tokens of their
# abstracts. The result set is always released, even if fetching fails.
fetch_abstract_tokens <- function(con, chunk_size = 10) {
  rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")
  on.exit(dbClearResult(rs), add = TRUE)

  token_chunks <- list()
  while (!dbHasCompleted(rs)) {
    chunk <- dbFetch(rs, chunk_size)
    # Column 4 is the title, column 5 is the abstract.
    token_chunks[[length(token_chunks) + 1L]] <- tokenize_abstracts(chunk[[5]])
  }
  as.character(unlist(token_chunks, use.names = FALSE))
}

killDbConnections()

# Open the database connection.
con <- dbConnect(
  MySQL(),
  host = "localhost", dbname = "rdb", user = "root", password = ""
)

tokens <- tryCatch(
  {
    # Release the result of the SET statement so it does not stay pending.
    dbClearResult(dbSendQuery(con, "SET NAMES utf8"))
    fetch_abstract_tokens(con)
  },
  # The connection is not needed after this task; close it rather than leak it.
  finally = dbDisconnect(con)
)

words <- count_words(tokens)
head(words)
Result1:
# Task 2: word cloud of the first 1000 words, using the default colors.
library(wordcloud2)

# head() stops at the last row, so a table with fewer than 1000 words does not
# get padded with all-NA rows the way `words[0:1000, ]` would.
df <- head(words, 1000)
wordcloud2(df)
Result2:
# Task 2-1: word cloud of the first 1000 words with custom colors.
library(wordcloud2)

# head() stops at the last row, so a table with fewer than 1000 words does not
# get padded with all-NA rows the way `words[0:1000, ]` would.
df <- head(words, 1000)

# Column 2 holds the frequency: words seen more than 2000 times are drawn in
# a strong red, all others in a muted tone.
word_colors <- ifelse(df[, 2] > 2000, "#f02222", "#c09292")
wordcloud2(df, color = word_colors)
Result2-1:
# Task 3: star-shaped word cloud on a black background, common words removed.

# Sort by freq, highest first.
new_words <- words[order(words$freq, decreasing = TRUE), ]

# Drop articles, prepositions and other filler words (of, and, the, ...).
del_word <- c(
  "of", "the", "and", "in", "to", "a", "that", "is", "for", "buy", "with",
  "we", "are", "an", "this", "these", "as", "from", "which", "at", "their",
  "have", "or", "our", "its", "but", "how", "be", "as", "here", "on", "can",
  "into", "data", "between", "both", "also", "by", "yet", "than", "well",
  "it", "not"
)

# Filter the sorted table; the original filtered the unsorted `words`, which
# left `new_words` unused and discarded the ordering.
words2 <- new_words[!new_words$word %in% del_word, ]

wordcloud2(words2, shape = "star", size = 0.5, backgroundColor = "black")
附件