R2-06-dean wordcloud

dean 2018-03-27 22:40:56 阅读: 1976

#任务1

library(RMySQL)

# Close every open MySQL connection held by this R session.
#
# Call this before each task so that connections left over from earlier
# runs do not accumulate. Prints the list of connections that were found,
# closes each one, then prints how many were closed.
#
# Returns (invisibly, via print) the summary message string.
killDbConnections <- function() {
  all_cons <- dbListConnections(MySQL())

  print(all_cons)

  # The original loop body began with a stray "+" (an R console
  # continuation prompt pasted into the source); it is removed here and
  # the body is braced so the loop's extent is explicit.
  for (con in all_cons) {
    dbDisconnect(con)
  }

  print(paste(length(all_cons), " connections killed."))
}

# Start from a clean slate: drop any connections left open by earlier tasks.
killDbConnections()

# Open the database connection.
con <- dbConnect(MySQL(), host = "localhost", dbname = "rdb", user = "root", password = "")

# SET NAMES produces no rows; clear its result immediately so that no
# result set is left pending on the connection.
dbClearResult(dbSendQuery(con, 'SET NAMES utf8'))

rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")

# Everything above is carried over from the previous tasks.
# New code for this task starts here.

# Collect the tokens of every abstract, one character vector per chunk.
token_chunks <- list()

while (!dbHasCompleted(rs)) {
  chunk <- dbFetch(rs, 10)

  # A fetch may come back with no rows; skip it rather than index into
  # columns that might not exist.
  if (nrow(chunk) == 0) {
    next
  }

  # Per the original notes: chunk[, 4] is the title, chunk[, 5] the abstract.
  abstracts <- chunk[, 5]

  # Lower-case and strip punctuation.
  cleaned <- gsub("[[:punct:]]", "", tolower(abstracts))

  # This special character was observed in the fetched text and is
  # replaced by a plain space (presumably a mis-decoded non-breaking
  # space -- TODO confirm against the table's encoding).
  cleaned <- gsub("聽", " ", cleaned)

  token_chunks[[length(token_chunks) + 1L]] <- unlist(strsplit(cleaned, split = " "))
}

# Release the result set and the connection once all rows are consumed.
dbClearResult(rs)
dbDisconnect(con)

tokens <- as.character(unlist(token_chunks))

# Drop missing abstracts and the empty strings produced by consecutive
# spaces; neither is a word.
tokens <- tokens[!is.na(tokens) & nzchar(tokens)]

# Count occurrences in one vectorised pass instead of growing a data
# frame with rbind() per token. Rows stay in order of first appearance.
uniq_words <- unique(tokens)
words <- data.frame(
  word = uniq_words,
  freq = tabulate(match(tokens, uniq_words), nbins = length(uniq_words)),
  stringsAsFactors = FALSE
)

head(words)

# Sort by descending frequency.
new_words <- words[order(words$freq, decreasing = TRUE), ]

# Remove articles, prepositions and other filler words.
# NOTE: the original list contained 'buy', which looks like a typo for
# 'by' given that every other entry is a function word; the duplicated
# 'as' entry is also removed.
del_word <- c('of', 'the', 'and', 'in', 'to', 'a', 'that', 'is', 'for', 'by',
              'with', 'we', 'are', 'an', 'this', 'these', 'as', 'from',
              'which', 'at', 'their', 'have', 'or', 'our', 'its', 'but',
              'how', 'be', 'here', 'on', 'can', 'into', 'data', 'between',
              'both', 'also')

# Filter the sorted table (the original sorted into new_words but then
# filtered the unsorted words, leaving the sort without effect).
words2 <- new_words[!(new_words$word %in% del_word), ]

# Namespace-qualified because the script never attaches wordcloud2.
wordcloud2::wordcloud2(words2)

邀请讨论

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}

切换到完整评论

{{item.nick_name}} VIP用户

受邀请回答

{{item.refer_comment.nick_name}} VIP用户

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}

切换到完整回复

发送回复

赞({{item.count_zan}})

踩({{item.count_cai}})

删除

回复关闭

R科研作图学习小组(小组)

小组最新话题

R2-20 第二阶段第四次作业科研狗聪 05-05
R2-04-第二阶段第四次作业 blanking 05-04
R2-37 第2阶段第4次洪学志 04-25
R2-35 第2阶段第4次德先森 04-23
R2-09第二阶段第四次作业 Giant Panda 04-22
R2-01-第二阶段第四次作业木萱小主 04-18
R2-30第二阶段第四次作业土貉 04-17
R2-03 第二阶段第四次作业微思微丝 04-15
【源码参考】R语言第二期2-4 统计某个研究领域的研究趋势真·科研狗 04-07
R2-36第二阶段第三次作业范志敏 04-01

R2-06-dean wordcloud

附件

附件

小组最新话题

623577364