# 服务热线
# 178 0020 3020
#任务1
library(RMySQL)
# Disconnect every open MySQL connection. It is best to call this before each
# task so that stale connections from earlier runs are cleaned up first.
#
# Prints the list of open connections, closes each of them, then prints how
# many were closed.
killDbConnections <- function() {
  all_cons <- dbListConnections(MySQL())
  print(all_cons)
  # The original loop body carried a stray "+" (an R console continuation
  # prompt copied along with the code); the disconnect call is all that is
  # needed.
  for (con in all_cons) {
    dbDisconnect(con)
  }
  print(paste(length(all_cons), " connections killed."))
}
# Drop any connections left over from earlier tasks before opening a new one.
killDbConnections()
# Open the database connection.
# NOTE(review): the root user with an empty password is hard-coded here;
# consider reading the credentials from the environment instead.
con <- dbConnect(
  MySQL(),
  host = "localhost",
  dbname = "rdb",
  user = "root",
  password = ""
)
# SET NAMES produces no rows; clear its result set immediately so it does not
# stay pending on the connection while the SELECT below is running.
invisible(dbClearResult(dbSendQuery(con, "SET NAMES utf8")))
# This result set is intentionally left open: the rows are fetched in chunks
# by the code that follows.
rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")
# Everything above is carried over from the earlier steps.
# New code starts here: build a word-frequency table from the abstracts.
#
# Result: `words`, a data frame with one row per distinct token
# (`word`, in order of first appearance) and its occurrence count (`freq`).
token_chunks <- list()
while (!dbHasCompleted(rs)) {
  chunk <- dbFetch(rs, n = 10)
  # Column 4 of a chunk is the title, column 5 is the abstract.
  abstracts <- chunk[[5]]
  # A missing abstract would otherwise turn into an NA "word".
  abstracts <- abstracts[!is.na(abstracts)]
  cleaned <- gsub("[[:punct:]]", "", tolower(abstracts))
  # This stray character sequence shows up in the stored text; treat it as a
  # separator.
  cleaned <- gsub("聽", " ", cleaned)
  # Split on runs of whitespace so repeated blanks do not yield empty tokens.
  token_chunks[[length(token_chunks) + 1L]] <-
    unlist(strsplit(cleaned, split = "[[:space:]]+"))
}
# The result set is exhausted; release it so the connection is reusable.
invisible(dbClearResult(rs))

tokens <- as.character(unlist(token_chunks))
# Leading whitespace still produces one empty token per abstract; drop those.
tokens <- tokens[nzchar(tokens)]

# Count in one pass instead of growing the data frame token by token.
unique_tokens <- unique(tokens)
words <- data.frame(
  word = unique_tokens,
  freq = as.numeric(
    tabulate(match(tokens, unique_tokens), nbins = length(unique_tokens))
  ),
  stringsAsFactors = FALSE
)
head(words)
# Sort by descending frequency.
new_words <- words[order(words$freq, decreasing = TRUE), ]
# Remove articles and other common filler words ("of", "and", "the", ...).
# NOTE(review): "buy" looks like a typo for "by"; it is kept for backward
# compatibility and "by" is added alongside it.
del_word <- c(
  "of", "the", "and", "in", "to", "a", "that", "is", "for", "buy", "by",
  "with", "we", "are", "an", "this", "these", "as", "from", "which", "at",
  "their", "have", "or", "our", "its", "but", "how", "be", "here", "on",
  "can", "into", "data", "between", "both", "also"
)
# Filter the sorted table (the original filtered the unsorted `words`, which
# silently discarded the ordering computed above).
words2 <- new_words[!(new_words$word %in% del_word), ]
# Namespace-qualified because this file never attaches the wordcloud2 package.
wordcloud2::wordcloud2(words2)
# 附件