# Service hotline
# 178 0020 3020
library("DBI")
library("RMySQL")
#' Close every connection currently registered with the MySQL driver.
#'
#' Lists the open connections, prints them, disconnects each one, and then
#' prints how many were closed.
#'
#' @return The printed summary string (returned invisibly by `print()`).
killDbConnections <- function() {
  all_cons <- dbListConnections(MySQL())
  print(all_cons)
  # The loop body used to start with a stray "+" (a console continuation
  # prompt pasted into the source); it is removed and the body is braced.
  for (con in all_cons) {
    dbDisconnect(con)
  }
  print(paste(length(all_cons), " connections killed."))
}
# Drop connections left over from earlier runs before opening a new one.
killDbConnections()

# Open a fresh connection to the local "rdb" database.
con <- dbConnect(
  MySQL(),
  host = "localhost", dbname = "rdb", user = "root", password = ""
)
# SET NAMES produces no rows; clear its result right away instead of
# leaving it open while the SELECT below is sent.
dbClearResult(dbSendQuery(con, "SET NAMES utf8"))

rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")

# Tokens are collected chunk by chunk and counted once at the end, instead
# of growing a data frame one word at a time inside nested loops.
token_chunks <- list()
while (!dbHasCompleted(rs)) {
  # Pass the result object itself (not the string "rs") and fetch 10 rows.
  chunk <- dbFetch(rs, n = 10)
  if (nrow(chunk) == 0L) {
    next
  }
  # Re-encode the abstract column from WINDOWS-1252 to UTF-8.
  chunk$abstract <- iconv(chunk$abstract, "WINDOWS-1252", "UTF-8")
  # Lower-case the text of column 5 and strip all punctuation.
  # NOTE(review): column 5 is presumably `abstract` -- confirm against the
  # table schema.
  text <- gsub("[[:punct:]]", "", tolower(chunk[[5]]))
  tokens <- unlist(strsplit(text, "[[:space:]]+"))
  # Missing abstracts and runs of whitespace must not be counted as words.
  tokens <- tokens[!is.na(tokens) & nzchar(tokens)]
  token_chunks[[length(token_chunks) + 1L]] <- tokens
}
dbClearResult(rs)
# `con` is left open in case later code still queries it; call
# dbDisconnect(con) once it is no longer needed.

all_tokens <- unlist(token_chunks)
if (is.null(all_tokens)) {
  all_tokens <- character(0)
}

# One row per distinct word, in order of first appearance, with its count.
vocab <- unique(all_tokens)
words <- data.frame(
  word = vocab,
  freq = as.numeric(tabulate(match(all_tokens, vocab), nbins = length(vocab))),
  stringsAsFactors = FALSE
)
# Preview the word/frequency table itself (not the literal string "words").
head(words)

# Install the plotting packages only when they are missing, so that
# re-running the script does not reinstall them every time.
for (pkg in c("wordcloud2", "jsonlite")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(wordcloud2)

# Word cloud of at most the first 1000 rows. head() never pads with all-NA
# rows when the table is shorter than that, unlike indexing with 0:1000.
wordcloud2(head(words, 1000))

# Table ordered from most to least frequent word.
new_words <- words[order(words$freq, decreasing = TRUE), ]

# Common function words to leave out of the second cloud.
del_word <- c(
  "of", "the", "and", "in", "to", "a", "that", "is", "for", "buy", "with",
  "we", "are", "an", "this", "these", "as", "from", "which", "at", "their",
  "have", "or", "our", "its", "but", "how", "be", "as", "here", "on", "can",
  "into", "data", "between", "both", "also", "by", "yet", "than", "well",
  "it", "not"
)
words2 <- words[!(words$word %in% del_word), ]
wordcloud2(words2, shape = "triangle", size = 0.5)
# Attachment