服务热线
178 0020 3020
# Task 1 ----
# Build a word-frequency table (`words`) from the abstracts of finished
# articles. You must be logged in to the database server first.
library(RMySQL)

#' Close every MySQL connection currently open in this R session.
#'
#' Prints the list of open connections, disconnects each one, then prints
#' how many were closed. Called for its side effects.
killDbConnections <- function() {
  all_cons <- dbListConnections(MySQL())
  print(all_cons)
  for (con in all_cons) {
    dbDisconnect(con)
  }
  print(paste(length(all_cons), " connections killed."))
}

#' Split free text into lower-case word tokens.
#'
#' Lower-cases the text, strips all punctuation, splits on runs of
#' whitespace, and drops empty tokens. `NA` entries are skipped.
#'
#' @param text A character vector (one element per document).
#' @return A character vector of tokens, in order of appearance.
tokenize_abstracts <- function(text) {
  text <- text[!is.na(text)]
  cleaned <- gsub("[[:punct:]]", "", tolower(text))
  tokens <- as.character(unlist(strsplit(cleaned, "[[:space:]]+")))
  tokens[nzchar(tokens)]
}

#' Count how often each token occurs.
#'
#' @param tokens A character vector of tokens.
#' @return A data frame with columns `word` (character) and `freq`
#'   (numeric), one row per distinct token, ordered by first appearance.
count_words <- function(tokens) {
  vocab <- unique(tokens)
  data.frame(
    word = vocab,
    # match() maps each token to its vocabulary slot; tabulate() counts them
    # in a single pass instead of growing the data frame token by token.
    freq = as.numeric(tabulate(match(tokens, vocab), nbins = length(vocab))),
    stringsAsFactors = FALSE
  )
}

killDbConnections() # drop any stale connections

# Open a fresh connection.
con <- dbConnect(
  MySQL(),
  host = "localhost", dbname = "rdb", user = "root", password = ""
)
# Release the result set of the SET NAMES statement right away.
dbClearResult(dbSendQuery(con, "SET NAMES utf8"))

rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")
token_chunks <- list()
tryCatch(
  {
    while (!dbHasCompleted(rs)) {
      chunk <- dbFetch(rs, 10) # fetch the next 10 rows
      # Re-encode the abstracts; the word cloud in task 2 would not render
      # without this conversion.
      chunk$abstract <- iconv(chunk$abstract, "WINDOWS-1252", "UTF-8")
      # NOTE(review): the original read the text from column 5 by position;
      # this assumes column 5 of `article` is `abstract` -- confirm.
      token_chunks[[length(token_chunks) + 1L]] <-
        tokenize_abstracts(chunk$abstract)
    }
  },
  # Always release the result set, even if fetching or tokenizing fails.
  finally = dbClearResult(rs)
)

# Word / frequency table used by the following tasks.
words <- count_words(as.character(unlist(token_chunks)))
head(words)
# `con` is intentionally left open for any later use; call
# dbDisconnect(con) once the database is no longer needed.
# Task 2 ----
# Draw a word cloud from the word-frequency table `words`
# (columns `word` and `freq`, built in task 1).

# Install wordcloud2 only when it is missing, not on every run.
if (!requireNamespace("wordcloud2", quietly = TRUE)) {
  install.packages("wordcloud2")
}
library(wordcloud2)

# Plot at most the first 1000 rows. head() is used instead of
# `words[0:1000, ]`, which silently ignores index 0 and yields all-NA rows
# when the table has fewer than 1000 entries.
wordcloud2(head(words, 1000))
# Note: the plot originally failed to render; it only worked after the
# abstract encoding conversion was added in task 1.
# Task 3 ----
# Draw a word cloud without common function words, most frequent first.

# Sort the table by descending frequency.
new_words <- words[order(words$freq, decreasing = TRUE), ]

# Words to exclude from the cloud.
# NOTE(review): "buy" looks like a typo for "by"; it is kept so nothing that
# was filtered before reappears, and "by" is added alongside it.
del_word <- c(
  "of", "the", "and", "in", "to", "a", "that", "is", "for", "buy", "by",
  "with", "we", "are", "an", "this", "these", "as", "from", "which", "at",
  "their", "have", "or", "our", "its", "but", "how", "be", "here", "on",
  "can", "into", "data", "between", "both", "also"
)

# Filter the *sorted* table; the original filtered the unsorted `words`,
# which discarded the ordering computed above.
words2 <- new_words[!(new_words$word %in% del_word), ]
wordcloud2(words2)
附件