R2-04-第二阶段第三次作业

blanking 2018-03-28 21:35:42 阅读: 1546

library(DBI)

library(RMySQL)

library(wordcloud2)

# Close every MySQL connection currently registered with the RMySQL driver.
#
# Takes no arguments. Prints the list of open connections, disconnects each
# of them, and finally prints how many connections were closed.
killDbConnections <- function() {
  all_cons <- dbListConnections(MySQL())
  print(all_cons)

  # The original loop body began with a stray "+" (a console continuation
  # prompt pasted into the source); the body is now an explicit braced call.
  for (con in all_cons) {
    dbDisconnect(con)
  }

  print(paste(length(all_cons), " connections killed."))
}

# Drop any connections left over from earlier runs before opening a new one.
killDbConnections()

con <- dbConnect(
  MySQL(),
  dbname = "practice",
  username = "root",
  password = ""
)

# SET NAMES returns no rows. dbExecute() runs the statement and releases its
# result set, whereas dbSendQuery() would leave an un-cleared result behind.
dbExecute(con, "SET NAMES utf8")

# Result set that is consumed chunk by chunk further down in the script.
rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")

# Task 1: count how often each word occurs in the fetched abstracts.
#
# Rows are pulled from `rs` ten at a time. Each abstract is lower-cased,
# split into words, and the words of all chunks are tallied once at the end
# instead of growing a data frame with rbind() for every single word.
# The result is `words`, a data frame with columns `word` and `freq`, with
# rows in order of first appearance.
abstract_tokens <- list()

while (!dbHasCompleted(rs)) {
  chunk <- dbFetch(rs, 10)

  # chunk[, 4] is the title, chunk[, 5] is the abstract.
  # NOTE(review): positional indexing depends on the column order of the
  # `article` table returned by SELECT * -- confirm against the schema.
  abstracts <- tolower(chunk[, 5])

  # A missing abstract would otherwise turn into an NA "word".
  abstracts <- abstracts[!is.na(abstracts)]

  # Hyphens have to become spaces *before* punctuation is stripped:
  # [[:punct:]] also matches "-", so the reverse order fuses hyphenated
  # words together and makes the hyphen replacement a no-op.
  abstracts <- gsub("-", " ", abstracts, fixed = TRUE)
  abstracts <- gsub("[[:punct:]]", "", abstracts)

  # Split on any run of whitespace so tabs and line breaks also separate
  # words.
  abstract_tokens[[length(abstract_tokens) + 1L]] <-
    unlist(strsplit(abstracts, split = "[[:space:]]+"))
}

# Release the result set and the connection; neither is used after this.
invisible(dbClearResult(rs))
invisible(dbDisconnect(con))

all_tokens <- as.character(unlist(abstract_tokens))

# Leading or repeated separators produce empty tokens; they are not words.
all_tokens <- all_tokens[nzchar(all_tokens)]

# unique() keeps first-appearance order, matching the row order that the
# incremental counting produced.
distinct_words <- unique(all_tokens)

words <- data.frame(
  word = distinct_words,
  freq = as.numeric(
    tabulate(match(all_tokens, distinct_words), nbins = length(distinct_words))
  ),
  stringsAsFactors = FALSE
)

head(words, 20)

# Task 2: draw a word cloud from at most the first 1000 rows of `words`.
# head() stops at the last available row; indexing with 0:1000 would pad
# the data frame with all-NA rows whenever fewer than 1000 words exist.
wordcloud2(head(words, 1000))

# Task 3: word cloud without common function words and generic terms.

# Most frequent words first.
new_words <- words[order(words$freq, decreasing = TRUE), ]

# Words to leave out of the cloud. Compared with the earlier list, the
# duplicated "as" is removed and the misspelt "suport" is corrected to
# "support" so that it can actually match.
del_word <- c(
  "not", "two", "other", "during", "of", "it", "may", "the", "and", "in",
  "to", "a", "that", "is", "for", "by", "with",
  "we", "are", "an", "this", "these", "as", "from", "which", "at", "their",
  "have", "or", "our", "its", "but", "how",
  "be", "here", "on", "can", "into", "data", "between", "both", "also",
  "show", "within", "however", "all",
  "thus", "yet", "such", "only", "after", "when", "were", "now", "thereby",
  "well", "than", "most", "results",
  "previously", "cause", "each", "report", "role", "insight", "among", "was",
  "has", "known", "while", "forward",
  "due", "using", "find", "enables", "present", "bound", "toward", "via",
  "2", "1", "remain", "used", "remains",
  "binds", "based", "three", "across", "provides", "b", "early", "against",
  "binding", "enhanced", "unknown", "low",
  "demonstrate", "found", "findings", "understood", "specific", "related",
  "promotes", "key", "revealed", "highly",
  "functional", "responses", "action", "significant", "establish", "could",
  "many", "through", "high", "largely",
  "unexpectedly", "efficacy", "support", "range", "more", "expressed",
  "levels"
)

# Filter the frequency-sorted table (previously computed but never used) so
# that the cloud receives its rows with the most frequent words first.
words2 <- new_words[!(new_words$word %in% del_word), ]

wordcloud2(words2)


 
邀请讨论

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
{{item.nick_name}} 受邀请回答 {{item.create_time}}
{{item.refer_comment.nick_name}} {{item.refer_comment.create_time}}

附件

{{f.title}} 大小 {{f.file_size}} 下载 {{f.count_download}} 金币 {{f.count_gold}}
切换到完整回复 发送回复
赞({{item.count_zan}}) 踩({{item.count_cai}}) 删除 回复 关闭
科研狗©2015-2024 科研好助手,京ICP备20005780号-1 建议意见

服务热线

178 0020 3020

微信服务号