BosonNLP情感词典评论情感分析

from snownlp import SnowNLP

import pandas as pd

from collections import defaultdict

import os

import re

import jieba

import codecs

'''

#读取评论内容的.txt⽂件

txt = open('C:/Users/24224/',encoding='utf-8')

text = txt.readlines()

print(text)

#确认读取⽂件成功，并关闭⽂件节省资源

print('读⼊成功')

txt.close()

#遍历每⼀条评论，得到每条评论是positive⽂本的概率，每条评论计算完成后输出ok确认执⾏成功

comments = []

comments_score = []

for i in text:

a1 = SnowNLP(i)

a2 = a1.sentiments

comments.append(i)

comments_score.append(a2)

print('ok')

#将结果数据框存为.xlsx表格，查看结果及分布

table = pd.DataFrame(comments, comments_score)

print(table)

table.to_excel('C:/Users/24224/Desktop/emotion_analyse.xlsx', sheet_name='result')

#打分范围是[0-1]，此次定义[0,0.5]为负向评论，(0.5,1]为正向评论，观察其分布。

#基于波森情感词典计算情感值

def getscore(text):

df = pd.read_table(r"BosonNLP_sentiment_score\BosonNLP_", sep=" ", names=['key', 'score'])

key = df['key'].values.tolist()

score = df['score'].values.tolist()

# jieba分词

segs = jieba.lcut(text,cut_all = False) #返回list

# 计算得分

score_list = [score[key.index(x)] for x in segs if(x in key)]

return sum(score_list)

#读取⽂件

def read_txt(filename):

with open(filename,'r',encoding='utf-8')as f:

txt = f.read()

return txt

#写⼊⽂件

def write_data(filename,data):

with open(filename,'a',encoding='utf-8')as f:

f.write(data)

if __name__=='__main__':

text = read_txt('C:/Users/24224/')

lists = text.split('\n')

i = 0

for list in lists:

if list != '':

sentiments = round(getscore(list),2)

#情感值为正数，表⽰积极；为负数表⽰消极

print(list)

print("情感值：",sentiments)

if sentiments > 0:

print("机器标注情感倾向：积极\n")

s = "机器判断情感倾向：积极\n"

else:

print('机器标注情感倾向：消极\n')

s = "机器判断情感倾向：消极"+'\n'

sentiment = '情感值：'+str(sentiments)+'\n'

#⽂件写⼊

filename = 'BosonNLP情感分析结果.txt'

write_data(filename,'情感分析⽂本：')

write_data(filename,'情感分析⽂本：')

write_data(filename,list+'\n') #写⼊待处理⽂本

write_data(filename,sentiment) #写⼊情感值

#write_data(filename,al_sentiment) #写⼊机器判断情感倾向

write_data(filename,s+'\n') #写⼊⼈⼯标注情感

i = i+1

'''

# ⽣成stopword表，需要去除⼀些否定词和程度词汇

# Build the base stop-word set from the raw stop-word list (one word per line).
# NOTE(review): this file name is reproduced byte-for-byte from the source,
# including the unusual (Kangxi-radical) form of its middle character --
# confirm it matches the file on disk.
stopwords = set()
with open('停⽤词.txt', 'r', encoding='utf-8') as fr:
    for word in fr:
        # strip() drops the trailing newline and surrounding whitespace.
        stopwords.add(word.strip())

# Read the negation words (one per line). Read-only access is sufficient,
# so the file is opened with 'r' rather than 'r+'.
with open('否定词.txt', 'r', encoding='utf-8') as not_word_file:
    not_word_list = [w.strip() for w in not_word_file]

# Read the degree adverbs; only the text before the first comma is kept.
with open('程度副词.txt', 'r', encoding='utf-8') as degree_file:
    degree_list = [item.split(',')[0].strip() for item in degree_file]

# Write the new stop-word list: every stop word that is neither a negation
# word nor a degree adverb, so those words survive stop-word filtering.
# NOTE(review): the output file name was an empty string in the source (it
# appears to have been lost); 'stopwords.txt' is a placeholder -- confirm,
# and keep it in sync with the path that seg_word() reads.
_protected_words = set(not_word_list) | set(degree_list)
with open('stopwords.txt', 'w', encoding='utf-8') as f:
    for word in stopwords:
        if word not in _protected_words:
            f.write(word + '\n')

# jieba分词后去除停⽤词

def seg_word(sentence, stopwords_path='stopwords.txt'):
    """Segment *sentence* with jieba and remove stop words.

    Args:
        sentence: The text to segment.
        stopwords_path: UTF-8 text file with one stop word per line.
            NOTE(review): the source opened an empty file name here (the
            name appears to have been lost); 'stopwords.txt' is a
            placeholder -- confirm it matches the file written by the
            stop-word generation step.

    Returns:
        A list of the tokens produced by ``jieba.cut``, in their original
        order, excluding every token found in the stop-word file.
    """
    with open(stopwords_path, 'r', encoding='utf-8') as fr:
        # One stop word per line; strip() removes the trailing newline.
        stopwords = {line.strip() for line in fr}
    return [token for token in jieba.cut(sentence) if token not in stopwords]

# 出⽂本中的情感词、否定词和程度副词

def classify_words(
    word_list,
    sentiment_path='BosonNLP_sentiment_score/BosonNLP_sentiment_score.txt',
    negation_path='否定词.txt',
    degree_path='程度副词.txt',
):
    """Classify tokens into sentiment words, negation words and degree adverbs.

    Args:
        word_list: Sequence of tokens (the segmentation result).
        sentiment_path: Sentiment dictionary; lines of the form
            ``word<space>weight``. Lines that do not split into exactly two
            space-separated fields are ignored.
            NOTE(review): the file name in the source was truncated to
            ``BosonNLP_``; the default used here is a reconstruction --
            confirm against the actual dictionary file.
        negation_path: Negation word list, one word per line.
        degree_path: Degree adverb list; lines of the form ``word,weight``.
            Lines without a comma are ignored.

    Returns:
        A tuple ``(sen_word, not_word, degree_word)`` of dicts keyed by the
        token's position in *word_list*:
        ``sen_word`` maps position -> sentiment weight (as a string),
        ``not_word`` maps position -> ``-1``,
        ``degree_word`` maps position -> degree weight (as a string).
        A token is classified as a degree adverb first, then as a negation
        word, then as a sentiment word.
    """
    # Sentiment dictionary: key is the word, value is its weight.
    sen_dict = {}
    with open(sentiment_path, 'r', encoding='utf-8') as sen_file:
        for line in sen_file:
            parts = line.split(' ')
            if len(parts) == 2:
                sen_dict[parts[0]] = parts[1].strip()

    # Negation words. They must be stripped: with the trailing newline kept,
    # no token would ever compare equal to an entry.
    with open(negation_path, 'r', encoding='utf-8') as not_word_file:
        not_words = {line.strip() for line in not_word_file}
    not_words.discard('')

    # Degree adverbs: key is the word, value is the weight in the second
    # comma-separated field (the scorer converts this value with float()).
    degree_dict = {}
    with open(degree_path, 'r', encoding='utf-8') as degree_file:
        for line in degree_file:
            parts = line.split(',')
            if len(parts) >= 2:
                degree_dict[parts[0]] = parts[1].strip()

    sen_word = {}
    not_word = {}
    degree_word = {}
    for index, word in enumerate(word_list):
        if word in degree_dict:
            degree_word[index] = degree_dict[word]
        elif word in not_words:
            not_word[index] = -1
        elif word in sen_dict:
            sen_word[index] = sen_dict[word]
    return sen_word, not_word, degree_word

# 计算情感词的分数

def score_sentiment(sen_word, not_word, degree_word, seg_result):
    """Compute the weighted sentiment score of a segmented sentence.

    Args:
        sen_word: Dict mapping token position -> sentiment weight
            (anything accepted by ``float``).
        not_word: Dict whose keys are the positions of negation words.
        degree_word: Dict mapping token position -> degree weight
            (anything accepted by ``float``).
        seg_result: The token list; only its length is used, to bound the
            positions that are scored.

    Returns:
        The sum over sentiment words of ``weight * sentiment weight``.
        Returns ``0`` when no sentiment word lies inside *seg_result*.

    The running weight starts at 1. After each sentiment word is scored,
    the positions from that word up to (not including) the next sentiment
    word are scanned: every negation word multiplies the weight by -1 and
    every degree adverb multiplies it by its degree weight.

    NOTE(review): two consequences of this scheme, kept as-is to preserve
    existing scores -- (1) modifiers that appear before the first sentiment
    word never affect the score; (2) the weight is never reset, so a
    modifier keeps affecting all later sentiment words. Confirm whether
    this is intended.
    """
    weight = 1
    score = 0
    # Positions of the sentiment words that fall inside the sentence, in
    # sentence order.
    positions = sorted(pos for pos in sen_word if 0 <= pos < len(seg_result))
    for rank, pos in enumerate(positions):
        score += weight * float(sen_word[pos])
        if rank + 1 < len(positions):
            # Modifiers between this sentiment word and the next one update
            # the weight applied to the following sentiment words.
            for j in range(pos, positions[rank + 1]):
                if j in not_word:
                    weight *= -1
                elif j in degree_word:
                    weight *= float(degree_word[j])
    return score

# 计算得分

def sentiment_score(sentence):
    """Return the sentiment score of *sentence*.

    The sentence is segmented with ``seg_word``, the tokens are classified
    with ``classify_words`` into sentiment words, negation words and degree
    adverbs, and the result is scored with ``score_sentiment``.
    """
    # 1. Segment the text.
    seg_list = seg_word(sentence)
    # 2. Find the sentiment words, negation words and degree adverbs.
    sen_word, not_word, degree_word = classify_words(seg_list)
    # 3. Compute the score.
    return score_sentiment(sen_word, not_word, degree_word, seg_list)

#读取⽂件

def read_txt(filename):
    """Return the entire content of the UTF-8 text file *filename*."""
    with open(filename, 'r', encoding='utf-8') as f:
        return f.read()

def write_data(filename, data):
    """Append the string *data* to *filename*, encoded as UTF-8.

    The file is opened in append mode, so it is created if missing and
    existing content is kept.
    """
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(data)

#基于波森情感词典计算情感值

# Score every non-empty line of the input file with the BosonNLP-dictionary
# scorer, print the result, and append it to the result file.
# NOTE(review): this path ends with a directory separator, so it names a
# directory rather than a file -- the file name appears to have been lost;
# confirm the intended input file.
text = read_txt('C:/Users/24224/')
lists = text.split('\n')
filename = 'BosonNLP情感分析结果.txt'
i = 0  # number of comments processed so far
for line in lists:
    if line == '':
        continue
    sentiments = sentiment_score(line)
    # A positive score is labelled positive; zero or negative is labelled
    # negative.
    print("情感值：", sentiments)
    print(line)
    if sentiments > 0:
        print("机器标注情感倾向：积极\n")
        s = "机器判断情感倾向：积极\n"
    else:
        print('机器标注情感倾向：消极\n')
        s = "机器判断情感倾向：消极" + '\n'
    sentiment = '情感值：' + str(sentiments) + '\n'
    # One record per comment: label + text, score, machine-judged polarity,
    # then a blank line. Written in a single append so the result file is
    # opened once per comment.
    write_data(filename, '情感分析⽂本：' + line + '\n' + sentiment + s + '\n')
    i = i + 1

BosonNLP情感词典评论情感分析

发布评论取消回复

最近发表

热门文章

标签列表