import codecs
import functools
import os
import re
from collections import defaultdict

import jieba
import pandas as pd
from snownlp import SnowNLP
'''
#读取评论内容的.txt⽂件
txt = open('C:/Users/24224/',encoding='utf-8')
text = txt.readlines()
print(text)
#确认读取⽂件成功,并关闭⽂件节省资源
print('读⼊成功')
txt.close()
#遍历每⼀条评论,得到每条评论是positive⽂本的概率,每条评论计算完成后输出ok确认执⾏成功
comments = []
comments_score = []
for i in text:
a1 = SnowNLP(i)
a2 = a1.sentiments
comments.append(i)
comments_score.append(a2)
print('ok')
#将结果数据框存为.xlsx表格,查看结果及分布
table = pd.DataFrame(comments, comments_score)
print(table)
table.to_excel('C:/Users/24224/Desktop/emotion_analyse.xlsx', sheet_name='result')
#打分范围是[0-1],此次定义[0,0.5]为负向评论,(0.5,1]为正向评论,观察其分布。
#基于波森情感词典计算情感值
def getscore(text):
df = pd.read_table(r"BosonNLP_sentiment_score\BosonNLP_sentiment_score.txt", sep=" ", names=['key', 'score'])
key = df['key'].values.tolist()
score = df['score'].values.tolist()
# jieba分词
segs = jieba.lcut(text,cut_all = False) #返回list
# 计算得分
score_list = [score[key.index(x)] for x in segs if(x in key)]
return sum(score_list)
#读取⽂件
def read_txt(filename):
with open(filename,'r',encoding='utf-8')as f:
txt = f.read()
return txt
#写⼊⽂件
def write_data(filename,data):
with open(filename,'a',encoding='utf-8')as f:
f.write(data)
if __name__=='__main__':
text = read_txt('C:/Users/24224/')
lists = text.split('\n')
i = 0
for list in lists:
if list != '':
sentiments = round(getscore(list),2)
#情感值为正数,表⽰积极;为负数表⽰消极
print(list)
print("情感值:",sentiments)
if sentiments > 0:
print("机器标注情感倾向:积极\n")
s = "机器判断情感倾向:积极\n"
else:
print('机器标注情感倾向:消极\n')
s = "机器判断情感倾向:消极"+'\n'
sentiment = '情感值:'+str(sentiments)+'\n'
#⽂件写⼊
filename = 'BosonNLP情感分析结果.txt'
write_data(filename,'情感分析⽂本:')
write_data(filename,list+'\n') #写⼊待处理⽂本
write_data(filename,sentiment) #写⼊情感值
#write_data(filename,al_sentiment) #写⼊机器判断情感倾向
write_data(filename,s+'\n') #写⼊⼈⼯标注情感
i = i+1
'''
# Build the filtered stop-word file: every raw stop word except negation
# words and degree adverbs is written out, one per line.
# NOTE(review): the raw stop-word file name below appears to contain a
# Kangxi-radical look-alike character (likely a PDF/HTML extraction
# artifact) -- confirm it matches the real file name on disk.
with open('停⽤词.txt', 'r', encoding='utf-8') as fr:
    # strip() removes the trailing newline and surrounding whitespace.
    stopwords = {word.strip() for word in fr}

# Negation words, one per line.
with open('否定词.txt', 'r', encoding='utf-8') as not_word_file:
    not_word_list = [w.strip() for w in not_word_file]

# Degree adverbs: only the text before the first comma is the word itself.
with open('程度副词.txt', 'r', encoding='utf-8') as degree_file:
    degree_list = [item.split(',')[0].strip() for item in degree_file]

# NOTE(review): the original output path was an empty string, which open()
# always rejects; 'stopwords.txt' is an assumed reconstruction and must match
# the path seg_word reads from -- confirm.
_protected_words = set(not_word_list) | set(degree_list)
with open('stopwords.txt', 'w', encoding='utf-8') as f:
    # sorted() makes the generated file reproducible between runs.
    for word in sorted(stopwords - _protected_words):
        f.write(word + '\n')
# Segment with jieba, then drop stop words.
@functools.lru_cache(maxsize=None)
def _load_stopwords(path):
    """Read the stop-word file at *path* and return its entries as a frozenset.

    The result is cached per path so that scoring many sentences does not
    re-read the file on every call.
    """
    with open(path, 'r', encoding='utf-8') as fr:
        return frozenset(line.strip() for line in fr)


def seg_word(sentence, stopword_path='stopwords.txt'):
    """Segment *sentence* with jieba and remove stop words.

    Args:
        sentence: Text to segment.
        stopword_path: Stop-word file, one word per line.
            NOTE(review): the original literal was an empty string, which
            open() always rejects; 'stopwords.txt' is an assumed
            reconstruction and must match the path the stop-word generation
            step at the top of the file writes to -- confirm.

    Returns:
        A list of the tokens produced by ``jieba.cut`` that are not stop
        words, in their original order.

    Raises:
        OSError: If the stop-word file cannot be opened.
    """
    stopwords = _load_stopwords(stopword_path)
    return [token for token in jieba.cut(sentence) if token not in stopwords]
# Pick out the sentiment words, negation words and degree adverbs in a text.
@functools.lru_cache(maxsize=None)
def _load_lexicons(sen_path, not_path, degree_path):
    """Load the three lexicon files once per path triple.

    Returns:
        ``(sen_dict, not_words, degree_dict)`` where ``sen_dict`` maps a word
        to its score string, ``not_words`` is a frozenset of negation words
        and ``degree_dict`` maps a degree adverb to its weight string.
    """
    # Sentiment lexicon: only lines made of exactly two space-separated
    # fields ("word score") are used.
    sen_dict = {}
    with open(sen_path, 'r', encoding='utf-8') as sen_file:
        for line in sen_file:
            parts = line.split(' ')
            if len(parts) == 2:
                sen_dict[parts[0]] = parts[1].strip()

    # Negation words: strip the newline, otherwise no token can ever match.
    with open(not_path, 'r', encoding='utf-8') as not_word_file:
        not_words = frozenset(w.strip() for w in not_word_file if w.strip())

    # Degree adverbs: "word,weight". The weight (second field) is stored
    # because score_sentiment converts the stored value with float().
    degree_dict = {}
    with open(degree_path, 'r', encoding='utf-8') as degree_file:
        for line in degree_file:
            parts = line.strip().split(',')
            if len(parts) >= 2:
                degree_dict[parts[0]] = parts[1]

    return sen_dict, not_words, degree_dict


def classify_words(
    word_list,
    sen_path=os.path.join('BosonNLP_sentiment_score',
                          'BosonNLP_sentiment_score.txt'),
    not_path='否定词.txt',
    degree_path='程度副词.txt',
):
    """Classify tokens into sentiment words, negation words and degree adverbs.

    A token is matched with this priority: degree adverb, then negation word,
    then sentiment word. Tokens matching none of the lexicons are ignored.

    Args:
        word_list: Sequence of tokens (for example the output of ``seg_word``).
        sen_path: Sentiment lexicon with one "word score" pair per line.
            NOTE(review): the file name in the original literal was
            truncated; the default here is a reconstruction -- confirm.
        not_path: Negation-word file, one word per line.
        degree_path: Degree-adverb file with one "word,weight" pair per line.

    Returns:
        A tuple ``(sen_word, not_word, degree_word)`` of dicts keyed by the
        token's index in *word_list*. Values are the score string, ``-1`` and
        the weight string respectively.

    Raises:
        OSError: If any lexicon file cannot be opened.
    """
    sen_dict, not_words, degree_dict = _load_lexicons(
        sen_path, not_path, degree_path)

    sen_word = {}
    not_word = {}
    degree_word = {}
    for index, word in enumerate(word_list):
        if word in degree_dict:
            degree_word[index] = degree_dict[word]
        elif word in not_words:
            not_word[index] = -1
        elif word in sen_dict:
            sen_word[index] = sen_dict[word]
    return sen_word, not_word, degree_word
# Compute the weighted score of the sentiment words.
def score_sentiment(sen_word, not_word, degree_word, seg_result):
    """Sum the sentiment-word scores, weighted by negations and degree adverbs.

    The weight starts at 1. After each sentiment word is scored, the positions
    from that word up to (not including) the next sentiment word are scanned:
    a negation flips the sign of the weight and a degree adverb multiplies it.
    The updated weight is applied to the following sentiment words.

    NOTE(review): the weight is never reset, so modifiers accumulate across
    the whole sentence, and modifiers that precede the first sentiment word
    are not applied. This is preserved as written -- confirm it is intended.

    Args:
        sen_word: Dict mapping token index -> score (anything ``float()``
            accepts). Keys are expected in ascending order.
        not_word: Dict whose keys are the indices of negation words.
        degree_word: Dict mapping token index -> weight (anything ``float()``
            accepts).
        seg_result: The token list; only its length is used.

    Returns:
        The accumulated score (``0`` when there are no sentiment words).
    """
    weight = 1
    score = 0
    # Positions of the sentiment words, in the order they were recorded.
    sentiment_positions = list(sen_word)
    # Rank of the sentiment word currently being processed.
    rank = -1

    for index in range(len(seg_result)):
        if index not in sen_word:
            continue
        score += weight * float(sen_word[index])
        rank += 1
        if rank < len(sentiment_positions) - 1:
            # Scan the gap up to the next sentiment word for modifiers.
            gap = range(sentiment_positions[rank],
                        sentiment_positions[rank + 1])
            for j in gap:
                if j in not_word:
                    weight *= -1
                elif j in degree_word:
                    weight *= float(degree_word[j])
    return score
# Compute the sentiment score of one sentence.
def sentiment_score(sentence):
    """Return the sentiment score of *sentence*.

    Steps:
        1. Segment the sentence with ``seg_word``.
        2. Classify the tokens with ``classify_words`` into sentiment words,
           negation words and degree adverbs.
        3. Combine them with ``score_sentiment``.

    Args:
        sentence: The text to score.

    Returns:
        The value returned by ``score_sentiment`` for the segmented sentence.
    """
    seg_list = seg_word(sentence)
    sen_word, not_word, degree_word = classify_words(seg_list)
    return score_sentiment(sen_word, not_word, degree_word, seg_list)
# Read a file.
def read_txt(filename):
    """Return the entire contents of *filename*, decoded as UTF-8.

    Args:
        filename: Path of the text file to read.

    Returns:
        The file contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return f.read()
def write_data(filename, data):
    """Append *data* to *filename*, encoded as UTF-8.

    The file is opened in append mode, so it is created if missing and
    existing content is kept.

    Args:
        filename: Path of the file to append to.
        data: The string to write.

    Raises:
        OSError: If the file cannot be opened for appending.
    """
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(data)
# Score every non-empty input line with the BosonNLP-lexicon scorer, print
# the result and append it to the result file.
# NOTE(review): the input path below ends in a directory separator and looks
# truncated (the file name is missing) -- fill in the real comment file.
text = read_txt('C:/Users/24224/')
lists = text.split('\n')
filename = 'BosonNLP情感分析结果.txt'
i = 0  # number of non-empty lines processed so far
for line in lists:
    if line == '':
        continue
    sentiments = sentiment_score(line)
    # A positive score is labelled positive; zero or negative is labelled
    # negative.
    print("情感值:", sentiments)
    print(line)
    if sentiments > 0:
        print("机器标注情感倾向:积极\n")
        s = "机器判断情感倾向:积极\n"
    else:
        print('机器标注情感倾向:消极\n')
        s = "机器判断情感倾向:消极" + '\n'
    sentiment = '情感值:' + str(sentiments) + '\n'
    # Append one record: label prefix, the text, its score and the verdict
    # followed by a blank line. A single call avoids reopening the file for
    # each piece.
    write_data(
        filename,
        ''.join(('情感分析⽂本:', line + '\n', sentiment, s + '\n')),
    )
    i = i + 1