-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtopicImportance.py
More file actions
63 lines (56 loc) · 2.5 KB
/
topicImportance.py
File metadata and controls
63 lines (56 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import sys
import os
from itertools import groupby
import docParsingFunctions
def topicOfDocument():
    """Compute the corpus-wide importance of every topic.

    Fetches the topic rows and the sentiment rows from ``docParsingFunctions``,
    pairs each sentiment row with the topic rows that share its document index
    and sentiment label, weights every topic probability (theta) by the
    sentiment value, and hands the weighted rows to ``overalTopicCalculation``
    for aggregation.

    Row layout, as implied by the indexing below (confirm against
    ``docParsingFunctions``):
      * topic row:     ``row[0][1]`` document index, ``row[1]`` sentiment
        label, ``row[2]`` iterable of ``(topic label, theta)`` pairs
      * sentiment row: ``row[0]`` document index, ``row[2]`` sentiment label,
        ``row[3]`` sentiment value

    Returns:
        The list filled in by ``overalTopicCalculation``.
    """
    # Populate the topic and sentiment lists (topics first, as before).
    topic_rows = docParsingFunctions.mainTopics()
    sentiment_groups = docParsingFunctions.mainSentiment()

    weighted_rows = []
    for sentiment_group in sentiment_groups:
        for sentiment_row in sentiment_group:
            for topic_row in topic_rows:
                doc_index = topic_row[0][1]
                sentiment_label = topic_row[1]
                is_match = (doc_index == sentiment_row[0]
                            and sentiment_label == sentiment_row[2])
                if not is_match:
                    continue
                for topic_entry in topic_row[2]:
                    topic_label = topic_entry[0]
                    theta = topic_entry[1]
                    # Original author's note: the thetas of one sentiment of
                    # one document sum to 1.00, but the real importance of a
                    # topic for a document is theta scaled by that document's
                    # sentiment value, so the main topic of a document is not
                    # always the one with the highest theta.
                    importance = float(sentiment_row[3]) * float(theta)
                    # (topic label, doc index, overall importance,
                    #  sentiment label, sentiment value, theta)
                    weighted_rows.append((
                        topic_label,
                        doc_index,
                        importance,
                        sentiment_row[2],
                        sentiment_row[3],
                        theta,
                    ))

    aggregated = []
    overalTopicCalculation(weighted_rows, aggregated)
    return aggregated
def overalTopicCalculation(overalTopicValues, dataForFile):
    """Sum the overall importance of each topic across the corpus.

    Args:
        overalTopicValues: iterable of rows indexed as ``row[0]`` topic label,
            ``row[1]`` document index, ``row[2]`` overall importance and
            ``row[3]`` sentiment label (further fields are ignored). Topic
            labels must be mutually orderable because the rows are sorted by
            label before grouping.
        dataForFile: list that is extended in place with one
            ``(topic label, sentiment label, summed importance)`` tuple per
            distinct topic label, in ascending label order.

    Returns:
        None; results are delivered through ``dataForFile``.
    """
    def topic_label(row):
        return row[0]

    # groupby only merges adjacent rows, so the rows must be sorted by the
    # same key first. sorted() is stable, so rows of one topic keep their
    # original relative order.
    ordered_rows = sorted(overalTopicValues, key=topic_label)
    for label, group in groupby(ordered_rows, key=topic_label):
        rows = list(group)  # groupby never yields an empty group
        total_importance = sum(row[2] for row in rows)
        # NOTE(review): if one topic label occurs under several sentiment
        # labels, the sentiment of the last row wins (unchanged behaviour).
        sentiment = rows[-1][3]
        dataForFile.append((label, sentiment, total_importance))