From bdbd8f51a393aa9a8db470e71d9f3f370dae6a7b Mon Sep 17 00:00:00 2001
From: arpanrau <arpan.rau@gmail.com>
Date: Sun, 2 Oct 2016 18:50:41 -0400
Subject: [PATCH 1/2] Finished text mining project!

---
 wikipedia_sentiment.py | 103 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 wikipedia_sentiment.py

diff --git a/wikipedia_sentiment.py b/wikipedia_sentiment.py
new file mode 100644
index 0000000..efc73c8
--- /dev/null
+++ b/wikipedia_sentiment.py
@@ -0,0 +1,103 @@
+#scrapes wikipedia and performs sentiment analysis to figure out what wikipedia thinks about
+#congress democrats and republicans
+
+#imports pattern web
+from pattern.web import *
+#imports pattern natural language
+from pattern.en import * 
+
+def sentiment_finder(searchterm):
+    """Finds sentiment of a given article on wikipedia.
+        accepts string searchterm where searchterm is the title of a wikipedia article.
+        Returns vector of (sentiment, objectivity)
+        where sentiment is between -1.0 and 1.0 and objectivity is between 0 and 1"""
+    try:
+        article = Wikipedia().search(searchterm) #pull article from wikipedia 
+        articletext = str.splitlines(article.plaintext().encode('utf-8')) # plain text as list of string lines of article
+        founde = False #looking for articles that start with bulleted lists
+        for i in range(len(articletext)): #loop thru and find the first line in the article 
+                if articletext[i] == '* e':   #First line of article content marked by *e if article starts with bulleted lists
+                        index = i
+                        founde = True
+                if founde == False:          #if we haven't found e yet, no bulleted list so we automatically pull 1st paragraph
+                        index = 0
+                
+        return sentiment(articletext[index+2])     #return sentiment. +2 accounts for first empty lines
+    except:
+                                      #surprisingly some congressmen don't have wikipedia articles.
+       return (0,0)                   #returns no wikipedia sentiment if error is thrown
+                                      #(wikipedia article does not exist)
+    
+#populate list of current voting members of house and senate     
+    
+republicans = [] #empty list for republican names
+
+democrats = [] #empty list for democrat names
+
+
+senators = Wikipedia().search('Current members of the United States House of Representatives')  #pulls wikipedia article for senators
+senatorsections = senators.sections[5] #pulls section of senators wikipedia article to read (voting members)
+senatorlist = senatorsections.tables[0]#pulls list of current senators in table form
+
+#iterates thru the table and classifies article names with Democrats and Republicans
+for i in range(len(senatorlist.rows)):      
+    namelist = senatorlist.rows[i][1] #pulls name in odd form [last, firstfirst last]
+    namelength = (len(namelist)-((len(namelist)-3)/2)) #custom index based on how pattern returns the names
+    name = namelist[namelength-1:len(namelist)] #grabs name in format [first last]
+    if senatorlist.rows[i][3] == 'Republican':  #sorts republicans into republicans and democrats into democrats
+        republicans.append(name.encode('utf-8'))#NOTE: encode to string
+    if senatorlist.rows[i][3] == 'Democratic':
+        democrats.append(name.encode('utf-8'))
+
+
+republicanscores = [] #empty list for republican scores
+democraticscores = [] #empty list for democrat scores
+
+republicancount = 0.0 #counter of how many republican scores 
+democratcount = 0.0 #counter of how many democratic scores
+
+#iterate thru list of republicans and find sentiment
+
+for i in republicans:
+    print i
+    republicancount += 1.0 #keep track of how many republican scores
+    republicanscores.append(sentiment_finder(i))
+    
+
+
+
+#iterate thru list of democrats and find sentiment
+
+
+for i in democrats:
+    print i
+    democratcount += 1.0 #keep track of how many democrat scores
+    democraticscores.append(sentiment_finder(i))
+  
+
+
+republicanaverage = [0,0] #empty list for average republican scores
+democrataverage = [0,0]#empty list for average democrat scores
+
+#sum and average democrat and republican scores
+for i in democraticscores:
+    democrataverage[0] += i[0]
+    democrataverage[1] += i[1]
+
+for i in republicanscores:
+    republicanaverage[0] += i[0]
+    republicanaverage[1] += i[1]
+
+republicanaverage[0] = republicanaverage[0]/republicancount
+republicanaverage[1] = republicanaverage[1]/republicancount
+democrataverage[0] = democrataverage[0]/democratcount
+democrataverage[1] = democrataverage[1]/democratcount
+
+#Print final scores
+print "final scores"
+
+print "Republicans"
+print republicanaverage
+print "Democrats"
+print democrataverage
+    

From 0c078e87ca849d1ef1f57c5b0ca611e3bfd414ab Mon Sep 17 00:00:00 2001
From: arpanrau <arpan.rau@gmail.com>
Date: Mon, 10 Oct 2016 16:26:34 -0400
Subject: [PATCH 2/2] Minor style revisions made

---
 wikipedia_sentiment_revised.py | 80 ++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 wikipedia_sentiment_revised.py

diff --git a/wikipedia_sentiment_revised.py b/wikipedia_sentiment_revised.py
new file mode 100644
index 0000000..60ce54f
--- /dev/null
+++ b/wikipedia_sentiment_revised.py
@@ -0,0 +1,80 @@
+#scrapes wikipedia and performs sentiment analysis to figure out what wikipedia thinks about
+#congress democrats and republicans
+
+from pattern.web import *
+from pattern.en import * 
+
+def sentiment_finder(searchterm):
+    """Finds sentiment of a given article on wikipedia.
+        accepts string searchterm where searchterm is the title of a wikipedia article.
+        Returns vector of (sentiment, objectivity)
+        where sentiment is between -1.0 and 1.0 and objectivity is between 0 and 1"""
+    try:
+        article = Wikipedia().search(searchterm) #pull article from wikipedia 
+        articletext = str.splitlines(article.plaintext().encode('utf-8')) 
+        founde = False 
+        for i in range(len(articletext)): #loop thru and find the first line in the article 
+                if articletext[i] == '* e':   #First line of article content marked by *e if article starts with bulleted lists
+                        index = i
+                        founde = True
+                if founde == False:          #if we haven't found e yet, no bulleted list so we automatically pull 1st paragraph
+                        index = 0
+                
+        return sentiment(articletext[index+2])     #return sentiment. +2 accounts for first empty lines
+    except:
+        return (0,0)                   #returns no wikipedia sentiment if error is thrown
+
+#populate list of current voting members of house and senate         
+republicans = [] 
+democrats = [] 
+senators = Wikipedia().search('Current members of the United States House of Representatives')  #pulls wikipedia article for senators
+senatorsections = senators.sections[5] #pulls section of senators wikipedia article to read (voting members)
+senatorlist = senatorsections.tables[0]#pulls list of current senators in table form
+#iterates thru the table and classifies article names with Democrats and Republicans
+for i in range(len(senatorlist.rows)):      
+    namelist = senatorlist.rows[i][1] 
+    namelength = (len(namelist)-((len(namelist)-3)/2)) #custom index based on how pattern returns the names
+    name = namelist[namelength-1:len(namelist)] 
+    if senatorlist.rows[i][3] == 'Republican':  #NOTE: encode to string
+        republicans.append(name.encode('utf-8'))
+    if senatorlist.rows[i][3] == 'Democratic':
+        democrats.append(name.encode('utf-8'))
+        
+republicanscores = [] 
+democraticscores = []
+republicancount = 0.0
+democratcount = 0.0 
+#iterate thru list of republicans and find sentiment
+for i in republicans:
+    print i
+    republicancount += 1.0 
+    republicanscores.append(sentiment_finder(i))
+#iterate thru list of democrats and find sentiment
+for i in democrats:
+    print i
+    democratcount += 1.0 
+    democraticscores.append(sentiment_finder(i))
+
+republicanaverage = [0,0] 
+democrataverage = [0,0]
+#sum and average democrat and republican scores
+for i in democraticscores:
+    democrataverage[0] += i[0]
+    democrataverage[1] += i[1]
+
+for i in republicanscores:
+    republicanaverage[0] += i[0]
+    republicanaverage[1] += i[1]
+
+republicanaverage[0] = republicanaverage[0]/republicancount
+republicanaverage[1] = republicanaverage[1]/republicancount
+democrataverage[0] = democrataverage[0]/democratcount
+democrataverage[1] = democrataverage[1]/democratcount
+
+#Print final scores
+print "final scores"
+print "Republicans"
+print republicanaverage
+print "Democrats"
+print democrataverage
+