From 72a675ead8edf327b0ec8bf5e26fcb57d4ffab17 Mon Sep 17 00:00:00 2001
From: Qingyun Liu <lqyeric94@gmail.com>
Date: Mon, 3 Oct 2016 00:17:42 -0400
Subject: [PATCH 1/3] Turning in my mini project 1

---
 mini_project_1.ipynb | 166 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 mini_project_1.ipynb

diff --git a/mini_project_1.ipynb b/mini_project_1.ipynb
new file mode 100644
index 0000000..1ea2ff0
--- /dev/null
+++ b/mini_project_1.ipynb
@@ -0,0 +1,166 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'the'"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pattern.web import *\n",
+    "import string\n",
+    "\n",
+    "christianity=URL(\"http://www.gutenberg.org/cache/epub/8294/pg8294.txt\").download()\n",
+    "\n",
+    "\n",
+    "\n",
+    "                \n",
+    "def process_file(filename):\n",
+    "    dic = dict()\n",
+    "    fil = filename\n",
+    "    for word in fil.split():\n",
+    "        words = word.replace(\"-\",\" \")\n",
+    "        wordss = words.strip(string.punctuation + string.whitespace)\n",
+    "        wordss = wordss.lower()\n",
+    "        if wordss in dic:\n",
+    "            dic[wordss] += 1\n",
+    "        else:\n",
+    "            dic [wordss] = 1\n",
+    "    le=[]\n",
+    "    for a in dic:\n",
+    "        le.append((dic[a],a))\n",
+    "    le.sort(reverse=True)\n",
+    "    for a,b in le:\n",
+    "        print b\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "\n",
+    "process_file(christianity)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'the'"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pattern.web import *\n",
+    "import string\n",
+    "\n",
+    "buddhism=URL(\"http://www.gutenberg.org/files/15255/15255-0.txt\").download()\n",
+    "\n",
+    "\n",
+    "\n",
+    "                \n",
+    "def process_file(filename):\n",
+    "    dic = dict()\n",
+    "    fil = filename\n",
+    "    for word in fil.split():\n",
+    "        words = word.replace(\"-\",\" \")\n",
+    "        wordss = words.strip(string.punctuation + string.whitespace)\n",
+    "        wordss = wordss.lower()\n",
+    "        if wordss in dic:\n",
+    "            dic[wordss] += 1\n",
+    "        else:\n",
+    "            dic [wordss] = 1\n",
+    "    le=[]\n",
+    "    for a in dic:\n",
+    "        le.append((dic[a],a))\n",
+    "    le.sort(reverse=True)\n",
+    "    for a,b in le:\n",
+    "        print b\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "\n",
+    "process_file(buddhism)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "Project overview:\n",
+    "    For the data, I used two books from Project Gutenberg to analyze. I used the technique of counting the top\n",
+    "    ten words that appeared in each book. The two books that I picked are religious books: one is about Buddhism\n",
+    "    and the other one is about Christianity. I want to see how the top words in these two books can show the difference\n",
+    "    between the two religions.\n",
+    "Implementation:\n",
+    "    The main logic of my analysis is to create a dictionary that has all the words in the book as keys, and every time\n",
+    "    a word shows up again, the value of its key increases by one. That way the frequency of all the words in the book\n",
+    "    will be found. At the beginning I thought about whether I should download the book to the computer first or I should\n",
+    "    just create a variable and set its value to be the book content. I decided to create a variable so that all\n",
+    "    computers will be able to use my code directly.\n",
+    "Results:\n",
+    "    The top ten words of the Christianity book are God, Israel, Son, Man, King, People, We, Children, Land, and Father.\n",
+    "    The top ten words of the Buddhism book are Buddha, Life, India, Existence, Human, Ideas, Knowledge, Universe, Intellectual,\n",
+    "    and China.\n",
+    "    I think the words are definitely different. They both show the central figure of each religion: God and Buddha. They also\n",
+    "    show the main place of each religion: Israel and India. The other words show that Christianity is more about people.\n",
+    "    Words like son, we, father, and children show that Christianity is mainly about people and people's relationships.\n",
+    "    In contrast, the Buddhism words are more about thinking and ideas: words like life, idea, knowledge, universe, and\n",
+    "    intellectual. These words show how Buddhism pays more attention to how people think and the importance of thinking.\n",
+    "Reflection:\n",
+    "    The process went pretty smoothly since I had a lot of similar practice in the completion of the reading journals.\n",
+    "    One thing that I need to pay attention to later on is that I need to be more careful about what format the input is\n",
+    "    in. The first time I ran the code, I got a count of each of the alphabetical letters in the book. That is\n",
+    "    because I did not realize that the input is not separated by lines. So I over-divided it and got all letters instead\n",
+    "    of words.\n",
+    "    "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

From 615bf7c85bbd351b112c88292e0b99b93e7e379f Mon Sep 17 00:00:00 2001
From: lqyeric94 <lqyeric94@gmail.com>
Date: Mon, 3 Oct 2016 00:21:25 -0400
Subject: [PATCH 2/3] Add files via upload

---
 mini_project_1.ipynb | 166 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 mini_project_1.ipynb

diff --git a/mini_project_1.ipynb b/mini_project_1.ipynb
new file mode 100644
index 0000000..1ea2ff0
--- /dev/null
+++ b/mini_project_1.ipynb
@@ -0,0 +1,166 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'the'"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pattern.web import *\n",
+    "import string\n",
+    "\n",
+    "christianity=URL(\"http://www.gutenberg.org/cache/epub/8294/pg8294.txt\").download()\n",
+    "\n",
+    "\n",
+    "\n",
+    "                \n",
+    "def process_file(filename):\n",
+    "    dic = dict()\n",
+    "    fil = filename\n",
+    "    for word in fil.split():\n",
+    "        words = word.replace(\"-\",\" \")\n",
+    "        wordss = words.strip(string.punctuation + string.whitespace)\n",
+    "        wordss = wordss.lower()\n",
+    "        if wordss in dic:\n",
+    "            dic[wordss] += 1\n",
+    "        else:\n",
+    "            dic [wordss] = 1\n",
+    "    le=[]\n",
+    "    for a in dic:\n",
+    "        le.append((dic[a],a))\n",
+    "    le.sort(reverse=True)\n",
+    "    for a,b in le:\n",
+    "        print b\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "\n",
+    "process_file(christianity)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'the'"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pattern.web import *\n",
+    "import string\n",
+    "\n",
+    "buddhism=URL(\"http://www.gutenberg.org/files/15255/15255-0.txt\").download()\n",
+    "\n",
+    "\n",
+    "\n",
+    "                \n",
+    "def process_file(filename):\n",
+    "    dic = dict()\n",
+    "    fil = filename\n",
+    "    for word in fil.split():\n",
+    "        words = word.replace(\"-\",\" \")\n",
+    "        wordss = words.strip(string.punctuation + string.whitespace)\n",
+    "        wordss = wordss.lower()\n",
+    "        if wordss in dic:\n",
+    "            dic[wordss] += 1\n",
+    "        else:\n",
+    "            dic [wordss] = 1\n",
+    "    le=[]\n",
+    "    for a in dic:\n",
+    "        le.append((dic[a],a))\n",
+    "    le.sort(reverse=True)\n",
+    "    for a,b in le:\n",
+    "        print b\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "\n",
+    "process_file(buddhism)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "Project overview:\n",
+    "    For the data, I used two books from Project Gutenberg to analyze. I used the technique of counting the top\n",
+    "    ten words that appeared in each book. The two books that I picked are religious books: one is about Buddhism\n",
+    "    and the other one is about Christianity. I want to see how the top words in these two books can show the difference\n",
+    "    between the two religions.\n",
+    "Implementation:\n",
+    "    The main logic of my analysis is to create a dictionary that has all the words in the book as keys, and every time\n",
+    "    a word shows up again, the value of its key increases by one. That way the frequency of all the words in the book\n",
+    "    will be found. At the beginning I thought about whether I should download the book to the computer first or I should\n",
+    "    just create a variable and set its value to be the book content. I decided to create a variable so that all\n",
+    "    computers will be able to use my code directly.\n",
+    "Results:\n",
+    "    The top ten words of the Christianity book are God, Israel, Son, Man, King, People, We, Children, Land, and Father.\n",
+    "    The top ten words of the Buddhism book are Buddha, Life, India, Existence, Human, Ideas, Knowledge, Universe, Intellectual,\n",
+    "    and China.\n",
+    "    I think the words are definitely different. They both show the central figure of each religion: God and Buddha. They also\n",
+    "    show the main place of each religion: Israel and India. The other words show that Christianity is more about people.\n",
+    "    Words like son, we, father, and children show that Christianity is mainly about people and people's relationships.\n",
+    "    In contrast, the Buddhism words are more about thinking and ideas: words like life, idea, knowledge, universe, and\n",
+    "    intellectual. These words show how Buddhism pays more attention to how people think and the importance of thinking.\n",
+    "Reflection:\n",
+    "    The process went pretty smoothly since I had a lot of similar practice in the completion of the reading journals.\n",
+    "    One thing that I need to pay attention to later on is that I need to be more careful about what format the input is\n",
+    "    in. The first time I ran the code, I got a count of each of the alphabetical letters in the book. That is\n",
+    "    because I did not realize that the input is not separated by lines. So I over-divided it and got all letters instead\n",
+    "    of words.\n",
+    "    "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}

From 15d287dca4a6a704f9bff70f08929506941cc9de Mon Sep 17 00:00:00 2001
From: Qingyun Liu <lqyeric94@gmail.com>
Date: Mon, 3 Oct 2016 14:47:01 -0400
Subject: [PATCH 3/3] Turning in my mini project 1

---
 mini_project_1.ipynb | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/mini_project_1.ipynb b/mini_project_1.ipynb
index 1ea2ff0..945cb91 100644
--- a/mini_project_1.ipynb
+++ b/mini_project_1.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 4,
    "metadata": {
     "collapsed": false
    },
@@ -13,7 +13,7 @@
        "'the'"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -43,7 +43,7 @@
     "        le.append((dic[a],a))\n",
     "    le.sort(reverse=True)\n",
     "    for a,b in le:\n",
-    "        print b\n",
+    "        return b\n",
     "    \n",
     "    \n",
     "    \n",
@@ -104,6 +104,15 @@
     "process_file(buddhism)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": null,