{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Amazon musical-instrument reviews: pre-processing and POS tagging\n",
    "\n",
    "Starter code that reads the compressed review file, leaves a hook for tokenizing and normalizing each review, writes the task results to `p1_t1.csv` and `p1_t2.csv`, and shows how to call the Stanford POS tagger through NLTK."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import gzip\n",
    "import json\n",
    "import os\n",
    "\n",
    "import nltk\n",
    "from nltk.tag.stanford import StanfordPOSTagger"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Set the Java environment variables:\n",
    "# CLASSPATH is the path to stanford-postagger.jar on your local disk.\n",
    "# STANFORD_MODELS is the path to the tagger file on your local disk.\n",
    "# Edit both paths to match your own installation.\n",
    "# NOTE(review): STANFORD_MODELS names the left3words model, while the example\n",
    "# cell at the end requests 'english-bidirectional-distsim.tagger' -- confirm\n",
    "# which model is intended.\n",
    "os.environ['CLASSPATH'] = '/opt/local/stanford-postagger-2014-01-04/stanford-postagger.jar'\n",
    "os.environ['STANFORD_MODELS'] = '/opt/local/stanford-postagger-2014-01-04/models/english-left3words-distsim.tagger'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def parse(path):\n",
    "    \"\"\"Read the compressed Amazon reviews for musical instruments.\n",
    "\n",
    "    Opens the gzip file at `path` and yields one JSON string per line: each\n",
    "    line is evaluated as a Python expression and re-serialized with\n",
    "    `json.dumps`.\n",
    "    \"\"\"\n",
    "    # The context manager closes the file once the generator is exhausted\n",
    "    # or discarded, instead of leaving the handle open.\n",
    "    with gzip.open(path, 'r') as review_file:\n",
    "        for line in review_file:\n",
    "            # SECURITY: eval() runs each line as Python code, so only use this\n",
    "            # on review files you trust.\n",
    "            yield json.dumps(eval(line))\n",
    "\n",
    "\n",
    "def pre_process(input_filename):\n",
    "    \"\"\"Process the reviews: tokenizing, normalizing.\n",
    "\n",
    "    `input_filename` is the path to the downloaded .json.gz file of the\n",
    "    reviews.\n",
    "    \"\"\"\n",
    "    # for each review\n",
    "    for record in parse(input_filename):\n",
    "\n",
    "        # The variable \"review\" is a Python string containing the text of the review.\n",
    "        review = json.loads(record)['reviewText']\n",
    "\n",
    "        # your code to process the review text goes below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def output_task_1(attributes, frequencies, labels, supp_sents):\n",
    "    \"\"\"Write the task 1 results to p1_t1.csv.\n",
    "\n",
    "    The four inputs are zipped together, so one line is written per position\n",
    "    and output stops at the shortest input. Each line has the form\n",
    "    `attribute,frequency,label,sentence`.\n",
    "\n",
    "    NOTE: fields are joined with plain commas and are not quoted, so a comma\n",
    "    inside an attribute or sentence produces extra columns.\n",
    "    \"\"\"\n",
    "    with open('p1_t1.csv', 'w') as out_file:\n",
    "        # Keep the file handle's name distinct from the loop variables so that\n",
    "        # write() is called on the file and not on a frequency value.\n",
    "        for attribute, frequency, label, sentence in zip(attributes, frequencies, labels, supp_sents):\n",
    "            out_file.write(attribute + ',' + str(frequency) + ',' + str(label) + ',' + sentence + '\\n')\n",
    "\n",
    "\n",
    "def output_task_2(pairs, labels, supp_sents):\n",
    "    \"\"\"Write the task 2 results to p1_t2.csv.\n",
    "\n",
    "    The three inputs are zipped together, so one line is written per position\n",
    "    and output stops at the shortest input. Each line has the form\n",
    "    `(first,second),label,sentence`, where `first` and `second` are the two\n",
    "    elements of the pair.\n",
    "    \"\"\"\n",
    "    with open('p1_t2.csv', 'w') as out_file:\n",
    "        for pair, label, sentence in zip(pairs, labels, supp_sents):\n",
    "            out_file.write('(' + pair[0] + ',' + pair[1] + '),' + str(label) + ',' + sentence + '\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Example of using the Stanford POS tagger on a whitespace-tokenized sentence.\n",
    "st = StanfordPOSTagger('english-bidirectional-distsim.tagger')\n",
    "st.tag('Get your copy , today , along with \"The Green Violin ; Theory , Ear Training , and Musicianship for Violinists \" book to prepare for this concerto and for more advanced playing !'.split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}