Data Analysis of Movie Reviews Using Natural Language Processing
A tutorial on analyzing movie reviews with NLTK.
• 37 min read
# Fetch the NLTK resources the rest of this file relies on: the labelled
# movie-review corpus, the English stop-word list, the model behind
# `pos_tag`, and the WordNet data used by the lemmatizer.
import nltk
nltk.download('movie_reviews')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
[nltk_data] Downloading package movie_reviews to /root/nltk_data... [nltk_data] Package movie_reviews is already up-to-date! [nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package averaged_perceptron_tagger to [nltk_data] /root/nltk_data... [nltk_data] Package averaged_perceptron_tagger is already up-to- [nltk_data] date! [nltk_data] Downloading package wordnet to /root/nltk_data... [nltk_data] Unzipping corpora/wordnet.zip.
True
from nltk.corpus import movie_reviews
import random

# Pair each review's token sequence with the category it is filed under.
documents = [
    (movie_reviews.words(review_id), label)
    for label in movie_reviews.categories()
    for review_id in movie_reviews.fileids(label)
]
# The list is built category by category; shuffle it in place so that the
# positional train/test slicing done later does not see reviews grouped by
# category.
random.shuffle(documents)
from nltk.corpus import wordnet
from nltk import pos_tag
from nltk.corpus import stopwords
import string
from nltk.stem import WordNetLemmatizer
# One lemmatizer instance, reused for every token.
lemmatizer = WordNetLemmatizer()
# Punctuation characters are filtered out together with the English stop words.
punctuations = list(string.punctuation)
stops = set(stopwords.words('english')).union(punctuations)
def get_simple_pos(tag):
    """Map a POS tag string onto one of the WordNet part-of-speech constants.

    The leading letter of ``tag`` decides the result: ``J`` -> adjective,
    ``N`` -> noun, ``V`` -> verb, ``R`` -> adverb. Every other tag falls
    back to noun.
    """
    prefix_table = (
        ('J', wordnet.ADJ),
        ('N', wordnet.NOUN),
        ('V', wordnet.VERB),
        ('R', wordnet.ADV),
    )
    for prefix, simple_pos in prefix_table:
        if tag.startswith(prefix):
            return simple_pos
    # Unrecognised tags are treated as nouns.
    return wordnet.NOUN
from functools import lru_cache


@lru_cache(maxsize=65536)
def _clean_token(word):
    """Return the lower-cased lemma of a single token.

    The token is tagged on its own (a one-element list is passed to
    ``pos_tag``), so the result depends only on ``word``. That makes it safe
    to memoize: a token that recurs across reviews is tagged and lemmatized
    once instead of on every occurrence.
    """
    pos = pos_tag([word])[0][1]
    return lemmatizer.lemmatize(word, pos=get_simple_pos(pos)).lower()


def clean_review(words):
    """Drop stop words and punctuation from a review and lemmatize the rest.

    Args:
        words: Iterable of string tokens making up one review.

    Returns:
        A list of lower-cased lemmas, in the original token order, for every
        token whose lower-cased form is not in ``stops``.
    """
    # NOTE(review): each token is tagged in isolation, so the tagger sees no
    # sentence context. This matches the original behaviour and is kept so
    # that results are unchanged.
    return [_clean_token(word) for word in words if word.lower() not in stops]
# Clean every review once up front; each cleaned token list stays paired
# with its category.
docs = [(clean_review(tokens), label) for tokens, label in documents]

# Flatten all cleaned reviews into one token list for frequency counting.
all_words = [token for cleaned, _ in docs for token in cleaned]

import nltk
# The 3000 most frequent tokens form the feature vocabulary.
# NOTE(review): the counts come from all of `docs`, i.e. before the
# train/test split that follows.
freq = nltk.FreqDist(all_words)
common = freq.most_common(3000)
features = [token for token, _count in common]
features
['film', 'movie', 'one', 'make', 'like', 'character', 'get', 'see', 'go', 'time', 'well', 'scene', 'even', 'good', 'story', 'take', 'would', 'much', 'come', 'also', 'bad', 'give', 'life', 'two', 'look', 'way', 'know', 'seem', 'first', 'end', '--', 'year', 'work', 'thing', 'plot', 'say', 'play', 'really', 'little', 'show', 'people', 'could', 'man', 'star', 'love', 'never', 'try', 'great', 'director', 'best', 'performance', 'new', 'big', 'many', 'action', 'actor', 'want', 'u', 'watch', 'find', 'think', 'role', 'act', 'another', 'back', 'audience', 'something', 'world', 'turn', 'still', 'day', 'old', 'set', 'however', 'use', 'every', 'begin', 'though', 'guy', 'part', 'comedy', 'feel', 'cast', 'real', 'enough', 'around', 'point', 'interest', 'last', 'run', 'write', 'young', 'may', 'fact', 'name', 'long', 'funny', 'script', 'actually', 'right', 'minute', 'woman', 'effect', 'almost', 'lot', 'friend', 'nothing', 'john', 'place', 'although', 'screen', 'played', 'ever', 'start', 'moment', 'since', 'line', 'call', 'become', 'tell', 'lead', 'kill', 'original', 'help', 'high', 'family', 'without', 'three', 'problem', 'girl', 'picture', 'least', 'quite', 'sequence', 'need', 'away', 'course', 'laugh', 'might', 'far', 'fall', 'rather', 'must', 'anything', 'put', 'bit', 'include', 'child', 'job', 'american', 'yet', 'keep', 'follow', 'wife', 'alien', 'kind', 'hour', 'hard', 'always', 'reason', 'fun', 'head', 'feature', 'special', 'attempt', 'home', 'hand', 'hollywood', 'human', 'instead', 'lose', 'sense', 'move', 'war', 'mind', 'series', 'face', 'black', 'night', 'let', 'half', 'probably', 'kid', 'shot', 'along', 'men', 'everything', 'idea', 'pretty', 'becomes', 'direct', 'dialogue', 'sure', 'together', 'force', 'money', 'involve', 'believe', 'talk', 'father', 'save', 'whole', 'fight', 'horror', 'death', 'appear', 'boy', 'city', 'everyone', 'question', 'sex', 'music', 'less', 'do', 'release', 'second', 'couple', 'thought', 'brother', 'eye', 'perhaps', 'small', 'case', 'next', 
'especially', 'expect', 'happen', 'meet', 'relationship', '10', 'word', 'mother', 'base', 'create', 'completely', 'care', 'rest', '2', 'whose', 'evil', 'writer', 'change', 'james', 'different', 'sound', 'simply', 'mean', 'book', 'mr', 'anyone', 'joke', 'school', 'michael', 'late', 'review', 'several', 'top', 'dead', 'humor', 'suppose', 'live', 'lack', 'left', 'add', 'true', 'matter', 'town', 'entire', 'hit', 'group', 'found', 'fan', 'comic', 'soon', 'house', 'someone', 'main', 'entertain', 'tv', 'game', 'side', 'leave', 'wrong', 'present', 'wonder', 'david', 'else', 'either', 'element', 'full', 'final', 'stop', 'unfortunately', 'later', 'viewer', 'murder', 'car', 'open', 'style', 'enjoy', 'camera', 'support', 'return', 'credit', 'robert', 'often', 'provide', 'power', 'die', 'deal', 'son', 'hero', 'behind', 'person', 'certainly', 'title', 'result', 'team', 'scream', 'despite', 'perfect', 'finally', 'video', 'order', 'nice', 'killer', 'stand', 'note', 'miss', 'maybe', 'piece', 'summer', 'past', 'able', 'fine', 'view', 'consider', 'classic', 'strong', 'example', 'voice', 'daughter', 'hope', 'situation', 'theater', 'production', 'sort', 'thriller', 'close', 'event', 'dog', 'white', 'kevin', 'talent', 'break', 'drama', 'joe', 'body', 'worth', 'earth', 'heart', 'bring', 'short', 'cut', 'version', 'level', 'self', 'dark', 'room', 'dream', 'cop', 'early', 'opening', 'nearly', 'upon', 'screenplay', 'light', 'experience', 'ask', 'violence', 'major', 'art', 'age', 'throughout', 'figure', 'ship', 'direction', 'beautiful', 'computer', 'jack', 'exactly', 'realize', 'obvious', 'state', 'genre', 'disney', 'already', 'plan', 'others', 'number', 'fill', 'guess', 'wait', 'space', 'waste', 'five', 'deep', 'simple', 'offer', 'form', 'four', 'jackie', 'surprise', 'twist', 'learn', 'king', 'walk', 'touch', 'pull', 'chase', 'career', 'flick', 'sometimes', 'rise', 'oscar', 'novel', 'member', 'hold', 'truly', 'filmmaker', 'battle', '1', 'tom', 'boring', 'husband', 'easy', 'pace', 'drug', 
'lee', 'god', 'peter', 'sequel', 'type', 'york', 'planet', 'fiction', 'charm', 'win', 'carry', 'parent', 'happens', 'yes', 'saw', 'song', 'score', 'remember', 'tale', 'quickly', 'stupid', 'romantic', 'possible', 'gun', 'villain', 'chance', 'material', 'manages', 'extremely', 'mostly', 'de', 'future', 'attention', 'project', 'single', 'particularly', 'escape', 'focus', 'paul', 'quality', 'mention', 'police', 'none', 'co', 'van', 'stay', 'eventually', 'decide', 'crime', 'wild', 'hell', 'emotional', 'science', 'image', 'smith', 'shoot', 'large', 'sit', 'dr', 'girlfriend', 'pay', 'living', 'detail', 'steal', 'rock', 'drive', 'george', 'success', 'low', 'actress', 'alone', 'theme', 'obviously', 'cover', 'television', 'million', 'within', 'usually', 'water', 'among', 'cause', 'premise', 'middle', 'read', 'secret', 'aspect', 'complete', '3', 'understand', 'flaw', 'across', 'subject', 'chris', 'except', 'mission', 'poor', 'history', 'slow', 'reality', 'local', 'wonderful', 'whether', 'serious', 'amaze', 'oh', 'agent', 'crew', 'law', 'important', 'effort', 'motion', 'happy', 'mark', 'explain', 'impressive', 'studio', 'near', 'america', 'told', 'stuff', 'office', 'entertainment', 'street', 'cool', 'robin', 'basically', 'recent', 'produce', 'easily', 'ryan', 'apparently', 'message', 'vampire', 'wish', 'williams', 'screenwriter', 'party', 'mystery', 'attack', 'somehow', 'blood', 'william', 'fear', 'bill', 'lie', 'fast', 'jones', 'control', 'doubt', 'fire', 'batman', 'brings', 'suspense', 'straight', 'producer', 'hilarious', 'difficult', 'red', 'ben', 'ago', 'appeal', 'certain', 'country', 'romance', 'approach', 'popular', 'company', 'presence', 'business', 'due', 'effective', 'critic', 'confuse', 'adult', 'fly', 'throw', 'answer', 'annoy', 'smart', 'teen', 'fail', 'trouble', 'third', 'dramatic', 'general', 'budget', 'class', 'sexual', 'date', 'discover', 'clear', 'speak', 'rich', 'surprisingly', 'anyway', 'us', 'personal', '4', 'emotion', 'decides', 'share', 'cross', 
'somewhat', 'rat', 'harry', 'ability', 'choice', 'successful', 'prison', 'absolutely', 'sister', 'cinema', 'former', 'excite', 'jim', 'previous', 'similar', 'allow', 'strange', 'student', 'excellent', 'familiar', 'leaf', 'intelligent', 'rule', 'bob', 'predictable', 'dance', 'towards', 'giant', 'powerful', 'beyond', 'b', 'shock', 'box', 'animate', 'visual', 'r', 'victim', 'trailer', 'nature', 'la', 'sam', 'deliver', 'tone', 'clever', 'blue', 'catch', 'definitely', 'usual', 'martin', 'murphy', 'master', 'felt', 'brilliant', 'scary', 'wedding', 'suspect', 'manage', 'stone', 'capture', 'musical', 'solid', 'reveal', 'appearance', 'bunch', 'mess', 'rating', 'favorite', 'pick', 'land', 'travel', 'handle', 'reach', 'free', 'grow', 'potential', 'seriously', 'treat', 'track', 'wear', 'huge', 'hunt', 'search', 'continue', 'week', 'amount', 'married', 'sweet', 'unlike', 'l', 'perfectly', 'join', 'issue', 'non', 'park', 'ex', 'hate', 'bond', 'adventure', 'enjoyable', 'scott', 'strike', 'e', 'decent', 'likely', 'ten', 'immediately', 'frank', 'heard', 'truman', 'depth', 'cameron', 'private', 'door', 'monster', 'bruce', 'overall', 'carter', 'inside', 'fails', 'impossible', 'richard', 'cold', 'race', 'truth', 'toy', 'trek', 'merely', 'particular', 'step', 'purpose', 'air', 'prove', 'color', 'gag', 'mar', 'sign', 'struggle', 'sight', 'neither', 'design', 'drop', 'ultimately', 'modern', 'thrill', 'personality', 'list', 'brought', 'spirit', 'pop', 'club', 'tim', 'cliche', 'term', 'otherwise', 'society', '5', 'wood', 'allen', 'key', 'dumb', 'player', 'fashion', 'government', 'serve', 'talented', 'west', 'succeed', 'opportunity', 'amuse', 'various', 'political', 'mouth', 'angel', 'army', 'soundtrack', 'development', 'sell', 'female', 'raise', 'english', 'silly', 'haunt', 'detective', 'slightly', 'steve', 'hank', 'train', 'month', 'introduce', 'roll', 'kiss', 'spend', 'tension', 'lover', 'require', 'imagine', 'compare', 'eddie', 'grace', 'foot', 'disaster', 'baby', 'today', 'creature', 
'earlier', 'chan', '90', 'six', 'hill', 'memorable', 'heavy', 'award', 'max', 'cannot', 'background', 'rescue', 'episode', 'ground', 'totally', 'notice', 'leader', 'soldier', 'respect', 'front', 'woody', 'fi', 'ape', 'engage', 'machine', 'animation', 'arm', 'hop', 'sci', 'edit', 'simon', 'doctor', 'costume', 'ride', 'mary', 'terrible', 'constantly', 'steven', 'entirely', 'actual', 'impact', 'british', 'queen', 'minor', 'suffer', 'promise', 'convincing', 'standard', 'fantasy', 'brief', 'menace', 'animal', 'atmosphere', 'nick', 'quick', 'cinematography', 'ridiculous', 'officer', 'typical', 'rush', 'island', 'spent', '8', 'violent', 'double', 'partner', 'subtle', 'trip', 'bug', 'adam', 'road', 'building', 'grant', 'cameo', 'fairly', 'seven', 'dollar', 'inspire', 'concept', 'cheap', 'dress', 'beauty', 'suddenly', 'willis', 'whatever', 'bore', 'remain', 'highly', 'store', 'complex', 'college', 'president', 'dull', 'flat', 'indeed', 'pair', 'suit', 'recommend', 'ii', 'basic', 'mike', 'cute', 'godzilla', 'outside', 'brain', 'kick', 'frame', 'recently', 'plenty', 'titanic', 'cinematic', 'meanwhile', 'climax', 'clearly', 'x', 'hear', 'intrigue', 'awful', 'buddy', 'longer', 'prof', 'forget', 'tough', 'common', 'chemistry', 'century', 'sean', 'period', 'realistic', 'believable', 'admit', 'protagonist', 'conclusion', 'scientist', 'lawyer', 'language', 'possibly', 'encounter', 'band', 'male', 'french', 'aside', 'twenty', 'sing', 'camp', 'hang', 'carrey', 'brown', 'jerry', 'thin', 'somewhere', 'slowly', 'center', 'hot', 'witch', 'system', ...]
# The first 1500 cleaned reviews train the classifier; the rest are held out
# for testing.
training_documents, testing_documents = docs[:1500], docs[1500:]
def get_feature_dict(words, feature_words=None):
    """Build a presence/absence feature mapping for one document.

    Args:
        words: Iterable of hashable tokens belonging to the document.
        feature_words: Iterable of vocabulary tokens to test for. When
            omitted, the module-level ``features`` list is used, so existing
            one-argument calls behave as before.

    Returns:
        A dict with one entry per vocabulary token, in vocabulary order,
        whose value is ``True`` if the token occurs in ``words`` and
        ``False`` otherwise.
    """
    if feature_words is None:
        feature_words = features
    # Build the set once so each vocabulary lookup is a constant-time check.
    present = set(words)
    return {token: token in present for token in feature_words}
# Turn each cleaned review into a (feature dict, label) pair for the
# classifier calls below.
training_data = [
    (get_feature_dict(tokens), label) for tokens, label in training_documents
]
testing_data = [
    (get_feature_dict(tokens), label) for tokens, label in testing_documents
]

from nltk import NaiveBayesClassifier
# Fit on the training portion, then score on the held-out portion.
clf = NaiveBayesClassifier.train(training_data)
nltk.classify.accuracy(clf, testing_data)
0.834
# Print the 500 features the trained classifier ranks as most informative.
clf.show_most_informative_features(500)
Most Informative Features
outstanding = True pos : neg = 10.3 : 1.0
uninspired = True neg : pos = 9.9 : 1.0
seagal = True neg : pos = 8.7 : 1.0
ludicrous = True neg : pos = 8.2 : 1.0
stupidity = True neg : pos = 7.9 : 1.0
breathtaking = True pos : neg = 6.9 : 1.0
idiotic = True neg : pos = 6.7 : 1.0
castle = True pos : neg = 6.7 : 1.0
bottle = True neg : pos = 6.5 : 1.0
damon = True pos : neg = 6.4 : 1.0
mulan = True pos : neg = 6.0 : 1.0
poker = True pos : neg = 6.0 : 1.0
turkey = True neg : pos = 5.9 : 1.0
wonderfully = True pos : neg = 5.9 : 1.0
lame = True neg : pos = 5.9 : 1.0
awful = True neg : pos = 5.8 : 1.0
sat = True neg : pos = 5.5 : 1.0
belief = True pos : neg = 5.5 : 1.0
poorly = True neg : pos = 5.4 : 1.0
anger = True pos : neg = 5.3 : 1.0
welles = True neg : pos = 5.2 : 1.0
religion = True pos : neg = 5.0 : 1.0
inane = True neg : pos = 4.9 : 1.0
waste = True neg : pos = 4.8 : 1.0
emperor = True pos : neg = 4.8 : 1.0
era = True pos : neg = 4.8 : 1.0
stupid = True neg : pos = 4.7 : 1.0
random = True neg : pos = 4.7 : 1.0
idiot = True neg : pos = 4.7 : 1.0
alicia = True neg : pos = 4.6 : 1.0
ridiculous = True neg : pos = 4.6 : 1.0
fantastic = True pos : neg = 4.6 : 1.0
chuckle = True neg : pos = 4.5 : 1.0
whatsoever = True neg : pos = 4.5 : 1.0
beautifully = True pos : neg = 4.5 : 1.0
lifeless = True neg : pos = 4.4 : 1.0
shine = True pos : neg = 4.4 : 1.0
delight = True pos : neg = 4.4 : 1.0
affect = True pos : neg = 4.3 : 1.0
unfunny = True neg : pos = 4.3 : 1.0
pointless = True neg : pos = 4.3 : 1.0
painfully = True neg : pos = 4.3 : 1.0
ordinary = True pos : neg = 4.2 : 1.0
dull = True neg : pos = 4.2 : 1.0
traditional = True pos : neg = 4.1 : 1.0
garbage = True neg : pos = 4.1 : 1.0
flawless = True pos : neg = 4.1 : 1.0
bland = True neg : pos = 4.1 : 1.0
freddie = True neg : pos = 4.0 : 1.0
memorable = True pos : neg = 4.0 : 1.0
jolie = True neg : pos = 4.0 : 1.0
zeta = True neg : pos = 4.0 : 1.0
martha = True neg : pos = 4.0 : 1.0
jesus = True pos : neg = 3.9 : 1.0
allows = True pos : neg = 3.8 : 1.0
sinise = True neg : pos = 3.7 : 1.0
prinze = True neg : pos = 3.7 : 1.0
insult = True neg : pos = 3.7 : 1.0
ambitious = True pos : neg = 3.7 : 1.0
superb = True pos : neg = 3.6 : 1.0
rick = True neg : pos = 3.6 : 1.0
german = True pos : neg = 3.6 : 1.0
embarrass = True neg : pos = 3.6 : 1.0
balance = True pos : neg = 3.6 : 1.0
badly = True neg : pos = 3.5 : 1.0
terrible = True neg : pos = 3.5 : 1.0
felix = True pos : neg = 3.5 : 1.0
flynt = True pos : neg = 3.5 : 1.0
gas = True pos : neg = 3.5 : 1.0
ideal = True pos : neg = 3.5 : 1.0
respectively = True pos : neg = 3.4 : 1.0
initially = True pos : neg = 3.4 : 1.0
terrific = True pos : neg = 3.4 : 1.0
companion = True pos : neg = 3.4 : 1.0
comfort = True pos : neg = 3.4 : 1.0
anywhere = True neg : pos = 3.4 : 1.0
grip = True pos : neg = 3.4 : 1.0
excellent = True pos : neg = 3.4 : 1.0
affection = True pos : neg = 3.3 : 1.0
delightful = True pos : neg = 3.3 : 1.0
potentially = True neg : pos = 3.3 : 1.0
anna = True pos : neg = 3.3 : 1.0
parker = True pos : neg = 3.3 : 1.0
scorsese = True pos : neg = 3.3 : 1.0
promising = True neg : pos = 3.2 : 1.0
crowe = True pos : neg = 3.2 : 1.0
harris = True pos : neg = 3.2 : 1.0
boring = True neg : pos = 3.2 : 1.0
stun = True pos : neg = 3.2 : 1.0
mess = True neg : pos = 3.2 : 1.0
politics = True pos : neg = 3.2 : 1.0
schwarzenegger = True neg : pos = 3.1 : 1.0
remotely = True neg : pos = 3.1 : 1.0
bother = True neg : pos = 3.1 : 1.0
obi = True pos : neg = 3.1 : 1.0
darth = True pos : neg = 3.1 : 1.0
gradually = True pos : neg = 3.1 : 1.0
portrait = True pos : neg = 3.1 : 1.0
natural = True pos : neg = 3.1 : 1.0
lonely = True pos : neg = 3.1 : 1.0
perfectly = True pos : neg = 3.1 : 1.0
subtle = True pos : neg = 3.1 : 1.0
braveheart = True pos : neg = 3.1 : 1.0
shall = True pos : neg = 3.1 : 1.0
mature = True pos : neg = 3.1 : 1.0
dumb = True neg : pos = 3.0 : 1.0
uninteresting = True neg : pos = 3.0 : 1.0
noir = True pos : neg = 3.0 : 1.0
snake = True neg : pos = 3.0 : 1.0
inept = True neg : pos = 3.0 : 1.0
appropriately = True pos : neg = 3.0 : 1.0
detailed = True pos : neg = 3.0 : 1.0
consequence = True pos : neg = 3.0 : 1.0
clich = True neg : pos = 3.0 : 1.0
involves = True neg : pos = 3.0 : 1.0
laughable = True neg : pos = 3.0 : 1.0
innocence = True pos : neg = 3.0 : 1.0
italian = True pos : neg = 3.0 : 1.0
tribe = True neg : pos = 2.9 : 1.0
shark = True neg : pos = 2.9 : 1.0
rare = True pos : neg = 2.9 : 1.0
offensive = True neg : pos = 2.9 : 1.0
refresh = True pos : neg = 2.9 : 1.0
freedom = True pos : neg = 2.9 : 1.0
luckily = True pos : neg = 2.9 : 1.0
dillon = True pos : neg = 2.9 : 1.0
tucker = True pos : neg = 2.9 : 1.0
anakin = True pos : neg = 2.9 : 1.0
alice = True pos : neg = 2.9 : 1.0
philosophy = True pos : neg = 2.9 : 1.0
foster = True pos : neg = 2.9 : 1.0
angela = True pos : neg = 2.9 : 1.0
unusual = True pos : neg = 2.9 : 1.0
loyal = True pos : neg = 2.9 : 1.0
nomination = True pos : neg = 2.9 : 1.0
slide = True pos : neg = 2.9 : 1.0
pattern = True pos : neg = 2.9 : 1.0
sidney = True pos : neg = 2.9 : 1.0
fargo = True pos : neg = 2.9 : 1.0
li = True neg : pos = 2.8 : 1.0
patch = True neg : pos = 2.8 : 1.0
chick = True neg : pos = 2.8 : 1.0
frankly = True neg : pos = 2.8 : 1.0
portrayal = True pos : neg = 2.8 : 1.0
tedious = True neg : pos = 2.8 : 1.0
friendship = True pos : neg = 2.8 : 1.0
damme = True neg : pos = 2.8 : 1.0
depict = True pos : neg = 2.8 : 1.0
citizen = True pos : neg = 2.8 : 1.0
triumph = True pos : neg = 2.8 : 1.0
terribly = True neg : pos = 2.7 : 1.0
contrast = True pos : neg = 2.7 : 1.0
crap = True neg : pos = 2.7 : 1.0
dread = True pos : neg = 2.7 : 1.0
antic = True neg : pos = 2.7 : 1.0
clone = True neg : pos = 2.7 : 1.0
painful = True neg : pos = 2.7 : 1.0
training = True pos : neg = 2.7 : 1.0
niro = True pos : neg = 2.7 : 1.0
happiness = True pos : neg = 2.7 : 1.0
justice = True pos : neg = 2.7 : 1.0
subtitle = True pos : neg = 2.7 : 1.0
banderas = True neg : pos = 2.7 : 1.0
sutherland = True neg : pos = 2.7 : 1.0
golden = True pos : neg = 2.7 : 1.0
france = True pos : neg = 2.7 : 1.0
rip = True neg : pos = 2.7 : 1.0
randy = True neg : pos = 2.7 : 1.0
inventive = True pos : neg = 2.7 : 1.0
lethal = True neg : pos = 2.7 : 1.0
yell = True neg : pos = 2.6 : 1.0
jedi = True pos : neg = 2.6 : 1.0
liam = True pos : neg = 2.6 : 1.0
mass = True pos : neg = 2.6 : 1.0
gem = True pos : neg = 2.6 : 1.0
normal = True pos : neg = 2.6 : 1.0
overall = True pos : neg = 2.6 : 1.0
effective = True pos : neg = 2.6 : 1.0
rent = True neg : pos = 2.6 : 1.0
designer = True pos : neg = 2.6 : 1.0
drunk = True neg : pos = 2.6 : 1.0
greg = True neg : pos = 2.6 : 1.0
massive = True neg : pos = 2.6 : 1.0
nonsense = True neg : pos = 2.6 : 1.0
porn = True neg : pos = 2.6 : 1.0
satisfy = True pos : neg = 2.6 : 1.0
dicaprio = True pos : neg = 2.6 : 1.0
ace = True neg : pos = 2.6 : 1.0
sandler = True neg : pos = 2.6 : 1.0
fascinate = True pos : neg = 2.6 : 1.0
influence = True pos : neg = 2.6 : 1.0
epic = True pos : neg = 2.6 : 1.0
regard = True pos : neg = 2.5 : 1.0
extraordinary = True pos : neg = 2.5 : 1.0
screw = True neg : pos = 2.5 : 1.0
upset = True pos : neg = 2.5 : 1.0
creation = True pos : neg = 2.5 : 1.0
trilogy = True pos : neg = 2.5 : 1.0
midnight = True pos : neg = 2.5 : 1.0
stiller = True pos : neg = 2.5 : 1.0
highly = True pos : neg = 2.5 : 1.0
pulp = True pos : neg = 2.5 : 1.0
tyler = True neg : pos = 2.5 : 1.0
admire = True pos : neg = 2.5 : 1.0
intensity = True pos : neg = 2.5 : 1.0
fails = True neg : pos = 2.5 : 1.0
portrayed = True pos : neg = 2.5 : 1.0
somebody = True neg : pos = 2.5 : 1.0
suck = True neg : pos = 2.5 : 1.0
malkovich = True pos : neg = 2.5 : 1.0
strength = True pos : neg = 2.5 : 1.0
briefly = True pos : neg = 2.5 : 1.0
upper = True pos : neg = 2.5 : 1.0
pleasant = True pos : neg = 2.5 : 1.0
endear = True pos : neg = 2.5 : 1.0
saturday = True neg : pos = 2.4 : 1.0
zero = True neg : pos = 2.4 : 1.0
bury = True neg : pos = 2.4 : 1.0
jungle = True neg : pos = 2.4 : 1.0
anaconda = True neg : pos = 2.4 : 1.0
54 = True neg : pos = 2.4 : 1.0
stalk = True neg : pos = 2.4 : 1.0
political = True pos : neg = 2.4 : 1.0
remarkable = True pos : neg = 2.4 : 1.0
schindler = True pos : neg = 2.4 : 1.0
suppose = True neg : pos = 2.4 : 1.0
sadly = True neg : pos = 2.4 : 1.0
perfect = True pos : neg = 2.4 : 1.0
andy = True pos : neg = 2.4 : 1.0
uncle = True pos : neg = 2.4 : 1.0
october = True pos : neg = 2.4 : 1.0
mine = True neg : pos = 2.4 : 1.0
deserves = True pos : neg = 2.4 : 1.0
famous = True pos : neg = 2.4 : 1.0
phenomenon = True pos : neg = 2.4 : 1.0
likeable = True pos : neg = 2.4 : 1.0
snow = True pos : neg = 2.4 : 1.0
eve = True neg : pos = 2.4 : 1.0
acclaim = True pos : neg = 2.4 : 1.0
neighbor = True pos : neg = 2.4 : 1.0
beach = True pos : neg = 2.4 : 1.0
nevertheless = True pos : neg = 2.4 : 1.0
slip = True pos : neg = 2.4 : 1.0
wonderful = True pos : neg = 2.4 : 1.0
masterpiece = True pos : neg = 2.4 : 1.0
reality = True pos : neg = 2.4 : 1.0
warren = True neg : pos = 2.4 : 1.0
period = True pos : neg = 2.4 : 1.0
realistic = True pos : neg = 2.4 : 1.0
command = True pos : neg = 2.4 : 1.0
notch = True pos : neg = 2.4 : 1.0
north = True pos : neg = 2.4 : 1.0
laura = True pos : neg = 2.4 : 1.0
secretly = True pos : neg = 2.4 : 1.0
study = True pos : neg = 2.4 : 1.0
stereotype = True neg : pos = 2.4 : 1.0
gary = True neg : pos = 2.4 : 1.0
sentimental = True pos : neg = 2.4 : 1.0
criticism = True pos : neg = 2.4 : 1.0
lucas = True pos : neg = 2.4 : 1.0
medical = True pos : neg = 2.4 : 1.0
creates = True pos : neg = 2.3 : 1.0
generic = True neg : pos = 2.3 : 1.0
julie = True neg : pos = 2.3 : 1.0
incredible = True pos : neg = 2.3 : 1.0
kim = True neg : pos = 2.3 : 1.0
wire = True neg : pos = 2.3 : 1.0
mill = True pos : neg = 2.3 : 1.0
iron = True pos : neg = 2.3 : 1.0
harrison = True pos : neg = 2.3 : 1.0
solid = True pos : neg = 2.3 : 1.0
breast = True neg : pos = 2.3 : 1.0
visuals = True pos : neg = 2.3 : 1.0
mood = True pos : neg = 2.3 : 1.0
b = True neg : pos = 2.3 : 1.0
snipe = True neg : pos = 2.3 : 1.0
comet = True neg : pos = 2.3 : 1.0
superficial = True neg : pos = 2.3 : 1.0
smoke = True neg : pos = 2.3 : 1.0
demonstrates = True pos : neg = 2.3 : 1.0
bore = True neg : pos = 2.3 : 1.0
hokey = True neg : pos = 2.3 : 1.0
equally = True pos : neg = 2.3 : 1.0
revelation = True pos : neg = 2.3 : 1.0
spielberg = True pos : neg = 2.3 : 1.0
social = True pos : neg = 2.3 : 1.0
tony = True pos : neg = 2.3 : 1.0
vehicle = True neg : pos = 2.3 : 1.0
drinking = True neg : pos = 2.3 : 1.0
teacher = True pos : neg = 2.3 : 1.0
portrays = True pos : neg = 2.3 : 1.0
cheesy = True neg : pos = 2.3 : 1.0
flubber = True neg : pos = 2.2 : 1.0
horribly = True neg : pos = 2.2 : 1.0
war = True pos : neg = 2.2 : 1.0
share = True pos : neg = 2.2 : 1.0
hilarious = True pos : neg = 2.2 : 1.0
fake = True neg : pos = 2.2 : 1.0
jordan = True pos : neg = 2.2 : 1.0
mcgregor = True pos : neg = 2.2 : 1.0
independent = True pos : neg = 2.2 : 1.0
paulie = True pos : neg = 2.2 : 1.0
fortunately = True pos : neg = 2.2 : 1.0
visually = True pos : neg = 2.2 : 1.0
oscar = True pos : neg = 2.2 : 1.0
stewart = True pos : neg = 2.2 : 1.0
howard = True pos : neg = 2.2 : 1.0
vincent = True pos : neg = 2.2 : 1.0
squad = True neg : pos = 2.2 : 1.0
ross = True neg : pos = 2.2 : 1.0
thompson = True neg : pos = 2.2 : 1.0
visual = True pos : neg = 2.2 : 1.0
8 = True pos : neg = 2.2 : 1.0
naked = True neg : pos = 2.2 : 1.0
personal = True pos : neg = 2.2 : 1.0
deeply = True pos : neg = 2.2 : 1.0
perfection = True pos : neg = 2.2 : 1.0
kilmer = True neg : pos = 2.2 : 1.0
gabriel = True neg : pos = 2.2 : 1.0
sorry = True neg : pos = 2.2 : 1.0
guilty = True neg : pos = 2.2 : 1.0
predictable = True neg : pos = 2.2 : 1.0
gag = True neg : pos = 2.2 : 1.0
yeah = True neg : pos = 2.2 : 1.0
cliched = True neg : pos = 2.2 : 1.0
sport = True neg : pos = 2.2 : 1.0
dazzle = True pos : neg = 2.2 : 1.0
universal = True pos : neg = 2.2 : 1.0
shakespeare = True pos : neg = 2.2 : 1.0
batman = True neg : pos = 2.2 : 1.0
gordon = True neg : pos = 2.2 : 1.0
brilliant = True pos : neg = 2.2 : 1.0
plant = True neg : pos = 2.2 : 1.0
clerk = True neg : pos = 2.2 : 1.0
natasha = True neg : pos = 2.2 : 1.0
watson = True pos : neg = 2.2 : 1.0
thankfully = True pos : neg = 2.2 : 1.0
phil = True neg : pos = 2.2 : 1.0
straightforward = True pos : neg = 2.2 : 1.0
correct = True pos : neg = 2.2 : 1.0
ford = True pos : neg = 2.2 : 1.0
painting = True pos : neg = 2.2 : 1.0
pack = True pos : neg = 2.2 : 1.0
portray = True pos : neg = 2.2 : 1.0
passion = True pos : neg = 2.2 : 1.0
cable = True neg : pos = 2.2 : 1.0
profanity = True neg : pos = 2.2 : 1.0
animation = True pos : neg = 2.2 : 1.0
range = True pos : neg = 2.2 : 1.0
edge = True pos : neg = 2.2 : 1.0
absurd = True neg : pos = 2.1 : 1.0
devil = True neg : pos = 2.1 : 1.0
gratuitous = True neg : pos = 2.1 : 1.0
obnoxious = True neg : pos = 2.1 : 1.0
expert = True neg : pos = 2.1 : 1.0
emotionally = True pos : neg = 2.1 : 1.0
doom = True pos : neg = 2.1 : 1.0
disappointed = True pos : neg = 2.1 : 1.0
definitely = True pos : neg = 2.1 : 1.0
con = True neg : pos = 2.1 : 1.0
variety = True pos : neg = 2.1 : 1.0
remake = True neg : pos = 2.1 : 1.0
beauty = True pos : neg = 2.1 : 1.0
neo = True pos : neg = 2.1 : 1.0
carrie = True pos : neg = 2.1 : 1.0
none = True neg : pos = 2.1 : 1.0
others = True pos : neg = 2.1 : 1.0
homage = True pos : neg = 2.1 : 1.0
robocop = True pos : neg = 2.1 : 1.0
ripley = True pos : neg = 2.1 : 1.0
empire = True pos : neg = 2.1 : 1.0
strain = True pos : neg = 2.1 : 1.0
quentin = True pos : neg = 2.1 : 1.0
frank = True pos : neg = 2.1 : 1.0
craft = True pos : neg = 2.1 : 1.0
journey = True pos : neg = 2.1 : 1.0
bunch = True neg : pos = 2.1 : 1.0
desire = True pos : neg = 2.1 : 1.0
unlike = True pos : neg = 2.1 : 1.0
pathetic = True neg : pos = 2.1 : 1.0
collect = True pos : neg = 2.1 : 1.0
eager = True pos : neg = 2.1 : 1.0
powerful = True pos : neg = 2.1 : 1.0
tradition = True pos : neg = 2.1 : 1.0
modern = True pos : neg = 2.1 : 1.0
worker = True pos : neg = 2.1 : 1.0
condition = True pos : neg = 2.1 : 1.0
irritate = True neg : pos = 2.1 : 1.0
rocky = True pos : neg = 2.1 : 1.0
river = True pos : neg = 2.1 : 1.0
redeem = True neg : pos = 2.1 : 1.0
loser = True neg : pos = 2.1 : 1.0
corny = True neg : pos = 2.0 : 1.0
halloween = True neg : pos = 2.0 : 1.0
blair = True neg : pos = 2.0 : 1.0
spice = True neg : pos = 2.0 : 1.0
diamond = True neg : pos = 2.0 : 1.0
arnold = True neg : pos = 2.0 : 1.0
spacey = True pos : neg = 2.0 : 1.0
reflect = True pos : neg = 2.0 : 1.0
brilliantly = True pos : neg = 2.0 : 1.0
spoken = True pos : neg = 2.0 : 1.0
tour = True pos : neg = 2.0 : 1.0
festival = True pos : neg = 2.0 : 1.0
oppose = True pos : neg = 2.0 : 1.0
split = True pos : neg = 2.0 : 1.0
blame = True neg : pos = 2.0 : 1.0
guess = True neg : pos = 2.0 : 1.0
erotic = True neg : pos = 2.0 : 1.0
disappoint = True neg : pos = 2.0 : 1.0
intense = True pos : neg = 2.0 : 1.0
remains = True pos : neg = 2.0 : 1.0
today = True pos : neg = 2.0 : 1.0
troubled = True pos : neg = 2.0 : 1.0
update = True neg : pos = 2.0 : 1.0
amanda = True neg : pos = 2.0 : 1.0
failure = True neg : pos = 2.0 : 1.0
compelling = True pos : neg = 2.0 : 1.0
identify = True pos : neg = 2.0 : 1.0
develops = True pos : neg = 2.0 : 1.0
threat = True pos : neg = 2.0 : 1.0
prisoner = True pos : neg = 2.0 : 1.0
cousin = True neg : pos = 2.0 : 1.0
meyer = True neg : pos = 2.0 : 1.0
showgirl = True neg : pos = 2.0 : 1.0
culture = True pos : neg = 2.0 : 1.0
fully = True pos : neg = 2.0 : 1.0
trap = True neg : pos = 2.0 : 1.0
disappointment = True neg : pos = 2.0 : 1.0
tragedy = True pos : neg = 2.0 : 1.0
perspective = True pos : neg = 2.0 : 1.0
equal = True pos : neg = 2.0 : 1.0
carol = True pos : neg = 2.0 : 1.0
cave = True neg : pos = 2.0 : 1.0
wrestling = True neg : pos = 2.0 : 1.0
paltrow = True neg : pos = 2.0 : 1.0
loud = True neg : pos = 2.0 : 1.0
bomb = True neg : pos = 2.0 : 1.0
suffers = True neg : pos = 2.0 : 1.0
flaw = True pos : neg = 2.0 : 1.0
austin = True pos : neg = 2.0 : 1.0
richards = True pos : neg = 2.0 : 1.0
driver = True pos : neg = 2.0 : 1.0
francis = True pos : neg = 2.0 : 1.0
interview = True pos : neg = 2.0 : 1.0
poor = True neg : pos = 2.0 : 1.0
subplots = True neg : pos = 2.0 : 1.0
honest = True pos : neg = 2.0 : 1.0
matt = True pos : neg = 2.0 : 1.0
realizes = True pos : neg = 2.0 : 1.0
palma = True neg : pos = 2.0 : 1.0
henstridge = True neg : pos = 2.0 : 1.0
warm = True pos : neg = 2.0 : 1.0
witty = True pos : neg = 2.0 : 1.0
fiennes = True pos : neg = 2.0 : 1.0
neve = True pos : neg = 2.0 : 1.0
grand = True pos : neg = 2.0 : 1.0
trite = True neg : pos = 2.0 : 1.0
grade = True neg : pos = 2.0 : 1.0
ethan = True pos : neg = 2.0 : 1.0
narrator = True pos : neg = 2.0 : 1.0
america = True pos : neg = 2.0 : 1.0
barely = True neg : pos = 2.0 : 1.0
ad = True neg : pos = 2.0 : 1.0
astonish = True pos : neg = 2.0 : 1.0
spell = True pos : neg = 2.0 : 1.0
complain = True pos : neg = 2.0 : 1.0
directs = True pos : neg = 2.0 : 1.0
deny = True pos : neg = 2.0 : 1.0
pat = True pos : neg = 2.0 : 1.0
crisis = True pos : neg = 2.0 : 1.0
irony = True pos : neg = 2.0 : 1.0
crucial = True pos : neg = 2.0 : 1.0
convict = True pos : neg = 2.0 : 1.0
anymore = True pos : neg = 2.0 : 1.0
fifth = True neg : pos = 1.9 : 1.0
hey = True neg : pos = 1.9 : 1.0
sub = True neg : pos = 1.9 : 1.0
8mm = True neg : pos = 1.9 : 1.0
jackal = True neg : pos = 1.9 : 1.0
commentary = True pos : neg = 1.9 : 1.0
silver = True pos : neg = 1.9 : 1.0
maybe = True neg : pos = 1.9 : 1.0
blow = True neg : pos = 1.9 : 1.0
connect = True pos : neg = 1.9 : 1.0
flow = True pos : neg = 1.9 : 1.0
class = True pos : neg = 1.9 : 1.0
cameron = True pos : neg = 1.9 : 1.0
gift = True pos : neg = 1.9 : 1.0
true = True pos : neg = 1.9 : 1.0
knowledge = True pos : neg = 1.9 : 1.0
willing = True pos : neg = 1.9 : 1.0
wannabe = True neg : pos = 1.9 : 1.0
academy = True pos : neg = 1.9 : 1.0
7 = True pos : neg = 1.9 : 1.0
structure = True pos : neg = 1.9 : 1.0
queen = True pos : neg = 1.9 : 1.0
empty = True neg : pos = 1.9 : 1.0
disturb = True pos : neg = 1.9 : 1.0
moral = True pos : neg = 1.9 : 1.0
# Predict a label for every held-out feature dict; the true labels are
# dropped before classification.
clf.classify_many([feature_dict for feature_dict, _label in testing_data])
['pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 
'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'pos', 'pos', 'pos', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'neg', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'neg', 'pos', 'pos', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'neg', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'neg', 'pos', 'neg', 'pos', 'pos', 'neg', 'neg', 'pos', 'pos', 'neg', 'pos', 'neg', 'neg']