내 트윗 파일입니다.

#!/usr/bin/env python3 import os import sys from analyzer import Analyzer from termcolor import colored from helpers import get_user_timeline def main(): # ensure proper usage if len(sys.argv)!=2 or sys.argv[1][0]!="@": sys.exit("Usage: ./tweets @userstweeter") # get tweets tweets = get_user_timeline(sys.argv[1][1:], 50) if tweets == None: sys.exit("Error") print (tweets) # absolute paths to lists positives = os.path.join(sys.path[0], "positive-words.txt") negatives = os.path.join(sys.path[0], "negative-words.txt") # instantiate analyzer analyzer = Analyzer(positives, negatives) # analyze word score = analyzer.analyze(tweets) if score > 0.0: print(colored(":)", "green")) elif score < 0.0: print(colored(":(", "red")) else: print(colored(":|", "yellow")) if __name__ == "__main__": main() 

이것은 내 analyzer.py입니다

p>

import nltk import string import sys from nltk.tokenize import TweetTokenizer class Analyzer(): """Implements sentiment analysis.""" def __init__(self, positives, negatives): """Initialize Analyzer.""" self.positives = set() with open("positive-words.txt") as file: for line in file: if line[0]!=" " and line[0]!=";": self.positives.add(line.rstrip("\n")) self.negatives = set() with open("positive-words.txt") as file: for line in file: if not line.startswith(";") and not line.startswith(" "): self.positives.add(line.rstrip("\n")) def analyze(self, text): """Analyze text for sentiment, returning its score.""" tokenizer = nltk.tokenize.TweetTokenizer(preserve_case=False) tokens = tokenizer.tokenize(text) pos = 0 neg = 0 for tok in tokens: if tok in self.positives: pos += 1 if tok in self.negatives: neg -= 1 return pos + neg 

오류입니다.

Traceback (most recent call last): File "./tweets", line 39, in <module> main() File "./tweets", line 30, in main score = analyzer.analyze(tweets) File "/home/ubuntu/workspace/pset6/sentiments/analyzer.py", line 29, in analyze tokens = tokenizer.tokenize(text) File "/home/ubuntu/.local/lib/python3.4/site-packages/nltk/tokenize/casual.py", line 294, in tokenize text = _replace_html_entities(text) File "/home/ubuntu/.local/lib/python3.4/site-packages/nltk/tokenize/casual.py", line 258, in _replace_html_entities return ENT_RE.sub(_convert_entity, _str_to_unicode(text, encoding)) TypeError: expected string or buffer 

댓글

  • reddit
  • TypeError가 생성 된 코드 줄은 무엇입니까?

Answer

문제는 문자열 목록을 반환하기 때문에 get_user_timeline입니다. 즉, 토큰 화 함수에 전달할 수는 없습니다. 대신 문자열을 반복하고 별도로 토큰 화해야합니다.

답글 남기기

이메일 주소를 발행하지 않을 것입니다. 필수 항목은 *(으)로 표시합니다