输出结果
设计思路
核心代码
# Build TF-IDF features over word n-grams, then train two baseline text
# classifiers (multinomial naive Bayes and logistic regression) on them.
# `x` (raw documents) and `y` (labels) are expected to be defined earlier.
tf = TfidfVectorizer(
    analyzer='word',
    ngram_range=(1, 4),   # unigrams up to 4-grams
    # No stop-word filtering is applied; pass stop_words=... here to enable it.
    max_features=150000,  # keep only the most frequent terms to bound memory
)

# A fixed random_state keeps the train/test split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1234)

# The vectorizer must be fitted before it can transform anything. Fit it on
# the training split only, so vocabulary and IDF weights are learned without
# peeking at the test documents, then reuse that vocabulary for the test split.
x_train = tf.fit_transform(x_train)
x_test = tf.transform(x_test)

# Baseline 1: multinomial naive Bayes on the TF-IDF matrix.
classifier = MultinomialNB()
classifier.fit(x_train, y_train)

# Baseline 2: L2-regularised logistic regression in its dual formulation.
# dual=True is only implemented by the liblinear solver, which is no longer
# the default, so the solver has to be named explicitly.
lg = LogisticRegression(C=4, dual=True, solver='liblinear')
lg.fit(x_train, y_train)

