Difficulty: Beginner
Estimated Time: 10 minutes

# Train a small bag-of-words / TF-IDF / naive Bayes text classifier on four
# categories of the 20 newsgroups dataset and classify a few sample sentences.

import ssl

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# SECURITY NOTE: this swaps the stdlib's default HTTPS context factory for the
# unverified one, so HTTPS requests that rely on that default no longer check
# server certificates for the rest of this process. Presumably it is here so
# the dataset download below works on machines with a broken CA store --
# TODO confirm, and remove it wherever certificate verification works.
ssl._create_default_https_context = ssl._create_unverified_context

# Restrict the corpus to four newsgroups so the example trains quickly.
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]

# Load the training split; a fixed random_state makes the shuffle repeatable.
twenty_train = fetch_20newsgroups(
    subset='train',
    categories=categories,
    shuffle=True,
    random_state=42,
)

# Show which container types hold the documents and their labels.
print(type(twenty_train.data))
print(type(twenty_train.target))

# Chain the steps: token counts -> TF-IDF weighting -> multinomial naive Bayes.
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])
text_clf.fit(twenty_train.data, twenty_train.target)

# Unseen snippets to classify with the fitted pipeline.
docs_new = ['God is Love', 'Nvidia card', 'Disbelieves in God']

predicted = text_clf.predict(docs_new)
print(predicted)

# Each prediction is used as an index into target_names to get the readable
# newsgroup name for that document.
for doc, category in zip(docs_new, predicted):
    print('%r => %s' % (doc, twenty_train.target_names[category]))

Don’t stop now! The next scenario will only take about 10 minutes to complete.

newsgroup 2

App