from sklearn.feature_extraction .text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
# Toy corpus of (headline, category) pairs -- swap in real data before use.
data = [
    ("The team won the championship after a thrilling game", "sports"),
    ("Government passes new economic reforms", "politics"),
    ("Stock markets rally as inflation fears ease", "economics"),
    ("Community organizes a social event in the park", "social"),
    ("Player breaks record in latest football match", "sports"),
    ("Election results cause political shakeup", "politics"),
    ("New policies impact global economy", "economics"),
    ("Volunteers help clean up neighborhood", "social"),
]

# Pull the headlines and their categories apart into two parallel tuples,
# keeping the original ordering so texts[i] is labelled by labels[i].
texts = tuple(headline for headline, _category in data)
labels = tuple(category for _headline, category in data)
# Split into training and test sets.
# The sample corpus has only two examples per class, so an unstratified 70/30
# split can place both examples of a class in the test set, leaving the
# classifier nothing to learn that class from. Stratifying on the labels with
# an even split keeps one example of every class on each side. (A stratified
# test set must hold at least one sample per class, so 30% of 8 samples is
# too small for the four classes used here.)
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.5,
    random_state=42,
    stratify=labels,
)

# Create a pipeline: TF-IDF vectorizer feeding a linear-kernel SVM classifier
model = make_pipeline(
    TfidfVectorizer(),
    SVC(kernel='linear', decision_function_shape='ovr'),
)

# Train the model
model.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = model.predict(X_test)

# Evaluation.
# Each message is built as one string so that print() emits plain text under
# both Python 2 and Python 3; a multi-argument print(...) is rendered as a
# tuple by the Python 2 print statement.
print("Accuracy: {}".format(accuracy_score(y_test, y_pred)))
print("\n Classification Report:\n  {}".format(classification_report(y_test, y_pred)))
ZnJvbSBza2xlYXJuLmZlYXR1cmVfZXh0cmFjdGlvbi50ZXh0IGltcG9ydCBUZmlkZlZlY3Rvcml6ZXIKZnJvbSBza2xlYXJuLnN2bSBpbXBvcnQgU1ZDCmZyb20gc2tsZWFybi5waXBlbGluZSBpbXBvcnQgbWFrZV9waXBlbGluZQpmcm9tIHNrbGVhcm4ubW9kZWxfc2VsZWN0aW9uIGltcG9ydCB0cmFpbl90ZXN0X3NwbGl0CmZyb20gc2tsZWFybi5tZXRyaWNzIGltcG9ydCBjbGFzc2lmaWNhdGlvbl9yZXBvcnQsIGFjY3VyYWN5X3Njb3JlCgojIFNhbXBsZSBkYXRhc2V0IChyZXBsYWNlIHdpdGggeW91ciByZWFsIGRhdGEpCmRhdGEgPSBbCiAgICAoIlRoZSB0ZWFtIHdvbiB0aGUgY2hhbXBpb25zaGlwIGFmdGVyIGEgdGhyaWxsaW5nIGdhbWUiLCAic3BvcnRzIiksCiAgICAoIkdvdmVybm1lbnQgcGFzc2VzIG5ldyBlY29ub21pYyByZWZvcm1zIiwgInBvbGl0aWNzIiksCiAgICAoIlN0b2NrIG1hcmtldHMgcmFsbHkgYXMgaW5mbGF0aW9uIGZlYXJzIGVhc2UiLCAiZWNvbm9taWNzIiksCiAgICAoIkNvbW11bml0eSBvcmdhbml6ZXMgYSBzb2NpYWwgZXZlbnQgaW4gdGhlIHBhcmsiLCAic29jaWFsIiksCiAgICAoIlBsYXllciBicmVha3MgcmVjb3JkIGluIGxhdGVzdCBmb290YmFsbCBtYXRjaCIsICJzcG9ydHMiKSwKICAgICgiRWxlY3Rpb24gcmVzdWx0cyBjYXVzZSBwb2xpdGljYWwgc2hha2V1cCIsICJwb2xpdGljcyIpLAogICAgKCJOZXcgcG9saWNpZXMgaW1wYWN0IGdsb2JhbCBlY29ub215IiwgImVjb25vbWljcyIpLAogICAgKCJWb2x1bnRlZXJzIGhlbHAgY2xlYW4gdXAgbmVpZ2hib3Job29kIiwgInNvY2lhbCIpLApdCgojIFNlcGFyYXRlIHRleHRzIGFuZCBsYWJlbHMKdGV4dHMsIGxhYmVscyA9IHppcCgqZGF0YSkKCiMgU3BsaXQgaW50byB0cmFpbmluZyBhbmQgdGVzdCBzZXQKWF90cmFpbiwgWF90ZXN0LCB5X3RyYWluLCB5X3Rlc3QgPSB0cmFpbl90ZXN0X3NwbGl0KHRleHRzLCBsYWJlbHMsIHRlc3Rfc2l6ZT0wLjMsIHJhbmRvbV9zdGF0ZT00MikKCiMgQ3JlYXRlIGEgcGlwZWxpbmU6IFRGLUlERiB2ZWN0b3JpemVyICsgU1ZNIGNsYXNzaWZpZXIKbW9kZWwgPSBtYWtlX3BpcGVsaW5lKFRmaWRmVmVjdG9yaXplcigpLCBTVkMoa2VybmVsPSdsaW5lYXInLCBkZWNpc2lvbl9mdW5jdGlvbl9zaGFwZT0nb3ZyJykpCgojIFRyYWluIHRoZSBtb2RlbAptb2RlbC5maXQoWF90cmFpbiwgeV90cmFpbikKCiMgUHJlZGljdCBvbiB0ZXN0IHNldAp5X3ByZWQgPSBtb2RlbC5wcmVkaWN0KFhfdGVzdCkKCiMgRXZhbHVhdGlvbgpwcmludCgiQWNjdXJhY3k6IiwgYWNjdXJhY3lfc2NvcmUoeV90ZXN0LCB5X3ByZWQpKQpwcmludCgiXG5DbGFzc2lmaWNhdGlvbiBSZXBvcnQ6XG4iLCBjbGFzc2lmaWNhdGlvbl9yZXBvcnQoeV90ZXN0LCB5X3ByZWQpKQo=
stdout
('Accuracy:', 0.0)
('\nClassification Report:\n', u' precision recall f1-score support\n\n economics 0.00 0.00 0.00 0\n politics 0.00 0.00 0.00 2\n social 0.00 0.00 0.00 0\n sports 0.00 0.00 0.00 1\n\n micro avg 0.00 0.00 0.00 3\n macro avg 0.00 0.00 0.00 3\nweighted avg 0.00 0.00 0.00 3\n')
stderr
/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
'precision', 'predicted', average, warn_for)
/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py:1145: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.
'recall', 'true', average, warn_for)