fork download
  1. from sklearn.feature_extraction.text import TfidfVectorizer
  2. from sklearn.svm import SVC
  3. from sklearn.pipeline import make_pipeline
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.metrics import classification_report, accuracy_score
  6.  
  7. # Sample dataset (replace with your real data)
  8. data = [
  9. ("The team won the championship after a thrilling game", "sports"),
  10. ("Government passes new economic reforms", "politics"),
  11. ("Stock markets rally as inflation fears ease", "economics"),
  12. ("Community organizes a social event in the park", "social"),
  13. ("Player breaks record in latest football match", "sports"),
  14. ("Election results cause political shakeup", "politics"),
  15. ("New policies impact global economy", "economics"),
  16. ("Volunteers help clean up neighborhood", "social"),
  17. ]
  18.  
  19. # Separate texts and labels
  20. texts, labels = zip(*data)
  21.  
  22. # Split into training and test set
  23. X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.3, random_state=42)
  24.  
  25. # Create a pipeline: TF-IDF vectorizer + SVM classifier
  26. model = make_pipeline(TfidfVectorizer(), SVC(kernel='linear', decision_function_shape='ovr'))
  27.  
  28. # Train the model
  29. model.fit(X_train, y_train)
  30.  
  31. # Predict on test set
  32. y_pred = model.predict(X_test)
  33.  
  34. # Evaluation
  35. print("Accuracy:", accuracy_score(y_test, y_pred))
  36. print("\nClassification Report:\n", classification_report(y_test, y_pred))
  37.  
Success #stdin #stdout #stderr 0.43s 65368KB
stdin
Standard input is empty
stdout
('Accuracy:', 0.0)
('\nClassification Report:\n', u'              precision    recall  f1-score   support\n\n   economics       0.00      0.00      0.00         0\n    politics       0.00      0.00      0.00         2\n      social       0.00      0.00      0.00         0\n      sports       0.00      0.00      0.00         1\n\n   micro avg       0.00      0.00      0.00         3\n   macro avg       0.00      0.00      0.00         3\nweighted avg       0.00      0.00      0.00         3\n')
stderr
/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
/usr/local/lib/python2.7/dist-packages/sklearn/metrics/classification.py:1145: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.
  'recall', 'true', average, warn_for)