2
votes

I need to input 6 attributes and classify/predict 3 attributes from that input using Java/Weka programmatically. I've figured out how to predict 1 (the last) attribute, but how can I change this to train and predict the last 3 at the same time?

The numbers in the .arff files correspond to movie objects in a database.

Here is my Java code:

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.meta.FilteredClassifier;
import weka.classifiers.trees.DecisionStump;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.classifiers.trees.RandomTree;
import weka.core.Instances;
import weka.filters.unsupervised.attribute.Remove;

/**
 * Trains a J48 decision tree on one ARFF file and evaluates it on a second
 * ARFF file, using the last attribute of each data set as the class to
 * predict. The first attribute is filtered out before the tree sees the data.
 */
public class WekaTrial {

    // NOTE(review): the two paths below use different project directories
    // ("File_Project" vs "FileProject"). They are kept exactly as found;
    // confirm which spelling is correct on the target machine.

    /** ARFF file the classifier is built from. */
    private static final String TRAINING_FILE =
            "C:/Users/Me/Desktop/File_Project/src/movie_training.arff";

    /** ARFF file the classifier is evaluated on. */
    private static final String TESTING_FILE =
            "C:/Users/Me/Desktop/FileProject/src/movie_testing.arff";

    /**
     * Loads both data sets, builds the filtered J48 classifier, prints the
     * expected and predicted class label for every test instance, and
     * finally prints the number and percentage of correct predictions.
     *
     * @param args unused
     * @throws Exception if a file cannot be read or Weka fails to build or
     *                   apply the classifier
     */
    public static void main(String[] args) throws Exception {

        Instances trainingData = loadWithLastAttributeAsClass(TRAINING_FILE);
        Instances testingData = loadWithLastAttributeAsClass(TESTING_FILE);

        // Print initial data summary
        String summary = trainingData.toSummaryString();
        int numberSamples = trainingData.numInstances();
        int numberAttributesPerSample = trainingData.numAttributes();
        System.out.println("Number of attributes in model = "
                + numberAttributesPerSample);
        System.out.println("Number of samples = " + numberSamples);
        System.out.println("Summary: " + summary);
        System.out.println();

        // Decision-tree learner that does the actual classification.
        J48 j48 = new J48();

        // Filter that drops an attribute (a column, not a sample) before
        // the data reaches the learner. Weka attribute ranges are 1-based,
        // so "1" selects the first attribute.
        Remove rm = new Remove();
        rm.setAttributeIndices("1");

        // Applies the same filter at training and at prediction time.
        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(rm);
        fc.setClassifier(j48);

        // Train on the training set.
        fc.buildClassifier(trainingData);

        // Whole-number tallies of the predictions on the test set.
        int correct = 0;
        int incorrect = 0;

        for (int i = 0; i < testingData.numInstances(); i++) {
            // Both values are indices into the nominal class attribute's
            // list of labels.
            int expectedIndex = (int) testingData.instance(i).classValue();
            int predictedIndex =
                    (int) fc.classifyInstance(testingData.instance(i));

            System.out.print("Expected values: "
                    + testingData.classAttribute().value(expectedIndex));
            System.out.println(", Predicted values: "
                    + testingData.classAttribute().value(predictedIndex));

            // Compare the label indices rather than the label Strings:
            // String identity (==) is not a reliable equality test.
            if (expectedIndex == predictedIndex) {
                correct++;
            } else {
                incorrect++;
            }
        }

        // Guard against an empty test set, which would otherwise give NaN.
        int total = correct + incorrect;
        float percentCorrect = (total == 0) ? 0f : (float) correct / total * 100;
        System.out.println("Number correct: " + correct + "\nNumber incorrect: " + incorrect + "\nPercent correct: " +
                percentCorrect + "%");

    }

    /**
     * Reads an ARFF file into memory and marks its last attribute as the
     * class attribute.
     *
     * @param path location of the ARFF file
     * @return the loaded data set with its class index set to its own last
     *         attribute
     * @throws java.io.IOException if the file cannot be opened or read
     */
    private static Instances loadWithLastAttributeAsClass(String path)
            throws java.io.IOException {
        // try-with-resources closes the reader even when loading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            Instances data = new Instances(reader);
            data.setClassIndex(data.numAttributes() - 1);
            return data;
        }
    }

}

This is my .arff training file (with excess rows removed):

@relation movie_data

@attribute movie1_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}

@data
18,18,18,18,18,18,18,18,18
28,18,36,18,53,10769,18,53,10769
37,37,37,28,12,14,28,12,14
27,53,27,18,10749,10769,27,53,27
12,12,12,35,10751,35,12,12,12
35,18,10749,18,18,18,35,18,10749
28,12,878,53,53,53,53,53,53
18,18,18,28,37,10769,18,18,18
18,53,18,28,12,35,18,53,18
28,80,53,80,18,10749,28,80,53
18,10749,18,18,10756,18,18,10756,18
18,10749,10769,28,12,878,18,10749,10769
18,10756,18,16,35,10751,16,35,10751
35,18,10751,35,18,10752,35,18,10751

And the .arff testing file:

@relation movie_data

@attribute movie1_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie1_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute movie2_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_one {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_two {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}
@attribute decision_three {28,12,16,35,80,105,99,18,82,2916,10751,10750,14,10753,10769,36,10595,27,10756,10402,22,9648,10754,1115,10749,878,10755,9805,10758,10757,10748,10770,53,10752,37}

@data
18,27,53,18,53,10756,18,27,53
35,18,10749,18,10769,18,18,10769,18
16,878,53,16,18,16,16,18,16
35,10749,10757,18,18,18,18,18,18
80,18,10748,18,10749,18,18,10749,18
28,18,36,35,18,10751,28,18,36
18,10749,10769,35,18,10402,35,18,10402
28,12,878,18,10749,10769,18,10749,10769
35,10749,35,14,10402,10751,14,10402,10751
2

2 Answers

0
votes

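If I understood you correctly, you have what is usually called a "Multi-Target" (or multi-output) problem rather than a plain "Multi-Class" one: several class attributes must be predicted for each instance. You have several simple options to solve the problem: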

  1. Create a new target class which incorporates all 3 (concatenation of decision_one, decision_two and decision_three)

  2. Train each target separately.

0
votes

I think the simplest approach would be, as Bella said, to train three separate models, one for each class attribute, possibly removing the other two class attributes from the inputs (depending on whether or not you want them to influence the prediction).