
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;
import java.util.StringTokenizer;

import weka.classifiers.Evaluation;
import weka.classifiers.bayes.BayesNet;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.OptionHandler;
import weka.filters.Filter;

/**
* @author Jens Jahnke
* <p>Title: Sequential Floating Forward Search</p>
* <p>Description: Performs an SFFS on the given data.</p>
* <p>Copyright: Copyright (c) 2005/06</p>
* <p>Organisation: Fraunhofer IGD</p>
* @version 0.9
*/
public class sffs {

final static boolean DEBUGFLAG = false;

final static boolean VERBOSE = false;

protected Instances m_data_train;

protected Instances m_data_test;

protected int maxAttributes = 13; // maximum number of attributes that should be used
protected StringBuffer m_SelectedAttributes; // contains the selected attributes
protected StringBuffer m_Attributes; // contains all attributes

protected StringBuffer m_ClassAttributes; // attributes that shall not be removed
protected double m_delta = 0.0025; // break indicator

protected double m_error = -1; // buffer for last calculated error
protected double m_correctly = 0; // correctly classified instances (%)
protected double m_incorrectly = 0; // incorrectly classified instances (%)
private StringBuffer m_States;

protected Filter m_Filter = null;

protected int m_classifier = 0; // classifier

protected int m_Folds = 6; // number of folds for cross validation

public sffs(String attrs, String classattrs, int maxAttrs) {

m_classifier = 0; // set to default (Naive Bayes)
m_data_train = null;

m_data_test = null;

m_SelectedAttributes = new StringBuffer();

m_Attributes = new StringBuffer(attrs);

m_ClassAttributes = new StringBuffer(classattrs);

m_States = new StringBuffer();

maxAttributes = maxAttrs;

m_delta = 0.0025;

m_error = -1;

} // constructor
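// Minimal usage sketch (attribute indices and fold count are illustrative only,
// following the pattern used in main() below):
//
//   sffs search = new sffs("2,3,4,5", "1", 10); // candidate attributes, class attribute, max. size
//   search.setClassifier(0);                    // 0 = Naive Bayes
//   search.setInstance(data, 6);                // Weka Instances, 6-fold cross validation
//   search.initRun();
//   search.runSFFS();
//   String selected = search.getSelectedAttributes();
//   double error = search.getError();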

/**

* Returns the value of m_delta, which influences the SFFS algorithm.

* @return (double)

*/

public double getDelta() { return (m_delta); }

/**

* Sets the value of m_delta, which influences the SFFS algorithm.

* @param delta (double value (default: 0.0025))

*/

public void setDelta(double delta) { m_delta = delta; }

/**

* Returns the percentage of correctly classified instances.

* @return (double)

*/

public double getCorrectly() {

double percentage = m_correctly * 100 / (m_correctly + m_incorrectly);

percentage = Math.rint(percentage * 100.0) / 100.0; // rounding to 2 digits
return (percentage);
}

/**

* Returns the percentage of incorrectly classified instances.

* @return (double)

*/

public double getIncorrectly() {

double percentage = m_incorrectly * 100 / (m_correctly + m_incorrectly);

percentage = Math.rint(percentage * 100.0) / 100.0; // rounding to 2 digits
return (percentage);
}

/**

* Sets the classifier to use (valid values: 0-3).

* @param i

*/

public void setClassifier(int i) {
if ((i > -1) && (i < 4)) { m_classifier = i; }
}

public int getClassifier() { return (m_classifier); }

/**

* Do some init stuff before the run.

*

*/

public void initRun() {

m_SelectedAttributes = new StringBuffer();

m_States = new StringBuffer();

m_error = -1;

m_correctly = 0;

m_incorrectly = 0;

}

/**

* Sets the class attribute.

* @param classattrs

*/

public void setClassAttribute(int classattrs) {

if (classattrs > 0 && classattrs < m_data_train.numAttributes()) m_ClassAttributes = new StringBuffer(String.valueOf(classattrs));

}

/**

* Sets the maximum number of desired attributes.

* @param num_attrs

*/

public void setNumberOfAttributes(int num_attrs) {

if (num_attrs > 0 && num_attrs < m_data_train.numAttributes()) maxAttributes = num_attrs;

}

/**

* sets the filter to use

* @param name the classname of the filter

* @param options the options for the filter

*/

private void setFilter(String name, String[] options) throws Exception {
m_Filter = (Filter) Class.forName(name).newInstance();

if (m_Filter instanceof OptionHandler)

((OptionHandler) m_Filter).setOptions(options);

}

/**

* Executes the filter "filtername" with the given parameters.

*

* @param filtername contains the "exact" name of the weka filter to use.

* @param filteropts a list of filter options.

* @param source weka instance source

*/

private Instances prepare_data(String filtername, String filteropts[], Instances source) {

try {

// set filtername

this.setFilter(filtername, filteropts);

if (DEBUGFLAG && VERBOSE) {

System.err.println("Applying filter.");

}

m_Filter.setInputFormat(source);

Instances filtered = Filter.useFilter(source, m_Filter);

return (filtered);

} catch (Exception e) {

System.err.println("An error occured during the execution of the following filter: \n"

+ "\tName: " + filtername);

System.err.println(e.getLocalizedMessage());

return (null);
}
}

/**
* Sets the given weka instance as data instance.

* @param data (Weka-Instance)

* @param folds (number of desired folds for cross validation)

*/

public void setInstance(Instances data, int folds) {
try {

m_Folds = folds;

m_data_train = data;

m_data_test = data;

} catch (Exception e) {

System.err.println(e.getLocalizedMessage());

}
}

// set the max number of attributes
public int setMaxAttributes(int attributes) {
if (attributes > 0) {

maxAttributes = attributes;

}
return (maxAttributes);
}

/**
* Returns the number of selected attributes.

* @return

*/

public int getNumberOfSelectedAttributes() {
int nr;

if (m_SelectedAttributes.length() < 1) nr = 0;

else {

StringTokenizer strtok = new StringTokenizer(m_SelectedAttributes.toString(), ",");

nr = strtok.countTokens();

}
return (nr);
}

public String getSelectedAttributes() {
try {

return (m_SelectedAttributes.toString());

} catch (Exception e) {

e.printStackTrace();

return ("");

}
}

/**

* Checks if the given state is present in the states buffer.

* @param State

* @return true or false

*/

private boolean isInStates(String State) {
String tmp = m_States.toString();

if (tmp.contains(State)) {
return (true);
} else {
return (false);
}
}

/**

* Adds the given state to the states buffer.

* @param State

*/

private void addState(String State) {
m_States.append(State);
m_States.append(" "); // divide the states by adding a blank
}

/**

* Checks if findme is contained within searchme. Returns true or false.

* @param findme

* @param searchme

* @return

*/
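// Worked example of the intended behaviour: isInString("3", "1,3,5") yields true,
// isInString("7", "1,3,5") yields false (the second argument is treated as a comma separated list).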

private boolean isInString(String findme, String searchme) {
boolean isIt = false;

String tmp = "";

StringTokenizer st = new StringTokenizer(searchme, ",");

try {
while (st.hasMoreTokens()) {
tmp = st.nextToken();
if (tmp.equals(findme)) { isIt = true; }
}
} catch (Exception e) {
System.err.println(e.getLocalizedMessage());
}
return (isIt);
}

private String m(String m1, String m2) {
StringBuffer match = new StringBuffer();

String tmp = "";

StringTokenizer genAttributes = new StringTokenizer(m1, ",");

if (m2.length() == 0) {
match.append(m1);

} else {
try {

while (genAttributes.hasMoreTokens()) {
tmp = genAttributes.nextToken(); // get token
if (!isInString(tmp, m2)) {

match.append(tmp);

match.append(",");

}

} // while (genAttributes.hasMoreTokens())
if (match.length() > 0) {

match.deleteCharAt(match.length() - 1); // delete last ","

}

} catch (Exception e) {

System.err.println(e.getLocalizedMessage());

}
}
return (match.toString());
}

/**
* Creates the union of m1 and m2.

* @param m1

* @param m2

* @return m1 (union) m2

*/
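// Worked example of the intended behaviour: union("1,3", "3,5") yields "1,3,5";
// duplicates are skipped via isInString.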

private String union(String m1, String m2) {
String tmp = "";

StringBuffer m = new StringBuffer();

StringTokenizer st = new StringTokenizer(m2, ",");

if (m1.length() == 0) {
m.append(m2);
} else {
try {
m.append(m1);
m.append(",");
while (st.hasMoreTokens()) {
tmp = st.nextToken();
if (!isInString(tmp, m1)) {
m.append(tmp);

m.append(",");

} }

m.deleteCharAt(m.length() - 1);

} catch (Exception e) {

System.err.println(e.getLocalizedMessage());

}
}
return (m.toString());
}

/**
* Adds the attribute attr to the list of selected attributes.

* @param attr

*/

private void addAttribute(String attr) {
if (m_SelectedAttributes.length() > 0) {
m_SelectedAttributes.append(",");

}

m_SelectedAttributes.append(attr);

}

/**

* Removes an entry (random, second last or last) from the given list of selected attributes.

*

*/

private String remAttribute(String Attributes, boolean randomize, boolean last) {
int i, j, k;

Random zufall = new Random();

StringBuffer tmpBuf = new StringBuffer();

String stmp = "";

// save attribs to tokenizer

StringTokenizer tmp = new StringTokenizer(Attributes, ",");

k = tmp.countTokens();

j = 0;

if (randomize && k > 1) { // use random remove function // avoid the last entry i = zufall.nextInt(k - 1);

} else {

// remove other entry

i = k - 2; // second last one
//i = 0; // first one

}

// remove last entry
if (last) i = k - 1;

// remove entry
if (k > 1) {
if (DEBUGFLAG) {

System.err.println("Removing entry " + (i+1) + " from " + k);

System.err.println(Attributes);

}
while (tmp.hasMoreTokens()) {
stmp = tmp.nextToken();
if (j != i) {
if (tmpBuf.length() > 0) { tmpBuf.append(","); }
tmpBuf.append(stmp);
}
j++;
}
} else {
tmpBuf.append(Attributes);
}
return (tmpBuf.toString());
}

/**
* Tries to find the best attribute.

* @param attrs

* @return

*/

private String argmin(String attrs) {
String selAttr = "";

String tmpAttr = "";

double tmpTest, tmpTest2 = 0;

StringTokenizer st = new StringTokenizer(attrs, ",");

try {
tmpTest = 100;
while (st.hasMoreTokens()) {
tmpAttr = st.nextToken();
tmpTest2 = Test(union(m_SelectedAttributes.toString(), tmpAttr));
if (tmpTest2 < tmpTest) {

if (DEBUGFLAG) { System.out.println(tmpTest2 + " < " + tmpTest); }

selAttr = tmpAttr;

tmpTest = tmpTest2;

} }

} catch (Exception e) {

System.err.println("Error in argmin function: ");

System.err.println(e.getLocalizedMessage());

}
return (selAttr);
}

/**
* The main SFFS algorithm.

*

*/
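// Rough outline of the loop below (sequential floating forward selection):
//   1. forward step : add the attribute from F \ Fs that minimises the cross validation error (argmin)
//   2. backward steps: keep removing an attribute again as long as this lowers the error
//      and the resulting subset has not been visited before (m_States)
//   3. stop once the improvement drops below m_delta or maxAttributes attributes are selected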

public void runSFFS() {
double f = 0;

double g = 0;

double buf = 100;

double delta = 0;

int n = 0;

StringBuffer tmpAttrib = new StringBuffer();

String tmp = "";

String fm = "";

while (n < maxAttributes) {

System.out.print("."); // give some output if (m_SelectedAttributes.length() > 1) buf = Test(m_SelectedAttributes.toString());

// create m = F \ Fs

tmpAttrib = new StringBuffer();

tmp = "";

tmp = m(m_Attributes.toString(), m_SelectedAttributes.toString());

if (DEBUGFLAG) { System.err.println("Generated {m} : { " + tmp + " }"); } fm = argmin(tmp);

tmpAttrib.append(fm);

if (DEBUGFLAG) { System.err.println("Generated argmin({m}) : { " + tmpAttrib.toString() + " }"); }

// create Fs (union) {m}

tmp = union(m_SelectedAttributes.toString(), tmpAttrib.toString());

if (DEBUGFLAG) { System.err.println("Generated Fs U {m} : { " + tmp + " }"); } addAttribute(tmpAttrib.toString());

// save state

addState(m_SelectedAttributes.toString());

n++;

if (DEBUGFLAG) { System.err.println(m_SelectedAttributes.toString()); }
// backward steps

while (true) {

System.out.print("."); // give some output

tmp = m(m_Attributes.toString(), m_SelectedAttributes.toString());

// f = Test(Fs \ {m})

fm = m(m_SelectedAttributes.toString(), tmp);

f = Test(fm);

// remove an attribute

tmpAttrib = new StringBuffer();

tmpAttrib.append(remAttribute(m_SelectedAttributes.toString(), true, false));

// if (Test(Fs \ {f}) < Test(Fs) AND ((Fs \ {f}) is not element of States))
g = Test(tmpAttrib.toString());

if ((g < f) && (!(isInStates(tmpAttrib.toString())))) {
f = g;

// Fs = Fs \ {f}

m_SelectedAttributes = tmpAttrib;

// save state

addState(m_SelectedAttributes.toString());

if (DEBUGFLAG) { System.err.println(m_SelectedAttributes.toString()); }
n--;

} else {
break;

}

} // while (true)

delta = Math.abs(buf - f);

if (delta <= m_delta && buf < 100) {
tmpAttrib = new StringBuffer();

tmpAttrib.append(remAttribute(m_SelectedAttributes.toString(), false, true));

m_SelectedAttributes = tmpAttrib;

if (DEBUGFLAG)

System.err.println("Last modification did not improve result (d = " + delta + ").");

break;

} //n++;

if (DEBUGFLAG && VERBOSE) { System.err.println("n = " + n);

System.err.println("Delta = " + delta);

}

} // while (n < maxAttributes)

m_error = buf; // buffer last calculated error
if (DEBUGFLAG) {

System.out.println("Error : " + String.valueOf(buf));

}

System.out.print("+"); // give some output }

/**

* This function is the essential part. It pipes the given dataset into the

* selected classifier and returns the mean absolute error.

*

* @param attributes (a string that contains a comma separated list of attributes.)

* @return A double value that represents the error rate of the chosen classifier.

*/

private double Test(String attributes) {
double result = 0;

// filteroptions for the remove filter:

String[] filter_pars = new String[3];

filter_pars[0] = "-V"; // invert remove selection filter_pars[1] = "-R";

filter_pars[2] = m_ClassAttributes.toString() + "," + attributes; // columns to keep
// remove all except the attributes specified in "String attributes"
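// Example: with m_ClassAttributes = "1" and attributes = "3,5" this yields
// filter_pars = { "-V", "-R", "1,3,5" }, i.e. the Remove filter keeps only columns 1, 3 and 5.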

Instances training_data = prepare_data("weka.filters.unsupervised.attribute.Remove", filter_pars, m_data_train);

training_data.setClassIndex(0);

try {

Random random = new Random();

Evaluation myTest = null;

switch (m_classifier) {
case 0:

// use naive bayes

//Classifier myClassifier = (Classifier)new NaiveBayes();

NaiveBayes myClassifier = new NaiveBayes();

// train the classifier

myClassifier.buildClassifier(training_data);

// test the classifier

myTest = new Evaluation(training_data);

//myTest.evaluateModel(myClassifier, training_data);

myTest.crossValidateModel(myClassifier, training_data, m_Folds, random);

// get error

result = myTest.meanAbsoluteError();

result = Math.rint(result * 10000.0) / 10000.0; // rounding to 4 digits
// get number of correctly and incorrectly classified instances

m_correctly = myTest.correct();

m_incorrectly = myTest.incorrect();

break;

case 1:

// use bayes net

BayesNet myBayesNet = new BayesNet();

// train the classifier

myBayesNet.buildClassifier(training_data);

// test the classifier

myTest = new Evaluation(training_data);

//myTest.evaluateModel(myBayesNet, training_data);

myTest.crossValidateModel(myBayesNet, training_data, m_Folds, random);

// get error

result = myTest.meanAbsoluteError();

result = Math.rint(result * 10000.0) / 10000.0; // rounding to 4 digits
// get number of correctly and incorrectly classified instances

m_correctly = myTest.correct();

m_incorrectly = myTest.incorrect();
break;
case 2:
// use J48
J48 myJ48 = new J48();
// train the classifier

myJ48.buildClassifier(training_data);

// test the classifier

myTest = new Evaluation(training_data);

//myTest.evaluateModel(myJ48, training_data);

myTest.crossValidateModel(myJ48, training_data, m_Folds, random);

// get error

result = myTest.meanAbsoluteError();

result = Math.rint(result * 10000.0) / 10000.0; // rounding to 4 digits
// get number of correctly and incorrectly classified instances

m_correctly = myTest.correct();

m_incorrectly = myTest.incorrect();

break;

default:

System.err.println("Error in Test function: No valid classifier set!");

System.exit(1);

}

} catch (Exception e) {

System.err.println("Error in Test function:");

System.err.println(e.getCause());

}
return (result);

}

/**

* Returns the buffered error of the last run.

* @return

*/

public double getError() { return (m_error); }

/**

* @param args

*/

public static void main(String[] args) {
long startms = 0;

long startRun = 0;

long stopms = 0;

long zeit = 0;

String filename = "";

int classifier = 0;

int maxattribs = 0;

int classes = 0;

String classattributes = "";

String fieldattributes = "";

if (args.length < 5) {

System.err.println("Please specify filename (arff format),

classification method, max number of attributes, fieldattributes and class attributes!");

System.err.println("<filename> <0|1|2> <no. of attributes> <fa1,fa2,fa3,...>

<ca1,ca2,ca3,...>");

System.err.println("Classifiers:");

System.err.println("\t0 : Naive Bayes\n\t1 : BayesNet\n\t 2 : J48\n\t3 : Fisherprojection");

System.exit(1);
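// Example invocation (file name and attribute indices are purely illustrative):
//   java sffs emotions.arff 0 10 2,3,4,5,6 1
// -> Naive Bayes, at most 10 selected attributes, candidate attributes 2-6, class attribute 1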

}
// read the command line arguments (see usage message above)
filename = args[0];
classifier = Integer.parseInt(args[1]);
maxattribs = Integer.parseInt(args[2]);
fieldattributes = args[3];
classattributes = args[4];
startms = System.currentTimeMillis();
sffs m_SFFS = new sffs(fieldattributes, classattributes, maxattribs);

m_SFFS.setClassifier(classifier);

System.out.print("Using Classifier: ");

classifier = m_SFFS.getClassifier();

switch (classifier) {
case 0:

System.out.println("0 - Naive Bayes");

break;

case 1:

System.out.println("1 - Bayes Net");

break;

case 2:

System.out.println("2 - J48");

break;

default:

System.err.println("No valid classifier set!");

System.exit(1);

}

System.out.println("Allowed maximum number of attributes: " + maxattribs);

System.out.println("Class attributes: " + classattributes);

try {

Instances data = new Instances(

new BufferedReader(

new FileReader(filename)));

//Instances filtered_data = null;

// setting class attribute

data.setClassIndex(data.numAttributes() - 1);

// set instance and split it into 6 folds

m_SFFS.setInstance(data, 6); // TODO: create command line argument for that
} catch (IOException ioe) {

System.err.println("An IO error occured!");

ioe.printStackTrace();

System.exit(1);

}

double[] fehlerwerte = new double[10];

double[] correct = new double[10];

double[] incorrect = new double[10];

String[] attribute = new String[10];

String[] emotionen = new String[10];

emotionen[0] = "valence";

StringTokenizer st = new StringTokenizer(classattributes, ",");

String tmpAttribute = "";

classes = st.countTokens();

int i = 0;

while (st.hasMoreTokens()) {
tmpAttribute = st.nextToken();

startRun = System.currentTimeMillis();

m_SFFS.setDelta(0.0015); // TODO: Test only!

m_SFFS.setClassAttribute(Integer.parseInt(tmpAttribute));

m_SFFS.initRun();
m_SFFS.runSFFS();
stopms = System.currentTimeMillis();
fehlerwerte[i] = m_SFFS.getError();
correct[i] = m_SFFS.getCorrectly();
incorrect[i] = m_SFFS.getIncorrectly();
attribute[i] = m_SFFS.getSelectedAttributes();
i++;
zeit = stopms - startRun;

zeit = zeit / 1000;

System.out.print(" (time : " + zeit + " s) ");

}

stopms = System.currentTimeMillis();

zeit = stopms - startms;

zeit = zeit / 1000;

System.out.println("");

System.out.println("SFFS completed on file " + filename + " in " + zeit + " seconds with delta value " + m_SFFS.getDelta() + ".");

for (i=0; i<classes; i++) {

System.out.println(emotionen[i] + ": error rate = " + fehlerwerte[i]

+ " (" + correct[i] + "% vs. " + incorrect[i] + "%) -> attributes {"

+ attribute[i] + "}");

}

} // main
} // class sffs

/**
* @author Jens Jahnke
* <p>Title: Werkzeugkiste</p>
* <p>Description: Validates a given relation on the given test sets.</p>
* <p>Copyright: Copyright (c) 2005/06</p>
* <p>Organisation: Fraunhofer IGD</p>
* @version 0.1
*/
public class werkzeugkiste {

protected String m_Trainingsdaten = "";

protected String[] m_Testdaten = null;

protected int m_NumberOfTestfiles = 0;

protected String m_Removerelation = "";

protected double m_Correctly = 0;

protected double m_Incorrectly = 0;

protected double m_Error = 0;

protected double[] m_Errors = null;

protected double[] m_Prozente = null;

protected Instances m_data_train;

protected Instances m_data_test;

protected Filter m_Filter = null;

protected int m_classifier = 0; // classifier

/**

* Constructor

* @param remove (String) relation for the weka remove filter

* @param training (String) name of the arff file used for training

* @param test (StringBuffer) comma separated list of arff files for testing

*/
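// Minimal usage sketch (file names and the remove relation are illustrative only):
//   werkzeugkiste wz = new werkzeugkiste("1,3-5", "train.arff", new StringBuffer("test1.arff,test2.arff"));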

public werkzeugkiste(String remove, String training, StringBuffer test) {
m_Correctly = 0;

m_Incorrectly = 0;

m_classifier = 0;

m_Removerelation = remove;

m_Trainingsdaten = training;

StringTokenizer st = new StringTokenizer(test.toString(), ",");