The results of the tests on all 55 datasets, where in each case one dataset was used for training and the remaining 54 for testing. Naive Bayes was used as the classification algorithm.

Training set    Average error rate (φ̄)
                I     II    III   IV    V     VI    VII   VIII  IX    X
(rows 1-47 on the preceding pages)
48              0.45  0.58  0.48  0.51  0.48  0.15  0.51  0.81  0.46  0.63
49              0.45  0.58  0.46  0.51  0.39  0.15  0.90  0.81  0.46  0.61
50              0.45  0.65  0.77  0.56  0.40  0.15  0.44  0.74  0.46  0.75
51              0.45  0.64  0.76  0.52  0.39  0.15  0.86  0.80  0.46  0.77
52              0.45  0.64  0.77  0.51  0.39  0.15  0.88  0.79  0.46  0.61
53              0.45  0.58  0.76  0.84  0.85  0.15  0.41  0.85  0.78  0.61
54              0.45  0.63  0.46  0.51  0.39  0.15  0.41  0.59  0.46  0.60
55              0.45  0.58  0.84  0.51  0.39  0.15  0.41  0.58  0.80  0.65
∅               0.65  0.65  0.64  0.67  0.59  0.29  0.62  0.73  0.62  0.71

Table A.15: The average error rates when testing person and day independence on all datasets
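The evaluation behind Table A.15 trains a Naive Bayes classifier on a single dataset and measures the error rate on every remaining dataset. A minimal Weka sketch of this procedure is shown below; it is an illustration only, not part of the thesis sources, and the class name and command-line handling are assumptions.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;

// Hypothetical helper (not part of the thesis code): trains Naive Bayes on the
// first ARFF file given on the command line and reports the error rate on each
// of the remaining ARFF files.
public class LeaveOneDatasetOutSketch {
    public static void main(String[] args) throws Exception {
        Instances train = new Instances(new BufferedReader(new FileReader(args[0])));
        train.setClassIndex(train.numAttributes() - 1);
        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(train);
        double sum = 0;
        for (int i = 1; i < args.length; i++) {
            Instances test = new Instances(new BufferedReader(new FileReader(args[i])));
            test.setClassIndex(test.numAttributes() - 1);
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(nb, test);
            System.out.println(args[i] + ": error rate = " + eval.errorRate());
            sum += eval.errorRate();
        }
        // average error rate over all test files
        System.out.println("average error rate = " + sum / (args.length - 1));
    }
}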
Appendix B
Source Code - Package Datenvorverarbeitung
B.1 dvv_einfach
package Datenvorverarbeitung;
/**
 * @author Jens Jahnke
 * <p>Title: Datenvorverarbeitung</p>
 * <p>Description: Parses a configuration file in XML format and generates an ARFF file for Weka.</p>
 * <p>Copyright: Copyright (c) 2005/06</p>
 * <p>Organisation: Fraunhofer IGD</p>
 * @version 0.8
 */
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
//import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
//import org.w3c.dom.DOMException;
import java.io.*;
import java.util.StringTokenizer;
// Weka
import weka.core.Instances;
import weka.core.OptionHandler;
//import weka.core.converters.ArffSaver;
import weka.core.converters.CSVLoader;
//import weka.core.Utils;
import weka.filters.Filter;
public class dvv_einfach {
// DEBUGFLAG :-)
final static boolean DEBUGFLAG = false;
// Be verbose...
final static boolean VERBOSE = true;
// drop lines?
final static boolean DROPLINES = true;
// Global value so it can be ref'd by the tree-adapter
static Document document;
// name of the configuration file
public String configfile = "Demokoffer-Dvv.xml";
// name of the data file
public String datafile = "";
// compress the tree?
boolean compress = true;
// error flag
static Boolean errorFlag = false;
// attribute name
protected String Attributename = "";
// instance
protected Instances m_Training = null;
// filter
protected static Filter m_Filter = null;
// DOM Element
protected Element domElement = null;
// counter
protected static int attrCount = 0;
protected static int elemCount = 0;
/**
 * Constructor
 * @param config (Name of the config file as String)
 * @param data (Name of the data file as String)
 */
public dvv_einfach(String config, String data) {
configfile = config;
datafile = data;
} // constructor
public void setConfigfile(String config) {
configfile = config;
}
public void setDatafile(String data) {
datafile = data;
}
/**
 * sets the filter to use
 * @param name the classname of the filter
 * @param options the options for the filter
 */
public void setFilter(String name, String[] options) throws Exception {
m_Filter = (Filter) Class.forName(name).newInstance();
if (m_Filter instanceof OptionHandler)
((OptionHandler) m_Filter).setOptions(options);
}
/**
 * Uses the csvloader class to generate the arff header.
 * @param infile name of the input file
 */
private String genArffHeader(String infile) {
// load CSV
CSVLoader loader = new CSVLoader();
try {
loader.setSource(new File(infile));
Instances data = loader.getStructure();
return (data.toString());
} catch (IOException ioe) {
ioe.printStackTrace();
errorFlag = true;
return (null);
}
} // genArffHeader
/**
* Checks if the arff file is valid.
* @param name of the arff file
* @return true or false
*/
public boolean arff_is_valid(String name) {
String[] tmp = new String[1];
errorFlag = false;
try {
Instances data = new Instances(
new BufferedReader(
new FileReader(name)));
data.setClassIndex(data.numAttributes() - 1);
try {
//final dvv_einfach configFilter = new dvv_einfach();
this.setFilter("weka.filters.AllFilter", tmp);
m_Filter.setInputFormat(data);
Instances filtered = Filter.useFilter(data, m_Filter);
if (DEBUGFLAG) {
System.err.println("relationName: " + filtered.relationName());
}
} catch (Exception e) {
e.printStackTrace();
errorFlag = true;
}
} catch (IOException ioe) {
ioe.printStackTrace();
errorFlag = true;
}
return (!errorFlag);
} // arff_is_valid
public boolean parseConfig() {
DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
// use validating parser
factory.setValidating(true);
// check validity of namespaces
factory.setNamespaceAware(true);
errorFlag = false;
try {
DocumentBuilder builder = factory.newDocumentBuilder();
builder.setErrorHandler(
new org.xml.sax.ErrorHandler() {
// fatal errors (an exception is guaranteed)
public void fatalError(SAXParseException exception) throws SAXException {
System.out.println("** Error"
+ ", line " + exception.getLineNumber() + ", uri " + exception.getSystemId());
throw exception;
}
// treat validation errors as fatal
public void error(SAXParseException e) throws SAXParseException
{
System.out.println("** Error"
+ ", line " + e.getLineNumber() + ", uri " + e.getSystemId());
throw e;
}
// dump warnings too
public void warning(SAXParseException err) throws SAXParseException
{
System.out.println("** Warning"
+ ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
System.out.println(" " + err.getMessage());
} } );
document = builder.parse( new File(configfile) );
} catch (SAXException sxe) {
// Error generated during parsing
Exception x = sxe;
if (sxe.getException() != null) x = sxe.getException();
x.printStackTrace();
errorFlag = true;
} catch (ParserConfigurationException pce) {
// Parser with specified options can't be built
pce.printStackTrace();
errorFlag = true;
} catch (IOException ioe) {
// I/O error
ioe.printStackTrace();
errorFlag = true;
}
return (!errorFlag);
} // parseConfig
public static void main(String argv[]) {
dvv_einfach dvv = new dvv_einfach("","");
if (argv.length < 1) {
dvv.print_usage();
System.exit(1);
}
if ((argv.length > 3) && argv[0].contentEquals("combine")) {
StringBuffer liste = new StringBuffer();
for (int z=2; z<argv.length; z++) {
liste.append(argv[z]);
liste.append(",");
}
liste.deleteCharAt(liste.length()-1);
System.out.println("Combining files: " + liste.toString());
dvv.combineARFFs(liste, argv[1]);
System.out.println("Converting file " + argv[1]);
dvv.convertDiscreteValues(argv[1]);
System.out.println("done.");
System.exit(0);
} else {
dvv.setConfigfile(argv[0]);
dvv.setDatafile(argv[1]);
System.out.println("dvv: Beginning attribute generation.\n" +
"configfile : " + dvv.configfile + "\n" +
"datafile : " + dvv.datafile);
}
if (DEBUGFLAG) {
System.err.println("Parsing configfile: " + dvv.configfile + " .");
}
System.err.println("There were erros reading the configuration!
Bailing out.");
System.exit(1);
}
if (!dvv.datafile.contains(".arff")) {
// Prepare data file by replacing special characters.
if (VERBOSE) {
System.out.println("Preparing Datafile: " + dvv.datafile);
}
dvv.prepareDatafile(0, dvv.datafile + ".arff");
// from now on we add the .arff extension statically
dvv.setDatafile(dvv.datafile + ".arff");
} else {
if (VERBOSE) {
System.out.println("The given data file contains the extension '.arff'!");
System.out.println("Therefore the conversion is skipped.");
}
}
// validate arff file
System.out.println("Validating arff file, this will take a moment or two.");
if (!dvv.arff_is_valid(dvv.datafile)) {
System.err.println("Error found in arff file. Please check the file.");
System.exit(1);
private void print_usage() {
System.err.println("Usage:\n" +
"\t java dvv datafile\n" +
"\t OR\n" +
"\t java dvv config.xml datafile\n" +
"\t OR\n" +
"\t java dvv cv arff file\n" +
"\t OR\n" +
"\t java dvv combine <targetfile> <list of arff files>");
}
/**
* Combines the given comma separated list of arff files into the given
* target file.
 * ATTENTION: No validity check is done. So the user has to make sure that
* the files are compatible!
* @param filenames (StringBuffer : comma separated list of files)
* @param target (String : name of the target file)
*/
public boolean combineARFFs(StringBuffer filenames, String target) {
FileReader fr = null;
FileWriter fw = null;
BufferedReader in = null;
BufferedWriter out = null;
String filename = "";
String line = "";
StringTokenizer st = null;
try {
// get list of filenames into tokens
st = new StringTokenizer(filenames.toString(), ",");
filename = st.nextToken();
// copy first file
in = new BufferedReader(fr = new FileReader(filename));
out = new BufferedWriter(fw = new FileWriter(target));
while ((line = in.readLine()) != null) {
out.write(line);
out.newLine();
}
// cycle through files and append them to the target.
while (st.hasMoreTokens()) {
filename = st.nextToken();
in = new BufferedReader(fr = new FileReader(filename));
while ((line = in.readLine()) != null) {
if (!(line.startsWith("@") || (line.length() < 2))) {
out.write(line);
out.newLine();
}
}
}
out.close();
return (true);
} catch (IOException e) {
System.err.println(e);
return (false);
}
} // combineARFFs
/**
* Parses the given arff file for the three discretized values and converts their
* classes to standardized values.
* (i.e. (65.34-78.43] -> medium)
*
*/
public boolean convertDiscreteValues(String filename) {
FileReader fr = null;
FileWriter fw = null;
String line = "";
String tmp = "";
StringBuffer replaceMe = new StringBuffer();
StringTokenizer st = null;
String[] ReplaceValue = new String[6];
String outfile = filename+".working-copy.tmp";
int i = 0;
ReplaceValue[1] = "\'\\\\\'very low\\\\\'\'";
ReplaceValue[2] = "\'\\\\\'low\\\\\'\'";
ReplaceValue[3] = "\'\\\\\'medium\\\\\'\'";
ReplaceValue[4] = "\'\\\\\'high\\\\\'\'";
ReplaceValue[5] = "\'\\\\\'very high\\\\\'\'";
try {
BufferedReader in = new BufferedReader(fr = new FileReader(filename));
BufferedWriter out = new BufferedWriter(fw = new FileWriter(outfile));
while ((line = in.readLine()) != null) {
if (line.startsWith("@attribute discrete")) {
st = new StringTokenizer(line,",");
tmp = st.nextToken();
tmp = tmp.substring(tmp.indexOf("{"));
tmp = tmp.replace('{',' ');
tmp = tmp.trim();
tmp = tmp.replace('}',' ');
tmp = tmp.trim();
if (replaceMe.length() > 1) {
st = new StringTokenizer(replaceMe.toString(), ",");
i = 0;
if (st.countTokens() > 1) {
while (st.hasMoreTokens()) {
} catch (IOException e) {
System.err.println(e);
return (false);
}
} // convertDiscreteValues
/**
* Prepares the datafile by replacing several special characters and writing the
* result into the specified "outfile".
 * @param linesToIgnore number of lines to skip at the beginning of the data file
 * @param outfile name of the output file
 */
public boolean prepareDatafile(int linesToIgnore, String outfile) {
FileReader fr = null;
FileWriter fw = null;
String line = "";
String header = "";
int LineCount = 0;
int LineLength = 0;
StringTokenizer st = null;
if (DEBUGFLAG) {
System.err.println("Parsing " + datafile + " into " + outfile);
}
try {
BufferedReader in = new BufferedReader(fr = new FileReader(datafile));
BufferedWriter out = new BufferedWriter(fw = new FileWriter(outfile));
if (DEBUGFLAG) {
System.err.println("Writing arff header.");
}
// Write ARFF header into target file.
header = genArffHeader(datafile);
if (header == null) {
System.err.println("Error generating arff header.");
}
line = "";
if (DEBUGFLAG) {
System.err.println("Converting data file.");
}
// Copy data file and replace special characters.
while ((line = in.readLine()) != null) {
if (linesToIgnore-- < 0) {
line = line.replaceAll("\t",",");
line = line.replaceAll("-1","?");
// calculate number of entries
st = new StringTokenizer(line, ",");
if (DEBUGFLAG) {
System.err.println("Line: " + LineCount + " : " + st.countTokens() +
" (" + LineLength + ")");
}
// compare line length to the line before
if ((st.countTokens() != LineLength) && (LineLength > 0)) { // lines do not match
System.err.println("Line " + LineCount +
" does not match the line before!");
System.err.println(LineLength + " entries versus " + st.countTokens() + " entries at current line.");
if (DROPLINES) { // do not write the line
System.err.println("Dropping line " + LineCount + "!");
} else {
// write the line
out.write(line);
out.newLine();
}
} else {
// line should be okay
out.write(line);
out.newLine();
}
LineCount++; // line counter
// save length of current line
LineLength = st.countTokens();
}
} // while
st = null; // free memory
// close the buffered reader/writer
in.close();
out.close();
} catch (IOException e) {
System.err.println(e);
return (false);
} finally {
try {
if (fr != null) fr.close();
if (fw != null) fw.close();
} catch ( IOException e ) {
System.err.println("Converted " + LineCount + " Lines.");
}
}
return (true);
} // prepareDatafile
// Attribute types
static final String[] attrTypes = {
"break",
"dummy",
"numeric",
"nominal", };
static int getAttrType(String name) {
int i = 0;
/**
 * This function parses through the configuration tree and finds the filters with
* their parameters.
*
*/
public boolean apply_filters() {
//final dvv_einfach configParser = new dvv_einfach();
// We need at least one adapter node and 4 levels of "dummy nodes".
AdapterNode configTree = new AdapterNode(document);
AdapterNode listNode = null;
AdapterNode attrNode = null;
AdapterNode entryNode = null;
AdapterNode filterNode = null;
// some strings for data storage
String tmp = "";
String filtername = "";
// TODO: Statical res allocation is neither elegant nor secure coding.
String filteropts[] = new String[1024];
StringTokenizer tmpTok;
int parCount = 0;
if (DEBUGFLAG) {
System.err.println("Document root has got " + configTree.childCount() + " children.");
}
try {
if (DEBUGFLAG) {
System.err.println("Opening new Instance from arff file: " + datafile);
}
Instances data = new Instances(
new BufferedReader(
new FileReader(datafile)));
Instances filtered_data = null;
if (DEBUGFLAG) {
System.err.println("Setting class attributes.");
}
// setting class attribute
data.setClassIndex(data.numAttributes() - 1);
Attributename = "";
attrCount = configTree.childCount();
while (attrCount > 0) {
listNode = configTree.child(attrCount);
elemCount = 0;
while (elemCount <= listNode.childCount()) {
attrNode = listNode.child(elemCount);
if (attrNode.childCount() > 0) {
int i = 0;
while (i < attrNode.childCount()) {
entryNode = attrNode.child(i);
if (entryNode.childCount() == 0) {
// Right now we use this only for checking for a break command
// within the config file.
tmp = "";
tmp = entryNode.toString();
tmpTok = new StringTokenizer(tmp, " ");
if (tmpTok.hasMoreTokens()) {
if (getAttrType(tmp) == 0) {
System.out.println("Found break in config file, exiting now.");
// save the data
save_data(data);
System.exit(0);
}
}
Attributename = tmp; // set attribute name
if (VERBOSE) {
System.out.println("Generating attribute '" + Attributename + "'");
}
int j = 0;
while (j < entryNode.childCount()) {
filterNode = entryNode.child(j);
// If the entry has no children we're going for the filtername
// and the parameters.
if (filterNode.childCount() == 0) {
tmp = filterNode.toString();
tmpTok = new StringTokenizer(tmp, " ");
if (tmpTok.hasMoreTokens()) {
tmpTok.nextToken();
tmp = tmpTok.nextToken();
}
if (tmp.equalsIgnoreCase("NAME")) { // set filter name
filtername = filterNode.content();
// check for break condition
if (filtername.equalsIgnoreCase("break")) {
System.out.println("Found break in config file, exiting now.");
// save the data
save_data(data);
System.exit(0);
}
} else if (tmp.equalsIgnoreCase("PARS")) { // set filter options
tmp = filterNode.content();
tmpTok = new StringTokenizer(tmp, " ");
parCount = 0;
boolean specialParameters = false;
StringBuffer sb = new StringBuffer();
StringBuffer sbtmp = new StringBuffer();
while (tmpTok.hasMoreTokens()) {
// do we have special parameters ("...")?
// TODO: Clean up here! This code is a mess!
if (specialParameters) {
sb.append(" ");
sb.append(tmpTok.nextToken());
// check for closing "
sbtmp.append(sb.charAt(sb.length() - 1));
tmp = sbtmp.toString();
if (tmp.contains("\"")) {
specialParameters = false;
// delete last "
sb.deleteCharAt((sb.length() - 1)); // found opening "
specialParameters = true;
sb.append(filteropts[parCount]);
// delete first "
sb.deleteCharAt(0);
System.err.print("\t" + filtername + "( ");
int k = 0;
while (k < parCount) {
System.err.print(filteropts[k] + " ");
k++;
}
System.err.println(")");
}
// execute filter
filtered_data = exec_filters(filtername, filteropts, data);
if (filtered_data != null) {
data = filtered_data;
} else {
System.err.println("An error occured while executing the last filter!");
}
if (VERBOSE) { System.out.print("."); } // clean up
if (VERBOSE) { System.out.println("\n"); } }
} catch (IOException ioe) {
System.err.println("An IO error occured!");
ioe.printStackTrace();
errorFlag = true;
if (DEBUGFLAG) { System.exit(1); }
return (false);
}
return (true);
} // apply_filters
/**
* This method saves the given instance to "datafile.arff".
* @param result_data the instance to save
*/
private boolean save_data(Instances result_data) {
try {
if (VERBOSE) {
System.out.println("Saving results to " + datafile + ".");
}
BufferedWriter results = new BufferedWriter(new FileWriter(datafile));
results.write(result_data.toString());
results.newLine();
results.flush();
results.close();
return (true);
} catch (IOException ioe) {
System.err.println("An error occured while saving the results!");
ioe.printStackTrace();
errorFlag = true;
if (DEBUGFLAG) { System.exit(1); }
return (false);
}
} // save_data
/**
* Executes the filter "filtername" with the given parameters.
*
* @param filtername contains the "exact" name of the weka filter to use.
* @param filteropts a list of filter options.
* @param source weka instance source
 * @return the filtered weka instances
*/
private Instances exec_filters(String filtername, String filteropts[], Instances source) {
try {
//final dvv_einfach configFilter = new dvv_einfach();
setFilter(filtername, filteropts);
if (DEBUGFLAG) {
System.err.println("Applying filter.");
}
m_Filter.setInputFormat(source);
Instances filtered = Filter.useFilter(source, m_Filter);
return (filtered);
} catch (Exception e) {
System.err.println("An error occured during the execution of the following filter: \n"
+ "\tName: " + filtername);
e.printStackTrace();
errorFlag = true;
if (DEBUGFLAG) { System.exit(1); }
}
return null;
} // exec_filters
// An array of names for DOM node-types
// (Array indexes = nodeType() values.)
static final String[] typeName = {
"none",
final int ELEMENT_TYPE = 1;
static final int ATTR_TYPE = 2;
static final int TEXT_TYPE = 3;
static final int CDATA_TYPE = 4;
static final int ENTITYREF_TYPE = 5;
static final int ENTITY_TYPE = 6;
static final int PROCINSTR_TYPE = 7;
static final int COMMENT_TYPE = 8;
static final int DOCUMENT_TYPE = 9;
static final int DOCTYPE_TYPE = 10;
static final int DOCFRAG_TYPE = 11;
static final int NOTATION_TYPE = 12;
// The list of elements to display in the tree
static String[] treeElementNames = {
"ATTRIBUTELIST",
Boolean treeElement(String elementName) {
for (int i=0; i<treeElementNames.length; i++) {
if ( elementName.equals(treeElementNames[i]) ) return true;
}
return false;
}
/**
 * This class makes handling the dom tree a bit easier and was taken
 * from SUN's Java and XML tutorial.
 */
public class AdapterNode {
org.w3c.dom.Node domNode;
// Construct an Adapter node from a DOM node
public AdapterNode(org.w3c.dom.Node node) {
domNode = node;
}
// Return a string that identifies this node in the tree
// *** Refer to table at top of org.w3c.dom.Node ***
public String toString() {
String s = typeName[domNode.getNodeType()];
String nodeName = domNode.getNodeName();
if (! nodeName.startsWith("#")) {
s += ": " + nodeName;
}
if (compress) {
String t = content().trim();
int x = t.indexOf("\n");
if (x >= 0) t = t.substring(0, x);
s += " " + t;
return s;
}
if (domNode.getNodeValue() != null) {
if (s.startsWith("ProcInstr"))
s += ", ";
else
s += ": ";
// Trim the value to get rid of NL's at the front
String t = domNode.getNodeValue().trim();
int x = t.indexOf("\n");
if (x >= 0) t = t.substring(0, x);
s += t;
}
return s;
}
public String content() {
String s = "";
org.w3c.dom.NodeList nodeList = domNode.getChildNodes();
for (int i=0; i<nodeList.getLength(); i++) {
org.w3c.dom.Node node = nodeList.item(i);
int type = node.getNodeType();
AdapterNode adpNode = new AdapterNode(node);
if (type == ELEMENT_TYPE) {
if ( treeElement(node.getNodeName()) ) continue;
s += "<" + node.getNodeName() + ">";
s += adpNode.content();
s += "</" + node.getNodeName() + ">";
} else if (type == TEXT_TYPE) {
s += node.getNodeValue();
} else if (type == ENTITYREF_TYPE) {
// The content is in the TEXT node under it
s += adpNode.content();
} else if (type == CDATA_TYPE) {
StringBuffer sb = new StringBuffer( node.getNodeValue() );
for (int j=0; j<sb.length(); j++) {
if (sb.charAt(j) == '<') {
s += "<pre>" + sb + "\n</pre>";
} }
return s;
}
public int index(AdapterNode child) {
//System.err.println("Looking for index of " + child);
int count = childCount();
for (int i=0; i<count; i++) {
AdapterNode n = this.child(i);
if (child == n) return i;
}
return -1; // Should never get here.
}
public AdapterNode child(int searchIndex) { //Note: JTree index is zero-based.
org.w3c.dom.Node node =
domNode.getChildNodes().item(searchIndex);
if (compress) {
// Return Nth displayable node
int elementNodeIndex = 0;
for (int i=0; i<domNode.getChildNodes().getLength(); i++) {
node = domNode.getChildNodes().item(i);
if (node.getNodeType() == ELEMENT_TYPE
&& treeElement( node.getNodeName() )
&& elementNodeIndex++ == searchIndex) {
break;
}
}
}
return new AdapterNode(node);
}
public int childCount() {
if (!compress) {
// Indent this
return domNode.getChildNodes().getLength();
}
int count = 0;
for (int i=0; i<domNode.getChildNodes().getLength(); i++) {
org.w3c.dom.Node node = domNode.getChildNodes().item(i);
if (node.getNodeType() == ELEMENT_TYPE
&& treeElement( node.getNodeName() )) {
count++;
}
}
return count;
} // childCount
} // class AdapterNode
} // class dvv_einfach

B.2 sffs

package Datenvorverarbeitung;
/**
 * @author Jens Jahnke
 * <p>Title: Sequential Floating Forward Search</p>
 * <p>Description: Performs an SFFS (sequential floating forward search) on the given data.</p>
 * <p>Copyright: Copyright (c) 2005/06</p>
 * <p>Organisation: Fraunhofer IGD</p>
 * @version 0.9
 */
public class sffs {
final static boolean DEBUGFLAG = false;
final static boolean VERBOSE = false;
protected Instances m_data_train;
protected Instances m_data_test;
protected int maxAttributes = 13; // maximum number of attributes that should be used
protected StringBuffer m_SelectedAttributes; // contains the selected attributes
protected StringBuffer m_Attributes; // contains all attributes
protected StringBuffer m_ClassAttributes; // attributes that shall not be removed
protected double m_delta = 0.0025; // break indicator
protected double m_error = -1; // buffer for last calculated error
protected double m_correctly = 0; // correctly classified instances (%)
protected double m_incorrectly = 0; // incorrectly classified instances (%)
private StringBuffer m_States;
protected Filter m_Filter = null;
protected int m_classifier = 0; // classifier
protected int m_Folds = 6; // number of folds for cross validation
public sffs(String attrs, String classattrs, int maxAttrs) {
m_classifier = 0; // set to default (Naive Bayes)
m_data_train = null;
m_data_test = null;
m_SelectedAttributes = new StringBuffer();
m_Attributes = new StringBuffer(attrs);