The results of the tests on all 55 datasets, where in each case one dataset was used for training and the remaining 54 for testing. Naive Bayes was used as the classification algorithm.

Training set    Average error rate (φ̄)
                I     II    III   IV    V     VI    VII   VIII  IX    X
(rows 1-47 on the preceding pages)
48              0.45  0.58  0.48  0.51  0.48  0.15  0.51  0.81  0.46  0.63
49              0.45  0.58  0.46  0.51  0.39  0.15  0.90  0.81  0.46  0.61
50              0.45  0.65  0.77  0.56  0.40  0.15  0.44  0.74  0.46  0.75
51              0.45  0.64  0.76  0.52  0.39  0.15  0.86  0.80  0.46  0.77
52              0.45  0.64  0.77  0.51  0.39  0.15  0.88  0.79  0.46  0.61
53              0.45  0.58  0.76  0.84  0.85  0.15  0.41  0.85  0.78  0.61
54              0.45  0.63  0.46  0.51  0.39  0.15  0.41  0.59  0.46  0.60
55              0.45  0.58  0.84  0.51  0.39  0.15  0.41  0.58  0.80  0.65
∅               0.65  0.65  0.64  0.67  0.59  0.29  0.62  0.73  0.62  0.71

Table A.15: The average error rates when testing person and day independence on all datasets
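The evaluation behind Table A.15 trains a Naive Bayes classifier on a single dataset and measures the error rate on every remaining dataset. A minimal Weka sketch of this procedure is shown below; it is an illustration only, not part of the thesis sources, and the class name and command-line handling are assumptions.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;

// Hypothetical helper (not part of the thesis code): trains Naive Bayes on the
// first ARFF file given on the command line and reports the error rate on each
// of the remaining ARFF files.
public class LeaveOneDatasetOutSketch {
    public static void main(String[] args) throws Exception {
        Instances train = new Instances(new BufferedReader(new FileReader(args[0])));
        train.setClassIndex(train.numAttributes() - 1);
        NaiveBayes nb = new NaiveBayes();
        nb.buildClassifier(train);
        double sum = 0;
        for (int i = 1; i < args.length; i++) {
            Instances test = new Instances(new BufferedReader(new FileReader(args[i])));
            test.setClassIndex(test.numAttributes() - 1);
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(nb, test);
            System.out.println(args[i] + ": error rate = " + eval.errorRate());
            sum += eval.errorRate();
        }
        // average error rate over all test files
        System.out.println("average error rate = " + sum / (args.length - 1));
    }
}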
Appendix B
Source Code - Package Datenvorverarbeitung
B.1 dvv_einfach
package Datenvorverarbeitung;
/**
 * @author Jens Jahnke
 * <p>Title: Datenvorverarbeitung</p>
 * <p>Description: Parses a configuration file in XML format and generates an ARFF file for Weka.</p>
 * <p>Copyright: Copyright (c) 2005/06</p>
 * <p>Organisation: Fraunhofer IGD</p>
 * @version 0.8
 */
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
//import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
//import org.w3c.dom.DOMException;
import java.io.*;
import java.util.StringTokenizer;
// Weka
import weka.core.Instances;
import weka.core.OptionHandler;
//import weka.core.converters.ArffSaver;
import weka.core.converters.CSVLoader;
//import weka.core.Utils;
import weka.filters.Filter;
public class dvv_einfach {
// DEBUGFLAG :-)
final static boolean DEBUGFLAG = false;
// Be verbose...
final static boolean VERBOSE = true;
// drop lines?
final static boolean DROPLINES = true;
// Global value so it can be ref'd by the tree-adapter
static Document document;
// name of the configuration file
public String configfile = "Demokoffer-Dvv.xml";
// name of the data file
public String datafile = "";
// compress the tree?
boolean compress = true;
// error flag
static Boolean errorFlag = false;
// attribute name
protected String Attributename = "";
// instance
protected Instances m_Training = null;
// filter
protected static Filter m_Filter = null;
// DOM Element
protected Element domElement = null;
// counter
protected static int attrCount = 0;
protected static int elemCount = 0;
/**
 * Constructor
 * @param config (Name of the config file as String)
 * @param data (Name of the data file as String)
 */
public dvv_einfach(String config, String data) {
configfile = config;
datafile = data;
} // constructor
public void setConfigfile(String config) {
configfile = config;
}
public void setDatafile(String data) {
datafile = data;
}
/**
 * sets the filter to use
 * @param name the classname of the filter
 * @param options the options for the filter
 */
public void setFilter(String name, String[] options) throws Exception {
m_Filter = (Filter) Class.forName(name).newInstance();
if (m_Filter instanceof OptionHandler)
((OptionHandler) m_Filter).setOptions(options);
}
/**
 * Uses the csvloader class to generate the arff header.
 * @param infile name of the input file
 */
private String genArffHeader(String infile) {
// load CSV
CSVLoader loader = new CSVLoader();
try {
loader.setSource(new File(infile));
Instances data = loader.getStructure();
return (data.toString());
} catch (IOException ioe) {
ioe.printStackTrace();
errorFlag = true;
return (null);
}
} // genArffHeader
/**
* Checks if the arff file is valid.
* @param name of the arff file
* @return true or false
*/
public boolean arff_is_valid(String name) {
String[] tmp = new String[1];
errorFlag = false;
try {
Instances data = new Instances(
new BufferedReader(
new FileReader(name)));
data.setClassIndex(data.numAttributes() - 1);
try {
//final dvv_einfach configFilter = new dvv_einfach();
this.setFilter("weka.filters.AllFilter", tmp);
m_Filter.setInputFormat(data);
Instances filtered = Filter.useFilter(data, m_Filter);
if (DEBUGFLAG) {
System.err.println("relationName: " + filtered.relationName());
}
} catch (Exception e) {
e.printStackTrace();
errorFlag = true;
}
} catch (IOException ioe) {
ioe.printStackTrace();
errorFlag = true;
}
return (!errorFlag);
} // arff_is_valid
public boolean parseConfig() {
DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
// use validating parser
factory.setValidating(true);
// check validity of namespaces
factory.setNamespaceAware(true);
errorFlag = false;
try {
DocumentBuilder builder = factory.newDocumentBuilder();
builder.setErrorHandler(
new org.xml.sax.ErrorHandler() {
// fatal errors (an exception is guaranteed)
public void fatalError(SAXParseException exception) throws SAXException {
System.out.println("** Error"
+ ", line " + exception.getLineNumber() + ", uri " + exception.getSystemId());
throw exception;
}
// treat validation errors as fatal
public void error(SAXParseException e) throws SAXParseException
{
System.out.println("** Error"
+ ", line " + e.getLineNumber() + ", uri " + e.getSystemId());
throw e;
}
// dump warnings too
public void warning(SAXParseException err) throws SAXParseException
{
System.out.println("** Warning"
+ ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
System.out.println(" " + err.getMessage());
} } );
document = builder.parse( new File(configfile) );
} catch (SAXException sxe) {
// Error generated during parsing
Exception x = sxe;
if (sxe.getException() != null) x = sxe.getException();
x.printStackTrace();
errorFlag = true;
} catch (ParserConfigurationException pce) {
// Parser with specified options can't be built
pce.printStackTrace();
errorFlag = true;
} catch (IOException ioe) {
// I/O error
ioe.printStackTrace();
errorFlag = true;
}
return (!errorFlag);
} // parseConfig
public static void main(String argv[]) {
dvv_einfach dvv = new dvv_einfach("","");
if (argv.length < 1) {
dvv.print_usage();
System.exit(1);
}
if ((argv.length > 3) && argv[0].contentEquals("combine")) {
StringBuffer liste = new StringBuffer();
for (int z=2; z<argv.length; z++) {
liste.append(argv[z]);
liste.append(",");
}
liste.deleteCharAt(liste.length()-1);
System.out.println("Combining files: " + liste.toString());
dvv.combineARFFs(liste, argv[1]);
System.out.println("Converting file " + argv[1]);
dvv.convertDiscreteValues(argv[1]);
System.out.println("done.");
System.exit(0);
} else {
dvv.setConfigfile(argv[0]);
dvv.setDatafile(argv[1]);
System.out.println("dvv: Beginning attribute generation.\n" +
"configfile : " + dvv.configfile + "\n" +
"datafile : " + dvv.datafile);
}
if (DEBUGFLAG) {
System.err.println("Parsing configfile: " + dvv.configfile + " .");
}
System.err.println("There were erros reading the configuration!
Bailing out.");
System.exit(1);
}
if (!dvv.datafile.contains(".arff")) {
// Prepare data file by replacing special characters.
if (VERBOSE) {
System.out.println("Preparing Datafile: " + dvv.datafile);
}
dvv.prepareDatafile(0, dvv.datafile + ".arff");
// from now on we add the .arff extension statically
dvv.setDatafile(dvv.datafile + ".arff");
} else {
if (VERBOSE) {
System.out.println("The given data file contains the extension '.arff'!");
System.out.println("Therefore the conversion is skipped.");
}
}
// validate arff file
System.out.println("Validating arff file, this will take a moment or two.");
if (!dvv.arff_is_valid(dvv.datafile)) {
System.err.println("Error found in arff file. Please check the file.");
System.exit(1);
private void print_usage() {
System.err.println("Usage:\n" +
"\t java dvv datafile\n" +
"\t OR\n" +
"\t java dvv config.xml datafile\n" +
"\t OR\n" +
"\t java dvv cv arff file\n" +
"\t OR\n" +
"\t java dvv combine <targetfile> <list of arff files>");
}
/**
* Combines the given comma separated list of arff files into the given
* target file.
 * ATTENTION: No validity check is done. So the user has to make sure that
* the files are compatible!
* @param filenames (StringBuffer : comma separated list of files)
* @param target (String : name of the target file)
*/
public boolean combineARFFs(StringBuffer filenames, String target) {
FileReader fr = null;
FileWriter fw = null;
BufferedReader in = null;
BufferedWriter out = null;
String filename = "";
String line = "";
StringTokenizer st = null;
try {
// get list of filenames into tokens
st = new StringTokenizer(filenames.toString(), ",");
filename = st.nextToken();
// copy first file
in = new BufferedReader(fr = new FileReader(filename));
out = new BufferedWriter(fw = new FileWriter(target));
while ((line = in.readLine()) != null) {
out.write(line);
out.newLine();
}
// cycle through files and append them to the target.
while (st.hasMoreTokens()) {
filename = st.nextToken();
in = new BufferedReader(fr = new FileReader(filename));
while ((line = in.readLine()) != null) {
if (!(line.startsWith("@") || (line.length() < 2))) {
out.write(line);
out.newLine();
}
}
}
out.close();
return (true);
} catch (IOException e) {
System.err.println(e);
return (false);
}
} // combineARFFs
/**
* Parses the given arff file for the three discretized values and converts their
* classes to standardized values.
* (i.e. (65.34-78.43] -> medium)
*
*/
public boolean convertDiscreteValues(String filename) {
FileReader fr = null;
FileWriter fw = null;
String line = "";
String tmp = "";
StringBuffer replaceMe = new StringBuffer();
StringTokenizer st = null;
String[] ReplaceValue = new String[6];
String outfile = filename+".working-copy.tmp";
int i = 0;
ReplaceValue[1] = "\'\\\\\'very low\\\\\'\'";
ReplaceValue[2] = "\'\\\\\'low\\\\\'\'";
ReplaceValue[3] = "\'\\\\\'medium\\\\\'\'";
ReplaceValue[4] = "\'\\\\\'high\\\\\'\'";
ReplaceValue[5] = "\'\\\\\'very high\\\\\'\'";
try {
BufferedReader in = new BufferedReader(fr = new FileReader(filename));
BufferedWriter out = new BufferedWriter(fw = new FileWriter(outfile));
while ((line = in.readLine()) != null) {
if (line.startsWith("@attribute discrete")) {
st = new StringTokenizer(line,",");
tmp = st.nextToken();
tmp = tmp.substring(tmp.indexOf("{"));
tmp = tmp.replace('{',' ');
tmp = tmp.trim();
tmp = tmp.replace('}',' ');
tmp = tmp.trim();
if (replaceMe.length() > 1) {
st = new StringTokenizer(replaceMe.toString(), ",");
i = 0;
if (st.countTokens() > 1) {
while (st.hasMoreTokens()) {
} catch (IOException e) {
System.err.println(e);
return (false);
}
} // convertDiscreteValues
/**
* Prepares the datafile by replacing several special characters and writing the
* result into the specified "outfile".
 * @param linesToIgnore number of lines to skip at the beginning of the data file
 * @param outfile name of the output file
 */
public boolean prepareDatafile(int linesToIgnore, String outfile) {
FileReader fr = null;
FileWriter fw = null;
String line = "";
String header = "";
int LineCount = 0;
int LineLength = 0;
StringTokenizer st = null;
if (DEBUGFLAG) {
System.err.println("Parsing " + datafile + " into " + outfile);
}
try {
BufferedReader in = new BufferedReader(fr = new FileReader(datafile));
BufferedWriter out = new BufferedWriter(fw = new FileWriter(outfile));
if (DEBUGFLAG) {
System.err.println("Writing arff header.");
}
// Write ARFF header into target file.
header = genArffHeader(datafile);
if (header == null) {
System.err.println("Error generating arff header.");
}
line = "";
if (DEBUGFLAG) {
System.err.println("Converting data file.");
}
// Copy data file and replace special characters.
while ((line = in.readLine()) != null) {
if (linesToIgnore-- < 0) {
line = line.replaceAll("\t",",");
line = line.replaceAll("-1","?");
// calculate number of entries
st = new StringTokenizer(line, ",");
if (DEBUGFLAG) {
System.err.println("Line: " + LineCount + " : " + st.countTokens() +
" (" + LineLength + ")");
}
// compare line length to the line before
if ((st.countTokens() != LineLength) && (LineLength > 0)) { // lines do not match
System.err.println("Line " + LineCount +
" does not match the line before!");
System.err.println(LineLength + " entries versus " + st.countTokens() + " entries at current line.");
if (DROPLINES) { // do not write the line
System.err.println("Dropping line " + LineCount + "!");
} else {
// write the line
out.write(line);
out.newLine();
}
} else {
// line should be okay
out.write(line);
out.newLine();
}
LineCount++; // line counter
// save length of current line
LineLength = st.countTokens();
}
} // while
st = null; // free memory
// close the buffered reader/writer
in.close();
out.close();
} catch (IOException e) {
System.err.println(e);
return (false);
} finally {
try {
if (fr != null) fr.close();
if (fw != null) fw.close();
} catch ( IOException e ) {
System.err.println("Converted " + LineCount + " Lines.");
}
}
return (true);
} // prepareDatafile
// Attribute types
static final String[] attrTypes = {
"break",
"dummy",
"numeric",
"nominal", };
static int getAttrType(String name) {
int i = 0;
/**
 * This function parses through the configuration tree and finds the filters with
* their parameters.
*
*/
public boolean apply_filters() {
//final dvv_einfach configParser = new dvv_einfach();
// We need at least one adapter node and 4 levels of "dummy nodes".
AdapterNode configTree = new AdapterNode(document);
AdapterNode listNode = null;
AdapterNode attrNode = null;
AdapterNode entryNode = null;
AdapterNode filterNode = null;
// some strings for data storage
String tmp = "";
String filtername = "";
// TODO: Statical res allocation is neither elegant nor secure coding.
String filteropts[] = new String[1024];
StringTokenizer tmpTok;
int parCount = 0;
if (DEBUGFLAG) {
System.err.println("Document root has got " + configTree.childCount() + " children.");
}
try {
if (DEBUGFLAG) {
System.err.println("Opening new Instance from arff file: " + datafile);
}
Instances data = new Instances(
new BufferedReader(
new FileReader(datafile)));
Instances filtered_data = null;
if (DEBUGFLAG) {
System.err.println("Setting class attributes.");
}
// setting class attribute
data.setClassIndex(data.numAttributes() - 1);
Attributename = "";
attrCount = configTree.childCount();
while (attrCount > 0) {
listNode = configTree.child(attrCount);
elemCount = 0;
while (elemCount <= listNode.childCount()) {
attrNode = listNode.child(elemCount);
if (attrNode.childCount() > 0) {
int i = 0;
while (i < attrNode.childCount()) {
entryNode = attrNode.child(i);
if (entryNode.childCount() == 0) {
// Right now we use this only for checking for a break command
// within the config file.
tmp = "";
tmp = entryNode.toString();
tmpTok = new StringTokenizer(tmp, " ");
if (tmpTok.hasMoreTokens()) {
if (getAttrType(tmp) == 0) {
System.out.println("Found break in config file, exiting now.");
// save the data
save_data(data);
System.exit(0);
}
}
Attributename = tmp; // set attribute name
if (VERBOSE) {
System.out.println("Generating attribute '" + Attributename + "'");
}
int j = 0;
while (j < entryNode.childCount()) {
filterNode = entryNode.child(j);
// If the entry has no children we're going for the filtername
// and the parameters.
if (filterNode.childCount() == 0) {
tmp = filterNode.toString();
tmpTok = new StringTokenizer(tmp, " ");
if (tmpTok.hasMoreTokens()) {
tmpTok.nextToken();
tmp = tmpTok.nextToken();
}
if (tmp.equalsIgnoreCase("NAME")) { // set filter name
filtername = filterNode.content();
// check for break condition
if (filtername.equalsIgnoreCase("break")) {
System.out.println("Found break in config file, exiting now.");
// save the data
save_data(data);
System.exit(0);
}
} else if (tmp.equalsIgnoreCase("PARS")) { // set filter options
tmp = filterNode.content();
tmpTok = new StringTokenizer(tmp, " ");
parCount = 0;
boolean specialParameters = false;
StringBuffer sb = new StringBuffer();
StringBuffer sbtmp = new StringBuffer();
while (tmpTok.hasMoreTokens()) {
// do we have special parameters ("...")?
// TODO: Clean up here! This code is a mess!
if (specialParameters) {
sb.append(" ");
sb.append(tmpTok.nextToken());
// check for closing "
sbtmp.append(sb.charAt(sb.length() - 1));
tmp = sbtmp.toString();
if (tmp.contains("\"")) {
specialParameters = false;
// delete last "
sb.deleteCharAt((sb.length() - 1)); // found opening "
specialParameters = true;
sb.append(filteropts[parCount]);
// delete first "
sb.deleteCharAt(0);
System.err.print("\t" + filtername + "( ");
int k = 0;
while (k < parCount) {
System.err.print(filteropts[k] + " ");
k++;
}
System.err.println(")");
}
// execute filter
filtered_data = exec_filters(filtername, filteropts, data);
if (filtered_data != null) {
data = filtered_data;
} else {
System.err.println("An error occured while executing the last filter!");
}
if (VERBOSE) { System.out.print("."); } // clean up
if (VERBOSE) { System.out.println("\n"); } }
} catch (IOException ioe) {
System.err.println("An IO error occured!");
ioe.printStackTrace();
errorFlag = true;
if (DEBUGFLAG) { System.exit(1); }
return (false);
}
return (true);
} // apply_filters
/**
* This method saves the given instance to "datafile.arff".
* @param result_data the instance to save
*/
private boolean save_data(Instances result_data) {
try {
if (VERBOSE) {
System.out.println("Saving results to " + datafile + ".");
}
BufferedWriter results = new BufferedWriter(new FileWriter(datafile));
results.write(result_data.toString());
results.newLine();
results.flush();
results.close();
return (true);
} catch (IOException ioe) {
System.err.println("An error occured while saving the results!");
ioe.printStackTrace();
errorFlag = true;
if (DEBUGFLAG) { System.exit(1); }
return (false);
}
} // save_data
/**
* Executes the filter "filtername" with the given parameters.
*
* @param filtername contains the "exact" name of the weka filter to use.
* @param filteropts a list of filter options.
* @param source weka instance source
 * @return the filtered weka instances
*/
private Instances exec_filters(String filtername, String filteropts[], Instances source) {
try {
//final dvv_einfach configFilter = new dvv_einfach();
setFilter(filtername, filteropts);
if (DEBUGFLAG) {
System.err.println("Applying filter.");
}
m_Filter.setInputFormat(source);
Instances filtered = Filter.useFilter(source, m_Filter);
return (filtered);
} catch (Exception e) {
System.err.println("An error occured during the execution of the following filter: \n"
+ "\tName: " + filtername);
e.printStackTrace();
errorFlag = true;
if (DEBUGFLAG) { System.exit(1); }
}
return null;
} // exec_filters
// An array of names for DOM node-types
// (Array indexes = nodeType() values.)
static final String[] typeName = {
"none",
final int ELEMENT_TYPE = 1;
static final int ATTR_TYPE = 2;
static final int TEXT_TYPE = 3;
static final int CDATA_TYPE = 4;
static final int ENTITYREF_TYPE = 5;
static final int ENTITY_TYPE = 6;
static final int PROCINSTR_TYPE = 7;
static final int COMMENT_TYPE = 8;
static final int DOCUMENT_TYPE = 9;
static final int DOCTYPE_TYPE = 10;
static final int DOCFRAG_TYPE = 11;
static final int NOTATION_TYPE = 12;
// The list of elements to display in the tree
static String[] treeElementNames = {
"ATTRIBUTELIST",
Boolean treeElement(String elementName) {
for (int i=0; i<treeElementNames.length; i++) {
if ( elementName.equals(treeElementNames[i]) ) return true;
}
return false;
}
/**
 * This class makes handling the dom tree a bit easier and was taken
 * from SUN's Java and XML tutorial.
 */
public class AdapterNode {
org.w3c.dom.Node domNode;
// Construct an Adapter node from a DOM node
public AdapterNode(org.w3c.dom.Node node) {
domNode = node;
}
// Return a string that identifies this node in the tree
// *** Refer to table at top of org.w3c.dom.Node ***
public String toString() {
String s = typeName[domNode.getNodeType()];
String nodeName = domNode.getNodeName();
if (! nodeName.startsWith("#")) {
s += ": " + nodeName;
}
if (compress) {
String t = content().trim();
int x = t.indexOf("\n");
if (x >= 0) t = t.substring(0, x);
s += " " + t;
return s;
}
if (domNode.getNodeValue() != null) {
if (s.startsWith("ProcInstr"))
s += ", ";
else
s += ": ";
// Trim the value to get rid of NL's at the front
String t = domNode.getNodeValue().trim();
int x = t.indexOf("\n");
if (x >= 0) t = t.substring(0, x);
s += t;
}
return s;
}
public String content() {
String s = "";
org.w3c.dom.NodeList nodeList = domNode.getChildNodes();
for (int i=0; i<nodeList.getLength(); i++) {
org.w3c.dom.Node node = nodeList.item(i);
int type = node.getNodeType();
AdapterNode adpNode = new AdapterNode(node);
if (type == ELEMENT_TYPE) {
if ( treeElement(node.getNodeName()) ) continue;
s += "<" + node.getNodeName() + ">";
s += adpNode.content();
s += "</" + node.getNodeName() + ">";
} else if (type == TEXT_TYPE) {
s += node.getNodeValue();
} else if (type == ENTITYREF_TYPE) {
// The content is in the TEXT node under it
s += adpNode.content();
} else if (type == CDATA_TYPE) {
StringBuffer sb = new StringBuffer( node.getNodeValue() );
for (int j=0; j<sb.length(); j++) {
if (sb.charAt(j) == '<') {
s += "<pre>" + sb + "\n</pre>";
} }
return s;
}
public int index(AdapterNode child) {
//System.err.println("Looking for index of " + child);
int count = childCount();
for (int i=0; i<count; i++) {
AdapterNode n = this.child(i);
if (child == n) return i;
}
return -1; // Should never get here.
}
public AdapterNode child(int searchIndex) { //Note: JTree index is zero-based.
org.w3c.dom.Node node =
domNode.getChildNodes().item(searchIndex);
if (compress) {
// Return Nth displayable node
int elementNodeIndex = 0;
for (int i=0; i<domNode.getChildNodes().getLength(); i++) {
node = domNode.getChildNodes().item(i);
if (node.getNodeType() == ELEMENT_TYPE
&& treeElement( node.getNodeName() )
&& elementNodeIndex++ == searchIndex) {
break;
}
}
}
return new AdapterNode(node);
}
public int childCount() {
if (!compress) {
// Indent this
return domNode.getChildNodes().getLength();
}
int count = 0;
for (int i=0; i<domNode.getChildNodes().getLength(); i++) {
org.w3c.dom.Node node = domNode.getChildNodes().item(i);
if (node.getNodeType() == ELEMENT_TYPE
&& treeElement( node.getNodeName() )) {
count++;
}
}
return count;
} // childCount
} // class AdapterNode
} // class dvv_einfach

B.2 sffs

package Datenvorverarbeitung;
/**
 * @author Jens Jahnke
 * <p>Title: Sequential Floating Forward Search</p>
 * <p>Description: Performs an SFFS (sequential floating forward search) on the given data.</p>
 * <p>Copyright: Copyright (c) 2005/06</p>
 * <p>Organisation: Fraunhofer IGD</p>
 * @version 0.9
 */
public class sffs {
final static boolean DEBUGFLAG = false;
final static boolean VERBOSE = false;
protected Instances m_data_train;
protected Instances m_data_test;
protected int maxAttributes = 13; // maximum number of attributes that should be used
protected StringBuffer m_SelectedAttributes; // contains the selected attributes
protected StringBuffer m_Attributes; // contains all attributes
protected StringBuffer m_ClassAttributes; // attributes that shall not be removed
protected double m_delta = 0.0025; // break indicator
protected double m_error = -1; // buffer for last calculated error
protected double m_correctly = 0; // correctly classified instances (%)
protected double m_incorrectly = 0; // incorrectly classified instances (%)
private StringBuffer m_States;
protected Filter m_Filter = null;
protected int m_classifier = 0; // classifier
protected int m_Folds = 6; // number of folds for cross validation
public sffs(String attrs, String classattrs, int maxAttrs) {
m_classifier = 0; // set to default (Naive Bayes)
m_data_train = null;
m_data_test = null;
m_SelectedAttributes = new StringBuffer();
m_Attributes = new StringBuffer(attrs);