Evaluation.rar

  • PUDN用户
    了解作者
  • Java
    开发工具
  • 15KB
    文件大小
  • rar
    文件格式
  • 0
    收藏次数
  • 1 积分
    下载积分
  • 3
    下载次数
  • 2009-11-25 21:16
    上传日期
wonderful source code for weka tools mential
Evaluation.rar
  • Evaluation.java
    81.3KB
内容介绍
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * Evaluation.java * Copyright (C) 1999 Eibe Frank,Len Trigg * */ package weka.classifiers; import java.util.*; import java.io.*; import weka.core.*; import weka.estimators.*; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; /** * Class for evaluating machine learning models. <p/> * * ------------------------------------------------------------------- <p/> * * General options when evaluating a learning scheme from the command-line: <p/> * * -t filename <br/> * Name of the file with the training data. (required) <p/> * * -T filename <br/> * Name of the file with the test data. If missing a cross-validation * is performed. <p/> * * -c index <br/> * Index of the class attribute (1, 2, ...; default: last). <p/> * * -x number <br/> * The number of folds for the cross-validation (default: 10). <p/> * * -s seed <br/> * Random number seed for the cross-validation (default: 1). <p/> * * -m filename <br/> * The name of a file containing a cost matrix. <p/> * * -l filename <br/> * Loads classifier from the given file. <p/> * * -d filename <br/> * Saves classifier built from the training data into the given file. <p/> * * -v <br/> * Outputs no statistics for the training data. <p/> * * -o <br/> * Outputs statistics only, not the classifier. 
<p/> * * -i <br/> * Outputs information-retrieval statistics per class. <p/> * * -k <br/> * Outputs information-theoretic statistics. <p/> * * -p range <br/> * Outputs predictions for test instances, along with the attributes in * the specified range (and nothing else). Use '-p 0' if no attributes are * desired. <p/> * * -r <br/> * Outputs cumulative margin distribution (and nothing else). <p/> * * -g <br/> * Only for classifiers that implement "Graphable." Outputs * the graph representation of the classifier (and nothing * else). <p/> * * ------------------------------------------------------------------- <p/> * * Example usage as the main of a classifier (called FunkyClassifier): * <code> <pre> * public static void main(String [] args) { * try { * Classifier scheme = new FunkyClassifier(); * System.out.println(Evaluation.evaluateModel(scheme, args)); * } catch (Exception e) { * System.err.println(e.getMessage()); * } * } * </pre> </code> * <p/> * * ------------------------------------------------------------------ <p/> * * Example usage from within an application: * <code> <pre> * Instances trainInstances = ... instances got from somewhere * Instances testInstances = ... instances got from somewhere * Classifier scheme = ... scheme got from somewhere * * Evaluation evaluation = new Evaluation(trainInstances); * evaluation.evaluateModel(scheme, testInstances); * System.out.println(evaluation.toSummaryString()); * </pre> </code> * * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 1.53.2.5 $ */ public class Evaluation implements Summarizable { /** The number of classes. */ protected int m_NumClasses; /** The number of folds for a cross-validation. */ protected int m_NumFolds; /** The weight of all incorrectly classified instances. */ protected double m_Incorrect; /** The weight of all correctly classified instances. */ protected double m_Correct; /** The weight of all unclassified instances. 
*/ protected double m_Unclassified; /*** The weight of all instances that had no class assigned to them. */ protected double m_MissingClass; /** The weight of all instances that had a class assigned to them. */ protected double m_WithClass; /** Array for storing the confusion matrix. */ protected double [][] m_ConfusionMatrix; /** The names of the classes. */ protected String [] m_ClassNames; /** Is the class nominal or numeric? */ protected boolean m_ClassIsNominal; /** The prior probabilities of the classes */ protected double [] m_ClassPriors; /** The sum of counts for priors */ protected double m_ClassPriorsSum; /** The cost matrix (if given). */ protected CostMatrix m_CostMatrix; /** The total cost of predictions (includes instance weights) */ protected double m_TotalCost; /** Sum of errors. */ protected double m_SumErr; /** Sum of absolute errors. */ protected double m_SumAbsErr; /** Sum of squared errors. */ protected double m_SumSqrErr; /** Sum of class values. */ protected double m_SumClass; /** Sum of squared class values. */ protected double m_SumSqrClass; /*** Sum of predicted values. */ protected double m_SumPredicted; /** Sum of squared predicted values. */ protected double m_SumSqrPredicted; /** Sum of predicted * class values. 
*/ protected double m_SumClassPredicted; /** Sum of absolute errors of the prior */ protected double m_SumPriorAbsErr; /** Sum of squared errors of the prior */ protected double m_SumPriorSqrErr; /** Total Kononenko & Bratko Information */ protected double m_SumKBInfo; /** Resolution of the margin histogram */ protected static int k_MarginResolution = 500; /** Cumulative margin distribution */ protected double m_MarginCounts []; /** Number of non-missing class training instances seen */ protected int m_NumTrainClassVals; /** Array containing all numeric training class values seen */ protected double [] m_TrainClassVals; /** Array containing all numeric training class weights */ protected double [] m_TrainClassWeights; /** Numeric class error estimator for prior */ protected Estimator m_PriorErrorEstimator; /** Numeric class error estimator for scheme */ protected Estimator m_ErrorEstimator; /** * The minimum probability accepted from an estimator to avoid * taking log(0) in Sf calculations. */ protected static final double MIN_SF_PROB = Double.MIN_VALUE; /** Total entropy of prior predictions */ protected double m_SumPriorEntropy; /** Total entropy of scheme predictions */ protected double m_SumSchemeEntropy; /** Enables/disables the use of priors, e.g., if no training set is * present in case of de-serialized schemes */ protected boolean m_NoPriors = false; /** * Initializes all the counters for the evaluation. * Delegates to the two-argument constructor, passing <code>null</code> * as the second argument. * Use <code>useNoPriors()</code> if the dataset is the test set and you * can't initialize with the priors from the training set via * <code>setPriors(Instances)</code>. * * @param data set of training instances, to get some header * information and prior class distribution information * @throws Exception if the class is not defined * @see #useNoPriors() * @see #setPriors(Instances) */ public Evaluation(Instances data) throws Exception { this(data, null); } /** * Initializes all the counters for the evaluation and also takes a * cost matrix as parameter. 
* Use <code>useNoPriors()</code> if the dataset is the test set and you * can't initialize with the priors from the training set via * <code>setPriors(Instances)</code>. * * @param data set of training instances, to get some header * information and prior class distribution information * @param costMa
评论
    相关推荐