csli.util.classify.stanford
Class ClassifierUtils
java.lang.Object
csli.util.classify.stanford.ClassifierUtils
public abstract class ClassifierUtils
- extends Object
|
Method Summary |
static
<D extends edu.stanford.nlp.dbm.Datum>
Map<Integer,Double> |
|
getFeatures(D datum,
Map<String,Integer> featureMap)
|
static Pair<String,Double> |
getValuedFeature(Object feature)
|
static
<D extends edu.stanford.nlp.dbm.Datum>
List<Pair<Double,Double>> |
|
normalizeFeatures(List<D> examples,
Map<String,Integer> featureMap)
|
static double |
pLogP(double p)
Convenience method to get round the fact that multiplying zero by -Inf gives NaN |
static
<D extends edu.stanford.nlp.dbm.Datum>
void |
|
prune(List<D> examples,
Set<String> toPrune)
|
static
<D extends edu.stanford.nlp.dbm.Datum>
Set<String> |
|
pruneFeatures(List<D> examples,
List<String> specs,
String posLabel,
String negLabel)
|
static
<D extends edu.stanford.nlp.dbm.Datum>
Set<String> |
|
pruneFeaturesByCorrelation(List<D> examples,
double margin,
String posLabel,
String negLabel)
|
static
<D extends edu.stanford.nlp.dbm.Datum>
Set<String> |
|
pruneFeaturesByFreq(List<D> examples,
double threshold)
|
static
<D extends edu.stanford.nlp.dbm.Datum>
Set<String> |
|
pruneFeaturesByInfoGain(List<D> examples,
double margin,
String posLabel,
String negLabel)
|
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
ClassifierUtils
public ClassifierUtils()
getFeatures
public static <D extends edu.stanford.nlp.dbm.Datum> Map<Integer,Double> getFeatures(D datum,
Map<String,Integer> featureMap)
- Type Parameters:
D - a subclass of Datum
- Parameters:
datum - the data instance
featureMap - a map of feature label to feature index number, or null to assume ordered numeric features
- Returns:
- a map from feature index number to feature value
getValuedFeature
public static Pair<String,Double> getValuedFeature(Object feature)
- Parameters:
feature - a feature, either as a ScoredObject or just a plain Object
- Returns:
- a Pair of the String feature and its Double value (the score of a ScoredObject, or 1.0 otherwise)
normalizeFeatures
public static <D extends edu.stanford.nlp.dbm.Datum> List<Pair<Double,Double>> normalizeFeatures(List<D> examples,
Map<String,Integer> featureMap)
- Type Parameters:
D - a subclass of Datum
- Parameters:
examples - a List of data instances to be normalized
featureMap - a map of feature label to feature number, or null to assume ordered numeric features
- Returns:
- a List of Pairs of Doubles which record the normalization: f_norm = (f_raw-a)*b
pruneFeatures
public static <D extends edu.stanford.nlp.dbm.Datum> Set<String> pruneFeatures(List<D> examples,
List<String> specs,
String posLabel,
String negLabel)
- Type Parameters:
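D - a subclass of Datum
- Parameters:
examples - a list of Datum instances from which to prune features
specs - a list of String pruning specifications
posLabel - presumably the label of the positive class (not documented)
negLabel - presumably the label of the negative class (not documented)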
- Returns:
- the set of feature labels removed
pruneFeaturesByFreq
public static <D extends edu.stanford.nlp.dbm.Datum> Set<String> pruneFeaturesByFreq(List<D> examples,
double threshold)
- Type Parameters:
D - a subclass of Datum
- Parameters:
examples - a list of Datum instances from which to prune features
threshold - the value of frequency below which features will be pruned
- Returns:
- the set of feature labels removed
pruneFeaturesByCorrelation
public static <D extends edu.stanford.nlp.dbm.Datum> Set<String> pruneFeaturesByCorrelation(List<D> examples,
double margin,
String posLabel,
String negLabel)
- Type Parameters:
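D - a subclass of Datum
- Parameters:
examples - a list of Datum instances from which to prune features
posLabel - presumably the label of the positive class (not documented)
negLabel - presumably the label of the negative class (not documented)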
- Returns:
- the set of feature labels removed
pruneFeaturesByInfoGain
public static <D extends edu.stanford.nlp.dbm.Datum> Set<String> pruneFeaturesByInfoGain(List<D> examples,
double margin,
String posLabel,
String negLabel)
- Type Parameters:
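D - a subclass of Datum
- Parameters:
examples - a list of Datum instances from which to prune features
posLabel - presumably the label of the positive class (not documented)
negLabel - presumably the label of the negative class (not documented)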
- Returns:
- the set of feature labels removed
prune
public static <D extends edu.stanford.nlp.dbm.Datum> void prune(List<D> examples,
Set<String> toPrune)
- Type Parameters:
D - a subclass of Datum
- Parameters:
examples - a list of Datum instances from which to prune features
toPrune - a set of feature labels to remove
pLogP
public static double pLogP(double p)
- Convenience method to get round the fact that multiplying zero by -Inf gives NaN
- Parameters:
p - a double probability
- Returns:
- p*Math.log(p), which will be 0 if p=0