csli.util.nlp
Class NgramModel

java.lang.Object
  extended by csli.util.nlp.NgramModel

public class NgramModel
extends Object

N-gram model over sequences of arbitrary objects. Uses the default smoothing from csli.util.collections.map.SmoothedDistribution.

Author:
jefe

Constructor Summary
NgramModel(int n)
           
NgramModel(int min, int max)
           
 
Method Summary
 NgramModel extend(List<?> sequence)
           
static List<Double> getScores(List sequence)
           
 double logLikelihood(List<?> sequence)
           
static void main(String[] args)
           
 double maxLogLikelihood(List<?> sequence)
           
<S> Iterable<List<S>>
ngrams(List<S> sequence)
           
static
<S> Iterable<List<S>>
ngrams(List<S> sequence, int len)
           
static
<S> Iterable<List<S>>
ngrams(List<S> sequence, int min, int max)
           
 double normalizedLL(List<?> sequence)
          Log likelihood normalized for sentence length.
static
<S> Iterable<ScoredObject<List<S>>>
scoredNgrams(List<ScoredObject<S>> sequence, int min, int max)
           
static
<S> Iterable<ScoredObject<List<S>>>
sngrams(List<S> sequence, int min, int max)
           
 NgramModel train(List<? extends List<?>> training, double smoothing)
           
static List unScore(List sequence)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

NgramModel

public NgramModel(int n)
Parameters:
n - the length of n-grams in this model

NgramModel

public NgramModel(int min,
                  int max)
Parameters:
min - the minimum length of n-grams in this model
max - the maximum length of n-grams in this model
Method Detail

train

public NgramModel train(List<? extends List<?>> training,
                        double smoothing)

extend

public NgramModel extend(List<?> sequence)

logLikelihood

public double logLikelihood(List<?> sequence)

maxLogLikelihood

public double maxLogLikelihood(List<?> sequence)

normalizedLL

public double normalizedLL(List<?> sequence)
Log likelihood normalized for sentence length. Equals the log of the geometric mean of the individual n-gram probabilities in the sentence, i.e. the arithmetic mean of their log probabilities.


ngrams

public <S> Iterable<List<S>> ngrams(List<S> sequence)

unScore

public static List unScore(List sequence)

getScores

public static List<Double> getScores(List sequence)

ngrams

public static <S> Iterable<List<S>> ngrams(List<S> sequence,
                                           int len)

scoredNgrams

public static <S> Iterable<ScoredObject<List<S>>> scoredNgrams(List<ScoredObject<S>> sequence,
                                                               int min,
                                                               int max)

sngrams

public static <S> Iterable<ScoredObject<List<S>>> sngrams(List<S> sequence,
                                                          int min,
                                                          int max)

ngrams

public static <S> Iterable<List<S>> ngrams(List<S> sequence,
                                           int min,
                                           int max)

main

public static void main(String[] args)