|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
csli.dialog.app.calo.topic.classification.topicextraction.WordDistribution
csli.dialog.app.calo.topic.classification.topicextraction.Topic
public class Topic
A word distribution class restricted to content words
| Field Summary | |
|---|---|
protected static Topic |
global
|
static org.apache.log4j.Logger |
logger
|
protected HashSet<Integer> |
removedWords
|
protected boolean |
removeRareWords
Words having count < 0.5 will be removed since they can't be significant. |
| Fields inherited from class csli.dialog.app.calo.topic.classification.topicextraction.WordDistribution |
|---|
distribution, ratios, stemmer |
| Constructor Summary | |
|---|---|
Topic(boolean rrw)
|
|
Topic(String s)
|
|
Topic(Topic a)
|
|
Topic(WordDistribution a,
boolean rrw)
|
|
| Method Summary | |
|---|---|
protected static void |
addNullWords(Collection<Integer> cs)
|
protected static void |
addNullWords(File f)
Completes the stopword list by reading from a file. |
void |
clean()
|
static void |
clearFiles()
Clears the files that have been saved on the disk. |
void |
delete()
|
static Set<Topic> |
getCachedTopics()
Deprecated. |
static Topic |
getCriticalVector(ArrayList<SausageUtterance> sausages,
int beg,
int end)
|
static Topic |
getCriticalVector(ArrayList<SausageUtterance> sausages,
int beg,
int end,
int offsegBeg,
int offsegEnd)
|
static Topic |
getCriticalVector(Topic wd,
Topic meetingWD)
|
static Topic |
getCriticalVector(WordDistribution worddist,
ArrayList<SausageUtterance> sausages)
|
String |
getDesc(int n)
|
static String[] |
getMeetingNames()
Get the list of meetings in the corpus. |
String |
getName()
|
protected static Set<Integer> |
getNullWords()
|
Boolean |
getTemp()
|
static Topic |
getTopicFromName(String name)
Get a named Topic from the pool |
edu.stanford.nlp.util.Counter<String> |
getTopWordsCounter(int n)
|
double |
getWeight(Integer i)
|
static boolean |
initGlobal()
Ensures the global word distribution which is used for computing topics, and the associated variables, are loaded. |
void |
keepSignificant(int n)
|
static Topic |
mixture(Collection<Pair<Topic,Double>> toMix)
Computes the mixture of given topics. |
void |
printToStream(PrintStream out,
boolean withNullWords)
|
void |
readFromStream(BufferedReader in)
|
static boolean |
reinitGlobal(String meeting)
Ensures the global (corpus-wide) word distribution is loaded and includes this meeting - if not, forces it to be re-calculated. |
void |
removeIrrelevant()
|
void |
save(boolean temp)
|
void |
setName()
|
void |
setName(String name)
|
void |
setRemoveableWords(WordDistribution ref)
Removes all the words that are too rare in a set of WordDistributions. |
void |
shrink()
|
void |
shrink(int n)
|
void |
temporarySave()
|
String |
toString()
|
protected void |
updateNullwords()
|
void |
userSave()
|
| Methods inherited from class csli.dialog.app.calo.topic.classification.topicextraction.WordDistribution |
|---|
addSausage, addSausage, addSausageUtterance, addSausageUtterance, addSausageUtterances, getCount, getDistribution, getOrthogonalDifference, getStemmer, myexp, mylog, positiveLogSimilarity, positiveSimilarity, removeSausage, removeSausageUtterance, similarity, size, splitWords, topKeys, toString, totalWeight |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
| Field Detail |
|---|
public static org.apache.log4j.Logger logger
protected static Topic global
protected boolean removeRareWords
protected HashSet<Integer> removedWords
| Constructor Detail |
|---|
public Topic(String s)
public Topic(Topic a)
public Topic(boolean rrw)
public Topic(WordDistribution a,
boolean rrw)
| Method Detail |
|---|
protected static void addNullWords(File f)
f - the stopwords list (one word per line)
protected void updateNullwords()
protected static Set<Integer> getNullWords()
protected static void addNullWords(Collection<Integer> cs)
public static void clearFiles()
public static boolean initGlobal()
public static boolean reinitGlobal(String meeting)
meeting -
public double getWeight(Integer i)
public void setRemoveableWords(WordDistribution ref)
ref - The WordDistribution object out of which we want to get the words that are too rare.
public void removeIrrelevant()
public void keepSignificant(int n)
public void shrink()
public void shrink(int n)
public void clean()
public static Topic getCriticalVector(ArrayList<SausageUtterance> sausages,
int beg,
int end,
int offsegBeg,
int offsegEnd)
public static Topic getCriticalVector(WordDistribution worddist,
ArrayList<SausageUtterance> sausages)
public static Topic getCriticalVector(Topic wd,
Topic meetingWD)
public static Topic getCriticalVector(ArrayList<SausageUtterance> sausages,
int beg,
int end)
public static Topic mixture(Collection<Pair<Topic,Double>> toMix)
toMix - the set of topics to be merged, with their coefficients
public edu.stanford.nlp.util.Counter<String> getTopWordsCounter(int n)
n - the number of top words we want
public String getDesc(int n)
public String getName()
public void setName(String name)
public void setName()
public String toString()
toString in class WordDistribution
public static Topic getTopicFromName(String name)
name - the name of the Topic
public void delete()
public void save(boolean temp)
throws IOException
IOException
public void userSave()
throws IOException
IOException
public void temporarySave()
throws IOException
IOException
public static Set<Topic> getCachedTopics()
public Boolean getTemp()
public void printToStream(PrintStream out,
boolean withNullWords)
public void readFromStream(BufferedReader in)
throws IOException
IOException
public static String[] getMeetingNames()
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||