| Modifier and Type | Field and Description |
|---|---|
boolean |
APPEND_CJK_ENTITY
whether to do entity recognition
|
boolean |
APPEND_CJK_PINYIN
append the Pinyin to the split IWord
|
boolean |
APPEND_CJK_SYN
append the synonym words to the split IWord.
|
boolean |
APPEND_PART_OF_SPEECH
append the part of speech.
|
boolean |
CLEAR_STOPWORD
clear away the stop word.
|
boolean |
CNFRA_TO_ARABIC
convert Chinese fractions to Arabic fractions.
|
boolean |
CNNUM_TO_ARABIC
convert Chinese numerals to Arabic numerals.
|
static int |
COMPLEX_MODE |
static int |
DELIMITER_MODE |
static int |
DETECT_MODE |
boolean |
EN_SECOND_SEG
whether to do a secondary split for complex Latin compound tokens
|
boolean |
I_CN_NAME
whether to identify Chinese names
|
boolean |
KEEP_UNREG_WORDS |
static String |
LEX_PROPERTY_FILE
default lexicon property file name
|
boolean |
LOAD_CJK_ENTITY
whether to load the entity define
|
boolean |
LOAD_CJK_PINYIN
whether to load the Pinyin of the CJK_WORDS
|
boolean |
LOAD_CJK_POS
whether to load the word's part of speech
|
boolean |
LOAD_CJK_SYN
whether to load the syn word of the CJK_WORDS.
|
int |
MAX_CN_LNADRON
the maximum length of the adron of a Chinese last name, e.g. the “老” in 老陈
|
int |
MAX_LATIN_LENGTH
maximum length for Latin words
|
int |
MAX_LENGTH
maximum length for maximum match (5-7)
|
int |
MAX_UNIT_LENGTH
maximum length for unit words
for the NLP algorithm, added on 2016/11/18
|
int |
NAME_SINGLE_THRESHOLD
the threshold for treating a single character as a standalone word
when it and the last char of the name could make up a word.
|
static int |
NLP_MODE |
int |
PPT_MAX_LENGTH
the maximum length for the text between the pair punctuation.
|
static int |
SEARCH_MODE |
static int |
SIMPLE_MODE
simple algorithm or complex algorithm
|
int |
STOKEN_MIN_LEN
minimum length for the secondary split to make up a word
|
| Constructor and Description |
|---|
JcsegTaskConfig()
create the config without initializing it.
Note: this may cause incompatibility problems for code written against
older versions that used this constructor
|
JcsegTaskConfig(boolean autoLoad)
create and initialize the config by auto load
|
JcsegTaskConfig(InputStream is)
create and initialize the task config from an InputStream
|
JcsegTaskConfig(String proFile)
create and initialize the task config from a properties file
|
| Modifier and Type | Method and Description |
|---|---|
boolean |
appendCJKPinyin() |
boolean |
appendCJKSyn() |
void |
autoLoad()
initialize the value of its options by automatically searching for the jcseg.properties file
|
boolean |
clearStopwords() |
JcsegTaskConfig |
clone()
override the clone method
|
boolean |
cnFractionToArabic() |
boolean |
cnNumToArabic() |
boolean |
getEnSecondSeg() |
String[] |
getLexiconPath()
return the lexicon directory path
|
int |
getMaxCnLnadron() |
int |
getMaxLength() |
int |
getNameSingleThreshold() |
int |
getPollTime() |
int |
getPPTMaxLength() |
String |
getPropertieFile() |
int |
getSTokenMinLen() |
boolean |
identifyCnName() |
boolean |
isAutoload()
whether lexicon autoload is enabled
|
boolean |
isKeepPunctuation(char c) |
boolean |
keepUnregWords() |
boolean |
ladCJKPos() |
void |
load(InputStream is)
initialize the value of its options from an InputStream
of a jcseg.properties properties file
|
void |
load(String proFile)
initialize the value of its options from a specified
jcseg.properties properties file
|
boolean |
loadCJKEntity() |
boolean |
loadCJKPinyin() |
boolean |
loadCJKSyn() |
void |
setAppendCJKPinyin(boolean appendCJKPinyin) |
void |
setAppendCJKSyn(boolean appendCJKPinyin) |
void |
setAppendPartOfSpeech(boolean partOfSpeech) |
void |
setAutoload(boolean autoload) |
void |
setClearStopwords(boolean clearstopwords) |
void |
setCnFactionToArabic(boolean cnFractionToArabic) |
void |
setCnNumToArabic(boolean cnNumToArabic) |
void |
setEnSecondSeg(boolean enSecondSeg) |
void |
setICnName(boolean iCnName) |
void |
setKeepPunctuations(String keepPunctuations) |
void |
setKeepUnregWords(boolean keepUnregWords) |
void |
setLexiconPath(String[] lexPath) |
void |
setLoadCJKPinyin(boolean loadCJKPinyin) |
void |
setLoadCJKPos(boolean loadCJKPos) |
void |
setLoadCJKSyn(boolean loadCJKSyn) |
void |
setLoadEntity(boolean loadEntity) |
void |
setMaxCnLnadron(int maxCnLnadron) |
void |
setMaxLength(int maxLength) |
void |
setNameSingleThreshold(int thresold) |
void |
setPollTime(int polltime) |
void |
setPPT_MAX_LENGTH(int pptMaxLength) |
void |
setSTokenMinLen(int len) |
public static final String LEX_PROPERTY_FILE
public static final int SIMPLE_MODE
public static final int COMPLEX_MODE
public static final int DETECT_MODE
public static final int SEARCH_MODE
public static final int DELIMITER_MODE
public static final int NLP_MODE
public int MAX_LENGTH
public int MAX_LATIN_LENGTH
public int MAX_UNIT_LENGTH
public boolean I_CN_NAME
public int MAX_CN_LNADRON
public boolean LOAD_CJK_PINYIN
public boolean APPEND_CJK_PINYIN
public boolean APPEND_PART_OF_SPEECH
public boolean LOAD_CJK_SYN
public boolean APPEND_CJK_SYN
public boolean LOAD_CJK_POS
public boolean LOAD_CJK_ENTITY
public boolean APPEND_CJK_ENTITY
public int NAME_SINGLE_THRESHOLD
public int PPT_MAX_LENGTH
public boolean CLEAR_STOPWORD
public boolean CNNUM_TO_ARABIC
public boolean CNFRA_TO_ARABIC
public boolean EN_SECOND_SEG
public int STOKEN_MIN_LEN
public boolean KEEP_UNREG_WORDS
public JcsegTaskConfig()
public JcsegTaskConfig(boolean autoLoad)
Parameters: autoLoad -
public JcsegTaskConfig(String proFile)
Parameters: proFile -
public JcsegTaskConfig(InputStream is)
Parameters: is -
public void load(String proFile) throws IOException
Parameters: proFile -
Throws: IOException
public void autoLoad() throws IOException
1. Inside the directory where jcseg-core-{version}.jar is located, i.e. beside the jar file.
2. Search root classpath.
3. Load from system property "user.home".
Throws: IOException
public void load(InputStream is) throws IOException
Parameters: is -
Throws: IOException
public String[] getLexiconPath()
public void setLexiconPath(String[] lexPath)
public boolean isAutoload()
public void setAutoload(boolean autoload)
public int getPollTime()
public void setPollTime(int polltime)
public int getMaxLength()
public void setMaxLength(int maxLength)
public boolean identifyCnName()
public void setICnName(boolean iCnName)
public int getMaxCnLnadron()
public void setMaxCnLnadron(int maxCnLnadron)
public boolean loadCJKPinyin()
public void setLoadCJKPinyin(boolean loadCJKPinyin)
public void setAppendPartOfSpeech(boolean partOfSpeech)
public boolean appendCJKPinyin()
public void setAppendCJKPinyin(boolean appendCJKPinyin)
public boolean loadCJKSyn()
public void setLoadCJKSyn(boolean loadCJKSyn)
public boolean appendCJKSyn()
public void setAppendCJKSyn(boolean appendCJKPinyin)
public boolean ladCJKPos()
public void setLoadCJKPos(boolean loadCJKPos)
public boolean loadCJKEntity()
public void setLoadEntity(boolean loadEntity)
public int getNameSingleThreshold()
public void setNameSingleThreshold(int thresold)
public int getPPTMaxLength()
public void setPPT_MAX_LENGTH(int pptMaxLength)
public boolean clearStopwords()
public void setClearStopwords(boolean clearstopwords)
public boolean cnNumToArabic()
public void setCnNumToArabic(boolean cnNumToArabic)
public boolean cnFractionToArabic()
public void setCnFactionToArabic(boolean cnFractionToArabic)
public boolean getEnSecondSeg()
public void setEnSecondSeg(boolean enSecondSeg)
public int getSTokenMinLen()
public void setSTokenMinLen(int len)
public void setKeepPunctuations(String keepPunctuations)
public boolean isKeepPunctuation(char c)
public boolean keepUnregWords()
public void setKeepUnregWords(boolean keepUnregWords)
public String getPropertieFile()
public JcsegTaskConfig clone() throws CloneNotSupportedException
Overrides: clone in class Object
Throws: CloneNotSupportedException
Copyright © 2017. All Rights Reserved.