\SplitWord

Summary

Methods

Properties

Constants

__construct()
SplitWord()
__destruct()
_get_index()
GetWordInfos()
SetSource()
SetResultType()
LoadDict()
IsWord()
GetWordProperty()
SetWordInfos()
StartAnalysis()
_deep_analysis()
_deep_analysis_cn()
_optimize_result()
_sort_finally_result()
_out_string_encoding()
GetFinallyResult()
GetSimpleResult()
GetSimpleResultAll()
GetFinallyIndex()
_source_result_charset()
MakeDict()
ExportDict()
InportDict()

$mask_value
$sourceCharSet
$targetCharSet
$resultType
$notSplitLen
$toLower
$differMax
$unitWord
$loadInit
$differFreq
$sourceString
$addonDic
$addonDicFile
$dicStr
$mainDic
$mainDicHand
$mainDicInfos
$mainDicFile
$mainDicFileZip
$isLoadAll
$isUnpacked
$dicWordMax
$simpleResult
$finallyResult
$isLoadDic
$newWords
$foundWordStr
$loadTime

No constants found

No protected methods found

No protected properties found

N/A

No private methods found

No private properties found

N/A

File: include/splitword.class.php
Package: JIANZHICMS.Libraries
Class hierarchy: \SplitWord

Tags

None found

Properties

$mask_value

$mask_value :

Type

$sourceCharSet

$sourceCharSet :

Type

$targetCharSet

$targetCharSet :

Type

$resultType

$resultType :

Type

$notSplitLen

$notSplitLen :

Type

$toLower

$toLower :

Type

$differMax

$differMax :

Type

$unitWord

$unitWord :

Type

$loadInit

$loadInit :

Type

$differFreq

$differFreq :

Type

$sourceString

$sourceString :

Type

$addonDic

$addonDic :

Type

$addonDicFile

$addonDicFile :

Type

$dicStr

$dicStr :

Type

$mainDic

$mainDic :

Type

$mainDicHand

$mainDicHand :

Type

$mainDicInfos

$mainDicInfos :

Type

$mainDicFile

$mainDicFile :

Type

$mainDicFileZip

$mainDicFileZip :

Type

$isLoadAll

$isLoadAll :

Type

$isUnpacked

$isUnpacked :

Type

$dicWordMax

$dicWordMax :

Type

$simpleResult

$simpleResult :

Type

$finallyResult

$finallyResult :

Type

$isLoadDic

$isLoadDic :

Type

$newWords

$newWords :

Type

$foundWordStr

$foundWordStr :

Type

$loadTime

$loadTime :

Type

Methods

__construct()

__construct(  $source_charset = 'utf-8',   $target_charset = 'utf-8',   $load_all = TRUE,   $source = '') : void

构造函数

Parameters

	$source_charset
	$target_charset
	$load_all
	$source

SplitWord()

SplitWord(  $source_charset = 'utf-8',   $target_charset = 'utf-8',   $load_all = TRUE,   $source = '')

Parameters

	$source_charset
	$target_charset
	$load_all
	$source

__destruct()

__destruct()

析构函数

_get_index()

_get_index(  $key) : \short

根据字符串计算key索引

Parameters

$key

Returns

\short —

int

GetWordInfos()

GetWordInfos(  $key,   $type = 'word') : \short

从文件获得词

Parameters

	$key
	$type	(类型 word 或 key_groups)

Returns

\short —

int

SetSource()

SetSource(  $source,   $source_charset = 'utf-8',   $target_charset = 'utf-8') : boolean

设置源字符串

Parameters

	$source
	$source_charset
	$target_charset

Returns

boolean

SetResultType()

SetResultType(  $rstype) : void

设置结果类型(只在获取finallyResult才有效)

Parameters

$rstype

1 为全部， 2去除特殊符号

LoadDict()

LoadDict(  $maindic = '') : void

载入词典

Parameters

$maindic

IsWord()

IsWord(  $word)

检测某个词是否存在

Parameters

$word

GetWordProperty()

GetWordProperty(  $word) : void

获得某个词的词性及词频信息

Parameters

$word

Inherited from: \SplitWord

Tags

param	$word unicode编码的词

SetWordInfos()

SetWordInfos(  $word,   $infos) : void

指定某词的词性信息（通常是新词）

Parameters

	$word
	$infos

Returns

void

Inherited from: \SplitWord

Tags

param

$word unicode编码的词

$infos array('c' => 词频, 'm' => 词性);

StartAnalysis()

StartAnalysis(  $optimize = TRUE) : boolean

开始执行分析

Parameters

$optimize

Returns

boolean

Inherited from: \SplitWord

Tags

param	bool optimize 是否对结果进行优化

_deep_analysis()

_deep_analysis(  $str,   $ctype,   $spos,   $optimize = TRUE) : boolean

深入分词

Parameters

	$str
	$ctype
	$spos
	$optimize

Returns

boolean

Inherited from: \SplitWord

Tags

param

$str

$ctype (2 英文类， 3 中/韩/日文类)

$spos 当前粗分结果游标

_deep_analysis_cn()

_deep_analysis_cn(  $str,   $lastec,   $spos,   $slen,   $optimize = TRUE) : void

中文的深入分词

Parameters

	$str
	$lastec
	$spos
	$slen
	$optimize

Inherited from: \SplitWord

Tags

param	$str

_optimize_result()

_optimize_result(  $smarr,   $spos)

Parameters

	$smarr
	$spos

_sort_finally_result()

_sort_finally_result() : void

转换最终分词结果到 finallyResult 数组

_out_string_encoding()

_out_string_encoding(  $str)

把unicode字符串转换为输出字符串

Parameters

$str

Inherited from: \SplitWord

Tags

param	str return string

GetFinallyResult()

GetFinallyResult(  $spword = ' ',   $word_meanings = FALSE) : string

获取最终结果字符串（用空格分开后的分词结果）

Parameters

	$spword
	$word_meanings

Returns

string

GetSimpleResult()

GetSimpleResult() : \array()

获取粗分结果，不包含粗分属性

Returns

\array()

GetSimpleResultAll()

GetSimpleResultAll() : \array()

获取粗分结果，包含粗分属性（1中文词句、2 ANSI词汇（包括全角），3 ANSI标点符号（包括全角），4数字（包括全角），5 中文标点或无法识别字符）

Returns

\array()

GetFinallyIndex()

GetFinallyIndex() : \array('word'=>count,...)

获取索引hash数组

Returns

\array('word'=>count,...)

_source_result_charset()

_source_result_charset() : integer

获得保存目标编码

Returns

integer

MakeDict()

MakeDict(  $source_file,   $target_file = '') : void

编译词典

Parameters

	$source_file
	$target_file

Inherited from: \SplitWord

Tags

param	$source_file utf-8编码的文本词典数据文件<参见范例dict/not-build/base_dic_full.txt> 注意, 需要PHP开放足够的内存才能完成操作

ExportDict()

ExportDict(  $targetfile) : void

导出词典的词条

Parameters

$targetfile

Inherited from: \SplitWord

Tags

param	$targetfile 保存位置

InportDict()

InportDict(  $targetfile)

Parameters

$targetfile