介绍完Oracle全文索引的FILTER属性,继续介绍Oracle的LEXER属性。
Oracle全文索引的LEXER属性用于处理各种不同的语言。最基本的英文使用BASIC_LEXER,而如果需要使用中文则可以使用CHINESE_VGRAM_LEXER或CHINESE_LEXER。
这篇文章简单说明BASIC_LEXER属性。BASIC_LEXER属性支持多种语言,比如英语、德语、荷兰语、挪威语、瑞典语等等。
BASIC_LEXER除了支持多种语言,还可以设置多种属性。比如这个例子中介绍的索引的大小写设置:
SQL> CREATE TABLE T (ID NUMBER, DOCS VARCHAR2(1000));
表已创建。
SQL> INSERT INTO T VALUES (1, 'This is a example for the basic lexer');
已创建 1 行。
SQL> INSERT INTO T VALUES (2, 'And we make a example for a mixed spell indexs.');
已创建 1 行。
SQL> INSERT INTO T VALUES (3, 'So the word in UPPER format must be query in UPPER');
已创建 1 行。
SQL> INSERT INTO T VALUES (4, 'And Mixed Spell Word must be Query in Mixed.');
已创建 1 行。
SQL> COMMIT;
提交完成。
SQL> CREATE INDEX IND_T_DOCS ON T (DOCS) INDEXTYPE IS CTXSYS.CONTEXT
2 PARAMETERS ('LEXER C ......
Oracle全文索引的WORDLIST属性用来设置模糊查询和同词根查询,另外WORDLIST属性还支持通配符查询。
Oracle的WORDLIST属性只有BASIC_WORDLIST一种,下面看一个BASIC_WORDLIST的例子:
1. Stemmer attribute是用来查询用词根的数据
SQL> create table stemmer_tbl(id number primary key,docs clob);
Table created.
SQL> insert into stemmer_tbl values(111,'We are testing the Stemmer attribute option');
1 row created.
SQL> insert into stemmer_tbl values(112,'The girl sang like a frog');
1 row created.
SQL> insert into stemmer_tbl values(113,'My import is committing too often');
1 row created.
SQL> commit;
Commit complete.
--
-- Basic WordList Stemmer attribute
-- This example uses English for Stemming.
-- Language choices are: NULL,ENGLISH,DERIVATIONAL,DUTCH,FRENCH,GERMAN,ITALIAN
-- and SPANISH. By default we use the language of the database. If the
-- database is not one of the listed languages then we choose NULL for stemmer
-- and default for fuzzy
--
SQL> begin
2 C ......
Oracle全文索引的STORAGE属性是为了给全文索引生成的辅助表设置存储参数的。
Oracle的全文索引会生成一张或多张辅助表,由于这些表是Oracle自动生成的,用户没有办法直接设置这些表和索引的物理参数,因此Oracle提供了STORAGE属性,专门设置这些辅助表和索引的物理参数。
SQL> SELECT * from TAB;
TNAME TABTYPE CLUSTERID
------------------------------ ------- ----------
DR$IND_T_DOCS$I TABLE
DR$IND_T_DOCS$K TABLE
DR$IND_T_DOCS$N TABLE
DR$IND_T_DOCS$P TABLE
DR$IND_T_DOCS$R TABLE
T TABLE
已选择6行。
上面的五张表都是全文索引生成的。CONTEXT索引生成表的规则是DR$+索引名+$+表用途标识。
DR$IND_T_DOCS$I存储的是索引数据表(Index data table);
DR$IND_T_DOCS$K存储的是键值映射表(Keymap table);
DR$IND_T_DOCS$R是ROWID表(Rowid table);
DR$IND_T_DOCS$N是负键值链表(Negative list table);
DR$IND_T_DOCS$P这个表只有在CONTEXT索引中设置BASIC_WORDLIST的SUBSTRING_INDEX属性后才 ......
Oracle的全文索引除了支持文字匹配查询,还支持对文章含义的查询。这是通过ABOUT操作实现的。
Oracle默认情况下提供英文和法文的文章含义支持。其他语言可以通过添加用户自定义的语言库来实现相应的功能。
在这之前需要先安装Companion CD 的Oracle Database 10g Products Installation Type,其中包括了Oracle Text Supplied Knowledge Bases,否则在建立含有about的索引时会出错。Companion CD可直接在官网下载,安装也非常简单。
下面看看英文环境下简单的ABOUT操作用法:
SQL> conn myuser/myuser
Connected.
SQL> CREATE TABLE T (ID NUMBER PRIMARY KEY, DOCS VARCHAR2(1000));
Table created.
SQL> INSERT INTO T VALUES (1, 'You can augment the knowledge base to define concepts and terms specific to your
industry or query application. When you do so, ABOUT queries are more precise for the added concepts.');
1 row created.
SQL> INSERT INTO T VALUES (2, 'ABOUT queries perform best when you create a theme component in your index. Theme
components are created by default for English and French.');
1 row ......
这篇文章介绍的是ORACLE CTXCAT索引的INDEX SET,也是CTXCAT索引特有的属性。
CTXCAT索引是CONTEXT索引的简化版,CTXCAT索引支持的PREFERENCE包括:LEXER、STOPLIST、WORDLIST和STORAGE参数。不支持其他的参数如:DATASTORE、FILTER、SECTION GROUP。虽然支持LEXER但不支持THEME查询,而且不支持FORMAT、CHARSET和LANGUAGE列,另外不支持表和索引分区。
CTXCAT索引仅仅包含了CONTEXT索引的部分内容,但是CTXCAT索引有其自身的优点。其中最突出的优点就是支持DML同步。CONTEXT索引由于结构过于复杂,且索引的数据量一般较大,因此CONTEXT索引并不是自动同步的。而CTXCAT索引是自动同步的,当发生了DML修改时,Oracle会自动同步CTXCAT索引,降低了索引的维护成本。
CTXCAT索引的另外一个优点就是这里要介绍的INDEX SET属性,这也是CTXCAT索引特有的属性。简单的说,CTXCAT可以建立一个索引集。可以把一些经常与CTXCAT查询组合使用的查询列的索引添加到索引集中。比如,如果在查询文章内容的同时,经常需要查询文章的作者、标题或创建时间等信息,则可以将这些信息列的索引添加到索引集中,Oracle可以将这些查询封装到CATSEARCH操作中,从而提高全文索引的效 ......
全文索引停用词的设置在前面的文章中已经介绍过了,这里简单说明记录一下停用词在查询时候需要注意的地方。
Oracle10g中,如果安装语言为中文,默认的LEXER为CHINESE_VGRAM_LEXER,默认的停用词语言也为中文。这篇通过对比中文环境和英文环境来说明停用词查询的一些特点。
SQL> show user
USER is "MYUSER"
SQL> CREATE TABLE T (ID NUMBER, DOCS VARCHAR2(1000));
Table created.
SQL> INSERT INTO T VALUES (1, 'This example test stopword.');
1 row created.
SQL> INSERT INTO T VALUES (2, 'Oracle9i chinese language environment default stopword are english.');
1 row created.
SQL> COMMIT;
Commit complete.
SQL> CREATE INDEX IND_T_DOCS ON T(DOCS) INDEXTYPE IS CTXSYS.CONTEXT;
Index created.
SQL> SELECT * from T WHERE CONTAINS(DOCS, 'This') > 0;
no rows selected
英文环境下允许单独查询停用词,但是不会返回结果。
SQL> SELECT * from T WHERE CONTAINS(DOCS, 'are english') > 0;
ID DOCS
--- --------------------------------------------------
2 Oracle9i chinese language e ......