Cấu trúc thư mục như sau :
Source code : Download
File IndexingTest.java
package thaihoanghai; import java.io.IOException; import junit.framework.TestCase; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.RAMDirectory; import org.junit.Test; /** * In this example say basics on how to add,delete, and update documents. * * NOTE Users often confuse the maxDoc()and numDocs() methods in IndexWriter and IndexReader. * The first method, maxDoc()returns the total number of deleted or undeleted * documents in the index, whereas numDocs()returns only the number of undeleted documents. 
* */ public class IndexingTest extends TestCase{ // Data used to test for this program //Example we has 1 table include fields //|-------------------------------------------------------------| //| ProductID | Name | ExpiryDate | Description | //|-------------------------------------------------------------| //| ID001 | Iphone4 | 20/10/2012 | ..ChipA6...v.v | //| ID002 | Iphone5 | 09/09/2013 | ..ChipA7...v.v | //|-------------------------------------------------------------| protected String[] idProducts = {"id001", "id002", "id003"}; protected String[] names = {"Iphone4", "Iphone5", "Iphone6"}; protected String[] expiryDates = {"20/10/2012", "08/09/2013", "08/09/2013"}; protected String[] descriptions = {"1023x896 HD, Camera 8PM, 64GB, Chip A6, 134gram", "1024x887 HD, Camera 12PM, 32GB 64bit, Chip A7, 120gram", "1024x887 HD, Camera 12PM, 32GB 64bit, Chip A8, 120gram"}; private Directory directory; /** * The setUp method create new RAMDirectory to hold the index * It creates an indexWriter on this Directory and iterates over our item * to create Document and Fields and add the Document to IndexWriter => [indexing] * * Note : We could also have called commit(),which would commit the changes to the * directory but leave the writer open for further changes. 
*/ protected void setUp() throws Exception { // Method run before every test directory = new RAMDirectory(); // Create IndexWriter IndexWriter writer = getWriter(); // Add Document for(int i = 0; i < idProducts.length; i++){ Document doc = new Document(); doc.add(new Field("ProductID",idProducts[i],Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("ProductName", names[i], Field.Store.YES,Field.Index.ANALYZED)); doc.add(new Field("ExpiryDate", expiryDates[i],Field.Store.NO, Field.Index.ANALYZED)); doc.add(new Field("Description", descriptions[i], Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } writer.close(); } /** * Method used to create IndexWriter * @return * @throws CorruptIndexException * @throws LockObtainFailedException * @throws IOException */ private IndexWriter getWriter() throws CorruptIndexException, LockObtainFailedException, IOException{ // Directory : Where the index is stored // Analyzer : Used when we indexing it will token fields // WhitespaceAnalyzer token by white space // Constant MaxFieldLength.UNLIMITED: // :This constant require index all tokens in the document. return new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); } /** * Create New Searcher [IndexSearcher] and execute a basic query [single-term query]. * @param fieldName : field name used to search [example : ProductID or ExpiryDate ...] * @param searchString : String compare * @return : Numbers of document matched. 
* @throws CorruptIndexException * @throws IOException */ protected int getHitCount(String fieldName, String searchString) throws CorruptIndexException, IOException{ IndexSearcher searcher = new IndexSearcher(directory); // Build simple single-term query Term t = new Term(fieldName, searchString); Query query = new TermQuery(t); // get number items search match condition int hitCount = TestUtil.hitCount(searcher, query); searcher.close(); return hitCount; } // @Test purpose to verify writer document count. // count number document contain in IndexWriter @Test public void testIndexWriter() throws IOException{ IndexWriter writer = getWriter(); assertEquals(idProducts.length, writer.numDocs()); writer.close(); } // @Test purpose to verify document count in IndexReader @Test public void testIndexReader() throws IOException{ IndexReader reader = IndexReader.open(directory); assertEquals(idProducts.length, reader.maxDoc()); assertEquals(idProducts.length, reader.numDocs()); reader.close(); } /** * Difference between two methods maxDoc() and numDocs() known the total number of deleted or undeleted * Because our index contains three documents, one of which is deleted, * numDocs()returns 2 and maxDocs()returns 3. * @throws IOException */ @Test public void testDeleteBeforeOptimize() throws IOException{ IndexWriter writer = getWriter(); // We test verify 3 documents in index. assertEquals(3, writer.numDocs()); // We execute delete first document. writer.deleteDocuments(new Term("ProductID", "id001")); //writer.deleteDocuments(new Term("ProductID", "id002")); writer.commit(); // Test verify to known index contains deletions. assertTrue(writer.hasDeletions()); // Check to known ...has two document ? 
// result expect : 3 assertEquals(3, writer.maxDoc()); // check known that one document deleted, remaining 2 docs assertEquals(2, writer.numDocs()); writer.close(); } /** * writer.optimize()=> force Lucene to merge index segments, after deleting one document * Lucene truly removes the deleted document, 2 document remains in the index * @throws CorruptIndexException * @throws LockObtainFailedException * @throws IOException */ @Test public void testDeleteAfterOptimize() throws CorruptIndexException, LockObtainFailedException, IOException{ IndexWriter writer = getWriter(); assertEquals(3, writer.numDocs()); writer.deleteDocuments(new Term("ProductID", "id001")); writer.optimize();//Optimize to compact deletions writer.commit(); assertFalse(writer.hasDeletions()); // 0 document deleted assertEquals(2, writer.maxDoc()); assertEquals(2, writer.numDocs()); writer.close(); } /** * Update document basic we must deletes the entire document need update and then * add a new document to index. update at here like as we replace document. 
* @throws CorruptIndexException * @throws IOException */ @Test public void testUpdate() throws CorruptIndexException, IOException{ // Check Product exits or not to we update it assertEquals(1, getHitCount("ProductID", "id001")); IndexWriter writer = getWriter(); // Create new document with field values as : ID004, Lumina4, 12/09/2013, 3D Plastic Document doc = new Document(); doc.add(new Field("ProductID","id004",Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("ProductName", "Lumina4", Field.Store.YES,Field.Index.ANALYZED)); doc.add(new Field("ExpiryDate", "12/09/2013",Field.Store.NO, Field.Index.ANALYZED)); doc.add(new Field("Description", "3D Plastic", Field.Store.YES, Field.Index.ANALYZED)); // execute update Product have id001 by new document // Replace with new version writer.updateDocument(new Term("ProductID","id001"), doc); writer.close(); // check ProductName=Iphone4 exits or not assertEquals(0, getHitCount("ProductName", "Iphone4")); // check update product success or not assertEquals(1, getHitCount("ProductName", "Lumina4")); } }
Reference: Lucene in Action, 2nd edition (ebook)
Leave a comment