r977 - in trunk/wikitty-lucene/src: main/java/org/nuiton/wikitty/storage/lucene test/java/org/nuiton/wikitty/storage/lucene
Author: echatellier Date: 2011-06-27 11:51:18 +0200 (Mon, 27 Jun 2011) New Revision: 977 Url: http://nuiton.org/repositories/revision/wikitty/977 Log: Improve tree nodes indexation. Added: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/AttachmentInTree.java trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java trunk/wikitty-lucene/src/test/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLuceneTest.java Added: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/AttachmentInTree.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/AttachmentInTree.java (rev 0) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/AttachmentInTree.java 2011-06-27 09:51:18 UTC (rev 977) @@ -0,0 +1,218 @@ +/* + * #%L + * Wikitty :: wikitty-lucene + * + * $Id$ + * $HeadURL$ + * %% + * Copyright (C) 2009 - 2011 CodeLutin, Chatellier Eric + * %% + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Lesser Public License for more details. + * + * You should have received a copy of the GNU General Lesser Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/lgpl-3.0.html>. + * #L% + */ + +package org.nuiton.wikitty.storage.lucene; + +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.LUCENE_ID; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.collections.Factory; +import org.apache.commons.collections.map.LazyMap; +import org.apache.lucene.document.Document; +import org.nuiton.wikitty.entities.Wikitty; +import org.nuiton.wikitty.entities.WikittyTreeNode; +import org.nuiton.wikitty.entities.WikittyTreeNodeHelper; + +/** + * Class permettant de construire la liste des objets qui ont ete ajoute + * ou supprimer d'un noeud + * + * @author poussin + * @version $Revision$ + * @since 3.1 + * + * Last update: $Date$ + * by : $Author$ + */ +public class AttachmentInTree { + + // On genere en meme temps la liste des attachments qui doivent + // etre reindexe + protected Set<String> allAttachmentToIndex = new HashSet<String>(); + + protected Factory listFactory = new Factory() { + @Override + public Object create() { + return new HashSet<String>(); + } + }; + + // key: TreeNode id, value: list of attached id + protected Map<String, Set<String>> attachmentRemovedInTree = LazyMap.decorate( + new HashMap<String, Set<String>>(), listFactory); + // key: TreeNode id, value: list of attached id + protected Map<String, Set<String>> attachmentAddedInTree = LazyMap.decorate( + new HashMap<String, Set<String>>(), listFactory); + + /** + * Remove all ids in attachment list. Ids is object already deleted + * reindex it is not necessary + * + * @param ids + */ + public void clean(Collection<String> ids) { + if (ids != null) { + allAttachmentToIndex.removeAll(ids); + for (Set<String> set : attachmentRemovedInTree.values()) { + set.removeAll(ids); + } + for (Set<String> set : attachmentAddedInTree.values()) { + set.removeAll(ids); + } + } + } + + public int size() { + return allAttachmentToIndex.size(); + } + + public Set<String> getAll() { + return allAttachmentToIndex; + } + + public Map<String, Set<String>> getAdded() { + return attachmentAddedInTree; + } + + public Map<String, Set<String>> getRemoved() { + return attachmentRemovedInTree; + } + + /** + * @param id TreeNode id + * @param ids attachment id + */ + public void remove(String id, Collection<String> attId) { + if (attId != null && !attId.isEmpty()) { + attachmentRemovedInTree.get(id).addAll(attId); + allAttachmentToIndex.addAll(attId); + } + } + + /** + * @param doc TreeNode document representation + */ + public void remove(Document doc) { + //String id = SolrUtil.getStringFieldValue(doc, WikittySolrConstant.SOLR_ID); + String id = doc.get(LUCENE_ID); + + //Collection<String> att = SolrUtil.getStringFieldValues(doc, + // WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT, + // TYPE.WIKITTY); + + String[] values = doc.getValues(WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT); + List<String> att = Arrays.asList(values); + remove(id, att); + } + + /** + * Ajout les attachment du TreeNode passe en parametre via son Id + * + * @param id TreeNode id + * @param attId attachment id + */ + public void add(String id, Collection<String> attId) { + if (attId != null && !attId.isEmpty()) { + attachmentAddedInTree.get(id).addAll(attId); + allAttachmentToIndex.addAll(attId); + } + } + + /** + * Ajout l'attachment du TreeNode passe en parametre via son Id + * + * @param id TreeNode id + * @param attId attachment id + * @since 3.0.5 + */ + public void add(String id, String attId) { + if (attId != null) { + attachmentAddedInTree.get(id).add(attId); + allAttachmentToIndex.add(attId); + } + } + + /** + * Ajout les attachment du TreeNode passe en parametre sous forme de doc Solr + * @param doc TreeNode document representation + */ + public void add(Document doc) { + //String id = SolrUtil.getStringFieldValue(doc, WikittySolrConstant.SOLR_ID); + String id = doc.get(LUCENE_ID); + + //Collection<String> att = SolrUtil.getStringFieldValues(doc, + // WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT, + // TYPE.WIKITTY); + + String[] values = doc.getValues(WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT); + List<String> att = Arrays.asList(values); + add(id, att); + } + + /** + * Ajout les attachment du TreeNode passe en parametre sous forme de doc Solr + * Mais seulement si l'attachment est aussi dans la liste restriction + * + * @param doc TreeNode document representation + * @param restriction la liste accepte de wikitty a ajouter + * @since 3.0.5 + */ + public void add(Document doc, Set<String> restriction) { + //String id = SolrUtil.getStringFieldValue(doc, WikittySolrConstant.SOLR_ID); + String id = doc.get(LUCENE_ID); + + //Collection<String> att = SolrUtil.getStringFieldValues(doc, + // WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT, + // TYPE.WIKITTY); + + String[] values = doc.getValues(WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT); + List<String> att = Arrays.asList(values); + if (att != null) { + for (String attId : att) { + if (restriction.contains(attId)) { + add(id, attId); + } + } + } + } + + /** + * Ajout les attachment du TreeNode passe en parametre sous forme de Wikitty + * @param doc TreeNode document representation + */ + public void add(Wikitty w) { + String id = w.getId(); + Set<String> att = WikittyTreeNodeHelper.getAttachment(w); + add(id, att); + } + +} Property changes on: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/AttachmentInTree.java ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision HeadURL Added: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java (rev 0) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java 2011-06-27 09:51:18 UTC (rev 977) @@ -0,0 +1,251 @@ +/* + * #%L + * + * + * $Id$ + * $HeadURL$ + * %% + * Copyright (C) 2011 Codelutin, Chatellier Eric + * %% + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Lesser Public License for more details. + * + * You should have received a copy of the GNU General Lesser Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/lgpl-3.0.html>. + * #L% + */ + +package org.nuiton.wikitty.storage.lucene; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.nuiton.wikitty.WikittyException; +import org.nuiton.wikitty.search.operators.Element; + +/** + * TODO add comment here. + * + * @author chatellier + * @version $Revision$ + * + * Last update : $Date$ + * By : $Author$ + * @since 3.2 + */ +public class LuceneUtil { + + /** to use log facility, just put in your code: log.info(\"...\"); */ + final static private Log log = LogFactory.getLog(LuceneUtil.class); + + /** + * Get field name used in lucene storage for a field name in query. + * + * @param fieldName field name to convert + * @return field name in lucene + */ + public static String getLuceneFieldName(String fieldName) { + String result; + if (Element.ELT_ID.equals(fieldName)) { + result = WikittyLuceneConstants.LUCENE_ID; + } else if (Element.ELT_EXTENSION.equals(fieldName)) { + result = WikittyLuceneConstants.LUCENE_EXTENSIONS; + } else { + result = fieldName; + } + return result; + } + + /** + * get value of field in Document, field must have only one value + * @param d + * @param fieldname + * @param type optional type to generate lucene field name + * @return + */ + static public Integer getIntFieldValue(Document d, String fieldname) { + String luceneFieldName = getLuceneFieldName(fieldname); + + Object value = d.getFieldable(luceneFieldName).stringValue(); + Integer result = convertToInteger(value, luceneFieldName); + return result; + } + + /** + * Converti un Object en String, si l'objet est de type String un simple + * cast est fait, si l'objet est un tableau, on prend le 1er element, si + * le tableau contient plus de 1 element une exception est levee + * + * @param value + * @param luceneFieldName + * @return une string ou null si value est null ou est un tableau vide + */ + static public String convertToString(Object value, String luceneFieldName) { + String result; + if (value == null) { + result = null; + } else if (value instanceof String) { + // c'est un champs monovalue + result = (String)value; + } else if (value instanceof String[]) { + // c'est un champs multivalue + String[] values = (String[])value; + if (values.length == 0) { + result = null; + } else if (values.length == 1) { + result = values[0]; + } else { + throw new WikittyException(String.format( + "You can't get one value from field (%s) with many (%s) value", + luceneFieldName, values.length)); + } + } else if (value instanceof Collection) { + Collection c = (Collection)value; + if (c.isEmpty()) { + result = null; + } else if (c.size() == 1){ + Object o = c.iterator().next(); + result = convertToString(o, luceneFieldName); + } else { + throw new WikittyException(String.format( + "You can't get one value from field (%s) with many (%s) value", + luceneFieldName, c.size())); + } + } else { + throw new WikittyException(String.format( + "Field (%s) is not an String but %s", + luceneFieldName, value.getClass().getName())); + } + return result; + } + + /** + * Converti un Object en String, si l'objet est de type String un simple + * cast est fait, si l'objet est un tableau, on prend le 1er element, si + * le tableau contient plus de 1 element une exception est levee + * + * @param value + * @param luceneFieldName + * @return une string ou null si value est null ou est un tableau vide + */ + static public Integer convertToInteger(Object value, String luceneFieldName) { + Integer result; + if (value == null) { + result = null; + } else if (value instanceof Integer) { + // c'est un champs monovalue + result = (Integer)value; + } else if (value instanceof Integer[]){ + // c'est un champs multivalue + Integer[] values = (Integer[])value; + if (values.length == 0) { + result = null; + } else if (values.length == 1) { + result = values[0]; + } else { + throw new WikittyException(String.format( + "You can't get one value from field (%s) with many (%s) value", + luceneFieldName, values.length)); + } + } else { + throw new WikittyException(String.format( + "Field (%s) is not an Integer but %s", + luceneFieldName, value.getClass().getName())); + } + return result; + } + + /** + * Copy lucene document + * + * @param source lucene document source + * @param dest lucene document destination + * @param fieldToInclude only copy this fields, if null or empty, copy all field + * @param fieldToExclude to not copy these fields + */ + static public void copyLuceneDocument(Document source, Document dest, + String[] fieldToInclude, String[] fieldToExclude) { + Collection<Fieldable> fields = source.getFields(); + Collection<String> fieldNames = new ArrayList<String>(); + for (Fieldable field : fields) { + fieldNames.add(field.name()); + } + + Set<String> fieldToCopy = new HashSet<String>(); + if (fieldToInclude == null || fieldToInclude.length == 0) { + fieldToCopy.addAll(fieldNames); + } else { + for (String fieldName : fieldNames) { + for (String fieldRegexp : fieldToInclude) { + if (fieldName.matches(fieldRegexp)) { + fieldToCopy.add(fieldName); + break; + } + } + } + } + + if (fieldToExclude != null && fieldToExclude.length > 0) { + for (String fieldName : fieldNames) { + for (String fieldRegexp : fieldToExclude) { + if (fieldName.matches(fieldRegexp)) { + fieldToCopy.remove(fieldName); + break; + } + } + } + } + + if (log.isDebugEnabled()) { + log.debug(String.format( + "Copiable field are %s but only field %s are copied", + fieldNames, fieldToCopy)); + } + + for (String fieldName : fieldToCopy) { + dest.removeField(fieldName); // to prevent add in already exist dest field + Fieldable[] scrFields = source.getFieldables(fieldName); + for (Fieldable scrField : scrFields) { + dest.add(scrField); + } + } + } + + /** + * Copy lucene document + * + * @param source lucene document source + * @param dest lucene document destination + * @param fieldToInclude only copy this field, if null or empty, copy all field + * @since 3.2 + */ + static public void copyLuceneDocument(Document source, Document dest, String... fieldToInclude) { + copyLuceneDocument(source, dest, fieldToInclude, null); + } + + /** + * Copy lucene document exlude some fields + * + * @param source lucene document source + * @param dest lucene document destination + * @param fieldToExclude not copy these fields + * @since 3.2 + */ + static public void copyLuceneDocumentExcludeSomeField(Document source, Document dest, String... fieldToExclude) { + copyLuceneDocument(source, dest, null, fieldToExclude); + } +} Property changes on: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/LuceneUtil.java ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision HeadURL Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java 2011-06-24 16:06:02 UTC (rev 976) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/Restriction2Lucene.java 2011-06-27 09:51:18 UTC (rev 977) @@ -34,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; import org.nuiton.wikitty.WikittyException; import org.nuiton.wikitty.search.operators.And; import org.nuiton.wikitty.search.operators.AssociatedRestriction; @@ -80,24 +81,6 @@ } /** - * Get field name used in lucene storage for a field name in query. - * - * @param fieldName field name to convert - * @return field name in lucene - */ - protected String getLuceneFieldName(String fieldName) { - String result; - if (Element.ELT_ID.equals(fieldName)) { - result = WikittyLuceneConstants.LUCENE_ID; - } else if (Element.ELT_EXTENSION.equals(fieldName)) { - result = WikittyLuceneConstants.LUCENE_EXTENSIONS; - } else { - result = fieldName; - } - return result; - } - - /** * Convert a wikitty restriction to lucene search query. * * @param restriction restriction to convert @@ -126,13 +109,13 @@ query = or2Lucene((Or)restriction); break; case EQUALS: - query = neq2Lucene((Equals)restriction); + query = eq2Lucene((Equals)restriction); break; case NOT_EQUALS: - query = less2Lucene((NotEquals)restriction); + query = neq2Lucene((NotEquals)restriction); break; case LESS: - query = lessEq2Lucene((Less)restriction); + query = less2Lucene((Less)restriction); break; case LESS_OR_EQUAL: query = lessEq2Lucene((LessOrEqual)restriction); @@ -189,7 +172,7 @@ protected String element2Lucene(Element element) { String result = element.getName(); //result = fieldModifer.convertToSolr(transaction, result); - result = getLuceneFieldName(result); + result = LuceneUtil.getLuceneFieldName(result); return result; } @@ -281,7 +264,12 @@ * @return */ protected Query between2Lucene(Between restriction) { - throw new NotImplementedException("Not yet implemented"); + String fieldName = element2Lucene(restriction.getElement()); + String min = restriction.getMin(); + String max = restriction.getMax(); + + TermRangeQuery query = new TermRangeQuery(fieldName, min, max, true, true); + return query; } /** @@ -312,27 +300,35 @@ * @param restriction * @return */ - protected Query lessEq2Lucene(Less restriction) { + protected Query less2Lucene(Less restriction) { throw new NotImplementedException("Not yet implemented"); } /** * @param restriction * @return + * @throws ParseException */ - protected Query less2Lucene(NotEquals restriction) { - throw new NotImplementedException("Not yet implemented"); + protected Query eq2Lucene(Equals restriction) throws ParseException { + String field = element2Lucene(restriction.getElement()); + Term term = new Term(field, restriction.getValue()); + TermQuery query = new TermQuery(term); + + return query; } /** * @param restriction * @return + * @throws ParseException */ - protected Query neq2Lucene(Equals restriction) { + protected Query neq2Lucene(NotEquals restriction) throws ParseException { String field = element2Lucene(restriction.getElement()); Term term = new Term(field, restriction.getValue()); TermQuery query = new TermQuery(term); - return query; + BooleanQuery bquery = new BooleanQuery(); + bquery.add(query, BooleanClause.Occur.MUST_NOT); + return bquery; } /** Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java 2011-06-24 16:06:02 UTC (rev 976) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittyLuceneConstants.java 2011-06-27 09:51:18 UTC (rev 977) @@ -50,4 +50,22 @@ /** extensions field name in lucene */ static final public String LUCENE_DEFAULT_FIELD = WIKITTY_LUCENE_PREFIX + "fulltext"; + + /** Use for indexation tree node */ + static final public String TREENODE_PREFIX = WIKITTY_LUCENE_PREFIX + "tree."; + + /** Use as field on TreeNode */ + static final public String TREENODE_ROOT = TREENODE_PREFIX + "root"; + + /** Use as field on TreeNode, contains parent node id and himself node id */ + static final public String TREENODE_PARENTS = TREENODE_PREFIX + "parents"; + + /** Use as field on TreeNode, number of parents (root node depth=1) */ + static final public String TREENODE_DEPTH = TREENODE_PREFIX + "depth"; + + /** Use as field on Wikitty object attached on TreeNode, TreeNodeId is added at end */ + static final public String TREENODE_ATTACHED = TREENODE_PREFIX + "attached."; + + /** Use as field on Wikitty object attached on TreeNode, TreeNodeId is added at end used for facetisation */ + static final public String TREENODE_ATTACHED_ALL = TREENODE_PREFIX + "attached-all"; } Modified: trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java =================================================================== --- trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java 2011-06-24 16:06:02 UTC (rev 976) +++ trunk/wikitty-lucene/src/main/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLucene.java 2011-06-27 09:51:18 UTC (rev 977) @@ -25,12 +25,26 @@ package org.nuiton.wikitty.storage.lucene; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.LUCENE_ID; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_ATTACHED; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_ATTACHED_ALL; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_DEPTH; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_PARENTS; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_PREFIX; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.TREENODE_ROOT; +import static org.nuiton.wikitty.storage.lucene.WikittyLuceneConstants.WIKITTY_LUCENE_PREFIX; + import java.io.File; import java.io.IOException; import java.lang.reflect.Constructor; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; @@ -45,23 +59,33 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.util.Version; import org.nuiton.util.ApplicationConfig; +import org.nuiton.util.TimeLog; import org.nuiton.wikitty.WikittyConfigOption; import org.nuiton.wikitty.WikittyException; import org.nuiton.wikitty.WikittyUtil; +import org.nuiton.wikitty.entities.FieldType; +import org.nuiton.wikitty.entities.FieldType.TYPE; import org.nuiton.wikitty.entities.Wikitty; +import org.nuiton.wikitty.entities.WikittyTreeNode; +import org.nuiton.wikitty.entities.WikittyTreeNodeHelper; import org.nuiton.wikitty.search.Criteria; +import org.nuiton.wikitty.search.FacetTopic; import org.nuiton.wikitty.search.PagedResult; +import org.nuiton.wikitty.search.Search; import org.nuiton.wikitty.search.TreeNodeResult; import org.nuiton.wikitty.search.operators.Restriction; import org.nuiton.wikitty.services.WikittyTransaction; @@ -82,6 +106,7 @@ /** Class logger. */ final static private Log log = LogFactory.getLog(WikittySearchEngineLucene.class); + final static private TimeLog timeLog = new TimeLog(WikittySearchEngineLucene.class); /** Index directory. */ protected Directory indexDirectory; @@ -146,7 +171,7 @@ try { Class analyzerClass = config.getOptionAsClass(luceneAnalyzerFactoryKey); - + if (analyzerClass != null) { // get correct constructor Constructor constructor = analyzerClass.getConstructor(Version.class); @@ -200,7 +225,7 @@ /* * @see org.nuiton.wikitty.storage.WikittySearchEngine#store(org.nuiton.wikitty.services.WikittyTransaction, java.util.Collection, boolean) - */ + * @Override public void store(WikittyTransaction transaction, Collection<Wikitty> wikitties, boolean force) throws WikittyException { @@ -230,27 +255,478 @@ } finally { IOUtils.closeQuietly(writer); } + }*/ + + /* + * @see org.nuiton.wikitty.storage.WikittySearchEngine#store(org.nuiton.wikitty.services.WikittyTransaction, java.util.Collection, boolean) + */ + @Override + public void store(WikittyTransaction transaction, + Collection<Wikitty> wikitties, boolean force) { + long startTime = TimeLog.getTime(); + + IndexWriter writer = null; + IndexSearcher searcher = null; + try { + // must be instancied every time (single instance per writer) + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(WIKITTY_LUCENE_VERSION, indexAnalyzer); + writer = new IndexWriter(indexDirectory, indexWriterConfig); + searcher = new IndexSearcher(indexDirectory); + + // tous les wikitties passes en parametre + Map<String, Wikitty> allWikitties = new HashMap<String, Wikitty>(); + // les ids des wikitties en parametre reellement modifier (a reindexer) + Set<String> dirtyObject = new HashSet<String>(); + // les ids des TreeNodes dont le champs parent a change (est aussi + // contenu dans dirtyObject + Set<String> dirtyParent = new HashSet<String>(); + // les valeur du champs parent des TreeNodes dont le champs parent + // a change (sauf si parent = null) + Set<String> dirtyParentParentId = new HashSet<String>(); + + // doc that will be stored at end of process + Map<String, Document> addedDocs = new HashMap<String, Document>(); + + // remplissage des collections + for(Wikitty w : wikitties) { + allWikitties.put(w.getId(), w); + if (force || !w.getDirty().isEmpty() || + WikittyUtil.versionGreaterThan("1", w.getVersion())) { + // s'il y a au moins un champs a reindexer ou que l'objet + // n'a jamais ete sauve (1 > version) + dirtyObject.add(w.getId()); + if (WikittyTreeNodeHelper.hasExtension(w) && (force + ||w.getDirty().contains(WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_PARENT) + || null == WikittyTreeNodeHelper.getParent(w))) { + // si le pere a change + // ou qu'il est null (creation d'un nouvel arbre) + // il faut indexer le noeud + dirtyParent.add(w.getId()); + String parent = WikittyTreeNodeHelper.getParent(w); + if (parent != null) { + dirtyParentParentId.add(parent); + } + } + } + } + + // recuperation des documents Solr deja indexes, pour minimiser la reindexation + Map<String, Document> dirtyObjectDoc = findAllById(searcher, dirtyObject); + Map<String, Document> dirtyParentDoc = findAllByField(searcher, TREENODE_PARENTS, dirtyParent); + Map<String, Document> parents = findAllById(searcher, dirtyParentParentId); + + // On genere en meme temps la liste des attachments qui doivent + // etre reindexe + AttachmentInTree attachmentInTree = new AttachmentInTree(); + + // + // Phase 1: on indexe les objets passe en paremetre, on copie si + // besoin #tree.attached des wikitties et #tree.* des + // TreeNode dont leur champs parent n'a pas ete modifie, et + // dans ce cas on collecte les modif d'attachments des TreeNode + // + + for (String id : dirtyObject) { + Wikitty w = allWikitties.get(id); + Document oldDoc = dirtyObjectDoc.get(id); + Document doc = convertWikittyToDoc(w); + if (oldDoc != null) { + // On a un ancien document partiel ou complet + // s'il etait partiel (seulement l'indexation arbre + // cela veut dire que l'objet TreeNode a ete store + // sans que l'attachment le soit et qu'il l'est que + // maintenant + + // copy des champs #tree.attached des documents partiels ou non + LuceneUtil.copyLuceneDocument(oldDoc, doc, TREENODE_ATTACHED + ".*"); + if (WikittyTreeNodeHelper.hasExtension(w) + && !dirtyParentDoc.containsKey(id)) { + // si c'est un TreeNode, mais qu'aucun pere n'a change + // on recopie l'ancienne indexation d'arbre + // si elle existe + LuceneUtil.copyLuceneDocument(oldDoc, doc, TREENODE_PREFIX + ".*"); + + // il faut verifier les objets attaches + // attaches ajoute/supprime + // on ne traite ici que les TreeNode sans modif d'indexation + // pour les autres les attachments seront traites dans + // la phase suivante + Set<String> newAtt = WikittyTreeNodeHelper.getAttachment(w); + + //Collection<String> oldAtt = SolrUtil.getStringFieldValues( + // oldDoc, + // WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT, + // TYPE.WIKITTY); + String[] values = oldDoc.getValues(WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_ATTACHMENT); + Collection<String> oldAtt = Arrays.asList(values); + + // il faut supprimer l'indexation arbre des noeuds + // qui sont dans old, mais pas dans new + Set<String> toRemove = new HashSet<String>(); + if (oldAtt != null) { + toRemove.addAll(oldAtt); + } + if (newAtt != null) { + toRemove.removeAll(newAtt); + } + attachmentInTree.remove(id, toRemove); + // il faut ajouter l'indexation arbre des noeuds + // qui sont dans new, mais pas dans old + Set<String> toAdd = new HashSet<String>(); + if (newAtt != null) { + toAdd.addAll(newAtt); + } + if (oldAtt != null) { + toAdd.removeAll(oldAtt); + } + attachmentInTree.add(id, toAdd); + } + } + addedDocs.put(doc.get(LUCENE_ID), doc); + } + + // + // Phase 2: on reindexe tous les TreeNode qui en ont besoin + // nouveau TreeNode ou TreeNode ayant un parent modifie + // + + // on ajoute tous les TreeNode qui doivent aussi etre reindexe + // noeud du sous arbre d'un noeud dont le pere a ete modifie + dirtyParent.addAll(dirtyParentDoc.keySet()); + + for (String id : dirtyParent) { + // w et oldDoc peuvent etre null, mais pas en meme temps + // w est null si c'est un noeud dont la reindexation est force + // parce que un de ces peres a change de parent + // oldDoc est null, si l'objet n'a jamais ete indexe (nouveau) + Wikitty w = allWikitties.get(id); + Document oldDoc = dirtyParentDoc.get(id); + Document doc = addedDocs.get(id); + if (w == null) { + // on reindexe un ancien objet + // normalement doc doit etre null + doc = new Document(); + // on recopie tous les champs, sauf l'indexation arbre + LuceneUtil.copyLuceneDocumentExcludeSomeField( + oldDoc, doc, TREENODE_PREFIX + ".*"); + + // modifie les champs root, parents + addTreeIndexField(addedDocs, doc, parents); + + attachmentInTree.remove(oldDoc); + attachmentInTree.add(oldDoc); + } else if (oldDoc == null) { + // ajoute les champs root, parents + addTreeIndexField(addedDocs, doc, parents); + + // on indexe un nouvel objet, il faut ajouter tous les + // attachment pour indexation + attachmentInTree.add(w); + } else { + // ni w, ni oldDoc ne sont pas null, c'est une modification + // dans la phase precendente on a deja indexe les champs + // normaux + + // ajoute les champs root, parents + addTreeIndexField(addedDocs, doc, parents); + + // il faut supprimer tous les anciens attaches + // et ajouter tous nouveaux pour la reindexation + attachmentInTree.remove(oldDoc); + attachmentInTree.add(w); + } + //solrResource.addDoc(id, doc); + addedDocs.put(doc.get(LUCENE_ID), doc); + } + + // + // Phase 3: on reindexe les attachments qui en ont besoin + // + + addTreeIndexField(writer, searcher, addedDocs, null, attachmentInTree); + + // on ne peut faire le add reel qu'à ce moment + // car les meme doc sont ajouter plusieurs fois dans la map + for (Document doc : addedDocs.values()) { + writer.addDocument(doc); + } + writer.commit(); + } catch (Exception eee) { + throw new WikittyException("Can't store wikitty", eee); + } finally { + IOUtils.closeQuietly(writer); + IOUtils.closeQuietly(searcher); + } + timeLog.log(startTime, "store", String.format( + "nb %s in force mode %s", wikitties.size(), force)); } + + /** + * Modifie/Ajoute les champs specifique a l'indexation des arbres sur les + * TreeNode. + * + * On se base sur le fait que si un TreeNode est dans {@link SolrResource} il ne + * peut etre que dans deux etats. Soit il a ete reindexe pour les arbres + * et il a les champs d'indexation arbre. Soit il a pas encore ete reindexe + * pour les arbres et dans ce cas il ne doit pas avoir les champs d'indexation + * d'arbre. (il est donc interdit d'avoir des champs d'indexation arbre + * obsolete si le document est dans {@link SolrResource}) + * + * @param solrResource solR resource + * @param doc les documents representant le TreeNode + * @param tree tous les autres noeuds d'arbre dont on pourrait avoir + * besoin pour l'indexation + */ + protected void addTreeIndexField(Map<String, Document> addedDocs, Document doc, Map<String, Document> tree) { + Set<String> parents = new HashSet<String>(); + String root = null; + String treeNodeId = doc.get(LUCENE_ID); + String parentId = treeNodeId; + if (parentId == null) { + throw new WikittyException("parentId is null, but this must be impossible"); + } + parents.add(parentId); + while (root == null) { + String nextParentId = null; + Document parentDoc = addedDocs.get(parentId); + if (parentDoc != null) { + // si parentDoc a deja ete indexe pour l'arbre, on peut reutiliser + // directement les valeurs et sortir de la boucle + if (parentDoc.get(TREENODE_ROOT) != null) { + root = parentDoc.get(TREENODE_ROOT); + //Collection<String> p = SolrUtil.getStringFieldValues(parentDoc, TREENODE_PARENTS); + String[] values = parentDoc.getValues(TREENODE_PARENTS); + Collection<String> p = Arrays.asList(values); + parents.addAll(p); + break; + } else { + //nextParentId = SolrUtil.getStringFieldValue(parentDoc, + // WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_PARENT, + // TYPE.WIKITTY); + nextParentId = parentDoc.get( + WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_PARENT); + } + } else { + Document oldParentDoc = tree.get(parentId); + if (oldParentDoc != null) { + // si parentDoc a deja ete indexe pour l'arbre, on peut reutiliser + // directement les valeurs et sortir de la boucle + if (oldParentDoc.get(TREENODE_ROOT) != null) { + //root = SolrUtil.getStringFieldValue(oldParentDoc,TREENODE_ROOT); + root = oldParentDoc.get(TREENODE_ROOT); + //Collection<String> p = SolrUtil.getStringFieldValues(oldParentDoc, TREENODE_PARENTS); + String[] values = oldParentDoc.getValues(TREENODE_PARENTS); + Collection<String> p = Arrays.asList(values); + parents.addAll(p); + break; + } else { + //nextParentId = SolrUtil.getStringFieldValue(oldParentDoc, + // WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_PARENT, + // TYPE.WIKITTY); + nextParentId = oldParentDoc.get(WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_PARENT); + } + } + } + if (nextParentId != null) { + if (parents.contains(nextParentId)) { + log.error(String.format("Tree with TreeNode '%s' have loop" + + " at node %s->%s all parents are %s. Set root with" + + " last valide parent '%s'", + treeNodeId, parentId, nextParentId, parents, parentId)); + root = parentId; + } else { + parents.add(nextParentId); + parentId = nextParentId; + } + } else { + root = parentId; + } + } + doc.removeField(TREENODE_ROOT); + doc.removeField(TREENODE_PARENTS); + doc.removeField(TREENODE_DEPTH); + + /*doc.addField(TREENODE_ROOT, root); + doc.addField(TREENODE_DEPTH, parents.size()); + for (String id : parents) { + doc.addField(TREENODE_PARENTS, id); + }*/ + doc.add(new Field(TREENODE_ROOT, root, Store.YES, Index.NOT_ANALYZED)); + doc.add(new Field(TREENODE_DEPTH, String.valueOf(parents.size()), Store.YES, Index.NOT_ANALYZED)); + for (String id : parents) { + doc.add(new Field(TREENODE_PARENTS, id, Store.YES, Index.NOT_ANALYZED)); + } + } + /** + * Update attached extra field on all objects passed in argument + * allAttachmentToIndex + * + * @param solrResource must contains reindexed TreeNode, that contains attachment + * @param tree solr document for some TreeNode (used when TreeNode not find in solrResource) + * @param attachmentInTree attachment added and removed from TreeNode + * @throws IOException + */ + protected void addTreeIndexField(IndexWriter indexWriter, IndexSearcher indexSearcher, Map<String, Document> addedDocs, + Map<String, Document> tree, AttachmentInTree attachmentInTree) throws IOException { + + if (attachmentInTree.size() > 0) { + Map<String, Document> attachments = findAllById(indexSearcher, attachmentInTree.getAll()); + + for (String treeNodeId : attachmentInTree.getRemoved().keySet()) { + for (String attId : attachmentInTree.getRemoved().get(treeNodeId)) { + Document oldDoc = attachments.get(attId); + Document doc = addedDocs.get(attId); + if (oldDoc != null || doc != null) { + if (doc == null) { + doc = new Document(); + LuceneUtil.copyLuceneDocument(oldDoc, doc); + addedDocs.put(attId, doc); + } + doc.removeField(TREENODE_ATTACHED + treeNodeId); + } + } + } + for (String treeNodeId : attachmentInTree.getAdded().keySet()) { + Collection<String> treeNodeParents = null; + Document treeNodeDoc = addedDocs.get(treeNodeId); + if (treeNodeDoc != null) { + //treeNodeParents = SolrUtil.getStringFieldValues( + // treeNodeDoc, TREENODE_PARENTS); + String[] treeNodeParent = treeNodeDoc.getValues(TREENODE_PARENTS); + treeNodeParents = Arrays.asList(treeNodeParent); + } else if (tree != null) { + Document doc = tree.get(treeNodeId); + //treeNodeParents = SolrUtil.getStringFieldValues( + // doc, TREENODE_PARENTS); + String[] treeNodeParent = doc.getValues(TREENODE_PARENTS); + treeNodeParents = Arrays.asList(treeNodeParent); + } else { + log.error("SolR doc not found in Transaction or in tree." + + "This is a bug !!!"); + } + // add tree indexation on all attachments for this treeNodeId + for (String attId : attachmentInTree.getAdded().get(treeNodeId)) { + Document oldDoc = attachments.get(attId); + Document doc = addedDocs.get(attId); + // il faut que oldDoc ou doc soit different de null pour + // pouvoir ajouter l'indexation d'arbre. Le cas on les deux + // sont nuls arrivent lorsqu'on demande la sauvegarde d'un + // TreeNode alors qu'on a pas encore sauve les attachments + // (ex: durant un syncSearchEngine ou l'on demande une + // reindexation totale). Ceci n'est pas un probleme car + // les attachments seront convenablement indexes lorsqu'ils + // seront ajoutes + if (oldDoc == null && doc == null) { + // L'objet en attachment du TreeNode, n'est pas encore + // store et n'est pas dans le meme appel de la methode + // store. On cree donc un document Partiel (seulement + // constitue de l'id et de l'indexation d'arbre qui sera + // reutilise lors du store de reel objet + doc = new Document(); + addToIndexDocument(doc, null, LUCENE_ID, attId, true); + } else if (doc == null) { + doc = new Document(); + LuceneUtil.copyLuceneDocument(oldDoc, doc); + //solrResource.addDoc(attId, doc); + addedDocs.put(attId, doc); + } + doc.removeField(TREENODE_ATTACHED + treeNodeId); + for (String id : treeNodeParents) { + //doc.addField(TREENODE_ATTACHED + treeNodeId, id); + doc.add(new Field(TREENODE_ATTACHED + treeNodeId, id, Store.YES, Index.NOT_ANALYZED)); + } + } + } + } + } + + /** + * Ajoute un champs dans un document a indexer + */ + protected void addToIndexDocument(Document doc, + TYPE type, String fqfieldName, String fieldValue, + boolean collection) { + if (fqfieldName.startsWith(WIKITTY_LUCENE_PREFIX)) { + doc.removeField(fqfieldName); + //doc.addField(fqfieldName, fieldValue); + doc.add(new Field(fqfieldName, fieldValue, Store.YES, Index.NOT_ANALYZED)); + } else { + String solrFqFieldName; + /* +// FIXME REMOVE IT if search on multivalued work with new hack (specific sortable field +// if (collection) { +// solrFqFieldName = SolrUtil.getSolrCollectionFieldName(fqfieldName, type); +// } else { + // add suffix like _s for string type ex: myExt.myField_s + solrFqFieldName = SolrUtil.getSolrFieldName(fqfieldName, type); +// } + + // #all.<fieldname> + // permet de faire des recherches inter extension sur un champs ayant + // le meme nom. ex:Person.name et User.name + // Quoi qu'il arrive pour le #all on utilise du multivalue +// FIXME REMOVE IT if search on multivalued work with new hack (specific sortable field +// String solrAllFqFieldName = SolrUtil.getSolrCollectionFieldName(fqfieldName, type); + String solrAllFieldName = SOLR_ALL_EXTENSIONS + + WikittyUtil.FQ_FIELD_NAME_SEPARATOR + + WikittyUtil.getFieldNameFromFQFieldName(solrFqFieldName); + + // idem mais un champs sur plusieurs extension peut avoir des types + // different, on ajoute donc un champs pour la recherche fulltext + String solrFulltextAllFieldName = SOLR_FULLTEXT_ALL_EXTENSIONS + + WikittyUtil.FQ_FIELD_NAME_SEPARATOR + + WikittyUtil.getFieldNameFromFQFieldName(fqfieldName); + + String solrNullFieldFqFieldName = SOLR_NULL_FIELD + fqfieldName; + + // sortable solr field name for this field ex: myExt.myField_s_sortable + String solrFqFieldNameSortable = solrFqFieldName + SUFFIX_SORTABLE; + + doc.remove(solrFqFieldName); // myExt.myField_s + doc.remove(solrNullFieldFqFieldName); // #null_field-myExt.myField + doc.remove(solrAllFieldName); // #all.myField_s + doc.remove(solrFulltextAllFieldName); // #fulltext.all.myField + doc.remove(solrFqFieldNameSortable); // myExt.myField_s_sortable + + String solrNullFieldFqFieldNameValue = "true"; + if(fieldValue != null) { + doc.addField(solrFqFieldName, fieldValue); + doc.addField(solrAllFieldName, fieldValue); + doc.addField(solrFulltextAllFieldName, fieldValue); + Object oneFieldValue = SolrUtil.getOneValue(fieldValue); + doc.addField(solrFqFieldNameSortable, oneFieldValue); + solrNullFieldFqFieldNameValue = "false"; + if (log.isTraceEnabled()) { + log.trace(String.format("index field '%s' with value '%s'", + solrFqFieldName, + StringUtils.abbreviate(String.valueOf(fieldValue), 50))); + } + } + doc.addField(solrNullFieldFqFieldName, solrNullFieldFqFieldNameValue);*/ + } + } + + /** * Convert a wikitty object to a lucene {@link Document}. * * @param w wikitty to convert * @return lucene document */ protected Document convertWikittyToDoc(Wikitty w) { - + Document document = new Document(); - + // wikitty id Field luceneIdField = new Field(WikittyLuceneConstants.LUCENE_ID, w.getId(), - Store.YES, Index.ANALYZED); + Store.YES, Index.NOT_ANALYZED); document.add(luceneIdField); - + // wikitty extension String extAsString = StringUtils.join(w.getExtensionNames(), ","); Field luceneExtField = new Field(WikittyLuceneConstants.LUCENE_EXTENSIONS, - extAsString, Store.YES, Index.ANALYZED); + extAsString, Store.YES, Index.NOT_ANALYZED); document.add(luceneExtField); StringBuffer allAsText = new StringBuffer(); @@ -258,19 +734,35 @@ for (String wikyttyField : w.getAllFieldNames()) { String ext = WikittyUtil.getExtensionNameFromFQFieldName(wikyttyField); String fieldName = WikittyUtil.getFieldNameFromFQFieldName(wikyttyField); - String value = w.getFieldAsString(ext, fieldName); - if (StringUtils.isNotEmpty(value)) { - if (log.isTraceEnabled()) { - log.trace("add lucene field : " + wikyttyField + " = " + value); + if (w.getFieldType(wikyttyField).isCollection()) { + List<String> values = w.getFieldAsList(ext, fieldName, String.class); + // Store.YES = mandatory for facets + // Index.NOT_ANALYZED = mandatory for search on field + if (values != null) { + for (String value : values) { + Field luceneField = new Field(wikyttyField, value, Store.YES, Index.NOT_ANALYZED); + document.add(luceneField); + + // copy content to #fulltext field + allAsText.append(value); + } } - - // Store.YES = mandatory for facets - // Index.ANALYZED = mandatory for search on field - Field luceneField = new Field(wikyttyField, value, Store.YES, Index.ANALYZED); - document.add(luceneField); - - allAsText.append(value); + } else { + String value = w.getFieldAsString(ext, fieldName); + if (StringUtils.isNotEmpty(value)) { + if (log.isTraceEnabled()) { + log.trace("add lucene field : " + wikyttyField + " = " + value); + } + + // Store.YES = mandatory for facets + // Index.ANALYZED = mandatory for search on field + Field luceneField = new Field(wikyttyField, value, Store.YES, Index.NOT_ANALYZED); + document.add(luceneField); + + // copy content to #fulltext field + allAsText.append(value); + } } } @@ -299,6 +791,9 @@ for (String id : idList) { Term term = new Term(WikittyLuceneConstants.LUCENE_ID, id); + if (log.isDebugEnabled()) { + log.debug("Delete with query : " + term); + } writer.deleteDocuments(term); } @@ -415,6 +910,78 @@ return pagedResult; } + /** + * Look for a lucene document by id. + * + * @param indexSearcher index searcher + * @param id id to find + * @return found document + * @throws IOException + */ + protected Document findById(IndexSearcher indexSearcher, String id) throws IOException { + return findByField(indexSearcher, LUCENE_ID, id); + } + + /** + * Look for a lucene document by id. + * + * @param indexSearcher index searcher + * @param fieldName field name to search into + * @param fieldValue field value to find + * @return found document + * @throws IOException + */ + protected Document findByField(IndexSearcher indexSearcher, String fieldName, String fieldValue) throws IOException { + Term term = new Term(fieldName, fieldValue); + TermQuery termQuery = new TermQuery(term); + TopDocs topDocs = indexSearcher.search(termQuery, null, 1); + Document result = null; + if (topDocs.totalHits == 1) { + int docId = topDocs.scoreDocs[0].doc; + result = indexSearcher.doc(docId); + } + return result; + } + + /** + * Look for a lucene document by id. + * + * @param indexSearcher index searcher + * @param id id to find + * @return found document + * @throws IOException + */ + protected Map<String, Document> findAllById(IndexSearcher indexSearcher, Collection<String> ids) throws IOException { + return findAllByField(indexSearcher, LUCENE_ID, ids); + } + + /** + * Look for a lucene document by id. + * + * @param indexSearcher index searcher + * @param fieldName field name to search into + * @param fieldValues field values to find + * @return found document + * @throws IOException + */ + protected Map<String, Document> findAllByField(IndexSearcher indexSearcher, String fieldName, Collection<String> fieldValues) throws IOException { + BooleanQuery query = new BooleanQuery(); + for (String fieldValue : fieldValues) { + Term term = new Term(fieldName, fieldValue); + TermQuery termQuery = new TermQuery(term); + query.add(termQuery, BooleanClause.Occur.SHOULD); + } + TopDocs topDocs = indexSearcher.search(query, null, 1000); + Map<String, Document> result = new HashMap<String, Document>(); + for (ScoreDoc socreDoc : topDocs.scoreDocs) { + int docId = socreDoc.doc; + Document doc = indexSearcher.doc(docId); + String wikittyId = doc.getFieldable(LUCENE_ID).stringValue(); + result.put(wikittyId, doc); + } + return result; + } + /* * @see org.nuiton.wikitty.storage.WikittySearchEngine#findAllChildrenCount(org.nuiton.wikitty.services.WikittyTransaction, java.lang.String, int, boolean, org.nuiton.wikitty.search.Criteria) */ @@ -423,7 +990,98 @@ WikittyTransaction transaction, String wikittyId, int depth, boolean count, Criteria filter) { - return null; + TreeNodeResult<String> result = null; + IndexSearcher searcher = null; + try { + searcher = new IndexSearcher(indexDirectory); + + Document luceneDoc = findById(searcher, wikittyId); + if (luceneDoc != null) { + + // on verifie que l'argument est bien un TreeNode + if (luceneDoc.getFieldable(TREENODE_DEPTH) != null) { + Search treeSearch = Search.query().and().eq(TREENODE_PARENTS, wikittyId); + if (depth >= 0) { + //Integer d = LuceneUtil.getIntFieldValue(luceneDoc, TREENODE_DEPTH); + String sField = luceneDoc.get(TREENODE_DEPTH); + Integer d = Integer.parseInt(sField); + treeSearch = treeSearch.bw(TREENODE_DEPTH, + String.valueOf(d), String.valueOf(d + depth)); + } + Criteria treeCriteria = treeSearch.criteria(); + + // on a dans treeSearch uniquement le noeud passe en parametre + // et ses enfants jusqu'a la profondeur demandee + Restriction2Lucene restriction2Lucene = + new Restriction2Lucene(indexAnalyzer); + Query query = restriction2Lucene.toLucene(treeCriteria.getRestriction()); + TopDocs topDocs = searcher.search(query, null, 1000); + //SolrQuery query = new SolrQuery(SOLR_QUERY_PARSER + queryString); + //QueryResponse resp = SolrUtil.executeQuery(solrServer, query); + //SolrDocumentList solrResults = resp.getResults(); + + // recuperation si demande du nombre d'attachment par noeud + Map<String, Integer> counts = new HashMap<String, Integer>(); + if (count) { + // TODO poussin 20110128 regarder si on ne peut pas + // restreindre les facettes aux noeuds trouve dans la recherche + // precedente + Criteria attCriteria = Search.query(filter).eq( + TREENODE_ATTACHED_ALL, wikittyId).criteria() + .setFirstIndex(0).setEndIndex(0) + .addFacetField(TREENODE_ATTACHED_ALL); + PagedResult<String> attSearch = + findAllByCriteria(transaction, attCriteria); + List<FacetTopic> topics = attSearch.getTopic(TREENODE_ATTACHED_ALL); + if (topics != null) { + for (FacetTopic topic : topics) { + String topicName = topic.getTopicName(); + int topicCount = topic.getCount(); + counts.put(topicName, topicCount); + } + } + } + + // construction du resultat, il proceder en 2 phases car + // sinon si on construit un fils avant son pere, il ne sera + // jamais associe + Map<String, TreeNodeResult<String>> allTreeNodeResult = + new HashMap<String, TreeNodeResult<String>>(); + // key: id de l'enfant, value: l'id du parent + Map<String, String> childParent = new HashMap<String, String>(); + // construction de tous les TreeNodeResult qui permettront + // de construire l'arbre + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + Document doc = searcher.doc(scoreDoc.doc); + String id = doc.getFieldable(LUCENE_ID).stringValue(); + + String parentId = doc.getFieldable( + WikittyTreeNode.FQ_FIELD_WIKITTYTREENODE_PARENT).stringValue(); + int nb = counts.containsKey(id) ? counts.get(id) : 0; + TreeNodeResult<String> child = new TreeNodeResult<String>(id, nb); + allTreeNodeResult.put(id, child); + childParent.put(id, parentId); + } + // construction de l'arbre avant de le retourner + for(Map.Entry<String, TreeNodeResult<String>> e : allTreeNodeResult.entrySet()) { + String id = e.getKey(); + String parentId = childParent.get(id); + if (allTreeNodeResult.containsKey(parentId)) { + TreeNodeResult<String> child = e.getValue(); + TreeNodeResult<String> parent = allTreeNodeResult.get(parentId); + if (parent != child) { + parent.add(child); + } + } + } + result = allTreeNodeResult.get(wikittyId); + } + } + } catch (IOException ex) { + throw new WikittyException("Can't search on index", ex); + } finally { + IOUtils.closeQuietly(searcher); + } + return result; } - } Modified: trunk/wikitty-lucene/src/test/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLuceneTest.java =================================================================== --- trunk/wikitty-lucene/src/test/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLuceneTest.java 2011-06-24 16:06:02 UTC (rev 976) +++ trunk/wikitty-lucene/src/test/java/org/nuiton/wikitty/storage/lucene/WikittySearchEngineLuceneTest.java 2011-06-27 09:51:18 UTC (rev 977) @@ -73,7 +73,7 @@ protected static ApplicationConfig instance = WikittyConfig.getConfig("wikitty-config-sample-inmemory.properties"); - protected WikittyProxy proxy = new WikittyProxy(ws); + protected static WikittyProxy proxy; @BeforeClass public static void initTests() throws Exception { @@ -83,6 +83,7 @@ instance.setOption(WikittyConfigOption.WIKITTY_DATA_DIR.key, indexDir.getAbsolutePath()); ws = new WikittyServiceLucene(instance); + proxy= new WikittyProxy(ws); } @Before @@ -375,4 +376,20 @@ Collections.singletonList(query.criteria())).get(0).getNumFound()); Assert.assertEquals(0, proxy.findAllByCriteria(WikittyLabel.class, query.criteria()).size()); } + + /** + * Test to store duplicated wikitty. + */ + @Test + public void testLuceneDoubleStore() { + WikittyLabel myLabel1 = new WikittyLabelImpl(); + + proxy.store(myLabel1); + proxy.store(myLabel1); + + Criteria criteria = Search.query().exteq(WikittyLabel.EXT_WIKITTYLABEL).criteria(); + criteria.setEndIndex(0); // just count + PagedResult<Wikitty> result = proxy.findAllByCriteria(criteria); + Assert.assertEquals(1, result.getNumFound()); + } }
participants (1)
-
echatellier@users.nuiton.org