Software Versions:
- Hibernate Search 3.3.0.Final
- JBoss AS 6.0.0.Final
- JDK 1.6.0_18. (build 1.6.0_18-b07)
Basic Problem: I am unable to get the mass indexer to work when the data set to index is large (i.e., greater than a few thousand entries). The mass indexer does work when we have a few thousand entries in the database. I do not believe the issue is a transaction timeout since I am able to run other long running methods in the same stateless session bean (EJB3) - see below. Also, the mass indexer fails if a lucene index currently exists or not.
Here is the code to mass index:
Code:
import javax.annotation.Resource;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.ejb.TransactionManagement;
import javax.ejb.TransactionManagementType;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.transaction.UserTransaction;
import logging.type.AppLogEvent;
import logging.type.LuceneIndexStatistics;
import logging.util.LoggerUtil;
import org.hibernate.CacheMode;
import org.hibernate.FlushMode;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.Session;
import org.hibernate.criterion.CriteriaSpecification;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.jpa.FullTextEntityManager;
import org.hibernate.search.jpa.Search;
import org.hibernate.search.stat.Statistics;
import org.jboss.ejb3.annotation.TransactionTimeout;
@Stateless
@TransactionManagement(TransactionManagementType.BEAN)
public class LuceneIndexAdminFacade implements LuceneIndexAdmin
{
@PersistenceContext(unitName = "LoggerEntityManager") EntityManager em;
@Resource
UserTransaction tx;
@TransactionTimeout(9999999)
@TransactionAttribute(TransactionAttributeType.REQUIRED)
public void massIndexLuceneIndex() throws Exception
{
try
{
tx.begin();
System.out.println("Started the creation of the initial LogRecord Lucene Index...");
FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(em);
fullTextEntityManager.createIndexer().purgeAllOnStart(true)
.threadsForSubsequentFetching(1).threadsToLoadObjects(1).startAndWait();
System.out.println("Finished the creation of the initial LogRecord Lucene Index...");
tx.commit();
}
catch (Exception e)
{
try
{
tx.rollback();
}
catch (Exception e1)
{
e1.printStackTrace();
}
e.printStackTrace();
throw e;
}
}
Below is the jboss console log messages and relevant stack traces. Please note that the indexer continues and receives the same errors and doesn't finish reindexing the index.
2011-06-20 19:37:31,437 INFO [org.hibernate.search.impl.SimpleIndexingProgressMonitor] (Hibernate Search: indexwriter-1) 412350 documents indexed in 271039 ms
2011-06-20 19:37:31,437 INFO [org.hibernate.search.impl.SimpleIndexingProgressMonitor] (Hibernate Search: indexwriter-1) Indexing speed: 1521.3678 documents/second; progress: 78.93709%
2011-06-20 19:37:31,453 WARN [com.arjuna.ats.arjuna] (Transaction Reaper) ARJUNA-12117 TransactionReaper::check timeout for TX 0:ffff0a05019d:126a:4dff9f93:26662 in state RUN
2011-06-20 19:37:31,453 WARN [com.arjuna.ats.arjuna] (Transaction Reaper) ARJUNA-12117 TransactionReaper::check timeout for TX 0:ffff0a05019d:126a:4dff9f93:26663 in state RUN
2011-06-20 19:37:31,453 INFO [org.hibernate.search.impl.SimpleIndexingProgressMonitor] (Hibernate Search: indexwriter-1) 412400 documents indexed in 271057 ms
2011-06-20 19:37:31,453 INFO [org.hibernate.search.impl.SimpleIndexingProgressMonitor] (Hibernate Search: indexwriter-1) Indexing speed: 1521.4512 documents/second; progress: 78.94666%
2011-06-20 19:37:31,468 INFO [org.hibernate.search.impl.SimpleIndexingProgressMonitor] (Hibernate Search: indexwriter-1) 412450 documents indexed in 271074 ms
2011-06-20 19:37:31,468 INFO [org.hibernate.search.impl.SimpleIndexingProgressMonitor] (Hibernate Search: indexwriter-1) Indexing speed: 1521.5402 documents/second; progress: 78.95624%
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12095 Abort of action id 0:ffff0a05019d:126a:4dff9f93:26662 invoked while multiple threads active within it.
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12108 CheckedAction::check - atomic action 0:ffff0a05019d:126a:4dff9f93:26662 aborting with 1 threads active!
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.jta] (Transaction Reaper Worker 0) ARJUNA-16029 SynchronizationImple.afterCompletion - failed for org.hibernate.transaction.synchronization.HibernateSynchronizationImpl@99556e with exception: java.util.ConcurrentModificationException
at java.util.LinkedHashMap$LinkedHashIterator.nextEntry(LinkedHashMap.java:373) [:1.6.0_18]
at java.util.LinkedHashMap$ValueIterator.next(LinkedHashMap.java:388) [:1.6.0_18]
at org.hibernate.engine.StatefulPersistenceContext.afterTransactionCompletion(StatefulPersistenceContext.java:276) [:3.6.0.Final]
at org.hibernate.impl.SessionImpl.afterTransactionCompletion(SessionImpl.java:601) [:3.6.0.Final]
at org.hibernate.jdbc.JDBCContext.afterTransactionCompletion(JDBCContext.java:273) [:3.6.0.Final]
at org.hibernate.transaction.synchronization.CallbackCoordinator.afterCompletion(CallbackCoordinator.java:146) [:3.6.0.Final]
at org.hibernate.transaction.synchronization.HibernateSynchronizationImpl.afterCompletion(HibernateSynchronizationImpl.java:59) [:3.6.0.Final]
at com.arjuna.ats.internal.jta.resources.arjunacore.SynchronizationImple.afterCompletion(SynchronizationImple.java:117) [:6.0.0.Final]
at com.arjuna.ats.arjuna.coordinator.TwoPhaseCoordinator.afterCompletion(TwoPhaseCoordinator.java:371) [:6.0.0.Final]
at com.arjuna.ats.arjuna.coordinator.TwoPhaseCoordinator.cancel(TwoPhaseCoordinator.java:121) [:6.0.0.Final]
at com.arjuna.ats.arjuna.AtomicAction.cancel(AtomicAction.java:212) [:6.0.0.Final]
at com.arjuna.ats.arjuna.coordinator.TransactionReaper.doCancellations(TransactionReaper.java:367) [:6.0.0.Final]
at com.arjuna.ats.internal.arjuna.coordinator.ReaperWorkerThread.run(ReaperWorkerThread.java:79) [:6.0.0.Final]
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12127 TwoPhaseCoordinator.afterCompletion - returned failure for SynchronizationImple< 0:ffff0a05019d:126a:4dff9f93:26664, org.hibernate.transaction.synchronization.HibernateSynchronizationImpl@99556e >
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.arjuna] (Transaction Reaper) ARJUNA-12117 TransactionReaper::check timeout for TX 0:ffff0a05019d:126a:4dff9f93:26666 in state RUN
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12121 TransactionReaper::doCancellations worker Thread[Transaction Reaper Worker 0,5,jboss] successfully canceled TX 0:ffff0a05019d:126a:4dff9f93:26662
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12095 Abort of action id 0:ffff0a05019d:126a:4dff9f93:26663 invoked while multiple threads active within it.
2011-06-20 19:37:31,484 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12108 CheckedAction::check - atomic action 0:ffff0a05019d:126a:4dff9f93:26663 aborting with 1 threads active!
2011-06-20 19:37:31,500 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12121 TransactionReaper::doCancellations worker Thread[Transaction Reaper Worker 0,5,jboss] successfully canceled TX 0:ffff0a05019d:126a:4dff9f93:26663
2011-06-20 19:37:31,500 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12095 Abort of action id 0:ffff0a05019d:126a:4dff9f93:26666 invoked while multiple threads active within it.
2011-06-20 19:37:31,500 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12108 CheckedAction::check - atomic action 0:ffff0a05019d:126a:4dff9f93:26666 aborting with 1 threads active!
2011-06-20 19:37:31,500 INFO [org.hibernate.jdbc.ConnectionManager] (Transaction Reaper Worker 0) forcing batcher resource cleanup on transaction completion; forgot to close ScrollableResults/Iterator?
2011-06-20 19:37:31,500 WARN [com.arjuna.ats.arjuna] (Transaction Reaper Worker 0) ARJUNA-12121 TransactionReaper::doCancellations worker Thread[Transaction Reaper Worker 0,5,jboss] successfully canceled TX 0:ffff0a05019d:126a:4dff9f93:26666
The below stateless session bean method (in the same LuceneIndexAdminFacade class) was able to reindex millions of records successfully. We tested 5 million a few times successfully. We also tested 10 million records successfully (i.e., index size matched the database and searches performed on the index were correct, i.e., matched results retrieved from the database). The problem is that the below method of indexing is extremely slow with large data sets.
Code:
/**
* <p></p>
*/
@TransactionTimeout(9999999)
@TransactionAttribute(TransactionAttributeType.REQUIRED)
public void regenerateLuceneIndex(int fetchSize, int batchSize) throws Exception
{
if( fetchSize > batchSize || (batchSize % fetchSize != 0 ))
{
fetchSize = 100;
batchSize = 1000;
}
logger.info(new AppLogEvent(0, "Starting to regenerate the lucene index."
+ " Fetch size: " + fetchSize + " Batch Size: " + batchSize));
tx.begin();
try
{
FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(em);
fullTextEntityManager.purgeAll(LogRecord.class); //remove obsolete content
fullTextEntityManager.flushToIndexes(); //Apply purge before optimize
fullTextEntityManager.getSearchFactory().optimize(LogRecord.class); //Physically clear space.
Session session = (Session) fullTextEntityManager.getDelegate();
FullTextSession fullTextSession = org.hibernate.search.Search.getFullTextSession(session);
fullTextSession.setFlushMode(FlushMode.MANUAL);
fullTextSession.setCacheMode(CacheMode.IGNORE);
logger.info(new AppLogEvent(0, "Creating scrollable results...")); //log4j logger
ScrollableResults scroll =
fullTextSession.createCriteria(LogRecord.class)
.setFetchSize(fetchSize)
.setCacheMode(CacheMode.IGNORE)
.setFlushMode(FlushMode.MANUAL)
.setResultTransformer(CriteriaSpecification.DISTINCT_ROOT_ENTITY)
.scroll(ScrollMode.FORWARD_ONLY );
logger.info(new AppLogEvent(0, "Finished creating scrollable results..."));
int batch = 0;
while( scroll.next() )
{
batch++;
fullTextSession.index( scroll.get(0) );
if( batch % batchSize == 0)
{
System.out.println("Initiating Lucene Index Flush... batch = " + batch);
fullTextSession.flushToIndexes();
fullTextSession.clear();
System.out.println("Finished the Lucene Index Flush... batch = " + batch);
}
}
scroll.close();
logger.info(new AppLogEvent(0, "Finished reindexing the entire LogRecord Lucene index."));
}
catch (Exception e)
{
try{
tx.rollback();
}
catch( Exception re)
{
re.printStackTrace();
}
e.printStackTrace();
throw e;
}
finally{
tx.commit();
}
}