Jackrabbit clustering

Question

I am using Jackrabbit 2.18.0 and I would like to have a cluster setup on my local machine, and later on AWS.

Currently I am able to run it as a single node using the following configuration:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Jackrabbit repository configuration for a single cluster node.
    The data store, both persistence managers and the cluster journal all use the
    same MySQL database (jdbc:mysql://127.0.0.1:3306/jackrabbit); the file systems
    and the search index are kept on the local disk of this node.
-->
<Repository>
    <!-- Enable the config below for the MySQL data store -->
    <DataStore class="org.apache.jackrabbit.core.data.db.DbDataStore">
        <param name="driver" value="com.mysql.jdbc.Driver" />
        <param name="url" value="jdbc:mysql://127.0.0.1:3306/jackrabbit" />
        <param name="user" value="root" />
        <param name="password" value="" />
        <param name="databaseType" value="mysql" />
        <param name="schemaObjectPrefix" value="J_R_DS_" />
    </DataStore>
    <!-- Enable the config below for the file data store -->
    <!--DataStore class="org.apache.jackrabbit.core.data.FileDataStore">
            <param name="path" value="${rep.home}/repository/datastore"/>
            <param name="minRecordLength" value="100"/>
        </DataStore-->

    <!-- Repository-level (global) file system -->
    <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
        <param name="path" value="${rep.home}/repository" />
    </FileSystem>

    <Security appName="Jackrabbit">
        <AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager" />
        <LoginModule class="org.apache.jackrabbit.core.security.SimpleLoginModule">
            <param name="anonymousId" value="anonymous" />
        </LoginModule>
    </Security>

    <Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="jcrlocal" />

    <!-- Template used for each workspace; ${wsp.name} and ${wsp.home} are substituted per workspace -->
    <Workspace name="${wsp.name}">
        <PersistenceManager class="org.apache.jackrabbit.core.persistence.pool.MySqlPersistenceManager">
            <param name="driver" value="com.mysql.jdbc.Driver" />
            <param name="url" value="jdbc:mysql://127.0.0.1:3306/jackrabbit" />
            <param name="user" value="root" />
            <param name="password" value="" />
            <param name="schema" value="mysql" />
            <param name="schemaObjectPrefix" value="J_PM_${wsp.name}_" />
            <param name="externalBLOBs" value="false" />
        </PersistenceManager>
        <!-- Workspace-private file system. It was previously set to ${rep.home}/version, which is
             the directory of the Versioning file system below and would be shared by every
             workspace; ${wsp.home} gives each workspace its own directory.
             NOTE(review): workspaces that already exist keep their own workspace.xml and
             presumably need the same correction there; confirm. -->
        <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
            <param name="path" value="${wsp.home}" />
        </FileSystem>
    </Workspace>

    <Versioning rootPath="${rep.home}/version">
        <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
            <param name="path" value="${rep.home}/version" />
        </FileSystem>
        <PersistenceManager class="org.apache.jackrabbit.core.persistence.pool.MySqlPersistenceManager">
            <param name="driver" value="com.mysql.jdbc.Driver" />
            <param name="url" value="jdbc:mysql://127.0.0.1:3306/jackrabbit" />
            <param name="user" value="root" />
            <param name="password" value="" />
            <param name="schema" value="mysql" />
            <param name="schemaObjectPrefix" value="J_V_PM_" />
            <param name="externalBLOBs" value="false" />
        </PersistenceManager>
    </Versioning>

    <!-- Cluster node definition. The id must be unique for every node in the cluster.
         syncDelay is given in milliseconds: 5000 (the Jackrabbit default) makes this node read
         the journal every 5 seconds, whereas the former value of 5 meant every 5 ms. -->
    <Cluster id="node1" syncDelay="5000">
        <Journal class="org.apache.jackrabbit.core.journal.DatabaseJournal">
            <param name="revision" value="${rep.home}/revision" />
            <param name="driver" value="com.mysql.jdbc.Driver" />
            <param name="url" value="jdbc:mysql://127.0.0.1:3306/jackrabbit" />
            <param name="user" value="root" />
            <param name="password" value="" />
            <param name="schema" value="mysql" />
            <param name="schemaObjectPrefix" value="J_C_" />
        </Journal>
    </Cluster>

    <!-- Repository-level search index, stored on the local disk of this node -->
    <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
        <param name="path" value="${rep.home}/index" />
        <!-- SearchIndex will get the indexing configuration from the classpath, if not found in the workspace home -->
        <!-- NOTE(review): absolute, machine-specific path; it has to exist on every node that uses this file -->
        <param name="indexingConfiguration" value="/Users/tuhinsubhramandal/jack-repo/indexing_configuration.xml" />
        <param name="useCompoundFile" value="true" />
        <param name="minMergeDocs" value="100" />
        <param name="volatileIdleTime" value="3" />
        <param name="maxMergeDocs" value="100000" />
        <param name="mergeFactor" value="10" />
        <param name="maxFieldLength" value="10000" />
        <param name="bufferSize" value="10" />
        <param name="cacheSize" value="1000" />
        <param name="forceConsistencyCheck" value="false" />
        <param name="autoRepair" value="true" />
        <param name="queryClass" value="org.apache.jackrabbit.core.query.QueryImpl" />
        <param name="respectDocumentOrder" value="true" />
        <param name="resultFetchSize" value="100" />
        <param name="extractorPoolSize" value="3" />
        <param name="extractorTimeout" value="100" />
        <param name="extractorBackLogSize" value="100" />
        <!-- needed to highlight the searched term -->
        <param name="supportHighlighting" value="true" />
        <!-- custom provider for getting an HTML excerpt in a query result with rep:excerpt() -->
        <param name="excerptProviderClass" value="org.apache.jackrabbit.core.query.lucene.DefaultXMLExcerpt" />
    </SearchIndex>
</Repository>

====================

Now here is my question. I am using Tomcat 9, where I have added the Jackrabbit WAR to webapps and got it working.

In order to create a cluster with, let's say, 3 nodes, do I need 3 different Tomcat instances, each with its own repository.xml and a different Cluster id, all pointing to the same datasource?

leopal leopal · Accepted Answer · 2019-03-20T11:36:53

In order to create a cluster with, let's say, 3 nodes, do I need 3 different Tomcat instances, each with its own repository.xml and a different Cluster id, all pointing to the same datasource?

In general, yes: having N nodes, each set up with its own repository.xml and cluster id (Cluster section) and all using the same datasource, will work.

Keep in mind that the following requirements must be met in order to use clustering.

Quoting from official docs (Requirements section):

Each cluster node must have its own repository configuration.

A DataStore must always be shared between nodes, if used.

The global FileSystem on the repository level must be shared (only the one that is on the same level as the data store; only in the repository.xml file).

Each cluster node needs its own (private) workspace level and version FileSystem (only those within the workspace and versioning configuration; the ones in the repository.xml and workspace.xml file).

Each cluster node needs its own (private) Search indexes.

Every cluster node must be assigned a unique ID.

A journal type must be chosen, either based on files or stored in a database.

Each cluster node must use the same (shared) journal.

The persistence managers must store their data in the same, globally accessible location.

Jackrabbit clustering

1 Answers