I'm facing a problem with Spring Batch. Our job uses a task executor (SimpleAsyncTaskExecutor) to run a flow of two parallel steps.
Within each step, the same task executor hands each chunk of data returned by the reader to a different thread (using the Multi-threaded Step concept; see https://docs.spring.io/spring-batch/trunk/reference/html/scalability.html).
The problem is this: our commit-interval is large (24,000) and the reader returns very few rows (fewer than 50), yet the writer sometimes receives more than one chunk. For example, one run produces a chunk of 30 rows and a chunk of 20 rows, another run produces two chunks of 25 rows, and yet another produces a single chunk of 50 rows; it appears to be random. Since the number of rows never exceeds the commit-interval, I would expect the writer to receive exactly one chunk of 50 rows on every run, with no randomness.
I'm trying to understand why this happens randomly in some runs. If anyone is familiar with this behavior in Spring Batch, could you help me?
Thank you.
Here is the configuration of my job (excluding our custom writers):
<!-- Job "job": a single split that runs two flows, one step per flow. -->
<batch:job id="job">
  <!-- The split uses the "taskExecutor" bean to run its flows. -->
  <batch:split id="split" task-executor="taskExecutor">

    <!-- Flow 1: step1 reads with reader1 and writes with writer1. -->
    <batch:flow>
      <batch:step id="step1">
        <!--
          The tasklet is given the same "taskExecutor" bean with a throttle limit of 4.
          NOTE(review): with a task executor on the tasklet, each worker thread presumably
          reads its own items and writes its own chunk, so a small result set can be spread
          over several chunks of varying size even when it is far below the commit-interval.
          Confirm against the Spring Batch multi-threaded step documentation.
        -->
        <batch:tasklet task-executor="taskExecutor" throttle-limit="4">
          <batch:chunk reader="reader1" writer="writer1" commit-interval="24000"/>
        </batch:tasklet>
      </batch:step>
    </batch:flow>

    <!-- Flow 2: step2 reads with reader2 and writes with writer2; same tasklet settings as step1. -->
    <batch:flow>
      <batch:step id="step2">
        <batch:tasklet task-executor="taskExecutor" throttle-limit="4">
          <batch:chunk reader="reader2" writer="writer2" commit-interval="24000"/>
        </batch:tasklet>
      </batch:step>
    </batch:flow>

  </batch:split>
</batch:job>
<!--
  reader1: step-scoped paging JDBC reader over database.people.
  Selects "name" for rows with age > 30, paged in ascending people_id order,
  and maps each row with fr.myapp.PeopleRowMapper.
-->
<bean id="reader1" class="org.springframework.batch.item.database.JdbcPagingItemReader" scope="step">
  <property name="dataSource" ref="postgresql_1"/>
  <property name="queryProvider">
    <bean class="org.springframework.batch.item.database.support.PostgresPagingQueryProvider">
      <!-- Clauses are kept on one line: line breaks inside an attribute value are
           turned into spaces by the XML parser and would pad the clause. -->
      <property name="selectClause" value="SELECT name"/>
      <property name="fromClause" value="FROM database.people"/>
      <property name="whereClause" value="WHERE age > 30"/>
      <!-- Sort key used to order the paged query. -->
      <property name="sortKeys">
        <map>
          <entry key="people_id" value="ASCENDING"/>
        </map>
      </property>
    </bean>
  </property>
  <!-- State saving is switched off for this reader. -->
  <property name="saveState" value="false"/>
  <property name="rowMapper">
    <bean class="fr.myapp.PeopleRowMapper"/>
  </property>
</bean>
<!--
  reader2: step-scoped paging JDBC reader over database.products.
  Selects "product_name" for rows whose product_order_date is on or before the
  given date literal, paged in ascending product_id order, and maps each row
  with fr.myapp.ProductsRowMapper.
-->
<bean id="reader2" class="org.springframework.batch.item.database.JdbcPagingItemReader" scope="step">
  <property name="dataSource" ref="postgresql_1"/>
  <property name="queryProvider">
    <bean class="org.springframework.batch.item.database.support.PostgresPagingQueryProvider">
      <!-- Clauses are kept on one line: line breaks inside an attribute value are
           turned into spaces by the XML parser and would pad the clause. -->
      <property name="selectClause" value="SELECT product_name"/>
      <property name="fromClause" value="FROM database.products"/>
      <!-- "<" must be written as &lt; inside an attribute value; a literal "<" there is
           not well-formed XML. The parsed value is still "<=".
           NOTE(review): '01/11/2017' is ambiguous (day/month order depends on the
           database date style); consider an ISO date such as 2017-11-01 once the
           intended date is confirmed. -->
      <property name="whereClause" value="WHERE product_order_date &lt;= '01/11/2017'"/>
      <!-- Sort key used to order the paged query. -->
      <property name="sortKeys">
        <map>
          <entry key="product_id" value="ASCENDING"/>
        </map>
      </property>
    </bean>
  </property>
  <!-- State saving is switched off for this reader. -->
  <property name="saveState" value="false"/>
  <property name="rowMapper">
    <bean class="fr.myapp.ProductsRowMapper"/>
  </property>
</bean>
<!--
  Shared executor: referenced by the split and by both step tasklets of job "job".
  NOTE(review): the split runs 2 flows and each tasklet has throttle-limit 4, so the
  configured demand (2 + 4 + 4 = 10) is above this limit of 8; presumably some task
  submissions wait for a free slot. Confirm this is intended.
-->
<bean id="taskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor">
  <property name="concurrencyLimit" value="8"/>
</bean>