8
votes

I'm facing a problem with the migration from Oracle 12.1 to Oracle 12.2 database. Obviously the encoding of a CLOB column differs after the upgrade.

I'm using JDBC8 for the database connection.

The following database script has been executed:

create table oracle_compatibility_test
(
    id           number(19)    not null primary key,
    name         varchar2(500) not null,
    name_as_clob clob          not null
);    

insert all
    into oracle_compatibility_test (id, name, name_as_clob) values (1, 'test1', 'test1')
    into oracle_compatibility_test (id, name, name_as_clob) values (2, 'test2äößrt', 'test2äößrt')
select *
    from dual;

commit;

I then wrote a program to read the inserted values via JDBC and print them to console:

package de.pie;

import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;

public final class OracleCompatibilityTest {
    //A 12.2 Oracle Database
    private static final String DB1 = "jdbc:oracle:thin:@server1:1521";
    //A 12.1 Oracle Database
    private static final String DB2 = "jdbc:oracle:thin:@server2:1521";

    private static final String USED_DATABASE = DB1;

    public static void main(String[] args) {
        printDatabaseCharset();

        try (Connection connection = DriverManager.getConnection(USED_DATABASE, databaseProperties());
             Statement statement = connection.createStatement();
             ResultSet result = statement.executeQuery(
                     "select name, name_as_clob, convert(name_as_clob, 'AL32UTF8') as utf_8_clob " +
                             "from oracle_compatibility_test " +
                             "order by id asc")) {
            readRow(result);
            readRow(result);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private static void printDatabaseCharset() {
        try (Connection connection = DriverManager.getConnection(USED_DATABASE, databaseProperties());
             Statement statement = connection.createStatement();
             ResultSet result = statement.executeQuery(
                     "select * from database_properties where property_name = 'NLS_CHARACTERSET'")) {
            result.next();
            System.out.println("read charset from database: " + result.getString("property_value") + "\n");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private static void readRow(ResultSet result) throws SQLException, IOException {
        result.next();

        String name = result.getString("name");
        String nameAsClob = result.getString("name_as_clob");
        Clob clobAsClobObject = result.getClob("name_as_clob");
        String clobObjectAsString = clobAsClobObject.getSubString(1, (int) clobAsClobObject.length());
        String clobReadAsCharacterStream = readFromCharacterStream(result);
        String utf8clob = result.getString("utf_8_clob");


        StringBuilder sb = new StringBuilder();
        sb.append("read name: ")
          .append(name)
          .append("\nname read as clob: ")
          .append(nameAsClob)
          .append("\nname read as clob-object: ")
          .append(clobObjectAsString)
          .append("\nclob read as character-stream: ")
          .append(clobReadAsCharacterStream)
          .append("\nclob converted to utf-8: ")
          .append(utf8clob)
          .append("\n\n\n");

        System.out.println(sb.toString());
    }

    private static String readFromCharacterStream(ResultSet result) throws SQLException, IOException {
        try (Reader reader = result.getCharacterStream("name_as_clob")) {
            StringBuilder stringBuilder = new StringBuilder();
            int c;
            while ((c = reader.read()) != -1) {
                stringBuilder.append((char) c);
            }
            return stringBuilder.toString();
        }
    }

    private static Properties databaseProperties() {
        Properties prop = new Properties();
        prop.put("user", "user");
        prop.put("password", "password");
        return prop;
    }
}

With Oracle 12.1 the output is the following:

read charset from database: WE8ISO8859P15

read name: test1
name read as clob: test1
name read as clob-object: test1
clob read as character-stream: test1
clob converted to utf-8:  t e s t 1



read name: test2äößrt
name read as clob: test2äößrt
name read as clob-object: test2äößrt
clob read as character-stream: test2äößrt
clob converted to utf-8:  t e s t 2 ä ö ß r t

With Oracle 12.2 the output is the following:

read charset from database: WE8ISO8859P15

read name: test1
name read as clob: test1
name read as clob-object: test1
clob read as character-stream: test1
clob converted to utf-8:  t e s t 1



read name: test2äößrt
name read as clob: test2���rt
name read as clob-object: test2���rt
clob read as character-stream: test2���rt
clob converted to utf-8:  t e s t 2 � � � r t

The JDBC-Driver wrongly auto-detects the character-set as UTF-8, but the Stream really is in ISO8859-15. The character-set cannot be set explicitly in JDBC8. The returned Stream from the database was encoded in UTF-8 under Oracle 12.1

1
Maybe have a look at docs.oracle.com/database/121/JJDBC/global.htm#JJDBC28643 In deed, if you run the same Java program on two DB's having the same characters set and getting this issue is really strange.Wernfried Domscheit
I find it odd that the same JDBC driver would work correctly with 12.1 and then fail with 12.2. In fact, since the driver didn't change but your database did, it's more likely that the problem lies with the database (which of course can cause the driver to act that way).Kayaman
I opened a Service-Request at OracleFloeke

1 Answers

15
votes

We identified the problem and found a solution (which is more of a workaround perhaps).

Under Oracle 12.1 the property oracle.jdbc.defaultLobPrefetchSize was set to -1 by default. Under Oracle 12.2 this changed to 4000, which means that the database will try to fetch everything within one query and cast the CLOB to a VARCHAR2 (if its size is under 4000, See here). This somehow doesn't work with Oracle 12.2 while the Charset WE8ISO8859P15 is used.

Setting this property to -1 will disable the prefetching (so another database-access is neccessary to fetch the CLOB-column) again and everything works fine.

Under Oracle 12.1 with the same encoding and explicitly setting the defaultLobPrefetchSize to 4000 works. So this must obviously be a bug on the database-side.