I'm facing a problem with the migration from Oracle 12.1 to Oracle 12.2 database. Obviously the encoding of a CLOB column differs after the upgrade.
I'm using JDBC8 for the database connection.
The following database script has been executed:
create table oracle_compatibility_test
(
id number(19) not null primary key,
name varchar2(500) not null,
name_as_clob clob not null
);
insert all
into oracle_compatibility_test (id, name, name_as_clob) values (1, 'test1', 'test1')
into oracle_compatibility_test (id, name, name_as_clob) values (2, 'test2äößrt', 'test2äößrt')
select *
from dual;
commit;
I then wrote a program to read the inserted values via JDBC and print them to console:
package de.pie;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
public final class OracleCompatibilityTest {
//A 12.2 Oracle Database
private static final String DB1 = "jdbc:oracle:thin:@server1:1521";
//A 12.1 Oracle Database
private static final String DB2 = "jdbc:oracle:thin:@server2:1521";
private static final String USED_DATABASE = DB1;
public static void main(String[] args) {
printDatabaseCharset();
try (Connection connection = DriverManager.getConnection(USED_DATABASE, databaseProperties());
Statement statement = connection.createStatement();
ResultSet result = statement.executeQuery(
"select name, name_as_clob, convert(name_as_clob, 'AL32UTF8') as utf_8_clob " +
"from oracle_compatibility_test " +
"order by id asc")) {
readRow(result);
readRow(result);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private static void printDatabaseCharset() {
try (Connection connection = DriverManager.getConnection(USED_DATABASE, databaseProperties());
Statement statement = connection.createStatement();
ResultSet result = statement.executeQuery(
"select * from database_properties where property_name = 'NLS_CHARACTERSET'")) {
result.next();
System.out.println("read charset from database: " + result.getString("property_value") + "\n");
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private static void readRow(ResultSet result) throws SQLException, IOException {
result.next();
String name = result.getString("name");
String nameAsClob = result.getString("name_as_clob");
Clob clobAsClobObject = result.getClob("name_as_clob");
String clobObjectAsString = clobAsClobObject.getSubString(1, (int) clobAsClobObject.length());
String clobReadAsCharacterStream = readFromCharacterStream(result);
String utf8clob = result.getString("utf_8_clob");
StringBuilder sb = new StringBuilder();
sb.append("read name: ")
.append(name)
.append("\nname read as clob: ")
.append(nameAsClob)
.append("\nname read as clob-object: ")
.append(clobObjectAsString)
.append("\nclob read as character-stream: ")
.append(clobReadAsCharacterStream)
.append("\nclob converted to utf-8: ")
.append(utf8clob)
.append("\n\n\n");
System.out.println(sb.toString());
}
private static String readFromCharacterStream(ResultSet result) throws SQLException, IOException {
try (Reader reader = result.getCharacterStream("name_as_clob")) {
StringBuilder stringBuilder = new StringBuilder();
int c;
while ((c = reader.read()) != -1) {
stringBuilder.append((char) c);
}
return stringBuilder.toString();
}
}
private static Properties databaseProperties() {
Properties prop = new Properties();
prop.put("user", "user");
prop.put("password", "password");
return prop;
}
}
With Oracle 12.1 the output is the following:
read charset from database: WE8ISO8859P15
read name: test1
name read as clob: test1
name read as clob-object: test1
clob read as character-stream: test1
clob converted to utf-8: t e s t 1
read name: test2äößrt
name read as clob: test2äößrt
name read as clob-object: test2äößrt
clob read as character-stream: test2äößrt
clob converted to utf-8: t e s t 2 ä ö ß r t
With Oracle 12.2 the output is the following:
read charset from database: WE8ISO8859P15
read name: test1
name read as clob: test1
name read as clob-object: test1
clob read as character-stream: test1
clob converted to utf-8: t e s t 1
read name: test2äößrt
name read as clob: test2���rt
name read as clob-object: test2���rt
clob read as character-stream: test2���rt
clob converted to utf-8: t e s t 2 � � � r t
The JDBC-Driver wrongly auto-detects the character-set as UTF-8, but the Stream really is in ISO8859-15. The character-set cannot be set explicitly in JDBC8. The returned Stream from the database was encoded in UTF-8 under Oracle 12.1