Skip to content

Commit 2e5d58d

Browse files
committed
HHH-14211 Switch to using oid for CLOB in PostgreSQL to avoid losing data after vacuumlo
1 parent e5a78f0 commit 2e5d58d

File tree

3 files changed

+109
-41
lines changed

3 files changed

+109
-41
lines changed

hibernate-core/src/main/java/org/hibernate/dialect/PostgreSQL81Dialect.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ public PostgreSQL81Dialect() {
114114
registerColumnType( Types.BINARY, "bytea" );
115115
registerColumnType( Types.LONGVARCHAR, "text" );
116116
registerColumnType( Types.LONGVARBINARY, "bytea" );
117-
registerColumnType( Types.CLOB, "text" );
117+
registerColumnType( Types.CLOB, "oid" );
118118
registerColumnType( Types.BLOB, "oid" );
119119
registerColumnType( Types.NUMERIC, "numeric($p, $s)" );
120120
registerColumnType( Types.OTHER, "uuid" );

hibernate-core/src/test/java/org/hibernate/test/lob/PostgreSqlLobStringTest.java

Lines changed: 46 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,9 @@ protected void prepareTest()
6565
" (?, ?, ?, -1)"
6666
)) {
6767
int index = 1;
68-
statement.setString(index++, value1);
69-
statement.setString(index++, value2);
70-
statement.setString(index++, value3);
68+
statement.setClob( index++, session.getLobHelper().createClob( value1 ) );
69+
statement.setClob( index++, session.getLobHelper().createClob( value2 ) );
70+
statement.setClob( index++, session.getLobHelper().createClob( value3 ) );
7171

7272
assertEquals( 1, statement.executeUpdate() );
7373
}
@@ -77,57 +77,63 @@ protected void prepareTest()
7777

7878
@Test
7979
public void testBadClobDataSavedAsStringFails() {
80-
try {
81-
doInHibernate( this::sessionFactory, session -> {
82-
final Query query = session.createQuery( "from TestEntity" );
80+
doInHibernate( this::sessionFactory, session -> {
81+
final Query query = session.createQuery( "from TestEntity" );
8382

84-
final List<TestEntity> results = query.list();
83+
final List<TestEntity> results = query.list();
8584

86-
fail("Exception thrown expected");
87-
} );
88-
}
89-
catch (Exception e) {
90-
Exception rootException = (Exception) ExceptionUtil.rootCause( e );
91-
assertTrue( rootException.getMessage().startsWith( "Bad value for type long" ) );
92-
}
85+
assertThat( results.size(), is( 1 ) );
86+
87+
final TestEntity testEntity = results.get( 0 );
88+
assertThat( testEntity.getFirstLobField(), is( value1 ) );
89+
assertThat( testEntity.getSecondLobField(), is( value2 ) );
90+
final Clob clobField = testEntity.getClobField();
91+
try {
92+
93+
assertThat( clobField.getSubString( 1, (int) clobField.length() ), is( value3 ) );
94+
}
95+
catch (SQLException e) {
96+
fail( e.getMessage() );
97+
}
98+
} );
9399
}
94100

95101
@Test
96102
public void testBadClobDataSavedAsStringworksAfterUpdate() {
97103
doInHibernate( this::sessionFactory, session -> {
98104

99-
session.doWork( connection -> {
100-
try(Statement statement = connection.createStatement()) {
101-
statement.executeUpdate(
102-
"update test_entity\n" +
103-
"set \n" +
104-
" clobfield = lo_from_bytea(0, cast(clobfield as bytea)),\n" +
105-
" firstlobfield = lo_from_bytea(0, cast(firstlobfield as bytea)),\n" +
106-
" secondlobfield = lo_from_bytea(0, cast(secondlobfield as bytea))"
107-
);
108-
}
109-
} );
110-
} );
105+
session.doWork( connection -> {
106+
try (Statement statement = connection.createStatement()) {
107+
statement.executeUpdate(
108+
"update test_entity\n" +
109+
"set \n" +
110+
" clobfield = lo_from_bytea(0, lo_get(clobfield)),\n" +
111+
" firstlobfield = lo_from_bytea(0, lo_get(firstlobfield)),\n" +
112+
" secondlobfield = lo_from_bytea(0, lo_get(secondlobfield))"
113+
);
114+
}
115+
} );
116+
} );
111117

112118
doInHibernate( this::sessionFactory, session -> {
113-
final Query query = session.createQuery( "from TestEntity" );
119+
final Query query = session.createQuery( "from TestEntity" );
114120

115-
final List<TestEntity> results = query.list();
121+
final List<TestEntity> results = query.list();
116122

117-
assertThat( results.size(), is( 1 ) );
123+
assertThat( results.size(), is( 1 ) );
118124

119-
final TestEntity testEntity = results.get( 0 );
120-
assertThat( testEntity.getFirstLobField(), is( value1 ) );
121-
assertThat( testEntity.getSecondLobField(), is( value2 ) );
122-
final Clob clobField = testEntity.getClobField();
123-
try {
125+
final TestEntity testEntity = results.get( 0 );
126+
assertThat( testEntity.getFirstLobField(), is( value1 ) );
127+
assertThat( testEntity.getSecondLobField(), is( value2 ) );
128+
final Clob clobField = testEntity.getClobField();
129+
try {
124130

125-
assertThat( clobField.getSubString( 1, (int) clobField.length() ), is( value3 ) );
126-
}
127-
catch (SQLException e) {
128-
fail( e.getMessage() );
129-
}
130-
} );
131+
assertThat( clobField.getSubString( 1, (int) clobField.length() ), is( value3 ) );
132+
}
133+
catch (SQLException e) {
134+
fail( e.getMessage() );
135+
}
136+
} );
131137
}
132138

133139
@Entity(name = "TestEntity")

migration-guide.adoc

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,65 @@ configure `hibernate.bytecode.provider=javassist`: remove the property if you're
1919

2020
A side effect is that Hibenate ORM no longer lists javassist among its dependencies.
2121

22+
=== Changes to the DDL type for CLOB in PostgreSQL81Dialect and its subclasses
23+
24+
As of 5.6.2, the default PostgreSQL DDL type for CLOB columns i.e. fields annotated with `@Lob` or with the type `java.sql.Clob`
25+
will be the `oid` type whereas before, the type `text` was used. The `text` type does not support streaming data
26+
and is, even if TOASTed, materialized eagerly by the server, which is not what one would expect for LOB types.
27+
28+
All PostgreSQL JDBC drivers unfortunately just store the `oid` it created for a `java.sql.Clob` into the `text` column.
29+
Although reading back the value with the CLOB API works, PostgreSQL has no knowledge of the reference to the LOB,
30+
because the `oid` is not known to PostgreSQL, leading to data loss when `vacuumlo` (the utility to clean up unused LOBs) runs.
31+
To avoid the data loss, it is required to use the `oid` type so that `vacuumlo` can see the reference.
32+
33+
Updating to 5.6.2 does not require any schema or application changes by default, but we highly recommend
34+
that you migrate existing `text` columns for LOBs to `oid` to prevent data loss due to the activity of `vacuumlo`.
35+
36+
[source,sql]
37+
----
38+
alter table test_entity
39+
alter column clobfield
40+
set data type oid using clobfield::oid
41+
----
42+
43+
If you are overriding the `JdbcTypeDescriptor` for `CLOB` to use e.g. `VarcharTypeDescriptor` in a custom PostgreSQL dialect,
44+
beware that you will also have to override the column type in the custom dialect, as with "pgjdbc",
45+
it is not possible to read/write an `oid` column with the JDBC `ResultSet#getString/Statement#setString` methods.
46+
47+
[source,java]
48+
----
49+
registerColumnType( Types.CLOB, "text" );
50+
----
51+
52+
Alternatively, you can remove the `JdbcTypeDescriptor` override and migrate to `oid` with
53+
54+
[source,sql]
55+
----
56+
alter table test_entity
57+
alter column clobfield
58+
set data type oid using lo_from_bytea(0, cast(clobfield as bytea))
59+
----
60+
61+
The switch to `oid` might have a negative impact on performance for small values that are fetched often,
62+
because the value is stored in a different file system page than the row, even for small values
63+
The benefit of the `oid` type is that it allows streaming the content and reduces the row size.
64+
65+
Users that just want a large text type but don't care about streaming should use the Hibernate type `text`:
66+
67+
[source,java]
68+
----
69+
@Entity
70+
public class TestEntity {
71+
72+
@org.hibernate.annotations.Type( type = "text" )
73+
String clobField;
74+
75+
//...
76+
}
77+
----
78+
79+
This will map to `java.sql.Types.LONGVARCHAR` for which Hibernate dialects register a DDL type that supports access
80+
via the `ResultSet#getString/Statement#setString` methods i.e. in case of PostgreSQL the type `text`.
81+
82+
The `@Lob` annotation should only be used to force the use of the `ResultSet#getClob/Statement#setClob` JDBC driver methods,
83+
which is in turn necessary for streaming data.

0 commit comments

Comments
 (0)