@@ -192,88 +192,87 @@ class Test26FixLengthWithIdGeneration extends AnyWordSpec with SparkTestBase wit
         assertEqualsMultiline(actual, expected)
       }
     }
-  }

-  "correctly work with segment id generation option with length field" in {
-    withTempBinFile("fix_length_reg3", ".dat", binFileContentsLengthField) { tmpFileName =>
-      val df = spark
-        .read
-        .format("cobol")
-        .option("copybook_contents", copybook)
-        .option("record_format", "F")
-        .option("record_length_field", "LEN")
-        .option("strict_integral_precision", "true")
-        .option("segment_field", "IND")
-        .option("segment_id_prefix", "ID")
-        .option("segment_id_level0", "A")
-        .option("segment_id_level1", "_")
-        .option("redefine-segment-id-map:0", "SEGMENT1 => A")
-        .option("redefine-segment-id-map:1", "SEGMENT2 => B")
-        .option("redefine-segment-id-map:2", "SEGMENT3 => C")
-        .option("input_split_records", 1)
-        .option("pedantic", "true")
-        .load(tmpFileName)
+    "correctly work with segment id generation option with length field and strict integral precision" in {
+      withTempBinFile("fix_length_reg3", ".dat", binFileContentsLengthField) { tmpFileName =>
+        val df = spark
+          .read
+          .format("cobol")
+          .option("copybook_contents", copybook)
+          .option("record_format", "F")
+          .option("record_length_field", "LEN")
+          .option("strict_integral_precision", "true")
+          .option("segment_field", "IND")
+          .option("segment_id_prefix", "ID")
+          .option("segment_id_level0", "A")
+          .option("segment_id_level1", "_")
+          .option("redefine-segment-id-map:0", "SEGMENT1 => A")
+          .option("redefine-segment-id-map:1", "SEGMENT2 => B")
+          .option("redefine-segment-id-map:2", "SEGMENT3 => C")
+          .option("input_split_records", 1)
+          .option("pedantic", "true")
+          .load(tmpFileName)

-      val actual = SparkUtils.convertDataFrameToPrettyJSON(df.drop("LEN").orderBy("Seg_Id0", "Seg_Id1"))
+        val actual = SparkUtils.convertDataFrameToPrettyJSON(df.drop("LEN").orderBy("Seg_Id0", "Seg_Id1"))

-      assertEqualsMultiline(actual, expected)
+        assertEqualsMultiline(actual, expected)
+      }
     }
-  }

-  "work with string values" in {
-    val copybook =
-      """      01  R.
+    "correctly work when the length field has the string type" in {
+      val copybook =
+        """      01  R.
                 05 LEN      PIC X(1).
                 05 FIELD1   PIC X(1).
        """

-    val binFileContentsLengthField: Array[Byte] = Array[Byte](
-      // A1
-      0xF2.toByte, 0xF3.toByte, 0xF3.toByte, 0xF4.toByte
-    ).map(_.toByte)
+      val binFileContentsLengthField: Array[Byte] = Array[Byte](
+        // A1
+        0xF2.toByte, 0xF3.toByte, 0xF3.toByte, 0xF4.toByte
+      ).map(_.toByte)

-    withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
-      val df = spark
-        .read
-        .format("cobol")
-        .option("copybook_contents", copybook)
-        .option("record_format", "F")
-        .option("record_length_field", "LEN")
-        .option("pedantic", "true")
-        .load(tmpFileName)
+      withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
+        val df = spark
+          .read
+          .format("cobol")
+          .option("copybook_contents", copybook)
+          .option("record_format", "F")
+          .option("record_length_field", "LEN")
+          .option("pedantic", "true")
+          .load(tmpFileName)

-      assert(df.count() == 2)
+        assert(df.count() == 2)
+      }
     }
-  }

-  "fail for incorrect string values" in {
-    val copybook =
-      """      01  R.
+    "fail when the length field has the string type and incorrect string values are encountered" in {
+      val copybook =
+        """      01  R.
                 05 LEN      PIC X(1).
                 05 FIELD1   PIC X(1).
        """

-    val binFileContentsLengthField: Array[Byte] = Array[Byte](
-      // A1
-      0xF2.toByte, 0xF3.toByte, 0xC3.toByte, 0xF4.toByte
-    ).map(_.toByte)
+      val binFileContentsLengthField: Array[Byte] = Array[Byte](
+        // A1
+        0xF2.toByte, 0xF3.toByte, 0xC3.toByte, 0xF4.toByte
+      ).map(_.toByte)

-    withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
-      val df = spark
-        .read
-        .format("cobol")
-        .option("copybook_contents", copybook)
-        .option("record_format", "F")
-        .option("record_length_field", "LEN")
-        .option("pedantic", "true")
-        .load(tmpFileName)
+      withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
+        val df = spark
+          .read
+          .format("cobol")
+          .option("copybook_contents", copybook)
+          .option("record_format", "F")
+          .option("record_length_field", "LEN")
+          .option("pedantic", "true")
+          .load(tmpFileName)

-      val ex = intercept[SparkException] {
-        df.count()
-      }
+        val ex = intercept[SparkException] {
+          df.count()
+        }

-      assert(ex.getCause.getMessage.contains("Record length value of the field LEN must be an integral type, encountered: 'C'"))
+        assert(ex.getCause.getMessage.contains("Record length value of the field LEN must be an integral type, encountered: 'C'"))
+      }
     }
   }
-
 }