Skip to content

Commit 084fb7f

Browse files
committed
#710 Fix index generation for files with record length fields or expressions.
1 parent 39ddbd5 commit 084fb7f

File tree

3 files changed

+403
-6
lines changed

3 files changed

+403
-6
lines changed

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/VarLenNestedReader.scala

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@
1717
package za.co.absa.cobrix.cobol.reader
1818

1919
import za.co.absa.cobrix.cobol.internal.Logging
20+
import za.co.absa.cobrix.cobol.parser.Copybook
2021
import za.co.absa.cobrix.cobol.parser.common.Constants
21-
import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage
22-
import za.co.absa.cobrix.cobol.parser.encoding.{ASCII, EBCDIC}
2322
import za.co.absa.cobrix.cobol.parser.headerparsers.{RecordHeaderParser, RecordHeaderParserFactory}
24-
import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat.{FixedBlock, VariableBlock}
25-
import za.co.absa.cobrix.cobol.parser.{Copybook, CopybookParser}
23+
import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat.{FixedBlock, FixedLength, VariableBlock}
2624
import za.co.absa.cobrix.cobol.reader.extractors.raw._
2725
import za.co.absa.cobrix.cobol.reader.extractors.record.RecordHandler
2826
import za.co.absa.cobrix.cobol.reader.index.IndexGenerator
@@ -34,8 +32,6 @@ import za.co.absa.cobrix.cobol.reader.schema.CobolSchema
3432
import za.co.absa.cobrix.cobol.reader.stream.SimpleStream
3533
import za.co.absa.cobrix.cobol.reader.validator.ReaderParametersValidator
3634

37-
import java.nio.charset.{Charset, StandardCharsets}
38-
import scala.collection.immutable.HashMap
3935
import scala.collection.mutable.ArrayBuffer
4036
import scala.reflect.ClassTag
4137

@@ -79,6 +75,8 @@ class VarLenNestedReader[T: ClassTag](copybookContents: Seq[String],
7975
Some(new TextRecordExtractor(reParams))
8076
case None if readerProperties.isText =>
8177
Some(new TextFullRecordExtractor(reParams))
78+
case None if readerProperties.recordFormat == FixedLength && (readerProperties.lengthFieldExpression.nonEmpty || readerProperties.lengthFieldMap.nonEmpty) =>
79+
Some(new FixedWithRecordLengthExprRawRecordExtractor(reParams, readerProperties))
8280
case None if readerProperties.recordFormat == FixedBlock =>
8381
val fbParams = FixedBlockParameters(readerProperties.recordLength, bdwOpt.get.blockLength, bdwOpt.get.recordsPerBlock)
8482
FixedBlockParameters.validate(fbParams)
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/*
2+
* Copyright 2018 ABSA Group Limited
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package za.co.absa.cobrix.cobol.reader.extractors.raw
18+
19+
import org.slf4j.LoggerFactory
20+
import za.co.absa.cobrix.cobol.parser.ast.Primitive
21+
import za.co.absa.cobrix.cobol.reader.iterator.RecordLengthExpression
22+
import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters
23+
import za.co.absa.cobrix.cobol.reader.validator.ReaderParametersValidator
24+
25+
/**
  * A raw record extractor for inputs in which the length of every record is derived from the record
  * itself: either from a record length field (optionally translated through a value-to-length
  * mapping) or by evaluating a record length expression over one or more fields.
  *
  * The header stream is closed and the first record is pre-fetched when the extractor is constructed.
  * Records shorter than `minimumRecordLength` or longer than `maximumRecordLength` are skipped.
  *
  * @param ctx              the raw record context providing the input stream and the copybook.
  * @param readerProperties reader options (offsets, record length field/expression, segment id field, etc.).
  */
class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
                                                  readerProperties: ReaderParameters) extends Serializable with RawRecordExtractor {
  private val log = LoggerFactory.getLogger(this.getClass)

  // This class never reads from the header stream, so it is closed right away.
  ctx.headerStream.close()

  final private val (recordLengthField, lengthFieldExpr) = ReaderParametersValidator.getEitherFieldAndExpression(readerProperties.lengthFieldExpression, readerProperties.lengthFieldMap, ctx.copybook)

  final private val lengthField = recordLengthField.map(_.field)
  final private val lengthMap = recordLengthField.map(_.valueMap).getOrElse(Map.empty)
  final private val isLengthMapEmpty = lengthMap.isEmpty

  /** A pre-fetched record: the segment id (an empty string when not available) and the raw bytes. */
  type RawRecord = (String, Array[Byte])

  // The record that the next call to next() returns; None when the input is exhausted.
  private var cachedValue: Option[RawRecord] = None
  // Running byte position, used only in error and log messages.
  private var byteIndex = readerProperties.fileStartOffset

  final private val segmentIdField = ReaderParametersValidator.getSegmentIdField(readerProperties.multisegment, ctx.copybook)
  final private val recordLengthAdjustment = readerProperties.rdwAdjustment

  // Pre-fetch the first record. This must stay below all the fields fetchNext() depends on.
  fetchNext()

  /**
    * The input stream offset at which the pre-fetched (not yet returned) record starts,
    * or the current input stream offset when there are no more records.
    */
  override def offset: Long = cachedValue match {
    case Some((_, data)) => ctx.inputStream.offset - data.length
    case None            => ctx.inputStream.offset
  }

  override def hasNext: Boolean = cachedValue.nonEmpty

  /**
    * Returns the pre-fetched record and fetches the one after it.
    *
    * @throws NoSuchElementException if there are no more records.
    */
  @throws[NoSuchElementException]
  override def next(): Array[Byte] = {
    cachedValue match {
      case None =>
        throw new NoSuchElementException
      case Some((_, data)) =>
        fetchNext()
        data
    }
  }

  /**
    * Reads records from the input stream until one of acceptable size is found (it becomes the cached
    * record) or the input is exhausted (the cached record becomes None).
    */
  private def fetchNext(): Unit = {
    var recordFetched = false
    while (!recordFetched) {
      val binaryData = lengthField match {
        case Some(field) =>
          fetchRecordUsingRecordLengthField(field)
        case None =>
          val expr = lengthFieldExpr.getOrElse(
            throw new IllegalStateException("Either a record length field or a record length expression should be provided.")
          )
          fetchRecordUsingRecordLengthFieldExpression(expr)
      }

      binaryData match {
        case None =>
          cachedValue = None
          recordFetched = true
        case Some(data) if data.length < readerProperties.minimumRecordLength || data.length > readerProperties.maximumRecordLength =>
          // The record size is out of the accepted range - skip it and try the next one.
          recordFetched = false
        case Some(data) =>
          val segmentIdStr = getSegmentId(data).getOrElse("")

          cachedValue = Some((segmentIdStr, data))
          recordFetched = true
      }
    }
  }

  /**
    * Reads one record whose length is stored in the record length field.
    *
    * @param field the record length field.
    * @return the bytes of the record, or None if the input ended before the length field could be read.
    */
  private def fetchRecordUsingRecordLengthField(field: Primitive): Option[Array[Byte]] = {
    // The number of bytes from the beginning of the record up to the end of the length field.
    val lengthFieldBlock = field.binaryProperties.offset + field.binaryProperties.actualSize
    val headerSize = readerProperties.startOffset + lengthFieldBlock

    val binaryDataStart = ctx.inputStream.next(headerSize)

    byteIndex += headerSize

    if (binaryDataStart.length < headerSize) {
      None
    } else {
      val recordLength = getRecordLengthFromField(field, binaryDataStart)
      val restOfDataLength = recordLength - lengthFieldBlock + readerProperties.endOffset

      byteIndex += restOfDataLength

      if (restOfDataLength > 0) {
        Some(binaryDataStart ++ ctx.inputStream.next(restOfDataLength))
      } else {
        Some(binaryDataStart)
      }
    }
  }

  /**
    * Decodes the record length from the beginning of a record and applies the record length adjustment.
    * When a length mapping is defined, the decoded value is translated through the mapping first.
    *
    * @param lengthAST       the record length field.
    * @param binaryDataStart the beginning of the record, including the length field.
    * @return the adjusted record length.
    * @throws IllegalStateException if the value is null, is not integral, cannot be parsed as an integer,
    *                               or is not present in the length mapping.
    */
  final private def getRecordLengthFromField(lengthAST: Primitive, binaryDataStart: Array[Byte]): Int = {
    val length = ctx.copybook.extractPrimitiveField(lengthAST, binaryDataStart, readerProperties.startOffset) match {
      case null      => throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)}).")
      case i: Int    => if (isLengthMapEmpty) i else getRecordLengthFromMapping(i.toString)
      case l: Long   => if (isLengthMapEmpty) l.toInt else getRecordLengthFromMapping(l.toString)
      case s: String => if (isLengthMapEmpty) parseRecordLength(lengthAST, s) else getRecordLengthFromMapping(s)
      case _         => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
    }
    length + recordLengthAdjustment
  }

  /**
    * Parses a string value of the record length field as an integer, reporting malformed values
    * the same way as the record length expression code path does.
    */
  final private def parseRecordLength(lengthAST: Primitive, value: String): Int = {
    try {
      value.toInt
    } catch {
      case ex: NumberFormatException =>
        throw new IllegalStateException(s"Encountered an invalid value of the record length field. Cannot parse '$value' as an integer in: ${lengthAST.name} = '$value'.", ex)
    }
  }

  /** Translates a record length field value to a record length using the length mapping. */
  final private def getRecordLengthFromMapping(v: String): Int = {
    lengthMap.getOrElse(v, throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length."))
  }

  /** Renders bytes as an upper-case hexadecimal string, two characters per byte. */
  final private def getBytesAsHexString(bytes: Array[Byte]): String = {
    bytes.map(b => "%02X".format(b)).mkString
  }

  /**
    * Reads one record whose length is calculated by a record length expression.
    *
    * @param expr the record length expression.
    * @return the bytes of the record, or None if the input ended before all the fields
    *         required by the expression could be read.
    */
  private def fetchRecordUsingRecordLengthFieldExpression(expr: RecordLengthExpression): Option[Array[Byte]] = {
    val lengthFieldBlock = expr.requiredBytesToread
    val evaluator = expr.evaluator
    val headerSize = readerProperties.startOffset + lengthFieldBlock

    val binaryDataStart = ctx.inputStream.next(headerSize)

    byteIndex += headerSize

    if (binaryDataStart.length < headerSize) {
      None
    } else {
      // Feed the value of each field referenced by the expression to the evaluator.
      expr.fields.foreach {
        case (name, field) =>
          val obj = ctx.copybook.extractPrimitiveField(field, binaryDataStart, readerProperties.startOffset)
          try {
            obj match {
              case i: Int    => evaluator.setValue(name, i)
              case l: Long   => evaluator.setValue(name, l.toInt)
              case s: String => evaluator.setValue(name, s.toInt)
              case _         => throw new IllegalStateException(s"Record length value of the field ${field.name} must be an integral type.")
            }
          } catch {
            case ex: NumberFormatException =>
              throw new IllegalStateException(s"Encountered an invalid value of the record length field. Cannot parse '$obj' as an integer in: ${field.name} = '$obj'.", ex)
          }
      }

      val recordLength = evaluator.eval()
      val restOfDataLength = recordLength - lengthFieldBlock + readerProperties.endOffset

      byteIndex += restOfDataLength

      if (restOfDataLength > 0) {
        Some(binaryDataStart ++ ctx.inputStream.next(restOfDataLength))
      } else {
        Some(binaryDataStart)
      }
    }
  }

  /**
    * Extracts the segment id from a record.
    *
    * @param data the bytes of the record.
    * @return None if no segment id field is defined; otherwise the trimmed string value of the field,
    *         or an empty string (with an error logged) if the field decodes to null.
    */
  private def getSegmentId(data: Array[Byte]): Option[String] = {
    segmentIdField.map { field =>
      val fieldValue = ctx.copybook.extractPrimitiveField(field, data, readerProperties.startOffset)
      if (fieldValue == null) {
        log.error(s"An unexpected null encountered for segment id at $byteIndex")
        ""
      } else {
        fieldValue.toString.trim
      }
    }
  }
}

0 commit comments

Comments
 (0)