@@ -6,6 +6,7 @@ import kotlinx.datetime.toKotlinLocalDateTime
6
6
import org.apache.poi.hssf.usermodel.HSSFWorkbook
7
7
import org.apache.poi.ss.usermodel.Cell
8
8
import org.apache.poi.ss.usermodel.CellType
9
+ import org.apache.poi.ss.usermodel.DataFormatter
9
10
import org.apache.poi.ss.usermodel.DateUtil
10
11
import org.apache.poi.ss.usermodel.RichTextString
11
12
import org.apache.poi.ss.usermodel.Row
@@ -83,6 +84,8 @@ private fun setWorkbookTempDirectory() {
83
84
/* *
84
85
* @param sheetName sheet to read. By default, the first sheet in the document
85
86
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
87
+ * @param stringColumns range of columns to read as String regardless of a cell type.
88
+ * For example, by default a numeric cell with value 3 is parsed as a Double (3.0); with this option it is read as the String "3".
86
89
* @param skipRows number of rows before header
87
90
* @param rowsCount number of rows to read.
88
91
* @param nameRepairStrategy handling of column names.
@@ -93,17 +96,22 @@ public fun DataFrame.Companion.readExcel(
93
96
sheetName : String? = null,
94
97
skipRows : Int = 0,
95
98
columns : String? = null,
99
+ stringColumns : StringColumns ? = null,
96
100
rowsCount : Int? = null,
97
101
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
98
102
): AnyFrame {
99
103
setWorkbookTempDirectory()
100
104
val wb = WorkbookFactory .create(url.openStream())
101
- return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
105
+ return wb.use {
106
+ readExcel(wb, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
107
+ }
102
108
}
103
109
104
110
/* *
105
111
* @param sheetName sheet to read. By default, the first sheet in the document
106
112
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
113
+ * @param stringColumns range of columns to read as String regardless of a cell type.
114
+ * For example, by default a numeric cell with value 3 is parsed as a Double (3.0); with this option it is read as the String "3".
107
115
* @param skipRows number of rows before header
108
116
* @param rowsCount number of rows to read.
109
117
* @param nameRepairStrategy handling of column names.
@@ -114,17 +122,22 @@ public fun DataFrame.Companion.readExcel(
114
122
sheetName : String? = null,
115
123
skipRows : Int = 0,
116
124
columns : String? = null,
125
+ stringColumns : StringColumns ? = null,
117
126
rowsCount : Int? = null,
118
127
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
119
128
): AnyFrame {
120
129
setWorkbookTempDirectory()
121
130
val wb = WorkbookFactory .create(file)
122
- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
131
+ return wb.use {
132
+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
133
+ }
123
134
}
124
135
125
136
/* *
126
137
* @param sheetName sheet to read. By default, the first sheet in the document
127
138
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
139
+ * @param stringColumns range of columns to read as String regardless of a cell type.
140
+ * For example, by default a numeric cell with value 3 is parsed as a Double (3.0); with this option it is read as the String "3".
128
141
* @param skipRows number of rows before header
129
142
* @param rowsCount number of rows to read.
130
143
* @param nameRepairStrategy handling of column names.
@@ -135,13 +148,17 @@ public fun DataFrame.Companion.readExcel(
135
148
sheetName : String? = null,
136
149
skipRows : Int = 0,
137
150
columns : String? = null,
151
+ stringColumns : StringColumns ? = null,
138
152
rowsCount : Int? = null,
139
153
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
140
- ): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount, nameRepairStrategy)
154
+ ): AnyFrame =
155
+ readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, stringColumns, rowsCount, nameRepairStrategy)
141
156
142
157
/* *
143
158
* @param sheetName sheet to read. By default, the first sheet in the document
144
159
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
160
+ * @param stringColumns range of columns to read as String regardless of a cell type.
161
+ * For example, by default a numeric cell with value 3 is parsed as a Double (3.0); with this option it is read as the String "3".
145
162
* @param skipRows number of rows before header
146
163
* @param rowsCount number of rows to read.
147
164
* @param nameRepairStrategy handling of column names.
@@ -152,17 +169,23 @@ public fun DataFrame.Companion.readExcel(
152
169
sheetName : String? = null,
153
170
skipRows : Int = 0,
154
171
columns : String? = null,
172
+ stringColumns : StringColumns ? = null,
155
173
rowsCount : Int? = null,
156
174
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
157
175
): AnyFrame {
158
176
setWorkbookTempDirectory()
159
177
val wb = WorkbookFactory .create(inputStream)
160
- return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount, nameRepairStrategy) }
178
+ return wb.use {
179
+ readExcel(it, sheetName, skipRows, columns, stringColumns?.toFormattingOptions(), rowsCount, nameRepairStrategy)
180
+ }
161
181
}
162
182
163
183
/* *
164
184
* @param sheetName sheet to read. By default, the first sheet in the document
165
185
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
186
+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to render them.
187
+ * For example, by default a numeric cell with value 3 is parsed as a Double (3.0); with this option it is read as the String "3".
188
+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
166
189
* @param skipRows number of rows before header
167
190
* @param rowsCount number of rows to read.
168
191
* @param nameRepairStrategy handling of column names.
@@ -173,18 +196,37 @@ public fun DataFrame.Companion.readExcel(
173
196
sheetName : String? = null,
174
197
skipRows : Int = 0,
175
198
columns : String? = null,
199
+ formattingOptions : FormattingOptions ? = null,
176
200
rowsCount : Int? = null,
177
201
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
178
202
): AnyFrame {
179
203
val sheet: Sheet = sheetName
180
204
?.let { wb.getSheet(it) ? : error(" Sheet with name $sheetName not found" ) }
181
205
? : wb.getSheetAt(0 )
182
- return readExcel(sheet, columns, skipRows, rowsCount, nameRepairStrategy)
206
+ return readExcel(sheet, columns, formattingOptions, skipRows, rowsCount, nameRepairStrategy)
207
+ }
208
+
209
/**
 * Selection of columns that should be read as String regardless of the cell type.
 *
 * @property range comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F")
 */
public class StringColumns(
    public val range: String,
)
213
+
214
/**
 * Builds [FormattingOptions] for the same column [StringColumns.range].
 *
 * @param formatter formatter passed on to the resulting [FormattingOptions];
 * a new [DataFormatter] is created when none is supplied.
 * @return options covering this column range and using [formatter].
 */
public fun StringColumns.toFormattingOptions(formatter: DataFormatter = DataFormatter()): FormattingOptions {
    return FormattingOptions(range = this.range, formatter = formatter)
}
216
+
217
/**
 * Describes which columns are read through a [DataFormatter] instead of by cell type.
 *
 * @param range comma separated list of Excel column letters and column ranges (e.g. "A:E" or "A,C,E:F")
 * @property formatter formatter whose [DataFormatter.formatCellValue] produces the value of the selected cells.
 * @property columnIndices distinct column indices resolved from [range] when the instance is constructed.
 */
public class FormattingOptions(
    range: String,
    public val formatter: DataFormatter = DataFormatter(),
) {
    public val columnIndices: Set<Int> = getColumnIndices(range).toSet()
}
184
223
185
224
/* *
186
225
* @param sheet sheet to read.
187
226
* @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
227
+ * @param formattingOptions columns to read as String regardless of the cell type, together with the formatter used to render them.
228
+ * For example, by default a numeric cell with value 3 is parsed as a Double (3.0); with this option it is read as the String "3".
229
+ * See also [FormattingOptions.formatter] and [DataFormatter.formatCellValue].
188
230
* @param skipRows number of rows before header
189
231
* @param rowsCount number of rows to read.
190
232
* @param nameRepairStrategy handling of column names.
@@ -193,19 +235,13 @@ public fun DataFrame.Companion.readExcel(
193
235
public fun DataFrame.Companion.readExcel (
194
236
sheet : Sheet ,
195
237
columns : String? = null,
238
+ formattingOptions : FormattingOptions ? = null,
196
239
skipRows : Int = 0,
197
240
rowsCount : Int? = null,
198
241
nameRepairStrategy : NameRepairStrategy = NameRepairStrategy .CHECK_UNIQUE ,
199
242
): AnyFrame {
200
243
val columnIndexes: Iterable <Int > = if (columns != null ) {
201
- columns.split(" ," ).flatMap {
202
- if (it.contains(" :" )) {
203
- val (start, end) = it.split(" :" ).map { CellReference .convertColStringToIndex(it) }
204
- start.. end
205
- } else {
206
- listOf (CellReference .convertColStringToIndex(it))
207
- }
208
- }
244
+ getColumnIndices(columns)
209
245
} else {
210
246
val headerRow = checkNotNull(sheet.getRow(skipRows)) {
211
247
" Row number ${skipRows + 1 } (1-based index) is not defined on the sheet ${sheet.sheetName} "
@@ -235,17 +271,32 @@ public fun DataFrame.Companion.readExcel(
235
271
val name = repairNameIfRequired(nameFromCell, columnNameCounters, nameRepairStrategy)
236
272
columnNameCounters[nameFromCell] =
237
273
columnNameCounters.getOrDefault(nameFromCell, 0 ) + 1 // increase the counter for specific column name
274
+ val getCellValue: (Cell ? ) -> Any? = when {
275
+ formattingOptions != null && index in formattingOptions.columnIndices -> { cell: Cell ? ->
276
+ formattingOptions.formatter.formatCellValue(cell)
277
+ }
238
278
279
+ else -> { cell -> cell.cellValue(sheet.sheetName) }
280
+ }
239
281
val values: List <Any ?> = valueRowsRange.map {
240
282
val row: Row ? = sheet.getRow(it)
241
283
val cell: Cell ? = row?.getCell(index)
242
- cell.cellValue(sheet.sheetName )
284
+ getCellValue(cell )
243
285
}
244
286
DataColumn .createWithTypeInference(name, values)
245
287
}
246
288
return dataFrameOf(columns)
247
289
}
248
290
291
/**
 * Resolves a column specification into column indices.
 *
 * The specification is a comma separated list of Excel column letters and
 * column ranges (e.g. "A:E" or "A,C,E:F"). Each letter group is converted with
 * [CellReference.convertColStringToIndex]; a range "X:Y" expands to every index
 * from X to Y inclusive. Indices appear in the order they are listed, and
 * duplicates are kept.
 *
 * Whitespace around letters is ignored, so "A, C" and "A : C" are accepted.
 * Without trimming, the blank would be treated as part of the column letters
 * and resolve to a wrong index.
 *
 * @param columns comma separated list of column letters and column ranges.
 * @return the resolved column indices.
 */
private fun getColumnIndices(columns: String): List<Int> =
    columns.split(",").flatMap { token ->
        val bounds = token.split(":").map { letters ->
            CellReference.convertColStringToIndex(letters.trim())
        }
        if (bounds.size > 1) {
            // As before, only the first two parts of a "X:Y" token define the range.
            val (start, end) = bounds
            start..end
        } else {
            bounds
        }
    }
299
+
249
300
/* *
250
301
* This is a universal function for name repairing
251
302
* and should be moved to the API module later,
@@ -324,7 +375,7 @@ public fun <T> DataFrame<T>.writeExcel(
324
375
keepFile : Boolean = false,
325
376
) {
326
377
val factory =
327
- if (keepFile){
378
+ if (keepFile) {
328
379
when (workBookType) {
329
380
WorkBookType .XLS -> HSSFWorkbook (file.inputStream())
330
381
WorkBookType .XLSX -> XSSFWorkbook (file.inputStream())
0 commit comments