diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 32e0b72af60..8ba64d19217 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -213,6 +213,7 @@ r/README.md r/README.Rmd r/man/*.Rd r/cran-comments.md +r/vignettes/*.Rmd .gitattributes ruby/red-arrow/.yardopts rust/arrow/test/data/*.csv diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 7f695e23992..7b68377d41f 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -36,10 +36,12 @@ Imports: utils Roxygen: list(markdown = TRUE) RoxygenNote: 6.1.1 +VignetteBuilder: knitr Suggests: covr, fs, hms, + knitr, lubridate, rmarkdown, testthat, @@ -49,33 +51,33 @@ Collate: 'enums.R' 'arrow-package.R' 'type.R' - 'ArrayData.R' - 'ChunkedArray.R' - 'Column.R' - 'Field.R' - 'List.R' - 'RecordBatch.R' - 'RecordBatchReader.R' - 'RecordBatchWriter.R' - 'Schema.R' - 'Struct.R' - 'Table.R' + 'array-data.R' 'array.R' 'arrowExports.R' 'buffer.R' + 'chunked-array.R' 'io.R' 'compression.R' 'compute.R' 'csv.R' 'dictionary.R' 'feather.R' + 'field.R' 'install-arrow.R' 'json.R' - 'memory_pool.R' + 'list.R' + 'memory-pool.R' 'message.R' 'parquet.R' - 'read_record_batch.R' - 'read_table.R' + 'read-record-batch.R' + 'read-table.R' + 'record-batch-reader.R' + 'record-batch-writer.R' + 'record-batch.R' 'reexports-bit64.R' 'reexports-tidyselect.R' - 'write_arrow.R' + 'schema.R' + 'struct.R' + 'table.R' + 'util.R' + 'write-arrow.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 57445482c96..936d9170a2d 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -1,114 +1,74 @@ # Generated by roxygen2: do not edit by hand -S3method("!=","arrow::Object") -S3method("==","arrow::Array") -S3method("==","arrow::DataType") -S3method("==","arrow::Field") -S3method("==","arrow::RecordBatch") -S3method("==","arrow::Schema") -S3method("==","arrow::ipc::Message") -S3method(BufferReader,"arrow::Buffer") -S3method(BufferReader,default) -S3method(CompressedInputStream,"arrow::io::InputStream") 
-S3method(CompressedInputStream,character) -S3method(CompressedOutputStream,"arrow::io::OutputStream") -S3method(CompressedOutputStream,character) -S3method(FeatherTableReader,"arrow::io::RandomAccessFile") -S3method(FeatherTableReader,"arrow::ipc::feather::TableReader") -S3method(FeatherTableReader,character) -S3method(FeatherTableReader,raw) -S3method(FeatherTableWriter,"arrow::io::OutputStream") -S3method(FixedSizeBufferWriter,"arrow::Buffer") -S3method(FixedSizeBufferWriter,default) -S3method(MessageReader,"arrow::io::InputStream") -S3method(MessageReader,default) -S3method(RecordBatchFileReader,"arrow::Buffer") -S3method(RecordBatchFileReader,"arrow::io::RandomAccessFile") -S3method(RecordBatchFileReader,character) -S3method(RecordBatchFileReader,raw) -S3method(RecordBatchFileWriter,"arrow::io::OutputStream") -S3method(RecordBatchFileWriter,character) -S3method(RecordBatchStreamReader,"arrow::Buffer") -S3method(RecordBatchStreamReader,"arrow::io::InputStream") -S3method(RecordBatchStreamReader,raw) -S3method(RecordBatchStreamWriter,"arrow::io::OutputStream") -S3method(RecordBatchStreamWriter,character) -S3method(as.data.frame,"arrow::RecordBatch") -S3method(as.data.frame,"arrow::Table") -S3method(as.raw,"arrow::Buffer") -S3method(buffer,"arrow::Buffer") -S3method(buffer,complex) -S3method(buffer,default) -S3method(buffer,integer) -S3method(buffer,numeric) -S3method(buffer,raw) -S3method(csv_table_reader,"arrow::csv::TableReader") -S3method(csv_table_reader,"arrow::io::InputStream") -S3method(csv_table_reader,character) -S3method(csv_table_reader,default) -S3method(dim,"arrow::RecordBatch") -S3method(dim,"arrow::Table") -S3method(json_table_reader,"arrow::io::InputStream") -S3method(json_table_reader,"arrow::json::TableReader") -S3method(json_table_reader,character) -S3method(json_table_reader,default) -S3method(length,"arrow::Array") -S3method(names,"arrow::RecordBatch") -S3method(parquet_file_reader,"arrow::io::RandomAccessFile") 
-S3method(parquet_file_reader,character) -S3method(parquet_file_reader,raw) +S3method("!=",Object) +S3method("==",Array) +S3method("==",DataType) +S3method("==",Field) +S3method("==",Message) +S3method("==",RecordBatch) +S3method("==",Schema) +S3method(as.data.frame,RecordBatch) +S3method(as.data.frame,Table) +S3method(as.raw,Buffer) +S3method(dim,RecordBatch) +S3method(dim,Table) +S3method(length,Array) +S3method(names,RecordBatch) S3method(print,"arrow-enum") -S3method(read_message,"arrow::io::InputStream") -S3method(read_message,"arrow::ipc::MessageReader") +S3method(read_message,InputStream) +S3method(read_message,MessageReader) S3method(read_message,default) -S3method(read_record_batch,"arrow::Buffer") -S3method(read_record_batch,"arrow::io::InputStream") -S3method(read_record_batch,"arrow::ipc::Message") +S3method(read_record_batch,Buffer) +S3method(read_record_batch,InputStream) +S3method(read_record_batch,Message) S3method(read_record_batch,raw) -S3method(read_schema,"arrow::Buffer") -S3method(read_schema,"arrow::io::InputStream") -S3method(read_schema,"arrow::ipc::Message") +S3method(read_schema,Buffer) +S3method(read_schema,InputStream) +S3method(read_schema,Message) S3method(read_schema,raw) -S3method(read_table,"arrow::ipc::RecordBatchFileReader") -S3method(read_table,"arrow::ipc::RecordBatchStreamReader") +S3method(read_table,RecordBatchFileReader) +S3method(read_table,RecordBatchStreamReader) S3method(read_table,character) S3method(read_table,raw) -S3method(type,"arrow::Array") -S3method(type,"arrow::ChunkedArray") -S3method(type,"arrow::Column") +S3method(type,Array) +S3method(type,ChunkedArray) +S3method(type,Column) S3method(type,default) -S3method(write_arrow,"arrow::ipc::RecordBatchWriter") +S3method(write_arrow,RecordBatchWriter) S3method(write_arrow,character) S3method(write_arrow,raw) -S3method(write_feather,"arrow::RecordBatch") -S3method(write_feather,data.frame) -S3method(write_feather,default) 
-S3method(write_feather_RecordBatch,"arrow::io::OutputStream") -S3method(write_feather_RecordBatch,character) -S3method(write_feather_RecordBatch,default) +export(Array) +export(Buffer) export(BufferOutputStream) export(BufferReader) +export(ChunkedArray) export(CompressedInputStream) export(CompressedOutputStream) export(CompressionType) export(DateUnit) export(FeatherTableReader) export(FeatherTableWriter) +export(Field) export(FileMode) export(FileOutputStream) export(FixedSizeBufferWriter) +export(MemoryMappedFile) export(MessageReader) export(MessageType) export(MockOutputStream) +export(ParquetFileReader) +export(ParquetReaderProperties) +export(RandomAccessFile) export(ReadableFile) export(RecordBatchFileReader) export(RecordBatchFileWriter) export(RecordBatchStreamReader) export(RecordBatchStreamWriter) +export(Schema) export(StatusCode) +export(Table) export(TimeUnit) export(Type) -export(array) export(arrow_available) export(bool) export(boolean) @@ -150,8 +110,6 @@ export(mmap_open) export(null) export(num_range) export(one_of) -export(parquet_arrow_reader_properties) -export(parquet_file_reader) export(read_arrow) export(read_csv_arrow) export(read_delim_arrow) @@ -168,7 +126,6 @@ export(schema) export(starts_with) export(string) export(struct) -export(table) export(time32) export(time64) export(timestamp) @@ -180,7 +137,6 @@ export(uint8) export(utf8) export(write_arrow) export(write_feather) -export(write_feather_RecordBatch) export(write_parquet) importFrom(R6,R6Class) importFrom(Rcpp,sourceCpp) diff --git a/r/R/RecordBatchReader.R b/r/R/RecordBatchReader.R deleted file mode 100644 index 6593b0bb0e6..00000000000 --- a/r/R/RecordBatchReader.R +++ /dev/null @@ -1,138 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#' @include arrow-package.R - -#' @title class arrow::RecordBatchReader -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__RecordBatchReader -#' @name arrow__RecordBatchReader -`arrow::RecordBatchReader` <- R6Class("arrow::RecordBatchReader", inherit = `arrow::Object`, - public = list( - read_next_batch = function() { - shared_ptr(`arrow::RecordBatch`, RecordBatchReader__ReadNext(self)) - } - ), - active = list( - schema = function() shared_ptr(`arrow::Schema`, RecordBatchReader__schema(self)) - ) -) - -#' @title class arrow::ipc::RecordBatchStreamReader -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__ipc__RecordBatchStreamReader -#' @name arrow__ipc__RecordBatchStreamReader -`arrow::ipc::RecordBatchStreamReader` <- R6Class("arrow::ipc::RecordBatchStreamReader", inherit = `arrow::RecordBatchReader`, - public = list( - batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) - ) -) - -#' @title class arrow::ipc::RecordBatchFileReader -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__ipc__RecordBatchFileReader -#' @name arrow__ipc__RecordBatchFileReader -`arrow::ipc::RecordBatchFileReader` <- R6Class("arrow::ipc::RecordBatchFileReader", 
inherit = `arrow::Object`, - public = list( - get_batch = function(i) shared_ptr(`arrow::RecordBatch`, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), - - batches = function() map(ipc___RecordBatchFileReader__batches(self), shared_ptr, class = `arrow::RecordBatch`) - ), - active = list( - num_record_batches = function() ipc___RecordBatchFileReader__num_record_batches(self), - schema = function() shared_ptr(`arrow::Schema`, ipc___RecordBatchFileReader__schema(self)) - ) -) - -#' Create a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] from an input stream -#' -#' @param stream input stream, an [arrow::io::InputStream][arrow__io__InputStream] or a raw vector -#' -#' @export -RecordBatchStreamReader <- function(stream){ - UseMethod("RecordBatchStreamReader") -} - -#' @export -`RecordBatchStreamReader.arrow::io::InputStream` <- function(stream) { - shared_ptr(`arrow::ipc::RecordBatchStreamReader`, ipc___RecordBatchStreamReader__Open(stream)) -} - -#' @export -`RecordBatchStreamReader.raw` <- function(stream) { - RecordBatchStreamReader(BufferReader(stream)) -} - -#' @export -`RecordBatchStreamReader.arrow::Buffer` <- function(stream) { - RecordBatchStreamReader(BufferReader(stream)) -} - - -#' Create an [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] from a file -#' -#' @param file The file to read from. 
A file path, or an [arrow::io::RandomAccessFile][arrow__ipc__RecordBatchFileReader] -#' -#' @export -RecordBatchFileReader <- function(file) { - UseMethod("RecordBatchFileReader") -} - -#' @export -`RecordBatchFileReader.arrow::io::RandomAccessFile` <- function(file) { - shared_ptr(`arrow::ipc::RecordBatchFileReader`, ipc___RecordBatchFileReader__Open(file)) -} - -#' @export -`RecordBatchFileReader.character` <- function(file) { - assert_that(length(file) == 1L) - RecordBatchFileReader(ReadableFile(file)) -} - -#' @export -`RecordBatchFileReader.arrow::Buffer` <- function(file) { - RecordBatchFileReader(BufferReader(file)) -} - -#' @export -`RecordBatchFileReader.raw` <- function(file) { - RecordBatchFileReader(BufferReader(file)) -} diff --git a/r/R/RecordBatchWriter.R b/r/R/RecordBatchWriter.R deleted file mode 100644 index 7185dc2a9a5..00000000000 --- a/r/R/RecordBatchWriter.R +++ /dev/null @@ -1,178 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -#' @include arrow-package.R - -#' @title class arrow::ipc::RecordBatchWriter -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' - `$write_batch(batch)`: Write record batch to stream -#' - `$write_table(table)`: write Table to stream -#' - `$close()`: close stream -#' -#' @section Derived classes: -#' -#' - [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] implements the streaming binary format -#' - [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] implements the binary file format -#' -#' @rdname arrow__ipc__RecordBatchWriter -#' @name arrow__ipc__RecordBatchWriter -`arrow::ipc::RecordBatchWriter` <- R6Class("arrow::ipc::RecordBatchWriter", inherit = `arrow::Object`, - public = list( - write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), - write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), - - write = function(x) { - if (inherits(x, "arrow::RecordBatch")) { - self$write_batch(x) - } else if (inherits(x, "arrow::Table")) { - self$write_table(x) - } else if (inherits(x, "data.frame")) { - self$write_table(table(x)) - } else { - abort("unexpected type for RecordBatchWriter$write(), must be an arrow::RecordBatch or an arrow::Table") - } - }, - - close = function() ipc___RecordBatchWriter__Close(self) - ) -) - -#' @title class arrow::ipc::RecordBatchStreamWriter -#' -#' Writer for the Arrow streaming binary format -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section usage: -#' -#' ``` -#' writer <- RecordBatchStreamWriter(sink, schema) -#' -#' writer$write_batch(batch) -#' writer$write_table(table) -#' writer$close() -#' ``` -#' -#' @section Factory: -#' -#' The [RecordBatchStreamWriter()] function creates a record batch stream writer. 
-#' -#' @section Methods: -#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] -#' -#' - `$write_batch(batch)`: Write record batch to stream -#' - `$write_table(table)`: write Table to stream -#' - `$close()`: close stream -#' -#' @rdname arrow__ipc__RecordBatchStreamWriter -#' @name arrow__ipc__RecordBatchStreamWriter -`arrow::ipc::RecordBatchStreamWriter` <- R6Class("arrow::ipc::RecordBatchStreamWriter", inherit = `arrow::ipc::RecordBatchWriter`) - -#' Writer for the Arrow streaming binary format -#' -#' @param sink Where to write. Can either be: -#' -#' - A string file path -#' - [arrow::io::OutputStream][arrow__io__OutputStream] -#' -#' @param schema The [arrow::Schema][arrow__Schema] for data to be written. -#' -#' @return a [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] -#' -#' @export -RecordBatchStreamWriter <- function(sink, schema) { - UseMethod("RecordBatchStreamWriter") -} - -#' @export -RecordBatchStreamWriter.character <- function(sink, schema){ - RecordBatchStreamWriter(FileOutputStream(sink), schema) -} - -#' @export -`RecordBatchStreamWriter.arrow::io::OutputStream` <- function(sink, schema){ - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::ipc::RecordBatchStreamWriter`, ipc___RecordBatchStreamWriter__Open(sink, schema)) -} - -#' @title class arrow::ipc::RecordBatchFileWriter -#' -#' Writer for the Arrow binary file format -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section usage: -#' -#' ``` -#' writer <- RecordBatchFileWriter(sink, schema) -#' -#' writer$write_batch(batch) -#' writer$write_table(table) -#' writer$close() -#' ``` -#' -#' @section Factory: -#' -#' The [RecordBatchFileWriter()] function creates a record batch stream writer. 
-#' -#' @section Methods: -#' inherited from [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter] -#' -#' - `$write_batch(batch)`: Write record batch to stream -#' - `$write_table(table)`: write Table to stream -#' - `$close()`: close stream -#' -#' @rdname arrow__ipc__RecordBatchFileWriter -#' @name arrow__ipc__RecordBatchFileWriter -`arrow::ipc::RecordBatchFileWriter` <- R6Class("arrow::ipc::RecordBatchFileWriter", inherit = `arrow::ipc::RecordBatchStreamWriter`) - -#' Create a record batch file writer from a stream -#' -#' @param sink Where to write. Can either be: -#' -#' - a string file path -#' - [arrow::io::OutputStream][arrow__io__OutputStream] -#' -#' @param schema The [arrow::Schema][arrow__Schema] for data to be written. -#' -#' @return an `arrow::ipc::RecordBatchWriter` object -#' -#' @export -RecordBatchFileWriter <- function(sink, schema) { - UseMethod("RecordBatchFileWriter") -} - -#' @export -RecordBatchFileWriter.character <- function(sink, schema){ - RecordBatchFileWriter(FileOutputStream(sink), schema) -} - -#' @export -`RecordBatchFileWriter.arrow::io::OutputStream` <- function(sink, schema){ - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::ipc::RecordBatchFileWriter`, ipc___RecordBatchFileWriter__Open(sink, schema)) -} diff --git a/r/R/ArrayData.R b/r/R/array-data.R similarity index 78% rename from r/R/ArrayData.R rename to r/R/array-data.R index d9f307bf540..a141ba758c3 100644 --- a/r/R/ArrayData.R +++ b/r/R/array-data.R @@ -15,18 +15,17 @@ # specific language governing permissions and limitations # under the License. -#' @include type.R - -#' @title class arrow::ArrayData -#' +#' @title ArrayData class #' @usage NULL #' @format NULL #' @docType class +#' @description The `ArrayData` class allows you to get and inspect the data +#' inside an `arrow::Array`. 
#' #' @section Usage: #' #' ``` -#' data <- array(x)$data() +#' data <- Array$create(x)$data() #' #' data$type() #' data$length() @@ -39,15 +38,16 @@ #' #' ... #' -#' @rdname arrow__ArrayData -#' @name arrow__ArrayData -`arrow::ArrayData` <- R6Class("arrow::ArrayData", - inherit = `arrow::Object`, +#' @rdname ArrayData +#' @name ArrayData +#' @include type.R +ArrayData <- R6Class("ArrayData", + inherit = Object, active = list( - type = function() `arrow::DataType`$dispatch(ArrayData__get_type(self)), + type = function() DataType$create(ArrayData__get_type(self)), length = function() ArrayData__get_length(self), null_count = function() ArrayData__get_null_count(self), offset = function() ArrayData__get_offset(self), - buffers = function() map(ArrayData__buffers(self), shared_ptr, class = `arrow::Buffer`) + buffers = function() map(ArrayData__buffers(self), shared_ptr, class = Buffer) ) ) diff --git a/r/R/array.R b/r/R/array.R index fd7c6ef7c8d..ac5474cf0a6 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -17,32 +17,24 @@ #' @include arrow-package.R -#' @title class arrow::Array -#' -#' Array base type. Immutable data array with some logical type and some length. -#' +#' @title Array class +#' @description Array base type. Immutable data array with some logical type +#' and some length. #' @usage NULL #' @format NULL #' @docType class #' +#' @section Factory: +#' The `Array$create()` factory method instantiates an `Array` and +#' takes the following arguments: +#' * `x`: an R vector, list, or `data.frame` +#' * `type`: an optional [data type][data-type] for `x`. If omitted, the type +#' will be inferred from the data. 
#' @section Usage: #' #' ``` -#' a <- array(x) -#' -#' a$IsNull(i) -#' a$IsValid(i) -#' a$length() or length(a) -#' a$offset() -#' a$null_count() -#' a$type() -#' a$type_id() -#' a$Equals(b) -#' a$ApproxEquals(b) -#' a$as_vector() -#' a$ToString() -#' a$Slice(offset, length = NULL) -#' a$RangeEquals(other, start_idx, end_idx, other_start_idx) +#' a <- Array$create(x) +#' length(a) #' #' print(a) #' a == a @@ -59,16 +51,17 @@ #' - `$type_id()`: type id #' - `$Equals(other)` : is this array equal to `other` #' - `$ApproxEquals(other)` : -#' - `$data()`: return the underlying [arrow::ArrayData][arrow__ArrayData] +#' - `$data()`: return the underlying [ArrayData][ArrayData] #' - `$as_vector()`: convert to an R vector #' - `$ToString()`: string representation of the array #' - `$Slice(offset, length = NULL)` : Construct a zero-copy slice of the array with the indicated offset and length. If length is `NULL`, the slice goes until the end of the array. #' - `$RangeEquals(other, start_idx, end_idx, other_start_idx)` : #' -#' @rdname arrow__Array -#' @name arrow__Array -`arrow::Array` <- R6Class("arrow::Array", - inherit = `arrow::Object`, +#' @rdname array +#' @name array +#' @export +Array <- R6Class("Array", + inherit = Object, public = list( IsNull = function(i) Array__IsNull(self, i), IsValid = function(i) Array__IsValid(self, i), @@ -76,84 +69,78 @@ type_id = function() Array__type_id(self), Equals = function(other) Array__Equals(self, other), ApproxEquals = function(other) Array__ApproxEquals(self, other), - data = function() shared_ptr(`arrow::ArrayData`, Array__data(self)), + data = function() shared_ptr(ArrayData, Array__data(self)), as_vector = function() Array__as_vector(self), ToString = function() Array__ToString(self), Slice = function(offset, length = NULL){ if (is.null(length)) { - shared_ptr(`arrow::Array`, Array__Slice1(self, offset)) + shared_ptr(Array, Array__Slice1(self, offset)) } else { - shared_ptr(`arrow::Array`, Array__Slice2(self, offset, 
length)) + shared_ptr(Array, Array__Slice2(self, offset, length)) } }, RangeEquals = function(other, start_idx, end_idx, other_start_idx) { - assert_that(inherits(other, "arrow::Array")) + assert_is(other, "Array") Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx) }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_type, "arrow::DataType")) - assert_that(inherits(options, "arrow::compute::CastOptions")) - `arrow::Array`$dispatch(Array__cast(self, target_type, options)) + assert_is(target_type, "DataType") + assert_is(options, "CastOptions") + Array$create(Array__cast(self, target_type, options)) } ), active = list( null_count = function() Array__null_count(self), offset = function() Array__offset(self), - type = function() `arrow::DataType`$dispatch(Array__type(self)) + type = function() DataType$create(Array__type(self)) ) ) -`arrow::DictionaryArray` <- R6Class("arrow::DictionaryArray", inherit = `arrow::Array`, +DictionaryArray <- R6Class("DictionaryArray", inherit = Array, public = list( - indices = function() `arrow::Array`$dispatch(DictionaryArray__indices(self)), - dictionary = function() `arrow::Array`$dispatch(DictionaryArray__dictionary(self)) + indices = function() Array$create(DictionaryArray__indices(self)), + dictionary = function() Array$create(DictionaryArray__dictionary(self)) ) ) -`arrow::StructArray` <- R6Class("arrow::StructArray", inherit = `arrow::Array`, +StructArray <- R6Class("StructArray", inherit = Array, public = list( - field = function(i) `arrow::Array`$dispatch(StructArray__field(self, i)), - GetFieldByName = function(name) `arrow::Array`$dispatch(StructArray__GetFieldByName(self, name)), - Flatten = function() map(StructArray__Flatten(self), ~ `arrow::Array`$dispatch(.x)) + field = function(i) Array$create(StructArray__field(self, i)), + GetFieldByName = function(name) Array$create(StructArray__GetFieldByName(self, name)), + Flatten = function() 
map(StructArray__Flatten(self), ~ Array$create(.x)) ) ) -`arrow::ListArray` <- R6Class("arrow::ListArray", inherit = `arrow::Array`, +ListArray <- R6Class("ListArray", inherit = Array, public = list( - values = function() `arrow::Array`$dispatch(ListArray__values(self)), + values = function() Array$create(ListArray__values(self)), value_length = function(i) ListArray__value_length(self, i), value_offset = function(i) ListArray__value_offset(self, i), raw_value_offsets = function() ListArray__raw_value_offsets(self) ), active = list( - value_type = function() `arrow::DataType`$dispatch(ListArray__value_type(self)) + value_type = function() DataType$create(ListArray__value_type(self)) ) ) -`arrow::Array`$dispatch <- function(xp){ - a <- shared_ptr(`arrow::Array`, xp) +# Add a class method +Array$create <- function(x, type = NULL) { + if (!inherits(x, "externalptr")) { + x <- Array__from_vector(x, type) + } + a <- shared_ptr(Array, x) if (a$type_id() == Type$DICTIONARY){ - a <- shared_ptr(`arrow::DictionaryArray`, xp) + a <- shared_ptr(DictionaryArray, x) } else if (a$type_id() == Type$STRUCT) { - a <- shared_ptr(`arrow::StructArray`, xp) + a <- shared_ptr(StructArray, x) } else if (a$type_id() == Type$LIST) { - a <- shared_ptr(`arrow::ListArray`, xp) + a <- shared_ptr(ListArray, x) } a } #' @export -`length.arrow::Array` <- function(x) x$length() +length.Array <- function(x) x$length() #' @export -`==.arrow::Array` <- function(x, y) x$Equals(y) - -#' create an [arrow::Array][arrow__Array] from an R vector -#' -#' @param x R object -#' @param type Explicit [type][arrow__DataType], or NULL (the default) to infer from the data -#' -#' @export -array <- function(x, type = NULL){ - `arrow::Array`$dispatch(Array__from_vector(x, type)) -} +`==.Array` <- function(x, y) x$Equals(y) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 0f0a26b0d81..00a911bbe25 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -44,7 +44,7 @@ option_use_threads <- function() { 
} #' @include enums.R -`arrow::Object` <- R6Class("arrow::Object", +Object <- R6Class("Object", public = list( initialize = function(xp) self$set_pointer(xp), diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 7af16a72539..d98a6b06c07 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -100,6 +100,18 @@ ListArray__raw_value_offsets <- function(array){ .Call(`_arrow_ListArray__raw_value_offsets` , array) } +Array__infer_type <- function(x){ + .Call(`_arrow_Array__infer_type` , x) +} + +Array__from_vector <- function(x, s_type){ + .Call(`_arrow_Array__from_vector` , x, s_type) +} + +ChunkedArray__from_list <- function(chunks, s_type){ + .Call(`_arrow_ChunkedArray__from_list` , chunks, s_type) +} + Array__as_vector <- function(array){ .Call(`_arrow_Array__as_vector` , array) } @@ -116,18 +128,6 @@ Table__to_dataframe <- function(table, use_threads){ .Call(`_arrow_Table__to_dataframe` , table, use_threads) } -Array__infer_type <- function(x){ - .Call(`_arrow_Array__infer_type` , x) -} - -Array__from_vector <- function(x, s_type){ - .Call(`_arrow_Array__from_vector` , x, s_type) -} - -ChunkedArray__from_list <- function(chunks, s_type){ - .Call(`_arrow_ChunkedArray__from_list` , chunks, s_type) -} - ArrayData__get_type <- function(x){ .Call(`_arrow_ArrayData__get_type` , x) } diff --git a/r/R/buffer.R b/r/R/buffer.R index 12d0699762d..d1f789175cc 100644 --- a/r/R/buffer.R +++ b/r/R/buffer.R @@ -15,15 +15,14 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R -#' @include enums.R - -#' @title class arrow::Buffer -#' +#' @title Buffer class #' @usage NULL #' @format NULL #' @docType class -#' +#' @description A Buffer is an object containing a pointer to a piece of +#' contiguous memory with a particular size. 
+#' @section Factory: +#' `buffer()` lets you create an `arrow::Buffer` from an R object #' @section Methods: #' #' - `$is_mutable()` : @@ -31,9 +30,12 @@ #' - `$size()` : #' - `$capacity()`: #' -#' @rdname arrow__Buffer -#' @name arrow__Buffer -`arrow::Buffer` <- R6Class("arrow::Buffer", inherit = `arrow::Object`, +#' @rdname buffer +#' @name buffer +#' @export +#' @include arrow-package.R +#' @include enums.R +Buffer <- R6Class("Buffer", inherit = Object, public = list( ZeroPadding = function() Buffer__ZeroPadding(self), data = function() Buffer__data(self) @@ -46,47 +48,20 @@ ) ) -#' @export -`as.raw.arrow::Buffer` <- function(x) x$data() - -#' Create a [arrow::Buffer][arrow__Buffer] from an R object -#' -#' @param x R object. Only raw, numeric and integer vectors are currently supported -#' -#' @return an instance of [arrow::Buffer][arrow__Buffer] that borrows memory from `x` -#' -#' @export -buffer <- function(x){ - UseMethod("buffer") -} - -#' @export -buffer.default <- function(x) { - stop("cannot convert to Buffer") -} - -#' @export -buffer.raw <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) -} - -#' @export -buffer.numeric <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) +Buffer$create <- function(x) { + if (inherits(x, "Buffer")) { + return(x) + } else if (inherits(x, c("raw", "numeric", "integer", "complex"))) { + return(shared_ptr(Buffer, r___RBuffer__initialize(x))) + } else { + stop("Cannot convert object of class ", class(x), " to arrow::Buffer") + } } +#' @param x R object. 
Only raw, numeric and integer vectors are currently supported +#' @return an instance of `Buffer` that borrows memory from `x` #' @export -buffer.integer <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) -} +buffer <- Buffer$create #' @export -buffer.complex <- function(x) { - shared_ptr(`arrow::Buffer`, r___RBuffer__initialize(x)) -} - -#' @export -`buffer.arrow::Buffer` <- function(x) { - x -} - +as.raw.Buffer <- function(x) x$data() diff --git a/r/R/ChunkedArray.R b/r/R/chunked-array.R similarity index 50% rename from r/R/ChunkedArray.R rename to r/R/chunked-array.R index e407a494065..5429efff4aa 100644 --- a/r/R/ChunkedArray.R +++ b/r/R/chunked-array.R @@ -17,50 +17,64 @@ #' @include arrow-package.R -#' @title class arrow::ChunkedArray -#' +#' @title ChunkedArray class #' @usage NULL #' @format NULL #' @docType class +#' @description A `ChunkedArray` is a data structure managing a list of +#' primitive Arrow [Arrays][Array] logically as one large array. +#' @section Factory: +#' The `ChunkedArray$create()` factory method instantiates the object from +#' various Arrays or R vectors. `chunked_array()` is an alias for it. 
#' #' @section Methods: #' -#' TODO +#' - `$length()` +#' - `$chunk(i)` +#' - `$as_vector()` +#' - `$Slice(offset, length = NULL)` +#' - `$cast(target_type, safe = TRUE, options = cast_options(safe))` +#' - `$null_count()` +#' - `$chunks()` +#' - `$num_chunks()` +#' - `$type()` #' -#' @rdname arrow__ChunkedArray -#' @name arrow__ChunkedArray -`arrow::ChunkedArray` <- R6Class("arrow::ChunkedArray", inherit = `arrow::Object`, +#' @rdname ChunkedArray +#' @name ChunkedArray +#' @seealso [Array] +#' @export +ChunkedArray <- R6Class("ChunkedArray", inherit = Object, public = list( length = function() ChunkedArray__length(self), - chunk = function(i) `arrow::Array`$dispatch(ChunkedArray__chunk(self, i)), + chunk = function(i) Array$create(ChunkedArray__chunk(self, i)), as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ if (is.null(length)) { - shared_ptr(`arrow::ChunkedArray`, ChunkArray__Slice1(self, offset)) + shared_ptr(ChunkedArray, ChunkArray__Slice1(self, offset)) } else { - shared_ptr(`arrow::ChunkedArray`, ChunkArray__Slice2(self, offset, length)) + shared_ptr(ChunkedArray, ChunkArray__Slice2(self, offset, length)) } }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_type, "arrow::DataType")) - assert_that(inherits(options, "arrow::compute::CastOptions")) - shared_ptr(`arrow::ChunkedArray`, ChunkedArray__cast(self, target_type, options)) + assert_is(target_type, "DataType") + assert_is(options, "CastOptions") + shared_ptr(ChunkedArray, ChunkedArray__cast(self, target_type, options)) } ), active = list( null_count = function() ChunkedArray__null_count(self), num_chunks = function() ChunkedArray__num_chunks(self), - chunks = function() map(ChunkedArray__chunks(self), ~ `arrow::Array`$dispatch(.x)), - type = function() `arrow::DataType`$dispatch(ChunkedArray__type(self)) + chunks = function() map(ChunkedArray__chunks(self), Array$create), + type = function() 
DataType$create(ChunkedArray__type(self)) ) ) -#' create an [arrow::ChunkedArray][arrow__ChunkedArray] from various R vectors -#' +ChunkedArray$create <- function(..., type = NULL) { + shared_ptr(ChunkedArray, ChunkedArray__from_list(list2(...), type)) +} + #' @param \dots Vectors to coerce #' @param type currently ignored -#' +#' @rdname ChunkedArray #' @export -chunked_array <- function(..., type = NULL){ - shared_ptr(`arrow::ChunkedArray`, ChunkedArray__from_list(list2(...), type)) -} +chunked_array <- ChunkedArray$create diff --git a/r/R/compression.R b/r/R/compression.R index 6e56a76bbf9..a58defe640a 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -19,10 +19,7 @@ #' @include arrow-package.R #' @include io.R -`arrow::util::Codec` <- R6Class("arrow::util::Codec", inherit = `arrow::Object`) - -`arrow::io::CompressedOutputStream` <- R6Class("arrow::io::CompressedOutputStream", inherit = `arrow::io::OutputStream`) -`arrow::io::CompressedInputStream` <- R6Class("arrow::io::CompressedInputStream", inherit = `arrow::io::InputStream`) +Codec <- R6Class("Codec", inherit = Object) #' codec #' @@ -31,50 +28,57 @@ #' @export compression_codec <- function(type = "GZIP") { type <- CompressionType[[match.arg(type, names(CompressionType))]] - unique_ptr(`arrow::util::Codec`, util___Codec__Create(type)) + unique_ptr(Codec, util___Codec__Create(type)) } - -#' Compressed output stream +#' @title Compressed stream classes +#' @rdname compression +#' @name compression +#' @aliases CompressedInputStream CompressedOutputStream +#' @docType class +#' @usage NULL +#' @format NULL +#' @description `CompressedInputStream` and `CompressedOutputStream` +#' allow you to apply a [compression_codec()] to an +#' input or output stream. #' -#' @details This function is not supported in Windows. 
+#' @section Factory: #' -#' @param stream Underlying raw output stream -#' @param codec a codec -#' @export -CompressedOutputStream <- function(stream, codec = compression_codec("GZIP")){ - if (.Platform$OS.type == "windows") stop("'CompressedOutputStream' is unsupported in Windows.") - - UseMethod("CompressedOutputStream") -} - -#' @export -CompressedOutputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedOutputStream(FileOutputStream(stream), codec = codec) -} - -#' @export -`CompressedOutputStream.arrow::io::OutputStream` <- function(stream, codec = compression_codec("GZIP")) { - assert_that(inherits(codec, "arrow::util::Codec")) - shared_ptr(`arrow::io::CompressedOutputStream`, io___CompressedOutputStream__Make(codec, stream)) -} - -#' Compressed input stream +#' The `CompressedInputStream$create()` and `CompressedOutputStream$create()` +#' factory methods instantiate the object and take the following arguments: #' -#' @param stream Underlying raw input stream -#' @param codec a codec -#' @export -CompressedInputStream <- function(stream, codec = codec("GZIP")){ - UseMethod("CompressedInputStream") -} - +#' - `stream` An [InputStream] or [OutputStream], respectively +#' - `codec` A `Codec` +#' +#' @section Methods: +#' +#' Methods are inherited from [InputStream] and [OutputStream], respectively #' @export -CompressedInputStream.character <- function(stream, codec = compression_codec("GZIP")){ - CompressedInputStream(ReadableFile(stream), codec = codec) +#' @include arrow-package.R +CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = OutputStream) +CompressedOutputStream$create <- function(stream, codec = compression_codec()){ + if (.Platform$OS.type == "windows") { + stop("'CompressedOutputStream' is unsupported in Windows.") + } + assert_is(codec, "Codec") + if (is.character(stream)) { + stream <- FileOutputStream$create(stream) + } + assert_is(stream, "OutputStream") + shared_ptr(CompressedOutputStream, 
io___CompressedOutputStream__Make(codec, stream)) } +#' @rdname compression +#' @usage NULL +#' @format NULL #' @export -`CompressedInputStream.arrow::io::InputStream` <- function(stream, codec = compression_codec("GZIP")) { - assert_that(inherits(codec, "arrow::util::Codec")) - shared_ptr(`arrow::io::CompressedInputStream`, io___CompressedInputStream__Make(codec, stream)) +CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream) +CompressedInputStream$create <- function(stream, codec = compression_codec()){ + # TODO (npr): why would CompressedInputStream work on Windows if CompressedOutputStream doesn't? (and is it still the case that it does not?) + assert_is(codec, "Codec") + if (is.character(stream)) { + stream <- ReadableFile$create(stream) + } + assert_is(stream, "InputStream") + shared_ptr(CompressedInputStream, io___CompressedInputStream__Make(codec, stream)) } diff --git a/r/R/compute.R b/r/R/compute.R index 6cf73139d86..f2eded54c52 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -17,7 +17,7 @@ #' @include array.R -`arrow::compute::CastOptions` <- R6Class("arrow::compute::CastOptions", inherit = `arrow::Object`) +CastOptions <- R6Class("CastOptions", inherit = Object) #' Cast options #' @@ -33,7 +33,7 @@ cast_options <- function( allow_time_truncate = !safe, allow_float_truncate = !safe ){ - shared_ptr(`arrow::compute::CastOptions`, + shared_ptr(CastOptions, compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate) ) } diff --git a/r/R/csv.R b/r/R/csv.R index f2d9fab7403..7acb667c9e0 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -32,7 +32,7 @@ #' `parse_options`, `convert_options`, or `read_options` arguments, or you can #' call [csv_table_reader()] directly for lower-level access. #' -#' @param file A character path to a local file, or an Arrow input stream +#' @inheritParams make_readable_file #' @param delim Single character used to separate fields within a record. 
#' @param quote Single character used to quote strings. @@ -64,9 +64,9 @@ #' @param convert_options see [csv_convert_options()] #' @param read_options see [csv_read_options()] #' @param as_tibble Should the function return a `data.frame` or an -#' [arrow::Table][arrow__Table]? +#' [arrow::Table][Table]? #' -#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. +#' @return A `data.frame`, or a Table if `as_tibble = FALSE`. #' @export #' @examples #' \donttest{ @@ -181,15 +181,57 @@ read_tsv_arrow <- function(file, #' @include arrow-package.R -`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`, +CsvTableReader <- R6Class("CsvTableReader", inherit = Object, public = list( - Read = function() shared_ptr(`arrow::Table`, csv___TableReader__Read(self)) + Read = function() shared_ptr(Table, csv___TableReader__Read(self)) ) ) -`arrow::csv::ReadOptions` <- R6Class("arrow::csv::ReadOptions", inherit = `arrow::Object`) -`arrow::csv::ParseOptions` <- R6Class("arrow::csv::ParseOptions", inherit = `arrow::Object`) -`arrow::csv::ConvertOptions` <- R6Class("arrow::csv::ConvertOptions", inherit = `arrow::Object`) +CsvTableReader$create <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ...) { + file <- make_readable_file(file) + shared_ptr( + CsvTableReader, + csv___TableReader__Make(file, read_options, parse_options, convert_options) + ) +} + +#' Arrow CSV and JSON table readers +#' +#' These methods wrap the Arrow C++ CSV and JSON table readers. 
+#' For an interface to the CSV reader that's more familiar for R users, see +#' [read_csv_arrow()] +#' +#' @param file A character path to a local file, or an Arrow input stream +#' @param read_options see [csv_read_options()] +#' @param parse_options see [csv_parse_options()] +#' @param convert_options see [csv_convert_options()] +#' @param ... additional parameters. +#' +#' @return A CsvTableReader or JsonTableReader R6 +#' object. Call `$Read()` on it to get an Arrow Table. +#' @export +csv_table_reader <- CsvTableReader$create + +CsvReadOptions <- R6Class("CsvReadOptions", inherit = Object) +CsvReadOptions$create <- function(use_threads = option_use_threads(), + block_size = 1048576L, + skip_rows = 0L, + column_names = character(0), + autogenerate_column_names = FALSE) { + shared_ptr(CsvReadOptions, csv___ReadOptions__initialize( + list( + use_threads = use_threads, + block_size = block_size, + skip_rows = skip_rows, + column_names = column_names, + autogenerate_column_names = autogenerate_column_names + ) + )) +} #' Read options for the Arrow file readers #' @@ -206,21 +248,7 @@ read_tsv_arrow <- function(file, #' be "f0", "f1", ..., "fN". 
#' #' @export -csv_read_options <- function(use_threads = option_use_threads(), - block_size = 1048576L, - skip_rows = 0L, - column_names = character(0), - autogenerate_column_names = FALSE) { - shared_ptr(`arrow::csv::ReadOptions`, csv___ReadOptions__initialize( - list( - use_threads = use_threads, - block_size = block_size, - skip_rows = skip_rows, - column_names = column_names, - autogenerate_column_names = autogenerate_column_names - ) - )) -} +csv_read_options <- CsvReadOptions$create readr_to_csv_read_options <- function(skip, col_names) { if (isTRUE(col_names)) { @@ -234,6 +262,44 @@ readr_to_csv_read_options <- function(skip, col_names) { } } +CsvParseOptions <- R6Class("CsvParseOptions", inherit = Object) +CsvParseOptions$create <- function(delimiter = ",", + quoting = TRUE, + quote_char = '"', + double_quote = TRUE, + escaping = FALSE, + escape_char = '\\', + newlines_in_values = FALSE, + ignore_empty_lines = TRUE) { + + shared_ptr(CsvParseOptions, csv___ParseOptions__initialize( + list( + delimiter = delimiter, + quoting = quoting, + quote_char = quote_char, + double_quote = double_quote, + escaping = escaping, + escape_char = escape_char, + newlines_in_values = newlines_in_values, + ignore_empty_lines = ignore_empty_lines + ) + )) +} + +#' Parsing options for Arrow file readers +#' +#' @param delimiter Field delimiter +#' @param quoting Whether quoting is used +#' @param quote_char Quoting character (if `quoting` is `TRUE`) +#' @param double_quote Whether a quote inside a value is double-quoted +#' @param escaping Whether escaping is used +#' @param escape_char Escaping character (if `escaping` is `TRUE`) +#' @param newlines_in_values Whether values are allowed to contain CR (`0x0d`) and LF (`0x0a`) characters +#' @param ignore_empty_lines Whether empty lines are ignored. 
If `FALSE`, an empty line represents a single empty value +#' +#' @export +csv_parse_options <- CsvParseOptions$create + readr_to_csv_parse_options <- function(delim = ",", quote = '"', escape_double = TRUE, @@ -253,37 +319,22 @@ readr_to_csv_parse_options <- function(delim = ",", ) } -#' Parsing options for Arrow file readers -#' -#' @param delimiter Field delimiter -#' @param quoting Whether quoting is used -#' @param quote_char Quoting character (if `quoting` is `TRUE`) -#' @param double_quote Whether a quote inside a value is double-quoted -#' @param escaping Whether escaping is used -#' @param escape_char Escaping character (if `escaping` is `TRUE`) -#' @param newlines_in_values Whether values are allowed to contain CR (`0x0d`) and LF (`0x0a`) characters -#' @param ignore_empty_lines Whether empty lines are ignored. If `FALSE`, an empty line represents -#' -#' @export -csv_parse_options <- function(delimiter = ",", - quoting = TRUE, - quote_char = '"', - double_quote = TRUE, - escaping = FALSE, - escape_char = '\\', - newlines_in_values = FALSE, - ignore_empty_lines = TRUE) { +CsvConvertOptions <- R6Class("CsvConvertOptions", inherit = Object) +CsvConvertOptions$create <- function(check_utf8 = TRUE, + null_values = c("", "NA"), + strings_can_be_null = FALSE) { + # TODO: there are more conversion options available: + # // Optional per-column types (disabling type inference on those columns) + # std::unordered_map<std::string, std::shared_ptr<DataType>> column_types; + # // Recognized spellings for boolean values + # std::vector<std::string> true_values; + # std::vector<std::string> false_values; - shared_ptr(`arrow::csv::ParseOptions`, csv___ParseOptions__initialize( + shared_ptr(CsvConvertOptions, csv___ConvertOptions__initialize( list( - delimiter = delimiter, - quoting = quoting, - quote_char = quote_char, - double_quote = double_quote, - escaping = escaping, - escape_char = escape_char, - newlines_in_values = newlines_in_values, - ignore_empty_lines = ignore_empty_lines + check_utf8 = check_utf8, + null_values = null_values, + 
strings_can_be_null = strings_can_be_null ) )) } @@ -298,89 +349,8 @@ csv_parse_options <- function(delimiter = ",", #' null values. Similar to the `quoted_na` argument to `readr::read_csv()`. #' #' @export -csv_convert_options <- function(check_utf8 = TRUE, - null_values = c("", "NA"), - strings_can_be_null = FALSE) { - shared_ptr(`arrow::csv::ConvertOptions`, csv___ConvertOptions__initialize( - list( - check_utf8 = check_utf8, - null_values = null_values, - strings_can_be_null = strings_can_be_null - ) - )) -} +csv_convert_options <- CsvConvertOptions$create readr_to_csv_convert_options <- function(na, quoted_na) { - csv_convert_options(null_values = na, strings_can_be_null = quoted_na) -} - -#' Arrow CSV and JSON table readers -#' -#' These methods wrap the Arrow C++ CSV and JSON table readers. -#' For an interface to the CSV reader that's more familiar for R users, see -#' [read_csv_arrow()] -#' -#' @param file A character path to a local file, or an Arrow input stream -#' @param read_options see [csv_read_options()] -#' @param parse_options see [csv_parse_options()] -#' @param convert_options see [csv_convert_options()] -#' @param ... additional parameters. -#' -#' @return An `arrow::csv::TableReader` or `arrow::json::TableReader` R6 -#' object. Call `$Read()` on it to get an Arrow Table. -#' @export -csv_table_reader <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -){ - UseMethod("csv_table_reader") -} - -#' @export -csv_table_reader.default <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -) { - abort("unsupported") -} - -#' @export -`csv_table_reader.character` <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... 
-){ - csv_table_reader(mmap_open(file), - read_options = read_options, - parse_options = parse_options, - convert_options = convert_options, - ... - ) -} - -#' @export -`csv_table_reader.arrow::io::InputStream` <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -){ - shared_ptr(`arrow::csv::TableReader`, - csv___TableReader__Make(file, read_options, parse_options, convert_options) - ) -} - -#' @export -`csv_table_reader.arrow::csv::TableReader` <- function(file, - read_options = csv_read_options(), - parse_options = csv_parse_options(), - convert_options = csv_convert_options(), - ... -){ - file + csv_convert_options(null_values = na, strings_can_be_null = quoted_na) } diff --git a/r/R/dictionary.R b/r/R/dictionary.R index 9262a514b5a..ab33c3e1982 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -17,7 +17,7 @@ #' @include type.R -#' @title class arrow::DictionaryType +#' @title class DictionaryType #' #' @usage NULL #' @format NULL @@ -27,14 +27,14 @@ #' #' TODO #' -#' @rdname arrow__DictionaryType -#' @name arrow__DictionaryType -`arrow::DictionaryType` <- R6Class("arrow::DictionaryType", - inherit = `arrow::FixedWidthType`, +#' @rdname DictionaryType +#' @name DictionaryType +DictionaryType <- R6Class("DictionaryType", + inherit = FixedWidthType, active = list( - index_type = function() `arrow::DataType`$dispatch(DictionaryType__index_type(self)), - value_type = function() `arrow::DataType`$dispatch(DictionaryType__value_type(self)), + index_type = function() DataType$create(DictionaryType__index_type(self)), + value_type = function() DataType$create(DictionaryType__value_type(self)), name = function() DictionaryType__name(self), ordered = function() DictionaryType__ordered(self) ) @@ -46,13 +46,13 @@ #' @param value_type value type, probably [utf8()] #' @param ordered Is this an ordered dictionary ? 
#' -#' @return An [arrow::DictionaryType][arrow__DictionaryType] +#' @return A [DictionaryType] #' @seealso [Other Arrow data types][data-type] #' @export dictionary <- function(index_type, value_type, ordered = FALSE) { assert_that( - inherits(index_type, "arrow::DataType"), - inherits(index_type, "arrow::DataType") + inherits(index_type, "DataType"), + inherits(value_type, "DataType") ) - shared_ptr(`arrow::DictionaryType`, DictionaryType__initialize(index_type, value_type, ordered)) + shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered)) } diff --git a/r/R/enums.R b/r/R/enums.R index 5c24ce8e6e3..5a5cdfa3d25 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -31,17 +31,17 @@ enum <- function(class, ..., .list = list(...)){ #' @name enums #' @export #' @keywords internal -TimeUnit <- enum("arrow::TimeUnit::type", +TimeUnit <- enum("TimeUnit::type", SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L ) #' @rdname enums #' @export -DateUnit <- enum("arrow::DateUnit", DAY = 0L, MILLI = 1L) +DateUnit <- enum("DateUnit", DAY = 0L, MILLI = 1L) #' @rdname enums #' @export -Type <- enum("arrow::Type::type", +Type <- enum("Type::type", "NA" = 0L, BOOL = 1L, UINT8 = 2L, INT8 = 3L, UINT16 = 4L, INT16 = 5L, UINT32 = 6L, INT32 = 7L, UINT64 = 8L, INT64 = 9L, HALF_FLOAT = 10L, FLOAT = 11L, DOUBLE = 12L, STRING = 13L, @@ -52,7 +52,7 @@ Type <- enum("arrow::Type::type", #' @rdname enums #' @export -StatusCode <- enum("arrow::StatusCode", +StatusCode <- enum("StatusCode", OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L, Invalid = 4L, IOError = 5L, CapacityError = 6L, IndexError = 7L, UnknownError = 9L, NotImplemented = 10L, SerializationError = 11L, @@ -63,18 +63,18 @@ StatusCode <- enum("arrow::StatusCode", #' @rdname enums #' @export -FileMode <- enum("arrow::io::FileMode", +FileMode <- enum("FileMode", READ = 0L, WRITE = 1L, READWRITE = 2L ) #' @rdname enums #' @export -MessageType <- enum("arrow::ipc::Message::Type", +MessageType <- 
enum("Message::Type", NONE = 0L, SCHEMA = 1L, DICTIONARY_BATCH = 2L, RECORD_BATCH = 3L, TENSOR = 4L ) #' @rdname enums #' @export -CompressionType <- enum("arrow::Compression::type", +CompressionType <- enum("Compression::type", UNCOMPRESSED = 0L, SNAPPY = 1L, GZIP = 2L, BROTLI = 3L, ZSTD = 4L, LZ4 = 5L, LZO = 6L, BZ2 = 7L ) diff --git a/r/R/feather.R b/r/R/feather.R index 46c3f5ff2c3..d835ae266ff 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -15,50 +15,10 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R - -`arrow::ipc::feather::TableWriter` <- R6Class("arrow::ipc::feather::TableWriter", inherit = `arrow::Object`, - public = list( - SetDescription = function(description) ipc___feather___TableWriter__SetDescription(self, description), - SetNumRows = function(num_rows) ipc___feather___TableWriter__SetNumRows(self, num_rows), - Append = function(name, values) ipc___feather___TableWriter__Append(self, name, values), - Finalize = function() ipc___feather___TableWriter__Finalize(self) - ) -) - -`arrow::ipc::feather::TableReader` <- R6Class("arrow::ipc::feather::TableReader", inherit = `arrow::Object`, - public = list( - GetDescription = function() ipc___feather___TableReader__GetDescription(self), - HasDescription = function() ipc__feather___TableReader__HasDescription(self), - version = function() ipc___feather___TableReader__version(self), - num_rows = function() ipc___feather___TableReader__num_rows(self), - num_columns = function() ipc___feather___TableReader__num_columns(self), - GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), - GetColumn = function(i) shared_ptr(`arrow::Array`, ipc___feather___TableReader__GetColumn(self, i)), - Read = function(columns) { - shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) - } - ) -) - -#' Create `TableWriter` that writes into a stream -#' -#' @param stream an `OutputStream` -#' -#' @export 
-FeatherTableWriter <- function(stream) { - UseMethod("FeatherTableWriter") -} - -#' @export -`FeatherTableWriter.arrow::io::OutputStream` <- function(stream){ - unique_ptr(`arrow::ipc::feather::TableWriter`, ipc___feather___TableWriter__Open(stream)) -} - #' Write data in the Feather format #' -#' @param data `data.frame` or `arrow::RecordBatch` -#' @param stream A file path or an `arrow::io::OutputStream` +#' @param data `data.frame` or RecordBatch +#' @param stream A file path or an OutputStream #' #' @export #' @examples @@ -69,100 +29,75 @@ FeatherTableWriter <- function(stream) { #' write_feather(mtcars, tf) #' }) #' } +#' @include arrow-package.R write_feather <- function(data, stream) { - UseMethod("write_feather", data) -} - -#' @export -write_feather.default <- function(data, stream) { - stop("unsupported") -} + if (is.data.frame(data)) { + data <- record_batch(data) + } + assert_is(data, "RecordBatch") -#' @export -write_feather.data.frame <- function(data, stream) { - write_feather(record_batch(data), stream) -} + if (is.character(stream)) { + stream <- FileOutputStream$create(stream) + on.exit(stream$close()) + } + assert_is(stream, "OutputStream") -#' @method write_feather arrow::RecordBatch -#' @export -`write_feather.arrow::RecordBatch` <- function(data, stream) { - write_feather_RecordBatch(data, stream) + writer <- FeatherTableWriter$create(stream) + ipc___TableWriter__RecordBatch__WriteFeather(writer, data) } -#' Write a record batch in the feather format +#' @title FeatherTableWriter class +#' @rdname FeatherTableWriter +#' @name FeatherTableWriter +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class enables you to write Feather files. See its usage in +#' [write_feather()]. 
#' -#' @param data `data.frame` or `arrow::RecordBatch` -#' @param stream A file path or an `arrow::io::OutputStream` +#' @section Factory: #' -#' @export -#' @keywords internal -write_feather_RecordBatch <- function(data, stream) { - UseMethod("write_feather_RecordBatch", stream) -} - -#' @export -#' @method write_feather_RecordBatch default -`write_feather_RecordBatch.default` <- function(data, stream) { - stop("unsupported") -} - -#' @export -#' @method write_feather_RecordBatch character -write_feather_RecordBatch.character <- function(data, stream) { - file_stream <- FileOutputStream(stream) - on.exit(file_stream$close()) - `write_feather_RecordBatch.arrow::io::OutputStream`(data, file_stream) -} - -#' @export -#' @method write_feather_RecordBatch arrow::io::OutputStream -`write_feather_RecordBatch.arrow::io::OutputStream` <- function(data, stream) { - ipc___TableWriter__RecordBatch__WriteFeather(FeatherTableWriter(stream), data) -} - -#' A `arrow::ipc::feather::TableReader` to read from a file +#' The `FeatherTableWriter$create()` factory method instantiates the object and +#' takes the following argument: #' -#' @param file A file path or `arrow::io::RandomAccessFile` -#' @param mmap Is the file memory mapped (applicable to the `character` method) -#' @param ... extra parameters +#' - `stream` An `OutputStream` +#' +#' @section Methods: +#' +#' - `$GetDescription()` +#' - `$HasDescription()` +#' - `$version()` +#' - `$num_rows()` +#' - `$num_columns()` +#' - `$GetColumnName()` +#' - `$GetColumn()` +#' - `$Read(columns)` #' #' @export -FeatherTableReader <- function(file, mmap = TRUE, ...){ - UseMethod("FeatherTableReader") -} - -#' @export -FeatherTableReader.character <- function(file, mmap = TRUE, ...) { - if (isTRUE(mmap)) { - stream <- mmap_open(file, ...) - } else { - stream <- ReadableFile(file, ...) - } - FeatherTableReader(stream) -} - -#' @export -FeatherTableReader.raw <- function(file, mmap = TRUE, ...) 
{ - FeatherTableReader(BufferReader(file), mmap = mmap, ...) -} - -#' @export -`FeatherTableReader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){ - unique_ptr(`arrow::ipc::feather::TableReader`, ipc___feather___TableReader__Open(file)) -} +#' @include arrow-package.R +FeatherTableWriter <- R6Class("FeatherTableWriter", inherit = Object, + public = list( + SetDescription = function(description) ipc___feather___TableWriter__SetDescription(self, description), + SetNumRows = function(num_rows) ipc___feather___TableWriter__SetNumRows(self, num_rows), + Append = function(name, values) ipc___feather___TableWriter__Append(self, name, values), + Finalize = function() ipc___feather___TableWriter__Finalize(self) + ) +) -#' @export -`FeatherTableReader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){ - file +FeatherTableWriter$create <- function(stream) { + assert_is(stream, "OutputStream") + unique_ptr(FeatherTableWriter, ipc___feather___TableWriter__Open(stream)) } #' Read a Feather file #' -#' @param file an `arrow::ipc::feather::TableReader` or whatever the [FeatherTableReader()] function can handle +#' @param file A character file path, a raw vector, or `InputStream`, passed to +#' `FeatherTableReader$create()`. #' @inheritParams read_delim_arrow #' @param ... additional parameters #' -#' @return A `data.frame` if `as_tibble` is `TRUE` (the default), or a [arrow::Table][arrow__Table] otherwise +#' @return A `data.frame` if `as_tibble` is `TRUE` (the default), or an +#' [arrow::Table][Table] otherwise #' #' @export #' @examples @@ -177,8 +112,8 @@ FeatherTableReader.raw <- function(file, mmap = TRUE, ...) { #' df <- read_feather(tf, col_select = starts_with("Sepal")) #' }) #' } -read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...){ - reader <- FeatherTableReader(file, ...) +read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...) { + reader <- FeatherTableReader$create(file, ...) 
all_columns <- ipc___feather___TableReader__column_names(reader) col_select <- enquo(col_select) @@ -192,3 +127,56 @@ read_feather <- function(file, col_select = NULL, as_tibble = TRUE, ...){ } out } + +#' @title FeatherTableReader class +#' @rdname FeatherTableReader +#' @name FeatherTableReader +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class enables you to interact with Feather files. Create +#' one to connect to a file or other InputStream, and call `Read()` on it to +#' make an `arrow::Table`. See its usage in [`read_feather()`]. +#' +#' @section Factory: +#' +#' The `FeatherTableReader$create()` factory method instantiates the object and +#' takes the following arguments: +#' +#' - `file` A character file name, raw vector, or Arrow file connection object +#' (e.g. `RandomAccessFile`). +#' - `mmap` Logical: whether to memory-map the file (default `TRUE`) +#' - `...` Additional arguments, currently ignored +#' +#' @section Methods: +#' +#' - `$GetDescription()` +#' - `$HasDescription()` +#' - `$version()` +#' - `$num_rows()` +#' - `$num_columns()` +#' - `$GetColumnName()` +#' - `$GetColumn()` +#' - `$Read(columns)` +#' +#' @export +#' @include arrow-package.R +FeatherTableReader <- R6Class("FeatherTableReader", inherit = Object, + public = list( + GetDescription = function() ipc___feather___TableReader__GetDescription(self), + HasDescription = function() ipc___feather___TableReader__HasDescription(self), + version = function() ipc___feather___TableReader__version(self), + num_rows = function() ipc___feather___TableReader__num_rows(self), + num_columns = function() ipc___feather___TableReader__num_columns(self), + GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), + GetColumn = function(i) shared_ptr(Array, ipc___feather___TableReader__GetColumn(self, i)), + Read = function(columns) { + shared_ptr(Table, ipc___feather___TableReader__Read(self, columns)) + } + ) +) + +FeatherTableReader$create <- 
function(file, mmap = TRUE, ...) { + file <- make_readable_file(file, mmap) + unique_ptr(FeatherTableReader, ipc___feather___TableReader__Open(file)) +} diff --git a/r/R/Field.R b/r/R/field.R similarity index 70% rename from r/R/Field.R rename to r/R/field.R index cc2f6cd185d..89e3b2322f2 100644 --- a/r/R/Field.R +++ b/r/R/field.R @@ -16,26 +16,26 @@ # under the License. #' @include arrow-package.R - #' @title class arrow::Field -#' -#' @usage NULL -#' @format NULL #' @docType class -#' +#' @description `field()` lets you create an `arrow::Field` that maps a +#' [DataType][data-type] to a column name. Fields are contained in +#' [Schemas][Schema]. #' @section Methods: #' -#' TODO +#' - `f$ToString()`: convert to a string +#' - `f$Equals(other)`: test for equality. More naturally called as `f == other` #' -#' @rdname arrow__Field -#' @name arrow__Field -`arrow::Field` <- R6Class("arrow::Field", inherit = `arrow::Object`, +#' @rdname Field +#' @name Field +#' @export +Field <- R6Class("Field", inherit = Object, public = list( ToString = function() { Field__ToString(self) }, Equals = function(other) { - inherits(other, "arrow::Field") && Field__Equals(self, other) + inherits(other, "Field") && Field__Equals(self, other) } ), @@ -47,42 +47,40 @@ Field__nullable(self) }, type = function() { - `arrow::DataType`$dispatch(Field__type(self)) + DataType$create(Field__type(self)) } ) ) +Field$create <- function(name, type, metadata) { + assert_that(inherits(name, "character"), length(name) == 1L) + if (!inherits(type, "DataType")) { + if (identical(type, double())) { + # Magic so that we don't have to mask this base function + type <- float64() + } else { + stop(name, " must be arrow::DataType, not ", class(type), call. 
= FALSE) + } + } + assert_that(missing(metadata), msg = "metadata= is currently ignored") + shared_ptr(Field, Field__initialize(name, type, TRUE)) +} #' @export -`==.arrow::Field` <- function(lhs, rhs){ +`==.Field` <- function(lhs, rhs){ lhs$Equals(rhs) } -#' Factory for a `arrow::Field` -#' #' @param name field name -#' @param type logical type, instance of `arrow::DataType` +#' @param type logical type, instance of [DataType] #' @param metadata currently ignored #' #' @examples #' \donttest{ -#' try({ -#' field("x", int32()) -#' }) +#' field("x", int32()) #' } +#' @rdname Field #' @export -field <- function(name, type, metadata) { - assert_that(inherits(name, "character"), length(name) == 1L) - if (!inherits(type, "arrow::DataType")) { - if (identical(type, double())) { - # Magic so that we don't have to mask this base function - type <- float64() - } else { - stop(name, " must be arrow::DataType, not ", class(type), call. = FALSE) - } - } - assert_that(missing(metadata), msg = "metadata= is currently ignored") - shared_ptr(`arrow::Field`, Field__initialize(name, type, TRUE)) -} +field <- Field$create .fields <- function(.list){ assert_that(!is.null(nms <- names(.list))) diff --git a/r/R/io.R b/r/R/io.R index 59d573f3b14..f5390e32b25 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -21,166 +21,167 @@ # OutputStream ------------------------------------------------------------ -`arrow::io::Writable` <- R6Class("arrow::io::Writable", inherit = `arrow::Object`, +Writable <- R6Class("Writable", inherit = Object, public = list( write = function(x) io___Writable__write(self, buffer(x)) ) ) -#' @title OutputStream -#' +#' @title OutputStream classes +#' @description `FileOutputStream` is for writing to a file; +#' `BufferOutputStream` and `FixedSizeBufferWriter` write to buffers; +#' `MockOutputStream` just reports back how many bytes it received, for testing +#' purposes. You can create one and pass it to any of the table writers, for +#' example. 
#' @usage NULL #' @format NULL #' @docType class +#' @section Factory: #' -#' @section Methods: +#' The `$create()` factory methods instantiate the `OutputStream` object and +#' take the following arguments, depending on the subclass: +#' +#' - `path` For `FileOutputStream`, a character file name +#' - `initial_capacity` For `BufferOutputStream`, the size in bytes of the +#' buffer. +#' - `x` For `FixedSizeBufferWriter`, a [Buffer] or an object that can be +#' made into a buffer via `buffer()`. #' -#' - `arrow::Buffer` `Read`(`int` nbytes): Read `nbytes` bytes -#' - `void` `close`(): close the stream +#' `MockOutputStream$create()` does not take any arguments. +#' +#' @section Methods: #' -#' @rdname arrow__io__OutputStream -#' @name arrow__io__OutputStream -`arrow::io::OutputStream` <- R6Class("arrow::io::OutputStream", inherit = `arrow::io::Writable`, +#' - `$tell()`: return the position in the stream +#' - `$close()`: close the stream +#' - `$write(x)`: send `x` to the stream +#' - `$capacity()`: for `BufferOutputStream` +#' - `$getvalue()`: for `BufferOutputStream` +#' - `$GetExtentBytesWritten()`: for `MockOutputStream`, report how many bytes +#' were sent. 
+#' +#' @rdname OutputStream +#' @name OutputStream +OutputStream <- R6Class("OutputStream", inherit = Writable, public = list( close = function() io___OutputStream__Close(self), tell = function() io___OutputStream__Tell(self) ) ) -#' @title class arrow::io::FileOutputStream -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__FileOutputStream -#' @name arrow__io__FileOutputStream -`arrow::io::FileOutputStream` <- R6Class("arrow::io::FileOutputStream", inherit = `arrow::io::OutputStream`) +#' @rdname OutputStream +#' @export +FileOutputStream <- R6Class("FileOutputStream", inherit = OutputStream) +FileOutputStream$create <- function(path) { + path <- normalizePath(path, mustWork = FALSE) + shared_ptr(FileOutputStream, io___FileOutputStream__Open(path)) +} -#' @title class arrow::io::MockOutputStream -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__MockOutputStream -#' @name arrow__io__MockOutputStream -`arrow::io::MockOutputStream` <- R6Class("arrow::io::MockOutputStream", inherit = `arrow::io::OutputStream`, +#' @rdname OutputStream +#' @export +MockOutputStream <- R6Class("MockOutputStream", inherit = OutputStream, public = list( GetExtentBytesWritten = function() io___MockOutputStream__GetExtentBytesWritten(self) ) ) +MockOutputStream$create <- function() { + shared_ptr(MockOutputStream, io___MockOutputStream__initialize()) +} -#' @title class arrow::io::BufferOutputStream -#' #' @usage NULL -#' @docType class -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__BufferOutputStream -#' @name arrow__io__BufferOutputStream -`arrow::io::BufferOutputStream` <- R6Class("arrow::io::BufferOutputStream", inherit = `arrow::io::OutputStream`, +#' @format NULL +#' @rdname OutputStream +#' @export +BufferOutputStream <- R6Class("BufferOutputStream", inherit = OutputStream, public = list( capacity = function() 
io___BufferOutputStream__capacity(self), - getvalue = function() shared_ptr(`arrow::Buffer`, io___BufferOutputStream__Finish(self)), - - Write = function(bytes) io___BufferOutputStream__Write(self, bytes), - Tell = function() io___BufferOutputStream__Tell(self) + getvalue = function() shared_ptr(Buffer, io___BufferOutputStream__Finish(self)), + write = function(bytes) io___BufferOutputStream__Write(self, bytes), + tell = function() io___BufferOutputStream__Tell(self) ) ) +BufferOutputStream$create <- function(initial_capacity = 0L) { + shared_ptr(BufferOutputStream, io___BufferOutputStream__Create(initial_capacity)) +} -#' @title class arrow::io::FixedSizeBufferWriter -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__FixedSizeBufferWriter -#' @name arrow__io__FixedSizeBufferWriter -`arrow::io::FixedSizeBufferWriter` <- R6Class("arrow::io::FixedSizeBufferWriter", inherit = `arrow::io::OutputStream`) - +#' @rdname OutputStream +#' @export +FixedSizeBufferWriter <- R6Class("FixedSizeBufferWriter", inherit = OutputStream) +FixedSizeBufferWriter$create <- function(x) { + x <- buffer(x) + assert_that(x$is_mutable) + shared_ptr(FixedSizeBufferWriter, io___FixedSizeBufferWriter__initialize(x)) +} # InputStream ------------------------------------------------------------- -#' @title class arrow::io::Readable -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__Readable -#' @name arrow__io__Readable -`arrow::io::Readable` <- R6Class("arrow::io::Readable", inherit = `arrow::Object`, + +Readable <- R6Class("Readable", inherit = Object, public = list( - Read = function(nbytes) shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) + Read = function(nbytes) shared_ptr(Buffer, io___Readable__Read(self, nbytes)) ) ) -#' @title class arrow::io::InputStream -#' +#' @title InputStream classes +#' @description `RandomAccessFile` 
inherits from `InputStream` and is a base +#' class for: `ReadableFile` for reading from a file; `MemoryMappedFile` for +#' the same but with memory mapping; and `BufferReader` for reading from a +#' buffer. Use these with the various table readers. #' @usage NULL #' @format NULL #' @docType class +#' @section Factory: #' +#' The `$create()` factory methods instantiate the `InputStream` object and +#' take the following arguments, depending on the subclass: #' -#' @section Methods: +#' - `path` For `ReadableFile`, a character file name +#' - `x` For `BufferReader`, a [Buffer] or an object that can be +#' made into a buffer via `buffer()`. +#' +#' To instantiate a `MemoryMappedFile`, call [mmap_open()]. #' -#' TODO +#' @section Methods: #' -#' @rdname arrow__io__InputStream -#' @name arrow__io__InputStream -`arrow::io::InputStream` <- R6Class("arrow::io::InputStream", inherit = `arrow::io::Readable`, +#' - `$GetSize()`: +#' - `$supports_zero_copy()`: Logical +#' - `$seek(position)`: go to that position in the stream +#' - `$tell()`: return the position in the stream +#' - `$close()`: close the stream +#' - `$Read(nbytes)`: read data from the stream, either a specified `nbytes` or +#' all, if `nbytes` is not provided +#' - `$ReadAt(position, nbytes)`: similar to `$seek(position)$Read(nbytes)` +#' - `$Resize(size)`: for a `MemoryMappedFile` that is writeable +#' +#' @rdname InputStream +#' @name InputStream +InputStream <- R6Class("InputStream", inherit = Readable, public = list( close = function() io___InputStream__Close(self) ) ) -#' @title class arrow::io::RandomAccessFile -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__RandomAccessFile -#' @name arrow__io__RandomAccessFile -`arrow::io::RandomAccessFile` <- R6Class("arrow::io::RandomAccessFile", inherit = `arrow::io::InputStream`, +#' @rdname InputStream +#' @export +RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream, 
public = list( GetSize = function() io___RandomAccessFile__GetSize(self), supports_zero_copy = function() io___RandomAccessFile__supports_zero_copy(self), - Seek = function(position) io___RandomAccessFile__Seek(self, position), - Tell = function() io___RandomAccessFile__Tell(self), + seek = function(position) io___RandomAccessFile__Seek(self, position), + tell = function() io___RandomAccessFile__Tell(self), Read = function(nbytes = NULL) { if (is.null(nbytes)) { - shared_ptr(`arrow::Buffer`, io___RandomAccessFile__Read0(self)) + shared_ptr(Buffer, io___RandomAccessFile__Read0(self)) } else { - shared_ptr(`arrow::Buffer`, io___Readable__Read(self, nbytes)) + shared_ptr(Buffer, io___Readable__Read(self, nbytes)) } }, @@ -188,72 +189,51 @@ if (is.null(nbytes)) { nbytes <- self$GetSize() - position } - shared_ptr(`arrow::Buffer`, io___RandomAccessFile__ReadAt(self, position, nbytes)) + shared_ptr(Buffer, io___RandomAccessFile__ReadAt(self, position, nbytes)) } ) ) -#' @title class arrow::io::MemoryMappedFile -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @seealso [mmap_open()], [mmap_create()] -#' -#' -#' @rdname arrow__io__MemoryMappedFile -#' @name arrow__io__MemoryMappedFile -`arrow::io::MemoryMappedFile` <- R6Class("arrow::io::MemoryMappedFile", inherit = `arrow::io::RandomAccessFile`, +#' @rdname InputStream +#' @export +MemoryMappedFile <- R6Class("MemoryMappedFile", inherit = RandomAccessFile, public = list( Resize = function(size) io___MemoryMappedFile__Resize(self, size) ) ) -#' @title class arrow::io::ReadableFile -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__ReadableFile -#' @name arrow__io__ReadableFile -`arrow::io::ReadableFile` <- R6Class("arrow::io::ReadableFile", inherit = `arrow::io::RandomAccessFile`) +#' @rdname InputStream +#' @export +ReadableFile <- R6Class("ReadableFile", inherit = RandomAccessFile) 
+ReadableFile$create <- function(path) { + shared_ptr(ReadableFile, io___ReadableFile__Open(normalizePath(path))) +} -#' @title class arrow::io::BufferReader -#' #' @usage NULL #' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__io__BufferReader -#' @name arrow__io__BufferReader -`arrow::io::BufferReader` <- R6Class("arrow::io::BufferReader", inherit = `arrow::io::RandomAccessFile`) +#' @rdname InputStream +#' @export +BufferReader <- R6Class("BufferReader", inherit = RandomAccessFile) +BufferReader$create <- function(x) { + x <- buffer(x) + shared_ptr(BufferReader, io___BufferReader__initialize(x)) +} #' Create a new read/write memory mapped file of a given size #' #' @param path file path #' @param size size in bytes #' -#' @return a [arrow::io::MemoryMappedFile][arrow__io__MemoryMappedFile] +#' @return a [arrow::io::MemoryMappedFile][MemoryMappedFile] #' #' @export mmap_create <- function(path, size) { - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Create(normalizePath(path, mustWork = FALSE), size)) + path <- normalizePath(path, mustWork = FALSE) + shared_ptr(MemoryMappedFile, io___MemoryMappedFile__Create(path, size)) } #' Open a memory mapped file @@ -264,88 +244,26 @@ mmap_create <- function(path, size) { #' @export mmap_open <- function(path, mode = c("read", "write", "readwrite")) { mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L - shared_ptr(`arrow::io::MemoryMappedFile`, io___MemoryMappedFile__Open(normalizePath(path), mode)) -} - -#' open a [arrow::io::ReadableFile][arrow__io__ReadableFile] -#' -#' @param path file path -#' -#' @return a [arrow::io::ReadableFile][arrow__io__ReadableFile] -#' -#' @export -ReadableFile <- function(path) { - shared_ptr(`arrow::io::ReadableFile`, io___ReadableFile__Open(normalizePath(path))) -} - -#' Open a [arrow::io::FileOutputStream][arrow__io__FileOutputStream] -#' -#' @param path file path -#' -#' @return a 
[arrow::io::FileOutputStream][arrow__io__FileOutputStream] -#' -#' @export -FileOutputStream <- function(path) { - shared_ptr(`arrow::io::FileOutputStream`, io___FileOutputStream__Open(normalizePath(path, mustWork = FALSE))) -} - -#' Open a [arrow::io::MockOutputStream][arrow__io__MockOutputStream] -#' -#' @return a [arrow::io::MockOutputStream][arrow__io__MockOutputStream] -#' -#' @export -MockOutputStream <- function() { - shared_ptr(`arrow::io::MockOutputStream`, io___MockOutputStream__initialize()) -} - -#' Open a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] -#' -#' @param initial_capacity initial capacity -#' -#' @return a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] -#' -#' @export -BufferOutputStream <- function(initial_capacity = 0L) { - shared_ptr(`arrow::io::BufferOutputStream`, io___BufferOutputStream__Create(initial_capacity)) -} - -#' Open a [arrow::io::FixedSizeBufferWriter][arrow__io__FixedSizeBufferWriter] -#' -#' @param buffer [arrow::Buffer][arrow__Buffer] or something [buffer()] can handle -#' -#' @return a [arrow::io::BufferOutputStream][arrow__io__BufferOutputStream] -#' -#' @export -FixedSizeBufferWriter <- function(buffer){ - UseMethod("FixedSizeBufferWriter") + path <- normalizePath(path) + shared_ptr(MemoryMappedFile, io___MemoryMappedFile__Open(path, mode)) } -#' @export -FixedSizeBufferWriter.default <- function(buffer){ - FixedSizeBufferWriter(buffer(buffer)) -} - -#' @export -`FixedSizeBufferWriter.arrow::Buffer` <- function(buffer){ - assert_that(buffer$is_mutable) - shared_ptr(`arrow::io::FixedSizeBufferWriter`, io___FixedSizeBufferWriter__initialize(buffer)) -} - -#' Create a [arrow::io::BufferReader][arrow__io__BufferReader] -#' -#' @param x R object to treat as a buffer or a buffer created by [buffer()] -#' -#' @export -BufferReader <- function(x) { - UseMethod("BufferReader") -} - -#' @export -BufferReader.default <- function(x) { - BufferReader(buffer(x)) -} - -#' @export 
-`BufferReader.arrow::Buffer` <- function(x) {
-  shared_ptr(`arrow::io::BufferReader`, io___BufferReader__initialize(x))
+#' Handle a range of possible input sources
+#' @param file A character file name, raw vector, or an Arrow input stream
+#' @param mmap Logical: whether to memory-map the file (default `TRUE`)
+#' @return An `InputStream` or a subclass of one.
+#' @keywords internal
+make_readable_file <- function(file, mmap = TRUE) {
+  if (is.character(file)) {
+    assert_that(length(file) == 1L)
+    if (isTRUE(mmap)) {
+      file <- mmap_open(file)
+    } else {
+      file <- ReadableFile$create(file)
+    }
+  } else if (inherits(file, c("raw", "Buffer"))) {
+    file <- BufferReader$create(file)
+  }
+  assert_is(file, "InputStream")
+  file
 }
diff --git a/r/R/json.R b/r/R/json.R
index cd43231e422..bf038a18fa9 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -15,11 +15,40 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#' @include arrow-package.R
+#' Read a JSON file
+#'
+#' Using [JsonTableReader]
+#'
+#' @inheritParams read_delim_arrow
+#' @param ... Additional options, passed to `json_table_reader()`
+#'
+#' @return A `data.frame`, or a [Table] if `as_tibble = FALSE`.
+#' @export
+#' @examples
+#' \donttest{
+#' try({
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' writeLines('
+#' { "hello": 3.5, "world": false, "yo": "thing" }
+#' { "hello": 3.25, "world": null }
+#' { "hello": 0.0, "world": true, "yo": null }
+#' ', tf, useBytes=TRUE)
+#' df <- read_json_arrow(tf)
+#' })
+#' }
+read_json_arrow <- function(file, col_select = NULL, as_tibble = TRUE, ...) 
{ + tab <- json_table_reader(file, ...)$Read()$select(!!enquo(col_select)) + + if (isTRUE(as_tibble)) { + tab <- as.data.frame(tab) + } + tab +} #' @include arrow-package.R #' -#' @title class arrow::json::TableReader +#' @title class JsonTableReader #' #' @usage NULL #' @format NULL @@ -27,23 +56,34 @@ #' #' @section Methods: #' -#' - `Read()` : read the JSON file as an [arrow::Table][arrow__Table] +#' - `Read()` : read the JSON file as an [arrow::Table][Table] #' -#' @rdname arrow__json__TableReader -#' @name arrow__json__TableReader -`arrow::json::TableReader` <- R6Class("arrow::json::TableReader", inherit = `arrow::Object`, +#' @rdname JsonTableReader +#' @name JsonTableReader +JsonTableReader <- R6Class("JsonTableReader", inherit = Object, public = list( - Read = function() shared_ptr(`arrow::Table`, json___TableReader__Read(self)) + Read = function() shared_ptr(Table, json___TableReader__Read(self)) ) ) +JsonTableReader$create <- function(file, + read_options = json_read_options(), + parse_options = json_parse_options(), + ...) 
{ -`arrow::json::ReadOptions` <- R6Class("arrow::json::ReadOptions", inherit = `arrow::Object`) -`arrow::json::ParseOptions` <- R6Class("arrow::json::ParseOptions", inherit = `arrow::Object`) + file <- make_readable_file(file) + shared_ptr( + JsonTableReader, + json___TableReader__Make(file, read_options, parse_options) + ) +} -#' @rdname csv_read_options +#' @rdname csv_table_reader #' @export -json_read_options <- function(use_threads = TRUE, block_size = 1048576L) { - shared_ptr(`arrow::json::ReadOptions`, json___ReadOptions__initialize( +json_table_reader <- JsonTableReader$create + +JsonReadOptions <- R6Class("JsonReadOptions", inherit = Object) +JsonReadOptions$create <- function(use_threads = TRUE, block_size = 1048576L) { + shared_ptr(JsonReadOptions, json___ReadOptions__initialize( list( use_threads = use_threads, block_size = block_size @@ -51,96 +91,20 @@ json_read_options <- function(use_threads = TRUE, block_size = 1048576L) { )) } -#' @rdname csv_parse_options +#' @rdname csv_read_options #' @export -json_parse_options <- function(newlines_in_values = FALSE) { - shared_ptr(`arrow::json::ParseOptions`, json___ParseOptions__initialize( +json_read_options <- JsonReadOptions$create + +JsonParseOptions <- R6Class("JsonParseOptions", inherit = Object) +JsonParseOptions$create <- function(newlines_in_values = FALSE) { + shared_ptr(JsonParseOptions, json___ParseOptions__initialize( list( newlines_in_values = newlines_in_values ) )) } -#' @rdname csv_table_reader -#' @export -json_table_reader <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - UseMethod("json_table_reader") -} - -#' @importFrom rlang abort -#' @export -json_table_reader.default <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... 
-) { - abort("unsupported") -} - -#' @export -`json_table_reader.character` <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - json_table_reader(ReadableFile(file), - read_options = read_options, - parse_options = parse_options, - ... - ) -} - -#' @export -`json_table_reader.arrow::io::InputStream` <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - shared_ptr(`arrow::json::TableReader`, - json___TableReader__Make(file, read_options, parse_options) - ) -} +#' @rdname csv_parse_options #' @export -`json_table_reader.arrow::json::TableReader` <- function(file, - read_options = json_read_options(), - parse_options = json_parse_options(), - ... -){ - file -} - -#' Read a JSON file -#' -#' Use [arrow::json::TableReader][arrow__json__TableReader] from [json_table_reader()] -#' -#' @inheritParams read_delim_arrow -#' @param ... Additional options, passed to `json_table_reader()` -#' -#' @return A `data.frame`, or an `arrow::Table` if `as_tibble = FALSE`. -#' @export -#' @examples -#' \donttest{ -#' try({ -#' tf <- tempfile() -#' on.exit(unlink(tf)) -#' writeLines(' -#' { "hello": 3.5, "world": false, "yo": "thing" } -#' { "hello": 3.25, "world": null } -#' { "hello": 0.0, "world": true, "yo": null } -#' ', tf, useBytes=TRUE) -#' df <- read_json_arrow(tf) -#' }) -#' } -read_json_arrow <- function(file, col_select = NULL, as_tibble = TRUE, ...) 
{ - tab <- json_table_reader(file, ...)$Read()$select(!!enquo(col_select)) - - if (isTRUE(as_tibble)) { - tab <- as.data.frame(tab) - } - tab -} +json_parse_options <- JsonParseOptions$create diff --git a/r/R/List.R b/r/R/list.R similarity index 72% rename from r/R/List.R rename to r/R/list.R index a970fb895a9..31c72bb3034 100644 --- a/r/R/List.R +++ b/r/R/list.R @@ -17,14 +17,14 @@ #' @include type.R -`arrow::ListType` <- R6Class("arrow::ListType", - inherit = `arrow::NestedType`, +ListType <- R6Class("ListType", + inherit = NestedType, active = list( - value_field = function() shared_ptr(`arrow::Field`, ListType__value_field(self)), - value_type = function() `arrow::DataType`$dispatch(ListType__value_type(self)) + value_field = function() shared_ptr(Field, ListType__value_field(self)), + value_type = function() DataType$create(ListType__value_type(self)) ) ) #' @rdname data-type #' @export -list_of <- function(type) shared_ptr(`arrow::ListType`, list__(type)) +list_of <- function(type) shared_ptr(ListType, list__(type)) diff --git a/r/R/memory_pool.R b/r/R/memory-pool.R similarity index 79% rename from r/R/memory_pool.R rename to r/R/memory-pool.R index 771e05bebf5..67b5428e089 100644 --- a/r/R/memory_pool.R +++ b/r/R/memory-pool.R @@ -27,10 +27,10 @@ #' #' TODO #' -#' @rdname arrow___MemoryPool -#' @name arrow__MemoryPool -`arrow::MemoryPool` <- R6Class("arrow::MemoryPool", - inherit = `arrow::Object`, +#' @rdname MemoryPool +#' @name MemoryPool +MemoryPool <- R6Class("MemoryPool", + inherit = Object, public = list( # TODO: Allocate # TODO: Reallocate @@ -40,10 +40,10 @@ ) ) -#' default [arrow::MemoryPool][arrow__MemoryPool] +#' default [arrow::MemoryPool][MemoryPool] #' -#' @return the default [arrow::MemoryPool][arrow__MemoryPool] +#' @return the default [arrow::MemoryPool][MemoryPool] #' @export default_memory_pool <- function() { - shared_ptr(`arrow::MemoryPool`, MemoryPool__default()) + shared_ptr(MemoryPool, MemoryPool__default()) } diff --git 
a/r/R/message.R b/r/R/message.R index e0add59ac53..701d157fd43 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -17,7 +17,7 @@ #' @include arrow-package.R -#' @title class arrow::ipc::Message +#' @title class arrow::Message #' #' @usage NULL #' @format NULL @@ -27,12 +27,12 @@ #' #' TODO #' -#' @rdname arrow__ipc__Message -#' @name arrow__ipc__Message -`arrow::ipc::Message` <- R6Class("arrow::ipc::Message", inherit = `arrow::Object`, +#' @rdname Message +#' @name Message +Message <- R6Class("Message", inherit = Object, public = list( Equals = function(other){ - assert_that(inherits(other, "arrow::ipc::Message")) + assert_is(other, "Message") ipc___Message__Equals(self, other) }, body_length = function() ipc___Message__body_length(self), @@ -40,15 +40,15 @@ ), active = list( type = function() ipc___Message__type(self), - metadata = function() shared_ptr(`arrow::Buffer`, ipc___Message__metadata(self)), - body = function() shared_ptr(`arrow::Buffer`, ipc___Message__body(self)) + metadata = function() shared_ptr(Buffer, ipc___Message__metadata(self)), + body = function() shared_ptr(Buffer, ipc___Message__body(self)) ) ) #' @export -`==.arrow::ipc::Message` <- function(x, y) x$Equals(y) +`==.Message` <- function(x, y) x$Equals(y) -#' @title class arrow::ipc::MessageReader +#' @title class arrow::MessageReader #' #' @usage NULL #' @format NULL @@ -58,31 +58,20 @@ #' #' TODO #' -#' @rdname arrow__ipc__MessageReader -#' @name arrow__ipc__MessageReader -`arrow::ipc::MessageReader` <- R6Class("arrow::ipc::MessageReader", inherit = `arrow::Object`, +#' @rdname MessageReader +#' @name MessageReader +#' @export +MessageReader <- R6Class("MessageReader", inherit = Object, public = list( - ReadNextMessage = function() unique_ptr(`arrow::ipc::Message`, ipc___MessageReader__ReadNextMessage(self)) + ReadNextMessage = function() unique_ptr(Message, ipc___MessageReader__ReadNextMessage(self)) ) ) -#' Open a MessageReader that reads from a stream -#' -#' @param stream an InputStream 
-#' -#' @export -MessageReader <- function(stream) { - UseMethod("MessageReader") -} - -#' @export -MessageReader.default <- function(stream) { - MessageReader(BufferReader(stream)) -} - -#' @export -`MessageReader.arrow::io::InputStream` <- function(stream) { - unique_ptr(`arrow::ipc::MessageReader`, ipc___MessageReader__Open(stream)) +MessageReader$create <- function(stream) { + if (!inherits(stream, "InputStream")) { + stream <- BufferReader$create(stream) + } + unique_ptr(MessageReader, ipc___MessageReader__Open(stream)) } #' Read a Message from a stream @@ -95,16 +84,16 @@ read_message <- function(stream) { } #' @export -read_message.default<- function(stream) { - read_message(BufferReader(stream)) +read_message.default <- function(stream) { + read_message(BufferReader$create(stream)) } #' @export -`read_message.arrow::io::InputStream` <- function(stream) { - unique_ptr(`arrow::ipc::Message`, ipc___ReadMessage(stream) ) +read_message.InputStream <- function(stream) { + unique_ptr(Message, ipc___ReadMessage(stream) ) } #' @export -`read_message.arrow::ipc::MessageReader` <- function(stream) { +read_message.MessageReader <- function(stream) { stream$ReadNextMessage() } diff --git a/r/R/parquet.R b/r/R/parquet.R index 6f122e57fa3..3e85570ea64 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -15,29 +15,128 @@ # specific language governing permissions and limitations # under the License. -#' @include arrow-package.R -`parquet::arrow::FileReader` <- R6Class("parquet::arrow::FileReader", - inherit = `arrow::Object`, +#' Read a Parquet file +#' +#' '[Parquet](https://parquet.apache.org/)' is a columnar storage file format. +#' This function enables you to read Parquet files into R. +#' +#' @inheritParams read_delim_arrow +#' @param props [ParquetReaderProperties] +#' @param ... Additional arguments passed to `ParquetFileReader$create()` +#' +#' @return A [arrow::Table][Table], or a `data.frame` if `as_tibble` is +#' `TRUE`. 
+#' @examples +#' \donttest{ +#' df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) +#' head(df) +#' } +#' @export +read_parquet <- function(file, + col_select = NULL, + as_tibble = TRUE, + props = ParquetReaderProperties$create(), + ...) { + reader <- ParquetFileReader$create(file, props = props, ...) + tab <- reader$ReadTable(!!enquo(col_select)) + + if (as_tibble) { + tab <- as.data.frame(tab) + } + tab +} + +#' @title ParquetFileReader class +#' @rdname ParquetFileReader +#' @name ParquetFileReader +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class enables you to interact with Parquet files. +#' +#' @section Factory: +#' +#' The `ParquetFileReader$create()` factory method instantiates the object and +#' takes the following arguments: +#' +#' - `file` A character file name, raw vector, or Arrow file connection object +#' (e.g. `RandomAccessFile`). +#' - `props` Optional [ParquetReaderProperties] +#' - `mmap` Logical: whether to memory-map the file (default `TRUE`) +#' - `...` Additional arguments, currently ignored +#' +#' @section Methods: +#' +#' - `$ReadTable(col_select)`: get an `arrow::Table` from the file, possibly +#' with columns filtered by a character vector of column names or a +#' `tidyselect` specification. 
+#' - `$GetSchema()`: get the `arrow::Schema` of the data in the file +#' +#' @export +#' @examples +#' \donttest{ +#' f <- system.file("v0.7.1.parquet", package="arrow") +#' pq <- ParquetFileReader$create(f) +#' pq$GetSchema() +#' tab <- pq$ReadTable(starts_with("c")) +#' tab$schema +#' } +#' @include arrow-package.R +ParquetFileReader <- R6Class("ParquetFileReader", + inherit = Object, public = list( ReadTable = function(col_select = NULL) { col_select <- enquo(col_select) - if(quo_is_null(col_select)) { - shared_ptr(`arrow::Table`, parquet___arrow___FileReader__ReadTable1(self)) + if (quo_is_null(col_select)) { + shared_ptr(Table, parquet___arrow___FileReader__ReadTable1(self)) } else { - all_vars <- shared_ptr(`arrow::Schema`, parquet___arrow___FileReader__GetSchema(self))$names + all_vars <- shared_ptr(Schema, parquet___arrow___FileReader__GetSchema(self))$names indices <- match(vars_select(all_vars, !!col_select), all_vars) - 1L - shared_ptr(`arrow::Table`, parquet___arrow___FileReader__ReadTable2(self, indices)) + shared_ptr(Table, parquet___arrow___FileReader__ReadTable2(self, indices)) } }, GetSchema = function() { - shared_ptr(`arrow::Schema`, parquet___arrow___FileReader__GetSchema(self)) + shared_ptr(Schema, parquet___arrow___FileReader__GetSchema(self)) } ) ) -`parquet::arrow::ArrowReaderProperties` <- R6Class("parquet::arrow::ArrowReaderProperties", - inherit = `arrow::Object`, +ParquetFileReader$create <- function(file, + props = ParquetReaderProperties$create(), + mmap = TRUE, + ...) { + file <- make_readable_file(file, mmap) + assert_is(props, "ParquetReaderProperties") + + unique_ptr(ParquetFileReader, parquet___arrow___FileReader__OpenFile(file, props)) +} + +#' @title ParquetReaderProperties class +#' @rdname ParquetReaderProperties +#' @name ParquetReaderProperties +#' @docType class +#' @usage NULL +#' @format NULL +#' @description This class holds settings to control how a Parquet file is read +#' by [ParquetFileReader]. 
+#' +#' @section Factory: +#' +#' The `ParquetReaderProperties$create()` factory method instantiates the object +#' and takes the following arguments: +#' +#' - `use_threads` Logical: whether to use multithreading (default `TRUE`) +#' +#' @section Methods: +#' +#' - `$read_dictionary(column_index)` +#' - `$set_read_dictionary(column_index, read_dict)` +#' - `$use_threads(use_threads)` +#' +#' @export +ParquetReaderProperties <- R6Class("ParquetReaderProperties", + inherit = Object, public = list( read_dictionary = function(column_index) { parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index) @@ -57,93 +156,27 @@ ) ) -#' Create a new ArrowReaderProperties instance -#' -#' @param use_threads use threads? -#' -#' @export -#' @keywords internal -parquet_arrow_reader_properties <- function(use_threads = option_use_threads()) { - shared_ptr(`parquet::arrow::ArrowReaderProperties`, parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads))) -} - -#' Parquet file reader -#' -#' @inheritParams read_delim_arrow -#' @param props reader file properties, as created by [parquet_arrow_reader_properties()] -#' -#' @param ... additional parameters -#' -#' @export -parquet_file_reader <- function(file, props = parquet_arrow_reader_properties(), ...) { - UseMethod("parquet_file_reader") -} - -#' @export -`parquet_file_reader.arrow::io::RandomAccessFile` <- function(file, props = parquet_arrow_reader_properties(), ...) { - unique_ptr(`parquet::arrow::FileReader`, parquet___arrow___FileReader__OpenFile(file, props)) -} - -#' @export -parquet_file_reader.character <- function(file, - props = parquet_arrow_reader_properties(), - memory_map = TRUE, - ...) { - file <- normalizePath(file) - if (isTRUE(memory_map)) { - parquet_file_reader(mmap_open(file), props = props, ...) - } else { - parquet_file_reader(ReadableFile(file), props = props, ...) - } -} - -#' @export -parquet_file_reader.raw <- function(file, props = parquet_arrow_reader_properties(), ...) 
{ - parquet_file_reader(BufferReader(file), props = props, ...) +ParquetReaderProperties$create <- function(use_threads = option_use_threads()) { + shared_ptr( + ParquetReaderProperties, + parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads)) + ) } -#' Read a Parquet file -#' -#' '[Parquet](https://parquet.apache.org/)' is a columnar storage file format. -#' This function enables you to read Parquet files into R. -#' -#' @inheritParams read_delim_arrow -#' @inheritParams parquet_file_reader -#' -#' @return A [arrow::Table][arrow__Table], or a `data.frame` if `as_tibble` is -#' `TRUE`. -#' @examples -#' \donttest{ -#' try({ -#' df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) -#' }) -#' } -#' @export -read_parquet <- function(file, col_select = NULL, as_tibble = TRUE, props = parquet_arrow_reader_properties(), ...) { - reader <- parquet_file_reader(file, props = props, ...) - tab <- reader$ReadTable(!!enquo(col_select)) - - if (as_tibble) { - tab <- as.data.frame(tab) - } - tab -} #' Write Parquet file to disk #' #' [Parquet](https://parquet.apache.org/) is a columnar storage file format. #' This function enables you to write Parquet files from R. 
#' -#' @param table An [arrow::Table][arrow__Table], or an object convertible to it +#' @param table An [arrow::Table][Table], or an object convertible to it #' @param file a file path #' #' @examples #' \donttest{ -#' try({ -#' tf <- tempfile(fileext = ".parquet") -#' on.exit(unlink(tf)) -#' write_parquet(tibble::tibble(x = 1:5), tf) -#' }) +#' tf <- tempfile(fileext = ".parquet") +#' on.exit(unlink(tf)) +#' write_parquet(tibble::tibble(x = 1:5), tf) #' } #' @export write_parquet <- function(table, file) { diff --git a/r/R/read_record_batch.R b/r/R/read-record-batch.R similarity index 54% rename from r/R/read_record_batch.R rename to r/R/read-record-batch.R index cc57b447985..112f6539b39 100644 --- a/r/R/read_record_batch.R +++ b/r/R/read-record-batch.R @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. -#' read [arrow::RecordBatch][arrow__RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][arrow__Schema] +#' read [arrow::RecordBatch][RecordBatch] as encapsulated IPC message, given a known [arrow::Schema][schema] #' -#' @param obj a [arrow::ipc::Message][arrow__ipc__Message], a [arrow::io::InputStream][arrow__io__InputStream], a [arrow::Buffer][arrow__Buffer], or a raw vector -#' @param schema a [arrow::Schema][arrow__Schema] +#' @param obj a [arrow::Message][Message], a [arrow::io::InputStream][InputStream], a [Buffer][buffer], or a raw vector +#' @param schema a [arrow::Schema][schema] #' -#' @return a [arrow::RecordBatch][arrow__RecordBatch] +#' @return a [arrow::RecordBatch][RecordBatch] #' #' @export read_record_batch <- function(obj, schema){ @@ -28,27 +28,27 @@ read_record_batch <- function(obj, schema){ } #' @export -`read_record_batch.arrow::ipc::Message` <- function(obj, schema) { - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__Message__Schema(obj, schema)) +read_record_batch.Message <- function(obj, schema) { + assert_is(schema, 
"Schema") + shared_ptr(RecordBatch, ipc___ReadRecordBatch__Message__Schema(obj, schema)) } #' @export -`read_record_batch.arrow::io::InputStream` <- function(obj, schema) { - assert_that(inherits(schema, "arrow::Schema")) - shared_ptr(`arrow::RecordBatch`, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) +read_record_batch.InputStream <- function(obj, schema) { + assert_is(schema, "Schema") + shared_ptr(RecordBatch, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) } #' @export read_record_batch.raw <- function(obj, schema){ - stream <- BufferReader(obj) + stream <- BufferReader$create(obj) on.exit(stream$close()) read_record_batch(stream, schema) } #' @export -`read_record_batch.arrow::Buffer` <- function(obj, schema){ - stream <- BufferReader(obj) +read_record_batch.Buffer <- function(obj, schema){ + stream <- BufferReader$create(obj) on.exit(stream$close()) read_record_batch(stream, schema) } diff --git a/r/R/read-table.R b/r/R/read-table.R new file mode 100644 index 00000000000..f372d035b90 --- /dev/null +++ b/r/R/read-table.R @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Read an [arrow::Table][Table] from a stream +#' +#' @param stream stream. 
+#' +#' - a [arrow::RecordBatchFileReader][RecordBatchFileReader]: +#' read an [arrow::Table][Table] +#' from all the record batches in the reader +#' +#' - a [arrow::RecordBatchStreamReader][RecordBatchStreamReader]: +#' read an [arrow::Table][Table] from the remaining record batches +#' in the reader +#' +#' - a string file path: interpret the file as an arrow +#' binary file format, and uses a [arrow::RecordBatchFileReader][RecordBatchFileReader] +#' to process it. +#' +#' - a raw vector: read using a [arrow::RecordBatchStreamReader][RecordBatchStreamReader] +#' +#' @return +#' +#' - `read_table` returns an [arrow::Table][Table] +#' - `read_arrow` returns a `data.frame` +#' +#' @details +#' +#' The methods using [arrow::RecordBatchFileReader][RecordBatchFileReader] and +#' [arrow::RecordBatchStreamReader][RecordBatchStreamReader] offer the most +#' flexibility. The other methods are for convenience. +#' +#' @export +read_table <- function(stream){ + UseMethod("read_table") +} + +#' @export +read_table.RecordBatchFileReader <- function(stream) { + shared_ptr(Table, Table__from_RecordBatchFileReader(stream)) +} + +#' @export +read_table.RecordBatchStreamReader <- function(stream) { + shared_ptr(Table, Table__from_RecordBatchStreamReader(stream)) +} + +#' @export +read_table.character <- function(stream) { + assert_that(length(stream) == 1L) + stream <- ReadableFile$create(stream) + on.exit(stream$close()) + batch_reader <- RecordBatchFileReader$create(stream) + shared_ptr(Table, Table__from_RecordBatchFileReader(batch_reader)) +} + +#' @export +read_table.raw <- function(stream) { + stream <- BufferReader$create(stream) + on.exit(stream$close()) + batch_reader <- RecordBatchStreamReader$create(stream) + shared_ptr(Table, Table__from_RecordBatchStreamReader(batch_reader)) +} + +#' @rdname read_table +#' @export +read_arrow <- function(stream) { + as.data.frame(read_table(stream)) +} diff --git a/r/R/read_table.R b/r/R/read_table.R deleted file mode 100644 index 
a05d15dff56..00000000000 --- a/r/R/read_table.R +++ /dev/null @@ -1,83 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#' Read an [arrow::Table][arrow__Table] from a stream -#' -#' @param stream stream. -#' -#' - a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader]: -#' read an [arrow::Table][arrow__Table] -#' from all the record batches in the reader -#' -#' - a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader]: -#' read an [arrow::Table][arrow__Table] from the remaining record batches -#' in the reader -#' -#' - a string file path: interpret the file as an arrow -#' binary file format, and uses a [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] -#' to process it. -#' -#' - a raw vector: read using a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] -#' -#' @return -#' -#' - `read_table` returns an [arrow::Table][arrow__Table] -#' - `read_arrow` returns a `data.frame` -#' -#' @details -#' -#' The methods using [arrow::ipc::RecordBatchFileReader][arrow__ipc__RecordBatchFileReader] and -#' [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader] offer the most -#' flexibility. The other methods are for convenience. 
-#' -#' @export -read_table <- function(stream){ - UseMethod("read_table") -} - -#' @export -`read_table.arrow::ipc::RecordBatchFileReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(stream)) -} - -#' @export -`read_table.arrow::ipc::RecordBatchStreamReader` <- function(stream) { - shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(stream)) -} - -#' @export -read_table.character <- function(stream) { - assert_that(length(stream) == 1L) - stream <- ReadableFile(stream) - on.exit(stream$close()) - batch_reader <- RecordBatchFileReader(stream) - shared_ptr(`arrow::Table`, Table__from_RecordBatchFileReader(batch_reader)) -} - -#' @export -`read_table.raw` <- function(stream) { - stream <- BufferReader(stream) - on.exit(stream$close()) - batch_reader <- RecordBatchStreamReader(stream) - shared_ptr(`arrow::Table`, Table__from_RecordBatchStreamReader(batch_reader)) -} - -#' @rdname read_table -#' @export -read_arrow <- function(stream){ - as.data.frame(read_table(stream)) -} diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R new file mode 100644 index 00000000000..dadf15e815c --- /dev/null +++ b/r/R/record-batch-reader.R @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + + +#' @title RecordBatchReader classes +#' @description `RecordBatchFileReader` and `RecordBatchStreamReader` are +#' interfaces for generating record batches from different input sources. +#' @usage NULL +#' @format NULL +#' @docType class +#' @section Factory: +#' +#' The `RecordBatchFileReader$create()` and `RecordBatchStreamReader$create()` +#' factory methods instantiate the object and +#' take a single argument, named according to the class: +#' +#' - `file` A character file name, raw vector, or Arrow file connection object +#' (e.g. `RandomAccessFile`). +#' - `stream` A raw vector, [Buffer], or `InputStream`. +#' +#' @section Methods: +#' +#' - `$read_next_batch()`: Returns a `RecordBatch` +#' - `$schema()`: Returns a [Schema] +#' - `$batches()`: Returns a list of `RecordBatch`es +#' - `$get_batch(i)`: For `RecordBatchFileReader`, return a particular batch +#' by an integer index. +#' - `$num_record_batches()`: For `RecordBatchFileReader`, see how many batches +#' are in the file. 
+#' +#' @rdname RecordBatchReader +#' @name RecordBatchReader +#' @include arrow-package.R +RecordBatchReader <- R6Class("RecordBatchReader", inherit = Object, + public = list( + read_next_batch = function() { + shared_ptr(RecordBatch, RecordBatchReader__ReadNext(self)) + } + ), + active = list( + schema = function() shared_ptr(Schema, RecordBatchReader__schema(self)) + ) +) + +#' @rdname RecordBatchReader +#' @usage NULL +#' @format NULL +#' @export +RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader, + public = list( + batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = RecordBatch) + ) +) +RecordBatchStreamReader$create <- function(stream){ + if (inherits(stream, c("raw", "Buffer"))) { + stream <- BufferReader$create(stream) + } + assert_is(stream, "InputStream") + + shared_ptr(RecordBatchStreamReader, ipc___RecordBatchStreamReader__Open(stream)) +} + +#' @rdname RecordBatchReader +#' @usage NULL +#' @format NULL +#' @export +RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = Object, + # Why doesn't this inherit from RecordBatchReader? 
+ public = list( + get_batch = function(i) shared_ptr(RecordBatch, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)), + + batches = function() map(ipc___RecordBatchFileReader__batches(self), shared_ptr, class = RecordBatch) + ), + active = list( + num_record_batches = function() ipc___RecordBatchFileReader__num_record_batches(self), + schema = function() shared_ptr(Schema, ipc___RecordBatchFileReader__schema(self)) + ) +) +RecordBatchFileReader$create <- function(file) { + file <- make_readable_file(file) + shared_ptr(RecordBatchFileReader, ipc___RecordBatchFileReader__Open(file)) +} diff --git a/r/R/record-batch-writer.R b/r/R/record-batch-writer.R new file mode 100644 index 00000000000..208ceb1c3c5 --- /dev/null +++ b/r/R/record-batch-writer.R @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' @title RecordBatchWriter classes +#' @description `RecordBatchFileWriter` and `RecordBatchStreamWriter` are +#' interfaces for writing record batches to either the binary file or streaming +#' format. 
+#' @usage NULL +#' @format NULL +#' @docType class +#' @section Usage: +#' +#' ``` +#' writer <- RecordBatchStreamWriter$create(sink, schema) +#' +#' writer$write_batch(batch) +#' writer$write_table(table) +#' writer$close() +#' ``` +#' @section Factory: +#' +#' The `RecordBatchFileWriter$create()` and `RecordBatchStreamWriter$create()` +#' factory methods instantiate the object and +#' take a single argument, named according to the class: +#' +#' - `sink` A character file name or an `OutputStream`. +#' - `schema` A [Schema] for the data to be written. +#' +#' @section Methods: +#' +#' - `$write(x)`: Write a [RecordBatch], [Table], or `data.frame`, dispatching +#' to the methods below appropriately +#' - `$write_batch(batch)`: Write a `RecordBatch` to stream +#' - `$write_table(table)`: Write a `Table` to stream +#' - `$close()`: close stream +#' +#' @rdname RecordBatchWriter +#' @name RecordBatchWriter +#' @include arrow-package.R +RecordBatchWriter <- R6Class("RecordBatchWriter", inherit = Object, + public = list( + write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch), + write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table), + + write = function(x) { + if (inherits(x, "RecordBatch")) { + self$write_batch(x) + } else if (inherits(x, "Table")) { + self$write_table(x) + } else if (inherits(x, "data.frame")) { + self$write_table(table(x)) + } else { + abort("unexpected type for RecordBatchWriter$write(), must be an arrow::RecordBatch or an arrow::Table") + } + }, + + close = function() ipc___RecordBatchWriter__Close(self) + ) +) + +#' @usage NULL +#' @format NULL +#' @rdname RecordBatchWriter +#' @export +RecordBatchStreamWriter <- R6Class("RecordBatchStreamWriter", inherit = RecordBatchWriter) +RecordBatchStreamWriter$create <- function(sink, schema) { + if (is.character(sink)) { + sink <- FileOutputStream$create(sink) + } + assert_is(sink, "OutputStream") + assert_is(schema, "Schema") + + 
shared_ptr(RecordBatchStreamWriter, ipc___RecordBatchStreamWriter__Open(sink, schema)) +} + +#' @usage NULL +#' @format NULL +#' @rdname RecordBatchWriter +#' @export +RecordBatchFileWriter <- R6Class("RecordBatchFileWriter", inherit = RecordBatchStreamWriter) +RecordBatchFileWriter$create <- function(sink, schema) { + if (is.character(sink)) { + sink <- FileOutputStream$create(sink) + } + assert_is(sink, "OutputStream") + assert_is(schema, "Schema") + + shared_ptr(RecordBatchFileWriter, ipc___RecordBatchFileWriter__Open(sink, schema)) +} diff --git a/r/R/RecordBatch.R b/r/R/record-batch.R similarity index 61% rename from r/R/RecordBatch.R rename to r/R/record-batch.R index de8b01ef180..fecba3abb69 100644 --- a/r/R/RecordBatch.R +++ b/r/R/record-batch.R @@ -27,81 +27,83 @@ #' #' TODO #' -#' @rdname arrow__RecordBatch -#' @name arrow__RecordBatch -`arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, +#' @rdname RecordBatch +#' @name RecordBatch +RecordBatch <- R6Class("RecordBatch", inherit = Object, public = list( - column = function(i) shared_ptr(`arrow::Array`, RecordBatch__column(self, i)), + column = function(i) shared_ptr(Array, RecordBatch__column(self, i)), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), Equals = function(other) { - assert_that(inherits(other, "arrow::RecordBatch")) + assert_is(other, "RecordBatch") RecordBatch__Equals(self, other) }, RemoveColumn = function(i){ - shared_ptr(`arrow::RecordBatch`, RecordBatch__RemoveColumn(self, i)) + shared_ptr(RecordBatch, RecordBatch__RemoveColumn(self, i)) }, Slice = function(offset, length = NULL) { if (is.null(length)) { - shared_ptr(`arrow::RecordBatch`, RecordBatch__Slice1(self, offset)) + shared_ptr(RecordBatch, RecordBatch__Slice1(self, offset)) } else { - shared_ptr(`arrow::RecordBatch`, RecordBatch__Slice2(self, offset, length)) + shared_ptr(RecordBatch, RecordBatch__Slice2(self, offset, length)) } }, serialize = 
function() ipc___SerializeRecordBatch__Raw(self), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_schema, "arrow::Schema")) - assert_that(inherits(options, "arrow::compute::CastOptions")) + assert_is(target_schema, "Schema") + assert_is(options, "CastOptions") assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") - shared_ptr(`arrow::RecordBatch`, RecordBatch__cast(self, target_schema, options)) + shared_ptr(RecordBatch, RecordBatch__cast(self, target_schema, options)) } ), active = list( num_columns = function() RecordBatch__num_columns(self), num_rows = function() RecordBatch__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, RecordBatch__schema(self)), - columns = function() map(RecordBatch__columns(self), shared_ptr, `arrow::Array`) + schema = function() shared_ptr(Schema, RecordBatch__schema(self)), + columns = function() map(RecordBatch__columns(self), shared_ptr, Array) ) ) +RecordBatch$create <- function(..., schema = NULL){ + arrays <- list2(...) 
+ # making sure there are always names + if (is.null(names(arrays))) { + names(arrays) <- rep_len("", length(arrays)) + } + stopifnot(length(arrays) > 0) + shared_ptr(RecordBatch, RecordBatch__from_arrays(schema, arrays)) +} + #' @export -`names.arrow::RecordBatch` <- function(x) { +names.RecordBatch <- function(x) { x$names() } #' @export -`==.arrow::RecordBatch` <- function(x, y) { +`==.RecordBatch` <- function(x, y) { x$Equals(y) } #' @export -`dim.arrow::RecordBatch` <- function(x) { +dim.RecordBatch <- function(x) { c(x$num_rows, x$num_columns) } #' @export -`as.data.frame.arrow::RecordBatch` <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ +as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ RecordBatch__to_dataframe(x, use_threads = option_use_threads()) } -#' Create an [arrow::RecordBatch][arrow__RecordBatch] from a data frame +#' Create an [arrow::RecordBatch][RecordBatch] from a data frame #' -#' @param ... A variable number of arrow::Array +#' @param ... A variable number of Array #' @param schema a arrow::Schema #' -#' @return a [arrow::RecordBatch][arrow__RecordBatch] +#' @return a [arrow::RecordBatch][RecordBatch] #' @export -record_batch <- function(..., schema = NULL){ - arrays <- list2(...) - # making sure there are always names - if (is.null(names(arrays))) { - names(arrays) <- rep_len("", length(arrays)) - } - stopifnot(length(arrays) > 0) - shared_ptr(`arrow::RecordBatch`, RecordBatch__from_arrays(schema, arrays)) -} +record_batch <- RecordBatch$create diff --git a/r/R/Schema.R b/r/R/schema.R similarity index 65% rename from r/R/Schema.R rename to r/R/schema.R index 906841b1ccf..9f28fb53d17 100644 --- a/r/R/Schema.R +++ b/r/R/schema.R @@ -16,8 +16,12 @@ # under the License. 
#' @include arrow-package.R - -#' @title class arrow::Schema +#' @title Schema class +#' +#' @description Create a `Schema` when you +#' want to convert an R `data.frame` to Arrow but don't want to rely on the +#' default mapping of R types to Arrow types, such as when you want to choose a +#' specific numeric precision. #' #' @usage NULL #' @format NULL @@ -39,14 +43,15 @@ #' - `$num_fields()`: returns the number of fields #' - `$field(i)`: returns the field at index `i` (0-based) #' -#' @rdname arrow__Schema -#' @name arrow__Schema -`arrow::Schema` <- R6Class("arrow::Schema", - inherit = `arrow::Object`, +#' @rdname Schema +#' @name Schema +#' @export +Schema <- R6Class("Schema", + inherit = Object, public = list( ToString = function() Schema__ToString(self), num_fields = function() Schema__num_fields(self), - field = function(i) shared_ptr(`arrow::Field`, Schema__field(self, i)), + field = function(i) shared_ptr(Field, Schema__field(self, i)), serialize = function() Schema__serialize(self), Equals = function(other, check_metadata = TRUE) Schema__Equals(self, other, isTRUE(check_metadata)) ), @@ -55,27 +60,16 @@ ) ) +Schema$create <- function(...) shared_ptr(Schema, schema_(.fields(list2(...)))) + #' @export -`==.arrow::Schema` <- function(lhs, rhs){ - lhs$Equals(rhs) -} +`==.Schema` <- function(lhs, rhs) lhs$Equals(rhs) -#' Create a schema -#' -#' This function lets you define a schema for a table. This is useful when you -#' want to convert an R `data.frame` to Arrow but don't want to rely on the -#' default mapping of R types to Arrow types, such as when you want to choose a -#' specific numeric precision. -#' #' @param ... named list of [data types][data-type] -#' -#' @return A [schema][arrow__Schema] object. 
-#' #' @export +#' @rdname Schema # TODO (npr): add examples once ARROW-5505 merges -schema <- function(...){ - shared_ptr(`arrow::Schema`, schema_(.fields(list2(...)))) -} +schema <- Schema$create #' read a Schema from a stream #' @@ -88,25 +82,25 @@ read_schema <- function(stream, ...) { } #' @export -`read_schema.arrow::io::InputStream` <- function(stream, ...) { - shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) +read_schema.InputStream <- function(stream, ...) { + shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) } #' @export -`read_schema.arrow::Buffer` <- function(stream, ...) { - stream <- BufferReader(stream) +read_schema.Buffer <- function(stream, ...) { + stream <- BufferReader$create(stream) on.exit(stream$close()) - shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) + shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) } #' @export -`read_schema.raw` <- function(stream, ...) { - stream <- BufferReader(stream) +read_schema.raw <- function(stream, ...) { + stream <- BufferReader$create(stream) on.exit(stream$close()) - shared_ptr(`arrow::Schema`, ipc___ReadSchema_InputStream(stream)) + shared_ptr(Schema, ipc___ReadSchema_InputStream(stream)) } #' @export -`read_schema.arrow::ipc::Message` <- function(stream, ...) { - shared_ptr(`arrow::Schema`, ipc___ReadSchema_Message(stream)) +read_schema.Message <- function(stream, ...) 
{ + shared_ptr(Schema, ipc___ReadSchema_Message(stream)) } diff --git a/r/R/Struct.R b/r/R/struct.R similarity index 81% rename from r/R/Struct.R rename to r/R/struct.R index fa35b7ec0f2..ab72d4757bf 100644 --- a/r/R/Struct.R +++ b/r/R/struct.R @@ -17,10 +17,10 @@ #' @include type.R -`arrow::StructType` <- R6Class("arrow::StructType", - inherit = `arrow::NestedType`, +StructType <- R6Class("StructType", + inherit = NestedType, public = list( - GetFieldByName = function(name) shared_ptr(`arrow::Field`, StructType__GetFieldByName(self, name)), + GetFieldByName = function(name) shared_ptr(Field, StructType__GetFieldByName(self, name)), GetFieldIndex = function(name) StructType__GetFieldIndex(self, name) ) ) @@ -29,5 +29,5 @@ #' @export struct <- function(...){ xp <- struct_(.fields(list(...))) - shared_ptr(`arrow::StructType`, xp) + shared_ptr(StructType, xp) } diff --git a/r/R/Table.R b/r/R/table.R similarity index 65% rename from r/R/Table.R rename to r/R/table.R index 1255c8a0e1b..6339ac6c8c2 100644 --- a/r/R/Table.R +++ b/r/R/table.R @@ -23,24 +23,32 @@ #' @format NULL #' @docType class #' +#' @section Factory: +#' +#' The `Table$create()` function takes the following arguments: +#' +#' * `...` arrays, chunked arrays, or R vectors +#' * `schema` a schema. The default (`NULL`) infers the schema from the `...` +#' #' @section Methods: #' #' TODO #' -#' @rdname arrow__Table -#' @name arrow__Table -`arrow::Table` <- R6Class("arrow::Table", inherit = `arrow::Object`, +#' @rdname Table +#' @name Table +#' @export +Table <- R6Class("Table", inherit = Object, public = list( - column = function(i) shared_ptr(`arrow::ChunkedArray`, Table__column(self, i)), - field = function(i) shared_ptr(`arrow::Field`, Table__field(self, i)), + column = function(i) shared_ptr(ChunkedArray, Table__column(self, i)), + field = function(i) shared_ptr(Field, Table__field(self, i)), serialize = function(output_stream, ...) 
write_table(self, output_stream, ...), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { - assert_that(inherits(target_schema, "arrow::Schema")) - assert_that(inherits(options, "arrow::compute::CastOptions")) + assert_is(target_schema, "Schema") + assert_is(options, "CastOptions") assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") - shared_ptr(`arrow::Table`, Table__cast(self, target_schema, options)) + shared_ptr(Table, Table__cast(self, target_schema, options)) }, select = function(spec) { @@ -51,7 +59,7 @@ all_vars <- Table__column_names(self) vars <- vars_select(all_vars, !!spec) indices <- match(vars, all_vars) - shared_ptr(`arrow::Table`, Table__select(self, indices)) + shared_ptr(Table, Table__select(self, indices)) } } @@ -60,35 +68,27 @@ active = list( num_columns = function() Table__num_columns(self), num_rows = function() Table__num_rows(self), - schema = function() shared_ptr(`arrow::Schema`, Table__schema(self)), - columns = function() map(Table__columns(self), shared_ptr, class = `arrow::Column`) + schema = function() shared_ptr(Schema, Table__schema(self)), + columns = function() map(Table__columns(self), shared_ptr, class = Column) ) ) -#' Create an arrow::Table from a data frame -#' -#' @param ... arrays, chunked arrays, or R vectors -#' @param schema a schema. The default (`NULL`) infers the schema from the `...` -#' -#' @return an arrow::Table -#' -#' @export -table <- function(..., schema = NULL){ +Table$create <- function(..., schema = NULL){ dots <- list2(...) 
# making sure there are always names if (is.null(names(dots))) { names(dots) <- rep_len("", length(dots)) } stopifnot(length(dots) > 0) - shared_ptr(`arrow::Table`, Table__from_dots(dots, schema)) + shared_ptr(Table, Table__from_dots(dots, schema)) } #' @export -`as.data.frame.arrow::Table` <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ +as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ Table__to_dataframe(x, use_threads = option_use_threads()) } #' @export -`dim.arrow::Table` <- function(x) { +dim.Table <- function(x) { c(x$num_rows, x$num_columns) } diff --git a/r/R/type.R b/r/R/type.R index 86b888d1cce..36d81e293c0 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -18,7 +18,7 @@ #' @include arrow-package.R #' @export -`!=.arrow::Object` <- function(lhs, rhs){ +`!=.Object` <- function(lhs, rhs){ !(lhs == rhs) } @@ -32,26 +32,26 @@ #' #' TODO #' -#' @rdname arrow__DataType -#' @name arrow__DataType -`arrow::DataType` <- R6Class("arrow::DataType", - inherit = `arrow::Object`, +#' @rdname DataType +#' @name DataType +DataType <- R6Class("DataType", + inherit = Object, public = list( ToString = function() { DataType__ToString(self) }, Equals = function(other) { - assert_that(inherits(other, "arrow::DataType")) + assert_is(other, "DataType") DataType__Equals(self, other) }, num_children = function() { DataType__num_children(self) }, children = function() { - map(DataType__children_pointer(self), shared_ptr, class= `arrow::Field`) + map(DataType__children_pointer(self), shared_ptr, class = Field) }, - ..dispatch = function(){ + ..dispatch = function() { switch(names(Type)[self$id + 1], "NA" = null(), BOOL = boolean(), @@ -70,33 +70,27 @@ BINARY = stop("Type BINARY not implemented yet"), DATE32 = date32(), DATE64 = date64(), - TIMESTAMP = shared_ptr(`arrow::Timestamp`,self$pointer()), - TIME32 = shared_ptr(`arrow::Time32`,self$pointer()), - TIME64 = shared_ptr(`arrow::Time64`,self$pointer()), + 
TIMESTAMP = shared_ptr(Timestamp, self$pointer()), + TIME32 = shared_ptr(Time32, self$pointer()), + TIME64 = shared_ptr(Time64, self$pointer()), INTERVAL = stop("Type INTERVAL not implemented yet"), - DECIMAL = shared_ptr(`arrow::Decimal128Type`, self$pointer()), - LIST = shared_ptr(`arrow::ListType`, self$pointer()), - STRUCT = shared_ptr(`arrow::StructType`, self$pointer()), + DECIMAL = shared_ptr(Decimal128Type, self$pointer()), + LIST = shared_ptr(ListType, self$pointer()), + STRUCT = shared_ptr(StructType, self$pointer()), UNION = stop("Type UNION not implemented yet"), - DICTIONARY = shared_ptr(`arrow::DictionaryType`, self$pointer()), + DICTIONARY = shared_ptr(DictionaryType, self$pointer()), MAP = stop("Type MAP not implemented yet") ) } ), active = list( - id = function(){ - DataType__id(self) - }, - name = function() { - DataType__name(self) - } + id = function() DataType__id(self), + name = function() DataType__name(self) ) ) -`arrow::DataType`$dispatch <- function(xp){ - shared_ptr(`arrow::DataType`, xp)$..dispatch() -} +DataType$create <- function(xp) shared_ptr(DataType, xp)$..dispatch() #' infer the arrow Array type from an R vector #' @@ -104,23 +98,19 @@ #' #' @return an arrow logical type #' @export -type <- function(x) { - UseMethod("type") -} +type <- function(x) UseMethod("type") #' @export -type.default <- function(x) { - `arrow::DataType`$dispatch(Array__infer_type(x)) -} +type.default <- function(x) DataType$create(Array__infer_type(x)) #' @export -`type.arrow::Array` <- function(x) x$type +type.Array <- function(x) x$type #' @export -`type.arrow::ChunkedArray` <- function(x) x$type +type.ChunkedArray <- function(x) x$type #' @export -`type.arrow::Column` <- function(x) x$type +type.Column <- function(x) x$type #----- metadata @@ -135,122 +125,70 @@ type.default <- function(x) { #' #' TODO #' -#' @rdname arrow__FixedWidthType -#' @name arrow__FixedWidthType -`arrow::FixedWidthType` <- R6Class("arrow::FixedWidthType", - inherit = 
`arrow::DataType`, +#' @rdname FixedWidthType +#' @name FixedWidthType +FixedWidthType <- R6Class("FixedWidthType", + inherit = DataType, active = list( bit_width = function() FixedWidthType__bit_width(self) ) ) #' @export -`==.arrow::DataType` <- function(lhs, rhs){ - lhs$Equals(rhs) -} - -"arrow::Int8" <- R6Class("arrow::Int8", - inherit = `arrow::FixedWidthType` -) - -"arrow::Int16" <- R6Class("arrow::Int16", - inherit = `arrow::FixedWidthType` -) - -"arrow::Int32" <- R6Class("arrow::Int32", - inherit = `arrow::FixedWidthType` -) - -"arrow::Int64" <- R6Class("arrow::Int64", - inherit = `arrow::FixedWidthType` -) - - -"arrow::UInt8" <- R6Class("arrow::UInt8", - inherit = `arrow::FixedWidthType` -) - -"arrow::UInt16" <- R6Class("arrow::UInt16", - inherit = `arrow::FixedWidthType` -) - -"arrow::UInt32" <- R6Class("arrow::UInt32", - inherit = `arrow::FixedWidthType` -) - -"arrow::UInt64" <- R6Class("arrow::UInt64", - inherit = `arrow::FixedWidthType` -) - -"arrow::Float16" <- R6Class("arrow::Float16", - inherit = `arrow::FixedWidthType` -) -"arrow::Float32" <- R6Class("arrow::Float32", - inherit = `arrow::FixedWidthType` -) -"arrow::Float64" <- R6Class("arrow::Float64", - inherit = `arrow::FixedWidthType` -) - -"arrow::Boolean" <- R6Class("arrow::Boolean", - inherit = `arrow::FixedWidthType` -) - -"arrow::Utf8" <- R6Class("arrow::Utf8", - inherit = `arrow::DataType` -) - -`arrow::DateType` <- R6Class("arrow::DateType", - inherit = `arrow::FixedWidthType`, +`==.DataType` <- function(lhs, rhs) lhs$Equals(rhs) + +Int8 <- R6Class("Int8", inherit = FixedWidthType) +Int16 <- R6Class("Int16", inherit = FixedWidthType) +Int32 <- R6Class("Int32", inherit = FixedWidthType) +Int64 <- R6Class("Int64", inherit = FixedWidthType) +UInt8 <- R6Class("UInt8", inherit = FixedWidthType) +UInt16 <- R6Class("UInt16", inherit = FixedWidthType) +UInt32 <- R6Class("UInt32", inherit = FixedWidthType) +UInt64 <- R6Class("UInt64", inherit = FixedWidthType) +Float16 <- R6Class("Float16", 
inherit = FixedWidthType) +Float32 <- R6Class("Float32", inherit = FixedWidthType) +Float64 <- R6Class("Float64", inherit = FixedWidthType) +Boolean <- R6Class("Boolean", inherit = FixedWidthType) +Utf8 <- R6Class("Utf8", inherit = DataType) + +DateType <- R6Class("DateType", + inherit = FixedWidthType, public = list( unit = function() DateType__unit(self) ) ) +Date32 <- R6Class("Date32", inherit = DateType) +Date64 <- R6Class("Date64", inherit = DateType) -"arrow::Date32" <- R6Class("arrow::Date32", - inherit = `arrow::DateType` -) -"arrow::Date64" <- R6Class("arrow::Date64", - inherit = `arrow::DateType` -) - -"arrow::TimeType" <- R6Class("arrow::TimeType", - inherit = `arrow::FixedWidthType`, +TimeType <- R6Class("TimeType", + inherit = FixedWidthType, public = list( unit = function() TimeType__unit(self) ) ) -"arrow::Time32" <- R6Class("arrow::Time32", - inherit = `arrow::TimeType` -) -"arrow::Time64" <- R6Class("arrow::Time64", - inherit = `arrow::TimeType` -) +Time32 <- R6Class("Time32", inherit = TimeType) +Time64 <- R6Class("Time64", inherit = TimeType) -"arrow::Null" <- R6Class("arrow::Null", - inherit = `arrow::DataType` -) +Null <- R6Class("Null", inherit = DataType) -`arrow::Timestamp` <- R6Class( - "arrow::Timestamp", - inherit = `arrow::FixedWidthType` , +Timestamp <- R6Class("Timestamp", + inherit = FixedWidthType, public = list( timezone = function() TimestampType__timezone(self), unit = function() TimestampType__unit(self) ) ) -`arrow::DecimalType` <- R6Class("arrow:::DecimalType", - inherit = `arrow::FixedWidthType`, +DecimalType <- R6Class("DecimalType", + inherit = FixedWidthType, public = list( precision = function() DecimalType__precision(self), scale = function() DecimalType__scale(self) ) ) +Decimal128Type <- R6Class("Decimal128Type", inherit = DecimalType) -"arrow::Decimal128Type" <- R6Class("arrow::Decimal128Type", - inherit = `arrow::DecimalType` -) +NestedType <- R6Class("NestedType", inherit = DataType) #' Apache Arrow data types #' @@ 
-280,7 +218,7 @@ type.default <- function(x) { #' @param ... For `struct()`, a named list of types to define the struct columns #' #' @name data-type -#' @return An Arrow type object inheriting from `arrow::DataType`. +#' @return An Arrow type object inheriting from DataType. #' @export #' @seealso [dictionary()] for creating a dictionary (factor-like) type. #' @examples @@ -290,39 +228,39 @@ type.default <- function(x) { #' timestamp("ms", timezone = "CEST") #' time64("ns") #' } -int8 <- function() shared_ptr(`arrow::Int8`, Int8__initialize()) +int8 <- function() shared_ptr(Int8, Int8__initialize()) #' @rdname data-type #' @export -int16 <- function() shared_ptr(`arrow::Int16`, Int16__initialize()) +int16 <- function() shared_ptr(Int16, Int16__initialize()) #' @rdname data-type #' @export -int32 <- function() shared_ptr(`arrow::Int32`, Int32__initialize()) +int32 <- function() shared_ptr(Int32, Int32__initialize()) #' @rdname data-type #' @export -int64 <- function() shared_ptr(`arrow::Int64`, Int64__initialize()) +int64 <- function() shared_ptr(Int64, Int64__initialize()) #' @rdname data-type #' @export -uint8 <- function() shared_ptr(`arrow::UInt8`, UInt8__initialize()) +uint8 <- function() shared_ptr(UInt8, UInt8__initialize()) #' @rdname data-type #' @export -uint16 <- function() shared_ptr(`arrow::UInt16`, UInt16__initialize()) +uint16 <- function() shared_ptr(UInt16, UInt16__initialize()) #' @rdname data-type #' @export -uint32 <- function() shared_ptr(`arrow::UInt32`, UInt32__initialize()) +uint32 <- function() shared_ptr(UInt32, UInt32__initialize()) #' @rdname data-type #' @export -uint64 <- function() shared_ptr(`arrow::UInt64`, UInt64__initialize()) +uint64 <- function() shared_ptr(UInt64, UInt64__initialize()) #' @rdname data-type #' @export -float16 <- function() shared_ptr(`arrow::Float16`, Float16__initialize()) +float16 <- function() shared_ptr(Float16, Float16__initialize()) #' @rdname data-type #' @export @@ -330,7 +268,7 @@ halffloat <- float16 
#' @rdname data-type #' @export -float32 <- function() shared_ptr(`arrow::Float32`, Float32__initialize()) +float32 <- function() shared_ptr(Float32, Float32__initialize()) #' @rdname data-type #' @export @@ -338,11 +276,11 @@ float <- float32 #' @rdname data-type #' @export -float64 <- function() shared_ptr(`arrow::Float64`, Float64__initialize()) +float64 <- function() shared_ptr(Float64, Float64__initialize()) #' @rdname data-type #' @export -boolean <- function() shared_ptr(`arrow::Boolean`, Boolean__initialize()) +boolean <- function() shared_ptr(Boolean, Boolean__initialize()) #' @rdname data-type #' @export @@ -350,7 +288,7 @@ bool <- boolean #' @rdname data-type #' @export -utf8 <- function() shared_ptr(`arrow::Utf8`, Utf8__initialize()) +utf8 <- function() shared_ptr(Utf8, Utf8__initialize()) #' @rdname data-type #' @export @@ -358,11 +296,11 @@ string <- utf8 #' @rdname data-type #' @export -date32 <- function() shared_ptr(`arrow::Date32`, Date32__initialize()) +date32 <- function() shared_ptr(Date32, Date32__initialize()) #' @rdname data-type #' @export -date64 <- function() shared_ptr(`arrow::Date64`, Date64__initialize()) +date64 <- function() shared_ptr(Date64, Date64__initialize()) #' @rdname data-type #' @export @@ -371,7 +309,7 @@ time32 <- function(unit = c("ms", "s")) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time32_units) - shared_ptr(`arrow::Time32`, Time32__initialize(unit)) + shared_ptr(Time32, Time32__initialize(unit)) } valid_time32_units <- c( @@ -400,21 +338,6 @@ make_valid_time_unit <- function(unit, valid_units) { unit } -oxford_paste <- function(x, conjunction = "and") { - if (is.character(x)) { - x <- paste0('"', x, '"') - } - if (length(x) < 2) { - return(x) - } - x[length(x)] <- paste(conjunction, x[length(x)]) - if (length(x) > 2) { - return(paste(x, collapse = ", ")) - } else { - return(paste(x, collapse = " ")) - } -} - #' @rdname data-type #' @export time64 <- function(unit = c("ns", "us")) { @@ 
-422,12 +345,12 @@ time64 <- function(unit = c("ns", "us")) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time64_units) - shared_ptr(`arrow::Time64`, Time64__initialize(unit)) + shared_ptr(Time64, Time64__initialize(unit)) } #' @rdname data-type #' @export -null <- function() shared_ptr(`arrow::Null`, Null__initialize()) +null <- function() shared_ptr(Null, Null__initialize()) #' @rdname data-type #' @export @@ -437,15 +360,15 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone) { } unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) if (missing(timezone)) { - shared_ptr(`arrow::Timestamp`, Timestamp__initialize1(unit)) + shared_ptr(Timestamp, Timestamp__initialize1(unit)) } else { assert_that(is.character(timezone), length(timezone) == 1) - shared_ptr(`arrow::Timestamp`, Timestamp__initialize2(unit, timezone)) + shared_ptr(Timestamp, Timestamp__initialize2(unit, timezone)) } } #' @rdname data-type #' @export -decimal <- function(precision, scale) shared_ptr(`arrow::Decimal128Type`, Decimal128Type__initialize(precision, scale)) - -`arrow::NestedType` <- R6Class("arrow::NestedType", inherit = `arrow::DataType`) +decimal <- function(precision, scale) { + shared_ptr(Decimal128Type, Decimal128Type__initialize(precision, scale)) +} diff --git a/r/R/Column.R b/r/R/util.R similarity index 58% rename from r/R/Column.R rename to r/R/util.R index 0487425140d..0b122f9d224 100644 --- a/r/R/Column.R +++ b/r/R/util.R @@ -15,28 +15,22 @@ # specific language governing permissions and limitations # under the License. 
-#' @include type.R +oxford_paste <- function(x, conjunction = "and") { + if (is.character(x)) { + x <- paste0('"', x, '"') + } + if (length(x) < 2) { + return(x) + } + x[length(x)] <- paste(conjunction, x[length(x)]) + if (length(x) > 2) { + return(paste(x, collapse = ", ")) + } else { + return(paste(x, collapse = " ")) + } +} -#' @title class arrow::Column -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @section Methods: -#' -#' TODO -#' -#' @rdname arrow__Column -#' @name arrow__Column -`arrow::Column` <- R6Class("arrow::Column", inherit = `arrow::Object`, - public = list( - length = function() Column__length(self), - data = function() shared_ptr(`arrow::ChunkedArray`, Column__data(self)) - ), - - active = list( - null_count = function() Column__null_count(self), - type = function() `arrow::DataType`$dispatch(Column__type(self)) - ) -) +assert_is <- function(object, class) { + msg <- paste(substitute(object), "must be a", oxford_paste(class, "or")) + assert_that(inherits(object, class), msg = msg) +} diff --git a/r/R/write_arrow.R b/r/R/write-arrow.R similarity index 67% rename from r/R/write_arrow.R rename to r/R/write-arrow.R index f57eff36c57..b74a8945013 100644 --- a/r/R/write_arrow.R +++ b/r/R/write-arrow.R @@ -19,35 +19,35 @@ to_arrow <- function(x) { UseMethod("to_arrow") } -`to_arrow.arrow::RecordBatch` <- function(x) x -`to_arrow.arrow::Table` <- function(x) x +to_arrow.RecordBatch <- function(x) x +to_arrow.Table <- function(x) x -# splice the data frame as arguments of table() +# splice the data frame as arguments of Table$create() # see ?rlang::list2() -`to_arrow.data.frame` <- function(x) table(!!!x) +`to_arrow.data.frame` <- function(x) Table$create(!!!x) #' Write Arrow formatted data #' -#' @param x an [arrow::Table][arrow__Table], an [arrow::RecordBatch][arrow__RecordBatch] or a data.frame +#' @param x an [arrow::Table][Table], an [arrow::RecordBatch][RecordBatch] or a data.frame #' #' @param stream where to serialize to #' -#' - 
A [arrow::ipc::RecordBatchWriter][arrow__ipc__RecordBatchWriter]: the `$write()` +#' - A [arrow::RecordBatchWriter][RecordBatchWriter]: the `$write()` #' of `x` is used. The stream is left open. This uses the streaming format #' or the binary file format depending on the type of the writer. #' #' - A string file path: `x` is serialized with -#' a [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter], i.e. +#' a [arrow::RecordBatchFileWriter][RecordBatchFileWriter], i.e. #' using the binary file format. #' #' - A raw vector: typically of length zero (its data is ignored, and only used for #' dispatch). `x` is serialized using the streaming format, i.e. using the -#' [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] +#' [arrow::RecordBatchStreamWriter][RecordBatchStreamWriter] #' #' @param ... extra parameters, currently ignored #' -#' `write_arrow` is a convenience function, the classes [arrow::ipc::RecordBatchFileWriter][arrow__ipc__RecordBatchFileWriter] -#' and [arrow::ipc::RecordBatchStreamWriter][arrow__ipc__RecordBatchStreamWriter] can be used for more flexibility. +#' `write_arrow` is a convenience function, the classes [arrow::RecordBatchFileWriter][RecordBatchFileWriter] +#' and [arrow::RecordBatchStreamWriter][RecordBatchStreamWriter] can be used for more flexibility. #' #' @export write_arrow <- function(x, stream, ...) { @@ -55,7 +55,7 @@ write_arrow <- function(x, stream, ...) { } #' @export -`write_arrow.arrow::ipc::RecordBatchWriter` <- function(x, stream, ...){ +write_arrow.RecordBatchWriter <- function(x, stream, ...){ stream$write(x) } @@ -63,9 +63,9 @@ write_arrow <- function(x, stream, ...) { `write_arrow.character` <- function(x, stream, ...) 
{ assert_that(length(stream) == 1L) x <- to_arrow(x) - file_stream <- FileOutputStream(stream) + file_stream <- FileOutputStream$create(stream) on.exit(file_stream$close()) - file_writer <- RecordBatchFileWriter(file_stream, x$schema) + file_writer <- RecordBatchFileWriter$create(file_stream, x$schema) on.exit({ # Re-set the exit code to close both connections, LIFO file_writer$close() @@ -82,16 +82,16 @@ write_arrow <- function(x, stream, ...) { schema <- x$schema # how many bytes do we need - mock_stream <- MockOutputStream() - writer <- RecordBatchStreamWriter(mock_stream, schema) + mock_stream <- MockOutputStream$create() + writer <- RecordBatchStreamWriter$create(mock_stream, schema) writer$write(x) writer$close() n <- mock_stream$GetExtentBytesWritten() # now that we know the size, stream in a buffer backed by an R raw vector bytes <- raw(n) - buffer_writer <- FixedSizeBufferWriter(buffer(bytes)) - writer <- RecordBatchStreamWriter(buffer_writer, schema) + buffer_writer <- FixedSizeBufferWriter$create(buffer(bytes)) + writer <- RecordBatchStreamWriter$create(buffer_writer, schema) writer$write(x) writer$close() diff --git a/r/README.Rmd b/r/README.Rmd index 0fad65925d8..1460f21be37 100644 --- a/r/README.Rmd +++ b/r/README.Rmd @@ -54,7 +54,7 @@ When installing from source, if the R and C++ library versions do not match, ins library(arrow) set.seed(24) -tab <- arrow::table(x = 1:10, y = rnorm(10)) +tab <- Table$create(x = 1:10, y = rnorm(10)) tab$schema tab as.data.frame(tab) diff --git a/r/README.md b/r/README.md index 4a6af748faf..9733da91a95 100644 --- a/r/README.md +++ b/r/README.md @@ -69,13 +69,13 @@ Arrow C++ library first. 
library(arrow) set.seed(24) -tab <- arrow::table(x = 1:10, y = rnorm(10)) +tab <- Table$create(x = 1:10, y = rnorm(10)) tab$schema -#> arrow::Schema +#> Schema #> x: int32 #> y: double tab -#> arrow::Table +#> Table as.data.frame(tab) #> x y #> 1 1 -0.545880758 diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 98baa35fafc..bad95d2e90a 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -49,9 +49,12 @@ reference: - read_json_arrow - read_feather - read_parquet + - read_table - write_arrow - write_feather - write_parquet +- title: C++ reader/writer interface + contents: - csv_convert_options - csv_parse_options - csv_read_options @@ -59,79 +62,46 @@ reference: - json_parse_options - json_read_options - json_table_reader - - parquet_file_reader + - ParquetFileReader + - ParquetReaderProperties + - FeatherTableReader + - FeatherTableWriter + - JsonTableReader + - RecordBatchReader + - RecordBatchWriter - title: Arrow data containers contents: - buffer - array + - ArrayData - chunked_array + - ChunkedArray - record_batch - - table + - RecordBatch + - Table - read_message - read_record_batch - - read_table - title: Arrow data types and schema contents: - - schema + - Schema - type - dictionary - - field + - Field - read_schema - data-type -- title: R6 classes - contents: - - arrow__Array - - arrow__ArrayData - - arrow__Buffer - - arrow__ChunkedArray - - arrow__Column - - arrow__DataType - - arrow__DictionaryType - - arrow__Field - - arrow__FixedWidthType - - arrow__RecordBatch - - arrow__RecordBatchReader - - arrow__Schema - - arrow__Table - - arrow__MemoryPool - - arrow__io__BufferOutputStream - - arrow__io__BufferReader - - arrow__io__FileOutputStream - - arrow__io__FixedSizeBufferWriter - - arrow__io__InputStream - - arrow__io__MemoryMappedFile - - arrow__io__MockOutputStream - - arrow__io__OutputStream - - arrow__io__RandomAccessFile - - arrow__io__Readable - - arrow__io__ReadableFile - - arrow__json__TableReader - - arrow__ipc__Message - - arrow__ipc__MessageReader 
- - arrow__ipc__RecordBatchFileReader - - arrow__ipc__RecordBatchFileWriter - - arrow__ipc__RecordBatchStreamReader - - arrow__ipc__RecordBatchStreamWriter - - arrow__ipc__RecordBatchWriter -- title: Other functions + - DataType + - DictionaryType + - FixedWidthType + - cast_options +- title: Input/Output contents: - - BufferOutputStream - - BufferReader - - CompressedInputStream - - CompressedOutputStream - - FeatherTableReader - - FeatherTableWriter - - FileOutputStream - - FixedSizeBufferWriter + - InputStream + - mmap_open + - mmap_create + - OutputStream + - Message - MessageReader - - MockOutputStream - - ReadableFile - - RecordBatchFileReader - - RecordBatchFileWriter - - RecordBatchStreamReader - - RecordBatchStreamWriter - - cast_options + - compression - compression_codec + - MemoryPool - default_memory_pool - - mmap_create - - mmap_open diff --git a/r/man/ArrayData.Rd b/r/man/ArrayData.Rd new file mode 100644 index 00000000000..035fee8ac17 --- /dev/null +++ b/r/man/ArrayData.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/array-data.R +\docType{class} +\name{ArrayData} +\alias{ArrayData} +\title{ArrayData class} +\description{ +The \code{ArrayData} class allows you to get and inspect the data +inside an \code{arrow::Array}. +} +\section{Usage}{ +\preformatted{data <- Array$create(x)$data() + +data$type() +data$length() +data$null_count() +data$offset() +data$buffers() +} +} + +\section{Methods}{ + + +... 
+} + +\keyword{datasets} diff --git a/r/man/BufferOutputStream.Rd b/r/man/BufferOutputStream.Rd deleted file mode 100644 index 1776f995930..00000000000 --- a/r/man/BufferOutputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{BufferOutputStream} -\alias{BufferOutputStream} -\title{Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream}} -\usage{ -BufferOutputStream(initial_capacity = 0L) -} -\arguments{ -\item{initial_capacity}{initial capacity} -} -\value{ -a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} -} -\description{ -Open a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} -} diff --git a/r/man/BufferReader.Rd b/r/man/BufferReader.Rd deleted file mode 100644 index ea5dd790cdd..00000000000 --- a/r/man/BufferReader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{BufferReader} -\alias{BufferReader} -\title{Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader}} -\usage{ -BufferReader(x) -} -\arguments{ -\item{x}{R object to treat as a buffer or a buffer created by \code{\link[=buffer]{buffer()}}} -} -\description{ -Create a \link[=arrow__io__BufferReader]{arrow::io::BufferReader} -} diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd new file mode 100644 index 00000000000..7e617243594 --- /dev/null +++ b/r/man/ChunkedArray.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/chunked-array.R +\docType{class} +\name{ChunkedArray} +\alias{ChunkedArray} +\alias{chunked_array} +\title{ChunkedArray class} +\usage{ +chunked_array(..., type = NULL) +} +\arguments{ +\item{\dots}{Vectors to coerce} + +\item{type}{currently ignored} +} +\description{ +A \code{ChunkedArray} is a data structure managing a list of +primitive Arrow \link[=Array]{Arrays} logically as one large array. 
+} +\section{Factory}{ + +The \code{ChunkedArray$create()} factory method instantiates the object from +various Arrays or R vectors. \code{chunked_array()} is an alias for it. +} + +\section{Methods}{ + +\itemize{ +\item \code{$length()} +\item \code{$chunk(i)} +\item \code{$as_vector()} +\item \code{$Slice(offset, length = NULL)} +\item \code{$cast(target_type, safe = TRUE, options = cast_options(safe))} +\item \code{$null_count()} +\item \code{$chunks()} +\item \code{$num_chunks()} +\item \code{$type()} +} +} + +\seealso{ +\link{Array} +} +\keyword{datasets} diff --git a/r/man/CompressedInputStream.Rd b/r/man/CompressedInputStream.Rd deleted file mode 100644 index cfff053083d..00000000000 --- a/r/man/CompressedInputStream.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compression.R -\name{CompressedInputStream} -\alias{CompressedInputStream} -\title{Compressed input stream} -\usage{ -CompressedInputStream(stream, codec = codec("GZIP")) -} -\arguments{ -\item{stream}{Underlying raw input stream} - -\item{codec}{a codec} -} -\description{ -Compressed input stream -} diff --git a/r/man/CompressedOutputStream.Rd b/r/man/CompressedOutputStream.Rd deleted file mode 100644 index d32070ebfd1..00000000000 --- a/r/man/CompressedOutputStream.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compression.R -\name{CompressedOutputStream} -\alias{CompressedOutputStream} -\title{Compressed output stream} -\usage{ -CompressedOutputStream(stream, codec = compression_codec("GZIP")) -} -\arguments{ -\item{stream}{Underlying raw output stream} - -\item{codec}{a codec} -} -\description{ -Compressed output stream -} -\details{ -This function is not supported in Windows. 
-} diff --git a/r/man/arrow__DataType.Rd b/r/man/DataType.Rd similarity index 75% rename from r/man/arrow__DataType.Rd rename to r/man/DataType.Rd index 4eeb05110d2..64755c7d691 100644 --- a/r/man/arrow__DataType.Rd +++ b/r/man/DataType.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/type.R \docType{class} -\name{arrow__DataType} -\alias{arrow__DataType} -\alias{arrow::DataType} +\name{DataType} +\alias{DataType} \title{class arrow::DataType} \description{ class arrow::DataType diff --git a/r/man/arrow__DictionaryType.Rd b/r/man/DictionaryType.Rd similarity index 52% rename from r/man/arrow__DictionaryType.Rd rename to r/man/DictionaryType.Rd index ba462ee0114..4d64cb1d986 100644 --- a/r/man/arrow__DictionaryType.Rd +++ b/r/man/DictionaryType.Rd @@ -1,12 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dictionary.R \docType{class} -\name{arrow__DictionaryType} -\alias{arrow__DictionaryType} -\alias{arrow::DictionaryType} -\title{class arrow::DictionaryType} +\name{DictionaryType} +\alias{DictionaryType} +\title{class DictionaryType} \description{ -class arrow::DictionaryType +class DictionaryType } \section{Methods}{ diff --git a/r/man/FeatherTableReader.Rd b/r/man/FeatherTableReader.Rd index 3276628d50e..c0956d4c106 100644 --- a/r/man/FeatherTableReader.Rd +++ b/r/man/FeatherTableReader.Rd @@ -1,18 +1,39 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/feather.R +\docType{class} \name{FeatherTableReader} \alias{FeatherTableReader} -\title{A \code{arrow::ipc::feather::TableReader} to read from a file} -\usage{ -FeatherTableReader(file, mmap = TRUE, ...) +\title{FeatherTableReader class} +\description{ +This class enables you to interact with Feather files. Create +one to connect to a file or other InputStream, and call \code{Read()} on it to +make an \code{arrow::Table}. See its usage in \code{\link[=read_feather]{read_feather()}}. 
} -\arguments{ -\item{file}{A file path or \code{arrow::io::RandomAccessFile}} +\section{Factory}{ -\item{mmap}{Is the file memory mapped (applicable to the \code{character} method)} -\item{...}{extra parameters} +The \code{FeatherTableReader$create()} factory method instantiates the object and +takes the following arguments: +\itemize{ +\item \code{file} A character file name, raw vector, or Arrow file connection object +(e.g. \code{RandomAccessFile}). +\item \code{mmap} Logical: whether to memory-map the file (default \code{TRUE}) +\item \code{...} Additional arguments, currently ignored } -\description{ -A \code{arrow::ipc::feather::TableReader} to read from a file } + +\section{Methods}{ + +\itemize{ +\item \code{$GetDescription()} +\item \code{$HasDescription()} +\item \code{$version()} +\item \code{$num_rows()} +\item \code{$num_columns()} +\item \code{$GetColumnName()} +\item \code{$GetColumn()} +\item \code{$Read(columns)} +} +} + +\keyword{datasets} diff --git a/r/man/FeatherTableWriter.Rd b/r/man/FeatherTableWriter.Rd index 0db0a884fb4..e127bd89443 100644 --- a/r/man/FeatherTableWriter.Rd +++ b/r/man/FeatherTableWriter.Rd @@ -1,14 +1,35 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/feather.R +\docType{class} \name{FeatherTableWriter} \alias{FeatherTableWriter} -\title{Create \code{TableWriter} that writes into a stream} -\usage{ -FeatherTableWriter(stream) +\title{FeatherTableWriter class} +\description{ +This class enables you to write Feather files. See its usage in +\code{\link[=write_feather]{write_feather()}}. 
} -\arguments{ -\item{stream}{an \code{OutputStream}} +\section{Factory}{ + + +The \code{FeatherTableWriter$create()} factory method instantiates the object and +takes the following argument: +\itemize{ +\item \code{stream} An \code{OutputStream} +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$GetDescription()} +\item \code{$HasDescription()} +\item \code{$version()} +\item \code{$num_rows()} +\item \code{$num_columns()} +\item \code{$GetColumnName()} +\item \code{$GetColumn()} +\item \code{$Read(columns)} } -\description{ -Create \code{TableWriter} that writes into a stream } + +\keyword{datasets} diff --git a/r/man/Field.Rd b/r/man/Field.Rd new file mode 100644 index 00000000000..c4325f4c821 --- /dev/null +++ b/r/man/Field.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/field.R +\docType{class} +\name{Field} +\alias{Field} +\alias{field} +\title{class arrow::Field} +\format{An object of class \code{R6ClassGenerator} of length 25.} +\usage{ +Field + +field(name, type, metadata) +} +\arguments{ +\item{name}{field name} + +\item{type}{logical type, instance of \link{DataType}} + +\item{metadata}{currently ignored} +} +\description{ +\code{field()} lets you create an \code{arrow::Field} that maps a +\link[=data-type]{DataType} to a column name. Fields are contained in +\link[=Schema]{Schemas}. +} +\section{Methods}{ + +\itemize{ +\item \code{f$ToString()}: convert to a string +\item \code{f$Equals(other)}: test for equality. 
More naturally called as \code{f == other} +} +} + +\examples{ +\donttest{ +field("x", int32()) +} +} +\keyword{datasets} diff --git a/r/man/FileOutputStream.Rd b/r/man/FileOutputStream.Rd deleted file mode 100644 index 4155d349d1a..00000000000 --- a/r/man/FileOutputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{FileOutputStream} -\alias{FileOutputStream} -\title{Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream}} -\usage{ -FileOutputStream(path) -} -\arguments{ -\item{path}{file path} -} -\value{ -a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} -} -\description{ -Open a \link[=arrow__io__FileOutputStream]{arrow::io::FileOutputStream} -} diff --git a/r/man/FixedSizeBufferWriter.Rd b/r/man/FixedSizeBufferWriter.Rd deleted file mode 100644 index 553d61b76e1..00000000000 --- a/r/man/FixedSizeBufferWriter.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{FixedSizeBufferWriter} -\alias{FixedSizeBufferWriter} -\title{Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter}} -\usage{ -FixedSizeBufferWriter(buffer) -} -\arguments{ -\item{buffer}{\link[=arrow__Buffer]{arrow::Buffer} or something \code{\link[=buffer]{buffer()}} can handle} -} -\value{ -a \link[=arrow__io__BufferOutputStream]{arrow::io::BufferOutputStream} -} -\description{ -Open a \link[=arrow__io__FixedSizeBufferWriter]{arrow::io::FixedSizeBufferWriter} -} diff --git a/r/man/arrow__FixedWidthType.Rd b/r/man/FixedWidthType.Rd similarity index 72% rename from r/man/arrow__FixedWidthType.Rd rename to r/man/FixedWidthType.Rd index 075c0eeeb14..e06e8a47cc5 100644 --- a/r/man/arrow__FixedWidthType.Rd +++ b/r/man/FixedWidthType.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/type.R \docType{class} -\name{arrow__FixedWidthType} 
-\alias{arrow__FixedWidthType} -\alias{arrow::FixedWidthType} +\name{FixedWidthType} +\alias{FixedWidthType} \title{class arrow::FixedWidthType} \description{ class arrow::FixedWidthType diff --git a/r/man/InputStream.Rd b/r/man/InputStream.Rd new file mode 100644 index 00000000000..57b49c99dd8 --- /dev/null +++ b/r/man/InputStream.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{InputStream} +\alias{InputStream} +\alias{RandomAccessFile} +\alias{MemoryMappedFile} +\alias{ReadableFile} +\alias{BufferReader} +\title{InputStream classes} +\description{ +\code{RandomAccessFile} inherits from \code{InputStream} and is a base +class for: \code{ReadableFile} for reading from a file; \code{MemoryMappedFile} for +the same but with memory mapping; and \code{BufferReader} for reading from a +buffer. Use these with the various table readers. +} +\section{Factory}{ + + +The \code{$create()} factory methods instantiate the \code{InputStream} object and +take the following arguments, depending on the subclass: +\itemize{ +\item \code{path} For \code{ReadableFile}, a character file name +\item \code{x} For \code{BufferReader}, a \link{Buffer} or an object that can be +made into a buffer via \code{buffer()}. +} + +To instantiate a \code{MemoryMappedFile}, call \code{\link[=mmap_open]{mmap_open()}}. 
+} + +\section{Methods}{ + +\itemize{ +\item \code{$GetSize()}: +\item \code{$supports_zero_copy()}: Logical +\item \code{$seek(position)}: go to that position in the stream +\item \code{$tell()}: return the position in the stream +\item \code{$close()}: close the stream +\item \code{$Read(nbytes)}: read data from the stream, either a specified \code{nbytes} or +all, if \code{nbytes} is not provided +\item \code{$ReadAt(position, nbytes)}: similar to \code{$seek(position)$Read(nbytes)} +\item \code{$Resize(size)}: for a \code{MemoryMappedFile} that is writeable +} +} + +\keyword{datasets} diff --git a/r/man/JsonTableReader.Rd b/r/man/JsonTableReader.Rd new file mode 100644 index 00000000000..80867df82c7 --- /dev/null +++ b/r/man/JsonTableReader.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/json.R +\docType{class} +\name{JsonTableReader} +\alias{JsonTableReader} +\title{class JsonTableReader} +\description{ +class JsonTableReader +} +\section{Methods}{ + +\itemize{ +\item \code{Read()} : read the JSON file as an \link[=Table]{arrow::Table} +} +} + +\keyword{datasets} diff --git a/r/man/arrow___MemoryPool.Rd b/r/man/MemoryPool.Rd similarity index 59% rename from r/man/arrow___MemoryPool.Rd rename to r/man/MemoryPool.Rd index 9189e8be4a3..e69fc8b4db9 100644 --- a/r/man/arrow___MemoryPool.Rd +++ b/r/man/MemoryPool.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/memory_pool.R +% Please edit documentation in R/memory-pool.R \docType{class} -\name{arrow__MemoryPool} -\alias{arrow__MemoryPool} -\alias{arrow::MemoryPool} +\name{MemoryPool} +\alias{MemoryPool} \title{class arrow::MemoryPool} \description{ class arrow::MemoryPool diff --git a/r/man/arrow__ipc__Message.Rd b/r/man/Message.Rd similarity index 54% rename from r/man/arrow__ipc__Message.Rd rename to r/man/Message.Rd index d3811f8f4c1..f699d513b72 100644 --- a/r/man/arrow__ipc__Message.Rd +++ b/r/man/Message.Rd @@ 
-1,12 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/message.R \docType{class} -\name{arrow__ipc__Message} -\alias{arrow__ipc__Message} -\alias{arrow::ipc::Message} -\title{class arrow::ipc::Message} +\name{Message} +\alias{Message} +\title{class arrow::Message} \description{ -class arrow::ipc::Message +class arrow::Message } \section{Methods}{ diff --git a/r/man/MessageReader.Rd b/r/man/MessageReader.Rd index 01589f5d078..cabfa66eb86 100644 --- a/r/man/MessageReader.Rd +++ b/r/man/MessageReader.Rd @@ -1,14 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/message.R +\docType{class} \name{MessageReader} \alias{MessageReader} -\title{Open a MessageReader that reads from a stream} -\usage{ -MessageReader(stream) -} -\arguments{ -\item{stream}{an InputStream} -} +\title{class arrow::MessageReader} \description{ -Open a MessageReader that reads from a stream +class arrow::MessageReader +} +\section{Methods}{ + + +TODO } + +\keyword{datasets} diff --git a/r/man/MockOutputStream.Rd b/r/man/MockOutputStream.Rd deleted file mode 100644 index 2e3c0b6d3e3..00000000000 --- a/r/man/MockOutputStream.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{MockOutputStream} -\alias{MockOutputStream} -\title{Open a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream}} -\usage{ -MockOutputStream() -} -\value{ -a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} -} -\description{ -Open a \link[=arrow__io__MockOutputStream]{arrow::io::MockOutputStream} -} diff --git a/r/man/OutputStream.Rd b/r/man/OutputStream.Rd new file mode 100644 index 00000000000..95661d172d3 --- /dev/null +++ b/r/man/OutputStream.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\docType{class} +\name{OutputStream} +\alias{OutputStream} +\alias{FileOutputStream} 
+\alias{MockOutputStream} +\alias{BufferOutputStream} +\alias{FixedSizeBufferWriter} +\title{OutputStream classes} +\description{ +\code{FileOutputStream} is for writing to a file; +\code{BufferOutputStream} and \code{FixedSizeBufferWriter} write to buffers; +\code{MockOutputStream} just reports back how many bytes it received, for testing +purposes. You can create one and pass it to any of the table writers, for +example. +} +\section{Factory}{ + + +The \code{$create()} factory methods instantiate the \code{OutputStream} object and +take the following arguments, depending on the subclass: +\itemize{ +\item \code{path} For \code{FileOutputStream}, a character file name +\item \code{initial_capacity} For \code{BufferOutputStream}, the size in bytes of the +buffer. +\item \code{x} For \code{FixedSizeBufferWriter}, a \link{Buffer} or an object that can be +made into a buffer via \code{buffer()}. +} + +\code{MockOutputStream$create()} does not take any arguments. +} + +\section{Methods}{ + +\itemize{ +\item \code{$tell()}: return the position in the stream +\item \code{$close()}: close the stream +\item \code{$write(x)}: send \code{x} to the stream +\item \code{$capacity()}: for \code{BufferOutputStream} +\item \code{$getvalue()}: for \code{BufferOutputStream} +\item \code{$GetExtentBytesWritten()}: for \code{MockOutputStream}, report how many bytes +were sent. +} +} + +\keyword{datasets} diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd new file mode 100644 index 00000000000..1ebc20cddc1 --- /dev/null +++ b/r/man/ParquetFileReader.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parquet.R +\docType{class} +\name{ParquetFileReader} +\alias{ParquetFileReader} +\title{ParquetFileReader class} +\description{ +This class enables you to interact with Parquet files. 
+} +\section{Factory}{ + + +The \code{ParquetFileReader$create()} factory method instantiates the object and +takes the following arguments: +\itemize{ +\item \code{file} A character file name, raw vector, or Arrow file connection object +(e.g. \code{RandomAccessFile}). +\item \code{props} Optional \link{ParquetReaderProperties} +\item \code{mmap} Logical: whether to memory-map the file (default \code{TRUE}) +\item \code{...} Additional arguments, currently ignored +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$ReadTable(col_select)}: get an \code{arrow::Table} from the file, possibly +with columns filtered by a character vector of column names or a +\code{tidyselect} specification. +\item \code{$GetSchema()}: get the \code{arrow::Schema} of the data in the file +} +} + +\examples{ +\donttest{ +f <- system.file("v0.7.1.parquet", package="arrow") +pq <- ParquetFileReader$create(f) +pq$GetSchema() +tab <- pq$ReadTable(starts_with("c")) +tab$schema +} +} +\keyword{datasets} diff --git a/r/man/ParquetReaderProperties.Rd b/r/man/ParquetReaderProperties.Rd new file mode 100644 index 00000000000..90de601a6db --- /dev/null +++ b/r/man/ParquetReaderProperties.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parquet.R +\docType{class} +\name{ParquetReaderProperties} +\alias{ParquetReaderProperties} +\title{ParquetReaderProperties class} +\description{ +This class holds settings to control how a Parquet file is read +by \link{ParquetFileReader}. 
+} +\section{Factory}{ + + +The \code{ParquetReaderProperties$create()} factory method instantiates the object +and takes the following arguments: +\itemize{ +\item \code{use_threads} Logical: whether to use multithreading (default \code{TRUE}) +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$read_dictionary(column_index)} +\item \code{$set_read_dictionary(column_index, read_dict)} +\item \code{$use_threads(use_threads)} +} +} + +\keyword{datasets} diff --git a/r/man/ReadableFile.Rd b/r/man/ReadableFile.Rd deleted file mode 100644 index 11535321bfb..00000000000 --- a/r/man/ReadableFile.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\name{ReadableFile} -\alias{ReadableFile} -\title{open a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile}} -\usage{ -ReadableFile(path) -} -\arguments{ -\item{path}{file path} -} -\value{ -a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} -} -\description{ -open a \link[=arrow__io__ReadableFile]{arrow::io::ReadableFile} -} diff --git a/r/man/arrow__RecordBatch.Rd b/r/man/RecordBatch.Rd similarity index 59% rename from r/man/arrow__RecordBatch.Rd rename to r/man/RecordBatch.Rd index 40ba6323ee0..1e99e4d42bf 100644 --- a/r/man/arrow__RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -1,9 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatch.R +% Please edit documentation in R/record-batch.R \docType{class} -\name{arrow__RecordBatch} -\alias{arrow__RecordBatch} -\alias{arrow::RecordBatch} +\name{RecordBatch} +\alias{RecordBatch} \title{class arrow::RecordBatch} \description{ class arrow::RecordBatch diff --git a/r/man/RecordBatchFileReader.Rd b/r/man/RecordBatchFileReader.Rd deleted file mode 100644 index 3ea04817e0e..00000000000 --- a/r/man/RecordBatchFileReader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R 
-\name{RecordBatchFileReader} -\alias{RecordBatchFileReader} -\title{Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file} -\usage{ -RecordBatchFileReader(file) -} -\arguments{ -\item{file}{The file to read from. A file path, or an \link[=arrow__ipc__RecordBatchFileReader]{arrow::io::RandomAccessFile}} -} -\description{ -Create an \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} from a file -} diff --git a/r/man/RecordBatchFileWriter.Rd b/r/man/RecordBatchFileWriter.Rd deleted file mode 100644 index d89578f97be..00000000000 --- a/r/man/RecordBatchFileWriter.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\name{RecordBatchFileWriter} -\alias{RecordBatchFileWriter} -\title{Create a record batch file writer from a stream} -\usage{ -RecordBatchFileWriter(sink, schema) -} -\arguments{ -\item{sink}{Where to write. Can either be: -\itemize{ -\item a string file path -\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} -}} - -\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} -} -\value{ -an \code{arrow::ipc::RecordBatchWriter} object -} -\description{ -Create a record batch file writer from a stream -} diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd new file mode 100644 index 00000000000..5ed6ba4b4b9 --- /dev/null +++ b/r/man/RecordBatchReader.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/record-batch-reader.R +\docType{class} +\name{RecordBatchReader} +\alias{RecordBatchReader} +\alias{RecordBatchStreamReader} +\alias{RecordBatchFileReader} +\title{RecordBatchReader classes} +\description{ +\code{RecordBatchFileReader} and \code{RecordBatchStreamReader} are +interfaces for generating record batches from different input sources. 
+} +\section{Factory}{ + + +The \code{RecordBatchFileReader$create()} and \code{RecordBatchStreamReader$create()} +factory methods instantiate the object and +take a single argument, named according to the class: +\itemize{ +\item \code{file} A character file name, raw vector, or Arrow file connection object +(e.g. \code{RandomAccessFile}). +\item \code{stream} A raw vector, \link{Buffer}, or \code{InputStream}. +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$read_next_batch()}: Returns a \code{RecordBatch} +\item \code{$schema()}: Returns a \link{Schema} +\item \code{$batches()}: Returns a list of \code{RecordBatch}es +\item \code{$get_batch(i)}: For \code{RecordBatchFileReader}, return a particular batch +by an integer index. +\item \code{$num_record_batches()}: For \code{RecordBatchFileReader}, see how many batches +are in the file. +} +} + +\keyword{datasets} diff --git a/r/man/RecordBatchStreamReader.Rd b/r/man/RecordBatchStreamReader.Rd deleted file mode 100644 index 4bd0e8ccdc5..00000000000 --- a/r/man/RecordBatchStreamReader.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\name{RecordBatchStreamReader} -\alias{RecordBatchStreamReader} -\title{Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream} -\usage{ -RecordBatchStreamReader(stream) -} -\arguments{ -\item{stream}{input stream, an \link[=arrow__io__InputStream]{arrow::io::InputStream} or a raw vector} -} -\description{ -Create a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} from an input stream -} diff --git a/r/man/RecordBatchStreamWriter.Rd b/r/man/RecordBatchStreamWriter.Rd deleted file mode 100644 index 9d9bbc9ceb0..00000000000 --- a/r/man/RecordBatchStreamWriter.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R 
-\name{RecordBatchStreamWriter} -\alias{RecordBatchStreamWriter} -\title{Writer for the Arrow streaming binary format} -\usage{ -RecordBatchStreamWriter(sink, schema) -} -\arguments{ -\item{sink}{Where to write. Can either be: -\itemize{ -\item A string file path -\item \link[=arrow__io__OutputStream]{arrow::io::OutputStream} -}} - -\item{schema}{The \link[=arrow__Schema]{arrow::Schema} for data to be written.} -} -\value{ -a \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} -} -\description{ -Writer for the Arrow streaming binary format -} diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd new file mode 100644 index 00000000000..46e0b87ba45 --- /dev/null +++ b/r/man/RecordBatchWriter.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/record-batch-writer.R +\docType{class} +\name{RecordBatchWriter} +\alias{RecordBatchWriter} +\alias{RecordBatchStreamWriter} +\alias{RecordBatchFileWriter} +\title{RecordBatchWriter classes} +\description{ +\code{RecordBatchFileWriter} and \code{RecordBatchStreamWriter} are +interfaces for writing record batches to either the binary file or streaming +format. +} +\section{Usage}{ +\preformatted{writer <- RecordBatchStreamWriter$create(sink, schema) + +writer$write_batch(batch) +writer$write_table(table) +writer$close() +} +} + +\section{Factory}{ + + +The \code{RecordBatchFileWriter$create()} and \code{RecordBatchStreamWriter$create()} +factory methods instantiate the object and +take a single argument, named according to the class: +\itemize{ +\item \code{sink} A character file name or an \code{OutputStream}. +\item \code{schema} A \link{Schema} for the data to be written. 
+} +} + +\section{Methods}{ + +\itemize{ +\item \code{$write(x)}: Write a \link{RecordBatch}, \link{Table}, or \code{data.frame}, dispatching +to the methods below appropriately +\item \code{$write_batch(batch)}: Write a \code{RecordBatch} to stream +\item \code{$write_table(table)}: Write a \code{Table} to stream +\item \code{$close()}: close stream +} +} + +\keyword{datasets} diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd new file mode 100644 index 00000000000..2f960dbce6c --- /dev/null +++ b/r/man/Schema.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/schema.R +\docType{class} +\name{Schema} +\alias{Schema} +\alias{schema} +\title{Schema class} +\usage{ +schema(...) +} +\arguments{ +\item{...}{named list of \link[=data-type]{data types}} +} +\description{ +Create a \code{Schema} when you +want to convert an R \code{data.frame} to Arrow but don't want to rely on the +default mapping of R types to Arrow types, such as when you want to choose a +specific numeric precision. +} +\section{Usage}{ +\preformatted{s <- schema(...) + +s$ToString() +s$num_fields() +s$field(i) +} +} + +\section{Methods}{ + +\itemize{ +\item \code{$ToString()}: convert to a string +\item \code{$num_fields()}: returns the number of fields +\item \code{$field(i)}: returns the field at index \code{i} (0-based) +} +} + +\keyword{datasets} diff --git a/r/man/Table.Rd b/r/man/Table.Rd new file mode 100644 index 00000000000..21dc8371eef --- /dev/null +++ b/r/man/Table.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/table.R +\docType{class} +\name{Table} +\alias{Table} +\title{class arrow::Table} +\description{ +class arrow::Table +} +\section{Factory}{ + + +The \code{Table$create()} function takes the following arguments: +\itemize{ +\item \code{...} arrays, chunked arrays, or R vectors +\item \code{schema} a schema. 
The default (\code{NULL}) infers the schema from the \code{...} +} +} + +\section{Methods}{ + + +TODO +} + +\keyword{datasets} diff --git a/r/man/array.Rd b/r/man/array.Rd index 2b784caf9a1..73234746f93 100644 --- a/r/man/array.Rd +++ b/r/man/array.Rd @@ -1,16 +1,52 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/array.R +\docType{class} \name{array} \alias{array} -\title{create an \link[=arrow__Array]{arrow::Array} from an R vector} -\usage{ -array(x, type = NULL) +\alias{Array} +\title{Array class} +\description{ +Array base type. Immutable data array with some logical type +and some length. } -\arguments{ -\item{x}{R object} +\section{Factory}{ -\item{type}{Explicit \link[=arrow__DataType]{type}, or NULL (the default) to infer from the data} +The \code{Array$create()} factory method instantiates an \code{Array} and +takes the following arguments: +\itemize{ +\item \code{x}: an R vector, list, or \code{data.frame} +\item \code{type}: an optional \link[=data-type]{data type} for \code{x}. If omitted, the type +will be inferred from the data. +} +} + +\section{Usage}{ +\preformatted{a <- Array$create(x) +length(a) + +print(a) +a == a } -\description{ -create an \link[=arrow__Array]{arrow::Array} from an R vector } + +\section{Methods}{ + +\itemize{ +\item \code{$IsNull(i)}: Return true if value at index is null. Does not boundscheck +\item \code{$IsValid(i)}: Return true if value at index is valid. 
Does not boundscheck +\item \code{$length()}: Size in the number of elements this array contains +\item \code{$offset()}: A relative position into another array's data, to enable zero-copy slicing +\item \code{$null_count()}: The number of null entries in the array +\item \code{$type()}: logical type of data +\item \code{$type_id()}: type id +\item \code{$Equals(other)} : is this array equal to \code{other} +\item \code{$ApproxEquals(other)} : +\item \code{$data()}: return the underlying \link{ArrayData} +\item \code{$as_vector()}: convert to an R vector +\item \code{$ToString()}: string representation of the array +\item \code{$Slice(offset, length = NULL)} : Construct a zero-copy slice of the array with the indicated offset and length. If length is \code{NULL}, the slice goes until the end of the array. +\item \code{$RangeEquals(other, start_idx, end_idx, other_start_idx)} : +} +} + +\keyword{datasets} diff --git a/r/man/arrow__Array.Rd b/r/man/arrow__Array.Rd deleted file mode 100644 index dabed1f6fa2..00000000000 --- a/r/man/arrow__Array.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/array.R -\docType{class} -\name{arrow__Array} -\alias{arrow__Array} -\alias{arrow::Array} -\title{class arrow::Array - -Array base type. Immutable data array with some logical type and some length.} -\description{ -class arrow::Array - -Array base type. Immutable data array with some logical type and some length. -} -\section{Usage}{ -\preformatted{a <- array(x) - -a$IsNull(i) -a$IsValid(i) -a$length() or length(a) -a$offset() -a$null_count() -a$type() -a$type_id() -a$Equals(b) -a$ApproxEquals(b) -a$as_vector() -a$ToString() -a$Slice(offset, length = NULL) -a$RangeEquals(other, start_idx, end_idx, other_start_idx) - -print(a) -a == a -} -} - -\section{Methods}{ - -\itemize{ -\item \code{$IsNull(i)}: Return true if value at index is null. 
Does not boundscheck -\item \code{$IsValid(i)}: Return true if value at index is valid. Does not boundscheck -\item \code{$length()}: Size in the number of elements this array contains -\item \code{$offset()}: A relative position into another array's data, to enable zero-copy slicing -\item \code{$null_count()}: The number of null entries in the array -\item \code{$type()}: logical type of data -\item \code{$type_id()}: type id -\item \code{$Equals(other)} : is this array equal to \code{other} -\item \code{$ApproxEquals(other)} : -\item \code{$data()}: return the underlying \link[=arrow__ArrayData]{arrow::ArrayData} -\item \code{$as_vector()}: convert to an R vector -\item \code{$ToString()}: string representation of the array -\item \code{$Slice(offset, length = NULL)} : Construct a zero-copy slice of the array with the indicated offset and length. If length is \code{NULL}, the slice goes until the end of the array. -\item \code{$RangeEquals(other, start_idx, end_idx, other_start_idx)} : -} -} - -\keyword{datasets} diff --git a/r/man/arrow__ArrayData.Rd b/r/man/arrow__ArrayData.Rd deleted file mode 100644 index af48dd334a5..00000000000 --- a/r/man/arrow__ArrayData.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ArrayData.R -\docType{class} -\name{arrow__ArrayData} -\alias{arrow__ArrayData} -\alias{arrow::ArrayData} -\title{class arrow::ArrayData} -\description{ -class arrow::ArrayData -} -\section{Usage}{ -\preformatted{data <- array(x)$data() - -data$type() -data$length() -data$null_count() -data$offset() -data$buffers() -} -} - -\section{Methods}{ - - -... 
-} - -\keyword{datasets} diff --git a/r/man/arrow__Buffer.Rd b/r/man/arrow__Buffer.Rd deleted file mode 100644 index 135da7a20e7..00000000000 --- a/r/man/arrow__Buffer.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buffer.R -\docType{class} -\name{arrow__Buffer} -\alias{arrow__Buffer} -\alias{arrow::Buffer} -\title{class arrow::Buffer} -\description{ -class arrow::Buffer -} -\section{Methods}{ - -\itemize{ -\item \code{$is_mutable()} : -\item \code{$ZeroPadding()} : -\item \code{$size()} : -\item \code{$capacity()}: -} -} - -\keyword{datasets} diff --git a/r/man/arrow__ChunkedArray.Rd b/r/man/arrow__ChunkedArray.Rd deleted file mode 100644 index a87bf1c0dcc..00000000000 --- a/r/man/arrow__ChunkedArray.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ChunkedArray.R -\docType{class} -\name{arrow__ChunkedArray} -\alias{arrow__ChunkedArray} -\alias{arrow::ChunkedArray} -\title{class arrow::ChunkedArray} -\description{ -class arrow::ChunkedArray -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__Column.Rd b/r/man/arrow__Column.Rd deleted file mode 100644 index 6a0ee6a40a5..00000000000 --- a/r/man/arrow__Column.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Column.R -\docType{class} -\name{arrow__Column} -\alias{arrow__Column} -\alias{arrow::Column} -\title{class arrow::Column} -\description{ -class arrow::Column -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__Field.Rd b/r/man/arrow__Field.Rd deleted file mode 100644 index 893a65aa08e..00000000000 --- a/r/man/arrow__Field.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Field.R -\docType{class} -\name{arrow__Field} -\alias{arrow__Field} -\alias{arrow::Field} -\title{class arrow::Field} 
-\description{ -class arrow::Field -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__RecordBatchReader.Rd b/r/man/arrow__RecordBatchReader.Rd deleted file mode 100644 index b3ccd3f1749..00000000000 --- a/r/man/arrow__RecordBatchReader.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\docType{class} -\name{arrow__RecordBatchReader} -\alias{arrow__RecordBatchReader} -\alias{arrow::RecordBatchReader} -\title{class arrow::RecordBatchReader} -\description{ -class arrow::RecordBatchReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__Schema.Rd b/r/man/arrow__Schema.Rd deleted file mode 100644 index b657ff2c4a8..00000000000 --- a/r/man/arrow__Schema.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Schema.R -\docType{class} -\name{arrow__Schema} -\alias{arrow__Schema} -\alias{arrow::Schema} -\title{class arrow::Schema} -\description{ -class arrow::Schema -} -\section{Usage}{ -\preformatted{s <- schema(...) 
- -s$ToString() -s$num_fields() -s$field(i) -} -} - -\section{Methods}{ - -\itemize{ -\item \code{$ToString()}: convert to a string -\item \code{$num_fields()}: returns the number of fields -\item \code{$field(i)}: returns the field at index \code{i} (0-based) -} -} - -\keyword{datasets} diff --git a/r/man/arrow__Table.Rd b/r/man/arrow__Table.Rd deleted file mode 100644 index 139db980acf..00000000000 --- a/r/man/arrow__Table.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Table.R -\docType{class} -\name{arrow__Table} -\alias{arrow__Table} -\alias{arrow::Table} -\title{class arrow::Table} -\description{ -class arrow::Table -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__BufferOutputStream.Rd b/r/man/arrow__io__BufferOutputStream.Rd deleted file mode 100644 index e90d1cc0ed8..00000000000 --- a/r/man/arrow__io__BufferOutputStream.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__BufferOutputStream} -\alias{arrow__io__BufferOutputStream} -\alias{arrow::io::BufferOutputStream} -\title{class arrow::io::BufferOutputStream} -\format{An object of class \code{R6ClassGenerator} of length 24.} -\description{ -class arrow::io::BufferOutputStream -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__BufferReader.Rd b/r/man/arrow__io__BufferReader.Rd deleted file mode 100644 index 609fec5b6d4..00000000000 --- a/r/man/arrow__io__BufferReader.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__BufferReader} -\alias{arrow__io__BufferReader} -\alias{arrow::io::BufferReader} -\title{class arrow::io::BufferReader} -\description{ -class arrow::io::BufferReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git 
a/r/man/arrow__io__FileOutputStream.Rd b/r/man/arrow__io__FileOutputStream.Rd deleted file mode 100644 index 92eaac13c9f..00000000000 --- a/r/man/arrow__io__FileOutputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__FileOutputStream} -\alias{arrow__io__FileOutputStream} -\alias{arrow::io::FileOutputStream} -\title{class arrow::io::FileOutputStream} -\description{ -class arrow::io::FileOutputStream -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__FixedSizeBufferWriter.Rd b/r/man/arrow__io__FixedSizeBufferWriter.Rd deleted file mode 100644 index 39d8bb69c25..00000000000 --- a/r/man/arrow__io__FixedSizeBufferWriter.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__FixedSizeBufferWriter} -\alias{arrow__io__FixedSizeBufferWriter} -\alias{arrow::io::FixedSizeBufferWriter} -\title{class arrow::io::FixedSizeBufferWriter} -\description{ -class arrow::io::FixedSizeBufferWriter -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__InputStream.Rd b/r/man/arrow__io__InputStream.Rd deleted file mode 100644 index 37f83308b64..00000000000 --- a/r/man/arrow__io__InputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__InputStream} -\alias{arrow__io__InputStream} -\alias{arrow::io::InputStream} -\title{class arrow::io::InputStream} -\description{ -class arrow::io::InputStream -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__MemoryMappedFile.Rd b/r/man/arrow__io__MemoryMappedFile.Rd deleted file mode 100644 index 409bb17302a..00000000000 --- a/r/man/arrow__io__MemoryMappedFile.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand 
-% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__MemoryMappedFile} -\alias{arrow__io__MemoryMappedFile} -\alias{arrow::io::MemoryMappedFile} -\title{class arrow::io::MemoryMappedFile} -\description{ -class arrow::io::MemoryMappedFile -} -\section{Methods}{ - - -TODO -} - -\seealso{ -\code{\link[=mmap_open]{mmap_open()}}, \code{\link[=mmap_create]{mmap_create()}} -} -\keyword{datasets} diff --git a/r/man/arrow__io__MockOutputStream.Rd b/r/man/arrow__io__MockOutputStream.Rd deleted file mode 100644 index f0b2c06d7a5..00000000000 --- a/r/man/arrow__io__MockOutputStream.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__MockOutputStream} -\alias{arrow__io__MockOutputStream} -\alias{arrow::io::MockOutputStream} -\title{class arrow::io::MockOutputStream} -\description{ -class arrow::io::MockOutputStream -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__OutputStream.Rd b/r/man/arrow__io__OutputStream.Rd deleted file mode 100644 index c41b815c021..00000000000 --- a/r/man/arrow__io__OutputStream.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__OutputStream} -\alias{arrow__io__OutputStream} -\alias{arrow::io::OutputStream} -\title{OutputStream} -\description{ -OutputStream -} -\section{Methods}{ - -\itemize{ -\item \code{arrow::Buffer} \code{Read}(\code{int} nbytes): Read \code{nbytes} bytes -\item \code{void} \code{close}(): close the stream -} -} - -\keyword{datasets} diff --git a/r/man/arrow__io__RandomAccessFile.Rd b/r/man/arrow__io__RandomAccessFile.Rd deleted file mode 100644 index f8cb86abda6..00000000000 --- a/r/man/arrow__io__RandomAccessFile.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} 
-\name{arrow__io__RandomAccessFile} -\alias{arrow__io__RandomAccessFile} -\alias{arrow::io::RandomAccessFile} -\title{class arrow::io::RandomAccessFile} -\description{ -class arrow::io::RandomAccessFile -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__Readable.Rd b/r/man/arrow__io__Readable.Rd deleted file mode 100644 index b0b30a42302..00000000000 --- a/r/man/arrow__io__Readable.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__Readable} -\alias{arrow__io__Readable} -\alias{arrow::io::Readable} -\title{class arrow::io::Readable} -\description{ -class arrow::io::Readable -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__io__ReadableFile.Rd b/r/man/arrow__io__ReadableFile.Rd deleted file mode 100644 index 440149fbbb4..00000000000 --- a/r/man/arrow__io__ReadableFile.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io.R -\docType{class} -\name{arrow__io__ReadableFile} -\alias{arrow__io__ReadableFile} -\alias{arrow::io::ReadableFile} -\title{class arrow::io::ReadableFile} -\description{ -class arrow::io::ReadableFile -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__ipc__MessageReader.Rd b/r/man/arrow__ipc__MessageReader.Rd deleted file mode 100644 index 883e9e0618b..00000000000 --- a/r/man/arrow__ipc__MessageReader.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/message.R -\docType{class} -\name{arrow__ipc__MessageReader} -\alias{arrow__ipc__MessageReader} -\alias{arrow::ipc::MessageReader} -\title{class arrow::ipc::MessageReader} -\description{ -class arrow::ipc::MessageReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchFileReader.Rd 
b/r/man/arrow__ipc__RecordBatchFileReader.Rd deleted file mode 100644 index 675f636b365..00000000000 --- a/r/man/arrow__ipc__RecordBatchFileReader.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\docType{class} -\name{arrow__ipc__RecordBatchFileReader} -\alias{arrow__ipc__RecordBatchFileReader} -\alias{arrow::ipc::RecordBatchFileReader} -\title{class arrow::ipc::RecordBatchFileReader} -\description{ -class arrow::ipc::RecordBatchFileReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchFileWriter.Rd b/r/man/arrow__ipc__RecordBatchFileWriter.Rd deleted file mode 100644 index a80b55941fb..00000000000 --- a/r/man/arrow__ipc__RecordBatchFileWriter.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\docType{class} -\name{arrow__ipc__RecordBatchFileWriter} -\alias{arrow__ipc__RecordBatchFileWriter} -\alias{arrow::ipc::RecordBatchFileWriter} -\title{class arrow::ipc::RecordBatchFileWriter - -Writer for the Arrow binary file format} -\description{ -class arrow::ipc::RecordBatchFileWriter - -Writer for the Arrow binary file format -} -\section{usage}{ -\preformatted{writer <- RecordBatchFileWriter(sink, schema) - -writer$write_batch(batch) -writer$write_table(table) -writer$close() -} -} - -\section{Factory}{ - - -The \code{\link[=RecordBatchFileWriter]{RecordBatchFileWriter()}} function creates a record batch stream writer. 
-} - -\section{Methods}{ - -inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} -\itemize{ -\item \code{$write_batch(batch)}: Write record batch to stream -\item \code{$write_table(table)}: write Table to stream -\item \code{$close()}: close stream -} -} - -\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchStreamReader.Rd b/r/man/arrow__ipc__RecordBatchStreamReader.Rd deleted file mode 100644 index 49f57cce057..00000000000 --- a/r/man/arrow__ipc__RecordBatchStreamReader.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchReader.R -\docType{class} -\name{arrow__ipc__RecordBatchStreamReader} -\alias{arrow__ipc__RecordBatchStreamReader} -\alias{arrow::ipc::RecordBatchStreamReader} -\title{class arrow::ipc::RecordBatchStreamReader} -\description{ -class arrow::ipc::RecordBatchStreamReader -} -\section{Methods}{ - - -TODO -} - -\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd b/r/man/arrow__ipc__RecordBatchStreamWriter.Rd deleted file mode 100644 index 3d2030287d1..00000000000 --- a/r/man/arrow__ipc__RecordBatchStreamWriter.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\docType{class} -\name{arrow__ipc__RecordBatchStreamWriter} -\alias{arrow__ipc__RecordBatchStreamWriter} -\alias{arrow::ipc::RecordBatchStreamWriter} -\title{class arrow::ipc::RecordBatchStreamWriter - -Writer for the Arrow streaming binary format} -\description{ -class arrow::ipc::RecordBatchStreamWriter - -Writer for the Arrow streaming binary format -} -\section{usage}{ -\preformatted{writer <- RecordBatchStreamWriter(sink, schema) - -writer$write_batch(batch) -writer$write_table(table) -writer$close() -} -} - -\section{Factory}{ - - -The \code{\link[=RecordBatchStreamWriter]{RecordBatchStreamWriter()}} function creates a record batch stream writer. 
-} - -\section{Methods}{ - -inherited from \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter} -\itemize{ -\item \code{$write_batch(batch)}: Write record batch to stream -\item \code{$write_table(table)}: write Table to stream -\item \code{$close()}: close stream -} -} - -\keyword{datasets} diff --git a/r/man/arrow__ipc__RecordBatchWriter.Rd b/r/man/arrow__ipc__RecordBatchWriter.Rd deleted file mode 100644 index 08593df8524..00000000000 --- a/r/man/arrow__ipc__RecordBatchWriter.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatchWriter.R -\docType{class} -\name{arrow__ipc__RecordBatchWriter} -\alias{arrow__ipc__RecordBatchWriter} -\alias{arrow::ipc::RecordBatchWriter} -\title{class arrow::ipc::RecordBatchWriter} -\description{ -class arrow::ipc::RecordBatchWriter -} -\section{Methods}{ - -\itemize{ -\item \code{$write_batch(batch)}: Write record batch to stream -\item \code{$write_table(table)}: write Table to stream -\item \code{$close()}: close stream -} -} - -\section{Derived classes}{ - -\itemize{ -\item \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} implements the streaming binary format -\item \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} implements the binary file format -} -} - -\keyword{datasets} diff --git a/r/man/arrow__json__TableReader.Rd b/r/man/arrow__json__TableReader.Rd deleted file mode 100644 index 69b588f3f23..00000000000 --- a/r/man/arrow__json__TableReader.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/json.R -\docType{class} -\name{arrow__json__TableReader} -\alias{arrow__json__TableReader} -\alias{arrow::json::TableReader} -\title{class arrow::json::TableReader} -\description{ -class arrow::json::TableReader -} -\section{Methods}{ - -\itemize{ -\item \code{Read()} : read the JSON file as an 
\link[=arrow__Table]{arrow::Table} -} -} - -\keyword{datasets} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 60fd25d4bf1..5481ca55964 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -1,8 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/buffer.R +\docType{class} \name{buffer} \alias{buffer} -\title{Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object} +\alias{Buffer} +\title{Buffer class} \usage{ buffer(x) } @@ -10,8 +12,25 @@ buffer(x) \item{x}{R object. Only raw, numeric and integer vectors are currently supported} } \value{ -an instance of \link[=arrow__Buffer]{arrow::Buffer} that borrows memory from \code{x} +an instance of \code{Buffer} that borrows memory from \code{x} } \description{ -Create a \link[=arrow__Buffer]{arrow::Buffer} from an R object +A Buffer is an object containing a pointer to a piece of +contiguous memory with a particular size. } +\section{Factory}{ + +\code{buffer()} lets you create an \code{arrow::Buffer} from an R object +} + +\section{Methods}{ + +\itemize{ +\item \code{$is_mutable()} : +\item \code{$ZeroPadding()} : +\item \code{$size()} : +\item \code{$capacity()}: +} +} + +\keyword{datasets} diff --git a/r/man/chunked_array.Rd b/r/man/chunked_array.Rd deleted file mode 100644 index 07dac8a841d..00000000000 --- a/r/man/chunked_array.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ChunkedArray.R -\name{chunked_array} -\alias{chunked_array} -\title{create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors} -\usage{ -chunked_array(..., type = NULL) -} -\arguments{ -\item{\dots}{Vectors to coerce} - -\item{type}{currently ignored} -} -\description{ -create an \link[=arrow__ChunkedArray]{arrow::ChunkedArray} from various R vectors -} diff --git a/r/man/compression.Rd b/r/man/compression.Rd new file mode 100644 index 00000000000..e9d0ca5d493 --- /dev/null +++ b/r/man/compression.Rd @@ -0,0 
+1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compression.R +\docType{class} +\name{compression} +\alias{compression} +\alias{CompressedOutputStream} +\alias{CompressedInputStream} +\title{Compressed stream classes} +\description{ +\code{CompressedInputStream} and \code{CompressedOutputStream} +allow you to apply a \code{\link[=compression_codec]{compression_codec()}} to an +input or output stream. +} +\section{Factory}{ + + +The \code{CompressedInputStream$create()} and \code{CompressedOutputStream$create()} +factory methods instantiate the object and take the following arguments: +\itemize{ +\item \code{stream} An \link{InputStream} or \link{OutputStream}, respectively +\item \code{codec} A \code{Codec} +} +} + +\section{Methods}{ + + +Methods are inherited from \link{InputStream} and \link{OutputStream}, respectively +} + +\keyword{datasets} diff --git a/r/man/csv_table_reader.Rd b/r/man/csv_table_reader.Rd index 1377a63a670..a825b7b86c8 100644 --- a/r/man/csv_table_reader.Rd +++ b/r/man/csv_table_reader.Rd @@ -24,7 +24,7 @@ json_table_reader(file, read_options = json_read_options(), \item{...}{additional parameters.} } \value{ -An \code{arrow::csv::TableReader} or \code{arrow::json::TableReader} R6 +A CsvTableReader or JsonTableReader R6 object. Call \code{$Read()} on it to get an Arrow Table. } \description{ diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index a0355cdb1fa..9280738194f 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/type.R, R/List.R, R/Struct.R +% Please edit documentation in R/type.R, R/list.R, R/struct.R \name{data-type} \alias{data-type} \alias{int8} @@ -98,7 +98,7 @@ take any of those four values.} \item{...}{For \code{struct()}, a named list of types to define the struct columns} } \value{ -An Arrow type object inheriting from \code{arrow::DataType}. 
+An Arrow type object inheriting from DataType. } \description{ These functions create type objects corresponding to Arrow types. Use them diff --git a/r/man/default_memory_pool.Rd b/r/man/default_memory_pool.Rd index 1725ff0e10a..859b40631af 100644 --- a/r/man/default_memory_pool.Rd +++ b/r/man/default_memory_pool.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/memory_pool.R +% Please edit documentation in R/memory-pool.R \name{default_memory_pool} \alias{default_memory_pool} -\title{default \link[=arrow__MemoryPool]{arrow::MemoryPool}} +\title{default \link[=MemoryPool]{arrow::MemoryPool}} \usage{ default_memory_pool() } \value{ -the default \link[=arrow__MemoryPool]{arrow::MemoryPool} +the default \link[=MemoryPool]{arrow::MemoryPool} } \description{ -default \link[=arrow__MemoryPool]{arrow::MemoryPool} +default \link[=MemoryPool]{arrow::MemoryPool} } diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd index 334d67e937d..183513e8c47 100644 --- a/r/man/dictionary.Rd +++ b/r/man/dictionary.Rd @@ -14,7 +14,7 @@ dictionary(index_type, value_type, ordered = FALSE) \item{ordered}{Is this an ordered dictionary ?} } \value{ -An \link[=arrow__DictionaryType]{arrow::DictionaryType} +A \link{DictionaryType} } \description{ Create a dictionary type diff --git a/r/man/enums.Rd b/r/man/enums.Rd index c55170e1ec0..f7f76c3c035 100644 --- a/r/man/enums.Rd +++ b/r/man/enums.Rd @@ -11,7 +11,7 @@ \alias{MessageType} \alias{CompressionType} \title{Arrow enums} -\format{An object of class \code{arrow::TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} +\format{An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.} \usage{ TimeUnit diff --git a/r/man/field.Rd b/r/man/field.Rd deleted file mode 100644 index 8cf260a08f1..00000000000 --- a/r/man/field.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Field.R -\name{field} 
-\alias{field} -\title{Factory for a \code{arrow::Field}} -\usage{ -field(name, type, metadata) -} -\arguments{ -\item{name}{field name} - -\item{type}{logical type, instance of \code{arrow::DataType}} - -\item{metadata}{currently ignored} -} -\description{ -Factory for a \code{arrow::Field} -} -\examples{ -\donttest{ -try({ - field("x", int32()) -}) -} -} diff --git a/r/man/make_readable_file.Rd b/r/man/make_readable_file.Rd new file mode 100644 index 00000000000..4163cdddd19 --- /dev/null +++ b/r/man/make_readable_file.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io.R +\name{make_readable_file} +\alias{make_readable_file} +\title{Handle a range of possible input sources} +\usage{ +make_readable_file(file, mmap = TRUE) +} +\arguments{ +\item{file}{A character file name, raw vector, or an Arrow input stream} + +\item{mmap}{Logical: whether to memory-map the file (default \code{TRUE})} +} +\value{ +An \code{InputStream} or a subclass of one. 
+} +\description{ +Handle a range of possible input sources +} +\keyword{internal} diff --git a/r/man/mmap_create.Rd b/r/man/mmap_create.Rd index 050ae18c76f..b8551934808 100644 --- a/r/man/mmap_create.Rd +++ b/r/man/mmap_create.Rd @@ -12,7 +12,7 @@ mmap_create(path, size) \item{size}{size in bytes} } \value{ -a \link[=arrow__io__MemoryMappedFile]{arrow::io::MemoryMappedFile} +a \link[=MemoryMappedFile]{arrow::io::MemoryMappedFile} } \description{ Create a new read/write memory mapped file of a given size diff --git a/r/man/parquet_arrow_reader_properties.Rd b/r/man/parquet_arrow_reader_properties.Rd deleted file mode 100644 index eed75669b1c..00000000000 --- a/r/man/parquet_arrow_reader_properties.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parquet.R -\name{parquet_arrow_reader_properties} -\alias{parquet_arrow_reader_properties} -\title{Create a new ArrowReaderProperties instance} -\usage{ -parquet_arrow_reader_properties(use_threads = option_use_threads()) -} -\arguments{ -\item{use_threads}{use threads?} -} -\description{ -Create a new ArrowReaderProperties instance -} -\keyword{internal} diff --git a/r/man/parquet_file_reader.Rd b/r/man/parquet_file_reader.Rd deleted file mode 100644 index 6c42855d1d4..00000000000 --- a/r/man/parquet_file_reader.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parquet.R -\name{parquet_file_reader} -\alias{parquet_file_reader} -\title{Parquet file reader} -\usage{ -parquet_file_reader(file, props = parquet_arrow_reader_properties(), ...) 
-} -\arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} -\item{props}{reader file properties, as created by \code{\link[=parquet_arrow_reader_properties]{parquet_arrow_reader_properties()}}} -\item{...}{additional parameters} -} -\description{ -Parquet file reader -} diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index 4af5e9a027e..d0cd4fb2e2e 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -25,7 +25,7 @@ read_tsv_arrow(file, quote = "\\"", escape_double = TRUE, read_options = NULL, as_tibble = TRUE) } \arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} +\item{file}{A character file name, raw vector, or an Arrow input stream} \item{delim}{Single character used to separate fields within a record.} @@ -71,10 +71,10 @@ parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, et \item{read_options}{see \code{\link[=csv_read_options]{csv_read_options()}}} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} } \value{ -A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}. +A \code{data.frame}, or a Table if \code{as_tibble = FALSE}. } \description{ These functions uses the Arrow C++ CSV reader to read into a \code{data.frame}. diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index 07a54246eac..7e720059af7 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -7,7 +7,8 @@ read_feather(file, col_select = NULL, as_tibble = TRUE, ...) 
} \arguments{ -\item{file}{an \code{arrow::ipc::feather::TableReader} or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle} +\item{file}{A character file path, a raw vector, or \code{InputStream}, passed to +\code{FeatherTableReader$create()}.} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a @@ -15,12 +16,13 @@ read_feather(file, col_select = NULL, as_tibble = TRUE, ...) of columns, as used in \code{dplyr::select()}.} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} \item{...}{additional parameters} } \value{ -A \code{data.frame} if \code{as_tibble} is \code{TRUE} (the default), or a \link[=arrow__Table]{arrow::Table} otherwise +A \code{data.frame} if \code{as_tibble} is \code{TRUE} (the default), or an +\link[=Table]{arrow::Table} otherwise } \description{ Read a Feather file diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index a3879aceccd..7fcad2c4509 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -7,7 +7,7 @@ read_json_arrow(file, col_select = NULL, as_tibble = TRUE, ...) } \arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} +\item{file}{A character file name, raw vector, or an Arrow input stream} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a @@ -15,15 +15,15 @@ read_json_arrow(file, col_select = NULL, as_tibble = TRUE, ...) of columns, as used in \code{dplyr::select()}.} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} \item{...}{Additional options, passed to \code{json_table_reader()}} } \value{ -A \code{data.frame}, or an \code{arrow::Table} if \code{as_tibble = FALSE}. 
+A \code{data.frame}, or a Table if \code{as_tibble = FALSE}. } \description{ -Use \link[=arrow__json__TableReader]{arrow::json::TableReader} from \code{\link[=json_table_reader]{json_table_reader()}} +Using \link{JsonTableReader} } \examples{ \donttest{ diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index e63e30cc297..5a12e7199f2 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -5,10 +5,10 @@ \title{Read a Parquet file} \usage{ read_parquet(file, col_select = NULL, as_tibble = TRUE, - props = parquet_arrow_reader_properties(), ...) + props = ParquetReaderProperties$create(), ...) } \arguments{ -\item{file}{A character path to a local file, or an Arrow input stream} +\item{file}{A character file name, raw vector, or an Arrow input stream} \item{col_select}{A character vector of column names to keep, as in the "select" argument to \code{data.table::fread()}, or a @@ -16,14 +16,14 @@ read_parquet(file, col_select = NULL, as_tibble = TRUE, of columns, as used in \code{dplyr::select()}.} \item{as_tibble}{Should the function return a \code{data.frame} or an -\link[=arrow__Table]{arrow::Table}?} +\link[=Table]{arrow::Table}?} -\item{props}{reader file properties, as created by \code{\link[=parquet_arrow_reader_properties]{parquet_arrow_reader_properties()}}} +\item{props}{\link{ParquetReaderProperties}} -\item{...}{additional parameters} +\item{...}{Additional arguments passed to \code{ParquetFileReader$create()}} } \value{ -A \link[=arrow__Table]{arrow::Table}, or a \code{data.frame} if \code{as_tibble} is +A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_tibble} is \code{TRUE}. } \description{ @@ -32,8 +32,7 @@ This function enables you to read Parquet files into R. 
} \examples{ \donttest{ -try({ - df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) -}) +df <- read_parquet(system.file("v0.7.1.parquet", package="arrow")) +head(df) } } diff --git a/r/man/read_record_batch.Rd b/r/man/read_record_batch.Rd index fef12cbac4a..f335bae2012 100644 --- a/r/man/read_record_batch.Rd +++ b/r/man/read_record_batch.Rd @@ -1,19 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_record_batch.R +% Please edit documentation in R/read-record-batch.R \name{read_record_batch} \alias{read_record_batch} -\title{read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema}} +\title{read \link[=RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=schema]{arrow::Schema}} \usage{ read_record_batch(obj, schema) } \arguments{ -\item{obj}{a \link[=arrow__ipc__Message]{arrow::ipc::Message}, a \link[=arrow__io__InputStream]{arrow::io::InputStream}, a \link[=arrow__Buffer]{arrow::Buffer}, or a raw vector} +\item{obj}{a \link[=Message]{arrow::Message}, a \link[=InputStream]{arrow::io::InputStream}, a \link[=buffer]{Buffer}, or a raw vector} -\item{schema}{a \link[=arrow__Schema]{arrow::Schema}} +\item{schema}{a \link[=schema]{arrow::Schema}} } \value{ -a \link[=arrow__RecordBatch]{arrow::RecordBatch} +a \link[=RecordBatch]{arrow::RecordBatch} } \description{ -read \link[=arrow__RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=arrow__Schema]{arrow::Schema} +read \link[=RecordBatch]{arrow::RecordBatch} as encapsulated IPC message, given a known \link[=schema]{arrow::Schema} } diff --git a/r/man/read_schema.Rd b/r/man/read_schema.Rd index 408fd1baaa5..1573be2bd5b 100644 --- a/r/man/read_schema.Rd +++ b/r/man/read_schema.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Schema.R +% Please edit documentation in R/schema.R 
\name{read_schema} \alias{read_schema} \title{read a Schema from a stream} diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index e556b8b0773..9475fbe4abb 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -1,9 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_table.R +% Please edit documentation in R/read-table.R \name{read_table} \alias{read_table} \alias{read_arrow} -\title{Read an \link[=arrow__Table]{arrow::Table} from a stream} +\title{Read an \link[=Table]{arrow::Table} from a stream} \usage{ read_table(stream) @@ -12,29 +12,29 @@ read_arrow(stream) \arguments{ \item{stream}{stream. \itemize{ -\item a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader}: -read an \link[=arrow__Table]{arrow::Table} +\item a \link[=RecordBatchFileReader]{arrow::RecordBatchFileReader}: +read an \link[=Table]{arrow::Table} from all the record batches in the reader -\item a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader}: -read an \link[=arrow__Table]{arrow::Table} from the remaining record batches +\item a \link[=RecordBatchStreamReader]{arrow::RecordBatchStreamReader}: +read an \link[=Table]{arrow::Table} from the remaining record batches in the reader \item a string file path: interpret the file as an arrow -binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} +binary file format, and uses a \link[=RecordBatchFileReader]{arrow::RecordBatchFileReader} to process it. 
-\item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} +\item a raw vector: read using a \link[=RecordBatchStreamReader]{arrow::RecordBatchStreamReader} }} } \value{ \itemize{ -\item \code{read_table} returns an \link[=arrow__Table]{arrow::Table} +\item \code{read_table} returns an \link[=Table]{arrow::Table} \item \code{read_arrow} returns a \code{data.frame} } } \description{ -Read an \link[=arrow__Table]{arrow::Table} from a stream +Read an \link[=Table]{arrow::Table} from a stream } \details{ -The methods using \link[=arrow__ipc__RecordBatchFileReader]{arrow::ipc::RecordBatchFileReader} and -\link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} offer the most +The methods using \link[=RecordBatchFileReader]{arrow::RecordBatchFileReader} and +\link[=RecordBatchStreamReader]{arrow::RecordBatchStreamReader} offer the most flexibility. The other methods are for convenience. } diff --git a/r/man/record_batch.Rd b/r/man/record_batch.Rd index a9680bf3735..bf53abd7092 100644 --- a/r/man/record_batch.Rd +++ b/r/man/record_batch.Rd @@ -1,19 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RecordBatch.R +% Please edit documentation in R/record-batch.R \name{record_batch} \alias{record_batch} -\title{Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame} +\title{Create an \link[=RecordBatch]{arrow::RecordBatch} from a data frame} \usage{ record_batch(..., schema = NULL) } \arguments{ -\item{...}{A variable number of arrow::Array} +\item{...}{A variable number of Array} \item{schema}{a arrow::Schema} } \value{ -a \link[=arrow__RecordBatch]{arrow::RecordBatch} +a \link[=RecordBatch]{arrow::RecordBatch} } \description{ -Create an \link[=arrow__RecordBatch]{arrow::RecordBatch} from a data frame +Create an \link[=RecordBatch]{arrow::RecordBatch} from a data frame } diff --git a/r/man/schema.Rd b/r/man/schema.Rd deleted file mode 
100644 index 622e5a7e94c..00000000000 --- a/r/man/schema.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Schema.R -\name{schema} -\alias{schema} -\title{Create a schema} -\usage{ -schema(...) -} -\arguments{ -\item{...}{named list of \link[=data-type]{data types}} -} -\value{ -A \link[=arrow__Schema]{schema} object. -} -\description{ -This function lets you define a schema for a table. This is useful when you -want to convert an R \code{data.frame} to Arrow but don't want to rely on the -default mapping of R types to Arrow types, such as when you want to choose a -specific numeric precision. -} diff --git a/r/man/table.Rd b/r/man/table.Rd deleted file mode 100644 index fbf9632a03a..00000000000 --- a/r/man/table.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Table.R -\name{table} -\alias{table} -\title{Create an arrow::Table from a data frame} -\usage{ -table(..., schema = NULL) -} -\arguments{ -\item{...}{arrays, chunked arrays, or R vectors} - -\item{schema}{a schema. The default (\code{NULL}) infers the schema from the \code{...}} -} -\value{ -an arrow::Table -} -\description{ -Create an arrow::Table from a data frame -} diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd index 9ba65cb18f3..1820e0e1536 100644 --- a/r/man/write_arrow.Rd +++ b/r/man/write_arrow.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/write_arrow.R +% Please edit documentation in R/write-arrow.R \name{write_arrow} \alias{write_arrow} \title{Write Arrow formatted data} @@ -7,25 +7,25 @@ write_arrow(x, stream, ...) 
} \arguments{ -\item{x}{an \link[=arrow__Table]{arrow::Table}, an \link[=arrow__RecordBatch]{arrow::RecordBatch} or a data.frame} +\item{x}{an \link[=Table]{arrow::Table}, an \link[=RecordBatch]{arrow::RecordBatch} or a data.frame} \item{stream}{where to serialize to \itemize{ -\item A \link[=arrow__ipc__RecordBatchWriter]{arrow::ipc::RecordBatchWriter}: the \code{$write()} +\item A \link[=RecordBatchWriter]{arrow::RecordBatchWriter}: the \code{$write()} of \code{x} is used. The stream is left open. This uses the streaming format or the binary file format depending on the type of the writer. \item A string file path: \code{x} is serialized with -a \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter}, i.e. +a \link[=RecordBatchFileWriter]{arrow::RecordBatchFileWriter}, i.e. using the binary file format. \item A raw vector: typically of length zero (its data is ignored, and only used for dispatch). \code{x} is serialized using the streaming format, i.e. using the -\link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} +\link[=RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} }} \item{...}{extra parameters, currently ignored -\code{write_arrow} is a convenience function, the classes \link[=arrow__ipc__RecordBatchFileWriter]{arrow::ipc::RecordBatchFileWriter} -and \link[=arrow__ipc__RecordBatchStreamWriter]{arrow::ipc::RecordBatchStreamWriter} can be used for more flexibility.} +\code{write_arrow} is a convenience function, the classes \link[=RecordBatchFileWriter]{arrow::RecordBatchFileWriter} +and \link[=RecordBatchStreamWriter]{arrow::RecordBatchStreamWriter} can be used for more flexibility.} } \description{ Write Arrow formatted data diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 9eb20021caf..24636a09cb0 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -7,9 +7,9 @@ write_feather(data, stream) } \arguments{ -\item{data}{\code{data.frame} or \code{arrow::RecordBatch}} 
+\item{data}{\code{data.frame} or RecordBatch} -\item{stream}{A file path or an \code{arrow::io::OutputStream}} +\item{stream}{A file path or an OutputStream} } \description{ Write data in the Feather format diff --git a/r/man/write_feather_RecordBatch.Rd b/r/man/write_feather_RecordBatch.Rd deleted file mode 100644 index b234f7fda4e..00000000000 --- a/r/man/write_feather_RecordBatch.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/feather.R -\name{write_feather_RecordBatch} -\alias{write_feather_RecordBatch} -\title{Write a record batch in the feather format} -\usage{ -write_feather_RecordBatch(data, stream) -} -\arguments{ -\item{data}{\code{data.frame} or \code{arrow::RecordBatch}} - -\item{stream}{A file path or an \code{arrow::io::OutputStream}} -} -\description{ -Write a record batch in the feather format -} -\keyword{internal} diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index 192d950d82a..b0fb7bc6761 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -7,7 +7,7 @@ write_parquet(table, file) } \arguments{ -\item{table}{An \link[=arrow__Table]{arrow::Table}, or an object convertible to it} +\item{table}{An \link[=Table]{arrow::Table}, or an object convertible to it} \item{file}{a file path} } @@ -17,10 +17,8 @@ This function enables you to write Parquet files from R. 
} \examples{ \donttest{ -try({ - tf <- tempfile(fileext = ".parquet") - on.exit(unlink(tf)) - write_parquet(tibble::tibble(x = 1:5), tf) -}) +tf <- tempfile(fileext = ".parquet") +on.exit(unlink(tf)) +write_parquet(tibble::tibble(x = 1:5), tf) } } diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 6d9c8dcfc19..08686983909 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -793,7 +793,7 @@ std::shared_ptr GetFactorType(SEXP factor) { std::shared_ptr InferType(SEXP x) { switch (TYPEOF(x)) { case ENVSXP: - if (Rf_inherits(x, "arrow::Array")) { + if (Rf_inherits(x, "Array")) { Rcpp::ConstReferenceSmartPtrInputParameter> array( x); return static_cast>(array)->type(); @@ -976,7 +976,7 @@ arrow::Status CheckCompatibleStruct(SEXP obj, std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_infered) { // short circuit if `x` is already an Array - if (Rf_inherits(x, "arrow::Array")) { + if (Rf_inherits(x, "Array")) { return Rcpp::ConstReferenceSmartPtrInputParameter>(x); } diff --git a/r/src/array__to_vector.cpp b/r/src/array_to_vector.cpp similarity index 100% rename from r/src/array__to_vector.cpp rename to r/src/array_to_vector.cpp diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 5af51c59677..0ebac0cc5a1 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -394,7 +394,54 @@ RcppExport SEXP _arrow_ListArray__raw_value_offsets(SEXP array_sexp){ } #endif -// array__to_vector.cpp +// array_from_vector.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Array__infer_type(SEXP x); +RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter::type x(x_sexp); + return Rcpp::wrap(Array__infer_type(x)); +END_RCPP +} +#else +RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ + Rf_error("Cannot call Array__infer_type(). Please use arrow::install_arrow() to install required runtime libraries. 
"); +} +#endif + +// array_from_vector.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Array__from_vector(SEXP x, SEXP s_type); +RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter::type x(x_sexp); + Rcpp::traits::input_parameter::type s_type(s_type_sexp); + return Rcpp::wrap(Array__from_vector(x, s_type)); +END_RCPP +} +#else +RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ + Rf_error("Cannot call Array__from_vector(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + +// array_from_vector.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr ChunkedArray__from_list(Rcpp::List chunks, SEXP s_type); +RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter::type chunks(chunks_sexp); + Rcpp::traits::input_parameter::type s_type(s_type_sexp); + return Rcpp::wrap(ChunkedArray__from_list(chunks, s_type)); +END_RCPP +} +#else +RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ + Rf_error("Cannot call ChunkedArray__from_list(). Please use arrow::install_arrow() to install required runtime libraries. 
"); +} +#endif + +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) SEXP Array__as_vector(const std::shared_ptr& array); RcppExport SEXP _arrow_Array__as_vector(SEXP array_sexp){ @@ -409,7 +456,7 @@ RcppExport SEXP _arrow_Array__as_vector(SEXP array_sexp){ } #endif -// array__to_vector.cpp +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); RcppExport SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp){ @@ -424,7 +471,7 @@ RcppExport SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_array_sexp){ } #endif -// array__to_vector.cpp +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) Rcpp::List RecordBatch__to_dataframe(const std::shared_ptr& batch, bool use_threads); RcppExport SEXP _arrow_RecordBatch__to_dataframe(SEXP batch_sexp, SEXP use_threads_sexp){ @@ -440,7 +487,7 @@ RcppExport SEXP _arrow_RecordBatch__to_dataframe(SEXP batch_sexp, SEXP use_threa } #endif -// array__to_vector.cpp +// array_to_vector.cpp #if defined(ARROW_R_WITH_ARROW) Rcpp::List Table__to_dataframe(const std::shared_ptr& table, bool use_threads); RcppExport SEXP _arrow_Table__to_dataframe(SEXP table_sexp, SEXP use_threads_sexp){ @@ -456,53 +503,6 @@ RcppExport SEXP _arrow_Table__to_dataframe(SEXP table_sexp, SEXP use_threads_sex } #endif -// array_from_vector.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Array__infer_type(SEXP x); -RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter::type x(x_sexp); - return Rcpp::wrap(Array__infer_type(x)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Array__infer_type(SEXP x_sexp){ - Rf_error("Cannot call Array__infer_type(). Please use arrow::install_arrow() to install required runtime libraries. 
"); -} -#endif - -// array_from_vector.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Array__from_vector(SEXP x, SEXP s_type); -RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter::type x(x_sexp); - Rcpp::traits::input_parameter::type s_type(s_type_sexp); - return Rcpp::wrap(Array__from_vector(x, s_type)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ - Rf_error("Cannot call Array__from_vector(). Please use arrow::install_arrow() to install required runtime libraries. "); -} -#endif - -// array_from_vector.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ChunkedArray__from_list(Rcpp::List chunks, SEXP s_type); -RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter::type chunks(chunks_sexp); - Rcpp::traits::input_parameter::type s_type(s_type_sexp); - return Rcpp::wrap(ChunkedArray__from_list(chunks, s_type)); -END_RCPP -} -#else -RcppExport SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ - Rf_error("Cannot call ChunkedArray__from_list(). Please use arrow::install_arrow() to install required runtime libraries. 
"); -} -#endif - // arraydata.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr ArrayData__get_type(const std::shared_ptr& x); @@ -3725,13 +3725,13 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, + { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, + { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, + { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, - { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, - { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, - { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index f625c89c9e7..19ba471787f 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -116,12 +116,12 @@ std::shared_ptr Time64__initialize(arrow::TimeUnit::type unit) // [[arrow::export]] SEXP list__(SEXP x) { - if (Rf_inherits(x, "arrow::Field")) { + if (Rf_inherits(x, "Field")) { Rcpp::ConstReferenceSmartPtrInputParameter> field(x); return wrap(arrow::list(field)); } - if (Rf_inherits(x, "arrow::DataType")) { + if (Rf_inherits(x, "DataType")) { 
Rcpp::ConstReferenceSmartPtrInputParameter> type(x); return wrap(arrow::list(type)); } diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 11d3d6ab163..706851a4d11 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -234,7 +234,7 @@ std::shared_ptr RecordBatch__from_arrays__known_schema( // [[arrow::export]] std::shared_ptr RecordBatch__from_arrays(SEXP schema_sxp, SEXP lst) { - if (Rf_inherits(schema_sxp, "arrow::Schema")) { + if (Rf_inherits(schema_sxp, "Schema")) { return RecordBatch__from_arrays__known_schema( arrow::r::extract(schema_sxp), lst); } diff --git a/r/src/table.cpp b/r/src/table.cpp index b179b0c626d..cc1e81df4eb 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -100,7 +100,7 @@ std::shared_ptr Table__select(const std::shared_ptr& bool all_record_batches(SEXP lst) { R_xlen_t n = XLENGTH(lst); for (R_xlen_t i = 0; i < n; i++) { - if (!Rf_inherits(VECTOR_ELT(lst, i), "arrow::RecordBatch")) return false; + if (!Rf_inherits(VECTOR_ELT(lst, i), "RecordBatch")) return false; } return true; } @@ -114,7 +114,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { auto batches = arrow::r::List_to_shared_ptr_vector(lst); std::shared_ptr tab; - if (Rf_inherits(schema_sxp, "arrow::Schema")) { + if (Rf_inherits(schema_sxp, "Schema")) { auto schema = arrow::r::extract(schema_sxp); STOP_IF_NOT_OK(arrow::Table::FromRecordBatches(schema, batches, &tab)); } else { @@ -135,11 +135,11 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { SEXP names = Rf_getAttrib(lst, R_NamesSymbol); auto fill_one_column = [&columns, &fields](int j, SEXP x, SEXP name) { - if (Rf_inherits(x, "arrow::ChunkedArray")) { + if (Rf_inherits(x, "ChunkedArray")) { auto chunked_array = arrow::r::extract(x); fields[j] = arrow::field(CHAR(name), chunked_array->type()); columns[j] = chunked_array; - } else if (Rf_inherits(x, "arrow::Array")) { + } else if (Rf_inherits(x, "Array")) { auto array = arrow::r::extract(x); fields[j] = 
arrow::field(CHAR(name), array->type()); columns[j] = std::make_shared(array); @@ -166,15 +166,15 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { } schema = std::make_shared(std::move(fields)); - } else if (Rf_inherits(schema_sxp, "arrow::Schema")) { + } else if (Rf_inherits(schema_sxp, "Schema")) { // use the schema that is given schema = arrow::r::extract(schema_sxp); auto fill_one_column = [&columns, &schema](int j, SEXP x) { - if (Rf_inherits(x, "arrow::ChunkedArray")) { + if (Rf_inherits(x, "ChunkedArray")) { auto chunked_array = arrow::r::extract(x); columns[j] = chunked_array; - } else if (Rf_inherits(x, "arrow::Array")) { + } else if (Rf_inherits(x, "Array")) { auto array = arrow::r::extract(x); columns[j] = std::make_shared(array); } else { diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 4a903fac923..a50dbbbe9ca 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -15,17 +15,17 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Array") +context("Array") test_that("Array", { - x <- array(c(1:10, 1:10, 1:5)) + x <- Array$create(c(1:10, 1:10, 1:5)) expect_equal(x$type, int32()) - expect_equal(x$length(), 25L) + expect_equal(length(x), 25L) expect_equal(x$as_vector(), c(1:10, 1:10, 1:5)) y <- x$Slice(10) expect_equal(y$type, int32()) - expect_equal(y$length(), 15L) + expect_equal(length(y), 15L) expect_equal(y$as_vector(), c(1:10, 1:5)) expect_true(x$RangeEquals(y, 10, 24, 0)) @@ -35,7 +35,7 @@ test_that("Array", { expect_equal(z$as_vector(), c(1:5)) expect_true(x$RangeEquals(z, 10, 15, 0)) - x_dbl <- array(c(1,2,3,4,5,6)) + x_dbl <- Array$create(c(1,2,3,4,5,6)) expect_equal(x_dbl$type, float64()) expect_equal(x_dbl$length(), 6L) expect_equal(x_dbl$as_vector(), as.numeric(1:6)) @@ -53,8 +53,8 @@ test_that("Array", { }) test_that("Array supports NA", { - x_int <- array(as.integer(c(1:10, NA))) - x_dbl <- array(as.numeric(c(1:10, NA))) + x_int <- Array$create(as.integer(c(1:10, NA))) + x_dbl <- Array$create(as.numeric(c(1:10, NA))) expect_true(x_int$IsValid(0L)) expect_true(x_dbl$IsValid(0L)) expect_true(x_int$IsNull(10L)) @@ -68,19 +68,19 @@ test_that("Array supports NA", { test_that("Array supports logical vectors (ARROW-3341)", { # with NA x <- sample(c(TRUE, FALSE, NA), 1000, replace = TRUE) - arr_lgl <- array(x) + arr_lgl <- Array$create(x) expect_identical(x, arr_lgl$as_vector()) # without NA x <- sample(c(TRUE, FALSE), 1000, replace = TRUE) - arr_lgl <- array(x) + arr_lgl <- Array$create(x) expect_identical(x, arr_lgl$as_vector()) }) test_that("Array supports character vectors (ARROW-3339)", { # with NA x <- c("itsy", NA, "spider") - arr_chr <- array(x) + arr_chr <- Array$create(x) expect_equal(arr_chr$length(), 3L) expect_identical(arr_chr$as_vector(), x) expect_true(arr_chr$IsValid(0)) @@ -92,51 +92,51 @@ test_that("Array supports character vectors (ARROW-3339)", { # without NA x <- c("itsy", "bitsy", "spider") - arr_chr <- array(x) + arr_chr <- Array$create(x) 
expect_equal(arr_chr$length(), 3L) expect_identical(arr_chr$as_vector(), x) }) test_that("empty arrays are supported", { x <- character() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- integer() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- numeric() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- factor(character()) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- logical() - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) }) test_that("array with all nulls are supported", { nas <- c(NA, NA) x <- as.logical(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.integer(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.numeric(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.character(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) x <- as.factor(nas) - expect_equal(array(x)$as_vector(), x) + expect_equal(Array$create(x)$as_vector(), x) }) test_that("Array supports unordered factors (ARROW-3355)", { # without NA f <- factor(c("itsy", "bitsy", "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 4L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -152,7 +152,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { # with NA f <- factor(c("itsy", "bitsy", NA, "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 5L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -171,7 +171,7 @@ test_that("Array supports unordered factors (ARROW-3355)", { test_that("Array supports ordered factors 
(ARROW-3355)", { # without NA f <- ordered(c("itsy", "bitsy", "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 4L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -187,7 +187,7 @@ test_that("Array supports ordered factors (ARROW-3355)", { # with NA f <- ordered(c("itsy", "bitsy", NA, "spider", "spider")) - arr_fac <- array(f) + arr_fac <- Array$create(f) expect_equal(arr_fac$length(), 5L) expect_equal(arr_fac$type$index_type, int8()) expect_identical(arr_fac$as_vector(), f) @@ -205,20 +205,20 @@ test_that("Array supports ordered factors (ARROW-3355)", { test_that("array supports Date (ARROW-3340)", { d <- Sys.Date() + 1:10 - a <- array(d) + a <- Array$create(d) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) d[5] <- NA - a <- array(d) + a <- Array$create(d) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) expect_true(a$IsNull(4)) d2 <- d + .5 - a <- array(d2) + a <- Array$create(d2) expect_equal(a$type, date32()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), d) @@ -227,14 +227,14 @@ test_that("array supports Date (ARROW-3340)", { test_that("array supports POSIXct (ARROW-3340)", { times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10 - a <- array(times) + a <- Array$create(times) expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) expect_equal(as.numeric(a$as_vector()), as.numeric(times)) times[5] <- NA - a <- array(times) + a <- Array$create(times) expect_equal(a$type$name, "timestamp") expect_equal(a$type$unit(), unclass(TimeUnit$MICRO)) expect_equal(a$length(), 10L) @@ -244,13 +244,13 @@ test_that("array supports POSIXct (ARROW-3340)", { test_that("array supports integer64", { x <- bit64::as.integer64(1:10) - a <- array(x) + a <- Array$create(x) expect_equal(a$type, int64()) expect_equal(a$length(), 
10L) expect_equal(a$as_vector(), x) x[4] <- NA - a <- array(x) + a <- Array$create(x) expect_equal(a$type, int64()) expect_equal(a$length(), 10L) expect_equal(a$as_vector(), x) @@ -259,18 +259,18 @@ test_that("array supports integer64", { test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", { x <- bit64::as.integer64(NA) - a <- array(x) + a <- Array$create(x) expect_true(is.na(a$as_vector())) }) test_that("array supports difftime", { time <- hms::hms(56, 34, 12) - a <- array(c(time, time)) + a <- Array$create(c(time, time)) expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_equal(a$as_vector(), c(time, time)) - a <- array(vctrs::vec_c(time, NA)) + a <- Array$create(vctrs::vec_c(time, NA)) expect_equal(a$type, time32(unit = TimeUnit$SECOND)) expect_equal(a$length(), 2L) expect_true(a$IsNull(1)) @@ -280,13 +280,13 @@ test_that("array supports difftime", { test_that("support for NaN (ARROW-3615)", { x <- c(1, NA, NaN, -1) - y <- array(x) + y <- Array$create(x) expect_true(y$IsValid(2)) expect_equal(y$null_count, 1L) }) test_that("integer types casts (ARROW-3741)", { - a <- array(c(1:10, NA)) + a <- Array$create(c(1:10, NA)) a_int8 <- a$cast(int8()) a_int16 <- a$cast(int16()) a_int32 <- a$cast(int32()) @@ -317,7 +317,7 @@ test_that("integer types casts (ARROW-3741)", { }) test_that("integer types cast safety (ARROW-3741, ARROW-5541)", { - a <- array(-(1:10)) + a <- Array$create(-(1:10)) expect_error(a$cast(uint8()), regexp = "Integer value out of bounds") expect_error(a$cast(uint16()), regexp = "Integer value out of bounds") expect_error(a$cast(uint32()), regexp = "Integer value out of bounds") @@ -331,7 +331,7 @@ test_that("integer types cast safety (ARROW-3741, ARROW-5541)", { test_that("float types casts (ARROW-3741)", { x <- c(1, 2, 3, NA) - a <- array(x) + a <- Array$create(x) a_f32 <- a$cast(float32()) a_f64 <- a$cast(float64()) @@ -347,12 +347,12 @@ test_that("float types casts (ARROW-3741)", { 
test_that("cast to half float works", { skip("until https://issues.apache.org/jira/browse/ARROW-3802") - a <- array(1:4) + a <- Array$create(1:4) a_f16 <- a$cast(float16()) expect_equal(a_16$type, float16()) }) -test_that("array() supports the type= argument. conversion from INTSXP and int64 to all int types", { +test_that("Array$create() supports the type= argument. conversion from INTSXP and int64 to all int types", { num_int32 <- 12L num_int64 <- bit64::as.integer64(10) @@ -362,38 +362,38 @@ test_that("array() supports the type= argument. conversion from INTSXP and int64 float32(), float64() ) for(type in types) { - expect_equal(array(num_int32, type = type)$type, type) - expect_equal(array(num_int64, type = type)$type, type) + expect_equal(Array$create(num_int32, type = type)$type, type) + expect_equal(Array$create(num_int64, type = type)$type, type) } }) -test_that("array() aborts on overflow", { - expect_error(array(128L, type = int8())$type, "Invalid.*downsize") - expect_error(array(-129L, type = int8())$type, "Invalid.*downsize") +test_that("Array$create() aborts on overflow", { + expect_error(Array$create(128L, type = int8())$type, "Invalid.*downsize") + expect_error(Array$create(-129L, type = int8())$type, "Invalid.*downsize") - expect_error(array(256L, type = uint8())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint8())$type, "Invalid.*downsize") + expect_error(Array$create(256L, type = uint8())$type, "Invalid.*downsize") + expect_error(Array$create(-1L, type = uint8())$type, "Invalid.*downsize") - expect_error(array(32768L, type = int16())$type, "Invalid.*downsize") - expect_error(array(-32769L, type = int16())$type, "Invalid.*downsize") + expect_error(Array$create(32768L, type = int16())$type, "Invalid.*downsize") + expect_error(Array$create(-32769L, type = int16())$type, "Invalid.*downsize") - expect_error(array(65536L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint16())$type, "Invalid.*downsize") + 
expect_error(Array$create(65536L, type = uint16())$type, "Invalid.*downsize") + expect_error(Array$create(-1L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(65536L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(-1L, type = uint16())$type, "Invalid.*downsize") + expect_error(Array$create(65536L, type = uint16())$type, "Invalid.*downsize") + expect_error(Array$create(-1L, type = uint16())$type, "Invalid.*downsize") - expect_error(array(bit64::as.integer64(2^31), type = int32()), "Invalid.*downsize") - expect_error(array(bit64::as.integer64(2^32), type = uint32()), "Invalid.*downsize") + expect_error(Array$create(bit64::as.integer64(2^31), type = int32()), "Invalid.*downsize") + expect_error(Array$create(bit64::as.integer64(2^32), type = uint32()), "Invalid.*downsize") }) -test_that("array() can convert doubles to integer", { +test_that("Array$create() does not convert doubles to integer", { types <- list( int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64() ) for(type in types) { - a <- array(10, type = type) + a <- Array$create(10, type = type) expect_equal(a$type, type) # exception for now because we cannot handle @@ -404,45 +404,44 @@ test_that("array() can convert doubles to integer", { } }) -test_that("array() converts raw vectors to uint8 arrays (ARROW-3794)", { - expect_equal(array(as.raw(1:10))$type, uint8()) +test_that("Array$create() converts raw vectors to uint8 arrays (ARROW-3794)", { + expect_equal(Array$create(as.raw(1:10))$type, uint8()) }) test_that("Array$as_vector() converts to integer (ARROW-3794)", { - a <- array((-128):127)$cast(int8()) + a <- Array$create((-128):127)$cast(int8()) expect_equal(a$type, int8()) expect_equal(a$as_vector(), (-128):127) - a <- array(0:255)$cast(uint8()) + a <- Array$create(0:255)$cast(uint8()) expect_equal(a$type, uint8()) expect_equal(a$as_vector(), 0:255) }) -test_that("array() recognise arrow::Array (ARROW-3815)", { - a <- array(1:10) - expect_equal(a, 
array(a)) +test_that("Array$create() recognise arrow::Array (ARROW-3815)", { + a <- Array$create(1:10) + expect_equal(a, Array$create(a)) }) -test_that("array() handles data frame -> struct arrays (ARROW-3811)", { +test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", { df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10]) - a <- array(df) + a <- Array$create(df) expect_equal(a$type, struct(x = int32(), y = float64(), z = utf8())) expect_equivalent(a$as_vector(), df) }) -test_that("array() can handle data frame with custom struct type (not infered)", { +test_that("Array$create() can handle data frame with custom struct type (not infered)", { df <- tibble::tibble(x = 1:10, y = 1:10) type <- struct(x = float64(), y = int16()) - a <- array(df, type = type) + a <- Array$create(df, type = type) expect_equal(a$type, type) type <- struct(x = float64(), y = int16(), z = int32()) - expect_error(array(df, type = type), regexp = "Number of fields in struct.* incompatible with number of columns in the data frame") + expect_error(Array$create(df, type = type), regexp = "Number of fields in struct.* incompatible with number of columns in the data frame") type <- struct(y = int16(), x = float64()) - expect_error(array(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame") + expect_error(Array$create(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame") type <- struct(x = float64(), y = utf8()) - expect_error(array(df, type = type), regexp = "Cannot convert R object to string array") + expect_error(Array$create(df, type = type), regexp = "Cannot convert R object to string array") }) - diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index d5a141c87ff..e9eff464818 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -15,7 +15,7 @@ # specific language governing 
permissions and limitations # under the License. -context("arrow::RecordBatch") +context("RecordBatch") test_that("RecordBatch", { tbl <- tibble::tibble( @@ -32,7 +32,7 @@ test_that("RecordBatch", { schema( int = int32(), dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), array(letters[1:10])) + fct = dictionary(int32(), Array$create(letters[1:10])) ) ) expect_equal(batch$num_columns, 5L) @@ -45,34 +45,34 @@ test_that("RecordBatch", { expect_equal(names(batch), c("int", "dbl", "lgl", "chr", "fct")) col_int <- batch$column(0) - expect_true(inherits(col_int, 'arrow::Array')) + expect_true(inherits(col_int, 'Array')) expect_equal(col_int$as_vector(), tbl$int) expect_equal(col_int$type, int32()) col_dbl <- batch$column(1) - expect_true(inherits(col_dbl, 'arrow::Array')) + expect_true(inherits(col_dbl, 'Array')) expect_equal(col_dbl$as_vector(), tbl$dbl) expect_equal(col_dbl$type, float64()) col_lgl <- batch$column(2) - expect_true(inherits(col_dbl, 'arrow::Array')) + expect_true(inherits(col_dbl, 'Array')) expect_equal(col_lgl$as_vector(), tbl$lgl) expect_equal(col_lgl$type, boolean()) col_chr <- batch$column(3) - expect_true(inherits(col_chr, 'arrow::Array')) + expect_true(inherits(col_chr, 'Array')) expect_equal(col_chr$as_vector(), tbl$chr) expect_equal(col_chr$type, utf8()) col_fct <- batch$column(4) - expect_true(inherits(col_fct, 'arrow::Array')) + expect_true(inherits(col_fct, 'Array')) expect_equal(col_fct$as_vector(), tbl$fct) - expect_equal(col_fct$type, dictionary(int32(), array(letters[1:10]))) + expect_equal(col_fct$type, dictionary(int32(), Array$create(letters[1:10]))) batch2 <- batch$RemoveColumn(0) expect_equal( batch2$schema, - schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), array(letters[1:10]))) + schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), Array$create(letters[1:10]))) ) expect_equal(batch2$column(0), batch$column(1)) 
expect_identical(as.data.frame(batch2), tbl[,-1]) @@ -103,7 +103,7 @@ test_that("RecordBatch with 0 rows are supported", { dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), array(c("a", "b"))) + fct = dictionary(int32(), Array$create(c("a", "b"))) ) ) }) @@ -146,8 +146,8 @@ test_that("RecordBatch dim() and nrow() (ARROW-3816)", { expect_equal(nrow(batch), 10L) }) -test_that("record_batch() handles arrow::Array", { - batch <- record_batch(x = 1:10, y = arrow::array(1:10)) +test_that("record_batch() handles Array", { + batch <- record_batch(x = 1:10, y = Array$create(1:10)) expect_equal(batch$schema, schema(x = int32(), y = int32())) }) @@ -220,4 +220,3 @@ test_that("record_batch() only auto splice data frames", { regexp = "only data frames are allowed as unnamed arguments to be auto spliced" ) }) - diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index fb04bdefdfd..674a4d09c75 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Table") +context("Table") test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { tbl <- tibble::tibble( @@ -23,7 +23,7 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), chr = letters[1:10] ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) tf <- tempfile() write_arrow(tab, tf) @@ -33,22 +33,22 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { tab1 <- read_table(tf) tab2 <- read_table(fs::path_abs(tf)) - readable_file <- ReadableFile(tf) - file_reader1 <- RecordBatchFileReader(readable_file) + readable_file <- ReadableFile$create(tf) + file_reader1 <- RecordBatchFileReader$create(readable_file) tab3 <- read_table(file_reader1) readable_file$close() mmap_file <- mmap_open(tf) - file_reader2 <- RecordBatchFileReader(mmap_file) + file_reader2 <- RecordBatchFileReader$create(mmap_file) tab4 <- read_table(file_reader2) mmap_file$close() tab5 <- read_table(bytes) - stream_reader <- RecordBatchStreamReader(bytes) + stream_reader <- RecordBatchStreamReader$create(bytes) tab6 <- read_table(stream_reader) - file_reader <- RecordBatchFileReader(tf) + file_reader <- RecordBatchFileReader$create(tf) tab7 <- read_table(file_reader) expect_equal(tab, tab1) @@ -64,7 +64,7 @@ test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", { }) test_that("Table cast (ARROW-3741)", { - tab <- table(x = 1:10, y = 1:10) + tab <- Table$create(x = 1:10, y = 1:10) expect_error(tab$cast(schema(x = int32()))) expect_error(tab$cast(schema(x = int32(), z = int32()))) @@ -77,14 +77,14 @@ test_that("Table cast (ARROW-3741)", { }) test_that("Table dim() and nrow() (ARROW-3816)", { - tab <- table(x = 1:10, y = 1:10) + tab <- Table$create(x = 1:10, y = 1:10) expect_equal(dim(tab), c(10L, 2L)) expect_equal(nrow(tab), 10L) }) test_that("table() handles record batches with splicing", { batch <- record_batch(x = 
1:2, y = letters[1:2]) - tab <- table(batch, batch, batch) + tab <- Table$create(batch, batch, batch) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) expect_equivalent( @@ -93,7 +93,7 @@ test_that("table() handles record batches with splicing", { ) batches <- list(batch, batch, batch) - tab <- table(!!!batches) + tab <- Table$create(!!!batches) expect_equal(tab$schema, batch$schema) expect_equal(tab$num_rows, 6L) expect_equivalent( @@ -103,12 +103,12 @@ test_that("table() handles record batches with splicing", { }) test_that("table() handles ... of arrays, chunked arrays, vectors", { - a <- array(1:10) + a <- Array$create(1:10) ca <- chunked_array(1:5, 6:10) v <- rnorm(10) tbl <- tibble::tibble(x = 1:10, y = letters[1:10]) - tab <- table(a = a, b = ca, c = v, !!!tbl) + tab <- Table$create(a = a, b = ca, c = v, !!!tbl) expect_equal( tab$schema, schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8()) @@ -123,15 +123,15 @@ test_that("table() handles ... 
of arrays, chunked arrays, vectors", { test_that("table() auto splices (ARROW-5718)", { df <- tibble::tibble(x = 1:10, y = letters[1:10]) - tab1 <- table(df) - tab2 <- table(!!!df) + tab1 <- Table$create(df) + tab2 <- Table$create(!!!df) expect_equal(tab1, tab2) expect_equal(tab1$schema, schema(x = int32(), y = utf8())) expect_equivalent(as.data.frame(tab1), df) s <- schema(x = float64(), y = utf8()) - tab3 <- table(df, schema = s) - tab4 <- table(!!!df, schema = s) + tab3 <- Table$create(df, schema = s) + tab4 <- Table$create(!!!df, schema = s) expect_equal(tab3, tab4) expect_equal(tab3$schema, s) expect_equivalent(as.data.frame(tab3), df) diff --git a/r/tests/testthat/test-arraydata.R b/r/tests/testthat/test-array-data.R similarity index 95% rename from r/tests/testthat/test-arraydata.R rename to r/tests/testthat/test-array-data.R index 02ca9b85625..78904823d89 100644 --- a/r/tests/testthat/test-arraydata.R +++ b/r/tests/testthat/test-array-data.R @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::ArrayData") +context("ArrayData") test_that("string vectors with only empty strings and nulls don't allocate a data buffer (ARROW-3693)", { - a <- array("") + a <- Array$create("") expect_equal(a$length(), 1L) buffers <- a$data()$buffers diff --git a/r/tests/testthat/test-arrow.R b/r/tests/testthat/test-arrow.R index 0685bd06c70..7856d4fb5c6 100644 --- a/r/tests/testthat/test-arrow.R +++ b/r/tests/testthat/test-arrow.R @@ -22,3 +22,20 @@ if (identical(Sys.getenv("TEST_R_WITH_ARROW"), "TRUE")) { expect_true(arrow_available()) }) } + +r_only({ + test_that("assert_is", { + x <- 42 + expect_true(assert_is(x, "numeric")) + expect_true(assert_is(x, c("numeric", "character"))) + expect_error(assert_is(x, "factor"), 'x must be a "factor"') + expect_error( + assert_is(x, c("factor", "list")), + 'x must be a "factor" or "list"' + ) + expect_error( + assert_is(x, c("factor", "character", "list")), + 'x must be a "factor", "character", or "list"' + ) + }) +}) diff --git a/r/tests/testthat/test-bufferreader.R b/r/tests/testthat/test-buffer-reader.R similarity index 75% rename from r/tests/testthat/test-bufferreader.R rename to r/tests/testthat/test-buffer-reader.R index 72d257101fa..94be16ad569 100644 --- a/r/tests/testthat/test-bufferreader.R +++ b/r/tests/testthat/test-buffer-reader.R @@ -15,16 +15,16 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::BufferReader") +context("BufferReader") test_that("BufferReader can be created from R objects", { - num <- BufferReader(numeric(13)) - int <- BufferReader(integer(13)) - raw <- BufferReader(raw(16)) + num <- BufferReader$create(numeric(13)) + int <- BufferReader$create(integer(13)) + raw <- BufferReader$create(raw(16)) - expect_is(num, "arrow::io::BufferReader") - expect_is(int, "arrow::io::BufferReader") - expect_is(raw, "arrow::io::BufferReader") + expect_is(num, "BufferReader") + expect_is(int, "BufferReader") + expect_is(raw, "BufferReader") expect_equal(num$GetSize(), 13*8) expect_equal(int$GetSize(), 13*4) @@ -33,8 +33,8 @@ test_that("BufferReader can be created from R objects", { test_that("BufferReader can be created from Buffer", { buf <- buffer(raw(76)) - reader <- BufferReader(buf) + reader <- BufferReader$create(buf) - expect_is(reader, "arrow::io::BufferReader") + expect_is(reader, "BufferReader") expect_equal(reader$GetSize(), 76) }) diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R index 4dfbecaf1f5..948d5df70e3 100644 --- a/r/tests/testthat/test-buffer.R +++ b/r/tests/testthat/test-buffer.R @@ -15,37 +15,48 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Buffer") +context("Buffer") -test_that("arrow::Buffer can be created from raw vector", { +test_that("Buffer can be created from raw vector", { vec <- raw(123) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 123) }) -test_that("arrow::Buffer can be created from integer vector", { +test_that("Buffer can be created from integer vector", { vec <- integer(17) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 17 * 4) }) -test_that("arrow::Buffer can be created from numeric vector", { +test_that("Buffer can be created from numeric vector", { vec <- numeric(17) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 17 * 8) }) -test_that("arrow::Buffer can be created from complex vector", { +test_that("Buffer can be created from complex vector", { vec <- complex(3) buf <- buffer(vec) - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") expect_equal(buf$size, 3 * 16) }) -test_that("can convert arrow::Buffer to raw", { +test_that("buffer buffer buffers buffers", { + expect_is(buffer(buffer(42)), "Buffer") +}) + +test_that("Other types can't be converted to Buffers", { + expect_error( + buffer(data.frame(a="asdf")), + "Cannot convert object of class data.frame to arrow::Buffer" + ) +}) + +test_that("can convert Buffer to raw", { buf <- buffer(rnorm(10)) expect_equal(buf$data(), as.raw(buf)) }) @@ -56,19 +67,21 @@ test_that("can read remaining bytes of a RandomAccessFile", { lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), chr = letters[1:10] ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) tf <- tempfile() all_bytes <- write_arrow(tab, tf) - file <- ReadableFile(tf) + file <- ReadableFile$create(tf) + expect_equal(file$tell(), 0) x <- file$Read(20)$data() + expect_equal(file$tell(), 20) y <- file$Read()$data() - file <- ReadableFile(tf) + file <- ReadableFile$create(tf) z <- 
file$Read()$data() - file <- ReadableFile(tf) + file <- ReadableFile$create(tf) a <- file$ReadAt(20)$data() expect_equal(file$GetSize(), length(x) + length(y)) diff --git a/r/tests/testthat/test-chunkedarray.R b/r/tests/testthat/test-chunked-array.R similarity index 95% rename from r/tests/testthat/test-chunkedarray.R rename to r/tests/testthat/test-chunked-array.R index 2e6b7306be1..0d25bc353df 100644 --- a/r/tests/testthat/test-chunkedarray.R +++ b/r/tests/testthat/test-chunked-array.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::ChunkedArray") +context("ChunkedArray") test_that("ChunkedArray", { x <- chunked_array(1:10, 1:10, 1:5) @@ -167,10 +167,10 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { a_int32 <- a$cast(int32()) a_int64 <- a$cast(int64()) - expect_is(a_int8, "arrow::ChunkedArray") - expect_is(a_int16, "arrow::ChunkedArray") - expect_is(a_int32, "arrow::ChunkedArray") - expect_is(a_int64, "arrow::ChunkedArray") + expect_is(a_int8, "ChunkedArray") + expect_is(a_int16, "ChunkedArray") + expect_is(a_int32, "ChunkedArray") + expect_is(a_int64, "ChunkedArray") expect_equal(a_int8$type, int8()) expect_equal(a_int16$type, int16()) expect_equal(a_int32$type, int32()) @@ -181,10 +181,10 @@ test_that("integer types casts for ChunkedArray (ARROW-3741)", { a_uint32 <- a$cast(uint32()) a_uint64 <- a$cast(uint64()) - expect_is(a_uint8, "arrow::ChunkedArray") - expect_is(a_uint16, "arrow::ChunkedArray") - expect_is(a_uint32, "arrow::ChunkedArray") - expect_is(a_uint64, "arrow::ChunkedArray") + expect_is(a_uint8, "ChunkedArray") + expect_is(a_uint16, "ChunkedArray") + expect_is(a_uint32, "ChunkedArray") + expect_is(a_uint64, "ChunkedArray") expect_equal(a_uint8$type, uint8()) expect_equal(a_uint16$type, uint16()) @@ -207,7 +207,7 @@ test_that("chunked_array() supports the type= argument. 
conversion from INTSXP a } }) -test_that("array() aborts on overflow", { +test_that("Array$create() aborts on overflow", { expect_error(chunked_array(128L, type = int8())$type, "Invalid.*downsize") expect_error(chunked_array(-129L, type = int8())$type, "Invalid.*downsize") @@ -276,7 +276,7 @@ test_that("chunked_array() handles 0 chunks if given a type", { test_that("chunked_array() can ingest arrays (ARROW-3815)", { expect_equal( - chunked_array(1:5, array(6:10))$as_vector(), + chunked_array(1:5, Array$create(6:10))$as_vector(), 1:10 ) }) diff --git a/r/tests/testthat/test-compressed.R b/r/tests/testthat/test-compressed.R index 583f342e6cb..008f974215b 100644 --- a/r/tests/testthat/test-compressed.R +++ b/r/tests/testthat/test-compressed.R @@ -15,23 +15,23 @@ # specific language governing permissions and limitations # under the License. -context("arrow::io::Compressed.*Stream") +context("Compressed.*Stream") test_that("can write Buffer to CompressedOutputStream and read back in CompressedInputStream", { - if (.Platform$OS.type == "windows") skip("Unsupported") + skip_on_os("windows") buf <- buffer(as.raw(sample(0:255, size = 1024, replace = TRUE))) tf1 <- tempfile() - stream1 <- CompressedOutputStream(tf1) + stream1 <- CompressedOutputStream$create(tf1) expect_equal(stream1$tell(), 0) stream1$write(buf) expect_equal(stream1$tell(), buf$size) stream1$close() tf2 <- tempfile() - sink2 <- FileOutputStream(tf2) - stream2 <- CompressedOutputStream(sink2) + sink2 <- FileOutputStream$create(tf2) + stream2 <- CompressedOutputStream$create(sink2) expect_equal(stream2$tell(), 0) stream2$write(buf) expect_equal(stream2$tell(), buf$size) @@ -39,11 +39,11 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse sink2$close() - input1 <- CompressedInputStream(tf1) + input1 <- CompressedInputStream$create(tf1) buf1 <- input1$Read(1024L) - file2 <- ReadableFile(tf2) - input2 <- CompressedInputStream(file2) + file2 <- ReadableFile$create(tf2) + input2 <- 
CompressedInputStream$create(file2) buf2 <- input2$Read(1024L) expect_equal(buf, buf1) @@ -52,4 +52,3 @@ test_that("can write Buffer to CompressedOutputStream and read back in Compresse unlink(tf1) unlink(tf2) }) - diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R index a2ef4149eab..24354a14186 100644 --- a/r/tests/testthat/test-csv.R +++ b/r/tests/testthat/test-csv.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::csv::TableReader") +context("CsvTableReader") test_that("Can read csv file", { tf <- tempfile() @@ -25,10 +25,10 @@ test_that("Can read csv file", { tab1 <- read_csv_arrow(tf, as_tibble = FALSE) tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = FALSE) - tab3 <- read_csv_arrow(ReadableFile(tf), as_tibble = FALSE) + tab3 <- read_csv_arrow(ReadableFile$create(tf), as_tibble = FALSE) iris$Species <- as.character(iris$Species) - tab0 <- table(!!!iris) + tab0 <- Table$create(!!!iris) expect_equal(tab0, tab1) expect_equal(tab0, tab2) expect_equal(tab0, tab3) @@ -42,7 +42,7 @@ test_that("read_csv_arrow(as_tibble=TRUE)", { tab1 <- read_csv_arrow(tf, as_tibble = TRUE) tab2 <- read_csv_arrow(mmap_open(tf), as_tibble = TRUE) - tab3 <- read_csv_arrow(ReadableFile(tf), as_tibble = TRUE) + tab3 <- read_csv_arrow(ReadableFile$create(tf), as_tibble = TRUE) iris$Species <- as.character(iris$Species) expect_equivalent(iris, tab1) @@ -171,7 +171,7 @@ test_that("read_csv_arrow() respects col_select", { write.csv(iris, tf, row.names = FALSE, quote = FALSE) tab <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = FALSE) - expect_equal(tab, table(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width)) + expect_equal(tab, Table$create(Sepal.Length = iris$Sepal.Length, Sepal.Width = iris$Sepal.Width)) tib <- read_csv_arrow(tf, col_select = starts_with("Sepal"), as_tibble = TRUE) expect_equal(tib, tibble::tibble(Sepal.Length = iris$Sepal.Length, Sepal.Width = 
iris$Sepal.Width)) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index b16da2cca1b..fd8bef1fc79 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::DataType") +context("DataType") test_that("null type works as expected",{ x <- null() diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 4fe058eadc6..0ab20bff546 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -31,7 +31,7 @@ test_that("feather read/write round trip", { expect_true(fs::file_exists(tf2)) tf3 <- tempfile() - stream <- FileOutputStream(tf3) + stream <- FileOutputStream$create(tf3) write_feather(tib, stream) stream$close() expect_true(fs::file_exists(tf3)) @@ -50,7 +50,7 @@ test_that("feather read/write round trip", { expect_is(tab4, "data.frame") # reading directly from arrow::io::ReadableFile - tab5 <- read_feather(ReadableFile(tf3)) + tab5 <- read_feather(ReadableFile$create(tf3)) expect_is(tab5, "data.frame") expect_equal(tib, tab1) @@ -95,7 +95,7 @@ test_that("feather handles col_select = ", { test_that("feather read/write round trip", { tab1 <- read_feather(feather_file, as_tibble = FALSE) - expect_is(tab1, "arrow::Table") + expect_is(tab1, "Table") expect_equal(tib, as.data.frame(tab1)) }) diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index 5d63a7f45fe..d7de087d12f 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::Field") +context("Field") test_that("field() factory", { x <- field("x", int32()) diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index b3e7d5638f5..c436cb72596 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::json::TableReader") +context("JsonTableReader") test_that("Can read json file with scalars columns (ARROW-5503)", { tf <- tempfile() @@ -29,7 +29,7 @@ test_that("Can read json file with scalars columns (ARROW-5503)", { tab1 <- read_json_arrow(tf, as_tibble = FALSE) tab2 <- read_json_arrow(mmap_open(tf), as_tibble = FALSE) - tab3 <- read_json_arrow(ReadableFile(tf), as_tibble = FALSE) + tab3 <- read_json_arrow(ReadableFile$create(tf), as_tibble = FALSE) expect_equal(tab1, tab2) expect_equal(tab1, tab3) @@ -56,7 +56,7 @@ test_that("read_json_arrow() converts to tibble", { tab1 <- read_json_arrow(tf) tab2 <- read_json_arrow(mmap_open(tf)) - tab3 <- read_json_arrow(ReadableFile(tf)) + tab3 <- read_json_arrow(ReadableFile$create(tf)) expect_is(tab1, "tbl_df") expect_is(tab2, "tbl_df") @@ -100,7 +100,7 @@ test_that("Can read json file with nested columns (ARROW-5503)", { tab1 <- read_json_arrow(tf, as_tibble = FALSE) tab2 <- read_json_arrow(mmap_open(tf), as_tibble = FALSE) - tab3 <- read_json_arrow(ReadableFile(tf), as_tibble = FALSE) + tab3 <- read_json_arrow(ReadableFile$create(tf), as_tibble = FALSE) expect_equal(tab1, tab2) expect_equal(tab1, tab3) @@ -114,8 +114,8 @@ test_that("Can read json file with nested columns (ARROW-5503)", { ) struct_array <- tab1$column(1)$chunk(0) - ps <- array(c(NA, NA, 78, 90, NA, 19)) - hello <- array(c(NA, NA, "hi", "bonjour", "ciao", NA)) + ps <- Array$create(c(NA, NA, 78, 90, NA, 19)) + hello <- Array$create(c(NA, NA, "hi", "bonjour", "ciao", NA)) expect_equal(struct_array$field(0L), ps) expect_equal(struct_array$GetFieldByName("ps"), ps) 
expect_equal(struct_array$Flatten(), list(ps, hello)) diff --git a/r/tests/testthat/test-messagereader.R b/r/tests/testthat/test-message-reader.R similarity index 64% rename from r/tests/testthat/test-messagereader.R rename to r/tests/testthat/test-message-reader.R index c7260fed169..0bd6d66c544 100644 --- a/r/tests/testthat/test-messagereader.R +++ b/r/tests/testthat/test-message-reader.R @@ -15,19 +15,19 @@ # specific language governing permissions and limitations # under the License. -context("arrow::ipc::MessageReader") +context("MessageReader") test_that("MessageReader can be created from raw vectors", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - reader <- MessageReader(bytes) + reader <- MessageReader$create(bytes) message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$RECORD_BATCH) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) @@ -35,13 +35,13 @@ test_that("MessageReader can be created from raw vectors", { schema <- schema(x = int32()) bytes <- schema$serialize() - reader <- MessageReader(bytes) + reader <- MessageReader$create(bytes) message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) @@ -51,17 +51,17 @@ test_that("MessageReader can be created from input stream", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - stream <- BufferReader(bytes) - expect_is(stream, "arrow::io::BufferReader") + stream <- BufferReader$create(bytes) + 
expect_is(stream, "BufferReader") - reader <- MessageReader(stream) - expect_is(reader, "arrow::ipc::MessageReader") + reader <- MessageReader$create(stream) + expect_is(reader, "MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$RECORD_BATCH) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) @@ -69,17 +69,17 @@ test_that("MessageReader can be created from input stream", { schema <- schema(x = int32()) bytes <- schema$serialize() - stream <- BufferReader(bytes) - expect_is(stream, "arrow::io::BufferReader") + stream <- BufferReader$create(bytes) + expect_is(stream, "BufferReader") - reader <- MessageReader(stream) - expect_is(reader, "arrow::ipc::MessageReader") + reader <- MessageReader$create(stream) + expect_is(reader, "MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- reader$ReadNextMessage() expect_null(message) diff --git a/r/tests/testthat/test-message.R b/r/tests/testthat/test-message.R index 5ddff018974..c6cd9fe4b09 100644 --- a/r/tests/testthat/test-message.R +++ b/r/tests/testthat/test-message.R @@ -15,18 +15,18 @@ # specific language governing permissions and limitations # under the License. 
-context("arrow::ipc::Message") +context("Message") test_that("read_message can read from input stream", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message <- read_message(stream) - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$RECORD_BATCH) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- read_message(stream) expect_null(read_message(stream)) @@ -34,13 +34,13 @@ test_that("read_message can read from input stream", { test_that("read_message() can read Schema messages", { bytes <- schema(x=int32())$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message <- read_message(stream) - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) - expect_is(message$body, "arrow::Buffer") - expect_is(message$metadata, "arrow::Buffer") + expect_is(message$body, "Buffer") + expect_is(message$metadata, "Buffer") message <- read_message(stream) expect_null(read_message(stream)) @@ -49,14 +49,14 @@ test_that("read_message() can read Schema messages", { test_that("read_message() can handle raw vectors", { batch <- record_batch(x = 1:10) bytes <- batch$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message_stream <- read_message(stream) message_raw <- read_message(bytes) expect_equal(message_stream, message_raw) bytes <- schema(x=int32())$serialize() - stream <- BufferReader(bytes) + stream <- BufferReader$create(bytes) message_stream <- read_message(stream) message_raw <- read_message(bytes) diff --git a/r/tests/testthat/test-read_record_batch.R b/r/tests/testthat/test-read-record-batch.R similarity index 86% rename from r/tests/testthat/test-read_record_batch.R rename to 
r/tests/testthat/test-read-record-batch.R index adbb192fa59..222be775db1 100644 --- a/r/tests/testthat/test-read_record_batch.R +++ b/r/tests/testthat/test-read-record-batch.R @@ -18,7 +18,7 @@ context("read_record_batch()") test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { - tab <- table( + tab <- Table$create( int = 1:10, dbl = as.numeric(1:10), lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), @@ -26,16 +26,16 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { ) tf <- tempfile() - writer <- RecordBatchFileWriter(tf, tab$schema) - expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + writer <- RecordBatchFileWriter$create(tf, tab$schema) + expect_is(writer, "RecordBatchFileWriter") writer$write_table(tab) writer$close() tab2 <- read_table(tf) expect_equal(tab, tab2) - stream <- FileOutputStream(tf) - writer <- RecordBatchFileWriter(stream, tab$schema) - expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + stream <- FileOutputStream$create(tf) + writer <- RecordBatchFileWriter$create(stream, tab$schema) + expect_is(writer, "RecordBatchFileWriter") writer$write_table(tab) writer$close() tab3 <- read_table(tf) @@ -55,7 +55,7 @@ test_that("read_record_batch() handles (raw|Buffer|InputStream, Schema) (ARROW-3 raw <- batch$serialize() batch2 <- read_record_batch(raw, schema) batch3 <- read_record_batch(buffer(raw), schema) - stream <- BufferReader(raw) + stream <- BufferReader$create(raw) batch4 <- read_record_batch(stream, schema) stream$close() @@ -69,7 +69,7 @@ test_that("read_record_batch() can handle (Message, Schema) parameters (ARROW-34 schema <- batch$schema raw <- batch$serialize() - stream <- BufferReader(raw) + stream <- BufferReader$create(raw) message <- read_message(stream) batch2 <- read_record_batch(message, schema) diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 17d994deab2..ec56d6a783b 100644 --- a/r/tests/testthat/test-read-write.R +++ 
b/r/tests/testthat/test-read-write.R @@ -17,18 +17,18 @@ context("read-write") -test_that("arrow::table round trip", { +test_that("table round trip", { tbl <- tibble::tibble( int = 1:10, dbl = as.numeric(1:10), raw = as.raw(1:10) ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) - # arrow::ChunkedArray + # ChunkedArray chunked_array_int <- tab$column(0) expect_equal(chunked_array_int$length(), 10L) expect_equal(chunked_array_int$null_count, 0L) @@ -41,7 +41,7 @@ test_that("arrow::table round trip", { expect_equal(chunked_array_int$chunk(i-1L), chunks_int[[i]]) } - # arrow::ChunkedArray + # ChunkedArray chunked_array_dbl <- tab$column(1) expect_equal(chunked_array_dbl$length(), 10L) expect_equal(chunked_array_dbl$null_count, 0L) @@ -54,7 +54,7 @@ test_that("arrow::table round trip", { expect_equal(chunked_array_dbl$chunk(i-1L), chunks_dbl[[i]]) } - # arrow::ChunkedArray + # ChunkedArray chunked_array_raw <- tab$column(2) expect_equal(chunked_array_raw$length(), 10L) expect_equal(chunked_array_raw$null_count, 0L) @@ -76,14 +76,14 @@ test_that("arrow::table round trip", { unlink(tf) }) -test_that("arrow::table round trip handles NA in integer and numeric", { +test_that("table round trip handles NA in integer and numeric", { tbl <- tibble::tibble( int = c(NA, 2:10), dbl = as.numeric(c(1:5, NA, 7:9, NA)), raw = as.raw(1:10) ) - tab <- arrow::table(!!!tbl) + tab <- Table$create(!!!tbl) expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) diff --git a/r/tests/testthat/test-recordbatchreader.R b/r/tests/testthat/test-record-batch-reader.R similarity index 62% rename from r/tests/testthat/test-recordbatchreader.R rename to r/tests/testthat/test-record-batch-reader.R index 65f7933b42d..b557f0669ac 100644 --- a/r/tests/testthat/test-recordbatchreader.R +++ b/r/tests/testthat/test-record-batch-reader.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under 
the License. -context("arrow::RecordBatch.*(Reader|Writer)") +context("RecordBatch.*(Reader|Writer)") test_that("RecordBatchStreamReader / Writer", { batch <- record_batch( @@ -23,20 +23,22 @@ test_that("RecordBatchStreamReader / Writer", { y = letters[1:10] ) - sink <- BufferOutputStream() - writer <- RecordBatchStreamWriter(sink, batch$schema) - expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") + sink <- BufferOutputStream$create() + expect_equal(sink$tell(), 0) + writer <- RecordBatchStreamWriter$create(sink, batch$schema) + expect_is(writer, "RecordBatchStreamWriter") writer$write_batch(batch) + expect_true(sink$tell() > 0) writer$close() buf <- sink$getvalue() - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") - reader <- RecordBatchStreamReader(buf) - expect_is(reader, "arrow::ipc::RecordBatchStreamReader") + reader <- RecordBatchStreamReader$create(buf) + expect_is(reader, "RecordBatchStreamReader") batch1 <- reader$read_next_batch() - expect_is(batch1, "arrow::RecordBatch") + expect_is(batch1, "RecordBatch") expect_equal(batch, batch1) expect_null(reader$read_next_batch()) @@ -48,21 +50,21 @@ test_that("RecordBatchFileReader / Writer", { y = letters[1:10] ) - sink <- BufferOutputStream() - writer <- RecordBatchFileWriter(sink, batch$schema) - expect_is(writer, "arrow::ipc::RecordBatchFileWriter") + sink <- BufferOutputStream$create() + writer <- RecordBatchFileWriter$create(sink, batch$schema) + expect_is(writer, "RecordBatchFileWriter") writer$write_batch(batch) writer$close() buf <- sink$getvalue() - expect_is(buf, "arrow::Buffer") + expect_is(buf, "Buffer") - reader <- RecordBatchFileReader(buf) - expect_is(reader, "arrow::ipc::RecordBatchFileReader") + reader <- RecordBatchFileReader$create(buf) + expect_is(reader, "RecordBatchFileReader") - batch1 <- reader$get_batch(0L) - expect_is(batch1, "arrow::RecordBatch") + batch1 <- reader$get_batch(0) + expect_is(batch1, "RecordBatch") expect_equal(batch, batch1) - 
expect_equal(reader$num_record_batches, 1L) + expect_equal(reader$num_record_batches, 1) }) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 10e784f49ec..f50adc22a42 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -context("arrow::Schema") +context("Schema") test_that("Alternate type names are supported", { expect_equal( @@ -28,27 +28,27 @@ test_that("reading schema from Buffer", { # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter # maybe there is an easier way to serialize a schema batch <- record_batch(x = 1:10) - expect_is(batch, "arrow::RecordBatch") + expect_is(batch, "RecordBatch") - stream <- BufferOutputStream() - writer <- RecordBatchStreamWriter(stream, batch$schema) - expect_is(writer, "arrow::ipc::RecordBatchStreamWriter") + stream <- BufferOutputStream$create() + writer <- RecordBatchStreamWriter$create(stream, batch$schema) + expect_is(writer, "RecordBatchStreamWriter") writer$close() buffer <- stream$getvalue() - expect_is(buffer, "arrow::Buffer") + expect_is(buffer, "Buffer") - reader <- MessageReader(buffer) - expect_is(reader, "arrow::ipc::MessageReader") + reader <- MessageReader$create(buffer) + expect_is(reader, "MessageReader") message <- reader$ReadNextMessage() - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) - stream <- BufferReader(buffer) - expect_is(stream, "arrow::io::BufferReader") + stream <- BufferReader$create(buffer) + expect_is(stream, "BufferReader") message <- read_message(stream) - expect_is(message, "arrow::ipc::Message") + expect_is(message, "Message") expect_equal(message$type, MessageType$SCHEMA) }) diff --git a/r/tests/testthat/test-cputhreadpoolcapacity.R b/r/tests/testthat/test-thread-pool.R similarity index 100% rename from 
r/tests/testthat/test-cputhreadpoolcapacity.R rename to r/tests/testthat/test-thread-pool.R diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 70f8df63159..19934c6e472 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -18,11 +18,11 @@ context("test-type") test_that("type() gets the right type for arrow::Array", { - a <- array(1:10) + a <- Array$create(1:10) expect_equal(type(a), a$type) }) -test_that("type() gets the right type for arrow::ChunkedArray", { +test_that("type() gets the right type for ChunkedArray", { a <- chunked_array(1:10, 1:10) expect_equal(type(a), a$type) }) @@ -35,7 +35,7 @@ test_that("type() infers from R type", { expect_equal(type(""), utf8()) expect_equal( type(iris$Species), - dictionary(int8(), array(levels(iris$Species)), FALSE) + dictionary(int8(), Array$create(levels(iris$Species)), FALSE) ) expect_equal( type(lubridate::ymd_hms("2019-02-14 13:55:05")), diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd new file mode 100644 index 00000000000..e7ab08b03b2 --- /dev/null +++ b/r/vignettes/arrow.Rmd @@ -0,0 +1,86 @@ +--- +title: "Using the Arrow C++ Library in R" +description: "This document describes the low-level interface to the Apache Arrow C++ library in R and reviews the patterns and conventions of the R package." +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using the Arrow C++ Library in R} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +The Apache Arrow C++ library provides rich, powerful features for working with columnar data. The `arrow` R package provides both a low-level interface to the C++ library and some higher-level, R-flavored tools for working with it. This vignette provides an overview of how the pieces fit together, and it describes the conventions that the classes and methods follow in R. 
+ +# Reading and writing files + +The `arrow` package provides some simple functions for using the Arrow C++ library to read and write files. These functions are designed to drop into your normal R workflow without requiring any knowledge of the Arrow C++ library and use naming conventions and arguments that follow popular R packages, particularly `readr`. The readers return `data.frame`s (or if you use the `tibble` package, they will act like `tbl_df`s), and the writers take `data.frame`s. + +Importantly, `arrow` provides basic read and write support for the [Apache +Parquet](https://parquet.apache.org/) columnar data file format, without having to set up a database. + +```r +library(arrow) +df <- read_parquet("path/to/file.parquet") +``` + +This function, along with the other readers in the package, takes an optional +`col_select` argument, inspired by the +[`vroom`](https://vroom.r-lib.org/reference/vroom.html) package. This argument +lets you use the ["tidyselect" helper functions](https://tidyselect.r-lib.org/reference/select_helpers.html), as you can do in `dplyr::select()`, to specify that you only want to keep certain columns. You may also provide a character vector of column names to keep, as in the "select" argument to `data.table::fread()`. By narrowing your selection at read time, you can load a `data.frame` with less memory overhead. + +For example, suppose you had written the `iris` dataset to Parquet. You could +read a `data.frame` with only the columns `c("Sepal.Length", "Sepal.Width")` by +doing + +```r +df <- read_parquet("iris.parquet", col_select = starts_with("Sepal")) +``` + +Just as you can read, you can write Parquet files: + +```r +write_parquet(df, "path/to/different_file.parquet") +``` + +The `arrow` package also includes a faster and more robust implementation of the +Feather file format, providing `read_feather()` and +`write_feather()`. 
[Feather](https://github.com/wesm/feather) was one of the +initial applications of Apache Arrow for Python and R, providing an efficient, +common file format for language-agnostic data frame storage, along with +implementations in R and Python. + +As Arrow progressed, development of Feather moved to the +[`apache/arrow`](https://github.com/apache/arrow) project, and for the last two +years, the Python implementation of Feather has just been a wrapper around +`pyarrow`. This meant that as Arrow progressed and bugs were fixed, the Python +version of Feather got the improvements but sadly R did not. + +In the `arrow` package, the R implementation of Feather depends +on the same underlying C++ library as the Python version does. This should +result in more reliable and consistent behavior across the two languages, as +well as [improved performance](https://wesmckinney.com/blog/feather-arrow-future/). + +In addition to these readers and writers, the `arrow` package has wrappers for +other readers in the C++ library; see `?read_csv_arrow` and +`?read_json_arrow`. These readers are being developed to optimize for the +memory layout of the Arrow columnar format and are not intended as a direct +replacement for existing R CSV readers (`base::read.csv`, `readr::read_csv`, +`data.table::fread`) that return an R `data.frame`. + +# Access to Arrow messages, buffers, and streams + +The `arrow` package also provides many lower-level bindings to the C++ library, which enable you +to access and manipulate Arrow objects. You can use these to build connectors +to other applications and services that use Arrow. One example is Spark: the +[`sparklyr`](https://spark.rstudio.com/) package has support for using Arrow to +move data to and from Spark, yielding [significant performance +gains](http://arrow.apache.org/blog/2019/01/25/r-spark-improvements/).
+ +# Class structure and package conventions + +C++ is an object-oriented language, so the core logic of the Arrow library is encapsulated in classes and methods. In the R package, these classes are implemented as `R6` reference classes, most of which are exported from the namespace. + +In order to match the C++ naming conventions, the `R6` classes are in TitleCase, e.g. `RecordBatch`. This makes it easy to look up the relevant C++ implementations in the [code](https://github.com/apache/arrow/tree/master/cpp) or [documentation](https://arrow.apache.org/docs/cpp/). To simplify things in R, the C++ library namespaces are generally dropped or flattened; that is, where the C++ library has `arrow::io::FileOutputStream`, it is just `FileOutputStream` in the R package. One exception is for the file readers, where the namespace is necessary to disambiguate. So `arrow::csv::TableReader` becomes `CsvTableReader`, and `arrow::json::TableReader` becomes `JsonTableReader`. + +Some of these classes are not meant to be instantiated directly; they may be base classes or other kinds of helpers. For those that you should be able to create, use the `$create()` method to instantiate an object. For example, `rb <- RecordBatch$create(int = 1:10, dbl = as.numeric(1:10))` will create a `RecordBatch`. Many of these factory methods that an R user might most often encounter also have a `snake_case` alias, in order to be more familiar for contemporary R users. So `record_batch(int = 1:10, dbl = as.numeric(1:10))` would do the same as `RecordBatch$create()` above. + +The typical user of the `arrow` R package may never deal directly with the `R6` objects. We provide more R-friendly wrapper functions as a higher-level interface to the C++ library. An R user can call `read_parquet()` without knowing or caring that they're instantiating a `ParquetFileReader` object and calling the `$ReadFile()` method on it. 
The classes are there and available to the advanced programmer who wants fine-grained control over how the C++ library is used.