From 9408272c421a38d58703db7411e58cc643c95148 Mon Sep 17 00:00:00 2001 From: pringshia Date: Fri, 10 Feb 2023 16:04:15 -0500 Subject: [PATCH 1/4] Rewrite to remove confusion around efficiency claims --- src/std_misc/file/read_lines.md | 78 +++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/src/std_misc/file/read_lines.md b/src/std_misc/file/read_lines.md index 641eb972a0..0fbd9a120f 100644 --- a/src/std_misc/file/read_lines.md +++ b/src/std_misc/file/read_lines.md @@ -1,44 +1,50 @@ # `read_lines` -## Beginner friendly method -This method is NOT efficient. It's here for beginners -who can't understand the efficient method yet. +## A naive approach -```rust,no_run -use std::fs::File; -use std::io::{ self, BufRead, BufReader }; +This might be a reasonable first attempt for a beginner's first +implementation for reading lines from a file. -fn read_lines(filename: String) -> io::Lines<BufReader<File>> { - // Open the file in read-only mode. - let file = File::open(filename).unwrap(); - // Read the file line by line, and return an iterator of the lines of the file. - return io::BufReader::new(file).lines(); -} +```rust,no_run +use std::fs::read_to_string; -fn main() { - // Stores the iterator of lines of the file in lines variable. - let lines = read_lines("./hosts".to_string()); - // Iterate over the lines of the file, and in this case print them. - for line in lines { - println!("{}", line.unwrap()); +fn read_lines(filename: &str) -> Vec<String> { + let mut result = Vec::new(); + + for line in read_to_string(filename).unwrap().lines() { + result.push(line.to_string()) } + + result } ``` -Running this program simply prints the lines individually. -```shell -$ echo -e "127.0.0.1\n192.168.0.1\n" > hosts -$ rustc read_lines.rs && ./read_lines -127.0.0.1 -192.168.0.1 +Since the method `lines()` returns an iterator over the lines in the file, +we can also perform a map inline and collect the results, yielding a more +concise and fluent expression. 
+ +```rust,no_run +use std::fs::read_to_string; + +fn read_lines(filename: &str) -> Vec<String> { + read_to_string(filename) + .unwrap() // panic on possible file-reading errors + .lines() // split the string into an iterator of string slices + .map(String::from) // make each slice into a string + .collect() // gather them together into a vector +} ``` -## Efficient method -The method `lines()` returns an iterator over the lines -of a file. +Note that in both examples above, we must convert the `&str` reference +returned from `lines()` to the owned type `String`, using `.to_string()` +and `String::from` respectively. + +## A more efficient approach -`File::open` expects a generic, `AsRef<Path>`. That's what -`read_lines()` expects as input. +We use the `BufRead` class to read the file. `BufRead` uses an internal +readahead buffer to performantly reduce the number of times the underlying +file storage layer must be queried per line read. The intermediary buffer +is much faster to access. ```rust,no_run use std::fs::File; @@ -46,8 +52,8 @@ use std::io::{self, BufRead}; use std::path::Path; fn main() { - // File hosts must exist in current path before this produces output - if let Ok(lines) = read_lines("./hosts") { + // File hosts.txt must exist in the current path + if let Ok(lines) = read_lines("./hosts.txt") { // Consumes the iterator, returns an (Optional) String for line in lines { if let Ok(ip) = line { @@ -68,11 +74,15 @@ where P: AsRef<Path>, { Running this program simply prints the lines individually. ```shell -$ echo -e "127.0.0.1\n192.168.0.1\n" > hosts +$ echo -e "127.0.0.1\n192.168.0.1\n" > hosts.txt $ rustc read_lines.rs && ./read_lines 127.0.0.1 192.168.0.1 ``` -This process is more efficient than creating a `String` in memory -especially working with larger files. \ No newline at end of file +Note that since `File::open` expects a generic `AsRef<Path>` as argument, we define our +generic `read_lines()` method with the same generic constraint, using the `where` keyword. 
+ +This process is more efficient than creating a `String` in memory with all of the file's +contents. Doing so can especially cause performance issues when working with larger files. + From 7b3622359a1394984657595b22805a7a5ac88727 Mon Sep 17 00:00:00 2001 From: pringshia Date: Fri, 10 Feb 2023 16:49:14 -0500 Subject: [PATCH 2/4] Add some more clarification --- src/std_misc/file/read_lines.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/std_misc/file/read_lines.md b/src/std_misc/file/read_lines.md index 0fbd9a120f..4fb1365252 100644 --- a/src/std_misc/file/read_lines.md +++ b/src/std_misc/file/read_lines.md @@ -41,11 +41,14 @@ and `String::from` respectively. ## A more efficient approach -We use the `BufRead` class to read the file. `BufRead` uses an internal +Here we use the `BufRead` class to read the file. `BufRead` uses an internal readahead buffer to performantly reduce the number of times the underlying file storage layer must be queried per line read. The intermediary buffer is much faster to access. +We also update `read_lines` to return an iterator instead of allocating new +`String` objects in memory for each line. + ```rust,no_run use std::fs::File; use std::io::{self, BufRead}; @@ -80,8 +83,8 @@ $ rustc read_lines.rs && ./read_lines 192.168.0.1 ``` -Note that since `File::open` expects a generic `AsRef<Path>` as argument, we define our -generic `read_lines()` method with the same generic constraint, using the `where` keyword. +(Note that since `File::open` expects a generic `AsRef<Path>` as argument, we define our +generic `read_lines()` method with the same generic constraint, using the `where` keyword.) This process is more efficient than creating a `String` in memory with all of the file's contents. Doing so can especially cause performance issues when working with larger files. 
From bde43af7e3f05f82022d246743e5b6fc09aa1e47 Mon Sep 17 00:00:00 2001 From: Pratik Date: Fri, 10 Feb 2023 17:12:44 -0500 Subject: [PATCH 3/4] Update src/std_misc/file/read_lines.md Co-authored-by: Samuel Eisenhandler --- src/std_misc/file/read_lines.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/std_misc/file/read_lines.md b/src/std_misc/file/read_lines.md index 4fb1365252..6292288da7 100644 --- a/src/std_misc/file/read_lines.md +++ b/src/std_misc/file/read_lines.md @@ -42,9 +42,7 @@ and `String::from` respectively. ## A more efficient approach Here we use the `BufRead` class to read the file. `BufRead` uses an internal -readahead buffer to performantly reduce the number of times the underlying -file storage layer must be queried per line read. The intermediary buffer -is much faster to access. +buffer to reduce intermediate allocations. We also update `read_lines` to return an iterator instead of allocating new `String` objects in memory for each line. From 9079c89b67acb1808af65c1fe97837194cc28268 Mon Sep 17 00:00:00 2001 From: Pratik Date: Fri, 10 Feb 2023 17:12:49 -0500 Subject: [PATCH 4/4] Update src/std_misc/file/read_lines.md Co-authored-by: Samuel Eisenhandler --- src/std_misc/file/read_lines.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/std_misc/file/read_lines.md b/src/std_misc/file/read_lines.md index 6292288da7..216b0181c5 100644 --- a/src/std_misc/file/read_lines.md +++ b/src/std_misc/file/read_lines.md @@ -41,7 +41,7 @@ and `String::from` respectively. ## A more efficient approach -Here we use the `BufRead` class to read the file. `BufRead` uses an internal +Here we pass ownership of the open `File` to a `BufReader` struct. `BufReader` uses an internal buffer to reduce intermediate allocations. We also update `read_lines` to return an iterator instead of allocating new