From 489b0ebc5d58e5134466e4459e28cf274028c06b Mon Sep 17 00:00:00 2001 From: Henrik Lindberg <563066+hlindberg@users.noreply.github.com> Date: Wed, 20 Mar 2019 12:59:10 +0100 Subject: [PATCH 1/2] (MODULES-8760) Add iterative feature to merge() function This adds a new feature to the `merge()` function such that it builds a hash from hashes returned from a lambda when the function is given an `Iterable` as its only argument. This adds a 4.x version of merge that is backwards compatible (except it generates different error messages for argument errors). Since a 4.x version wins over a 3x the new version will win over the old except for users that use the old API for calling functions (as expected; this is fine). --- README.md | 27 ++++++++++ lib/puppet/functions/merge.rb | 96 +++++++++++++++++++++++++++++++++++ spec/functions/merge_spec.rb | 45 +++++++++++++--- 3 files changed, 161 insertions(+), 7 deletions(-) create mode 100644 lib/puppet/functions/merge.rb diff --git a/README.md b/README.md index 6604a79b4..808593fb3 100644 --- a/README.md +++ b/README.md @@ -1986,6 +1986,33 @@ Since Puppet 4.0.0, you can use the + operator to achieve the same merge. $merged_hash = $hash1 + $hash2 +If merge is given a single `Iterable` (`Array`, `Hash`, etc.) it will call a given block with +up to three parameters, and merge each resulting Hash into the accumulated result. All other types +of values returned from the block (typically `undef`) are skipped (not merged). + +The codeblock can take 2 or three parameters: +* with two, it gets the current hash (as built to this point), and each value (for hash the value is a [key, value] tuple) +* with three, it gets the current hash (as built to this point), the key/index of each value, and then the value + +If the iterable is empty, or no hash was returned from the given block, an empty hash is returned. In the given block, a call to `next()` +will skip that entry, and a call to `break()` will end the iteration. + +*Example: counting occurrences of strings in an array* +```puppet +['a', 'b', 'c', 'c', 'd', 'b'].merge | $hsh, $v | { { $v => $hsh[$v].lest || { 0 } + 1 } } +# would result in { a => 1, b => 2, c => 2, d => 1 } +``` + +*Example: skipping values for entries that are longer than 1 char* + +```puppet +['a', 'b', 'c', 'c', 'd', 'b', 'blah', 'blah'].merge | $hsh, $v | { if $v =~ String[1,1] { { $v => $hsh[$v].lest || { 0 } + 1 } } } +# would result in { a => 1, b => 2, c => 2, d => 1 } since 'blah' is longer than 2 chars +``` + +The iterative `merge()` has an advantage over doing the same with a general `reduce()` in that the constructed hash +does not have to be copied in each iteration and thus will perform much better with large inputs. + *Type*: rvalue. #### `min` diff --git a/lib/puppet/functions/merge.rb b/lib/puppet/functions/merge.rb new file mode 100644 index 000000000..f2002d318 --- /dev/null +++ b/lib/puppet/functions/merge.rb @@ -0,0 +1,96 @@ +# Merges two or more hashes together or hashes resulting from iteration, and returns the resulting hash. +# +# @example Using merge() +# +# $hash1 = {'one' => 1, 'two', => 2} +# $hash2 = {'two' => 'dos', 'three', => 'tres'} +# $merged_hash = merge($hash1, $hash2) +# # The resulting hash is equivalent to: +# # $merged_hash = {'one' => 1, 'two' => 'dos', 'three' => 'tres'} +# +# When there is a duplicate key, the key in the rightmost hash will "win." +# +# Note that since Puppet 4.0.0 the same merge can be achieved with the + operator. +# +# $merged_hash = $hash1 + $hash2 +# +# If merge is given a single Iterable (Array, Hash, etc.) it will call a given block with +# up to three parameters, and merge each resulting Hash into the accumulated result. All other types +# of values returned from the block (typically undef) are skipped (not merged). +# +# The codeblock can take 2 or three parameters: +# * with two, it gets the current hash (as built to this point), and each value (for hash the value is a [key, value] tuple) +# * with three, it gets the current hash (as built to this point), the key/index of each value, and then the value +# +# If the iterable is empty, or no hash was returned from the given block, an empty hash is returned. In the given block, a call to `next()` +# will skip that entry, and a call to `break()` will end the iteration. +# +# @example counting occurrences of strings in an array +# ['a', 'b', 'c', 'c', 'd', 'b'].merge | $hsh, $v | { { $v => $hsh[$v].lest || { 0 } + 1 } } +# # would result in { a => 1, b => 2, c => 2, d => 1 } +# +# @example skipping values for entries that are longer than 1 char +# ['a', 'b', 'c', 'c', 'd', 'b', 'blah', 'blah'].merge | $hsh, $v | { if $v =~ String[1,1] { { $v => $hsh[$v].lest || { 0 } + 1 } } } +# # would result in { a => 1, b => 2, c => 2, d => 1 } since 'blah' is longer than 2 chars +# +# The iterative `merge()` has an advantage over doing the same with a general `reduce()` in that the constructed hash +# does not have to be copied in each iteration and thus will perform much better with large inputs. +# +Puppet::Functions.create_function(:'merge') do + + dispatch :merge2hashes do + repeated_param 'Variant[Hash, Undef, String[0,0]]', :args # this strange type is backwards compatible + return_type 'Hash' + end + + dispatch :merge_iterable3 do + repeated_param 'Iterable', :args + block_param 'Callable[3,3]', :block + return_type 'Hash' + end + + dispatch :merge_iterable2 do + repeated_param 'Iterable', :args + block_param 'Callable[2,2]', :block + return_type 'Hash' + end + + + def merge2hashes(*hashes) + accumulator = {} + hashes.each {|h| accumulator.merge!(h) if h.is_a?(Hash)} + accumulator + end + + def merge_iterable2(iterable, &block) + accumulator = {} + enum = Puppet::Pops::Types::Iterable.asserted_iterable(self, iterable) + enum.each do |v| + r = yield(accumulator, v) + accumulator.merge!(r) if r.is_a?(Hash) + end + accumulator + end + + def merge_iterable3(iterable, &block) + accumulator = {} + enum = Puppet::Pops::Types::Iterable.asserted_iterable(self, iterable) + if enum.hash_style? + enum.each do |entry| + r = yield(accumulator, *entry) + accumulator.merge!(r) if r.is_a?(Hash) + end + else + begin + index = 0 + loop do + r = yield(accumulator, index, enum.next) + accumulator.merge!(r) if r.is_a?(Hash) + index += 1 + end + rescue StopIteration + end + end + accumulator + end +end diff --git a/spec/functions/merge_spec.rb b/spec/functions/merge_spec.rb index 5f9747282..e760bdd80 100644 --- a/spec/functions/merge_spec.rb +++ b/spec/functions/merge_spec.rb @@ -2,17 +2,15 @@ describe 'merge' do it { is_expected.not_to eq(nil) } - it { is_expected.to run.with_params.and_raise_error(Puppet::ParseError, %r{wrong number of arguments}i) } - it { is_expected.to run.with_params({}, 'two').and_raise_error(Puppet::ParseError, %r{unexpected argument type String}) } - it { is_expected.to run.with_params({}, 1).and_raise_error(Puppet::ParseError, %r{unexpected argument type (Fixnum|Integer)}) } + it { is_expected.to run.with_params({}, 'two').and_raise_error(ArgumentError, Regexp.new(Regexp.escape("rejected: parameter 'args' expects a value of type Undef, Hash, or String[0, 0], got String"))) } + it { is_expected.to run.with_params({}, 1).and_raise_error(ArgumentError, %r{parameter 'args' expects a value of type Undef, Hash, or String, got Integer}) } it { is_expected.to run.with_params({ 'one' => 1, 'three' => { 'four' => 4 } }, 'two' => 'dos', 'three' => { 'five' => 5 }).and_return('one' => 1, 'three' => { 'five' => 5 }, 'two' => 'dos') } - it { - pending 'should not special case this' - is_expected.to run.with_params({}).and_return({}) - } + it { is_expected.to run.with_params.and_return({}) } + it { is_expected.to run.with_params({}).and_return({}) } it { is_expected.to run.with_params({}, {}).and_return({}) } it { is_expected.to run.with_params({}, {}, {}).and_return({}) } + describe 'should accept empty strings as puppet undef' do it { is_expected.to run.with_params({}, '').and_return({}) } end @@ -24,4 +22,37 @@ .with_params({ 'key1' => 'value1' }, { 'key2' => 'value2' }, 'key3' => 'value3') \ .and_return('key1' => 'value1', 'key2' => 'value2', 'key3' => 'value3') } + describe 'should accept iterable and merge produced hashes' do + + it { is_expected.to run \ + .with_params([1,2,3]) \ + .with_lambda {|hsh, val| { val => val } } \ + .and_return({ 1 => 1, 2 => 2, 3 => 3 }) } + + it { is_expected.to run \ + .with_params([1,2,3]) \ + .with_lambda {|hsh, val| { val => val } unless val == 2} \ + .and_return({ 1 => 1, 3 => 3 }) } + + it { is_expected.to run \ + .with_params([1,2,3]) \ + .with_lambda {|hsh, val| raise StopIteration.new if val == 3; { val => val } } \ + .and_return({ 1 => 1, 2 => 2 }) } + + it { is_expected.to run \ + .with_params(['a', 'b', 'b', 'c', 'b']) \ + .with_lambda {|hsh, val| { val => (hsh[val] || 0) + 1 } } \ + .and_return({ 'a' => 1, 'b' => 3, 'c' => 1 }) } + + it { is_expected.to run \ + .with_params(['a', 'b', 'c']) \ + .with_lambda {|hsh, idx, val| { idx => val } } \ + .and_return({ 0 => 'a', 1 => 'b', 2 => 'c'}) } + + it { is_expected.to run \ + .with_params({'a' => 'A', 'b' => 'B', 'c' => 'C'}) \ + .with_lambda {|hsh, key, val| { key => "#{key}#{val}" } } \ + .and_return({ 'a' => 'aA', 'b' => 'bB', 'c' => 'cC'}) } + + end end From af96187a2ace4c0c09598a8a1060b0bd2b1ffc10 Mon Sep 17 00:00:00 2001 From: Henrik Lindberg <563066+hlindberg@users.noreply.github.com> Date: Wed, 20 Mar 2019 14:52:50 +0100 Subject: [PATCH 2/2] (maint) Fix stylistic issues --- lib/puppet/functions/merge.rb | 14 +++--- spec/functions/merge_spec.rb | 90 ++++++++++++++++++++++------------- 2 files changed, 62 insertions(+), 42 deletions(-) diff --git a/lib/puppet/functions/merge.rb b/lib/puppet/functions/merge.rb index f2002d318..62ddf99d0 100644 --- a/lib/puppet/functions/merge.rb +++ b/lib/puppet/functions/merge.rb @@ -36,10 +36,9 @@ # The iterative `merge()` has an advantage over doing the same with a general `reduce()` in that the constructed hash # does not have to be copied in each iteration and thus will perform much better with large inputs. # -Puppet::Functions.create_function(:'merge') do - +Puppet::Functions.create_function(:merge) do dispatch :merge2hashes do - repeated_param 'Variant[Hash, Undef, String[0,0]]', :args # this strange type is backwards compatible + repeated_param 'Variant[Hash, Undef, String[0,0]]', :args # this strange type is backwards compatible return_type 'Hash' end @@ -55,14 +54,13 @@ return_type 'Hash' end - def merge2hashes(*hashes) accumulator = {} - hashes.each {|h| accumulator.merge!(h) if h.is_a?(Hash)} + hashes.each { |h| accumulator.merge!(h) if h.is_a?(Hash) } accumulator end - def merge_iterable2(iterable, &block) + def merge_iterable2(iterable) accumulator = {} enum = Puppet::Pops::Types::Iterable.asserted_iterable(self, iterable) enum.each do |v| @@ -72,7 +70,7 @@ def merge_iterable2(iterable, &block) accumulator end - def merge_iterable3(iterable, &block) + def merge_iterable3(iterable) accumulator = {} enum = Puppet::Pops::Types::Iterable.asserted_iterable(self, iterable) if enum.hash_style? @@ -88,7 +86,7 @@ def merge_iterable3(iterable, &block) accumulator.merge!(r) if r.is_a?(Hash) index += 1 end - rescue StopIteration + rescue StopIteration # rubocop:disable Lint/HandleExceptions end end accumulator diff --git a/spec/functions/merge_spec.rb b/spec/functions/merge_spec.rb index e760bdd80..7bffaab85 100644 --- a/spec/functions/merge_spec.rb +++ b/spec/functions/merge_spec.rb @@ -2,9 +2,24 @@ describe 'merge' do it { is_expected.not_to eq(nil) } - it { is_expected.to run.with_params({}, 'two').and_raise_error(ArgumentError, Regexp.new(Regexp.escape("rejected: parameter 'args' expects a value of type Undef, Hash, or String[0, 0], got String"))) } - it { is_expected.to run.with_params({}, 1).and_raise_error(ArgumentError, %r{parameter 'args' expects a value of type Undef, Hash, or String, got Integer}) } - it { is_expected.to run.with_params({ 'one' => 1, 'three' => { 'four' => 4 } }, 'two' => 'dos', 'three' => { 'five' => 5 }).and_return('one' => 1, 'three' => { 'five' => 5 }, 'two' => 'dos') } + it { + is_expected.to run \ + .with_params({}, 'two') \ + .and_raise_error( + ArgumentError, \ + Regexp.new(Regexp.escape("rejected: parameter 'args' expects a value of type Undef, Hash, or String[0, 0], got String")), + ) + } + it { + is_expected.to run \ + .with_params({}, 1) \ + .and_raise_error(ArgumentError, %r{parameter 'args' expects a value of type Undef, Hash, or String, got Integer}) + } + it { + is_expected.to run \ + .with_params({ 'one' => 1, 'three' => { 'four' => 4 } }, 'two' => 'dos', 'three' => { 'five' => 5 }) \ + .and_return('one' => 1, 'three' => { 'five' => 5 }, 'two' => 'dos') + } it { is_expected.to run.with_params.and_return({}) } it { is_expected.to run.with_params({}).and_return({}) } @@ -14,6 +29,7 @@ describe 'should accept empty strings as puppet undef' do it { is_expected.to run.with_params({}, '').and_return({}) } end + it { is_expected.to run.with_params({ 'key' => 'value' }, {}).and_return('key' => 'value') } it { is_expected.to run.with_params({}, 'key' => 'value').and_return('key' => 'value') } it { is_expected.to run.with_params({ 'key' => 'value1' }, 'key' => 'value2').and_return('key' => 'value2') } @@ -23,36 +39,42 @@ .and_return('key1' => 'value1', 'key2' => 'value2', 'key3' => 'value3') } describe 'should accept iterable and merge produced hashes' do - - it { is_expected.to run \ - .with_params([1,2,3]) \ - .with_lambda {|hsh, val| { val => val } } \ - .and_return({ 1 => 1, 2 => 2, 3 => 3 }) } - - it { is_expected.to run \ - .with_params([1,2,3]) \ - .with_lambda {|hsh, val| { val => val } unless val == 2} \ - .and_return({ 1 => 1, 3 => 3 }) } - - it { is_expected.to run \ - .with_params([1,2,3]) \ - .with_lambda {|hsh, val| raise StopIteration.new if val == 3; { val => val } } \ - .and_return({ 1 => 1, 2 => 2 }) } - - it { is_expected.to run \ - .with_params(['a', 'b', 'b', 'c', 'b']) \ - .with_lambda {|hsh, val| { val => (hsh[val] || 0) + 1 } } \ - .and_return({ 'a' => 1, 'b' => 3, 'c' => 1 }) } - - it { is_expected.to run \ - .with_params(['a', 'b', 'c']) \ - .with_lambda {|hsh, idx, val| { idx => val } } \ - .and_return({ 0 => 'a', 1 => 'b', 2 => 'c'}) } - - it { is_expected.to run \ - .with_params({'a' => 'A', 'b' => 'B', 'c' => 'C'}) \ - .with_lambda {|hsh, key, val| { key => "#{key}#{val}" } } \ - .and_return({ 'a' => 'aA', 'b' => 'bB', 'c' => 'cC'}) } - + it { + is_expected.to run \ + .with_params([1, 2, 3]) \ + .with_lambda { |_hsh, val| { val => val } } \ + .and_return(1 => 1, 2 => 2, 3 => 3) + } + it { + is_expected.to run \ + .with_params([1, 2, 3]) \ + .with_lambda { |_hsh, val| { val => val } unless val == 2 } \ + .and_return(1 => 1, 3 => 3) + } + it { + is_expected.to run \ + .with_params([1, 2, 3]) \ + # rubocop:disable Style/Semicolon + .with_lambda { |_hsh, val| raise StopIteration if val == 3; { val => val } } \ + .and_return(1 => 1, 2 => 2) + } + it { + is_expected.to run \ + .with_params(['a', 'b', 'b', 'c', 'b']) \ + .with_lambda { |hsh, val| { val => (hsh[val] || 0) + 1 } } \ + .and_return('a' => 1, 'b' => 3, 'c' => 1) + } + it { + is_expected.to run \ + .with_params(['a', 'b', 'c']) \ + .with_lambda { |_hsh, idx, val| { idx => val } } \ + .and_return(0 => 'a', 1 => 'b', 2 => 'c') + } + it { + is_expected.to run \ + .with_params('a' => 'A', 'b' => 'B', 'c' => 'C') \ + .with_lambda { |_hsh, key, val| { key => "#{key}#{val}" } } \ + .and_return('a' => 'aA', 'b' => 'bB', 'c' => 'cC') + } end end