diff --git a/.editorconfig b/.editorconfig index d43b16be5..78f4eb4b0 100644 --- a/.editorconfig +++ b/.editorconfig @@ -34,7 +34,7 @@ indent_size = 2 [metafacture-io/src/test/resources/org/metafacture/io/compressed.txt] insert_final_newline = false -[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt] +[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test*.txt] trim_trailing_whitespace = false [metafacture-runner/src/main/dist/config/java-options.conf] diff --git a/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java b/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java index 53aa28c03..0227d8228 100644 --- a/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java +++ b/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java @@ -43,14 +43,19 @@ * * The default {@link #setEncoding encoding} is UTF-8. * The default {@link #setSeparator separator} is {@code \t}. + * The default {@link #setKeyColumn keyColumn} is {@code 0}. + * The default {@link #setValueColumn valueColumn} is {@code 1}. * - * By setting {@link #allowEmptyValues} to {@code true} the values in the - * {@link Map} can be empty thus enabling e.g. + *

By setting {@link #setAllowEmptyValues allowEmptyValues} to {@code true}, + * the values in the {@link Map} can be empty, thus enabling e.g. * {@link org.metafacture.metamorph.functions.SetReplace} to remove matching * keys. * - * Important: All other lines that are not split in two parts - * by the separator are ignored! + *

By setting {@link #setExpectedColumns expectedColumns} to + * {@code -1}, the number of columns is not checked. + * + *

Important: Otherwise, all lines that are not split into + * the expected number of parts by the separator are ignored! * * @author Markus Michael Geipel */ @@ -59,10 +64,13 @@ public final class FileMap extends AbstractReadOnlyMap { private final FileOpener fileOpener = new FileOpener(); private final Map map = new HashMap<>(); + private ArrayList filenames = new ArrayList<>(); private Pattern split = Pattern.compile("\t", Pattern.LITERAL); private boolean allowEmptyValues; private boolean isUninitialized = true; - private ArrayList filenames = new ArrayList<>(); + private int expectedColumns; + private int keyColumn; + private int valueColumn = 1; /** * Creates an instance of {@link FileMap}. @@ -79,7 +87,7 @@ private void init() { * Sets whether to allow empty values in the {@link Map} or ignore these * entries. * - * Default value: false + * Default value: false * * @param allowEmptyValues true if empty values in the Map are allowed */ @@ -87,6 +95,20 @@ public void setAllowEmptyValues(final boolean allowEmptyValues) { this.allowEmptyValues = allowEmptyValues; } + /** + * Sets number of expected columns; lines with different number of columns + * are ignored. Set to {@code -1} to disable the check and allow arbitrary + * number of columns. + * + * Default value: calculated from {@link #setKeyColumn key} and + * {@link #setValueColumn value} columns + * + * @param expectedColumns number of expected columns + */ + public void setExpectedColumns(final int expectedColumns) { + this.expectedColumns = expectedColumns; + } + /** * Sets a comma separated list of files which provides the {@link Map}. * @@ -141,14 +163,22 @@ private void loadFile(final String file) { Reader reader = fileOpener.open(stream); BufferedReader br = new BufferedReader(reader) ) { + final int minColumns = Math.max(keyColumn, valueColumn) + 1; + final int expColumns = expectedColumns != 0 ? 
expectedColumns : minColumns; + String line; while ((line = br.readLine()) != null) { if (line.isEmpty()) { continue; } + final String[] parts = allowEmptyValues ? split.split(line, -1) : split.split(line); - if (parts.length == 2) { - map.put(parts[0], parts[1]); + if (parts.length < minColumns) { + continue; + } + + if (expColumns < 0 || parts.length == expColumns) { + map.put(parts[keyColumn], parts[valueColumn]); } } } @@ -205,6 +235,28 @@ public void setSeparator(final String delimiter) { split = Pattern.compile(delimiter, Pattern.LITERAL); } + /** + * Sets the key column (0-based). + * + * Default value: {@code 0} + * + * @param keyColumn the key column + */ + public void setKeyColumn(final int keyColumn) { + this.keyColumn = keyColumn; + } + + /** + * Sets the value column (0-based). + * + * Default value: {@code 1} + * + * @param valueColumn the value column + */ + public void setValueColumn(final int valueColumn) { + this.valueColumn = valueColumn; + } + @Override public String get(final Object key) { if (isUninitialized) { diff --git a/metamorph/src/main/resources/schemata/metamorph.xsd b/metamorph/src/main/resources/schemata/metamorph.xsd index 0fe398e1e..953c4c031 100644 --- a/metamorph/src/main/resources/schemata/metamorph.xsd +++ b/metamorph/src/main/resources/schemata/metamorph.xsd @@ -629,6 +629,37 @@ The default separator is the tabulator. + + + Sets the key column (0-based). + + + + + + + + + + Sets the value column (0-based). + + + + + + + + + + Sets number of expected columns; set to -1 to disable + column check. 
+ + + + + + + diff --git a/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java b/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java index ee92795bf..ea5311f6e 100644 --- a/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java +++ b/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java @@ -18,6 +18,7 @@ import static org.metafacture.metamorph.TestHelpers.assertMorph; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.metafacture.framework.StreamReceiver; @@ -25,6 +26,8 @@ import org.mockito.junit.MockitoJUnit; import org.mockito.junit.MockitoRule; +import java.util.function.Consumer; + /** * Tests for class {@link FileMap}. * @@ -39,6 +42,8 @@ public final class FileMapTest { @Mock private StreamReceiver receiver; + private static String MAPS = "org/metafacture/metamorph/maps/"; + private static String MORPH = "" + " " + @@ -46,7 +51,7 @@ public final class FileMapTest { " " + "" + "" + - " " + + " " + ""; @Test @@ -67,6 +72,24 @@ public void shouldLookupValuesInFileBasedMap() { ); } + @Test + public void shouldLookupValuesInFileBasedMapWithColumnOptions() { + assertMorph(receiver, buildMorph("lookup in", "keyColumn=\"1\" valueColumn=\"0\" expectedColumns=\"2\""), + i -> { + i.startRecord("1"); + i.literal("1", "Germany"); + i.literal("1", "Fiji"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("1", "gw"); + o.get().literal("1", "fj"); + o.get().endRecord(); + } + ); + } + @Test public void shouldWhitelistValuesInFileBasedMap() { assertMorph(receiver, buildMorph("whitelist map", ""), @@ -206,6 +229,121 @@ public void shouldLookupValuesInBlockedGzipFileMap() { ); } + @Test + public void shouldLoadFile() { + assertMap(379, i -> { + Assert.assertEquals("Puerto Rico", i.get("pr")); + Assert.assertNull(i.get("zz")); + }); + } + + @Test + public void shouldLoadFileWithEmptyValues() { + assertMap(380, i -> { + 
i.setAllowEmptyValues(true); + + Assert.assertEquals("Puerto Rico", i.get("pr")); + Assert.assertEquals("", i.get("zz")); + }); + } + + @Test + public void shouldLoadFileWithSeparator() { + assertMap(99, i -> { + i.setSeparator(" "); + + Assert.assertNull(i.get("pp\tPapua")); + Assert.assertEquals("Rico", i.get("pr\tPuerto")); + }); + } + + @Test + public void shouldLoadFileWithKeyColumn() { + assertMap(21, i -> { + i.setSeparator(" "); + i.setKeyColumn(2); + + Assert.assertEquals("New", i.get("Guinea")); + }); + } + + @Test + public void shouldLoadFileWithValueColumn() { + assertMap(24, i -> { + i.setSeparator(" "); + i.setValueColumn(2); + + Assert.assertEquals("Guinea", i.get("pp\tPapua")); + }); + } + + @Test + public void shouldLoadFileWithKeyAndValueColumn() { + assertMap(66, i -> { + i.setSeparator(" "); + i.setKeyColumn(1); + i.setValueColumn(0); + + Assert.assertEquals("pr\tPuerto", i.get("Rico")); + }); + } + + @Test + public void shouldLoadFileWithExpectedColumns() { + assertMap(24, i -> { + i.setSeparator(" "); + i.setExpectedColumns(3); + + Assert.assertEquals("New", i.get("pp\tPapua")); + }); + } + + @Test + public void shouldLoadFileWithArbitraryExpectedColumns() { + assertMap(149, i -> { + i.setSeparator(" "); + i.setExpectedColumns(-1); + + Assert.assertEquals("New", i.get("pp\tPapua")); + }); + } + + @Test + public void shouldNotLoadFileWithOutOfRangeKeyColumn() { + assertMap(0, i -> { + i.setKeyColumn(2); + }); + } + + @Test + public void shouldNotLoadFileWithOutOfRangeValueColumn() { + assertMap(0, i -> { + i.setValueColumn(2); + }); + } + + @Test + public void shouldNotLoadFileWithTooFewExpectedColumns() { + assertMap(0, i -> { + i.setExpectedColumns(1); + }); + } + + @Test + public void shouldNotLoadFileWithTooManyExpectedColumns() { + assertMap(0, i -> { + i.setExpectedColumns(99); + }); + } + + private void assertMap(final int size, final Consumer consumer) { + final FileMap fileMap = new FileMap(); + fileMap.setFile(MAPS + 
"file-map-test-columns.txt"); + + consumer.accept(fileMap); + Assert.assertEquals(size, fileMap.keySet().size()); + } + private String buildMorph(final String data, final String options) { return buildMorph(data, "file-map-test.txt", options); } diff --git a/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test-columns.txt b/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test-columns.txt new file mode 100644 index 000000000..b9dcae10d --- /dev/null +++ b/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test-columns.txt @@ -0,0 +1,381 @@ +aa Albania +abc Alberta +ac Ashmore and Cartier Islands +aca Australian Capital Territory +ae Algeria +af Afghanistan +ag Argentina +ai Anguilla +ai Armenia (Republic) +air Armenian S.S.R. +aj Azerbaijan +ajr Azerbaijan S.S.R. +aku Alaska +alu Alabama +am Anguilla +an Andorra +ao Angola +aq Antigua and Barbuda +aru Arkansas +as American Samoa +at Australia +au Austria +aw Aruba +ay Antarctica +azu Arizona +ba Bahrain +bb Barbados +bcc British Columbia +bd Burundi +be Belgium +bf Bahamas +bg Bangladesh +bh Belize +bi British Indian Ocean Territory +bl Brazil +bm Bermuda Islands +bn Bosnia and Hercegovina +bo Bolivia +bp Solomon Islands +br Burma +bs Botswana +bt Bhutan +bu Bulgaria +bv Bouvet Island +bw Belarus +bwr Byelorussian S.S.R. 
+bx Brunei +ca Caribbean Netherlands +cau California +cb Cambodia +cc China +cd Chad +ce SriLanka +cf Congo (Brazzaville) +cg Congo (Democratic Republic) +ch China (Republic: 1949) +ci Croatia +cj Cayman Islands +ck Colombia +cl Chile +cm Cameroon +cn Canada +co Curaçao +cou Colorado +cp Canton and Enderbury Islands +cq Comoros +cr Costa Rica +cs Czechoslovakia +ctu Connecticut +cu Cuba +cv Cape Verde +cw Cook Islands +cx Central African Republic +cy Cyprus +cz Canal Zone +dcu District of Columbia +deu Delaware +dk Denmark +dm Benin +dq Dominica +dr Dominican Republic +ea Eritrea +ec Ecuador +eg Equatorial Guinea +em Timor Leste +enk England +er Estonia +err Estonia +es El Salvador +et Ethiopia +fa Faroe Islands +fg French Guiana +fi Finland +fj Fiji +fk Falkland Islands +flu Florida +fm Micronesia (Federated States) +fp French Polynesia +fr France +fs Terres australes et antarctiques françaises +ft Djibouti +gau Georgia +gb Kiribati +gd Grenada +ge Germany (East) +gh Ghana +gi Gibraltar +gl Greenland +gm Gambia +gn Gilbert and Ellice Islands +go Gabon +gp Guadeloupe +gr Greece +gs Georgia (Republic) +gsr Georgian S.S.R. +gt Guatemala +gu Guam +gv Guinea +gw Germany +gy Guyana +gz Gaza Strip +hiu Hawaii +hk HongKong +hm Heard and McDonald Islands +ho Honduras +ht Haiti +hu Hungary +iau Iowa +ic Iceland +idu Idaho +ie Ireland +ii India +ilu Illinois +inu Indiana +io Indonesia +iq Iraq +ir Iran +is Israel +it Italy +iu Israel Syria Demilitarized Zones +iv Côte d'Ivoire +iw Israel Jordan Demilitarized Zones +iy Iraq Saudi Arabia Neutral Zone +ja Japan +ji Johnston Atoll +jm Jamaica +jn Jan Mayen +jo Jordan +ke Kenya +kg Kyrgyzstan +kgr Kirghiz S.S.R. +kn Korea (North) +ko Korea (South) +ksu Kansas +ku Kuwait +kv Kosovo +kyu Kentucky +kz Kazakhstan +kzr Kazakh S.S.R. 
+lau Louisiana +lb Liberia +le Lebanon +lh Liechtenstein +li Lithuania +lir Lithuania +ln Central and Southern Line Islands +lo Lesotho +ls Laos +lu Luxembourg +lv Latvia +lvr Latvia +ly Libya +mau Massachusetts +mbc Manitoba +mc Monaco +mdu Maryland +meu Maine +mf Mauritius +mg Madagascar +mh Macao +miu Michigan +mj Montserrat +mk Oman +ml Mali +mm Malta +mnu Minnesota +mo Montenegro +mou Missouri +mp Mongolia +mq Martinique +mr Morocco +msu Mississippi +mtu Montana +mu Mauritania +mv Moldova +mvr Moldavian S.S.R. +mw Malawi +mx Mexico +my Malaysia +mz Mozambique +na Netherlands Antilles +nbu Nebraska +ncu North Carolina +ndu North Dakota +ne Netherlands +nfc Newfoundland and Labrador +ng Niger +nhu New Hampshire +nik Northern Ireland +nju New Jersey +nkc New Brunswick +nl New Caledonia +nm Northern Mariana Islands +nmu New Mexico +nn Vanuatu +no Norway +np Nepal +nq Nicaragua +nr Nigeria +nsc Nova Scotia +ntc Northwest Territories +nu Nauru +nuc Nunavut +nvu Nevada +nw Northern Mariana Islands +nx Norfolk Island +nyu NewYork (State) +nz New Zealand +ohu Ohio +oku Oklahoma +onc Ontario +oru Oregon +ot Mayotte +pau Pennsylvania +pc Pitcairn Island +pe Peru +pf Paracel Islands +pg Guinea Bissau +ph Philippines +pic Prince Edward Island +pk Pakistan +pl Poland +pn Panama +po Portugal +pp Papua New Guinea +pr Puerto Rico +pt Portuguese Timor +pw Palau +py Paraguay +qa Qatar +qea Queensland +quc Québec (Province) +rb Serbia +re Réunion +rh Zimbabwe +riu Rhode Island +rm Romania +ru Russia (Federation) +rur Russian S.F.S.R. 
+rw Rwanda +ry Ryukyu Islands, Southern +sa South Africa +sb Svalbard +sc Saint Barthélemy +scu South Carolina +sd South Sudan +sdu South Dakota +se Seychelles +sf Sao Tome and Principe +sg Senegal +sh Spanish North Africa +si Singapore +sj Sudan +sk Sikkim +sl Sierra Leone +sm San Marino +sn Sint Maarten +snc Saskatchewan +so Somalia +sp Spain +sq Swaziland +sr Surinam +ss Western Sahara +st Saint Martin +stk Scotland +su Saudi Arabia +sv Swan Islands +sw Sweden +sx Namibia +sy Syria +sz Switzerland +ta Tajikistan +tar Tajik S.S.R. +tc Turks and Caicos Islands +tg Togo +th Thailand +ti Tunisia +tk Turkmenistan +tkr Turkmen S.S.R. +tl Tokelau +tma Tasmania +tnu Tennessee +to Tonga +tr Trinidad and Tobago +ts United Arab Emirates +tt Trust Territory of the Pacific Islands +tu Turkey +tv Tuvalu +txu Texas +tz Tanzania +ua Egypt +uc United States Misc. Caribbean Islands +ug Uganda +ui United Kingdom Misc. Islands +uik United Kingdom Misc. Islands +uk United Kingdom +un Ukraine +unr Ukraine +up United States Misc. Pacific Islands +ur Soviet Union +us United States +utu Utah +uv Burkina Faso +uy Uruguay +uz Uzbekistan +uzr Uzbek S.S.R. 
+vau Virginia +vb British Virgin Islands +vc Vatican City +ve Venezuela +vi Virgin Islands of the United States +vm Vietnam +vn Vietnam, North +vp Various places +vra Victoria +vs Vietnam, South +vtu Vermont +wau Washington (State) +wb West Berlin +wea Western Australia +wf Wallis and Futuna +wiu Wisconsin +wj West Bank of the Jordan River +wk Wake Island +wlk Wales +ws Samoa +wvu West Virginia +wyu Wyoming +xa Christmas Island (IndianOcean) +xb Cocos (Keeling) Islands +xc Maldives +xd Saint Kitts Nevis +xe Marshall Islands +xf Midway Islands +xga Coral Sea Islands Territory +xh Niue +xi Saint Kitts Nevis Anguilla +xj Saint Helena +xk Saint Lucia +xl Saint Pierre and Miquelon +xm Saint Vincent and the Grenadines +xn Macedonia +xna New South Wales +xo Slovakia +xoa Northern Territory +xp Spratly Island +xr Czech Republic +xra South Australia +xs South Georgia and the South Sandwich Islands +xv Slovenia +xx No place, unknown, or undetermined +xxc Canada +xxk United Kingdom +xxr Soviet Union +xxu United States +ye Yemen +ykc Yukon Territory +ys Yemen (People's Democratic Republic) +yu Serbia and Montenegro +za Zambia +zz