diff --git a/.editorconfig b/.editorconfig
index d43b16be5..78f4eb4b0 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -34,7 +34,7 @@ indent_size = 2
[metafacture-io/src/test/resources/org/metafacture/io/compressed.txt]
insert_final_newline = false
-[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test.txt]
+[metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test*.txt]
trim_trailing_whitespace = false
[metafacture-runner/src/main/dist/config/java-options.conf]
diff --git a/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java b/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
index 53aa28c03..0227d8228 100644
--- a/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
+++ b/metamorph/src/main/java/org/metafacture/metamorph/maps/FileMap.java
@@ -43,14 +43,19 @@
*
* The default {@link #setEncoding encoding} is UTF-8.
* The default {@link #setSeparator separator} is {@code \t}.
+ * The default {@link #setKeyColumn keyColumn} is {@code 0}.
+ * The default {@link #setValueColumn valueColumn} is {@code 1}.
*
- * By setting {@link #allowEmptyValues} to {@code true} the values in the
- * {@link Map} can be empty thus enabling e.g.
+ * <p>By setting {@link #setAllowEmptyValues allowEmptyValues} to {@code true},
+ * the values in the {@link Map} can be empty; thus enabling e.g.
* {@link org.metafacture.metamorph.functions.SetReplace} to remove matching
* keys.
*
- * Important: All other lines that are not split in two parts
- * by the separator are ignored!
+ * <p>By setting {@link #setExpectedColumns expectedColumns} to
+ * {@code -1}, the number of columns is not checked.
+ *
+ * <p>Important: Otherwise, all lines that are not split into
+ * the expected number of parts by the separator are ignored!
*
* @author Markus Michael Geipel
*/
@@ -59,10 +64,13 @@ public final class FileMap extends AbstractReadOnlyMap {
private final FileOpener fileOpener = new FileOpener();
private final Map map = new HashMap<>();
+ private ArrayList filenames = new ArrayList<>();
private Pattern split = Pattern.compile("\t", Pattern.LITERAL);
private boolean allowEmptyValues;
private boolean isUninitialized = true;
- private ArrayList filenames = new ArrayList<>();
+ private int expectedColumns;
+ private int keyColumn;
+ private int valueColumn = 1;
/**
* Creates an instance of {@link FileMap}.
@@ -79,7 +87,7 @@ private void init() {
* Sets whether to allow empty values in the {@link Map} or ignore these
* entries.
*
- * Default value: false
+ * Default value: false
*
* @param allowEmptyValues true if empty values in the Map are allowed
*/
@@ -87,6 +95,20 @@ public void setAllowEmptyValues(final boolean allowEmptyValues) {
this.allowEmptyValues = allowEmptyValues;
}
+ /**
+ * Sets number of expected columns; lines with different number of columns
+ * are ignored. Set to {@code -1} to disable the check and allow arbitrary
+ * number of columns.
+ *
+ * Default value: calculated from {@link #setKeyColumn key} and
+ * {@link #setValueColumn value} columns
+ *
+ * @param expectedColumns number of expected columns
+ */
+ public void setExpectedColumns(final int expectedColumns) {
+ this.expectedColumns = expectedColumns;
+ }
+
/**
* Sets a comma separated list of files which provides the {@link Map}.
*
@@ -141,14 +163,22 @@ private void loadFile(final String file) {
Reader reader = fileOpener.open(stream);
BufferedReader br = new BufferedReader(reader)
) {
+ final int minColumns = Math.max(keyColumn, valueColumn) + 1;
+ final int expColumns = expectedColumns != 0 ? expectedColumns : minColumns;
+
String line;
while ((line = br.readLine()) != null) {
if (line.isEmpty()) {
continue;
}
+
final String[] parts = allowEmptyValues ? split.split(line, -1) : split.split(line);
- if (parts.length == 2) {
- map.put(parts[0], parts[1]);
+ if (parts.length < minColumns) {
+ continue;
+ }
+
+ if (expColumns < 0 || parts.length == expColumns) {
+ map.put(parts[keyColumn], parts[valueColumn]);
}
}
}
@@ -205,6 +235,28 @@ public void setSeparator(final String delimiter) {
split = Pattern.compile(delimiter, Pattern.LITERAL);
}
+ /**
+ * Sets the key column (0-based).
+ *
+ * Default value: {@code 0}
+ *
+ * @param keyColumn the key column
+ */
+ public void setKeyColumn(final int keyColumn) {
+ this.keyColumn = keyColumn;
+ }
+
+ /**
+ * Sets the value column (0-based).
+ *
+ * Default value: {@code 1}
+ *
+ * @param valueColumn the value column
+ */
+ public void setValueColumn(final int valueColumn) {
+ this.valueColumn = valueColumn;
+ }
+
@Override
public String get(final Object key) {
if (isUninitialized) {
diff --git a/metamorph/src/main/resources/schemata/metamorph.xsd b/metamorph/src/main/resources/schemata/metamorph.xsd
index 0fe398e1e..953c4c031 100644
--- a/metamorph/src/main/resources/schemata/metamorph.xsd
+++ b/metamorph/src/main/resources/schemata/metamorph.xsd
@@ -629,6 +629,37 @@
The default separator is the tabulator.
+
+
+ Sets the key column (0-based).
+
+
+
+
+
+
+
+
+
+ Sets the value column (0-based).
+
+
+
+
+
+
+
+
+
+ Sets number of expected columns; set to -1 to disable
+ column check.
+
+
+
+
+
+
+
diff --git a/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java b/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java
index ee92795bf..ea5311f6e 100644
--- a/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java
+++ b/metamorph/src/test/java/org/metafacture/metamorph/maps/FileMapTest.java
@@ -18,6 +18,7 @@
import static org.metafacture.metamorph.TestHelpers.assertMorph;
+import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.metafacture.framework.StreamReceiver;
@@ -25,6 +26,8 @@
import org.mockito.junit.MockitoJUnit;
import org.mockito.junit.MockitoRule;
+import java.util.function.Consumer;
+
/**
* Tests for class {@link FileMap}.
*
@@ -39,6 +42,8 @@ public final class FileMapTest {
@Mock
private StreamReceiver receiver;
+ private static String MAPS = "org/metafacture/metamorph/maps/";
+
private static String MORPH =
"" +
" " +
@@ -46,7 +51,7 @@ public final class FileMapTest {
" " +
"" +
"" +
- " " +
+ " " +
"";
@Test
@@ -67,6 +72,24 @@ public void shouldLookupValuesInFileBasedMap() {
);
}
+ @Test
+ public void shouldLookupValuesInFileBasedMapWithColumnOptions() {
+ assertMorph(receiver, buildMorph("lookup in", "keyColumn=\"1\" valueColumn=\"0\" expectedColumns=\"2\""),
+ i -> {
+ i.startRecord("1");
+ i.literal("1", "Germany");
+ i.literal("1", "Fiji");
+ i.endRecord();
+ },
+ o -> {
+ o.get().startRecord("1");
+ o.get().literal("1", "gw");
+ o.get().literal("1", "fj");
+ o.get().endRecord();
+ }
+ );
+ }
+
@Test
public void shouldWhitelistValuesInFileBasedMap() {
assertMorph(receiver, buildMorph("whitelist map", ""),
@@ -206,6 +229,121 @@ public void shouldLookupValuesInBlockedGzipFileMap() {
);
}
+ @Test
+ public void shouldLoadFile() {
+ assertMap(379, i -> {
+ Assert.assertEquals("Puerto Rico", i.get("pr"));
+ Assert.assertNull(i.get("zz"));
+ });
+ }
+
+ @Test
+ public void shouldLoadFileWithEmptyValues() {
+ assertMap(380, i -> {
+ i.setAllowEmptyValues(true);
+
+ Assert.assertEquals("Puerto Rico", i.get("pr"));
+ Assert.assertEquals("", i.get("zz"));
+ });
+ }
+
+ @Test
+ public void shouldLoadFileWithSeparator() {
+ assertMap(99, i -> {
+ i.setSeparator(" ");
+
+ Assert.assertNull(i.get("pp\tPapua"));
+ Assert.assertEquals("Rico", i.get("pr\tPuerto"));
+ });
+ }
+
+ @Test
+ public void shouldLoadFileWithKeyColumn() {
+ assertMap(21, i -> {
+ i.setSeparator(" ");
+ i.setKeyColumn(2);
+
+ Assert.assertEquals("New", i.get("Guinea"));
+ });
+ }
+
+ @Test
+ public void shouldLoadFileWithValueColumn() {
+ assertMap(24, i -> {
+ i.setSeparator(" ");
+ i.setValueColumn(2);
+
+ Assert.assertEquals("Guinea", i.get("pp\tPapua"));
+ });
+ }
+
+ @Test
+ public void shouldLoadFileWithKeyAndValueColumn() {
+ assertMap(66, i -> {
+ i.setSeparator(" ");
+ i.setKeyColumn(1);
+ i.setValueColumn(0);
+
+ Assert.assertEquals("pr\tPuerto", i.get("Rico"));
+ });
+ }
+
+ @Test
+ public void shouldLoadFileWithExpectedColumns() {
+ assertMap(24, i -> {
+ i.setSeparator(" ");
+ i.setExpectedColumns(3);
+
+ Assert.assertEquals("New", i.get("pp\tPapua"));
+ });
+ }
+
+ @Test
+ public void shouldLoadFileWithArbitraryExpectedColumns() {
+ assertMap(149, i -> {
+ i.setSeparator(" ");
+ i.setExpectedColumns(-1);
+
+ Assert.assertEquals("New", i.get("pp\tPapua"));
+ });
+ }
+
+ @Test
+ public void shouldNotLoadFileWithOutOfRangeKeyColumn() {
+ assertMap(0, i -> {
+ i.setKeyColumn(2);
+ });
+ }
+
+ @Test
+ public void shouldNotLoadFileWithOutOfRangeValueColumn() {
+ assertMap(0, i -> {
+ i.setValueColumn(2);
+ });
+ }
+
+ @Test
+ public void shouldNotLoadFileWithTooFewExpectedColumns() {
+ assertMap(0, i -> {
+ i.setExpectedColumns(1);
+ });
+ }
+
+ @Test
+ public void shouldNotLoadFileWithTooManyExpectedColumns() {
+ assertMap(0, i -> {
+ i.setExpectedColumns(99);
+ });
+ }
+
+ private void assertMap(final int size, final Consumer consumer) {
+ final FileMap fileMap = new FileMap();
+ fileMap.setFile(MAPS + "file-map-test-columns.txt");
+
+ consumer.accept(fileMap);
+ Assert.assertEquals(size, fileMap.keySet().size());
+ }
+
private String buildMorph(final String data, final String options) {
return buildMorph(data, "file-map-test.txt", options);
}
diff --git a/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test-columns.txt b/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test-columns.txt
new file mode 100644
index 000000000..b9dcae10d
--- /dev/null
+++ b/metamorph/src/test/resources/org/metafacture/metamorph/maps/file-map-test-columns.txt
@@ -0,0 +1,381 @@
+aa Albania
+abc Alberta
+ac Ashmore and Cartier Islands
+aca Australian Capital Territory
+ae Algeria
+af Afghanistan
+ag Argentina
+ai Anguilla
+ai Armenia (Republic)
+air Armenian S.S.R.
+aj Azerbaijan
+ajr Azerbaijan S.S.R.
+aku Alaska
+alu Alabama
+am Anguilla
+an Andorra
+ao Angola
+aq Antigua and Barbuda
+aru Arkansas
+as American Samoa
+at Australia
+au Austria
+aw Aruba
+ay Antarctica
+azu Arizona
+ba Bahrain
+bb Barbados
+bcc British Columbia
+bd Burundi
+be Belgium
+bf Bahamas
+bg Bangladesh
+bh Belize
+bi British Indian Ocean Territory
+bl Brazil
+bm Bermuda Islands
+bn Bosnia and Hercegovina
+bo Bolivia
+bp Solomon Islands
+br Burma
+bs Botswana
+bt Bhutan
+bu Bulgaria
+bv Bouvet Island
+bw Belarus
+bwr Byelorussian S.S.R.
+bx Brunei
+ca Caribbean Netherlands
+cau California
+cb Cambodia
+cc China
+cd Chad
+ce SriLanka
+cf Congo (Brazzaville)
+cg Congo (Democratic Republic)
+ch China (Republic: 1949)
+ci Croatia
+cj Cayman Islands
+ck Colombia
+cl Chile
+cm Cameroon
+cn Canada
+co Curaçao
+cou Colorado
+cp Canton and Enderbury Islands
+cq Comoros
+cr Costa Rica
+cs Czechoslovakia
+ctu Connecticut
+cu Cuba
+cv Cape Verde
+cw Cook Islands
+cx Central African Republic
+cy Cyprus
+cz Canal Zone
+dcu District of Columbia
+deu Delaware
+dk Denmark
+dm Benin
+dq Dominica
+dr Dominican Republic
+ea Eritrea
+ec Ecuador
+eg Equatorial Guinea
+em Timor Leste
+enk England
+er Estonia
+err Estonia
+es El Salvador
+et Ethiopia
+fa Faroe Islands
+fg French Guiana
+fi Finland
+fj Fiji
+fk Falkland Islands
+flu Florida
+fm Micronesia (Federated States)
+fp French Polynesia
+fr France
+fs Terres australes et antarctiques françaises
+ft Djibouti
+gau Georgia
+gb Kiribati
+gd Grenada
+ge Germany (East)
+gh Ghana
+gi Gibraltar
+gl Greenland
+gm Gambia
+gn Gilbert and Ellice Islands
+go Gabon
+gp Guadeloupe
+gr Greece
+gs Georgia (Republic)
+gsr Georgian S.S.R.
+gt Guatemala
+gu Guam
+gv Guinea
+gw Germany
+gy Guyana
+gz Gaza Strip
+hiu Hawaii
+hk HongKong
+hm Heard and McDonald Islands
+ho Honduras
+ht Haiti
+hu Hungary
+iau Iowa
+ic Iceland
+idu Idaho
+ie Ireland
+ii India
+ilu Illinois
+inu Indiana
+io Indonesia
+iq Iraq
+ir Iran
+is Israel
+it Italy
+iu Israel Syria Demilitarized Zones
+iv Côte d'Ivoire
+iw Israel Jordan Demilitarized Zones
+iy Iraq Saudi Arabia Neutral Zone
+ja Japan
+ji Johnston Atoll
+jm Jamaica
+jn Jan Mayen
+jo Jordan
+ke Kenya
+kg Kyrgyzstan
+kgr Kirghiz S.S.R.
+kn Korea (North)
+ko Korea (South)
+ksu Kansas
+ku Kuwait
+kv Kosovo
+kyu Kentucky
+kz Kazakhstan
+kzr Kazakh S.S.R.
+lau Louisiana
+lb Liberia
+le Lebanon
+lh Liechtenstein
+li Lithuania
+lir Lithuania
+ln Central and Southern Line Islands
+lo Lesotho
+ls Laos
+lu Luxembourg
+lv Latvia
+lvr Latvia
+ly Libya
+mau Massachusetts
+mbc Manitoba
+mc Monaco
+mdu Maryland
+meu Maine
+mf Mauritius
+mg Madagascar
+mh Macao
+miu Michigan
+mj Montserrat
+mk Oman
+ml Mali
+mm Malta
+mnu Minnesota
+mo Montenegro
+mou Missouri
+mp Mongolia
+mq Martinique
+mr Morocco
+msu Mississippi
+mtu Montana
+mu Mauritania
+mv Moldova
+mvr Moldavian S.S.R.
+mw Malawi
+mx Mexico
+my Malaysia
+mz Mozambique
+na Netherlands Antilles
+nbu Nebraska
+ncu North Carolina
+ndu North Dakota
+ne Netherlands
+nfc Newfoundland and Labrador
+ng Niger
+nhu New Hampshire
+nik Northern Ireland
+nju New Jersey
+nkc New Brunswick
+nl New Caledonia
+nm Northern Mariana Islands
+nmu New Mexico
+nn Vanuatu
+no Norway
+np Nepal
+nq Nicaragua
+nr Nigeria
+nsc Nova Scotia
+ntc Northwest Territories
+nu Nauru
+nuc Nunavut
+nvu Nevada
+nw Northern Mariana Islands
+nx Norfolk Island
+nyu NewYork (State)
+nz New Zealand
+ohu Ohio
+oku Oklahoma
+onc Ontario
+oru Oregon
+ot Mayotte
+pau Pennsylvania
+pc Pitcairn Island
+pe Peru
+pf Paracel Islands
+pg Guinea Bissau
+ph Philippines
+pic Prince Edward Island
+pk Pakistan
+pl Poland
+pn Panama
+po Portugal
+pp Papua New Guinea
+pr Puerto Rico
+pt Portuguese Timor
+pw Palau
+py Paraguay
+qa Qatar
+qea Queensland
+quc Québec (Province)
+rb Serbia
+re Réunion
+rh Zimbabwe
+riu Rhode Island
+rm Romania
+ru Russia (Federation)
+rur Russian S.F.S.R.
+rw Rwanda
+ry Ryukyu Islands, Southern
+sa South Africa
+sb Svalbard
+sc Saint Barthélemy
+scu South Carolina
+sd South Sudan
+sdu South Dakota
+se Seychelles
+sf Sao Tome and Principe
+sg Senegal
+sh Spanish North Africa
+si Singapore
+sj Sudan
+sk Sikkim
+sl Sierra Leone
+sm San Marino
+sn Sint Maarten
+snc Saskatchewan
+so Somalia
+sp Spain
+sq Swaziland
+sr Surinam
+ss Western Sahara
+st Saint Martin
+stk Scotland
+su Saudi Arabia
+sv Swan Islands
+sw Sweden
+sx Namibia
+sy Syria
+sz Switzerland
+ta Tajikistan
+tar Tajik S.S.R.
+tc Turks and Caicos Islands
+tg Togo
+th Thailand
+ti Tunisia
+tk Turkmenistan
+tkr Turkmen S.S.R.
+tl Tokelau
+tma Tasmania
+tnu Tennessee
+to Tonga
+tr Trinidad and Tobago
+ts United Arab Emirates
+tt Trust Territory of the Pacific Islands
+tu Turkey
+tv Tuvalu
+txu Texas
+tz Tanzania
+ua Egypt
+uc United States Misc. Caribbean Islands
+ug Uganda
+ui United Kingdom Misc. Islands
+uik United Kingdom Misc. Islands
+uk United Kingdom
+un Ukraine
+unr Ukraine
+up United States Misc. Pacific Islands
+ur Soviet Union
+us United States
+utu Utah
+uv Burkina Faso
+uy Uruguay
+uz Uzbekistan
+uzr Uzbek S.S.R.
+vau Virginia
+vb British Virgin Islands
+vc Vatican City
+ve Venezuela
+vi Virgin Islands of the United States
+vm Vietnam
+vn Vietnam, North
+vp Various places
+vra Victoria
+vs Vietnam, South
+vtu Vermont
+wau Washington (State)
+wb West Berlin
+wea Western Australia
+wf Wallis and Futuna
+wiu Wisconsin
+wj West Bank of the Jordan River
+wk Wake Island
+wlk Wales
+ws Samoa
+wvu West Virginia
+wyu Wyoming
+xa Christmas Island (IndianOcean)
+xb Cocos (Keeling) Islands
+xc Maldives
+xd Saint Kitts Nevis
+xe Marshall Islands
+xf Midway Islands
+xga Coral Sea Islands Territory
+xh Niue
+xi Saint Kitts Nevis Anguilla
+xj Saint Helena
+xk Saint Lucia
+xl Saint Pierre and Miquelon
+xm Saint Vincent and the Grenadines
+xn Macedonia
+xna New South Wales
+xo Slovakia
+xoa Northern Territory
+xp Spratly Island
+xr Czech Republic
+xra South Australia
+xs South Georgia and the South Sandwich Islands
+xv Slovenia
+xx No place, unknown, or undetermined
+xxc Canada
+xxk United Kingdom
+xxr Soviet Union
+xxu United States
+ye Yemen
+ykc Yukon Territory
+ys Yemen (People's Democratic Republic)
+yu Serbia and Montenegro
+za Zambia
+zz