Experimental named field support in tsv-join #288

Merged: 5 commits, Jun 8, 2020
12 changes: 8 additions & 4 deletions common/src/tsv_utils/common/fieldlist.d
@@ -241,8 +241,12 @@ alias ConsumeEntireFieldListString = Flag!"consumeEntireFieldListString";

The optional `cmdOptionString` and `headerCmdArg` arguments are used to generate better
error messages. `cmdOptionString` should be the command line arguments string passed to
`std.getopt`. e.g `"f|field"`. The `headerCmdArg` argument should be the option for
turning on header line processing. Most tsv-utils tools can use the default value.
`std.getopt`. e.g `"f|field"`. This is added to the error message. Callers already
adding the option name to the error message should pass the empty string.

The `headerCmdArg` argument should be the option for turning on header line processing.
This is standard for tsv-utils tools (`--H|header`), so most tsv-utils tools will use
the default value.

`parseFieldList` returns a reference range. This is so the `consumed` member function
remains valid when using the range with facilities that would copy a value-based
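For orientation, a minimal sketch (not part of the patch) of how a caller might invoke `parseFieldList` with these arguments, mirroring the call pattern added to tsv-join.d below. The header fields and the `"f|fields"` option string are invented, and `headerCmdArg` is left at its default:

```d
import std.array : array;
import std.typecons : No, Yes;
import tsv_utils.common.fieldlist : parseFieldList;

void example()
{
    /* Hypothetical header line fields from a file read with header processing on. */
    string[] headerFields = ["date", "time", "value"];
    bool hasHeader = true;

    /* "time,value" resolves the names against headerFields, yielding [2, 3]
     * (one-based, because No.convertToZeroBasedIndex is requested).
     */
    size_t[] fields =
        "time,value"
        .parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
            (hasHeader, headerFields, "f|fields")
        .array;
}
```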
@@ -1390,7 +1394,7 @@ private auto namedFieldGroupToRegex(const char[] fieldGroup)
if (g[0] == HYPHEN)
{
enforce(!hyphenSeparatorFound && regexString.data.length != 0,
format("Hyphens in field names must be backslash escaped unless separating two field names: '%s'.\n",
format("Hyphens in field names must be backslash escaped unless separating two field names: '%s'.",
fieldGroup));

assert(field1Regex.empty);
@@ -1418,7 +1422,7 @@ private auto namedFieldGroupToRegex(const char[] fieldGroup)
}
}
enforce(!hyphenSeparatorFound || regexString.data.length != 0,
format("Hyphens in field names must be backslash escaped unless separating two field names: '%s'.\n",
format("Hyphens in field names must be backslash escaped unless separating two field names: '%s'.",
fieldGroup));

if (!hyphenSeparatorFound)
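A reader's note on the rule these messages enforce, as a sketch under the assumption (from the tsv-utils named-field syntax) that an unescaped hyphen between two field names selects a range, while a backslash-escaped hyphen is a literal character in a name:

```d
import std.array : array;
import std.typecons : No, Yes;
import tsv_utils.common.fieldlist : parseFieldList;

void hyphenExample()
{
    string[] headerFields = ["date", "time", "value"];

    /* An unescaped hyphen separates two field names, selecting the range
     * 'date' through 'time': fields [1, 2].
     */
    auto fields = "date-time"
        .parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
            (true, headerFields, "f|fields")
        .array;

    /* A hyphen with no field name on its left (e.g. "-time") or on its right
     * triggers the errors above. A field whose name itself contains a hyphen
     * would be written with the hyphen escaped, e.g. `max\-value`.
     */
}
```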
99 changes: 85 additions & 14 deletions tsv-join/src/tsv_utils/tsv-join.d
@@ -16,6 +16,7 @@ module tsv_utils.tsv_join;
import std.exception : enforce;
import std.stdio;
import std.format : format;
import std.range;
import std.typecons : tuple;

auto helpText = q"EOS
@@ -70,12 +71,12 @@ struct TsvJoinOptions
/* Data available to the main program. Variables used only for command line argument
* processing are local to processArgs.
*/
string programName;
InputSourceRange inputSources; // Input Files
ByLineSourceRange!() filterSource; // Derived: --filter
size_t[] keyFields; // --key-fields
size_t[] dataFields; // --data-fields
size_t[] appendFields; // --append-fields
string programName; /// Program name
InputSourceRange inputSources; /// Input Files
ByLineSourceRange!() filterSource; /// Derived: --filter
size_t[] keyFields; /// Derived: --key-fields
size_t[] dataFields; /// Derived: --data-fields
size_t[] appendFields; /// Derived: --append-fields
bool hasHeader = false; // --H|header
string appendHeaderPrefix = ""; // --append-header-prefix
bool writeAll = false; // --write-all
@@ -97,14 +98,23 @@ struct TsvJoinOptions
*/
auto processArgs (ref string[] cmdArgs)
{
import std.array : split;
import std.conv : to;
import std.getopt;
import std.path : baseName, stripExtension;
import std.typecons : Yes, No;
import tsv_utils.common.fieldlist : makeFieldListOptionHandler;
import tsv_utils.common.fieldlist;

string filterFile; // --filter
bool helpVerbose = false; // --help-verbose
bool versionWanted = false; // --V|version
string filterFile; // --filter
string keyFieldsArg; // --key-fields
string dataFieldsArg; // --data-fields
string appendFieldsArg; // --append-fields

string keyFieldsOptionString = "k|key-fields";
string dataFieldsOptionString = "d|data-fields";
string appendFieldsOptionString = "a|append-fields";

programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

@@ -124,14 +134,17 @@
"help-verbose", " Print full help.", &helpVerbose,
"f|filter-file", "FILE (Required) File with records to use as a filter.", &filterFile,

"k|key-fields", "<field-list> Fields to use as join key. Default: 0 (entire line).",
keyFields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero),
keyFieldsOptionString,
"<field-list> Fields to use as join key. Default: 0 (entire line).",
&keyFieldsArg,

"d|data-fields", "<field-list> Data record fields to use as join key, if different than --key-fields.",
dataFields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero),
dataFieldsOptionString,
"<field-list> Data record fields to use as join key, if different than --key-fields.",
&dataFieldsArg,

"a|append-fields", "<field-list> Filter fields to append to matched records.",
appendFields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero),
appendFieldsOptionString,
"<field-list> Filter fields to append to matched records.",
&appendFieldsArg,

std.getopt.config.caseSensitive,
"H|header", " Treat the first line of each file as a header.", &hasHeader,
@@ -181,6 +194,64 @@ struct TsvJoinOptions
ReadHeader readHeader = hasHeader ? Yes.readHeader : No.readHeader;
inputSources = inputSourceRange(filepaths, readHeader);

/* Field-list args (--k|key-fields, --d|data-fields, --a|append-fields) are
* parsed after header lines from the filter file and first data file have
* been read. The files were opened in the previous step, when setting up
* the 'filterSource' and 'inputSources' ranges.
*
* The field-list parsing step translates any named fields to one-based
* field numbers. Note that a named field may have different field
* numbers in the filter file and data files.
*
* The 'derivations()' method works off the one-based indices, converting
* them to zero-based. It also handles the full-line cases.
*/

string[] filterFileHeaderFields;
string[] inputSourceHeaderFields;

if (hasHeader && !filterSource.front.byLine.empty)
{
filterFileHeaderFields = filterSource.front.byLine.front.split(delim).to!(string[]);
}

if (hasHeader) inputSourceHeaderFields = inputSources.front.header.split(delim).to!(string[]);

if (!keyFieldsArg.empty)
{
keyFields =
keyFieldsArg
.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
(hasHeader, filterFileHeaderFields, keyFieldsOptionString)
.array;
}

if (!dataFieldsArg.empty)
{
dataFields =
dataFieldsArg
.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
(hasHeader, inputSourceHeaderFields, dataFieldsOptionString)
.array;
}
else if (!keyFieldsArg.empty)
{
dataFields =
keyFieldsArg
.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
(hasHeader, inputSourceHeaderFields, dataFieldsOptionString)
.array;
}

if (!appendFieldsArg.empty)
{
appendFields =
appendFieldsArg
.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
(hasHeader, filterFileHeaderFields, appendFieldsOptionString)
.array;
}

consistencyValidations(cmdArgs);
derivations();
}
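To make the comment's point about differing field numbers concrete, a small sketch (the headers are invented, not from the patch) showing the same field name resolving to different one-based numbers against the filter-file header and the data-file header:

```d
import std.array : array;
import std.typecons : No, Yes;
import tsv_utils.common.fieldlist : parseFieldList;

void resolutionExample()
{
    /* Hypothetical headers: 'id' is field 2 in the filter file but field 1
     * in the data files.
     */
    string[] filterFileHeaderFields = ["date", "id", "score"];
    string[] inputSourceHeaderFields = ["id", "value"];

    auto keyFields = "id"
        .parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
            (true, filterFileHeaderFields, "k|key-fields")
        .array;                                              // [2]

    auto dataFields = "id"
        .parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
            (true, inputSourceHeaderFields, "d|data-fields")
        .array;                                              // [1]
}
```

This is why the key-fields list is resolved against the filter file's header, while the data-fields list (or the key-fields list reused for the data files) is resolved against the first input file's header.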