Command line argument parsing improvements (#1048)

* Support negative numbers as command option values
* Support command line options before arguments
* POSIX-compliant handling of quotes (double and single, terminated and unterminated), whitespace, hyphens, and special characters (e.g. emojis)
This commit is contained in:
Frank Ray
2022-12-05 00:07:53 +00:00
committed by GitHub
parent f895bb175d
commit b793482ebb
14 changed files with 812 additions and 326 deletions

View File

@ -308,19 +308,35 @@ internal class CommandTreeParser
{
// Is this a command?
if (current.Command.FindCommand(valueToken.Value, CaseSensitivity) == null)
{
if (parameter != null)
{
if (parameter.ParameterKind == ParameterKind.Flag)
{
if (!CliConstants.AcceptedBooleanValues.Contains(valueToken.Value, StringComparer.OrdinalIgnoreCase))
{
// Flags cannot be assigned a value.
throw CommandParseException.CannotAssignValueToFlag(context.Arguments, token);
}
}
value = stream.Consume(CommandTreeToken.Kind.String)?.Value;
{
if (parameter != null)
{
if (parameter.ParameterKind == ParameterKind.Flag)
{
if (!CliConstants.AcceptedBooleanValues.Contains(valueToken.Value, StringComparer.OrdinalIgnoreCase))
{
if (!valueToken.HadSeparator)
{
// Do nothing
// - assume valueToken is unrelated to the flag parameter (i.e. we've parsed it unnecessarily)
// - rely on the "No value?" code below to set the flag to its default value
// - valueToken will be handled on the next pass of the parser
}
else
{
// Flags cannot be assigned a value.
throw CommandParseException.CannotAssignValueToFlag(context.Arguments, token);
}
}
else
{
value = stream.Consume(CommandTreeToken.Kind.String)?.Value;
}
}
else
{
value = stream.Consume(CommandTreeToken.Kind.String)?.Value;
}
}
else
{

View File

@ -6,7 +6,12 @@ internal sealed class CommandTreeToken
public int Position { get; }
public string Value { get; }
public string Representation { get; }
public bool IsGrouped { get; set; }
public bool IsGrouped { get; set; }
/// <summary>
/// Gets or sets a value indicating whether a separator was encountered immediately before the <see cref="CommandTreeToken.Value"/>.
/// </summary>
public bool HadSeparator { get; set; }
public enum Kind
{

View File

@ -29,7 +29,14 @@ internal static class CommandTreeTokenizer
var context = new CommandTreeTokenizerContext();
foreach (var arg in args)
{
{
if (string.IsNullOrEmpty(arg))
{
// Null or empty strings in the args array are still represented as (empty) tokens
tokens.Add(new CommandTreeToken(CommandTreeToken.Kind.String, position, string.Empty, string.Empty));
continue;
}
var start = position;
var reader = new TextBuffer(previousReader, arg);
@ -48,39 +55,30 @@ internal static class CommandTreeTokenizer
}
private static int ParseToken(CommandTreeTokenizerContext context, TextBuffer reader, int position, int start, List<CommandTreeToken> tokens)
{
while (reader.Peek() != -1)
{
if (reader.ReachedEnd)
{
position += reader.Position - start;
break;
}
var character = reader.Peek();
// Eat whitespace
if (char.IsWhiteSpace(character))
{
reader.Consume();
continue;
}
if (character == '-')
{
// Option
tokens.AddRange(ScanOptions(context, reader));
}
else
{
// Command or argument
tokens.Add(ScanString(context, reader));
}
// Flush remaining tokens
context.FlushRemaining();
}
{
if (!reader.ReachedEnd && reader.Peek() == '-')
{
// Option
tokens.AddRange(ScanOptions(context, reader));
}
else
{
// Command or argument
while (reader.Peek() != -1)
{
if (reader.ReachedEnd)
{
position += reader.Position - start;
break;
}
tokens.Add(ScanString(context, reader));
// Flush remaining tokens
context.FlushRemaining();
}
}
return position;
}
@ -89,15 +87,6 @@ internal static class CommandTreeTokenizer
TextBuffer reader,
char[]? stop = null)
{
if (reader.TryPeek(out var character))
{
// Is this a quoted string?
if (character == '\"')
{
return ScanQuotedString(context, reader);
}
}
var position = reader.Position;
var builder = new StringBuilder();
while (!reader.ReachedEnd)
@ -113,48 +102,8 @@ internal static class CommandTreeTokenizer
builder.Append(current);
}
var value = builder.ToString();
return new CommandTreeToken(CommandTreeToken.Kind.String, position, value.Trim(), value);
}
private static CommandTreeToken ScanQuotedString(CommandTreeTokenizerContext context, TextBuffer reader)
{
var position = reader.Position;
context.FlushRemaining();
reader.Consume('\"');
var builder = new StringBuilder();
var terminated = false;
while (!reader.ReachedEnd)
{
var character = reader.Peek();
if (character == '\"')
{
terminated = true;
reader.Read();
break;
}
builder.Append(reader.Read());
}
if (!terminated)
{
var unterminatedQuote = builder.ToString();
var token = new CommandTreeToken(CommandTreeToken.Kind.String, position, unterminatedQuote, $"\"{unterminatedQuote}");
throw CommandParseException.UnterminatedQuote(reader.Original, token);
}
var quotedString = builder.ToString();
// Add to the context
context.AddRemaining(quotedString);
return new CommandTreeToken(
CommandTreeToken.Kind.String,
position, quotedString,
quotedString);
var value = builder.ToString();
return new CommandTreeToken(CommandTreeToken.Kind.String, position, value, value);
}
private static IEnumerable<CommandTreeToken> ScanOptions(CommandTreeTokenizerContext context, TextBuffer reader)
@ -166,7 +115,7 @@ internal static class CommandTreeTokenizer
reader.Consume('-');
context.AddRemaining('-');
if (!reader.TryPeek(out var character))
if (!reader.TryPeek(out var character) || character == ' ')
{
var token = new CommandTreeToken(CommandTreeToken.Kind.ShortOption, position, "-", "-");
throw CommandParseException.OptionHasNoName(reader.Original, token);
@ -200,8 +149,10 @@ internal static class CommandTreeTokenizer
var token = new CommandTreeToken(CommandTreeToken.Kind.String, reader.Position, "=", "=");
throw CommandParseException.OptionValueWasExpected(reader.Original, token);
}
result.Add(ScanString(context, reader));
var tokenValue = ScanString(context, reader);
tokenValue.HadSeparator = true;
result.Add(tokenValue);
}
}
@ -235,12 +186,38 @@ internal static class CommandTreeTokenizer
? new CommandTreeToken(CommandTreeToken.Kind.ShortOption, position, value, $"-{value}")
: new CommandTreeToken(CommandTreeToken.Kind.ShortOption, position + result.Count, value, value));
}
else
else if (result.Count == 0 && char.IsDigit(current))
{
// We require short options to be named with letters. Short options that start with a number
// ("-1", "-2ab", "-3..7") may actually mean values (either for options or arguments) and will
// be tokenized as strings. This block handles parsing those cases, but we only allow this
// when the digit is the first character in the token (i.e. "-a1" is always an error), hence the
// result.Count == 0 check above.
string value = string.Empty;
while (!reader.ReachedEnd)
{
char c = reader.Peek();
if (char.IsWhiteSpace(c))
{
break;
}
value += c.ToString(CultureInfo.InvariantCulture);
reader.Read();
}
value = "-" + value; // Prefix with the minus sign that we originally thought to mean a short option
result.Add(new CommandTreeToken(CommandTreeToken.Kind.String, position, value, value));
}
else
{
// Create a token representing the short option.
var tokenPosition = position + 1 + result.Count;
var represntation = current.ToString(CultureInfo.InvariantCulture);
var token = new CommandTreeToken(CommandTreeToken.Kind.ShortOption, tokenPosition, represntation, represntation);
var representation = current.ToString(CultureInfo.InvariantCulture);
var tokenPosition = position + 1 + result.Count;
var token = new CommandTreeToken(CommandTreeToken.Kind.ShortOption, tokenPosition, representation, representation);
throw CommandParseException.InvalidShortOptionName(reader.Original, token);
}
}
@ -271,7 +248,7 @@ internal static class CommandTreeTokenizer
var name = ScanString(context, reader, new[] { '=', ':' });
// Perform validation of the name.
if (name.Value.Length == 0)
if (name.Value == " ")
{
throw CommandParseException.LongOptionNameIsMissing(reader, position);
}