//---------------------------------------------------------------------------
//
// Copyright (C) Microsoft Corporation. All rights reserved.
//
// File: TokenizerHelper.cs
//
// Description: This file contains the implementation of TokenizerHelper.
//              This class should be used by most - if not all - MIL parsers.
//
// History:
//  05/19/2003 : Microsoft - Created it
//  05/20/2003 : Microsoft - Moved to Shared
//
//---------------------------------------------------------------------------
  15. using System;
  16. using System.Globalization;
  17. namespace HandyControl.Tools;
  18. internal class TokenizerHelper
  19. {
  20. private char _quoteChar;
  21. private char _argSeparator;
  22. private string _str;
  23. private int _strLen;
  24. private int _charIndex;
  25. private int _currentTokenIndex;
  26. private int _currentTokenLength;
  27. public bool FoundSeparator { get; private set; }
  28. public TokenizerHelper(string str, IFormatProvider formatProvider)
  29. {
  30. var numberSeparator = GetNumericListSeparator(formatProvider);
  31. Initialize(str, '\'', numberSeparator);
  32. }
  33. private void Initialize(string str, char quoteChar, char separator)
  34. {
  35. _str = str;
  36. _strLen = str?.Length ?? 0;
  37. _currentTokenIndex = -1;
  38. _quoteChar = quoteChar;
  39. _argSeparator = separator;
  40. // immediately forward past any whitespace so
  41. // NextToken() logic always starts on the first
  42. // character of the next token.
  43. while (_charIndex < _strLen)
  44. {
  45. if (!char.IsWhiteSpace(_str, _charIndex))
  46. {
  47. break;
  48. }
  49. ++_charIndex;
  50. }
  51. }
  52. public string GetCurrentToken() =>
  53. _currentTokenIndex < 0 ? null : _str.Substring(_currentTokenIndex, _currentTokenLength);
  54. internal bool NextToken() => NextToken(false);
  55. public bool NextToken(bool allowQuotedToken) => NextToken(allowQuotedToken, _argSeparator);
  56. public bool NextToken(bool allowQuotedToken, char separator)
  57. {
  58. _currentTokenIndex = -1; // reset the currentTokenIndex
  59. FoundSeparator = false; // reset
  60. // If we're at end of the string, just return false.
  61. if (_charIndex >= _strLen)
  62. {
  63. return false;
  64. }
  65. var currentChar = _str[_charIndex];
  66. // setup the quoteCount
  67. var quoteCount = 0;
  68. // If we are allowing a quoted token and this token begins with a quote,
  69. // set up the quote count and skip the initial quote
  70. if (allowQuotedToken &&
  71. currentChar == _quoteChar)
  72. {
  73. quoteCount++; // increment quote count
  74. ++_charIndex; // move to next character
  75. }
  76. var newTokenIndex = _charIndex;
  77. var newTokenLength = 0;
  78. // loop until hit end of string or hit a , or whitespace
  79. // if at end of string ust return false.
  80. while (_charIndex < _strLen)
  81. {
  82. currentChar = _str[_charIndex];
  83. // if have a QuoteCount and this is a quote
  84. // decrement the quoteCount
  85. if (quoteCount > 0)
  86. {
  87. // if anything but a quoteChar we move on
  88. if (currentChar == _quoteChar)
  89. {
  90. --quoteCount;
  91. // if at zero which it always should for now
  92. // break out of the loop
  93. if (0 == quoteCount)
  94. {
  95. ++_charIndex; // move past the quote
  96. break;
  97. }
  98. }
  99. }
  100. else if (char.IsWhiteSpace(currentChar) || currentChar == separator)
  101. {
  102. if (currentChar == separator)
  103. {
  104. FoundSeparator = true;
  105. }
  106. break;
  107. }
  108. ++_charIndex;
  109. ++newTokenLength;
  110. }
  111. // if quoteCount isn't zero we hit the end of the string
  112. // before the ending quote
  113. if (quoteCount > 0)
  114. {
  115. throw new InvalidOperationException("TokenizerHelperMissingEndQuote");
  116. }
  117. ScanToNextToken(separator); // move so at the start of the nextToken for next call
  118. // finally made it, update the _currentToken values
  119. _currentTokenIndex = newTokenIndex;
  120. _currentTokenLength = newTokenLength;
  121. if (_currentTokenLength < 1)
  122. {
  123. throw new InvalidOperationException("TokenizerHelperEmptyToken");
  124. }
  125. return true;
  126. }
  127. private void ScanToNextToken(char separator)
  128. {
  129. // if already at end of the string don't bother
  130. if (_charIndex >= _strLen) return;
  131. var currentChar = _str[_charIndex];
  132. // check that the currentChar is a space or the separator. If not
  133. // we have an error. this can happen in the quote case
  134. // that the char after the quotes string isn't a char.
  135. if (currentChar != separator && !char.IsWhiteSpace(currentChar))
  136. {
  137. throw new InvalidOperationException("TokenizerHelperExtraDataEncountered");
  138. }
  139. // loop until hit a character that isn't
  140. // an argument separator or whitespace.
  141. // !!!Todo: if more than one argSet throw an exception
  142. var argSepCount = 0;
  143. while (_charIndex < _strLen)
  144. {
  145. currentChar = _str[_charIndex];
  146. if (currentChar == separator)
  147. {
  148. FoundSeparator = true;
  149. ++argSepCount;
  150. _charIndex++;
  151. if (argSepCount > 1)
  152. {
  153. throw new InvalidOperationException("TokenizerHelperEmptyToken");
  154. }
  155. }
  156. else if (char.IsWhiteSpace(currentChar))
  157. {
  158. ++_charIndex;
  159. }
  160. else
  161. {
  162. break;
  163. }
  164. }
  165. // if there was a separatorChar then we shouldn't be
  166. // at the end of string or means there was a separator
  167. // but there isn't an arg
  168. if (argSepCount > 0 && _charIndex >= _strLen)
  169. {
  170. throw new InvalidOperationException("TokenizerHelperEmptyToken");
  171. }
  172. }
  173. internal static char GetNumericListSeparator(IFormatProvider provider)
  174. {
  175. var numericSeparator = ',';
  176. // Get the NumberFormatInfo out of the provider, if possible
  177. // If the IFormatProvider doesn't not contain a NumberFormatInfo, then
  178. // this method returns the current culture's NumberFormatInfo.
  179. var numberFormat = NumberFormatInfo.GetInstance(provider);
  180. // Is the decimal separator is the same as the list separator?
  181. // If so, we use the ";".
  182. if (numberFormat.NumberDecimalSeparator.Length > 0 && numericSeparator == numberFormat.NumberDecimalSeparator[0])
  183. {
  184. numericSeparator = ';';
  185. }
  186. return numericSeparator;
  187. }
  188. }