JsonScanner.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. /* Copyright 2010-present MongoDB Inc.
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. using System;
  16. using System.IO;
  17. using System.Text;
  18. namespace MongoDB.Bson.IO
  19. {
  20. /// <summary>
  21. /// A static class that represents a JSON scanner.
  22. /// </summary>
  23. internal static class JsonScanner
  24. {
  25. // public static methods
  /// <summary>
  /// Scans the next JsonToken from a JsonBuffer, skipping any leading whitespace.
  /// </summary>
  /// <param name="buffer">The buffer to read characters from.</param>
  /// <returns>
  /// The next token. An EndOfFile token (lexeme "&lt;eof&gt;") is returned when only
  /// whitespace, or nothing at all, remains in the buffer.
  /// </returns>
  /// <exception cref="FormatException">The first non-whitespace character cannot start a token.</exception>
  public static JsonToken GetNextToken(JsonBuffer buffer)
  {
      // consume characters until one is not whitespace or the input is exhausted (-1)
      int first;
      do
      {
          first = buffer.Read();
      }
      while (first != -1 && char.IsWhiteSpace((char)first));

      // tokens that are fully identified by one exact leading character
      switch (first)
      {
          case -1: return new JsonToken(JsonTokenType.EndOfFile, "<eof>");
          case '{': return new JsonToken(JsonTokenType.BeginObject, "{");
          case '}': return new JsonToken(JsonTokenType.EndObject, "}");
          case '[': return new JsonToken(JsonTokenType.BeginArray, "[");
          case ']': return new JsonToken(JsonTokenType.EndArray, "]");
          case '(': return new JsonToken(JsonTokenType.LeftParen, "(");
          case ')': return new JsonToken(JsonTokenType.RightParen, ")");
          case ':': return new JsonToken(JsonTokenType.Colon, ":");
          case ',': return new JsonToken(JsonTokenType.Comma, ",");
          case '"':
          case '\'':
              // strings may be delimited by either quote character
              return GetStringToken(buffer, (char)first);
          case '/': return GetRegularExpressionToken(buffer);
          case '-': return GetNumberToken(buffer, first);
      }

      // tokens identified by the class of their leading character
      if (char.IsDigit((char)first))
      {
          return GetNumberToken(buffer, first);
      }
      if (first == '$' || first == '_' || char.IsLetter((char)first))
      {
          return GetUnquotedStringToken(buffer);
      }

      // nothing matched: push the character back, then report using the resulting buffer position
      buffer.UnRead(first);
      throw new FormatException(FormatMessage("Invalid JSON input", buffer, buffer.Position));
  }
  74. // private methods
  /// <summary>
  /// Builds an error message made of the given text followed by a quoted snippet of the buffer.
  /// </summary>
  /// <param name="message">The leading message text.</param>
  /// <param name="buffer">The buffer the snippet is taken from.</param>
  /// <param name="start">The buffer position passed to GetSnippet as the start of the snippet.</param>
  /// <returns>The message in the form "message 'snippet'.".</returns>
  private static string FormatMessage(string message, JsonBuffer buffer, int start)
  {
      // length argument handed to GetSnippet (presumably an upper bound on the snippet size)
      const int snippetLength = 20;
      return string.Format("{0} '{1}'.", message, buffer.GetSnippet(start, snippetLength));
  }
  /// <summary>
  /// Scans a numeric token (integer, floating point or -Infinity) whose first character
  /// has already been read from the buffer.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just after the first character of the number.</param>
  /// <param name="firstChar">The character that was already consumed: a digit or '-'.</param>
  /// <returns>
  /// A DoubleJsonToken when the lexeme has a fraction, an exponent or is -Infinity;
  /// otherwise an Int32JsonToken when the value fits in an int, else an Int64JsonToken.
  /// </returns>
  /// <exception cref="FormatException">The characters read do not form a valid number.</exception>
  private static JsonToken GetNumberToken(JsonBuffer buffer, int firstChar)
  {
      // the caller consumed the first character, so the lexeme begins one position back
      var start = buffer.Position - 1;

      NumberState state;
      if (firstChar == '-')
      {
          state = NumberState.SawLeadingMinus;
      }
      else if (firstChar == '0')
      {
          state = NumberState.SawLeadingZero;
      }
      else
      {
          state = NumberState.SawIntegerDigits;
      }

      // treated as an integer until a decimal point, exponent or -Infinity is seen
      var isDouble = false;

      while (true)
      {
          var c = buffer.Read();

          switch (state)
          {
              case NumberState.SawLeadingMinus:
                  if (c == '0')
                  {
                      state = NumberState.SawLeadingZero;
                  }
                  else if (c == 'I')
                  {
                      state = NumberState.SawMinusI;
                  }
                  else if (char.IsDigit((char)c))
                  {
                      state = NumberState.SawIntegerDigits;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawLeadingZero:
                  // a leading zero may not be followed by further integer digits
                  if (c == '.')
                  {
                      state = NumberState.SawDecimalPoint;
                  }
                  else if (c == 'e' || c == 'E')
                  {
                      state = NumberState.SawExponentLetter;
                  }
                  else if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawIntegerDigits:
                  if (c == '.')
                  {
                      state = NumberState.SawDecimalPoint;
                  }
                  else if (c == 'e' || c == 'E')
                  {
                      state = NumberState.SawExponentLetter;
                  }
                  else if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else if (!char.IsDigit((char)c))
                  {
                      state = NumberState.Invalid;
                  }
                  // otherwise another integer digit: the state is unchanged
                  break;

              case NumberState.SawDecimalPoint:
                  // at least one digit must follow the decimal point
                  isDouble = true;
                  state = char.IsDigit((char)c) ? NumberState.SawFractionDigits : NumberState.Invalid;
                  break;

              case NumberState.SawFractionDigits:
                  if (c == 'e' || c == 'E')
                  {
                      state = NumberState.SawExponentLetter;
                  }
                  else if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else if (!char.IsDigit((char)c))
                  {
                      state = NumberState.Invalid;
                  }
                  // otherwise another fraction digit: the state is unchanged
                  break;

              case NumberState.SawExponentLetter:
                  isDouble = true;
                  if (c == '+' || c == '-')
                  {
                      state = NumberState.SawExponentSign;
                  }
                  else if (char.IsDigit((char)c))
                  {
                      state = NumberState.SawExponentDigits;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawExponentSign:
                  // at least one digit must follow the exponent sign
                  state = char.IsDigit((char)c) ? NumberState.SawExponentDigits : NumberState.Invalid;
                  break;

              case NumberState.SawExponentDigits:
                  if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else if (!char.IsDigit((char)c))
                  {
                      state = NumberState.Invalid;
                  }
                  // otherwise another exponent digit: the state is unchanged
                  break;

              case NumberState.SawMinusI:
                  // "-I" has been read; the following characters must spell "nfinity"
                  const string remainder = "nfinity";
                  var matched = 0;
                  while (matched < remainder.Length && c == remainder[matched])
                  {
                      c = buffer.Read();
                      matched++;
                  }

                  if (matched == remainder.Length)
                  {
                      // c now holds the character after "-Infinity", which must end the number
                      isDouble = true;
                      state = IsNumberTerminator(c) ? NumberState.Done : NumberState.Invalid;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;
          }

          if (state == NumberState.Invalid)
          {
              throw new FormatException(FormatMessage("Invalid JSON number", buffer, start));
          }

          if (state == NumberState.Done)
          {
              // the terminating character is not part of the number, so give it back
              buffer.UnRead(c);
              var lexeme = buffer.GetSubstring(start, buffer.Position - start);

              if (isDouble)
              {
                  return new DoubleJsonToken(lexeme, JsonConvert.ToDouble(lexeme));
              }

              // integers are reported as Int32 whenever the value fits, otherwise as Int64
              var integer = JsonConvert.ToInt64(lexeme);
              if (integer >= int.MinValue && integer <= int.MaxValue)
              {
                  return new Int32JsonToken(lexeme, (int)integer);
              }
              return new Int64JsonToken(lexeme, integer);
          }
      }
  }

  // Returns true when c ends a number: ',', '}', ']', ')', end of input (-1) or whitespace.
  private static bool IsNumberTerminator(int c)
  {
      switch (c)
      {
          case ',':
          case '}':
          case ']':
          case ')':
          case -1:
              return true;
          default:
              return char.IsWhiteSpace((char)c);
      }
  }
  /// <summary>
  /// Scans a regular expression token of the form /pattern/options whose opening slash
  /// has already been read from the buffer.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just after the opening '/'.</param>
  /// <returns>A RegularExpressionJsonToken built from the scanned lexeme.</returns>
  /// <exception cref="FormatException">
  /// The input ends before the closing '/' is found, or a character other than
  /// 'i', 'm', 'x' or 's' appears in the options before a terminating character.
  /// </exception>
  private static JsonToken GetRegularExpressionToken(JsonBuffer buffer)
  {
      // opening slash has already been read
      var start = buffer.Position - 1;
      var state = RegularExpressionState.InPattern;
      while (true)
      {
          var c = buffer.Read();
          switch (state)
          {
              case RegularExpressionState.InPattern:
                  switch (c)
                  {
                      case '/': state = RegularExpressionState.InOptions; break;
                      case '\\': state = RegularExpressionState.InEscapeSequence; break;
                      // end of input before the closing slash: without this case the scanner
                      // would stay in InPattern and never leave the loop
                      case -1: state = RegularExpressionState.Invalid; break;
                      default: state = RegularExpressionState.InPattern; break;
                  }
                  break;
              case RegularExpressionState.InEscapeSequence:
                  // the escaped character belongs to the pattern, unless the input ran out
                  state = (c == -1) ? RegularExpressionState.Invalid : RegularExpressionState.InPattern;
                  break;
              case RegularExpressionState.InOptions:
                  switch (c)
                  {
                      case 'i':
                      case 'm':
                      case 'x':
                      case 's':
                          state = RegularExpressionState.InOptions;
                          break;
                      case ',':
                      case '}':
                      case ']':
                      case ')':
                      case -1:
                          state = RegularExpressionState.Done;
                          break;
                      default:
                          if (char.IsWhiteSpace((char)c))
                          {
                              state = RegularExpressionState.Done;
                          }
                          else
                          {
                              state = RegularExpressionState.Invalid;
                          }
                          break;
                  }
                  break;
          }

          switch (state)
          {
              case RegularExpressionState.Done:
                  // the terminating character is not part of the token, so give it back
                  buffer.UnRead(c);
                  var lexeme = buffer.GetSubstring(start, buffer.Position - start);
                  var regex = new BsonRegularExpression(lexeme);
                  return new RegularExpressionJsonToken(lexeme, regex);
              case RegularExpressionState.Invalid:
                  throw new FormatException(FormatMessage("Invalid JSON regular expression", buffer, start));
          }
      }
  }
  /// <summary>
  /// Scans a quoted string token whose opening quote has already been read from the buffer,
  /// decoding backslash escape sequences along the way.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just after the opening quote.</param>
  /// <param name="quoteCharacter">The quote character that opened the string and must close it.</param>
  /// <returns>A StringJsonToken holding the raw lexeme and the decoded string value.</returns>
  /// <exception cref="FormatException">
  /// The string contains an unknown escape sequence, a \u escape that is not followed by
  /// exactly four hexadecimal digits, or the input ends before the closing quote.
  /// </exception>
  private static JsonToken GetStringToken(JsonBuffer buffer, char quoteCharacter)
  {
      // opening quote has already been read
      var start = buffer.Position - 1;
      var sb = new StringBuilder();
      while (true)
      {
          var c = buffer.Read();
          switch (c)
          {
              case '\\':
                  c = buffer.Read();
                  switch (c)
                  {
                      case '\'': sb.Append('\''); break;
                      case '"': sb.Append('"'); break;
                      case '\\': sb.Append('\\'); break;
                      case '/': sb.Append('/'); break;
                      case 'b': sb.Append('\b'); break;
                      case 'f': sb.Append('\f'); break;
                      case 'n': sb.Append('\n'); break;
                      case 'r': sb.Append('\r'); break;
                      case 't': sb.Append('\t'); break;
                      case 'u':
                          var u1 = buffer.Read();
                          var u2 = buffer.Read();
                          var u3 = buffer.Read();
                          var u4 = buffer.Read();
                          // when the input ended inside the escape nothing is appended here;
                          // the end-of-file error is expected to surface on the next Read
                          // (assumes Read keeps returning -1 at end of input)
                          if (u4 != -1)
                          {
                              var hex = new string(new char[] { (char)u1, (char)u2, (char)u3, (char)u4 });
                              // AllowHexSpecifier accepts hexadecimal digits only: no "0x" prefix,
                              // sign or whitespace, all of which must be rejected in a \u escape
                              int codeUnit;
                              if (!int.TryParse(
                                  hex,
                                  System.Globalization.NumberStyles.AllowHexSpecifier,
                                  System.Globalization.CultureInfo.InvariantCulture,
                                  out codeUnit))
                              {
                                  var unicodeMessage = string.Format("Invalid escape sequence in JSON string '\\u{0}'.", hex);
                                  throw new FormatException(unicodeMessage);
                              }
                              sb.Append((char)codeUnit);
                          }
                          break;
                      default:
                          // end of input (-1) is reported by the check at the bottom of the loop
                          if (c != -1)
                          {
                              var message = string.Format("Invalid escape sequence in JSON string '\\{0}'.", (char)c);
                              throw new FormatException(message);
                          }
                          break;
                  }
                  break;
              default:
                  if (c == quoteCharacter)
                  {
                      // closing quote: the lexeme spans both quotes, the value is the decoded text
                      var lexeme = buffer.GetSubstring(start, buffer.Position - start);
                      return new StringJsonToken(JsonTokenType.String, lexeme, sb.ToString());
                  }
                  if (c != -1)
                  {
                      sb.Append((char)c);
                  }
                  break;
          }
          if (c == -1)
          {
              throw new FormatException(FormatMessage("End of file in JSON string.", buffer, start));
          }
      }
  }
  /// <summary>
  /// Scans an unquoted string token whose first character has already been read from the buffer.
  /// The token extends over every following '$', '_', letter or digit.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just after the first character of the token.</param>
  /// <returns>A StringJsonToken of type UnquotedString whose lexeme and value are the same text.</returns>
  private static JsonToken GetUnquotedStringToken(JsonBuffer buffer)
  {
      // the caller consumed the first character, so the token begins one position back
      var start = buffer.Position - 1;

      // read until a character that cannot be part of the token (or end of input) is found
      int next;
      do
      {
          next = buffer.Read();
      }
      while (next == '$' || next == '_' || char.IsLetterOrDigit((char)next));

      // that character belongs to whatever follows, so give it back
      buffer.UnRead(next);

      var text = buffer.GetSubstring(start, buffer.Position - start);
      return new StringJsonToken(JsonTokenType.UnquotedString, text, text);
  }
  485. // nested types
  /// <summary>
  /// The states of the state machine used by GetNumberToken.
  /// </summary>
  private enum NumberState
  {
      /// <summary>A leading '-' has been read.</summary>
      SawLeadingMinus,

      /// <summary>A '0' has been read as the first digit.</summary>
      SawLeadingZero,

      /// <summary>One or more integer digits have been read.</summary>
      SawIntegerDigits,

      /// <summary>The decimal point has been read; a digit must follow.</summary>
      SawDecimalPoint,

      /// <summary>One or more digits after the decimal point have been read.</summary>
      SawFractionDigits,

      /// <summary>The exponent letter 'e' or 'E' has been read.</summary>
      SawExponentLetter,

      /// <summary>The '+' or '-' after the exponent letter has been read; a digit must follow.</summary>
      SawExponentSign,

      /// <summary>One or more exponent digits have been read.</summary>
      SawExponentDigits,

      /// <summary>"-I" has been read; the rest must spell "nfinity".</summary>
      SawMinusI,

      /// <summary>A terminating character was read after a complete number.</summary>
      Done,

      /// <summary>The characters read do not form a valid number.</summary>
      Invalid
  }
  /// <summary>
  /// The states of the state machine used by GetRegularExpressionToken.
  /// </summary>
  private enum RegularExpressionState
  {
      /// <summary>Reading the pattern, between the opening and closing slash.</summary>
      InPattern,

      /// <summary>A backslash has been read inside the pattern; the next character is taken as part of the pattern.</summary>
      InEscapeSequence,

      /// <summary>The closing slash has been read; reading option letters ('i', 'm', 'x', 's').</summary>
      InOptions,

      /// <summary>A terminating character was read after the options.</summary>
      Done,

      /// <summary>The characters read do not form a valid regular expression token.</summary>
      Invalid
  }
  508. }
  509. }