JsonScanner.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. /* Copyright 2010-2014 MongoDB Inc.
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. using System;
  16. using System.IO;
  17. using System.Text;
  18. using System.Xml;
  19. namespace MongoDB.Bson.IO
  20. {
  21. /// <summary>
  22. /// A static class that represents a JSON scanner.
  23. /// </summary>
  24. public static class JsonScanner
  25. {
  26. // public static methods
  /// <summary>
  /// Gets the next JsonToken from a JsonBuffer.
  /// </summary>
  /// <param name="buffer">The buffer.</param>
  /// <returns>
  /// The next token. An EndOfFile token (lexeme "&lt;eof&gt;") is returned when nothing but
  /// whitespace remains in the buffer.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
  /// <exception cref="Exception">The next non-whitespace character cannot begin any token.</exception>
  public static JsonToken GetNextToken(JsonBuffer buffer)
  {
      if (buffer == null)
      {
          // fail with a descriptive exception instead of a NullReferenceException from buffer.Read()
          throw new ArgumentNullException("buffer");
      }

      // skip leading whitespace
      var c = buffer.Read();
      while (c != -1 && char.IsWhiteSpace((char)c))
      {
          c = buffer.Read();
      }
      if (c == -1)
      {
          return new JsonToken(JsonTokenType.EndOfFile, "<eof>");
      }

      // leading character determines token type
      switch (c)
      {
          case '{': return new JsonToken(JsonTokenType.BeginObject, "{");
          case '}': return new JsonToken(JsonTokenType.EndObject, "}");
          case '[': return new JsonToken(JsonTokenType.BeginArray, "[");
          case ']': return new JsonToken(JsonTokenType.EndArray, "]");
          case '(': return new JsonToken(JsonTokenType.LeftParen, "(");
          case ')': return new JsonToken(JsonTokenType.RightParen, ")");
          case ':': return new JsonToken(JsonTokenType.Colon, ":");
          case ',': return new JsonToken(JsonTokenType.Comma, ",");
          case '\'':
          case '"':
              // either quote style opens a string; the same character must close it
              return GetStringToken(buffer, (char)c);
          case '/': return GetRegularExpressionToken(buffer);
          default:
              if (c == '-' || char.IsDigit((char)c))
              {
                  return GetNumberToken(buffer, c);
              }
              else if (c == '$' || c == '_' || char.IsLetter((char)c))
              {
                  return GetUnquotedStringToken(buffer);
              }
              else
              {
                  // put the offending character back so that the snippet in the message starts with it
                  buffer.UnRead(c);
                  throw new Exception(FormatMessage("Invalid JSON input", buffer, buffer.Position));
              }
      }
  }
  75. // private methods
  /// <summary>
  /// Builds an error message made of <paramref name="message"/> followed by a quoted snippet of
  /// the buffer contents beginning at <paramref name="start"/>.
  /// </summary>
  /// <param name="message">The text that precedes the snippet.</param>
  /// <param name="buffer">The buffer the snippet is taken from.</param>
  /// <param name="start">The buffer index at which the snippet begins.</param>
  /// <returns>The message formatted as "message 'snippet'.".</returns>
  private static string FormatMessage(string message, JsonBuffer buffer, int start)
  {
      const int maxSnippetLength = 20;
      string snippet;
      // Decide on truncation relative to start, not the current read position: start can be far
      // behind the position (for example an unterminated string), and the snippet must stay short.
      if (start + maxSnippetLength >= buffer.Length)
      {
          // everything that is left fits, so no ellipsis is needed
          snippet = buffer.Substring(start);
      }
      else
      {
          snippet = buffer.Substring(start, maxSnippetLength) + "...";
      }
      return string.Format("{0} '{1}'.", message, snippet);
  }
  /// <summary>
  /// Scans a numeric literal (an integer, a floating point number or -Infinity) whose first
  /// character has already been read from the buffer.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just past the first character of the number.</param>
  /// <param name="firstChar">The character that was already read: a digit or '-'.</param>
  /// <returns>
  /// A DoubleJsonToken when the literal has a fraction, an exponent or is -Infinity; otherwise an
  /// Int32JsonToken when the value fits in an int and an Int64JsonToken when it does not.
  /// </returns>
  /// <exception cref="Exception">The characters read do not form a valid JSON number.</exception>
  private static JsonToken GetNumberToken(JsonBuffer buffer, int firstChar)
  {
      const string infinityRemainder = "nfinity"; // what must follow "-I"

      var start = buffer.Position - 1; // the first character was consumed by the caller
      var tokenType = JsonTokenType.Int64; // integral until a '.', an exponent or -Infinity is seen

      NumberState state;
      if (firstChar == '-')
      {
          state = NumberState.SawLeadingMinus;
      }
      else if (firstChar == '0')
      {
          state = NumberState.SawLeadingZero;
      }
      else
      {
          state = NumberState.SawIntegerDigits;
      }

      // feed one character at a time through the state machine until it accepts or rejects
      int c;
      do
      {
          c = buffer.Read();
          switch (state)
          {
              case NumberState.SawLeadingMinus:
                  if (c == '0')
                  {
                      state = NumberState.SawLeadingZero;
                  }
                  else if (c == 'I')
                  {
                      state = NumberState.SawMinusI;
                  }
                  else if (char.IsDigit((char)c))
                  {
                      state = NumberState.SawIntegerDigits;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawLeadingZero:
                  // a leading zero may not be followed by further integer digits
                  if (c == '.')
                  {
                      state = NumberState.SawDecimalPoint;
                  }
                  else if (c == 'e' || c == 'E')
                  {
                      state = NumberState.SawExponentLetter;
                  }
                  else if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawIntegerDigits:
                  if (c == '.')
                  {
                      state = NumberState.SawDecimalPoint;
                  }
                  else if (c == 'e' || c == 'E')
                  {
                      state = NumberState.SawExponentLetter;
                  }
                  else if (char.IsDigit((char)c))
                  {
                      state = NumberState.SawIntegerDigits;
                  }
                  else if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawDecimalPoint:
                  tokenType = JsonTokenType.Double;
                  state = char.IsDigit((char)c) ? NumberState.SawFractionDigits : NumberState.Invalid;
                  break;

              case NumberState.SawFractionDigits:
                  if (c == 'e' || c == 'E')
                  {
                      state = NumberState.SawExponentLetter;
                  }
                  else if (char.IsDigit((char)c))
                  {
                      state = NumberState.SawFractionDigits;
                  }
                  else if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawExponentLetter:
                  tokenType = JsonTokenType.Double;
                  if (c == '+' || c == '-')
                  {
                      state = NumberState.SawExponentSign;
                  }
                  else if (char.IsDigit((char)c))
                  {
                      state = NumberState.SawExponentDigits;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawExponentSign:
                  state = char.IsDigit((char)c) ? NumberState.SawExponentDigits : NumberState.Invalid;
                  break;

              case NumberState.SawExponentDigits:
                  if (char.IsDigit((char)c))
                  {
                      state = NumberState.SawExponentDigits;
                  }
                  else if (IsNumberTerminator(c))
                  {
                      state = NumberState.Done;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;

              case NumberState.SawMinusI:
                  // consume "nfinity" one character at a time; c ends up holding the first
                  // character that did not match, or the character that follows the final 'y'
                  var matched = 0;
                  while (matched < infinityRemainder.Length && c == infinityRemainder[matched])
                  {
                      c = buffer.Read();
                      matched++;
                  }
                  if (matched == infinityRemainder.Length)
                  {
                      tokenType = JsonTokenType.Double;
                      state = IsNumberTerminator(c) ? NumberState.Done : NumberState.Invalid;
                  }
                  else
                  {
                      state = NumberState.Invalid;
                  }
                  break;
          }
      }
      while (state != NumberState.Done && state != NumberState.Invalid);

      if (state == NumberState.Invalid)
      {
          throw new Exception(FormatMessage("Invalid JSON number", buffer, start));
      }

      // the terminator belongs to the next token, so hand it back before cutting out the lexeme
      buffer.UnRead(c);
      var lexeme = buffer.Substring(start, buffer.Position - start);
      if (tokenType == JsonTokenType.Double)
      {
          return new DoubleJsonToken(lexeme, XmlConvert.ToDouble(lexeme));
      }

      var integerValue = XmlConvert.ToInt64(lexeme);
      if (integerValue < int.MinValue || integerValue > int.MaxValue)
      {
          return new Int64JsonToken(lexeme, integerValue);
      }
      return new Int32JsonToken(lexeme, (int)integerValue);
  }

  // Tells whether c ends a number: ',', '}', ']', ')', end of input (-1) or any whitespace character.
  private static bool IsNumberTerminator(int c)
  {
      return c == ',' || c == '}' || c == ']' || c == ')' || c == -1 || char.IsWhiteSpace((char)c);
  }
  /// <summary>
  /// Scans a regular expression literal of the form /pattern/options whose opening slash has
  /// already been read from the buffer.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just past the opening slash.</param>
  /// <returns>A RegularExpressionJsonToken holding the lexeme and the BsonRegularExpression built from it.</returns>
  /// <exception cref="Exception">
  /// The input ends before the closing slash, or a character other than i, m, x or s follows
  /// the closing slash before the token is terminated.
  /// </exception>
  private static JsonToken GetRegularExpressionToken(JsonBuffer buffer)
  {
      // opening slash has already been read
      var start = buffer.Position - 1;
      var state = RegularExpressionState.InPattern;
      while (true)
      {
          var c = buffer.Read();
          switch (state)
          {
              case RegularExpressionState.InPattern:
                  switch (c)
                  {
                      case '/': state = RegularExpressionState.InOptions; break;
                      case '\\': state = RegularExpressionState.InEscapeSequence; break;
                      // end of input before the closing slash: without this case the scanner
                      // would stay InPattern and read -1 forever
                      case -1: state = RegularExpressionState.Invalid; break;
                      default: state = RegularExpressionState.InPattern; break;
                  }
                  break;
              case RegularExpressionState.InEscapeSequence:
                  // the escaped character is taken as is; end of input right after the backslash is an error
                  state = (c == -1) ? RegularExpressionState.Invalid : RegularExpressionState.InPattern;
                  break;
              case RegularExpressionState.InOptions:
                  switch (c)
                  {
                      case 'i':
                      case 'm':
                      case 'x':
                      case 's':
                          state = RegularExpressionState.InOptions;
                          break;
                      case ',':
                      case '}':
                      case ']':
                      case ')':
                      case -1:
                          state = RegularExpressionState.Done;
                          break;
                      default:
                          if (char.IsWhiteSpace((char)c))
                          {
                              state = RegularExpressionState.Done;
                          }
                          else
                          {
                              state = RegularExpressionState.Invalid;
                          }
                          break;
                  }
                  break;
          }

          switch (state)
          {
              case RegularExpressionState.Done:
                  // the terminator belongs to the next token, so hand it back before cutting out the lexeme
                  buffer.UnRead(c);
                  var lexeme = buffer.Substring(start, buffer.Position - start);
                  var regex = new BsonRegularExpression(lexeme);
                  return new RegularExpressionJsonToken(lexeme, regex);
              case RegularExpressionState.Invalid:
                  throw new Exception(FormatMessage("Invalid JSON regular expression", buffer, start));
          }
      }
  }
  /// <summary>
  /// Scans a quoted string whose opening quote has already been read from the buffer and
  /// decodes its escape sequences.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just past the opening quote.</param>
  /// <param name="quoteCharacter">The quote character that opened the string and must close it.</param>
  /// <returns>A String token holding the raw lexeme (quotes included) and the decoded value.</returns>
  /// <exception cref="Exception">
  /// The string contains an unknown escape sequence, or the input ends before the closing quote.
  /// </exception>
  private static JsonToken GetStringToken(JsonBuffer buffer, char quoteCharacter)
  {
      // single-character escapes: escapeValues[i] is the character that "\" + escapeCodes[i] stands for
      const string escapeCodes = "'\"\\/bfnrt";
      const string escapeValues = "'\"\\/\b\f\n\r\t";

      // the caller consumed the opening quote, so the lexeme begins one character back
      var start = buffer.Position - 1;
      var text = new StringBuilder();
      for (; ; )
      {
          var c = buffer.Read();
          if (c == '\\')
          {
              c = buffer.Read();
              if (c == 'u')
              {
                  var h1 = buffer.Read();
                  var h2 = buffer.Read();
                  var h3 = buffer.Read();
                  var h4 = buffer.Read();
                  // when the input ended inside the escape nothing is appended; the next Read
                  // returns -1 and the end-of-file check below reports the error
                  if (h4 != -1)
                  {
                      var digits = new string(new char[] { (char)h1, (char)h2, (char)h3, (char)h4 });
                      var codeUnit = Convert.ToInt32(digits, 16);
                      text.Append((char)codeUnit);
                  }
              }
              else if (c != -1)
              {
                  var codeIndex = escapeCodes.IndexOf((char)c);
                  if (codeIndex < 0)
                  {
                      var message = string.Format("Invalid escape sequence in JSON string '\\{0}'.", (char)c);
                      throw new Exception(message);
                  }
                  text.Append(escapeValues[codeIndex]);
              }
          }
          else if (c == quoteCharacter)
          {
              var lexeme = buffer.Substring(start, buffer.Position - start);
              return new StringJsonToken(JsonTokenType.String, lexeme, text.ToString());
          }
          else if (c != -1)
          {
              text.Append((char)c);
          }

          if (c == -1)
          {
              throw new Exception(FormatMessage("End of file in JSON string.", buffer, start));
          }
      }
  }
  /// <summary>
  /// Scans an unquoted string (a run of letters, digits, '$' and '_') whose first character has
  /// already been read from the buffer.
  /// </summary>
  /// <param name="buffer">The buffer, positioned just past the first character.</param>
  /// <returns>An UnquotedString token whose lexeme and value are the scanned characters.</returns>
  private static JsonToken GetUnquotedStringToken(JsonBuffer buffer)
  {
      // the caller consumed the opening letter, '$' or '_', so the lexeme begins one character back
      var start = buffer.Position - 1;

      // read until a character that cannot be part of the name shows up
      int next;
      do
      {
          next = buffer.Read();
      }
      while (next == '$' || next == '_' || char.IsLetterOrDigit((char)next));

      // that character belongs to whatever follows, so hand it back
      buffer.UnRead(next);

      var length = buffer.Position - start;
      var lexeme = buffer.Substring(start, length);
      return new StringJsonToken(JsonTokenType.UnquotedString, lexeme, lexeme);
  }
  494. // nested types
  /// <summary>
  /// The states of the state machine that GetNumberToken uses to scan a number.
  /// </summary>
  private enum NumberState
  {
      /// <summary>A leading '-' was read; a digit or 'I' must follow.</summary>
      SawLeadingMinus,

      /// <summary>A leading '0' (possibly after '-') was read; no further integer digits may follow.</summary>
      SawLeadingZero,

      /// <summary>One or more integer digits were read.</summary>
      SawIntegerDigits,

      /// <summary>A '.' was read; a digit must follow.</summary>
      SawDecimalPoint,

      /// <summary>One or more digits after the decimal point were read.</summary>
      SawFractionDigits,

      /// <summary>An 'e' or 'E' was read; a sign or a digit must follow.</summary>
      SawExponentLetter,

      /// <summary>A '+' or '-' after the exponent letter was read; a digit must follow.</summary>
      SawExponentSign,

      /// <summary>One or more exponent digits were read.</summary>
      SawExponentDigits,

      /// <summary>"-I" was read; "nfinity" must follow.</summary>
      SawMinusI,

      /// <summary>A terminating character was reached and the number is complete.</summary>
      Done,

      /// <summary>The characters read do not form a valid number.</summary>
      Invalid
  }
  /// <summary>
  /// The states of the state machine that GetRegularExpressionToken uses to scan a
  /// regular expression literal.
  /// </summary>
  private enum RegularExpressionState
  {
      /// <summary>Between the opening and the closing slash.</summary>
      InPattern,

      /// <summary>A backslash was read inside the pattern; the next character is taken as is.</summary>
      InEscapeSequence,

      /// <summary>The closing slash was read; option letters (i, m, x, s) may follow.</summary>
      InOptions,

      /// <summary>A terminating character was reached and the literal is complete.</summary>
      Done,

      /// <summary>The characters read do not form a valid regular expression literal.</summary>
      Invalid
  }
  517. }
  518. }