| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532 |
- /* Copyright 2010-2014 MongoDB Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using System;
- using System.IO;
- using System.Text;
- using System.Xml;
- namespace MongoDB.Bson.IO
- {
- /// <summary>
- /// A static class that represents a JSON scanner.
- /// </summary>
- public static class JsonScanner
- {
- // public static methods
/// <summary>
/// Gets the next JsonToken from a JsonBuffer.
/// </summary>
/// <param name="buffer">The buffer to read from, starting at its current position.</param>
/// <returns>
/// The next token. When only whitespace remains before the end of the input, a token of
/// type EndOfFile with the lexeme "&lt;eof&gt;" is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="Exception">The next non-whitespace character cannot start a token.</exception>
public static JsonToken GetNextToken(JsonBuffer buffer)
{
    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    // skip leading whitespace
    var c = buffer.Read();
    while (c != -1 && char.IsWhiteSpace((char)c))
    {
        c = buffer.Read();
    }
    if (c == -1)
    {
        return new JsonToken(JsonTokenType.EndOfFile, "<eof>");
    }

    // leading character determines token type
    switch (c)
    {
        case '{': return new JsonToken(JsonTokenType.BeginObject, "{");
        case '}': return new JsonToken(JsonTokenType.EndObject, "}");
        case '[': return new JsonToken(JsonTokenType.BeginArray, "[");
        case ']': return new JsonToken(JsonTokenType.EndArray, "]");
        case '(': return new JsonToken(JsonTokenType.LeftParen, "(");
        case ')': return new JsonToken(JsonTokenType.RightParen, ")");
        case ':': return new JsonToken(JsonTokenType.Colon, ":");
        case ',': return new JsonToken(JsonTokenType.Comma, ",");
        case '\'':
        case '"':
            return GetStringToken(buffer, (char)c);
        case '/':
            return GetRegularExpressionToken(buffer);
    }

    // Only ASCII digits may start a number. char.IsDigit would also accept other Unicode
    // decimal digits, which the number conversion further down cannot parse.
    if (c == '-' || (c >= '0' && c <= '9'))
    {
        return GetNumberToken(buffer, c);
    }
    if (c == '$' || c == '_' || char.IsLetter((char)c))
    {
        return GetUnquotedStringToken(buffer);
    }

    // put the offending character back so that the error snippet starts with it
    buffer.UnRead(c);
    throw new Exception(FormatMessage("Invalid JSON input", buffer, buffer.Position));
}
- // private methods
/// <summary>
/// Builds an error message made of <paramref name="message"/> followed by a quoted snippet
/// of the buffer contents beginning at <paramref name="start"/>.
/// </summary>
/// <param name="message">The leading text of the error message.</param>
/// <param name="buffer">The buffer the snippet is taken from.</param>
/// <param name="start">The buffer index at which the snippet begins.</param>
/// <returns>The text "message 'snippet'."; a truncated snippet ends with "...".</returns>
private static string FormatMessage(string message, JsonBuffer buffer, int start)
{
    const int maxSnippetLength = 20;

    string snippet;
    // The bound is measured from start (not from the current read position): the read
    // position can be far past start, e.g. at the end of the input for an unterminated
    // string, and measuring from there would put the whole remaining input in the message.
    if (start + maxSnippetLength >= buffer.Length)
    {
        snippet = buffer.Substring(start);
    }
    else
    {
        snippet = buffer.Substring(start, maxSnippetLength) + "...";
    }

    return string.Format("{0} '{1}'.", message, snippet);
}
/// <summary>
/// Scans a number whose first character has already been read from the buffer.
/// </summary>
/// <param name="buffer">The buffer, positioned just past <paramref name="firstChar"/>.</param>
/// <param name="firstChar">The character already consumed by the caller: '-' or an ASCII digit.</param>
/// <returns>
/// A DoubleJsonToken when the lexeme has a fraction, an exponent or is "-Infinity";
/// otherwise an Int32JsonToken when the value fits in an int, else an Int64JsonToken.
/// </returns>
/// <exception cref="Exception">The characters do not form a valid number.</exception>
/// <remarks>
/// The lexeme is converted with XmlConvert; a conversion failure (for example an integer
/// outside the Int64 range) is not translated and propagates from XmlConvert as is.
/// </remarks>
private static JsonToken GetNumberToken(JsonBuffer buffer, int firstChar)
{
    var c = firstChar;

    // leading digit or '-' has already been read
    var start = buffer.Position - 1;

    NumberState state;
    if (c == '-')
    {
        state = NumberState.SawLeadingMinus;
    }
    else if (c == '0')
    {
        state = NumberState.SawLeadingZero;
    }
    else if (IsAsciiDigit(c))
    {
        state = NumberState.SawIntegerDigits;
    }
    else
    {
        // e.g. a non-ASCII Unicode digit: XmlConvert could not parse it later on
        throw new Exception(FormatMessage("Invalid JSON number", buffer, start));
    }

    // characters that must follow "-I" for the lexeme to be "-Infinity"
    const string infinityRemainder = "nfinity";

    var type = JsonTokenType.Int64; // assume integer until proved otherwise
    while (true)
    {
        c = buffer.Read();

        // step 1: advance the state machine using the character just read
        switch (state)
        {
            case NumberState.SawLeadingMinus:
                if (c == '0')
                {
                    state = NumberState.SawLeadingZero;
                }
                else if (c == 'I')
                {
                    state = NumberState.SawMinusI;
                }
                else if (IsAsciiDigit(c))
                {
                    state = NumberState.SawIntegerDigits;
                }
                else
                {
                    state = NumberState.Invalid;
                }
                break;

            case NumberState.SawLeadingZero:
                // a leading zero may not be followed by further integer digits
                if (c == '.')
                {
                    state = NumberState.SawDecimalPoint;
                }
                else if (c == 'e' || c == 'E')
                {
                    state = NumberState.SawExponentLetter;
                }
                else if (IsNumberTerminator(c))
                {
                    state = NumberState.Done;
                }
                else
                {
                    state = NumberState.Invalid;
                }
                break;

            case NumberState.SawIntegerDigits:
                if (c == '.')
                {
                    state = NumberState.SawDecimalPoint;
                }
                else if (c == 'e' || c == 'E')
                {
                    state = NumberState.SawExponentLetter;
                }
                else if (IsAsciiDigit(c))
                {
                    state = NumberState.SawIntegerDigits;
                }
                else if (IsNumberTerminator(c))
                {
                    state = NumberState.Done;
                }
                else
                {
                    state = NumberState.Invalid;
                }
                break;

            case NumberState.SawDecimalPoint:
                // at least one digit is required after the decimal point
                type = JsonTokenType.Double;
                state = IsAsciiDigit(c) ? NumberState.SawFractionDigits : NumberState.Invalid;
                break;

            case NumberState.SawFractionDigits:
                if (c == 'e' || c == 'E')
                {
                    state = NumberState.SawExponentLetter;
                }
                else if (IsAsciiDigit(c))
                {
                    state = NumberState.SawFractionDigits;
                }
                else if (IsNumberTerminator(c))
                {
                    state = NumberState.Done;
                }
                else
                {
                    state = NumberState.Invalid;
                }
                break;

            case NumberState.SawExponentLetter:
                type = JsonTokenType.Double;
                if (c == '+' || c == '-')
                {
                    state = NumberState.SawExponentSign;
                }
                else if (IsAsciiDigit(c))
                {
                    state = NumberState.SawExponentDigits;
                }
                else
                {
                    state = NumberState.Invalid;
                }
                break;

            case NumberState.SawExponentSign:
                // at least one digit is required after the exponent sign
                state = IsAsciiDigit(c) ? NumberState.SawExponentDigits : NumberState.Invalid;
                break;

            case NumberState.SawExponentDigits:
                if (IsAsciiDigit(c))
                {
                    state = NumberState.SawExponentDigits;
                }
                else if (IsNumberTerminator(c))
                {
                    state = NumberState.Done;
                }
                else
                {
                    state = NumberState.Invalid;
                }
                break;

            case NumberState.SawMinusI:
                // "-I" has been consumed and c is the character after the 'I'
                var sawMinusInfinity = true;
                for (var i = 0; i < infinityRemainder.Length; i++)
                {
                    if (c != infinityRemainder[i])
                    {
                        sawMinusInfinity = false;
                        break;
                    }
                    c = buffer.Read();
                }

                // on a full match c is now the character that follows "-Infinity"
                if (sawMinusInfinity && IsNumberTerminator(c))
                {
                    type = JsonTokenType.Double;
                    state = NumberState.Done;
                }
                else
                {
                    state = NumberState.Invalid;
                }
                break;
        }

        // step 2: act on the terminal states; any other state reads another character
        if (state == NumberState.Invalid)
        {
            throw new Exception(FormatMessage("Invalid JSON number", buffer, start));
        }
        if (state == NumberState.Done)
        {
            // the terminator is not part of the number: hand it back to the buffer
            buffer.UnRead(c);
            var lexeme = buffer.Substring(start, buffer.Position - start);
            if (type == JsonTokenType.Double)
            {
                return new DoubleJsonToken(lexeme, XmlConvert.ToDouble(lexeme));
            }

            var value = XmlConvert.ToInt64(lexeme);
            if (value < int.MinValue || value > int.MaxValue)
            {
                return new Int64JsonToken(lexeme, value);
            }
            return new Int32JsonToken(lexeme, (int)value);
        }
    }
}

// Returns true when c is one of the ASCII digits '0' through '9' (false for -1).
private static bool IsAsciiDigit(int c)
{
    return c >= '0' && c <= '9';
}

// Returns true when c may directly follow a complete number: ',', '}', ']', ')',
// whitespace, or the end of the input (-1).
private static bool IsNumberTerminator(int c)
{
    switch (c)
    {
        case ',':
        case '}':
        case ']':
        case ')':
        case -1:
            return true;
        default:
            return char.IsWhiteSpace((char)c);
    }
}
/// <summary>
/// Scans a regular expression literal of the form /pattern/options whose opening slash
/// has already been read from the buffer.
/// </summary>
/// <param name="buffer">The buffer, positioned just past the opening slash.</param>
/// <returns>
/// A RegularExpressionJsonToken holding the lexeme and the BsonRegularExpression
/// constructed from it.
/// </returns>
/// <exception cref="Exception">
/// The input ends before the closing slash, or the options are followed by a character
/// that is neither an option letter (i, m, x, s) nor a terminator.
/// </exception>
private static JsonToken GetRegularExpressionToken(JsonBuffer buffer)
{
    // opening slash has already been read
    var start = buffer.Position - 1;
    var state = RegularExpressionState.InPattern;
    while (true)
    {
        var c = buffer.Read();
        switch (state)
        {
            case RegularExpressionState.InPattern:
                switch (c)
                {
                    case -1:
                        // End of input before the closing slash. Without this case the
                        // state would stay InPattern and the loop would keep reading.
                        state = RegularExpressionState.Invalid;
                        break;
                    case '/':
                        state = RegularExpressionState.InOptions;
                        break;
                    case '\\':
                        state = RegularExpressionState.InEscapeSequence;
                        break;
                    default:
                        state = RegularExpressionState.InPattern;
                        break;
                }
                break;

            case RegularExpressionState.InEscapeSequence:
                // the escaped character is taken as is, but a backslash cannot be the
                // last character of the input
                state = (c == -1) ? RegularExpressionState.Invalid : RegularExpressionState.InPattern;
                break;

            case RegularExpressionState.InOptions:
                switch (c)
                {
                    case 'i':
                    case 'm':
                    case 'x':
                    case 's':
                        state = RegularExpressionState.InOptions;
                        break;
                    case ',':
                    case '}':
                    case ']':
                    case ')':
                    case -1:
                        state = RegularExpressionState.Done;
                        break;
                    default:
                        if (char.IsWhiteSpace((char)c))
                        {
                            state = RegularExpressionState.Done;
                        }
                        else
                        {
                            state = RegularExpressionState.Invalid;
                        }
                        break;
                }
                break;
        }

        switch (state)
        {
            case RegularExpressionState.Done:
                // the terminator is not part of the literal: hand it back to the buffer
                buffer.UnRead(c);
                var lexeme = buffer.Substring(start, buffer.Position - start);
                var regex = new BsonRegularExpression(lexeme);
                return new RegularExpressionJsonToken(lexeme, regex);
            case RegularExpressionState.Invalid:
                throw new Exception(FormatMessage("Invalid JSON regular expression", buffer, start));
        }
    }
}
/// <summary>
/// Scans a quoted string whose opening quote has already been read from the buffer,
/// decoding backslash escape sequences along the way.
/// </summary>
/// <param name="buffer">The buffer, positioned just past the opening quote.</param>
/// <param name="quoteCharacter">The quote character that opened (and must close) the string.</param>
/// <returns>
/// A StringJsonToken of type String whose lexeme is the raw text including both quotes
/// and whose value is the decoded string.
/// </returns>
/// <exception cref="FormatException">A \u escape is not followed by four hexadecimal digits.</exception>
/// <exception cref="Exception">
/// The input ends before the closing quote, or a backslash is followed by a character
/// that does not form a supported escape sequence.
/// </exception>
private static JsonToken GetStringToken(JsonBuffer buffer, char quoteCharacter)
{
    // opening quote has already been read
    var start = buffer.Position - 1;
    var sb = new StringBuilder();
    while (true)
    {
        var c = buffer.Read();
        if (c == -1)
        {
            throw new Exception(FormatMessage("End of file in JSON string.", buffer, start));
        }

        if (c != '\\')
        {
            if (c == quoteCharacter)
            {
                var lexeme = buffer.Substring(start, buffer.Position - start);
                return new StringJsonToken(JsonTokenType.String, lexeme, sb.ToString());
            }
            sb.Append((char)c);
            continue;
        }

        // a backslash introduces an escape sequence
        c = buffer.Read();
        switch (c)
        {
            case -1:
                throw new Exception(FormatMessage("End of file in JSON string.", buffer, start));
            case '\'': sb.Append('\''); break;
            case '"': sb.Append('"'); break;
            case '\\': sb.Append('\\'); break;
            case '/': sb.Append('/'); break;
            case 'b': sb.Append('\b'); break;
            case 'f': sb.Append('\f'); break;
            case 'n': sb.Append('\n'); break;
            case 'r': sb.Append('\r'); break;
            case 't': sb.Append('\t'); break;
            case 'u': sb.Append(ReadUnicodeEscape(buffer, start)); break;
            default:
                var message = string.Format("Invalid escape sequence in JSON string '\\{0}'.", (char)c);
                throw new Exception(message);
        }
    }
}

// Reads the four hexadecimal digits that follow "\u" and returns the UTF-16 code unit
// they encode. start is the index of the string's opening quote (used for error snippets).
private static char ReadUnicodeEscape(JsonBuffer buffer, int start)
{
    var u1 = buffer.Read();
    var u2 = buffer.Read();
    var u3 = buffer.Read();
    var u4 = buffer.Read();
    if (u1 == -1 || u2 == -1 || u3 == -1 || u4 == -1)
    {
        throw new Exception(FormatMessage("End of file in JSON string.", buffer, start));
    }

    // Each character is validated individually. Convert.ToInt32(hex, 16) is not strict
    // enough here because it also accepts a "0x"/"0X" prefix, so "\u0x1F" would be decoded.
    var d1 = GetHexDigitValue(u1);
    var d2 = GetHexDigitValue(u2);
    var d3 = GetHexDigitValue(u3);
    var d4 = GetHexDigitValue(u4);
    if (d1 < 0 || d2 < 0 || d3 < 0 || d4 < 0)
    {
        var hex = new string(new char[] { (char)u1, (char)u2, (char)u3, (char)u4 });
        throw new FormatException(string.Format("Invalid escape sequence in JSON string '\\u{0}'.", hex));
    }

    return (char)((d1 << 12) | (d2 << 8) | (d3 << 4) | d4);
}

// Returns the value (0-15) of the hexadecimal digit c, or -1 when c is not one.
private static int GetHexDigitValue(int c)
{
    if (c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f')
    {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F')
    {
        return c - 'A' + 10;
    }
    return -1;
}
/// <summary>
/// Scans an unquoted string whose first character has already been read from the buffer.
/// The token extends over every following letter, digit, '$' or '_'.
/// </summary>
/// <param name="buffer">The buffer, positioned just past the first character of the token.</param>
/// <returns>A StringJsonToken of type UnquotedString whose lexeme and value are the scanned text.</returns>
private static JsonToken GetUnquotedStringToken(JsonBuffer buffer)
{
    // the first character of the token was consumed by the caller
    var tokenStart = buffer.Position - 1;

    // consume characters until one is found that cannot belong to the token
    int next;
    do
    {
        next = buffer.Read();
    }
    while (next == '$' || next == '_' || char.IsLetterOrDigit((char)next));

    // that character belongs to whatever comes next, so hand it back
    buffer.UnRead(next);

    var text = buffer.Substring(tokenStart, buffer.Position - tokenStart);
    return new StringJsonToken(JsonTokenType.UnquotedString, text, text);
}
- // nested types
/// <summary>
/// The states of the state machine that GetNumberToken uses to scan a number.
/// </summary>
private enum NumberState
{
    /// <summary>A leading '-' has been read and nothing else.</summary>
    SawLeadingMinus,

    /// <summary>The integer part is a single '0'.</summary>
    SawLeadingZero,

    /// <summary>One or more digits of an integer part that does not start with '0' have been read.</summary>
    SawIntegerDigits,

    /// <summary>The '.' has been read; a fraction digit must follow.</summary>
    SawDecimalPoint,

    /// <summary>One or more digits after the decimal point have been read.</summary>
    SawFractionDigits,

    /// <summary>The 'e' or 'E' has been read; a sign or an exponent digit must follow.</summary>
    SawExponentLetter,

    /// <summary>The '+' or '-' of the exponent has been read; an exponent digit must follow.</summary>
    SawExponentSign,

    /// <summary>One or more exponent digits have been read.</summary>
    SawExponentDigits,

    /// <summary>"-I" has been read; the rest of "-Infinity" must follow.</summary>
    SawMinusI,

    /// <summary>A terminator was read after a complete number.</summary>
    Done,

    /// <summary>The characters read cannot form a number.</summary>
    Invalid
}
/// <summary>
/// The states of the state machine that GetRegularExpressionToken uses to scan a
/// regular expression literal.
/// </summary>
private enum RegularExpressionState
{
    /// <summary>Reading the pattern between the opening and the closing slash.</summary>
    InPattern,

    /// <summary>A backslash was read inside the pattern; the next character is taken as is.</summary>
    InEscapeSequence,

    /// <summary>The closing slash was read; reading the option letters that follow it.</summary>
    InOptions,

    /// <summary>A terminator was read after the options.</summary>
    Done,

    /// <summary>The characters read cannot form a regular expression literal.</summary>
    Invalid
}
- }
- }
|