987 lines
35 KiB
Markdown
987 lines
35 KiB
Markdown
# PLJSON_PARSER
|
|
|
|
## Package Specification
|
|
|
|
```sql
|
|
package pljson_parser as
|
|
/*
|
|
Copyright (c) 2010 Jonas Krogsboell
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
/** Internal type for processing. */
|
|
/* scanner tokens:
|
|
'{', '}', ',', ':', '[', ']', STRING, NUMBER, TRUE, FALSE, NULL
|
|
*/
|
|
type rToken IS RECORD (
|
|
type_name VARCHAR2(7),
|
|
line PLS_INTEGER,
|
|
col PLS_INTEGER,
|
|
data VARCHAR2(32767),
|
|
data_overflow clob); -- max_string_size
|
|
|
|
type lTokens is table of rToken index by pls_integer;
|
|
type json_src is record (len number, offset number, offset_chars number, src varchar2(32767), s_clob clob);
|
|
|
|
json_strict boolean not null := false;
|
|
|
|
ucs2_exception EXCEPTION;
|
|
pragma exception_init(ucs2_exception, -22831);
|
|
|
|
function lengthcc(buf clob) return number;
|
|
|
|
function next_char(indx number, s in out nocopy json_src) return varchar2;
|
|
function next_char2(indx number, s in out nocopy json_src, amount number default 1) return varchar2;
|
|
function parseObj(tokens lTokens, indx in out nocopy pls_integer) return pljson;
|
|
|
|
function prepareClob(buf in clob) return pljson_parser.json_src;
|
|
function prepareVarchar2(buf in varchar2) return pljson_parser.json_src;
|
|
function lexer(jsrc in out nocopy json_src) return lTokens;
|
|
procedure print_token(t rToken);
|
|
|
|
/**
|
|
* <p>Primary parsing method. It can parse a JSON object.</p>
|
|
*
|
|
* @return An instance of <code>pljson</code>.
|
|
* @throws PARSER_ERROR -20101 when invalid input found.
|
|
* @throws SCANNER_ERROR -20100 when lexing fails.
|
|
*/
|
|
function parser(str varchar2) return pljson;
|
|
function parse_list(str varchar2) return pljson_list;
|
|
function parse_any(str varchar2) return pljson_element;
|
|
function parser(str clob) return pljson;
|
|
function parse_list(str clob) return pljson_list;
|
|
function parse_any(str clob) return pljson_element;
|
|
procedure remove_duplicates(obj in out nocopy pljson);
|
|
function get_version return varchar2;
|
|
|
|
end pljson_parser;```
|
|
|
|
## Package Body
|
|
|
|
```sql
|
|
package body pljson_parser as
|
|
/*
|
|
Copyright (c) 2009 Jonas Krogsboell
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
decimalpoint varchar2(1 char) := '.';
|
|
|
|
/* moved to package spec
|
|
ucs2_exception EXCEPTION;
|
|
pragma exception_init(ucs2_exception, -22831);
|
|
*/
|
|
|
|
function lengthcc(buf clob) return number as
|
|
offset number := 0;
|
|
len number := 0;
|
|
src varchar2(32767);
|
|
src_len number;
|
|
begin
|
|
while true loop
|
|
begin
|
|
src := dbms_lob.substr(buf, 4000, offset+1);
|
|
exception
|
|
when ucs2_exception then
|
|
src := dbms_lob.substr(buf, 3999, offset+1);
|
|
end;
|
|
exit when src is null;
|
|
len := len + length(src);
|
|
offset := offset + length2(src);
|
|
--dbms_output.put_line('offset = ' || offset || ' len = ' || len);
|
|
end loop;
|
|
return len;
|
|
end;
|
|
|
|
procedure update_decimalpoint as
|
|
begin
|
|
select substr(value, 1, 1)
|
|
into decimalpoint
|
|
from nls_session_parameters
|
|
where parameter = 'NLS_NUMERIC_CHARACTERS';
|
|
end update_decimalpoint;
|
|
|
|
/* type json_src is record (len number, offset number, src varchar2(32767), s_clob clob); */
|
|
/* assertions
|
|
offset: contains 0-base offset of buffer,
|
|
so 1-st entry is offset + 1, 4000-th entry = offset + 4000
|
|
src: contains offset + 1 .. offset + 4000, ex. 1..4000, 4001..8000, etc.
|
|
*/
|
|
function next_char(indx number, s in out nocopy json_src) return varchar2 as
|
|
begin
|
|
if (indx > s.len) then return null; end if;
|
|
|
|
--right offset?
|
|
/* if (indx > 4000 + s.offset or indx < s.offset) then */
|
|
/* fix for issue #37 */
|
|
/* code before fix for issue #169
|
|
if (indx > 4000 + s.offset or indx <= s.offset) then
|
|
s.offset := indx - (indx mod 4000);
|
|
-- addon fix for issue #37
|
|
if s.offset = indx then
|
|
s.offset := s.offset - 4000;
|
|
end if;
|
|
s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
|
|
end if;
|
|
--read from s.src
|
|
return substr(s.src, indx-s.offset, 1);
|
|
*/
|
|
|
|
/* use of length, so works correctly for 4-byte unicode characters (issue #169) */
|
|
/* lengthc does not work (issue #190) */
|
|
if (indx > length(s.src) + s.offset_chars) then
|
|
while (indx > length(s.src) + s.offset_chars) loop
|
|
s.offset_chars := s.offset_chars + length(s.src);
|
|
s.offset := s.offset + length2(s.src);
|
|
/* exception check, so works correctly for 4-byte unicode characters (issue #169) */
|
|
begin
|
|
s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
|
|
exception
|
|
when ucs2_exception then
|
|
s.src := dbms_lob.substr(s.s_clob, 3999, s.offset+1);
|
|
end;
|
|
end loop;
|
|
elsif (indx <= s.offset_chars) then
|
|
s.offset_chars := 0;
|
|
s.offset := 0;
|
|
/* exception check, so works correctly for 4-byte unicode characters (issue #169) */
|
|
begin
|
|
s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
|
|
exception
|
|
when ucs2_exception then
|
|
s.src := dbms_lob.substr(s.s_clob, 3999, s.offset+1);
|
|
end;
|
|
while (indx > length(s.src) + s.offset_chars) loop
|
|
s.offset_chars := s.offset_chars + length(s.src);
|
|
s.offset := s.offset + length2(s.src);
|
|
/* exception check, so works correctly for 4-byte unicode characters (issue #169) */
|
|
begin
|
|
s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
|
|
exception
|
|
when ucs2_exception then
|
|
s.src := dbms_lob.substr(s.s_clob, 3999, s.offset+1);
|
|
end;
|
|
end loop;
|
|
end if;
|
|
--dbms_output.put_line('indx: ' || indx || ' offset: ' || s.offset || ' (chars: ' || s.offset_chars || ') src chars: ' || length(s.src));
|
|
--read from s.src
|
|
return substr(s.src, indx-s.offset_chars, 1);
|
|
end;
|
|
|
|
function next_char2(indx number, s in out nocopy json_src, amount number default 1) return varchar2 as
|
|
buf varchar2(32767) := '';
|
|
begin
|
|
for i in 1..amount loop
|
|
buf := buf || next_char(indx-1+i, s);
|
|
end loop;
|
|
return buf;
|
|
end;
|
|
|
|
function prepareClob(buf clob) return pljson_parser.json_src as
|
|
temp pljson_parser.json_src;
|
|
begin
|
|
temp.s_clob := buf;
|
|
temp.offset_chars := 0;
|
|
temp.offset := 0;
|
|
/* exception check, so works correctly for 4-byte unicode characters (issue #169) */
|
|
begin
|
|
temp.src := dbms_lob.substr(buf, 4000, temp.offset+1);
|
|
exception
|
|
when ucs2_exception then
|
|
temp.src := dbms_lob.substr(buf, 3999, temp.offset+1);
|
|
end;
|
|
/* use of lengthcc, so works correctly for 4-byte unicode characters (issue #169) */
|
|
temp.len := lengthcc(buf); --dbms_lob.getlength(buf);
|
|
return temp;
|
|
end;
|
|
|
|
function prepareVarchar2(buf varchar2) return pljson_parser.json_src as
|
|
temp pljson_parser.json_src;
|
|
begin
|
|
temp.s_clob := buf;
|
|
temp.offset_chars := 0;
|
|
temp.offset := 0;
|
|
temp.src := substr(buf, 1, 4000);
|
|
temp.len := length(buf);
|
|
return temp;
|
|
end;
|
|
|
|
procedure debug(text varchar2) as
|
|
begin
|
|
dbms_output.put_line(text);
|
|
end;
|
|
|
|
procedure print_token(t rToken) as
|
|
begin
|
|
dbms_output.put_line('Line: '||t.line||' - Column: '||t.col||' - Type: '||t.type_name||' - Content: '||t.data);
|
|
end print_token;
|
|
|
|
/* SCANNER FUNCTIONS START */
|
|
procedure s_error(text varchar2, line number, col number) as
|
|
begin
|
|
raise_application_error(-20100, 'JSON Scanner exception @ line: '||line||' column: '||col||' - '||text);
|
|
end;
|
|
|
|
procedure s_error(text varchar2, tok rToken) as
|
|
begin
|
|
raise_application_error(-20100, 'JSON Scanner exception @ line: '||tok.line||' column: '||tok.col||' - '||text);
|
|
end;
|
|
|
|
function mt(t varchar2, l pls_integer, c pls_integer, d varchar2) return rToken as
|
|
token rToken;
|
|
begin
|
|
token.type_name := t;
|
|
token.line := l;
|
|
token.col := c;
|
|
token.data := d;
|
|
return token;
|
|
end;
|
|
|
|
function lexNumber(jsrc in out nocopy json_src, tok in out nocopy rToken, indx in out nocopy pls_integer) return pls_integer as
|
|
numbuf varchar2(4000) := '';
|
|
buf varchar2(4);
|
|
checkLoop boolean;
|
|
begin
|
|
buf := next_char(indx, jsrc);
|
|
if (buf = '-') then numbuf := '-'; indx := indx + 1; end if;
|
|
buf := next_char(indx, jsrc);
|
|
--0 or [1-9]([0-9])*
|
|
if (buf = '0') then
|
|
numbuf := numbuf || '0'; indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
elsif (buf >= '1' and buf <= '9') then
|
|
numbuf := numbuf || buf; indx := indx + 1;
|
|
--read digits
|
|
buf := next_char(indx, jsrc);
|
|
while (buf >= '0' and buf <= '9') loop
|
|
numbuf := numbuf || buf; indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
end loop;
|
|
end if;
|
|
--fraction
|
|
if (buf = '.') then
|
|
numbuf := numbuf || buf; indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
checkLoop := FALSE;
|
|
while (buf >= '0' and buf <= '9') loop
|
|
checkLoop := TRUE;
|
|
numbuf := numbuf || buf; indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
end loop;
|
|
if (not checkLoop) then
|
|
s_error('Expected: digits in fraction', tok);
|
|
end if;
|
|
end if;
|
|
--exp part
|
|
if (buf in ('e', 'E')) then
|
|
numbuf := numbuf || buf; indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
if (buf = '+' or buf = '-') then
|
|
numbuf := numbuf || buf; indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
end if;
|
|
checkLoop := FALSE;
|
|
while (buf >= '0' and buf <= '9') loop
|
|
checkLoop := TRUE;
|
|
numbuf := numbuf || buf; indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
end loop;
|
|
if (not checkLoop) then
|
|
s_error('Expected: digits in exp', tok);
|
|
end if;
|
|
end if;
|
|
|
|
tok.data := numbuf;
|
|
return indx;
|
|
end lexNumber;
|
|
|
|
-- [a-zA-Z]([a-zA-Z0-9])*
|
|
function lexName(jsrc in out nocopy json_src, tok in out nocopy rToken, indx in out nocopy pls_integer) return pls_integer as
|
|
varbuf varchar2(32767) := '';
|
|
buf varchar(4);
|
|
num number;
|
|
begin
|
|
buf := next_char(indx, jsrc);
|
|
while (REGEXP_LIKE(buf, '^[[:alnum:]\_]$', 'i')) loop
|
|
varbuf := varbuf || buf;
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
if (buf is null) then
|
|
goto retname;
|
|
--debug('Premature string ending');
|
|
end if;
|
|
end loop;
|
|
<<retname>>
|
|
--could check for reserved keywords here
|
|
--debug(varbuf);
|
|
tok.data := varbuf;
|
|
return indx-1;
|
|
end lexName;
|
|
|
|
procedure updateClob(v_extended in out nocopy clob, v_str varchar2) as
|
|
begin
|
|
/* use of length2, so works correctly for 4-byte unicode characters (issue #169) */
|
|
dbms_lob.writeappend(v_extended, length2(v_str), v_str);
|
|
end updateClob;
|
|
|
|
function lexString(jsrc in out nocopy json_src, tok in out nocopy rToken, indx in out nocopy pls_integer, endChar char) return pls_integer as
|
|
v_extended clob := null; v_count number := 0;
|
|
varbuf varchar2(32767) := '';
|
|
buf varchar(4);
|
|
wrong boolean;
|
|
max_string_chars number := 5000; /* chunk size, less than this number may be copied */
|
|
begin
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
while (buf != endChar) loop
|
|
--clob control
|
|
if (v_count > 8191) then --crazy oracle error (16383 is the highest working length with unistr - 8192 choosen to be safe)
|
|
if (v_extended is null) then
|
|
v_extended := empty_clob();
|
|
dbms_lob.createtemporary(v_extended, true);
|
|
end if;
|
|
updateClob(v_extended, unistr(varbuf));
|
|
varbuf := ''; v_count := 0;
|
|
end if;
|
|
if (buf = Chr(13) or buf = CHR(9) or buf = CHR(10)) then
|
|
s_error('Control characters not allowed (CHR(9),CHR(10),CHR(13))', tok);
|
|
end if;
|
|
if (buf = '\') then
|
|
--varbuf := varbuf || buf;
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
case
|
|
when buf in ('\') then
|
|
varbuf := varbuf || buf || buf; v_count := v_count + 2;
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
when buf in ('"', '/') then
|
|
varbuf := varbuf || buf; v_count := v_count + 1;
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
when buf = '''' then
|
|
if (json_strict = false) then
|
|
varbuf := varbuf || buf; v_count := v_count + 1;
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
else
|
|
s_error('strictmode - expected: " \ / b f n r t u ', tok);
|
|
end if;
|
|
when buf in ('b', 'f', 'n', 'r', 't') then
|
|
--backspace b = U+0008
|
|
--formfeed f = U+000C
|
|
--newline n = U+000A
|
|
--carret r = U+000D
|
|
--tabulator t = U+0009
|
|
case buf
|
|
when 'b' then varbuf := varbuf || chr(8);
|
|
when 'f' then varbuf := varbuf || chr(12);
|
|
when 'n' then varbuf := varbuf || chr(10);
|
|
when 'r' then varbuf := varbuf || chr(13);
|
|
when 't' then varbuf := varbuf || chr(9);
|
|
end case;
|
|
--varbuf := varbuf || buf;
|
|
v_count := v_count + 1;
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
when buf = 'u' then
|
|
--four hexadecimal chars
|
|
declare
|
|
four varchar2(4);
|
|
begin
|
|
four := next_char2(indx+1, jsrc, 4);
|
|
wrong := FALSE;
|
|
if (upper(substr(four, 1, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
|
|
if (upper(substr(four, 2, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
|
|
if (upper(substr(four, 3, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
|
|
if (upper(substr(four, 4, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
|
|
if (wrong) then
|
|
s_error('expected: " \u([0-9][A-F]){4}', tok);
|
|
end if;
|
|
-- varbuf := varbuf || buf || four;
|
|
varbuf := varbuf || '\'||four;--chr(to_number(four,'XXXX'));
|
|
v_count := v_count + 5;
|
|
indx := indx + 5;
|
|
buf := next_char(indx, jsrc);
|
|
end;
|
|
else
|
|
s_error('expected: " \ / b f n r t u ', tok);
|
|
end case;
|
|
else
|
|
varbuf := varbuf || buf; v_count := v_count + 1;
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc);
|
|
end if;
|
|
end loop;
|
|
|
|
if (buf is null) then
|
|
s_error('string ending not found', tok);
|
|
--debug('Premature string ending');
|
|
end if;
|
|
|
|
--debug(varbuf);
|
|
--dbms_output.put_line(varbuf);
|
|
if (v_extended is not null) then
|
|
updateClob(v_extended, unistr(varbuf));
|
|
tok.data_overflow := v_extended;
|
|
-- tok.data := dbms_lob.substr(v_extended, 1, 32767);
|
|
/* may read less than "max_string_chars" characters but it's a sample so doesn't matter */
|
|
dbms_lob.read(v_extended, max_string_chars, 1, tok.data);
|
|
else
|
|
tok.data := unistr(varbuf);
|
|
end if;
|
|
return indx;
|
|
end lexString;
|
|
|
|
/* scanner tokens:
|
|
'{', '}', ',', ':', '[', ']', STRING, NUMBER, TRUE, FALSE, NULL
|
|
*/
|
|
function lexer(jsrc in out nocopy json_src) return lTokens as
|
|
tokens lTokens;
|
|
indx pls_integer := 1;
|
|
tok_indx pls_integer := 1;
|
|
buf varchar2(4);
|
|
lin_no number := 1;
|
|
col_no number := 0;
|
|
begin
|
|
while (indx <= jsrc.len) loop
|
|
--read into buf
|
|
buf := next_char(indx, jsrc);
|
|
col_no := col_no + 1;
|
|
--convert to switch case
|
|
case
|
|
when buf = '{' then tokens(tok_indx) := mt('{', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
when buf = '}' then tokens(tok_indx) := mt('}', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
when buf = ',' then tokens(tok_indx) := mt(',', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
when buf = ':' then tokens(tok_indx) := mt(':', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
when buf = '[' then tokens(tok_indx) := mt('[', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
when buf = ']' then tokens(tok_indx) := mt(']', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
when buf = 't' then
|
|
if (next_char2(indx, jsrc, 4) != 'true') then
|
|
if (json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i')) then
|
|
tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
|
|
indx := lexName(jsrc, tokens(tok_indx), indx);
|
|
col_no := col_no + length(tokens(tok_indx).data) + 1;
|
|
tok_indx := tok_indx + 1;
|
|
else
|
|
s_error('Expected: ''true''', lin_no, col_no);
|
|
end if;
|
|
else
|
|
tokens(tok_indx) := mt('TRUE', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
indx := indx + 3;
|
|
col_no := col_no + 3;
|
|
end if;
|
|
when buf = 'n' then
|
|
if (next_char2(indx, jsrc, 4) != 'null') then
|
|
if (json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i')) then
|
|
tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
|
|
indx := lexName(jsrc, tokens(tok_indx), indx);
|
|
col_no := col_no + length(tokens(tok_indx).data) + 1;
|
|
tok_indx := tok_indx + 1;
|
|
else
|
|
s_error('Expected: ''null''', lin_no, col_no);
|
|
end if;
|
|
else
|
|
tokens(tok_indx) := mt('NULL', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
indx := indx + 3;
|
|
col_no := col_no + 3;
|
|
end if;
|
|
when buf = 'f' then
|
|
if (next_char2(indx, jsrc, 5) != 'false') then
|
|
if (json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i')) then
|
|
tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
|
|
indx := lexName(jsrc, tokens(tok_indx), indx);
|
|
col_no := col_no + length(tokens(tok_indx).data) + 1;
|
|
tok_indx := tok_indx + 1;
|
|
else
|
|
s_error('Expected: ''false''', lin_no, col_no);
|
|
end if;
|
|
else
|
|
tokens(tok_indx) := mt('FALSE', lin_no, col_no, null); tok_indx := tok_indx + 1;
|
|
indx := indx + 4;
|
|
col_no := col_no + 4;
|
|
end if;
|
|
/* -- 9 = TAB, 10 = \n, 13 = \r (Linux = \n, Windows = \r\n, Mac = \r */
|
|
when (buf = Chr(10)) then --linux newlines
|
|
lin_no := lin_no + 1;
|
|
col_no := 0;
|
|
|
|
when (buf = Chr(13)) then --Windows or Mac way
|
|
lin_no := lin_no + 1;
|
|
col_no := 0;
|
|
if (jsrc.len >= indx+1) then -- better safe than sorry
|
|
buf := next_char(indx+1, jsrc);
|
|
if (buf = Chr(10)) then --\r\n
|
|
indx := indx + 1;
|
|
end if;
|
|
end if;
|
|
|
|
when (buf = CHR(9)) then null; --tabbing
|
|
when (buf in ('-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) then --number
|
|
tokens(tok_indx) := mt('NUMBER', lin_no, col_no, null);
|
|
indx := lexNumber(jsrc, tokens(tok_indx), indx)-1;
|
|
col_no := col_no + length(tokens(tok_indx).data);
|
|
tok_indx := tok_indx + 1;
|
|
when buf = '"' then --string
|
|
tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
|
|
indx := lexString(jsrc, tokens(tok_indx), indx, '"');
|
|
col_no := col_no + length(tokens(tok_indx).data) + 1;
|
|
tok_indx := tok_indx + 1;
|
|
when buf = '''' and json_strict = false then --string
|
|
tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
|
|
indx := lexString(jsrc, tokens(tok_indx), indx, '''');
|
|
col_no := col_no + length(tokens(tok_indx).data) + 1; --hovsa her
|
|
tok_indx := tok_indx + 1;
|
|
when json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i') then
|
|
tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
|
|
indx := lexName(jsrc, tokens(tok_indx), indx);
|
|
if (tokens(tok_indx).data_overflow is not null) then
|
|
/* use of lengthcc, so works correctly for 4-byte unicode characters (issue #169) */
|
|
col_no := col_no + lengthcc(tokens(tok_indx).data_overflow) + 1; --dbms_lob.getlength(tokens(tok_indx).data_overflow) + 1;
|
|
else
|
|
col_no := col_no + length(tokens(tok_indx).data) + 1;
|
|
end if;
|
|
tok_indx := tok_indx + 1;
|
|
when json_strict = false and buf||next_char(indx+1, jsrc) = '/*' then --strip comments
|
|
declare
|
|
saveindx number := indx;
|
|
un_esc clob;
|
|
begin
|
|
indx := indx + 1;
|
|
loop
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc)||next_char(indx+1, jsrc);
|
|
exit when buf = '*/';
|
|
exit when buf is null;
|
|
end loop;
|
|
|
|
if (indx = saveindx+2) then
|
|
--enter unescaped mode
|
|
--dbms_output.put_line('Entering unescaped mode');
|
|
un_esc := empty_clob();
|
|
dbms_lob.createtemporary(un_esc, true);
|
|
indx := indx + 1;
|
|
loop
|
|
indx := indx + 1;
|
|
buf := next_char(indx, jsrc)||next_char(indx+1, jsrc)||next_char(indx+2, jsrc)||next_char(indx+3, jsrc);
|
|
exit when buf = '/**/';
|
|
if buf is null then
|
|
s_error('Unexpected sequence /**/ to end unescaped data: '||buf, lin_no, col_no);
|
|
end if;
|
|
buf := next_char(indx, jsrc);
|
|
/* use of length2, so works correctly for 4-byte unicode characters (issue #169) */
|
|
dbms_lob.writeappend(un_esc, length2(buf), buf);
|
|
end loop;
|
|
tokens(tok_indx) := mt('ESTRING', lin_no, col_no, null);
|
|
tokens(tok_indx).data_overflow := un_esc;
|
|
/* use of lengthcc, so works correctly for 4-byte unicode characters (issue #169) */
|
|
col_no := col_no + lengthcc(un_esc) + 1; --dbms_lob.getlength(un_esc) + 1; --note: line count won't work properly
|
|
tok_indx := tok_indx + 1;
|
|
indx := indx + 2;
|
|
end if;
|
|
|
|
indx := indx + 1;
|
|
end;
|
|
when buf = ' ' then null; --space
|
|
else
|
|
s_error('Unexpected char: '||buf, lin_no, col_no);
|
|
end case;
|
|
|
|
indx := indx + 1;
|
|
end loop;
|
|
|
|
return tokens;
|
|
end lexer;
|
|
|
|
/* SCANNER END */
|
|
|
|
/* PARSER FUNCTIONS START */
|
|
procedure p_error(text varchar2, tok rToken) as
|
|
begin
|
|
raise_application_error(-20101, 'JSON Parser exception @ line: '||tok.line||' column: '||tok.col||' - '||text);
|
|
end;
|
|
|
|
function parseArr(tokens lTokens, indx in out nocopy pls_integer) return pljson_list as
|
|
e_arr pljson_element_array := pljson_element_array();
|
|
ret_list pljson_list := pljson_list();
|
|
v_count number := 0;
|
|
tok rToken;
|
|
pv pljson_number;
|
|
begin
|
|
--value, value, value ]
|
|
if (indx > tokens.count) then p_error('more elements in array was excepted', tok); end if;
|
|
tok := tokens(indx);
|
|
while (tok.type_name != ']') loop
|
|
e_arr.extend;
|
|
v_count := v_count + 1;
|
|
case tok.type_name
|
|
when 'TRUE' then e_arr(v_count) := pljson_bool(true);
|
|
when 'FALSE' then e_arr(v_count) := pljson_bool(false);
|
|
when 'NULL' then e_arr(v_count) := pljson_null();
|
|
when 'STRING' then e_arr(v_count) := case when tok.data_overflow is not null then pljson_string(tok.data_overflow) else pljson_string(tok.data) end;
|
|
when 'ESTRING' then e_arr(v_count) := pljson_string(tok.data_overflow, false);
|
|
/* E.I.Sarmas (github.com/dsnz) 2016-12-01 support for binary_double numbers */
|
|
--when 'NUMBER' then e_arr(v_count) := pljson_number(to_number(replace(tok.data, '.', decimalpoint)));
|
|
when 'NUMBER' then
|
|
pv := pljson_number(0);
|
|
pv.parse_number(replace(tok.data, '.', decimalpoint));
|
|
e_arr(v_count) := pv;
|
|
when '[' then
|
|
declare e_list pljson_list; begin
|
|
indx := indx + 1;
|
|
e_list := parseArr(tokens, indx);
|
|
e_arr(v_count) := e_list;
|
|
end;
|
|
when '{' then
|
|
indx := indx + 1;
|
|
e_arr(v_count) := parseObj(tokens, indx);
|
|
else
|
|
p_error('Expected a value', tok);
|
|
end case;
|
|
indx := indx + 1;
|
|
if (indx > tokens.count) then p_error('] not found', tok); end if;
|
|
tok := tokens(indx);
|
|
if (tok.type_name = ',') then --advance
|
|
indx := indx + 1;
|
|
if (indx > tokens.count) then p_error('more elements in array was excepted', tok); end if;
|
|
tok := tokens(indx);
|
|
if (tok.type_name = ']') then --premature exit
|
|
p_error('Premature exit in array', tok);
|
|
end if;
|
|
elsif (tok.type_name != ']') then --error
|
|
p_error('Expected , or ]', tok);
|
|
end if;
|
|
|
|
end loop;
|
|
ret_list.list_data := e_arr;
|
|
return ret_list;
|
|
end parseArr;
|
|
|
|
function parseMem(tokens lTokens, indx in out pls_integer, mem_name varchar2, mem_indx number) return pljson_element as
|
|
mem pljson_element;
|
|
tok rToken;
|
|
pv pljson_number;
|
|
begin
|
|
tok := tokens(indx);
|
|
case tok.type_name
|
|
when 'TRUE' then mem := pljson_bool(true);
|
|
when 'FALSE' then mem := pljson_bool(false);
|
|
when 'NULL' then mem := pljson_null();
|
|
when 'STRING' then mem := case when tok.data_overflow is not null then pljson_string(tok.data_overflow) else pljson_string(tok.data) end;
|
|
when 'ESTRING' then mem := pljson_string(tok.data_overflow, false);
|
|
/* E.I.Sarmas (github.com/dsnz) 2016-12-01 support for binary_double numbers */
|
|
--when 'NUMBER' then mem := pljson_number(to_number(replace(tok.data, '.', decimalpoint)));
|
|
when 'NUMBER' then
|
|
pv := pljson_number(0);
|
|
pv.parse_number(replace(tok.data, '.', decimalpoint));
|
|
mem := pv;
|
|
when '[' then
|
|
declare
|
|
e_list pljson_list;
|
|
begin
|
|
indx := indx + 1;
|
|
e_list := parseArr(tokens, indx);
|
|
mem := e_list;
|
|
end;
|
|
when '{' then
|
|
indx := indx + 1;
|
|
mem := parseObj(tokens, indx);
|
|
else
|
|
p_error('Found '||tok.type_name, tok);
|
|
end case;
|
|
mem.mapname := mem_name;
|
|
mem.mapindx := mem_indx;
|
|
|
|
indx := indx + 1;
|
|
return mem;
|
|
end parseMem;
|
|
|
|
/*procedure test_duplicate_members(arr in json_member_array, mem_name in varchar2, wheretok rToken) as
|
|
begin
|
|
for i in 1 .. arr.count loop
|
|
if (arr(i).member_name = mem_name) then
|
|
p_error('Duplicate member name', wheretok);
|
|
end if;
|
|
end loop;
|
|
end test_duplicate_members;*/
|
|
|
|
function parseObj(tokens lTokens, indx in out nocopy pls_integer) return pljson as
|
|
type memmap is table of number index by varchar2(4000); -- i've read somewhere that this is not possible - but it is!
|
|
mymap memmap;
|
|
nullelemfound boolean := false;
|
|
|
|
obj pljson;
|
|
tok rToken;
|
|
mem_name varchar(4000);
|
|
arr pljson_element_array := pljson_element_array();
|
|
begin
|
|
--what to expect?
|
|
while (indx <= tokens.count) loop
|
|
tok := tokens(indx);
|
|
--debug('E: '||tok.type_name);
|
|
case tok.type_name
|
|
when 'STRING' then
|
|
--member
|
|
mem_name := substr(tok.data, 1, 4000);
|
|
begin
|
|
if (mem_name is null) then
|
|
if (nullelemfound) then
|
|
p_error('Duplicate empty member: ', tok);
|
|
else
|
|
nullelemfound := true;
|
|
end if;
|
|
elsif (mymap(mem_name) is not null) then
|
|
p_error('Duplicate member name: '||mem_name, tok);
|
|
end if;
|
|
exception
|
|
when no_data_found then mymap(mem_name) := 1;
|
|
end;
|
|
|
|
indx := indx + 1;
|
|
if (indx > tokens.count) then p_error('Unexpected end of input', tok); end if;
|
|
tok := tokens(indx);
|
|
indx := indx + 1;
|
|
if (indx > tokens.count) then p_error('Unexpected end of input', tok); end if;
|
|
if (tok.type_name = ':') then
|
|
--parse
|
|
declare
|
|
jmb pljson_element;
|
|
x number;
|
|
begin
|
|
x := arr.count + 1;
|
|
jmb := parseMem(tokens, indx, mem_name, x);
|
|
arr.extend;
|
|
arr(x) := jmb;
|
|
end;
|
|
else
|
|
p_error('Expected '':''', tok);
|
|
end if;
|
|
--move indx forward if ',' is found
|
|
if (indx > tokens.count) then p_error('Unexpected end of input', tok); end if;
|
|
|
|
tok := tokens(indx);
|
|
if (tok.type_name = ',') then
|
|
--debug('found ,');
|
|
indx := indx + 1;
|
|
tok := tokens(indx);
|
|
if (tok.type_name = '}') then --premature exit
|
|
p_error('Premature exit in json object', tok);
|
|
end if;
|
|
elsif (tok.type_name != '}') then
|
|
p_error('A comma seperator is probably missing', tok);
|
|
end if;
|
|
when '}' then
|
|
obj := pljson();
|
|
obj.json_data := arr;
|
|
return obj;
|
|
else
|
|
p_error('Expected string or }', tok);
|
|
end case;
|
|
end loop;
|
|
|
|
p_error('} not found', tokens(indx-1));
|
|
|
|
return obj;
|
|
|
|
end;
|
|
|
|
function parser(str varchar2) return pljson as
|
|
tokens lTokens;
|
|
obj pljson;
|
|
indx pls_integer := 1;
|
|
jsrc json_src;
|
|
begin
|
|
update_decimalpoint();
|
|
jsrc := prepareVarchar2(str);
|
|
tokens := lexer(jsrc);
|
|
if (tokens(indx).type_name = '{') then
|
|
indx := indx + 1;
|
|
obj := parseObj(tokens, indx);
|
|
else
|
|
raise_application_error(-20101, 'JSON Parser exception - no { start found');
|
|
end if;
|
|
if (tokens.count != indx) then
|
|
p_error('} should end the JSON object', tokens(indx));
|
|
end if;
|
|
|
|
return obj;
|
|
end parser;
|
|
|
|
function parse_list(str varchar2) return pljson_list as
|
|
tokens lTokens;
|
|
obj pljson_list;
|
|
indx pls_integer := 1;
|
|
jsrc json_src;
|
|
begin
|
|
update_decimalpoint();
|
|
jsrc := prepareVarchar2(str);
|
|
tokens := lexer(jsrc);
|
|
if (tokens(indx).type_name = '[') then
|
|
indx := indx + 1;
|
|
obj := parseArr(tokens, indx);
|
|
else
|
|
raise_application_error(-20101, 'JSON List Parser exception - no [ start found');
|
|
end if;
|
|
if (tokens.count != indx) then
|
|
p_error('] should end the JSON List object', tokens(indx));
|
|
end if;
|
|
|
|
return obj;
|
|
end parse_list;
|
|
|
|
function parse_list(str clob) return pljson_list as
|
|
tokens lTokens;
|
|
obj pljson_list;
|
|
indx pls_integer := 1;
|
|
jsrc json_src;
|
|
begin
|
|
update_decimalpoint();
|
|
jsrc := prepareClob(str);
|
|
tokens := lexer(jsrc);
|
|
if (tokens(indx).type_name = '[') then
|
|
indx := indx + 1;
|
|
obj := parseArr(tokens, indx);
|
|
else
|
|
raise_application_error(-20101, 'JSON List Parser exception - no [ start found');
|
|
end if;
|
|
if (tokens.count != indx) then
|
|
p_error('] should end the JSON List object', tokens(indx));
|
|
end if;
|
|
|
|
return obj;
|
|
end parse_list;
|
|
|
|
function parser(str clob) return pljson as
|
|
tokens lTokens;
|
|
obj pljson;
|
|
indx pls_integer := 1;
|
|
jsrc json_src;
|
|
begin
|
|
update_decimalpoint();
|
|
--dbms_output.put_line('Using clob');
|
|
jsrc := prepareClob(str);
|
|
tokens := lexer(jsrc);
|
|
if (tokens(indx).type_name = '{') then
|
|
indx := indx + 1;
|
|
obj := parseObj(tokens, indx);
|
|
else
|
|
raise_application_error(-20101, 'JSON Parser exception - no { start found');
|
|
end if;
|
|
if (tokens.count != indx) then
|
|
p_error('} should end the JSON object', tokens(indx));
|
|
end if;
|
|
|
|
return obj;
|
|
end parser;
|
|
|
|
function parse_any(str varchar2) return pljson_element as
|
|
tokens lTokens;
|
|
obj pljson_list;
|
|
ret pljson_element;
|
|
indx pls_integer := 1;
|
|
jsrc json_src;
|
|
begin
|
|
update_decimalpoint();
|
|
jsrc := prepareVarchar2(str);
|
|
tokens := lexer(jsrc);
|
|
tokens(tokens.count+1).type_name := ']';
|
|
obj := parseArr(tokens, indx);
|
|
if (tokens.count != indx) then
|
|
p_error('] should end the JSON List object', tokens(indx));
|
|
end if;
|
|
|
|
return obj.head();
|
|
end parse_any;
|
|
|
|
function parse_any(str clob) return pljson_element as
|
|
tokens lTokens;
|
|
obj pljson_list;
|
|
indx pls_integer := 1;
|
|
jsrc json_src;
|
|
begin
|
|
update_decimalpoint();
|
|
jsrc := prepareClob(str);
|
|
tokens := lexer(jsrc);
|
|
tokens(tokens.count+1).type_name := ']';
|
|
obj := parseArr(tokens, indx);
|
|
if (tokens.count != indx) then
|
|
p_error('] should end the JSON List object', tokens(indx));
|
|
end if;
|
|
|
|
return obj.head();
|
|
end parse_any;
|
|
|
|
/* last entry is the one to keep */
|
|
procedure remove_duplicates(obj in out nocopy pljson) as
|
|
type memberlist is table of pljson_element index by varchar2(4000);
|
|
members memberlist;
|
|
nulljsonvalue pljson_element := null;
|
|
validated pljson := pljson();
|
|
indx varchar2(4000);
|
|
begin
|
|
for i in 1 .. obj.count loop
|
|
if (obj.get(i).mapname is null) then
|
|
nulljsonvalue := obj.get(i);
|
|
else
|
|
members(obj.get(i).mapname) := obj.get(i);
|
|
end if;
|
|
end loop;
|
|
|
|
validated.check_duplicate(false);
|
|
indx := members.first;
|
|
loop
|
|
exit when indx is null;
|
|
validated.put(indx, members(indx));
|
|
indx := members.next(indx);
|
|
end loop;
|
|
if (nulljsonvalue is not null) then
|
|
validated.put('', nulljsonvalue);
|
|
end if;
|
|
|
|
validated.check_for_duplicate := obj.check_for_duplicate;
|
|
|
|
obj := validated;
|
|
end;
|
|
|
|
function get_version return varchar2 as
|
|
begin
|
|
return 'PL/JSON 3.5.2';
|
|
end get_version;
|
|
|
|
end pljson_parser;```
|