Files
2025-12-17 13:02:12 +01:00

35 KiB

PLJSON_PARSER

Package Specification

package pljson_parser as
  /*
  Copyright (c) 2010 Jonas Krogsboell

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
  */

  /** Internal type for processing. */
  /* scanner tokens:
    '{', '}', ',', ':', '[', ']', STRING, NUMBER, TRUE, FALSE, NULL
  */
  type rToken IS RECORD (
    type_name VARCHAR2(7),
    line PLS_INTEGER,
    col PLS_INTEGER,
    data VARCHAR2(32767),
    data_overflow clob); -- max_string_size

  type lTokens is table of rToken index by pls_integer;
  type json_src is record (len number, offset number, offset_chars number, src varchar2(32767), s_clob clob);

  json_strict boolean not null := false;

  ucs2_exception EXCEPTION;
  pragma exception_init(ucs2_exception, -22831);

  function lengthcc(buf clob) return number;

  function next_char(indx number, s in out nocopy json_src) return varchar2;
  function next_char2(indx number, s in out nocopy json_src, amount number default 1) return varchar2;
  function parseObj(tokens lTokens, indx in out nocopy pls_integer) return pljson;

  function prepareClob(buf in clob) return pljson_parser.json_src;
  function prepareVarchar2(buf in varchar2) return pljson_parser.json_src;
  function lexer(jsrc in out nocopy json_src) return lTokens;
  procedure print_token(t rToken);

  /**
   * <p>Primary parsing method. It can parse a JSON object.</p>
   *
   * @return An instance of <code>pljson</code>.
   * @throws PARSER_ERROR -20101 when invalid input found.
   * @throws SCANNER_ERROR -20100 when lexing fails.
   */
  function parser(str varchar2) return pljson;
  function parse_list(str varchar2) return pljson_list;
  function parse_any(str varchar2) return pljson_element;
  function parser(str clob) return pljson;
  function parse_list(str clob) return pljson_list;
  function parse_any(str clob) return pljson_element;
  procedure remove_duplicates(obj in out nocopy pljson);
  function get_version return varchar2;

end pljson_parser;```

## Package Body

```sql
package body pljson_parser as
  /*
  Copyright (c) 2009 Jonas Krogsboell

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
  */

  decimalpoint varchar2(1 char) := '.';

  /* moved to package spec
  ucs2_exception EXCEPTION;
  pragma exception_init(ucs2_exception, -22831);
  */

  function lengthcc(buf clob) return number as
   offset number := 0;
   len number := 0;
   src varchar2(32767);
   src_len number;
  begin
    while true loop
      begin
        src := dbms_lob.substr(buf, 4000, offset+1);
      exception
      when ucs2_exception then
        src := dbms_lob.substr(buf, 3999, offset+1);
      end;
      exit when src is null;
      len := len + length(src);
      offset := offset + length2(src);
      --dbms_output.put_line('offset = ' || offset || ' len = ' || len);
    end loop;
    return len;
  end;

  procedure update_decimalpoint as
  begin
    select substr(value, 1, 1)
    into decimalpoint
    from nls_session_parameters
    where parameter = 'NLS_NUMERIC_CHARACTERS';
  end update_decimalpoint;

  /* type json_src is record (len number, offset number, src varchar2(32767), s_clob clob); */
  /* assertions
    offset: contains 0-base offset of buffer,
      so 1-st entry is offset + 1, 4000-th entry = offset + 4000
    src: contains offset + 1 .. offset + 4000, ex. 1..4000, 4001..8000, etc.
  */
  function next_char(indx number, s in out nocopy json_src) return varchar2 as
  begin
    if (indx > s.len) then return null; end if;

    --right offset?
    /* if (indx > 4000 + s.offset or indx < s.offset) then */
    /* fix for issue #37 */
    /* code before fix for issue #169
    if (indx > 4000 + s.offset or indx <= s.offset) then
      s.offset := indx - (indx mod 4000);
      -- addon fix for issue #37
      if s.offset = indx then
        s.offset := s.offset - 4000;
      end if;
      s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
    end if;
    --read from s.src
    return substr(s.src, indx-s.offset, 1);
    */

    /* use of length, so works correctly for 4-byte unicode characters (issue #169) */
    /* lengthc does not work (issue #190) */
    if (indx > length(s.src) + s.offset_chars) then
      while (indx > length(s.src) + s.offset_chars) loop
        s.offset_chars := s.offset_chars + length(s.src);
        s.offset := s.offset + length2(s.src);
        /* exception check, so works correctly for 4-byte unicode characters (issue #169) */
        begin
          s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
        exception
        when ucs2_exception then
          s.src := dbms_lob.substr(s.s_clob, 3999, s.offset+1);
        end;
      end loop;
    elsif (indx <= s.offset_chars) then
      s.offset_chars := 0;
      s.offset := 0;
      /* exception check, so works correctly for 4-byte unicode characters (issue #169) */
      begin
        s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
      exception
      when ucs2_exception then
        s.src := dbms_lob.substr(s.s_clob, 3999, s.offset+1);
      end;
      while (indx > length(s.src) + s.offset_chars) loop
        s.offset_chars := s.offset_chars + length(s.src);
        s.offset := s.offset + length2(s.src);
        /* exception check, so works correctly for 4-byte unicode characters (issue #169) */
        begin
          s.src := dbms_lob.substr(s.s_clob, 4000, s.offset+1);
        exception
        when ucs2_exception then
          s.src := dbms_lob.substr(s.s_clob, 3999, s.offset+1);
        end;
      end loop;
    end if;
    --dbms_output.put_line('indx: ' || indx || ' offset: ' || s.offset || ' (chars: ' || s.offset_chars || ') src chars: ' || length(s.src));
    --read from s.src
    return substr(s.src, indx-s.offset_chars, 1);
  end;

  function next_char2(indx number, s in out nocopy json_src, amount number default 1) return varchar2 as
    buf varchar2(32767) := '';
  begin
    for i in 1..amount loop
      buf := buf || next_char(indx-1+i, s);
    end loop;
    return buf;
  end;

  function prepareClob(buf clob) return pljson_parser.json_src as
    temp pljson_parser.json_src;
  begin
    temp.s_clob := buf;
    temp.offset_chars := 0;
    temp.offset := 0;
    /* exception check, so works correctly for 4-byte unicode characters (issue #169) */
    begin
      temp.src := dbms_lob.substr(buf, 4000, temp.offset+1);
    exception
    when ucs2_exception then
      temp.src := dbms_lob.substr(buf, 3999, temp.offset+1);
    end;
    /* use of lengthcc, so works correctly for 4-byte unicode characters (issue #169) */
    temp.len := lengthcc(buf); --dbms_lob.getlength(buf);
    return temp;
  end;

  function prepareVarchar2(buf varchar2) return pljson_parser.json_src as
    temp pljson_parser.json_src;
  begin
    temp.s_clob := buf;
    temp.offset_chars := 0;
    temp.offset := 0;
    temp.src := substr(buf, 1, 4000);
    temp.len := length(buf);
    return temp;
  end;

  procedure debug(text varchar2) as
  begin
    dbms_output.put_line(text);
  end;

  procedure print_token(t rToken) as
  begin
    dbms_output.put_line('Line: '||t.line||' - Column: '||t.col||' - Type: '||t.type_name||' - Content: '||t.data);
  end print_token;

  /* SCANNER FUNCTIONS START */
  procedure s_error(text varchar2, line number, col number) as
  begin
    raise_application_error(-20100, 'JSON Scanner exception @ line: '||line||' column: '||col||' - '||text);
  end;

  procedure s_error(text varchar2, tok rToken) as
  begin
    raise_application_error(-20100, 'JSON Scanner exception @ line: '||tok.line||' column: '||tok.col||' - '||text);
  end;

  function mt(t varchar2, l pls_integer, c pls_integer, d varchar2) return rToken as
    token rToken;
  begin
    token.type_name := t;
    token.line := l;
    token.col := c;
    token.data := d;
    return token;
  end;

  function lexNumber(jsrc in out nocopy json_src, tok in out nocopy rToken, indx in out nocopy pls_integer) return pls_integer as
    numbuf varchar2(4000) := '';
    buf varchar2(4);
    checkLoop boolean;
  begin
    buf := next_char(indx, jsrc);
    if (buf = '-') then numbuf := '-'; indx := indx + 1; end if;
    buf := next_char(indx, jsrc);
    --0 or [1-9]([0-9])*
    if (buf = '0') then
      numbuf := numbuf || '0'; indx := indx + 1;
      buf := next_char(indx, jsrc);
    elsif (buf >= '1' and buf <= '9') then
      numbuf := numbuf || buf; indx := indx + 1;
      --read digits
      buf := next_char(indx, jsrc);
      while (buf >= '0' and buf <= '9') loop
        numbuf := numbuf || buf; indx := indx + 1;
        buf := next_char(indx, jsrc);
      end loop;
    end if;
    --fraction
    if (buf = '.') then
      numbuf := numbuf || buf; indx := indx + 1;
      buf := next_char(indx, jsrc);
      checkLoop := FALSE;
      while (buf >= '0' and buf <= '9') loop
        checkLoop := TRUE;
        numbuf := numbuf || buf; indx := indx + 1;
        buf := next_char(indx, jsrc);
      end loop;
      if (not checkLoop) then
        s_error('Expected: digits in fraction', tok);
      end if;
    end if;
    --exp part
    if (buf in ('e', 'E')) then
      numbuf := numbuf || buf; indx := indx + 1;
      buf := next_char(indx, jsrc);
      if (buf = '+' or buf = '-') then
        numbuf := numbuf || buf; indx := indx + 1;
        buf := next_char(indx, jsrc);
      end if;
      checkLoop := FALSE;
      while (buf >= '0' and buf <= '9') loop
        checkLoop := TRUE;
        numbuf := numbuf || buf; indx := indx + 1;
        buf := next_char(indx, jsrc);
      end loop;
      if (not checkLoop) then
        s_error('Expected: digits in exp', tok);
      end if;
    end if;

    tok.data := numbuf;
    return indx;
  end lexNumber;

  -- [a-zA-Z]([a-zA-Z0-9])*
  function lexName(jsrc in out nocopy json_src, tok in out nocopy rToken, indx in out nocopy pls_integer) return pls_integer as
    varbuf varchar2(32767) := '';
    buf varchar(4);
    num number;
  begin
    buf := next_char(indx, jsrc);
    while (REGEXP_LIKE(buf, '^[[:alnum:]\_]$', 'i')) loop
      varbuf := varbuf || buf;
      indx := indx + 1;
      buf := next_char(indx, jsrc);
      if (buf is null) then
        goto retname;
        --debug('Premature string ending');
      end if;
    end loop;
    <<retname>>
    --could check for reserved keywords here
    --debug(varbuf);
    tok.data := varbuf;
    return indx-1;
  end lexName;

  procedure updateClob(v_extended in out nocopy clob, v_str varchar2) as
  begin
    /* use of length2, so works correctly for 4-byte unicode characters (issue #169) */
    dbms_lob.writeappend(v_extended, length2(v_str), v_str);
  end updateClob;

  function lexString(jsrc in out nocopy json_src, tok in out nocopy rToken, indx in out nocopy pls_integer, endChar char) return pls_integer as
    v_extended clob := null; v_count number := 0;
    varbuf varchar2(32767) := '';
    buf varchar(4);
    wrong boolean;
    max_string_chars number := 5000; /* chunk size, less than this number may be copied */
  begin
    indx := indx + 1;
    buf := next_char(indx, jsrc);
    while (buf != endChar) loop
      --clob control
      if (v_count > 8191) then --crazy oracle error (16383 is the highest working length with unistr - 8192 choosen to be safe)
        if (v_extended is null) then
          v_extended := empty_clob();
          dbms_lob.createtemporary(v_extended, true);
        end if;
        updateClob(v_extended, unistr(varbuf));
        varbuf := ''; v_count := 0;
      end if;
      if (buf = Chr(13) or buf = CHR(9) or buf = CHR(10)) then
        s_error('Control characters not allowed (CHR(9),CHR(10),CHR(13))', tok);
      end if;
      if (buf = '\') then
        --varbuf := varbuf || buf;
        indx := indx + 1;
        buf := next_char(indx, jsrc);
        case
          when buf in ('\') then
            varbuf := varbuf || buf || buf; v_count := v_count + 2;
            indx := indx + 1;
            buf := next_char(indx, jsrc);
          when buf in ('"', '/') then
            varbuf := varbuf || buf; v_count := v_count + 1;
            indx := indx + 1;
            buf := next_char(indx, jsrc);
          when buf = '''' then
            if (json_strict = false) then
              varbuf := varbuf || buf; v_count := v_count + 1;
              indx := indx + 1;
              buf := next_char(indx, jsrc);
            else
              s_error('strictmode - expected: " \ / b f n r t u ', tok);
            end if;
          when buf in ('b', 'f', 'n', 'r', 't') then
            --backspace b = U+0008
            --formfeed  f = U+000C
            --newline   n = U+000A
            --carret    r = U+000D
            --tabulator t = U+0009
            case buf
            when 'b' then varbuf := varbuf || chr(8);
            when 'f' then varbuf := varbuf || chr(12);
            when 'n' then varbuf := varbuf || chr(10);
            when 'r' then varbuf := varbuf || chr(13);
            when 't' then varbuf := varbuf || chr(9);
            end case;
            --varbuf := varbuf || buf;
            v_count := v_count + 1;
            indx := indx + 1;
            buf := next_char(indx, jsrc);
          when buf = 'u' then
            --four hexadecimal chars
            declare
              four varchar2(4);
            begin
              four := next_char2(indx+1, jsrc, 4);
              wrong := FALSE;
              if (upper(substr(four, 1, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
              if (upper(substr(four, 2, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
              if (upper(substr(four, 3, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
              if (upper(substr(four, 4, 1)) not in ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','a','b','c','d','e','f')) then wrong := TRUE; end if;
              if (wrong) then
                s_error('expected: " \u([0-9][A-F]){4}', tok);
              end if;
--              varbuf := varbuf || buf || four;
              varbuf := varbuf || '\'||four;--chr(to_number(four,'XXXX'));
              v_count := v_count + 5;
              indx := indx + 5;
              buf := next_char(indx, jsrc);
              end;
          else
            s_error('expected: " \ / b f n r t u ', tok);
        end case;
      else
        varbuf := varbuf || buf; v_count := v_count + 1;
        indx := indx + 1;
        buf := next_char(indx, jsrc);
      end if;
    end loop;

    if (buf is null) then
      s_error('string ending not found', tok);
      --debug('Premature string ending');
    end if;

    --debug(varbuf);
    --dbms_output.put_line(varbuf);
    if (v_extended is not null) then
      updateClob(v_extended, unistr(varbuf));
      tok.data_overflow := v_extended;
      -- tok.data := dbms_lob.substr(v_extended, 1, 32767);
      /* may read less than "max_string_chars" characters but it's a sample so doesn't matter */
      dbms_lob.read(v_extended, max_string_chars, 1, tok.data);
    else
      tok.data := unistr(varbuf);
    end if;
    return indx;
  end lexString;

  /* scanner tokens:
    '{', '}', ',', ':', '[', ']', STRING, NUMBER, TRUE, FALSE, NULL
  */
  function lexer(jsrc in out nocopy json_src) return lTokens as
    tokens lTokens;
    indx pls_integer := 1;
    tok_indx pls_integer := 1;
    buf varchar2(4);
    lin_no number := 1;
    col_no number := 0;
  begin
    while (indx <= jsrc.len) loop
      --read into buf
      buf := next_char(indx, jsrc);
      col_no := col_no + 1;
      --convert to switch case
      case
        when buf = '{' then tokens(tok_indx) := mt('{', lin_no, col_no, null); tok_indx := tok_indx + 1;
        when buf = '}' then tokens(tok_indx) := mt('}', lin_no, col_no, null); tok_indx := tok_indx + 1;
        when buf = ',' then tokens(tok_indx) := mt(',', lin_no, col_no, null); tok_indx := tok_indx + 1;
        when buf = ':' then tokens(tok_indx) := mt(':', lin_no, col_no, null); tok_indx := tok_indx + 1;
        when buf = '[' then tokens(tok_indx) := mt('[', lin_no, col_no, null); tok_indx := tok_indx + 1;
        when buf = ']' then tokens(tok_indx) := mt(']', lin_no, col_no, null); tok_indx := tok_indx + 1;
        when buf = 't' then
          if (next_char2(indx, jsrc, 4) != 'true') then
            if (json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i')) then
              tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
              indx := lexName(jsrc, tokens(tok_indx), indx);
              col_no := col_no + length(tokens(tok_indx).data) + 1;
              tok_indx := tok_indx + 1;
            else
              s_error('Expected: ''true''', lin_no, col_no);
            end if;
          else
            tokens(tok_indx) := mt('TRUE', lin_no, col_no, null); tok_indx := tok_indx + 1;
            indx := indx + 3;
            col_no := col_no + 3;
          end if;
        when buf = 'n' then
          if (next_char2(indx, jsrc, 4) != 'null') then
            if (json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i')) then
              tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
              indx := lexName(jsrc, tokens(tok_indx), indx);
              col_no := col_no + length(tokens(tok_indx).data) + 1;
              tok_indx := tok_indx + 1;
            else
              s_error('Expected: ''null''', lin_no, col_no);
            end if;
          else
            tokens(tok_indx) := mt('NULL', lin_no, col_no, null); tok_indx := tok_indx + 1;
            indx := indx + 3;
            col_no := col_no + 3;
          end if;
        when buf = 'f' then
          if (next_char2(indx, jsrc, 5) != 'false') then
            if (json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i')) then
              tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
              indx := lexName(jsrc, tokens(tok_indx), indx);
              col_no := col_no + length(tokens(tok_indx).data) + 1;
              tok_indx := tok_indx + 1;
            else
              s_error('Expected: ''false''', lin_no, col_no);
            end if;
          else
            tokens(tok_indx) := mt('FALSE', lin_no, col_no, null); tok_indx := tok_indx + 1;
            indx := indx + 4;
            col_no := col_no + 4;
          end if;
        /* -- 9 = TAB, 10 = \n, 13 = \r (Linux = \n, Windows = \r\n, Mac = \r */
        when (buf = Chr(10)) then --linux newlines
          lin_no := lin_no + 1;
          col_no := 0;

        when (buf = Chr(13)) then --Windows or Mac way
          lin_no := lin_no + 1;
          col_no := 0;
          if (jsrc.len >= indx+1) then -- better safe than sorry
            buf := next_char(indx+1, jsrc);
            if (buf = Chr(10)) then --\r\n
              indx := indx + 1;
            end if;
          end if;

        when (buf = CHR(9)) then null; --tabbing
        when (buf in ('-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) then --number
          tokens(tok_indx) := mt('NUMBER', lin_no, col_no, null);
          indx := lexNumber(jsrc, tokens(tok_indx), indx)-1;
          col_no := col_no + length(tokens(tok_indx).data);
          tok_indx := tok_indx + 1;
        when buf = '"' then --string
          tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
          indx := lexString(jsrc, tokens(tok_indx), indx, '"');
          col_no := col_no + length(tokens(tok_indx).data) + 1;
          tok_indx := tok_indx + 1;
        when buf = '''' and json_strict = false then --string
          tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
          indx := lexString(jsrc, tokens(tok_indx), indx, '''');
          col_no := col_no + length(tokens(tok_indx).data) + 1; --hovsa her
          tok_indx := tok_indx + 1;
        when json_strict = false and REGEXP_LIKE(buf, '^[[:alpha:]]$', 'i') then
          tokens(tok_indx) := mt('STRING', lin_no, col_no, null);
          indx := lexName(jsrc, tokens(tok_indx), indx);
          if (tokens(tok_indx).data_overflow is not null) then
            /* use of lengthcc, so works correctly for 4-byte unicode characters (issue #169) */
            col_no := col_no + lengthcc(tokens(tok_indx).data_overflow) + 1; --dbms_lob.getlength(tokens(tok_indx).data_overflow) + 1;
          else
            col_no := col_no + length(tokens(tok_indx).data) + 1;
          end if;
          tok_indx := tok_indx + 1;
        when json_strict = false and buf||next_char(indx+1, jsrc) = '/*' then --strip comments
          declare
            saveindx number := indx;
            un_esc clob;
          begin
            indx := indx + 1;
            loop
              indx := indx + 1;
              buf := next_char(indx, jsrc)||next_char(indx+1, jsrc);
              exit when buf = '*/';
              exit when buf is null;
            end loop;

            if (indx = saveindx+2) then
              --enter unescaped mode
              --dbms_output.put_line('Entering unescaped mode');
              un_esc := empty_clob();
              dbms_lob.createtemporary(un_esc, true);
              indx := indx + 1;
              loop
                indx := indx + 1;
                buf := next_char(indx, jsrc)||next_char(indx+1, jsrc)||next_char(indx+2, jsrc)||next_char(indx+3, jsrc);
                exit when buf = '/**/';
                if buf is null then
                  s_error('Unexpected sequence /**/ to end unescaped data: '||buf, lin_no, col_no);
                end if;
                buf := next_char(indx, jsrc);
                /* use of length2, so works correctly for 4-byte unicode characters (issue #169) */
                dbms_lob.writeappend(un_esc, length2(buf), buf);
              end loop;
              tokens(tok_indx) := mt('ESTRING', lin_no, col_no, null);
              tokens(tok_indx).data_overflow := un_esc;
              /* use of lengthcc, so works correctly for 4-byte unicode characters (issue #169) */
              col_no := col_no + lengthcc(un_esc) + 1; --dbms_lob.getlength(un_esc) + 1; --note: line count won't work properly
              tok_indx := tok_indx + 1;
              indx := indx + 2;
            end if;

            indx := indx + 1;
          end;
        when buf = ' ' then null; --space
        else
          s_error('Unexpected char: '||buf, lin_no, col_no);
      end case;

      indx := indx + 1;
    end loop;

    return tokens;
  end lexer;

  /* SCANNER END */

  /* PARSER FUNCTIONS START */
  procedure p_error(text varchar2, tok rToken) as
  begin
    raise_application_error(-20101, 'JSON Parser exception @ line: '||tok.line||' column: '||tok.col||' - '||text);
  end;

  function parseArr(tokens lTokens, indx in out nocopy pls_integer) return pljson_list as
    e_arr pljson_element_array := pljson_element_array();
    ret_list pljson_list := pljson_list();
    v_count number := 0;
    tok rToken;
    pv pljson_number;
  begin
    --value, value, value ]
    if (indx > tokens.count) then p_error('more elements in array was excepted', tok); end if;
    tok := tokens(indx);
    while (tok.type_name != ']') loop
      e_arr.extend;
      v_count := v_count + 1;
      case tok.type_name
        when 'TRUE' then e_arr(v_count) := pljson_bool(true);
        when 'FALSE' then e_arr(v_count) := pljson_bool(false);
        when 'NULL' then e_arr(v_count) := pljson_null();
        when 'STRING' then e_arr(v_count) := case when tok.data_overflow is not null then pljson_string(tok.data_overflow) else pljson_string(tok.data) end;
        when 'ESTRING' then e_arr(v_count) := pljson_string(tok.data_overflow, false);
        /* E.I.Sarmas (github.com/dsnz)   2016-12-01   support for binary_double numbers */
        --when 'NUMBER' then e_arr(v_count) := pljson_number(to_number(replace(tok.data, '.', decimalpoint)));
        when 'NUMBER' then
          pv := pljson_number(0);
          pv.parse_number(replace(tok.data, '.', decimalpoint));
          e_arr(v_count) := pv;
        when '[' then
          declare e_list pljson_list; begin
            indx := indx + 1;
            e_list := parseArr(tokens, indx);
            e_arr(v_count) := e_list;
          end;
        when '{' then
          indx := indx + 1;
          e_arr(v_count) := parseObj(tokens, indx);
        else
          p_error('Expected a value', tok);
      end case;
      indx := indx + 1;
      if (indx > tokens.count) then p_error('] not found', tok); end if;
      tok := tokens(indx);
      if (tok.type_name = ',') then --advance
        indx := indx + 1;
        if (indx > tokens.count) then p_error('more elements in array was excepted', tok); end if;
        tok := tokens(indx);
        if (tok.type_name = ']') then --premature exit
          p_error('Premature exit in array', tok);
        end if;
      elsif (tok.type_name != ']') then --error
        p_error('Expected , or ]', tok);
      end if;

    end loop;
    ret_list.list_data := e_arr;
    return ret_list;
  end parseArr;

  function parseMem(tokens lTokens, indx in out pls_integer, mem_name varchar2, mem_indx number) return pljson_element as
    mem pljson_element;
    tok rToken;
    pv pljson_number;
  begin
    tok := tokens(indx);
    case tok.type_name
      when 'TRUE' then mem := pljson_bool(true);
      when 'FALSE' then mem := pljson_bool(false);
      when 'NULL' then mem := pljson_null();
      when 'STRING' then mem := case when tok.data_overflow is not null then pljson_string(tok.data_overflow) else pljson_string(tok.data) end;
      when 'ESTRING' then mem := pljson_string(tok.data_overflow, false);
      /* E.I.Sarmas (github.com/dsnz)   2016-12-01   support for binary_double numbers */
      --when 'NUMBER' then mem := pljson_number(to_number(replace(tok.data, '.', decimalpoint)));
      when 'NUMBER' then
        pv := pljson_number(0);
        pv.parse_number(replace(tok.data, '.', decimalpoint));
        mem := pv;
      when '[' then
        declare
          e_list pljson_list;
        begin
          indx := indx + 1;
          e_list := parseArr(tokens, indx);
          mem := e_list;
        end;
      when '{' then
        indx := indx + 1;
        mem := parseObj(tokens, indx);
      else
        p_error('Found '||tok.type_name, tok);
    end case;
    mem.mapname := mem_name;
    mem.mapindx := mem_indx;

    indx := indx + 1;
    return mem;
  end parseMem;

  /*procedure test_duplicate_members(arr in json_member_array, mem_name in varchar2, wheretok rToken) as
  begin
    for i in 1 .. arr.count loop
      if (arr(i).member_name = mem_name) then
        p_error('Duplicate member name', wheretok);
      end if;
    end loop;
  end test_duplicate_members;*/

  function parseObj(tokens lTokens, indx in out nocopy pls_integer) return pljson as
    type memmap is table of number index by varchar2(4000); -- i've read somewhere that this is not possible - but it is!
    mymap memmap;
    nullelemfound boolean := false;

    obj pljson;
    tok rToken;
    mem_name varchar(4000);
    arr pljson_element_array := pljson_element_array();
  begin
    --what to expect?
    while (indx <= tokens.count) loop
      tok := tokens(indx);
      --debug('E: '||tok.type_name);
      case tok.type_name
      when 'STRING' then
        --member
        mem_name := substr(tok.data, 1, 4000);
        begin
          if (mem_name is null) then
            if (nullelemfound) then
              p_error('Duplicate empty member: ', tok);
            else
              nullelemfound := true;
            end if;
          elsif (mymap(mem_name) is not null) then
            p_error('Duplicate member name: '||mem_name, tok);
          end if;
        exception
          when no_data_found then mymap(mem_name) := 1;
        end;

        indx := indx + 1;
        if (indx > tokens.count) then p_error('Unexpected end of input', tok); end if;
        tok := tokens(indx);
        indx := indx + 1;
        if (indx > tokens.count) then p_error('Unexpected end of input', tok); end if;
        if (tok.type_name = ':') then
          --parse
          declare
            jmb pljson_element;
            x number;
          begin
            x := arr.count + 1;
            jmb := parseMem(tokens, indx, mem_name, x);
            arr.extend;
            arr(x) := jmb;
          end;
        else
          p_error('Expected '':''', tok);
        end if;
        --move indx forward if ',' is found
        if (indx > tokens.count) then p_error('Unexpected end of input', tok); end if;

        tok := tokens(indx);
        if (tok.type_name = ',') then
          --debug('found ,');
          indx := indx + 1;
          tok := tokens(indx);
          if (tok.type_name = '}') then --premature exit
            p_error('Premature exit in json object', tok);
          end if;
        elsif (tok.type_name != '}') then
           p_error('A comma seperator is probably missing', tok);
        end if;
      when '}' then
        obj := pljson();
        obj.json_data := arr;
        return obj;
      else
        p_error('Expected string or }', tok);
      end case;
    end loop;

    p_error('} not found', tokens(indx-1));

    return obj;

  end;

  function parser(str varchar2) return pljson as
    tokens lTokens;
    obj pljson;
    indx pls_integer := 1;
    jsrc json_src;
  begin
    update_decimalpoint();
    jsrc := prepareVarchar2(str);
    tokens := lexer(jsrc);
    if (tokens(indx).type_name = '{') then
      indx := indx + 1;
      obj := parseObj(tokens, indx);
    else
      raise_application_error(-20101, 'JSON Parser exception - no { start found');
    end if;
    if (tokens.count != indx) then
      p_error('} should end the JSON object', tokens(indx));
    end if;

    return obj;
  end parser;

  function parse_list(str varchar2) return pljson_list as
    tokens lTokens;
    obj pljson_list;
    indx pls_integer := 1;
    jsrc json_src;
  begin
    update_decimalpoint();
    jsrc := prepareVarchar2(str);
    tokens := lexer(jsrc);
    if (tokens(indx).type_name = '[') then
      indx := indx + 1;
      obj := parseArr(tokens, indx);
    else
      raise_application_error(-20101, 'JSON List Parser exception - no [ start found');
    end if;
    if (tokens.count != indx) then
      p_error('] should end the JSON List object', tokens(indx));
    end if;

    return obj;
  end parse_list;

  function parse_list(str clob) return pljson_list as
    tokens lTokens;
    obj pljson_list;
    indx pls_integer := 1;
    jsrc json_src;
  begin
    update_decimalpoint();
    jsrc := prepareClob(str);
    tokens := lexer(jsrc);
    if (tokens(indx).type_name = '[') then
      indx := indx + 1;
      obj := parseArr(tokens, indx);
    else
      raise_application_error(-20101, 'JSON List Parser exception - no [ start found');
    end if;
    if (tokens.count != indx) then
      p_error('] should end the JSON List object', tokens(indx));
    end if;

    return obj;
  end parse_list;

  function parser(str clob) return pljson as
    tokens lTokens;
    obj pljson;
    indx pls_integer := 1;
    jsrc json_src;
  begin
    update_decimalpoint();
    --dbms_output.put_line('Using clob');
    jsrc := prepareClob(str);
    tokens := lexer(jsrc);
    if (tokens(indx).type_name = '{') then
      indx := indx + 1;
      obj := parseObj(tokens, indx);
    else
      raise_application_error(-20101, 'JSON Parser exception - no { start found');
    end if;
    if (tokens.count != indx) then
      p_error('} should end the JSON object', tokens(indx));
    end if;

    return obj;
  end parser;

  function parse_any(str varchar2) return pljson_element as
    tokens lTokens;
    obj pljson_list;
    ret pljson_element;
    indx pls_integer := 1;
    jsrc json_src;
  begin
    update_decimalpoint();
    jsrc := prepareVarchar2(str);
    tokens := lexer(jsrc);
    tokens(tokens.count+1).type_name := ']';
    obj := parseArr(tokens, indx);
    if (tokens.count != indx) then
      p_error('] should end the JSON List object', tokens(indx));
    end if;

    return obj.head();
  end parse_any;

  function parse_any(str clob) return pljson_element as
    tokens lTokens;
    obj pljson_list;
    indx pls_integer := 1;
    jsrc json_src;
  begin
    update_decimalpoint();
    jsrc := prepareClob(str);
    tokens := lexer(jsrc);
    tokens(tokens.count+1).type_name := ']';
    obj := parseArr(tokens, indx);
    if (tokens.count != indx) then
      p_error('] should end the JSON List object', tokens(indx));
    end if;

    return obj.head();
  end parse_any;

  /* last entry is the one to keep */
  procedure remove_duplicates(obj in out nocopy pljson) as
    type memberlist is table of pljson_element index by varchar2(4000);
    members memberlist;
    nulljsonvalue pljson_element := null;
    validated pljson := pljson();
    indx varchar2(4000);
  begin
    for i in 1 .. obj.count loop
      if (obj.get(i).mapname is null) then
        nulljsonvalue := obj.get(i);
      else
        members(obj.get(i).mapname) := obj.get(i);
      end if;
    end loop;

    validated.check_duplicate(false);
    indx := members.first;
    loop
      exit when indx is null;
      validated.put(indx, members(indx));
      indx := members.next(indx);
    end loop;
    if (nulljsonvalue is not null) then
      validated.put('', nulljsonvalue);
    end if;

    validated.check_for_duplicate := obj.check_for_duplicate;

    obj := validated;
  end;

  function get_version return varchar2 as
  begin
    return 'PL/JSON 3.5.2';
  end get_version;

end pljson_parser;```