/* 	$NetBSD$ */

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jachym Holecek <freza@NetBSD.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <sys/param.h> 		/* roundup() */

#if defined(_KERNEL)
#include <sys/systm.h>
#elif defined(_STANDALONE)
#include <lib/libkern/libkern.h>
#else
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#if defined(DEBUG)
#include <stdarg.h>
#include <stdio.h>
#endif
#include <stdlib.h>
#endif

#include <prop/proplib.h>
#include "prop_object_impl.h"
#include "prop_codec_impl.h"


#if defined(_STANDALONE)
#define BUFINCR 		16
#else
#define BUFINCR 		128
#endif

#if !defined(__UNCONST)
#define __UNCONST(a) 		((void *)(unsigned long)(const void *)(a))
#endif

/*
 * This needs to go via a macro -- even if verbose()'s body would be empty
 * ifndef DEBUG, GCC still isn't smart enough to eliminate calls to funcs
 * that only exist if DEBUG. And we don't want to clutter the code with
 * preprocessor conditionals.
 */
#if defined(DEBUG)
#define VERBOSE(s, ...) 	verbose(s "\n", ## __VA_ARGS__)
#else
#define VERBOSE(s, ...) 	/* silence */
#endif

/* Ewww, depolymerise function names. */
#define	poec 				_prop_object_externalize_context 
#define poec_create 			_prop_object_externalize_context_alloc
#define poec_append 			_prop_object_externalize_append_cstring
#define poec_push 			_prop_object_externalize_append_char
#define poec_destroy 			_prop_object_externalize_context_free
#define	prop_keysym_str 		prop_dictionary_keysym_cstring_nocopy

#define _TK_COMPOUND_VALUES 		\
			TK_ARRAYO: 	\
	case 		TK_DICTO

typedef enum {
	/* Self representing. */
	TK_FIRST 		= 0, 	/* So that memset(0) DTRT. */
	TK_ARRAYC,
	TK_ARRAYO,
	TK_DATA,
	TK_DICTC,
	TK_DICTO,
	TK_WHITE,
	TK_LAST,

	/* Values. */
	TK_STRING,
	TK_SINT64,
	TK_SYMBOL,
	TK_UINT64
} token_type_t;

#define _PA_TOKEN_LAST 		TK_UINT64
#define _PA_TOKEN_NAMES 	\
	"FIRST", "ARRAYC", "ARRAYO", "DATA", "DICTC", "DICTO", "WHITE", \
	"LAST", "STRING", "SINT64", "SYMBOL", "UINT64",

typedef enum {
	SC_FIRST 		= 0,
	SC_WHITE,
	SC_SINT64,
	SC_UINT64,
	SC_QUOTED,
	SC_SYMBOL,
	SC_TOPLEVEL,
	SC_ERROR,
	SC_BASE64_APPEND,
	SC_BASE64_MAYBE_NEXT,
	SC_BASE64_NEXT,
	SC_BASE64_CLOSE,
	SC_STRING_APPEND,
	SC_STRING_MAYBE_NEXT,
	SC_STRING_CONCAT,
	SC_STRING_CLOSE,
	SC_COMMENT
} scan_state_t;

#define _SC_STATE_LAST 		SC_COMMENT
#define _SC_STATE_NAMES 	\
	"FIRST", "WHITE", "SINT64", "UINT64", "QUOTED", "SYMBOL", "TOPLEVEL", \
	"ERROR", "BASE64_APPEND", "BASE64_MAYBE_NEXT", "BASE64_NEXT", \
	"SC_BASE64_CLOSE", "STRING_APPEND", "STRING_MAYBE_NEXT", \
	"STRING_CONCAT", "STRING_CLOSE", "COMMENT"

typedef enum {
	PA_TOPLEVEL 		= 0,
	PA_ARRAY,
	PA_DICTIONARY,
	PA_ERROR,
	PA_HOME,
	PA_OBJECT
} parse_state_t;

#define _PA_STATE_LAST 		PA_OBJECT
#define _PA_STATE_NAMES 	\
	"TOPLEVEL", "ARRAY", "DICTIONARY", "ERROR", "HOME", "OBJECT"

union value {
        char 			*val_string;
	uint64_t 		val_unsigned;
	int64_t 		val_signed;
	struct {
		const u_char 	*vd_buf;
		size_t 		vd_len;
	} 			val_data;
};

/* Scanner constructs tokens and enqueues them to parser. */
struct token {
	SIMPLEQ_ENTRY(token) 	tok_link;
	token_type_t 		tok_type;

#if !defined(_STANDALONE)
	size_t 			tok_pos_line;
	size_t 			tok_pos_col;
#endif
	union value 		tok_value;
#define	tok_string 		tok_value.val_string
#define	tok_signed 		tok_value.val_signed
#define	tok_unsigned 		tok_value.val_unsigned
#define tok_data_buf 		tok_value.val_data.vd_buf
#define tok_data_len 		tok_value.val_data.vd_len
};

/* Stack for nested objects, entries relinked to consq on completion. */
struct frame {
	SIMPLEQ_ENTRY(frame) 	se_link; 	/* Stack entry link. */
	prop_object_t 		se_object; 	/* Compound object. */

	/* Dec: key to parent dict, Enc: next object from parent. */
	union {
		const char 		*un_symbol;
		prop_object_iterator_t 	un_iter;
	} 			se_un;
#define se_symbol 		se_un.un_symbol
#define se_iter 		se_un.un_iter
};

SIMPLEQ_HEAD(stack, frame);

struct parser {
	/* Incoming tokens, operation stack, list of constructed objects. */
	SIMPLEQ_HEAD(, token) 	pa_tokens; 	/* FIFO */
	struct stack 		pa_stack; 	/* LIFO */
	struct stack 		pa_consq; 	/* FIFO */

	/* Store parser state across calls. */
	parse_state_t 		pa_state;
	parse_state_t 		pa_prev;
	parse_state_t 		pa_last;

	/* Store scanner state across calls. */
	scan_state_t 		sc_state; /* Current state. */
	scan_state_t 		sc_prev;  /* State at start of this cycle. */
	scan_state_t 		sc_last;  /* Last value of prev != state. */

#if !defined(_STANDALONE)
	/* Position in input stream. */
	size_t 			sc_pos_line;
	size_t 			sc_pos_col;
#endif
	/* Scanner internalization buffer. */
	u_char 			*sc_string;
	size_t 			sc_strcur;
	size_t 			sc_strlen;

	/* Base64 decoder. */
	size_t 			sc_base64_size; 	/* in bits */
};

/* Base64 code space (plus '='). */
static const char base64abc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopq"
    "rstuvwxyz0123456789+/";



#if defined(_KERNEL) || defined(_STANDALONE)
static char *
strdup(const char *s)
{
	char 			*p;
	size_t 			l;

	l = strlen(s) + 1;
	p = _PROP_MALLOC(l, M_TEMP);

	return (memcpy(p, s, l));
}

static int
isprint(int c)
{
	return (c >= ' ' && c <= '~');
}
#endif /* _KERNEL || _STANDALONE */

static boolean_t
stroneof(const char *s, const char *t[])
{
	int 			i;

	for (i = 0; t[i]; i++)
		if (strcasecmp(s, t[i]) == 0)
			return (TRUE);
	return (FALSE);
}

#if defined(DEBUG)

static void
verbose(const char *fmt, ...)
{
	static FILE 		*tracefile = NULL;
	char 			*s;
	va_list 		ap;

	/* XXX locking */
	if (tracefile == NULL) {
		s = getenv("PROPLIB_SCN_TRACEFILE");
		if (s == NULL)
			s = "__prop_scn.out";

		tracefile = fopen(s, "w");
		if (tracefile == NULL)
			abort();
	}

	va_start(ap, fmt);
	vfprintf(tracefile, fmt, ap);
	va_end(ap);
}

static const char *
scanner_quote_char(u_char c)
{
	static char 		buf[8]; 	/* 'c', '\n', 0xab */
	u_char 			d = 0;

	switch (c) {
	case '\f': 	d = 'f'; 	break;
	case '\n': 	d = 'n'; 	break;
	case '\r': 	d = 'r'; 	break;
	case '\t': 	d = 't'; 	break;
	case '\v': 	d = 'v'; 	break;
	}
	if (d) {
		sprintf(buf, "'\\%c'", d);
		return (buf);
	}

	if (isprint(c))
		sprintf(buf, "'%c'", (char)c);
	else
		sprintf(buf, "0x%02x", (u_int)c);
	return (buf);
}

static const char *
scanner_state_name(scan_state_t n)
{
	static const char *const 	names[] = { _SC_STATE_NAMES };
	static int 			idx = 0; 	/* XXX hack, printf */
	static char 			buf[5][32];

	if (n < 0 || n > _SC_STATE_LAST) {
		if (++idx == 5)
			idx = 0;

		snprintf(buf[idx], 32, "<wrong %d>", n);
		return (buf[idx]);
	}

	return (names[n]);
}

static const char *
parser_state_name(parse_state_t n)
{
	static const char *const 	names[] = { _PA_STATE_NAMES };
	static int 			idx = 0; 	/* XXX hack, printf */
	static char 			buf[5][32];

	if (++idx == 5)
		idx = 0;

	if (n < 0 || n > _PA_STATE_LAST) {
		snprintf(buf[idx], 32, "<wrong %d>", n);
		return (buf[idx]);
	}

	return (names[n]);
}

static const char *
parser_token_name(token_type_t n)
{
	static const char *const 	names[] = { _PA_TOKEN_NAMES };

	if (n < 0 || n > _PA_TOKEN_LAST)
		return ("<wrong token>");
	return (names[n]);
}

static const char *
parser_token_desc(struct token *t)
{
	static char 			buf[32];
	static const size_t 		sof = sizeof(buf);
	const char 			*s;
	int 				n = t->tok_type;

	s = parser_token_name(n);

	switch (n) {
	case TK_SYMBOL:
		snprintf(buf, sof, "%s '%s'", s, t->tok_string);
		return (buf);
	case TK_SINT64:
		snprintf(buf, sof, "%s %"PRId64, s, t->tok_signed);
		return (buf);
	case TK_STRING:
		snprintf(buf, sof, "%s \"%s\"", s, t->tok_string);
		return (buf);
	case TK_UINT64:
		snprintf(buf, sof, "%s #%"PRIx64, s, t->tok_unsigned);
		return (buf);
	case TK_DATA:
		snprintf(buf, sof, "%s %zdB", s, t->tok_data_len);
		return (buf);
	}
	return (s);
}

#endif /* PROP_SCN_DEBUG */

static prop_string_t
prop_scn_create_string(struct token *t)
{
	prop_string_t 			ps;
	char 				*str = t->tok_string;

	ps = _prop_string_alloc();
	if (ps == NULL)
		return (NULL);

	ps->ps_mutable = str;
	ps->ps_size = strlen(str);

	return (ps);
}

static prop_number_t
prop_scn_create_uint(struct token *t)
{
	return (prop_number_create_unsigned_integer(t->tok_unsigned));
}

static prop_number_t
prop_scn_create_sint(struct token *t)
{
	return (prop_number_create_integer(t->tok_signed));
}

static boolean_t
scanner_ensure_strlen(struct parser *pa)
{
	void 				*b;

	/* Enough room for <char, '\0'> or base64 triplet. */
	if ((pa->sc_strlen - pa->sc_strcur) < 3) {
		b = _PROP_REALLOC(pa->sc_string, pa->sc_strlen + BUFINCR,
		    M_TEMP);
		if (b == NULL) {
			VERBOSE("scanner: ENOMEM internalization buffer");
			return (TRUE);
		}

		pa->sc_string = b;
		pa->sc_strlen += BUFINCR;
	}

	/* No error == success. */
	return (FALSE);
}

static struct token *
parser_token_put(struct parser *pa, token_type_t tok)
{
	struct token 			*t;

	t = _PROP_MALLOC(sizeof(struct token), M_TEMP);
	if (t == NULL) {
		VERBOSE(" parser: ENOMEM token %s", parser_token_name(tok));
		return (NULL);
	}

	memset(t, 0, sizeof(struct token));
	t->tok_type = tok;

#if !defined(_STANDALONE)
	t->tok_pos_line = pa->sc_pos_line;
	t->tok_pos_col = pa->sc_pos_col;
#endif

	SIMPLEQ_INSERT_TAIL(&pa->pa_tokens, t, tok_link);
	return (t);
}

static void
parser_token_free(struct token *t, boolean_t force)
{
	/* Normally, tok_string is donated upwards, not duplicated. */
	if (force && (t->tok_type == TK_STRING || t->tok_type == TK_SYMBOL) &&
	    t->tok_string != NULL)
		_PROP_FREE(t->tok_string, M_TEMP);
	_PROP_FREE(t, M_TEMP);
}

#define SC_ARROW_P(src, dst) 	(pa->sc_prev == (src) && pa->sc_state == (dst))
#define SC_CYCLE_P(s) 		SC_ARROW_P(s, s)

static u_char
scanner_base64_decode(u_char c)
{
	const char 			*s;

	if (c == '=')
		return (0);

	s = strchr(base64abc, (int)(u_int)c);
	_PROP_ASSERT(s);

	return ((u_char)(s - base64abc));
}

static int
prop_scanner_exec(struct parser *pa, const u_char *input, size_t length)
{
	const u_char 			*p = input;
	const u_char 			*pe = input + length;
	struct token 			*t;
	boolean_t 			keepchar;
	u_char 				c;

	if (input == NULL || length == 0) {
		if (parser_token_put(pa, TK_LAST) == NULL)
			return (ENOMEM);
		return (0);
	}

 advance:
	if (p == pe)
		return (0);
	c = *p++;

 dispatch:
	keepchar = TRUE; 	/* for callcc */

	switch (pa->sc_state) {
	case SC_FIRST:
		/* Force transition. */
		pa->sc_state = SC_TOPLEVEL;
		goto callcc;

	case SC_TOPLEVEL:
		switch (c) {
		case '#':
			pa->sc_state = SC_UINT64;
			goto callnext;
		case '{':
			if (parser_token_put(pa, TK_DICTO) == NULL)
				return (ENOMEM);
			goto callnext;
		case '}':
			if (parser_token_put(pa, TK_DICTC) == NULL)
				return (ENOMEM);
			goto callnext;
		case '[':
			if (parser_token_put(pa, TK_ARRAYO) == NULL)
				return (ENOMEM);
			goto callnext;
		case ']':
			if (parser_token_put(pa, TK_ARRAYC) == NULL)
				return (ENOMEM);
			goto callnext;
		case '"':
			/* Opening quote, clear from edge action.  */
			pa->sc_state = SC_STRING_APPEND;
			goto callnext;
		case ';':
			pa->sc_state = SC_COMMENT;
			goto callnext;
		case ':':
			pa->sc_state = SC_BASE64_APPEND;
			goto callnext;
		}

		if (c == '-' || c == '+' || isdigit(c)) {
			pa->sc_state = SC_SINT64;
			goto callnext;
		} else
		if (isspace(c)) {
			pa->sc_state = SC_WHITE;
			goto callnext;
		}
		pa->sc_state = SC_SYMBOL;
		goto callnext;

	case SC_BASE64_APPEND:
		if (strchr(base64abc, c) != NULL || c == '=') {
			char 			d = scanner_base64_decode(c);

			if (scanner_ensure_strlen(pa))
				return (ENOMEM);

			switch (pa->sc_base64_size % 24) {
			case 0:
				pa->sc_string[pa->sc_strcur] = d << 2;
				break;
			case 6:
				/* LINTED: sc_string & d are u_char */
				pa->sc_string[pa->sc_strcur] |= d >> 4;
				pa->sc_strcur++;
				pa->sc_string[pa->sc_strcur] = d << 4;
				break;
			case 12:
				/* LINTED: sc_string & d are u_char */
				pa->sc_string[pa->sc_strcur] |= d >> 2;
				pa->sc_strcur++;
				pa->sc_string[pa->sc_strcur] = d << 6;
				break;
			case 18:
				pa->sc_string[pa->sc_strcur++] |= d;
				break;
			}

			pa->sc_base64_size += 6;
		} else {
			pa->sc_state = SC_BASE64_MAYBE_NEXT;
			goto callcc;
		}
		break;

	case SC_BASE64_MAYBE_NEXT:
		if (pa->sc_base64_size == 0) {
			VERBOSE("scanner: BASE64 first chunk empty");
			pa->sc_state = SC_ERROR;
			goto callcc;
		}
		if (c == '.') {
			pa->sc_state = SC_BASE64_NEXT;
			goto callnext;
		}
		if (! isspace(c)) {
			pa->sc_state = SC_BASE64_CLOSE;
			goto callcc;
		}
		break;

	case SC_BASE64_NEXT:
		if (strchr(base64abc, c) != NULL || c == '=') {
			pa->sc_state = SC_BASE64_APPEND;
			goto callcc;
		}
		if (! isspace(c)) {
			pa->sc_state = SC_ERROR;
			goto callcc;
		}
		break;

	case SC_BASE64_CLOSE:
		{
			u_char 			*s;

			/* Length must always be multiple of 3 bytes. */
			if (pa->sc_base64_size % 24) {
				/* Warrants we're long enough to roundup. */
				if (scanner_ensure_strlen(pa))
					return (ENOMEM);

				/* Don't overwrite string[strcur]! */
				memset(pa->sc_string + pa->sc_strcur + 1, 0,
				    pa->sc_strlen - pa->sc_strcur - 1);

				pa->sc_base64_size =
				    roundup(pa->sc_base64_size, 24);
			}

			/* Convert length to bytes. */
			pa->sc_base64_size = roundup(pa->sc_base64_size, 8)/8;

			s = _PROP_MALLOC(pa->sc_base64_size, M_TEMP);
			if (s == NULL) {
				VERBOSE("scanner: ENOMEM data");
				return (ENOMEM);
			}
			memcpy(s, pa->sc_string, pa->sc_base64_size);

			if ((t = parser_token_put(pa, TK_DATA)) == NULL)
				return (ENOMEM);
			t->tok_data_buf = s;
			t->tok_data_len = pa->sc_base64_size;

			pa->sc_state = SC_TOPLEVEL;
			goto callcc;
		}
		/* UNREACHED */

	case SC_STRING_APPEND:
		if (c == '\\') {
			pa->sc_state = SC_QUOTED;
			goto callnext;
		}
		if (c == '"') {
			/* Closing quote, see if concatenation follows. */
			pa->sc_state = SC_STRING_MAYBE_NEXT;
			goto callnext;
		}
		if (! isprint(c)) {
			pa->sc_state = SC_ERROR;
			goto callcc;
		}
		/* Append from edge action. */
		break;

	case SC_STRING_MAYBE_NEXT:
		if (c == '.') {
			pa->sc_state = SC_STRING_CONCAT;
			goto callnext;
		}
		if (! isspace(c)) {
			pa->sc_state = SC_STRING_CLOSE;
			goto callcc;
		}
		break;

	case SC_STRING_CONCAT:
		if (c == '"') {
			/* Opening quote of concat string. */
			pa->sc_state = SC_STRING_APPEND;
			goto callnext;
		}
		if (! isspace(c)) {
			pa->sc_state = SC_ERROR;
			goto callcc;
		}
		break;

	case SC_STRING_CLOSE:
		{
			char 			*s;

			s = strdup((const char *)pa->sc_string);
			if (s == NULL) {
				VERBOSE("scanner: ENOMEM string");
				return (ENOMEM);
			}

			if ((t = parser_token_put(pa, TK_STRING)) == NULL)
				return (ENOMEM);
			t->tok_string = s;

			pa->sc_state = SC_TOPLEVEL;
			goto callcc;
		}
		/* NOTREACHED */

	case SC_QUOTED:
		{
			u_char 			d;

			if (scanner_ensure_strlen(pa))
				return (ENOMEM);

			switch (d = c) {
			case '\\': 	d = '\\'; 	break;
			case 'n': 	d = '\n'; 	break;
			case 't': 	d = '\t'; 	break;
			case '"': 	d = '\"'; 	break;
			}

			pa->sc_string[pa->sc_strcur++] = d;
			pa->sc_string[pa->sc_strcur] = '\0';

			pa->sc_state = SC_STRING_APPEND;
			goto callnext;
		}
		/* NOTREACHED */

	case SC_WHITE:
		if (! isspace(c)) {
			if ((t = parser_token_put(pa, TK_WHITE)) == NULL)
				return (ENOMEM);

			pa->sc_state = SC_TOPLEVEL;
			goto callcc;
		}
		break;

	case SC_SINT64:
		if (! isdigit(c) && c != '+' && c != '-') {
			long long 		n;
			char 			*end;

			n = strtoll((const char *)pa->sc_string, &end, 10);
			if (*end != '\0' || n > INT64_MAX || n < INT64_MIN) {
				VERBOSE("scanner: wrong SINT64 '%s'",
				    pa->sc_string);
				return (EINVAL);
			}

			if ((t = parser_token_put(pa, TK_SINT64)) == NULL)
				return (ENOMEM);
			t->tok_unsigned = (int64_t)n;

			pa->sc_state = SC_TOPLEVEL;
			goto callcc;
		}
		break;

	case SC_UINT64:
		if (! isxdigit(c)) {
			unsigned long long 	u;
			char 			*end;

			u = strtoull((const char *)pa->sc_string, &end, 16);
			if (*end != '\0' || u > UINT64_MAX) {
				VERBOSE("scanner: wrong UINT64 '%s'",
				    pa->sc_string);
				return (EINVAL);
			}

			if ((t = parser_token_put(pa, TK_UINT64)) == NULL)
				return (ENOMEM);
			t->tok_unsigned = (uint64_t)u;

			pa->sc_state = SC_TOPLEVEL;
			goto callcc;
		}
		break;

	case SC_SYMBOL:
		if (isspace(c) || c == '\\') {
			char 			*s;

			s = strdup((const char *)pa->sc_string);
			if (s == NULL) {
				VERBOSE("scanner: ENOMEM symbol");
				return (ENOMEM);
			}

			if ((t = parser_token_put(pa, TK_SYMBOL)) == NULL) {
				_PROP_FREE(s, M_TEMP);
				return (ENOMEM);
			}
			t->tok_string = s;

			pa->sc_state = SC_TOPLEVEL;
			goto callcc;
		}
		if (! isprint(c)) {
			pa->sc_state = SC_ERROR;
			goto callcc;
		}
		break;

	case SC_COMMENT:
		if (c == '\n' || c == '\r') {
			pa->sc_state = SC_TOPLEVEL;
			goto callnext;
		}
		break;

	case SC_ERROR:
		VERBOSE("scanner: wrong char '%c' (line %zd char %zd) in "
		    "state %s", c, pa->sc_pos_line, pa->sc_pos_col,
		    scanner_state_name(pa->sc_last));
		return (EINVAL);
	}

 callnext:
	keepchar = FALSE;

 callcc:
	if (pa->sc_state != pa->sc_prev) {
		VERBOSE("scanner: %-17s --> %-17s %s\t[%d, %d]",
		    ((keepchar && pa->sc_prev != SC_FIRST) ?
		     "" : scanner_state_name(pa->sc_prev)),
		    scanner_state_name(pa->sc_state),
		    scanner_quote_char(c), pa->sc_pos_line, pa->sc_pos_col);
	} else {
		VERBOSE("scanner: %-17s ::: %-17s %s\t[%d, %d]", "", "",
		    scanner_quote_char(c), pa->sc_pos_line, pa->sc_pos_col);
	}

	/* Be specific about transitions, let compiler deal w/redundancy. */
	if (SC_ARROW_P(SC_TOPLEVEL, SC_BASE64_APPEND)) {
		pa->sc_base64_size = 0;
		pa->sc_strcur = 0;
	}
	if (SC_ARROW_P(SC_TOPLEVEL, SC_STRING_APPEND) ||
	    SC_ARROW_P(SC_TOPLEVEL, SC_SYMBOL) ||
	    SC_ARROW_P(SC_TOPLEVEL, SC_UINT64) ||
	    SC_ARROW_P(SC_TOPLEVEL, SC_SINT64)) {
		pa->sc_strcur = 0;
	}
	if (SC_CYCLE_P(SC_UINT64) ||
	    SC_CYCLE_P(SC_SINT64) ||
	    SC_CYCLE_P(SC_STRING_APPEND) ||
	    SC_CYCLE_P(SC_SYMBOL) ||
	    SC_ARROW_P(SC_TOPLEVEL, SC_SINT64) ||
	    SC_ARROW_P(SC_TOPLEVEL, SC_SYMBOL)) {
		if (scanner_ensure_strlen(pa))
			return (ENOMEM);

		pa->sc_string[pa->sc_strcur++] = c;
		pa->sc_string[pa->sc_strcur] = '\0';
	}

#if !defined(_STANDALONE)
	if (! keepchar) {
		if (c == '\n') {
			pa->sc_pos_line++;
			pa->sc_pos_col = 1;
		} else {
			if (c == '\t')
				pa->sc_pos_col = roundup(pa->sc_pos_col, 8);
			else
				pa->sc_pos_col++;
		}
	}
#endif

	/* Commited to new state. */
	if (pa->sc_state != pa->sc_prev)
		pa->sc_last = pa->sc_prev;
	pa->sc_prev = pa->sc_state;

	if (keepchar)
		goto dispatch;
	else
		goto advance;

	/* UNREACHED */
}

#undef SC_CYCLE_P
#undef SC_ARROW_P

static void
parser_frame_free(struct frame *e)
{
	if (e->se_symbol)
		_PROP_FREE(__UNCONST(e->se_symbol), M_TEMP);
	if (e->se_object != NULL)
		prop_object_release(e->se_object);
	_PROP_FREE(e, M_TEMP);
}

static boolean_t
parser_frame_enter(struct parser *pa, prop_object_t o)
{
	struct frame 			*e;

	if (o == NULL)
		return (TRUE);

	e = _PROP_MALLOC(sizeof(struct frame), M_TEMP);
	if (e == NULL)
		return (TRUE);

	memset(e, 0, sizeof(struct frame));
	e->se_object = o;

	SIMPLEQ_INSERT_HEAD(&pa->pa_stack, e, se_link);
	return (FALSE);
}

static int
parser_frame_store(struct parser *pa, prop_object_t o)
{
	struct frame 			*e;
	prop_object_t 			the;

	e = SIMPLEQ_FIRST(&pa->pa_stack);
	if (e == NULL) {
		VERBOSE(" parser: stack underflow");
		return (EINVAL);
	}
	the = e->se_object;

	switch (prop_object_type(the)) {
	case PROP_TYPE_ARRAY:
		_PROP_ASSERT(e->se_symbol == NULL);
		if (prop_array_add(the, o) == FALSE)
			return (ENOMEM);
		break;

	case PROP_TYPE_DICTIONARY:
		_PROP_ASSERT(e->se_symbol != NULL);
		if (prop_dictionary_set(the, e->se_symbol, o) == FALSE)
			return (ENOMEM);

		_PROP_FREE(__UNCONST(e->se_symbol), M_TEMP);
		e->se_symbol = NULL;
		break;

	default:
		VERBOSE(" parser: wrong object on stack, not compound");
		return (EINVAL);
	}

	prop_object_release(o);
	return (0);
}

static int
parser_frame_leave(struct parser *pa)
{
	struct frame 			*e;
	prop_object_t 			o;

	/* Get hold of the lower object. */
	if ((e = SIMPLEQ_FIRST(&pa->pa_stack)) == NULL) {
		VERBOSE(" parser: stack underflow");
		return (EINVAL);
	}
	SIMPLEQ_REMOVE_HEAD(&pa->pa_stack, se_link);

	/* Move it to finished objects if it's toplevel. */
	if (SIMPLEQ_EMPTY(&pa->pa_stack)) {
		SIMPLEQ_INSERT_TAIL(&pa->pa_consq, e, se_link);
		_PROP_ASSERT(e->se_object);
		return (0);
	}

	/* Otherwise insert into current compound. */
	o = e->se_object;

	/* Make sure ${o} isn't released, parser_frame_store() will do it. */
	e->se_object = NULL;
	parser_frame_free(e);

	return (parser_frame_store(pa, o));
}

static int
prop_scn_parser_create(prop_parser_t *pp)
{
	struct parser 			*pa;

	pa = _PROP_MALLOC(sizeof(struct parser), M_TEMP);
	if (pa == NULL)
		return (ENOMEM);
	memset(pa, 0, sizeof(struct parser));

	SIMPLEQ_INIT(&pa->pa_tokens);
	SIMPLEQ_INIT(&pa->pa_stack);
	SIMPLEQ_INIT(&pa->pa_consq);

#if !defined(_STANDALONE)
	/* Text editors tend to count from 1, be friendly. */
	pa->sc_pos_line = 1;
	pa->sc_pos_col = 1;
#endif

	if (parser_token_put(pa, TK_FIRST) == NULL) {
		_PROP_FREE(pa, M_TEMP);
		return (ENOMEM);
	}

	*pp = pa;
	return (0);
}

static void
prop_scn_parser_destroy(prop_parser_t arg)
{
	struct parser 			*pa = arg;
	struct token 			*t;
	struct frame 			*e;

	if (pa->sc_string)
		_PROP_FREE(pa->sc_string, M_TEMP);

	/* Free any pending tokens. */
	while ((t = SIMPLEQ_FIRST(&pa->pa_tokens)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&pa->pa_tokens, tok_link);
		parser_token_free(t, TRUE);
	}

	/* Free any active stack frames. */
	while ((e = SIMPLEQ_FIRST(&pa->pa_stack)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&pa->pa_stack, se_link);
		parser_frame_free(e);
	}

	/* Free any finished objects. */
	while ((e = SIMPLEQ_FIRST(&pa->pa_consq)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&pa->pa_consq, se_link);
		parser_frame_free(e);
	}

	_PROP_FREE(pa, M_TEMP);
}

static prop_object_t
prop_scn_parser_yield(prop_parser_t arg)
{
	struct parser 			*pa = arg;
	struct frame 			*e;
	prop_object_t 			o;

	if ((e = SIMPLEQ_FIRST(&pa->pa_consq)) == NULL)
		return (NULL);
	SIMPLEQ_REMOVE_HEAD(&pa->pa_consq, se_link);

	o = e->se_object;
	e->se_object = NULL;

	_PROP_ASSERT(e);
	_PROP_ASSERT(o);

	parser_frame_free(e);
	return (o);
}

static int
prop_scn_parser_exec(prop_parser_t arg, const u_char *input, size_t length)
{
	static const char *__truths[] =	{ "true", "yes", "on", NULL };
	static const char *__lies[] = { "false", "no", "off", NULL };
	prop_object_t 			the;
	struct frame 			*frame;
	struct token 			*t;
	struct parser 			*pa = arg;
	int 				nexttoken, ret;

	ret = prop_scanner_exec(pa, input, length);
	if (ret)
		return (ret);

 advance:
	if ((t = SIMPLEQ_FIRST(&pa->pa_tokens)) == NULL)
		return (0);

 dispatch:
	pa->pa_prev = pa->pa_state;
	nexttoken = FALSE; 	/* for callcc */

	switch (pa->pa_state) {
	case PA_TOPLEVEL:
		switch (t->tok_type) {
		case TK_FIRST:
			/* XXX read version token */
			goto callnext;

		case TK_DICTO:
			if (parser_frame_enter(pa,
			    (prop_object_t)prop_dictionary_create())) {
				VERBOSE(" parser: ENOMEM dictionary");
				return (ENOMEM);
			}
			pa->pa_state = PA_DICTIONARY;
			goto callnext;

		case TK_ARRAYO:
			if (parser_frame_enter(pa,
			    (prop_object_t)prop_array_create())) {
				VERBOSE(" parser: ENOMEM array");
				return (ENOMEM);
			}
			pa->pa_state = PA_ARRAY;
			goto callnext;

		case TK_LAST:
			if (SIMPLEQ_NEXT(t, tok_link) != NULL) {
				VERBOSE(" parser: stack not empty at EOF");
				return (EINVAL);
			}
			return (0);

		default:
			/* GCC tries to be smart but fails. */
			break;
		}
		if (t->tok_type != TK_WHITE) {
			pa->pa_state = PA_ERROR;
			goto callcc;
		}
		break;

	case PA_ARRAY:
		if (t->tok_type == TK_ARRAYC) {
			if ((ret = parser_frame_leave(pa)) != 0) {
				if (ret == EINVAL)
					VERBOSE(" parser: [%d, %d] "
					    "misplaced ']'",
					    t->tok_pos_line, t->tok_pos_col);
				return (ret);
			}
			pa->pa_state = PA_HOME;
			goto callcc;
		} else
		if (t->tok_type != TK_WHITE) {
			pa->pa_state = PA_OBJECT;
			goto callcc;
		}
		break;

	case PA_DICTIONARY:
		switch (t->tok_type) {
		case TK_DICTC:
			if ((ret = parser_frame_leave(pa)) != 0) {
				if (ret == EINVAL)
					VERBOSE(" parser: [%d, %d] "
					    "misplaced '}'",
					    t->tok_pos_line, t->tok_pos_col);
				return (ret);
			}
			pa->pa_state = PA_HOME;
			goto callcc;

		case TK_SYMBOL:
			frame = SIMPLEQ_FIRST(&pa->pa_stack);
			_PROP_ASSERT(frame && frame->se_symbol == NULL);
			frame->se_symbol = (const char *)t->tok_string;

			pa->pa_state = PA_OBJECT;
			goto callnext;

		default:
			/* GCC */
			break;
		}
		/* WHITE or ERROR */
		break;

	case PA_OBJECT:
		if (t->tok_type == TK_WHITE)
			break;

		switch (t->tok_type) {
		case TK_STRING:
			the = prop_scn_create_string(t);
			break;

		case TK_UINT64:
			the = prop_scn_create_uint(t);
			break;

		case TK_SINT64:
			the = prop_scn_create_sint(t);
			break;

		case TK_DATA:
			the = prop_data_create_data_nocopy(t->tok_data_buf,
			    t->tok_data_len);
			break;

		case TK_SYMBOL:
			/* Coerce SYMBOL to bool at value position. */
			if (stroneof((const char *)t->tok_string, __truths))
				the = prop_bool_create(TRUE);
			else
			if (stroneof((const char *)t->tok_string, __lies))
				the = prop_bool_create(FALSE);
			else {
				VERBOSE(" parser: [%d, %d] wrong BOOL '%s'",
				    t->tok_pos_line, t->tok_pos_col,
				    t->tok_string);
				return (EINVAL);
			}
			break;

		case _TK_COMPOUND_VALUES:
			/* Descend one level deeper via TOPLEVEL actions. */
			pa->pa_state = PA_TOPLEVEL;
			goto callcc;

		default:
			pa->pa_state = PA_ERROR;
			goto callcc;
		}

		/* We're supposed to have valid simple object now. */
		if (the == NULL) {
			VERBOSE(" parser: ENOMEM for %s",
			    parser_token_name(t->tok_type));
			return (ENOMEM);
		}

		/* Store it in current container. */
		if (parser_frame_store(pa, the))
			return (ENOMEM);

		pa->pa_state = PA_HOME;
		goto callcc;

	case PA_HOME:
		/*
		 * We've just finished an object (simple or compound).
		 * Continue where we came from -- at the parent container's
		 * main entry point. We get here through callcc.
		 */
		frame = SIMPLEQ_FIRST(&pa->pa_stack);
		if (frame == NULL) {
			pa->pa_state = PA_TOPLEVEL;
			break;
		}
		_PROP_ASSERT(frame->se_object);

		switch (prop_object_type(frame->se_object)) {
		case PROP_TYPE_ARRAY:
			pa->pa_state = PA_ARRAY;
			break;

		case PROP_TYPE_DICTIONARY:
			pa->pa_state = PA_DICTIONARY;
			break;

		default:
			VERBOSE(" parser: wrong object on stack");
			return (EINVAL);
		}
		break;

	case PA_ERROR:
		VERBOSE(" parser: [%d, %d] wrong token %s in state %s",
		    t->tok_pos_line, t->tok_pos_col,
		    parser_token_desc(t), parser_state_name(pa->pa_last));
		return (EINVAL);
	}

	/* Call to next implies token was accepted, so stay above. */
	if (pa->pa_prev == pa->pa_state &&
	    (pa->pa_state == PA_TOPLEVEL || pa->pa_state == PA_ARRAY ||
	     pa->pa_state == PA_DICTIONARY))
		if (t->tok_type != TK_WHITE) {
			pa->pa_state = PA_ERROR;
			goto callcc;
		}

 callnext:
	nexttoken = TRUE;

 callcc:
	if (pa->pa_state != pa->pa_last) {
		VERBOSE(" parser: %-17s --> %-17s %s",
		    (nexttoken ? parser_state_name(pa->pa_prev) : ""),
		    parser_state_name(pa->pa_state),
		    parser_token_desc(t));

		pa->pa_last = pa->pa_prev;
	} else {
		VERBOSE(" parser: %-17s ::: %-17s %s", "", "",
		    parser_token_desc(t));
	}

	if (nexttoken) {
		SIMPLEQ_REMOVE_HEAD(&pa->pa_tokens, tok_link);
		parser_token_free(t, FALSE);

		goto advance;
	} else {
		goto dispatch;
	}
	/* UNREACHED */
}

static boolean_t
format_string_quote(struct poec *ec, const char *s)
{
	const char 			*t;
	boolean_t 			ret;

	if (! poec_push(ec, '"'))
		return (TRUE);

	while (*s) {
		t = NULL;

		switch (*s) {
		case '\f': 	t = "\\f"; 	break;
		case '\n': 	t = "\\n"; 	break;
		case '\r': 	t = "\\r"; 	break;
		case '\t': 	t = "\\t"; 	break;
		case '\v': 	t = "\\v"; 	break;
		case '"': 	t = "\\\""; 	break;
		}

		if (t)
			ret = poec_append(ec, t);
		else
			ret = poec_push(ec, *s);
		if (ret == FALSE)
			return (TRUE);

		s++;
	}

	if (! poec_push(ec, '"'))
		return (TRUE);
	return (FALSE);
}

static boolean_t
format_data_base64(struct poec *ec, const char *s, size_t size)
{
	const u_char 			*b = (const u_char *)s;
	size_t 				i;
	u_int 				n;
	int 				ret = 0; 	/* XXX gcc sux */

	poec_push(ec, ':');

	/* LINTED n & b[] are unsigned */
	n = b[0] >> 2;

	for (i = 0; i < size; i++) {
		switch (i % 3) {
		case 0:
			/* LINTED: b[] is u_char */
			ret = poec_push(ec, base64abc[b[i] >> 2]);
			n = b[i] & 0x03;
			break;
		case 1:
			/* LINTED: b[] is u_char */
			ret = poec_push(ec,
			    base64abc[(n << 4) | (b[i] >> 4)]);
			n = b[i] & 0x0f;
			break;
		case 2:
			/* LINTED: b[] is u_char */
			if (!poec_push(ec, base64abc[(n << 2) | (b[i] >> 6)]) ||
			    !poec_push(ec, base64abc[b[i] & 0x3f]))
				ret = FALSE;
			else
				ret = TRUE;
			break;
		}
		if (ret == FALSE)
			return (TRUE);
	}

	/* Finish based on how many bytes of a triplet we already have. */
	switch (size % 3) {
	case 1:
		if (!poec_push(ec, base64abc[n << 4]))
			return (TRUE);
		break;
	case 2:
		if (!poec_push(ec, base64abc[n << 2]))
			return (TRUE);
		break;
	}

	/* Finally, pad to multiple of four characters of encoded text. */
	switch (size % 3) {
	case 1:
		if (!poec_push(ec, '='))
			return (TRUE);
		/* FALLTHROUGH */
	case 2:
		if (!poec_push(ec, '='))
			return (TRUE);
	}

	return (FALSE);
}

static boolean_t
format_indent(struct poec *ec)
{
	int 				i;

#if 0
	VERBOSE(" format: %zd/%zdB used, nesting depth %d",
	    ec->poec_len, ec->poec_capacity, ec->poec_depth);
#endif

	for (i = 0; i < ec->poec_depth; i++)
		if (poec_push(ec, '\t') == FALSE) {
			VERBOSE(" format: ENOMEM indent");
			return (TRUE);
		}
	return (FALSE);
}

static char *
prop_scn_externalize(prop_object_t o)
{
	char 				buf[32];
	struct stack 			sp;
	struct frame 			*e;
	struct poec 			*ec;
	char 				*s;
	prop_object_t 			the;
	boolean_t 			ret;

	SIMPLEQ_INIT(&sp);
	if ((ec = poec_create()) == NULL)
		return (NULL);

	/*
	 * We only work for compound objects, like XML codec does.
	 * Note that this ensures we'll record ${o} on the stack.
	 */
	switch (prop_object_type(o)) {
	case PROP_TYPE_ARRAY:
	case PROP_TYPE_DICTIONARY:
		break;
	default:
		goto lose;
	}
	the = o;

 again:
	switch (prop_object_type(the)) {
	case PROP_TYPE_BOOL:
		VERBOSE(" format: bool       [%d]", ec->poec_depth);
		if (prop_bool_true(the))
			ret = poec_append(ec, "True");
		else
			ret = poec_append(ec, "False");
		if (ret == FALSE)
			goto lose;
		break;

	case PROP_TYPE_NUMBER:
		VERBOSE(" format: number     [%d]", ec->poec_depth);
		if (prop_number_unsigned(the))
			snprintf(buf, sizeof(buf), "#%" PRIx64,
			    prop_number_unsigned_integer_value(the));
		else
			snprintf(buf, sizeof(buf), "%" PRId64,
			    prop_number_integer_value(the));
		if (poec_append(ec, buf) == FALSE)
			goto lose;
		break;

	case PROP_TYPE_STRING:
		VERBOSE(" format: string     [%d]", ec->poec_depth);
		if (format_string_quote(ec, prop_string_cstring_nocopy(the)))
			goto lose;
		break;

	case PROP_TYPE_DATA:
		VERBOSE(" format: data       [%d]", ec->poec_depth);
		if (format_data_base64(ec, prop_data_data_nocopy(the),
		    prop_data_size(the)))
			goto lose;
		break;

	case PROP_TYPE_ARRAY:
		VERBOSE(" format: array      [%d]", ec->poec_depth);
		if (!poec_append(ec, "["))
			goto lose;
		ec->poec_depth++;

		if ((e = _PROP_MALLOC(sizeof(struct frame), M_TEMP)) == NULL)
			goto lose;
		memset(e, 0, sizeof(struct frame));
		SIMPLEQ_INSERT_HEAD(&sp, e, se_link);

		e->se_object = the;
		e->se_iter = prop_array_iterator(the);
		if (e->se_iter == NULL)
			goto lose;
		break;

	case PROP_TYPE_DICTIONARY:
		VERBOSE(" format: dictionary [%d]", ec->poec_depth);
		if (!poec_append(ec, "{")) {
			VERBOSE(" format: ENOMEM dictionary open");
			goto lose;
		}
		ec->poec_depth++;

		if ((e = _PROP_MALLOC(sizeof(struct frame), M_TEMP)) == NULL) {
			VERBOSE(" format: ENOMEM stack frame");
			goto lose;
		}
		memset(e, 0, sizeof(struct frame));
		SIMPLEQ_INSERT_HEAD(&sp, e, se_link);

		e->se_object = the;
		e->se_iter = prop_dictionary_iterator(the);
		if (e->se_iter == NULL) {
			VERBOSE(" format: ENOMEM dictionary iterator");
			goto lose;
		}
		break;

	default:
		VERBOSE(" format: object %p wrong type %d", the,
		    prop_object_type(the));
		goto lose;
	}
	if (! poec_append(ec, "\n")) {
		VERBOSE(" format: ENOMEM newline after object");
		goto lose;
	}

 pop:
	e = SIMPLEQ_FIRST(&sp);
	if (e == NULL) {
		_PROP_ASSERT(ec->poec_depth == 0);
		printf("DONE\n"); /* XXX done */
	}
	_PROP_ASSERT(e->se_iter != NULL);

	/* Grab next object. */
	o = prop_object_iterator_next(e->se_iter);
	if (o == NULL) {
		SIMPLEQ_REMOVE_HEAD(&sp, se_link);

		_PROP_ASSERT(ec->poec_depth != 0);
		ec->poec_depth --;

		if (format_indent(ec)) {
			VERBOSE(" format: ENOMEM close indent");
			goto lose;
		}
		if (prop_object_type(e->se_object) == PROP_TYPE_DICTIONARY)
			ret = poec_append(ec, "}\n");
		else
			ret = poec_append(ec, "]\n");
		if (ret == FALSE) {
			VERBOSE(" format: ENOMEM close compound");
			goto lose;
		}

		prop_object_iterator_release(e->se_iter);
		_PROP_FREE(e, M_TEMP);

		if (SIMPLEQ_EMPTY(&sp))
			goto done;
		else
			goto pop;
	}

	/* Lookup the real object if we got indirect reference. */
	if (prop_object_type(o) == PROP_TYPE_DICT_KEYSYM) {
		const char 			*r;
		prop_object_t 			p;

		p = prop_dictionary_get_keysym((prop_dictionary_t)e->se_object,
		    (prop_dictionary_keysym_t)o);
		_PROP_ASSERT(p != NULL);

		r = prop_keysym_str((prop_dictionary_keysym_t)o);
		if (r == NULL) {
			VERBOSE(" format: EINVAL dictionary key");
			goto lose;
		}

		if (format_indent(ec) || poec_append(ec, r) == FALSE ||
		    poec_push(ec, '\t') == FALSE) {
			VERBOSE(" format: ENOMEM dictionary key");
			goto lose;
		}

		the = p;
	} else {
		if (format_indent(ec)) {
			VERBOSE(" format: ENOMEM array indent");
			goto lose;
		}
		the = o;
	}

	/* We've got a fresh object from the compound, analyse it. */
	goto again;

 done:
	/* Prepare the result for caller. */
	ec->poec_buf[ec->poec_len] = '\0';
	s = ec->poec_buf;

	/* The stack is empty at this point, just free externalize context. */
	poec_destroy(ec);

	return (s);
	/*NOTREACHED*/

 lose:
	VERBOSE(" format: LOST");
	while ((e = SIMPLEQ_FIRST(&sp)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&sp, se_link);
		if (e->se_iter)
			prop_object_iterator_release(e->se_iter);
		_PROP_FREE(e, M_TEMP);
	}

	if (ec->poec_buf)
		_PROP_FREE(ec->poec_buf, M_TEMP);
	poec_destroy(ec);

	return (NULL);
}

const struct _prop_codec prop_codec_scn = {
	.codec_name 			= "scn",
	.codec_sense 			= (const u_char *)".{[;",
	.codec_externalize_compound 	= prop_scn_externalize,
	.codec_parser_create 		= prop_scn_parser_create,
	.codec_parser_exec 		= prop_scn_parser_exec,
	.codec_parser_yield 		= prop_scn_parser_yield,
	.codec_parser_destroy 		= prop_scn_parser_destroy,
};
