#include <hammer.h>
#include <math.h>
#include <string.h>
#include <glib.h>
#include "datalang.h"

// top-level parsers
// All three are NULL until init_parser() has run, which assigns them.
const HParser* document = NULL;     // ws, one value, ws
const HParser* stream = NULL;       // zero or more stream_elem
const HParser* stream_elem = NULL;  // ws, one value, then lws and a newline

// number of octets that follow the '#' in each raw chunk of a byte array
#define RAWBLKSIZE 4096


// -- these might be candidates for inclusion in Hammer --

// Shorthand for h_token on a string literal: the length passed is sizeof(s)-1,
// i.e. the literal without its terminating NUL. Because sizeof is applied to
// the argument, s must be a literal or a char array, not a char pointer.
#define h_literal(s) h_token(s, sizeof(s)-1)

// Semantic action: reduce a sequence result to its first element.
// Returns NULL if p is NULL, if there is no AST, if the AST is not a
// TT_SEQUENCE, or if the sequence is empty.
const HParsedToken *h_act_first(const HParseResult *p) {
    const HParsedToken *ast = p ? p->ast : NULL;

    if(ast == NULL || ast->token_type != TT_SEQUENCE)
        return NULL;

    const HCountedArray *items = ast->seq;

    return (items->used >= 1) ? items->elements[0] : NULL;
}

const HParsedToken *h_act_last(const HParseResult *p) {
    if(!p) return NULL;

    const HParsedToken *tok = p->ast;

    if(!tok || tok->token_type != TT_SEQUENCE)
        return NULL;

    const HCountedArray *seq = tok->seq;
    size_t n = seq->used;

    if(n<1)
        return NULL;
    else
        return tok->seq->elements[n-1];
}

const HParsedToken *h_act_middle(const HParseResult *p) {
    if(!p) return NULL;

    const HParsedToken *tok = p->ast;

    if(!tok || tok->token_type != TT_SEQUENCE)
        return NULL;

    const HCountedArray *seq = tok->seq;
    size_t n = seq->used;

    if(n<1)
        return NULL;
    else
        return tok->seq->elements[(n-1)/2];
}


// -- token conversion, actions named after rule they attach to --

// Allocate a new DL_Value of type typ with g_new and wrap it in a TT_USER
// token allocated from the arena of p. Only the type field of the DL_Value is
// set here; callers fill in the payload.
// The token takes its index and bit offset from p->ast if there is one, and
// zero otherwise. Returns NULL if p is NULL.
HParsedToken *make_value(DL_Type typ, const HParseResult *p)
{
    if(p == NULL)
        return NULL; // XXX not needed?

    DL_Value *val = g_new(DL_Value, 1);
    val->type = typ;

    HParsedToken *tok = h_arena_malloc(p->arena, sizeof(HParsedToken));
    tok->token_type = TT_USER;
    tok->user = (void *)val;

    // inherit the input position of the parsed AST when available
    const HParsedToken *src = p->ast;
    if(src != NULL) {
        tok->index = src->index;
        tok->bit_offset = src->bit_offset;
    } else {
        tok->index = 0;
        tok->bit_offset = 0;
    }

    return tok;
}

// Shared constant values referenced by the tokens that act_null, act_true and
// act_false produce (and used by act_list as a fallback element).
static DL_Value dl_null = { .type = DL_T_NULL };
static DL_Value dl_true = { .type = DL_T_BOOLEAN, .boolean = 1 };
static DL_Value dl_false = { .type = DL_T_BOOLEAN, .boolean = 0 };  // must be 0: printval tests v->boolean

// Wrap an existing DL_Value in a TT_USER token allocated from the arena of p.
// Unlike make_value, no new DL_Value is allocated: the token refers to v
// itself (used with the shared constants dl_null, dl_true and dl_false).
// The token takes its index and bit offset from p->ast if there is one and
// zero otherwise, mirroring make_value, so that no field is left
// uninitialized. Returns NULL if p is NULL.
const HParsedToken *act_return(DL_Value *v, const HParseResult *p)
{
    if(!p) return NULL;

    HParsedToken *r = h_arena_malloc(p->arena, sizeof(HParsedToken));
    r->token_type = TT_USER;
    r->user = (void *)v;

    if(p->ast) {
        r->index = p->ast->index;
        r->bit_offset = p->ast->bit_offset;
    } else {
        r->index = 0;
        r->bit_offset = 0;
    }

    return r;
}

// Actions for the "null", "true" and "false" literals: each returns a token
// that refers to the corresponding shared DL_Value constant.
const HParsedToken *act_null(const HParseResult *p)
{
    return act_return(&dl_null, p);
}

const HParsedToken *act_true(const HParseResult *p)
{
    return act_return(&dl_true, p);
}

const HParsedToken *act_false(const HParseResult *p)
{
    return act_return(&dl_false, p);
}

// Numeric value of a digit character held in a TT_UINT token.
// The character code is reduced to its low four bits; codes with bit 0x40 set
// (the ASCII letters) get 9 added first, so 'A'/'a' map to 10, 'F'/'f' to 15.
// Returns 0 if p is NULL or is not a TT_UINT token.
int digit_value(const HParsedToken *p)
{
    if(p == NULL || p->token_type != TT_UINT)
        return 0;

    int c = p->uint;

    if(c & 0x40)
        c += 9;

    return c & 0x0F;
}

// Convert a parsed number in the given base to a DL_T_NUMBER value.
//
// Expects p->ast to be a sequence of (at least) three elements:
//   [0] natural part: sequence of digit tokens
//   [1] optional fraction: sequence (point, digits), or a non-sequence token
//   [2] optional exponent: sequence (marker, optional sign, digits), or a
//       non-sequence token
// The digits of the natural and fractional parts are accumulated into one
// integer mantissa; every fractional digit lowers the exponent by one. The
// exponent digits are read in the same base as the mantissa.
//
// Returns NULL if p or its AST is missing, is not a sequence, or has fewer
// than three elements.
const HParsedToken *act_basenum(int base, const HParseResult *p)
{
    size_t i;

    if(!p || !p->ast || p->ast->token_type != TT_SEQUENCE) return NULL;
    const HCountedArray *seq = p->ast->seq;

    // elements 0..2 are indexed below; check before allocating anything
    if(seq->used < 3) return NULL;

    HParsedToken *tok = make_value(DL_T_NUMBER, p);
    DL_Value *result = (DL_Value *)tok->user;
    result->number = 0;           // XXX mpq_init

    const HParsedToken *nat = seq->elements[0];
    const HParsedToken *frac = seq->elements[1];
    const HParsedToken *exp_ = seq->elements[2];

    // natural part
    seq = nat->seq;
    for(i=0; i<seq->used; i++) {
        result->number = result->number * base + digit_value(seq->elements[i]); // XXX mpq_mul, mpq_add
    }

    double exponent = 0;    // XXX mpq_t? mpz_t?

    // explicit exponent, if present
    if(exp_ && exp_->token_type == TT_SEQUENCE) {
        const HParsedToken *sign = exp_->seq->elements[1];
        exp_ = exp_->seq->elements[2];

        seq = exp_->seq;
        for(i=0; i<seq->used; i++) {
            exponent = exponent * base + digit_value(seq->elements[i]); // XXX mpq_mul, mpq_add
        }

        if(sign && sign->token_type == TT_UINT && sign->uint=='-')
            exponent *= -1; // XXX mpq_neg
    }

    // fractional part, if present: extend the mantissa, lower the exponent
    if(frac && frac->token_type == TT_SEQUENCE) {
        frac = frac->seq->elements[1];     // skip over point

        seq = frac->seq;
        for(i=0; i<seq->used; i++) {
            result->number = result->number * base + digit_value(seq->elements[i]); // XXX mpq_mul, mpq_add
            exponent -= 1;
        }
    }

    // pow() instead of exp(log(base) * exponent): the latter accumulates
    // rounding error even for small integral exponents (e.g. 10^2).
    result->number *= pow((double)base, exponent);    // XXX mpq_exp?

    return tok;
}

// Actions for decimal and hexadecimal numbers: act_basenum with base 10 / 16.
const HParsedToken *act_decnum(const HParseResult *p)
{
    return act_basenum(10, p);
}

const HParsedToken *act_hexnum(const HParseResult *p)
{
    return act_basenum(16, p);
}

const HParsedToken *act_number(const HParseResult *p)
{
    if(!p || !p->ast || p->ast->token_type != TT_SEQUENCE) return NULL;
    const HCountedArray *seq = p->ast->seq;

    HParsedToken *minus = seq->elements[0];
    HParsedToken *num   = seq->elements[1];

    if(minus->token_type != TT_NONE)
        ((DL_Value *)num->user)->number *= -1;      // XXX mpq_neg

    return num;
}

// Action for the single-letter escapes b, f, n, r, t: replaces the letter
// (read from p->ast->uint) by the control character it denotes and returns
// it as a TT_UINT token allocated from the arena of p.
// Any other letter is passed through unchanged so that the token value is
// never left uninitialized.
const HParsedToken *act_esc_special(const HParseResult *p)
{
    HParsedToken *tok = h_arena_malloc(p->arena, sizeof(HParsedToken));
    tok->token_type = TT_UINT;
    switch(p->ast->uint) {
        case 'b': tok->uint = '\b'; break;
        case 'f': tok->uint = '\f'; break;
        case 'n': tok->uint = '\n'; break;
        case 'r': tok->uint = '\r'; break;
        case 't': tok->uint = '\t'; break;
        default:  tok->uint = p->ast->uint; break;
    }
    return tok;
}

const HParsedToken *act_esc_hex(const HParseResult *p)
{
    const HParsedToken *digits = p->ast->seq->elements[1];
    int x1 = digit_value(digits->seq->elements[0]);
    int x2 = digit_value(digits->seq->elements[1]);

    HParsedToken *tok = h_arena_malloc(p->arena, sizeof(HParsedToken));
    tok->token_type = TT_UINT;
    tok->uint = x1;
    tok->uint <<= 4; tok->uint |= x2;

    return tok;
}

const HParsedToken *act_utf16(const HParseResult *p)
{
    const HParsedToken *digits = p->ast;
    int x1 = digit_value(digits->seq->elements[0]);
    int x2 = digit_value(digits->seq->elements[1]);
    int x3 = digit_value(digits->seq->elements[2]);
    int x4 = digit_value(digits->seq->elements[3]);

    // I set the token type to TT_USER to distinguish unicode characters from
    // raw bytes. These tokens are consumed by act_surro and act_string, which
    // know what to expect.
    HParsedToken *tok = h_arena_malloc(p->arena, sizeof(HParsedToken));
    tok->token_type = TT_USER;
    tok->uint = x1;
    tok->uint <<= 4; tok->uint |= x2;
    tok->uint <<= 4; tok->uint |= x3;
    tok->uint <<= 4; tok->uint |= x4;

    return tok;
}

// Action for a UTF-16 surrogate pair. Elements 0 and 3 of the parsed sequence
// are the high and low surrogate tokens produced by act_utf16 (elements 1 and
// 2 are the backslash and 'u' in between).
//
// The code point is 0x10000 plus the 20-bit number formed by the low ten bits
// of each surrogate. Without the 0x10000 offset every supplementary character
// would be mapped into the basic plane.
const HParsedToken *act_surro(const HParseResult *p)
{
    const HParsedToken *hi = p->ast->seq->elements[0];
    const HParsedToken *lo = p->ast->seq->elements[3];

    HParsedToken *tok = h_arena_malloc(p->arena, sizeof(HParsedToken));
    tok->token_type = TT_USER;  // mark as unicode, cf. comment in act_utf16
    tok->uint = 0x10000 + (((hi->uint & 0x3FF) << 10) | (lo->uint & 0x3FF));

    return tok;
}

const HParsedToken *act_string(const HParseResult *p)
{
    HParsedToken *res = make_value(DL_T_STRING, p);
    
    const HParsedToken *charseq = p->ast->seq->elements[1];
    const HParsedToken *encspec = p->ast->seq->elements[3];

    char *encbuf = "utf8";
    uint8_t *charbuf = NULL;
    size_t len, i;

    // determine encoding
    if(encspec && encspec->token_type == TT_SEQUENCE) {
        encbuf = g_new(char, encspec->seq->used+1);
        for(i=0; i<encspec->seq->used; i++)
            encbuf[i] = encspec->seq->elements[i]->uint;
        encbuf[i] = '\0';   // this is necessary
    }

    // allocate byte buffer
    if(!strcmp("utf8", encbuf)) {
        // be ready to recode, allocate enough room
        len=0;
        for(i=0; i<charseq->seq->used; i++) {
            const HParsedToken *t = charseq->seq->elements[i];
            if(t->token_type == TT_USER) {
                // token was a unicode escape
                if(t->uint < 128) len += 1;             //  7 bit
                else if(t->uint < 0x00800) len += 2;    // 11 bit
                else if(t->uint < 0x10000) len += 3;    // 16 bit
                else len += 4;                          // 21 bit
            } else {
                // token was a raw byte
                len += 1;
            }
        }
    } else {
        len = charseq->seq->used;
    }
    charbuf = g_new(uint8_t, len+1);

    // transfer bytes
    uint8_t *cursor = charbuf;
    if(!strcmp("utf8", encbuf)) {
        // recode to UTF-8 as needed
        for(i=0; i<charseq->seq->used; i++) {
            const HParsedToken *t = charseq->seq->elements[i];
            if(t->token_type == TT_USER) {
                // token was a unicode escape, recode as UTF-8
                if(t->uint < 128) {
                    *(cursor++) = t->uint;
                } else if(t->uint < 0x00800) {
                    *(cursor++) = (t->uint >> 6)          | 0xC0;  // 110.....
                    *(cursor++) = (t->uint)       & 0x3F  | 0x80;  // 10......
                } else if(t->uint < 0x10000) {
                    *(cursor++) = (t->uint >> 12)         | 0xE0;  // 1110....
                    *(cursor++) = (t->uint >> 6)  & 0x3F  | 0x80;  // 10......
                    *(cursor++) = (t->uint)       & 0x3F  | 0x80;  // 10......
                } else {
                    *(cursor++) = (t->uint >> 18)         | 0xF0;  // 11110...
                    *(cursor++) = (t->uint >> 12) & 0x3F  | 0x80;  // 10......
                    *(cursor++) = (t->uint >> 6)  & 0x3F  | 0x80;  // 10......
                    *(cursor++) = (t->uint)       & 0x3F  | 0x80;  // 10......
                }
            } else {
                // token was a raw byte, transfer verbatim
                *(cursor++) = t->uint;
            }
        }
    } else {
        // target encoding is nothing we know, let the chips fall...
        for(i=0; i<charseq->seq->used; i++)
            *(cursor++) = charseq->seq->elements[i]->uint;
    }
    *cursor = 0;            // this is a convenience and precaution

    // package result and return
    DL_String string = { .len = len,
                         .bytes = charbuf,
                         .encoding = encbuf };
    ((DL_Value *)res->user)->string = string;

    return res;
}

// Six-bit value of a base64 digit held in a TT_UINT token:
// 'A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62,
// '/' -> 63. Returns 0 for any other character, for a NULL token, and for
// tokens that are not TT_UINT.
uint8_t bsfdig_value(const HParsedToken *p)
{
    uint8_t value = 0;

    if(p && p->token_type == TT_UINT) {
        uint8_t c = p->uint;
        // ranges start at 0x41 / 0x61; starting one lower would let '@' and
        // '`' through and make the subtraction wrap around to 255
        if(c >= 0x41 && c <= 0x5A) // A-Z
            value = c - 0x41;
        else if(c >= 0x61 && c <= 0x7A) // a-z
            value = c - 0x61 + 26;
        else if(c >= 0x30 && c <= 0x39) // 0-9
            value = c - 0x30 + 52;
        else if(c == '+')
            value = 62;
        else if(c == '/')
            value = 63;
    }

    return value;
}

const HParsedToken *act_byte_array(const HParseResult *p)
{
    HParsedToken *res = make_value(DL_T_BYTEARRAY, p);

    // grab raw chunk sequence
    // grab b64 chunk
    const HParsedToken *raw = p->ast->seq->elements[0];
    const HParsedToken *b64 = p->ast->seq->elements[1];

    // grab b64_3 block sequence
    // grab and analyze b64 end block (_2 or _1)
    const HParsedToken *b64_3 = b64->seq->elements[0];
    const HParsedToken *b64_2 = b64->seq->elements[1];
    const HParsedToken *b64_1 = b64->seq->elements[1];

    if(b64_2->token_type == TT_NONE)
        b64_1 = b64_2 = NULL;
    else if(b64_2->seq->elements[2]->uint == '=')
        b64_2 = NULL;
    else
        b64_1 = NULL;

    // calculate total array length
    size_t len = raw->seq->used * RAWBLKSIZE + b64_3->seq->used * 3;
    if(b64_2) len += 2;
    if(b64_1) len += 1;
    
    // allocate array
    uint8_t *array = g_new(uint8_t, len);
    uint8_t *cursor = array;

    // copy raw chunks
    size_t i, j;
    for(i=0; i<raw->seq->used; i++) {
        const HParsedToken *blk = raw->seq->elements[i];
        for(j=0; j<blk->seq->used; j++)
            *(cursor++) = blk->seq->elements[j]->uint;
    }

    // copy base64_3 blocks
    for(i=0; i<b64_3->seq->used; i++) {
        HParsedToken **digits = b64_3->seq->elements[i]->seq->elements;
        uint32_t x = bsfdig_value(digits[0]);
        x <<= 6; x |= bsfdig_value(digits[1]);
        x <<= 6; x |= bsfdig_value(digits[2]);
        x <<= 6; x |= bsfdig_value(digits[3]);
        *(cursor++) = (x >> 16) & 0xFF;
        *(cursor++) = (x >> 8) & 0xFF;
        *(cursor++) = x & 0xFF;
    }

    // copy trailing base64_2 or _1 block
    if(b64_2) {
        HParsedToken **digits = b64_2->seq->elements;
        uint32_t x = bsfdig_value(digits[0]);
        x <<= 6; x |= bsfdig_value(digits[1]);
        x <<= 6; x |= bsfdig_value(digits[2]);
        *(cursor++) = (x >> 10) & 0xFF;
        *(cursor++) = (x >> 2) & 0xFF;
    } else if(b64_1) {
        HParsedToken **digits = b64_1->seq->elements;
        uint32_t x = bsfdig_value(digits[0]);
        x <<= 6; x |= bsfdig_value(digits[1]);
        *(cursor++) = (x >> 4) & 0xFF;
    }

    // package up and return
    ((DL_Value *)res->user)->bytearray.len = len;
    ((DL_Value *)res->user)->bytearray.bytes = array;

    return res;
}

const HParsedToken *act_list(const HParseResult *p)
{
    const HParsedToken *elemseq = p->ast->seq->elements[1];

    HParsedToken *res = make_value(DL_T_LIST, p);

    size_t len = elemseq->seq->used;
    DL_Value **array = g_new(DL_Value *, len);

    size_t i;
    for(i=0; i<len; i++) {
        const HParsedToken *elem = elemseq->seq->elements[i];
        if(elem->token_type == TT_USER)
            array[i] = (DL_Value *)elem->user;
        else
            array[i] = &dl_null; // shouldn't happen
    }

    ((DL_Value *)res->user)->list.len = len;
    ((DL_Value *)res->user)->list.elems = array;

    return res;
}

// Three-way comparison of two DL_String objects, usable as a GTree key
// comparison function.
//
// Order: bytewise over the common prefix, then shorter before longer, then
// by encoding name (a NULL encoding counts as "utf8").
// Returns a negative, zero or positive value.
//
// The bytes are compared with memcmp rather than strncmp because strings
// may contain embedded zero bytes, at which strncmp would stop comparing.
int dl_string_compare(const void *p, const void *q)
{
    const DL_String *a = (const DL_String *)p;
    const DL_String *b = (const DL_String *)q;

    size_t n = (a->len < b->len) ? a->len : b->len;
    int c = (n > 0) ? memcmp(a->bytes, b->bytes, n) : 0;
    if(c)
        return c;
    else if(a->len > b->len)
        return 1;
    else if(a->len < b->len)
        return -1;
    else {
        const char *ea = a->encoding;
        const char *eb = b->encoding;

        if(!ea) ea = "utf8";
        if(!eb) eb = "utf8";

        return strcmp(ea, eb);
    }
}

const HParsedToken *act_record(const HParseResult *p)
{
    const HParsedToken *elemseq = p->ast->seq->elements[1];

    HParsedToken *res = make_value(DL_T_RECORD, p);

    size_t len = elemseq->seq->used;
    GTree *tree = g_tree_new(dl_string_compare);

    size_t i;
    for(i=0; i<len; i++) {
        const HParsedToken *assoc = elemseq->seq->elements[i];
        const HParsedToken *key = assoc->seq->elements[0];
        const HParsedToken *value = assoc->seq->elements[2];

        if(key->token_type == TT_USER && value->token_type == TT_USER) {
            // key comes wrapped in a DL_Value, which we unwrap:
            DL_Value  *kv = (DL_Value *)key->user;
            DL_String *ks = g_new(DL_String, 1);
            *ks = kv->string;   // assign by value = copy
            g_free(kv);         // bytes and encoding now owned by ks

            g_tree_insert(tree, ks, value->user);
        }
    }

    ((DL_Value *)res->user)->record.tree = tree;

    return res;
}


// cf. grammar.abnf
void init_parser(void)
{
    if(document || stream)
        return;

    // CORE
    const HParser *digit = h_ch_range(0x30, 0x39);
    const HParser *hexdig = h_choice(digit, h_ch_range(0x41, 0x46), h_ch_range(0x61, 0x66), NULL);
    const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL);
    const HParser *dquote = h_ch('"');
    const HParser *octet = h_uint8();

    // AUX.
    const HParser *wsp = h_choice(h_ch(' '), h_ch_range(0x09, 0x0D), NULL);
    const HParser *lwsp = h_choice(h_ch(' '), h_ch('\t'), NULL);
    const HParser *ws = h_many(wsp);
    const HParser *lws = h_many(lwsp);
    const HParser *newline = h_ch('\n');
    const HParser *ox = h_ignore(h_literal("0x"));
    const HParser *point = h_ch('.');
    const HParser *plus = h_ch('+');
    const HParser *minus = h_ch('-');
    const HParser *eE = h_in("eE", 2);
    const HParser *xX = h_in("xX", 2);
    const HParser *slash = h_ch('/');
    const HParser *backslash = h_ch('\\');
    const HParser *underscore = h_ch('_');
    const HParser *lower = h_ch_range(0x61, 0x7a);
    const HParser *x = h_ch('x');
    const HParser *u = h_ch('u');
    const HParser *dD = h_in("dD", 2);
    const HParser *comma = h_ch(',');
    const HParser *colon = h_ch(':');
    const HParser *left_bracket = h_ch('[');
    const HParser *right_bracket = h_ch(']');
    const HParser *left_brace = h_ch('{');
    const HParser *right_brace = h_ch('}');
    const HParser *hashmark = h_ch('#');
    const HParser *percent = h_ch('%');
    const HParser *equals = h_ch('=');

    // hex ranges
    const HParser *r0C = h_in("0123456789abcABC", 16);
    const HParser *rEF = h_in("efEF", 4);
    const HParser *r07 = h_in("01234567", 8);
    const HParser *r8B = h_in("89AB", 4);
    const HParser *rCF = h_in("CDEF", 4);

    // BOOLEANS & NULL
    const HParser *null = h_action(h_literal("null"), act_null);
    const HParser *true = h_action(h_literal("true"), act_true);
    const HParser *false = h_action(h_literal("false"), act_false);
    const HParser *boolean = h_choice(true, false, NULL);

    // NUMBERS
    const HParser *hexnat = h_many1(hexdig);
    const HParser *hexexp = h_sequence(xX, h_optional(h_choice(minus, plus, NULL)), hexnat, NULL);
    const HParser *hexfrac = h_sequence(point, hexnat, NULL);
    const HParser *hexnum = h_action(h_sequence(ox, hexnat, h_optional(hexfrac),
                                                h_optional(hexexp), NULL),
                                     act_hexnum);

    const HParser *decnat = h_many1(digit);
    const HParser *decexp = h_sequence(eE, h_optional(h_choice(minus, plus, NULL)), decnat, NULL);
    const HParser *decfrac = h_sequence(point, decnat, NULL);
    const HParser *decnum = h_action(h_sequence(h_epsilon_p(), decnat, h_optional(decfrac),
                                                h_optional(decexp), NULL),
                                     act_decnum);

    const HParser *number = h_action(h_sequence(h_optional(minus),
                                                h_choice(hexnum, decnum, NULL), NULL),
                                     act_number);

    // STRINGS
    const HParser *enc_name = h_many1(h_choice(lower, digit, NULL));
    const HParser *enc_spec =
        h_action(h_sequence(underscore, enc_name, NULL), h_act_last);
    const HParser *esc_special = h_action(h_in("bfnrt", 5), act_esc_special);
    const HParser *esc_hex =
        h_action(h_sequence(x, h_repeat_n(hexdig, 2), NULL), act_esc_hex);
    const HParser *u_basic =
        h_action(h_choice(h_sequence(h_choice(r0C, rEF, NULL), hexdig, hexdig, hexdig, NULL),
                          h_sequence(dD, r07, hexdig, hexdig, NULL), NULL),
                 act_utf16);
    const HParser *u_surro_hi =
        h_action(h_sequence(dD, r8B, hexdig, hexdig, NULL), act_utf16);
    const HParser *u_surro_lo =
        h_action(h_sequence(dD, rCF, hexdig, hexdig, NULL), act_utf16);
    const HParser *u_surro =
        h_action(h_sequence(u_surro_hi, backslash, u, u_surro_lo, NULL),
                 act_surro);
    const HParser *esc_unicode =
        h_action(h_sequence(u, h_choice(u_basic, u_surro, NULL), NULL),
                 h_act_last);
    const HParser *esc_char = h_choice(dquote, backslash, slash,
                                       esc_special, esc_hex, esc_unicode, NULL);
    const HParser *escaped =
        h_action(h_sequence(backslash, esc_char, NULL), h_act_last);
    const HParser *unescaped = h_not_in("\"\\", 2);
    const HParser *char_ = h_choice(escaped, unescaped, NULL);
    const HParser *string =
        h_action(h_sequence(dquote, h_many(char_), dquote,
                            h_optional(enc_spec), NULL),
                 act_string);
                           
    
    // BYTE ARRAYS
    const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL);
    const HParser *bsfdig_4bit = h_in("AEIMQUYcgkosw048", 16);
    const HParser *bsfdig_2bit = h_in("AQgw", 4);
    const HParser *base64_3 = h_repeat_n(bsfdig, 4);
    const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL);
    const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL);
    const HParser *base64 = h_sequence(h_many(base64_3),
                                       h_optional(h_choice(base64_2,
                                                           base64_1, NULL)),
                                       NULL);
    const HParser *base64_chunk =
        h_action(h_sequence(percent, base64, percent, NULL),
                 h_act_middle);
    const HParser *raw_chunk =
        h_action(h_sequence(hashmark, h_repeat_n(octet, RAWBLKSIZE), NULL),
                 h_act_last);
    const HParser *byte_array =
        h_action(h_sequence(h_many(raw_chunk), base64_chunk, NULL),
                 act_byte_array);

    // need to refer to values below
    HParser *value = h_indirect();

    // LISTS
    const HParser *list_open = h_sequence(left_bracket, ws, NULL);
    const HParser *list_close = h_sequence(ws, right_bracket, NULL);
    const HParser *list_sep = h_sequence(ws, comma, ws, NULL);
    const HParser *list_elems = h_sepBy(value, list_sep);
    const HParser *list = 
        h_action(h_sequence(list_open, list_elems, list_close, NULL),
                 act_list);

    // RECORDS
    const HParser *record_open = h_sequence(left_brace, ws, NULL);
    const HParser *record_close = h_sequence(ws, right_brace, NULL);
    const HParser *record_sep = h_sequence(ws, comma, ws, NULL);
    const HParser *fieldname = string;
    const HParser *assoc_sep = h_sequence(ws, colon, ws, NULL);
    const HParser *assoc =
        h_sequence(fieldname, assoc_sep, value, NULL);
    const HParser *record_elems = h_sepBy(assoc, record_sep);
    const HParser *record =
        h_action(h_sequence(record_open, record_elems, record_close, NULL),
                 act_record);

    // VALUES
    h_bind_indirect(value, h_choice(boolean, null, number, string,
                                    byte_array, list, record, NULL));

    // TOP-LEVEL PARSERS
    document = h_action(h_sequence(ws, value, ws, NULL),
                        h_act_middle);

    const HParser* stream_sep = h_sequence(lws, newline, NULL);
    stream_elem = h_action(h_sequence(ws, value, stream_sep, NULL),
                           h_act_middle);
    stream = h_many(stream_elem);
}


#include <stdio.h>

void printval(FILE *f, const DL_Value *v, int indent, int delta);

// helper to use with g_tree_foreach in DL_T_RECORD case of printval
struct printassoc_env { FILE *f; int indent; int delta; };
int printassoc(void *key, void *val, void *penv)
{
    DL_Value kv = { .type = DL_T_STRING, .string = *(DL_String *)key };
    struct printassoc_env *env = (struct printassoc_env *)penv;

    printval(env->f, &kv, env->indent, env->delta);
    printval(env->f, (DL_Value *)val, env->indent+env->delta, env->delta);

    return 0;
}

// Write len bytes to f; printable ASCII (0x20..0x7E) verbatim, everything
// else as a \xHH escape.
static void printval_bytes(FILE *f, const uint8_t *bytes, size_t len)
{
    for(size_t i=0; i<len; i++) {
        uint8_t c = bytes[i];
        if(c >= 0x20 && c < 127)
            fprintf(f, "%c", c);
        else
            fprintf(f, "\\x%.2X", c);
    }
}

// Write indent spaces to f; nothing for a zero or negative indent.
static void printval_indent(FILE *f, int indent)
{
    for(int i=0; i<indent; i++) fprintf(f, " ");
}

// Pretty-print the value v to f, one item per line.
//   indent - number of spaces to put in front of the first line
//   delta  - additional indentation for each level of nesting
// Lists and records are printed recursively. Strings are followed by
// "_<encoding>" unless the encoding is "utf8"; a NULL encoding is treated as
// "utf8", as dl_string_compare does. Does nothing if v is NULL.
void printval(FILE *f, const DL_Value *v, int indent, int delta)
{
    if(!v) return;
    size_t i;

    printval_indent(f, indent);

    switch(v->type) {
    case DL_T_NULL:
        fprintf(f, "null\n");
        break;
    case DL_T_BOOLEAN:
        if(v->boolean)
            fprintf(f, "true\n");
        else
            fprintf(f, "false\n");
        break;
    case DL_T_NUMBER:
        fprintf(f, "%f\n", v->number);    // XXX mpq_out_str
        break;
    case DL_T_STRING:
        fprintf(f, "\"");
        printval_bytes(f, v->string.bytes, v->string.len);
        fprintf(f, "\"");
        if(v->string.encoding && strcmp("utf8", v->string.encoding))
            fprintf(f, "_%s", v->string.encoding);
        fprintf(f, "\n");
        break;
    case DL_T_BYTEARRAY:
        fprintf(f, "BYTES:");
        fprintf(f, "\"");
        printval_bytes(f, v->bytearray.bytes, v->bytearray.len);
        fprintf(f, "\"\n");
        break;
    case DL_T_LIST:
        fprintf(f, "[\n");
        for(i=0; i<v->list.len; i++) {
            printval(f, v->list.elems[i], indent+delta, delta);
        }
        printval_indent(f, indent);
        fprintf(f, "]\n");
        break;
    case DL_T_RECORD: {
        fprintf(f, "{\n");
        struct printassoc_env env = {f, indent+delta, delta};
        g_tree_foreach(v->record.tree, printassoc, &env);
        printval_indent(f, indent);
        fprintf(f, "}\n");
        break;
    }
    default:
        fprintf(f, "VAL\n");
    }
}

// Pretty-print a parsed token to f.
//   TT_USER     - the token's DL_Value is printed with printval
//   TT_SEQUENCE - printed as a bracketed block, elements one delta deeper
//   otherwise   - delegated to h_pprint
// Does nothing if p is NULL.
void printtok(FILE *f, const HParsedToken *p, int indent, int delta)
{
    if(p == NULL)
        return;

    if(p->token_type == TT_USER) {
        printval(f, (const DL_Value *)p->user, indent, delta);
    } else if(p->token_type == TT_SEQUENCE) {
        size_t k;

        for(k=0; k<indent; k++) fprintf(f, " ");
        fprintf(f, "[\n");

        for(k=0; k<p->seq->used; k++)
            printtok(f, p->seq->elements[k], indent+delta, delta);

        for(k=0; k<indent; k++) fprintf(f, " ");
        fprintf(f, "]\n");
    } else {
        h_pprint(f, p, indent, delta);
    }
}

// Read up to sizeof(input) bytes from stdin, echo them to stderr, parse them
// as one document and pretty-print the result to stdout.
// Returns 0 if the input parsed, 1 otherwise. Command line arguments are
// not used.
int main(int argc, char **argv)
{
    uint8_t input[102400];
    size_t inputsize;
    const HParseResult *result;

    (void)argc;
    (void)argv;

    init_parser();

    inputsize = fread(input, 1, sizeof(input), stdin);
    // %zu: inputsize is a size_t; %d would be a format mismatch
    fprintf(stderr, "inputsize=%zu\ninput=", inputsize);
    fwrite(input, 1, inputsize, stderr);
    result = h_parse(document, input, inputsize);

    if(result) {
        // cast so that the argument matches %lld whatever the field's exact type
        fprintf(stderr, "parsed=%lld bytes\n", (long long)(result->bit_length/8));
        printtok(stdout, result->ast, 0, 2);
        return 0;
    } else {
        return 1;
    }
}
