import string, re class TomlError(RuntimeError): def __init__(self, kind, line, col): self.kind = kind self.line = line self.col = col RuntimeError.__init__(self, kind) def __str__(self): return '%s(%d, %d)' % (self.kind, self.line, self.col) class _CharSource: def __init__(self, s): self._s = s self._index = 0 self._mark = 0 self._line = 1 self._col = 1 self._update_cur() def __bool__(self): return self.cur is not None def __len__(self): return len(self._s[self._index:]) def __getitem__(self, item): return self._s[self._index:][item] def next(self, l=1): for ch in self[:l]: if ch == '\n': self._line += 1 self._col = 1 else: self._col += 1 self._index += l self._update_cur() def mark(self): self._mark = self._index self._mark_pos = self._line, self._col def rollback(self): self._index = self._mark self._line, self._col = self._mark_pos self._update_cur() def commit(self, type=None, text=None): tok = self._s[self._mark:self._index] pos = (self._mark_pos, (self._line, self._col)) if type is None: type = tok if text is None: text = tok return type, text, pos def error(self, kind): raise TomlError(kind, self._line, self._col) def _update_cur(self): self.tail = self._s[self._index:] if self._index < len(self._s): self.cur = self._s[self._index] else: self.cur = None def lex(s): src = _CharSource(s.replace('\r\n', '\n')) def is_id(ch): return ch is not None and (ch.isalnum() or ch in '-_') def is_ws(ch): return ch is not None and ch in ' \t' def fetch_esc(): escapes = {'b':'\b', 't':'\t', 'n':'\n', 'f':'\f', 'r':'\r', '"':'"', '\\':'\\'} if src.cur == 'u': if len(src) < 5 or any(ch not in string.hexdigits for ch in src[1:5]): src.error('invalid_escape_sequence') res = unichr(int(src[1:5], 16)) src.next(5) elif src.cur == 'U': if len(src) < 9 or any(ch not in string.hexdigits for ch in src[1:9]): src.error('invalid_escape_sequence') res = unichr(int(src[1:9], 16)) src.next(9) elif src.cur == '\n': while src and src.cur in ' \n\t': src.next() res = '' elif src.cur in escapes: res = escapes[src.cur] src.next(1) else: src.error('invalid_escape_sequence') return res datetime_re = re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})') def consume_datetime(): m = datetime_re.match(src.tail) if not m: return False src.next(len(m.group(0))) return True def consume_int(): if not src: src.error('malformed') if src.cur in '+-': src.next() if not src or src.cur not in '0123456789': src.error('malformed') while src and src.cur in '0123456789_': src.next() def consume_float(): consume_int() type = 'int' if src and src.cur == '.': type = 'float' src.next() if not src or src.cur not in '0123456789_': src.error('malformed_float') while src and src.cur in '0123456789_': src.next() if src and src.cur in 'eE': type = 'float' src.next() consume_int() return type while src: src.mark() if src.cur in ' \t': src.next() while src and src.cur in ' \t': src.next() elif src.cur == '#': src.next() while src and src.cur != '\n': src.next() elif src.cur in '0123456789': if consume_datetime(): yield src.commit('datetime') else: src.rollback() type = consume_float() yield src.commit(type) elif src.cur in '+-': type = consume_float() yield src.commit(type) elif is_id(src.cur): while is_id(src.cur): src.next() yield src.commit('id') elif src.cur in '[]{}=.,\n': src.next() yield src.commit() elif src.tail.startswith("'''"): src.next(3) if src.cur == '\n': src.next() end_quote = src.tail.find("'''") if end_quote == -1: src.error('unclosed_multiline_string') text = src[:end_quote] src.next(end_quote+3) yield src.commit('str', text) elif src.cur == "'": src.next() end_quote = src.tail.find("'") if end_quote == -1: src.error('unclosed_string') text = src[:end_quote] src.next(end_quote+1) yield src.commit('str', text) elif src.tail.startswith('"""'): src.next(3) if src.cur == '\n': src.next() res = [] while True: src.mark() end_quote = src.tail.find('"""') if end_quote == -1: src.error('unclosed_multiline_string') esc_pos = src.tail.find('\\') if esc_pos == -1 or esc_pos > end_quote: res.append(src[:end_quote]) src.next(end_quote+3) break res.append(src[:esc_pos]) src.next(esc_pos+1) res.append(fetch_esc()) yield src.commit('str', ''.join(res)) elif src.cur == '"': src.next() res = [] while True: src.mark() end_quote = src.tail.find('"') if end_quote == -1: src.error('unclosed_string') esc_pos = src.tail.find('\\') if esc_pos == -1 or esc_pos > end_quote: res.append(src[:end_quote]) src.next(end_quote+1) break res.append(src[:esc_pos]) src.next(esc_pos+1) res.append(fetch_esc()) yield src.commit('str', ''.join(res)) else: src.error('unexpected_char') src.mark() yield src.commit('\n', '') yield src.commit('eof', '') class _TokSource: def __init__(self, s): self._lex = iter(lex(s)) self.pos = None self.next() def next(self): self.prev_pos = self.pos self.tok, self.text, self.pos = next(self._lex) def consume(self, kind): if self.tok == kind: self.next() return True return False def consume_adjacent(self, kind): if self.prev_pos is None or self.prev_pos[1] != self.pos[0]: return False return self.consume(kind) def consume_nls(self): while self.consume('\n'): pass def error(self, kind): raise TomlError(kind, self.pos[0][0], self.pos[0][1]) def _translate_literal(type, text): if type == 'bool': return text == 'true' elif type == 'int': return int(text.replace('_', '')) elif type == 'float': return float(text.replace('_', '')) elif type == 'str': return text elif type == 'datetime': return text def load(fin, translate_literal=_translate_literal, translate_array=id): return loads(fin.read(), translate_literal=translate_literal, translate_array=translate_array) def loads(s, translate_literal=_translate_literal, translate_array=id): if isinstance(s, str): s = s.decode('utf-8') toks = _TokSource(s) def read_value(): while True: if toks.tok == 'id': if toks.text in ('true', 'false'): value = translate_literal('bool', toks.text) toks.next() return 'bool', value else: toks.error('unexpected_identifier') elif toks.tok in ('int', 'str', 'float', 'datetime'): type = toks.tok value = translate_literal(toks.tok, toks.text) toks.next() return type, value elif toks.consume('['): res = [] toks.consume_nls() if not toks.consume(']'): toks.consume_nls() type, val = read_value() res.append(val) toks.consume_nls() while toks.consume(','): toks.consume_nls() if toks.consume(']'): break cur_type, val = read_value() if type != cur_type: toks.error('heterogenous_array') res.append(val) toks.consume_nls() else: if not toks.consume(']'): toks.error('expected_right_brace') return 'array', translate_array(res) elif toks.consume('{'): res = {} while toks.tok in ('id', 'str'): k = toks.text toks.next() if k in res: toks.error('duplicate_key') if not toks.consume('='): toks.error('expected_equals') type, v = read_value() res[k] = v if not toks.consume(','): break if not toks.consume('}'): toks.error('expected_closing_brace') return 'table', res else: toks.error('unexpected_token') root = {} tables = {} scope = root while toks.tok != 'eof': if toks.tok in ('id', 'str'): k = toks.text toks.next() if not toks.consume('='): toks.error('expected_equals') type, v = read_value() if k in scope: toks.error('duplicate_keys') scope[k] = v elif toks.consume('\n'): pass elif toks.consume('['): is_table_array = toks.consume_adjacent('[') path = [] if toks.tok not in ('id', 'str'): toks.error('expected_table_name') path.append(toks.text) toks.next() while toks.consume('.'): if toks.tok not in ('id', 'str'): toks.error('expected_table_name') path.append(toks.text) toks.next() if not toks.consume(']') or (is_table_array and not toks.consume_adjacent(']')): toks.error('malformed_table_name') if not toks.consume('\n'): toks.error('garbage_after_table_name') cur = tables for name in path[:-1]: if isinstance(cur.get(name), list): d, cur = cur[name][-1] else: d, cur = cur.setdefault(name, (None, {})) scope = {} name = path[-1] if name not in cur: if is_table_array: cur[name] = [(scope, {})] else: cur[name] = (scope, {}) elif isinstance(cur[name], list): if not is_table_array: toks.error('table_type_mismatch') cur[name].append((scope, {})) else: if is_table_array: toks.error('table_type_mismatch') old_scope, next_table = cur[name] if old_scope is not None: toks.error('duplicate_tables') cur[name] = (scope, next_table) else: toks.error('unexpected') def merge_tables(scope, tables): if scope is None: scope = {} for k, v in tables.iteritems(): if k in scope: toks.error('key_table_conflict') if isinstance(v, list): scope[k] = [merge_tables(sc, tbl) for sc, tbl in v] else: scope[k] = merge_tables(v[0], v[1]) return scope return merge_tables(root, tables) if __name__ == '__main__': import sys, json t = sys.stdin.read() print json.dumps(loads(t), indent=4)