From 54d8c0a91399e55c151da0a7c9efa68336fbc51a Mon Sep 17 00:00:00 2001
From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Thu, 16 Nov 2006 21:38:13 +0000
Subject: [PATCH] lexer, basic parse tree structure, exception classes

---
 lib/mako/exceptions.py |  10 +++
 lib/mako/lexer.py      | 135 +++++++++++++++++++++++++++++++++++++++++
 lib/mako/parsetree.py  | 119 ++++++++++++++++++++++++++++++++++++
 test/lexer.py          |  33 ++++++++++
 test/pygen.py          |   3 -
 5 files changed, 297 insertions(+), 3 deletions(-)
 create mode 100644 lib/mako/exceptions.py
 create mode 100644 lib/mako/lexer.py
 create mode 100644 lib/mako/parsetree.py
 create mode 100644 test/lexer.py

diff --git a/lib/mako/exceptions.py b/lib/mako/exceptions.py
new file mode 100644
index 0000000..30eef75
--- /dev/null
+++ b/lib/mako/exceptions.py
@@ -0,0 +1,10 @@
+"""exception classes"""
+
+class MakoException(Exception):  # common base class of CompileException and SyntaxException
+    pass
+class CompileException(MakoException):  # adds no state of its own; presumably signals template compilation errors -- TODO confirm
+    pass
+class SyntaxException(MakoException):  # error carrying the template line number it was raised for
+    def __init__(self, message, lineno):
+        MakoException.__init__(self, message + " at line: %d" % lineno)  # the line number becomes part of str(e)
+        self.lineno =lineno  # also kept as an attribute for programmatic access
\ No newline at end of file
diff --git a/lib/mako/lexer.py b/lib/mako/lexer.py
new file mode 100644
index 0000000..bff2c83
--- /dev/null
+++ b/lib/mako/lexer.py
@@ -0,0 +1,135 @@
+import re
+from mako import parsetree, exceptions
+
+class Lexer(object):
+    """scans template text into a list of parsetree nodes, nesting nodes inside open tags."""
+    def __init__(self, text):
+        self.text = text              # complete template source being scanned
+        self.nodes = []               # top-level nodes produced so far
+        self.matched_lineno = 1       # line on which the most recent match began
+        self.matched_charpos = 0      # 1-based column at which the most recent match began
+        self.lineno = 1               # line reached once the most recent match was consumed
+        self.match_position = 0       # offset into text where the next match is attempted
+        self.tag = []                 # stack of still-open Tag nodes, innermost last
+
+    def match(self, regexp, flags=None):
+        """try regexp at the current position; on success advance past the match and record its start line/column. returns the match object or None."""
+        mp = self.match_position
+        # re.compile() needs an integer, so an omitted (None) flags value is passed as 0
+        reg = re.compile(regexp, flags or 0)
+        match = reg.match(self.text, mp)
+        if match:
+            (start, end) = match.span()
+            if end == start:
+                # a zero-width match still advances one character (presumably so parse() cannot stall)
+                self.match_position = end + 1
+            else:
+                self.match_position = end
+            self.matched_lineno = self.lineno
+            # scan back from the match start to the previous newline (or the start of the text)
+            cp = mp - 1
+            while (cp >= 0 and cp<len(self.text) and self.text[cp] != '\n'):
+                cp -=1
+            self.matched_charpos = mp - cp
+            self.lineno += self._count_lines(self.text[mp:self.match_position])
+        return match
+
+    def append_node(self, nodecls, *args, **kwargs):
+        """build nodecls(*args, **kwargs) stamped with the last match's line/column and attach it to the innermost open tag, or to the top level."""
+        kwargs['lineno'] = self.matched_lineno
+        kwargs['pos'] = self.matched_charpos
+        node = nodecls(*args, **kwargs)
+        if self.tag:
+            self.tag[-1].nodes.append(node)
+        else:
+            self.nodes.append(node)
+        if isinstance(node, parsetree.Tag):
+            # a freshly opened tag collects every following node until it is closed
+            self.tag.append(node)
+
+    def parse(self):
+        """lex the whole text and return the list of top-level nodes.
+
+        raises CompileException if no rule can make progress, SyntaxException if a tag is left open."""
+        length = len(self.text)
+        while True:
+            if self.match_position > length:
+                break
+            if self.match_end():
+                break
+            if self.match_tag_start():
+                continue
+            if self.match_tag_end():
+                continue
+            if self.match_text():
+                continue
+            # no rule matched: leave quietly if the position ran past the end, otherwise report it
+            if self.match_position > length:
+                break
+            raise exceptions.CompileException("Infinite parsing loop encountered - Lexer bug?")
+        # whatever is still on the tag stack never saw its closing tag
+        if self.tag:
+            raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, self.matched_lineno)
+        return self.nodes
+
+    def match_tag_start(self):
+        """match an opening tag such as <%name attr="val">; on success append a Tag node (which becomes the innermost open tag) and return True."""
+        match = self.match(r'\<%(\w+)(\s+[^>]*)?\s*>', re.I | re.S)
+        if match:
+            (keyword, attr) = (match.group(1).lower(), match.group(2))
+            self.keyword = keyword
+            attributes = {}
+            if attr:
+                # each value is stored exactly as written, surrounding quotes included
+                for (full, key, val) in re.findall(r"\s*((\w+)\s*=\s*('[^']*'|\"[^\"]*\"|\w+))\s*", attr):
+                    attributes[key] = val
+            self.append_node(parsetree.Tag, keyword, attributes)
+            return True
+        else:
+            return False
+
+    def match_tag_end(self):
+        """match the closing tag of the innermost open tag; on success pop that tag off the stack and return True."""
+        if not self.tag:
+            return False
+        # escape the keyword so it is always matched literally inside the pattern
+        match = self.match(r'\</%\s*' + re.escape(self.tag[-1].keyword) + r'\s*>')
+        if not match:
+            return False
+        self.tag.pop()
+        return True
+
+    def match_end(self):
+        """return a true value if the current position is the end of the text, else False.
+
+        a successful match is zero-width, so match() leaves match_position one past the end."""
+        match = self.match(r'\Z', re.S)
+        if not match:
+            return False
+        # \Z consumes nothing, so group() is '' here and the result is plain True
+        return match.group() or True
+
+    def match_text(self):
+        """match plain text up to the next tag, control/comment line, escaped newline or end of text; append it as a Text node and return True."""
+        match = self.match(r"""
+                (.*?)         # anything, followed by:
+                (
+                 (?<=\n)\s*(?=[%#]) # an eval or comment line, preceded by a consumed \n and whitespace
+                 |
+                 (?=</?[%&])  # a substitution or block or call start or end
+                                              # - don't consume
+                 |
+                 (\\\n)         # an escaped newline  - throw away
+                 |
+                 \Z           # end of string
+                )""", re.X | re.S)
+
+        if match:
+            text = match.group(1)
+            self.append_node(parsetree.Text, text)
+            return True
+        else:
+            return False
+
+    def _count_lines(self, text):  # number of newline characters in text
+        return len(re.findall(r"\n", text))
\ No newline at end of file
diff --git a/lib/mako/parsetree.py b/lib/mako/parsetree.py
new file mode 100644
index 0000000..bd2927a
--- /dev/null
+++ b/lib/mako/parsetree.py
@@ -0,0 +1,119 @@
+"""object model defining a Mako template."""
+
+from mako import exceptions
+
+class Node(object):
+    """root of the parse tree node hierarchy."""
+    def __init__(self, lineno, pos):
+        # line and column, stored exactly as supplied by the caller
+        self.lineno, self.pos = lineno, pos
+
+class ControlLine(Node):
+    """a line-oriented control statement, or the line that terminates one.
+
+    % if foo:
+        (markup)
+    % endif
+    """
+    def __init__(self, keyword, isend, text, **kwargs):
+        super(ControlLine, self).__init__(**kwargs)
+        # keyword/isend/text are stored exactly as the caller supplied them
+        self.keyword, self.isend, self.text = keyword, isend, text
+
+    def __repr__(self):
+        return "ControlLine(%r, %r, %r, %r)" % (self.keyword, self.text, self.isend, (self.lineno, self.pos))
+
+class Text(Node):
+    """a run of literal template text, held in ``content``."""
+    def __init__(self, content, **kwargs):
+        super(Text, self).__init__(**kwargs)
+        self.content = content
+    def __repr__(self):
+        return "Text(%r, %r)" % (self.content, (self.lineno, self.pos))
+        
+class Code(Node):
+    """defines a Python code block, either inline or module level.
+
+    inline:
+    <%
+        x = 12
+    %>
+
+    module level:
+    <%!
+        import logger
+    %>
+
+    ``text`` and ``ismodule`` are stored as given; ismodule presumably marks the module-level form."""
+    def __init__(self, text, ismodule, **kwargs):
+        super(Code, self).__init__(**kwargs)
+        self.text = text
+        self.ismodule = ismodule
+    def __repr__(self):  # reports its own class name (it previously printed "Comment")
+        return "Code(%s, %s, %s)" % (repr(self.text), repr(self.ismodule), repr((self.lineno, self.pos)))
+        
+class Comment(Node):
+    """a single comment line from the template.
+
+    # this is a comment
+
+    """
+    def __init__(self, text, **kwargs):
+        super(Comment, self).__init__(**kwargs)
+        self.text = text
+    def __repr__(self):
+        return "Comment(%r, %r)" % (self.text, (self.lineno, self.pos))
+        
+class Expression(Node):
+    """defines an inline expression; ``text`` holds the expression source as given.
+
+    ${x+y}
+
+    """
+    def __init__(self, text, **kwargs):
+        super(Expression, self).__init__(**kwargs)
+        self.text = text
+    def __repr__(self):  # text goes through repr() like in every other node's __repr__
+        return "Expression(%s, %s)" % (repr(self.text), repr((self.lineno, self.pos)))
+        
+class _TagMeta(type):
+    """metaclass to allow Tag to produce a subclass according to its keyword"""
+    _classmap = {}  # keyword -> class, filled in as classes with a __keyword__ are defined
+    def __init__(cls, clsname, bases, dict):
+        if cls.__keyword__ is not None:  # the base class has no keyword and is not registered
+            cls._classmap[cls.__keyword__] = cls
+        super(_TagMeta, cls).__init__(clsname, bases, dict)  # runs for every class, registered or not
+    def __call__(cls, keyword, attributes, **kwargs):
+        try:
+            cls = _TagMeta._classmap[keyword]  # the keyword, not the class being called, picks the class
+        except KeyError:
+            raise exceptions.CompileException("No such tag: '%s'" % keyword)
+        return type.__call__(cls, keyword, attributes, **kwargs)
+        
+class Tag(Node):
+    """base class for tags; calling it yields the subclass registered for the keyword.
+
+    <%sometag/>
+
+    <%someothertag>
+        stuff
+    </%someothertag>
+    """
+    __metaclass__ = _TagMeta
+    __keyword__ = None
+    def __init__(self, keyword, attributes, **kwargs):
+        super(Tag, self).__init__(**kwargs)
+        self.keyword, self.attributes = keyword, attributes
+        # child nodes; nothing in this class adds to the list
+        self.nodes = []
+    def __repr__(self):
+        return "%s(%r, %r, %r, %r)" % (self.__class__.__name__, self.keyword, self.attributes, (self.lineno, self.pos), [repr(x) for x in self.nodes])
+        
+class IncludeTag(Tag):  # registered by _TagMeta under the keyword 'include'
+    __keyword__ = 'include'
+class NamespaceTag(Tag):  # registered by _TagMeta under the keyword 'namespace'
+    __keyword__ = 'namespace'
+class ComponentTag(Tag):  # registered by _TagMeta under the keyword 'component'
+    __keyword__ = 'component'
+class CallTag(Tag):  # registered by _TagMeta under the keyword 'call'
+    __keyword__ = 'call'
diff --git a/test/lexer.py b/test/lexer.py
new file mode 100644
index 0000000..1ba692f
--- /dev/null
+++ b/test/lexer.py
@@ -0,0 +1,33 @@
+import unittest
+
+from mako.lexer import Lexer
+from mako import exceptions
+
+class LexerTest(unittest.TestCase):  # checks Lexer.parse() output through the nodes' repr()
+    def test_text_and_tag(self):  # text / tag / text: the tag must end up holding its inner text
+        template = """
+<b>Hello world</b>
+        <%component name="foo">
+                this is a component.
+        </%component>
+        
+        and some more text.
+"""
+        nodes = Lexer(template).parse()
+        # the expected repr pins each node's (line, column) and shows the attribute value with its quotes
+        assert repr(nodes) == r"""[Text('\n<b>Hello world</b>\n        ', (1, 1)), ComponentTag('component', {'name': '"foo"'}, (3, 9), ["Text('\\n                this is a component.\\n        ', (3, 32))"]), Text('\n        \n        and some more text.\n', (5, 22))]"""
+
+    def test_unclosed_tag(self):  # a tag with no closing tag must raise SyntaxException
+        template = """
+        
+            <%component name="foo">
+             other text
+        """
+        try:
+            nodes = Lexer(template).parse()
+            assert False  # reached only if parse() failed to raise
+        except exceptions.SyntaxException, e:
+            assert str(e) == "Unclosed tag: <%component> at line: 5"
+            
+if __name__ == '__main__':  # run the tests when this file is executed directly
+    unittest.main()
diff --git a/test/pygen.py b/test/pygen.py
index c67aeaa..4f3da8e 100644
--- a/test/pygen.py
+++ b/test/pygen.py
@@ -115,9 +115,6 @@ and more block.
         printer.print_adjusted_line(block)
         printer.close()
         print stream.getvalue()
-        
-
-
 
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab