python26-tokenizer.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

# Description: Fix the tokenizer so comment docstrings work with Python 2.6.
# Bug: https://sourceforge.net/tracker/index.php?func=detail&aid=2585292&group_id=32455&atid=405618
# Bug-Debian: http://bugs.debian.org/590112
# Origin: https://sourceforge.net/tracker/?func=detail&aid=2872545&group_id=32455&atid=405620
# Author: Andre Malo (ndparker)
# Reviewed-by: Kenneth J. Pronovici <pronovic@debian.org>
--- a/epydoc/docparser.py
+++ b/epydoc/docparser.py
@@ -72,6 +72,26 @@
 from epydoc.compat import *
 
 ######################################################################
+## Tokenizer change in 2.6
+######################################################################
+
+def comment_includes_nl():
+    """Return True if a comment line tokenizes to COMMENT alone, False if to COMMENT + NL."""
+    readline = iter(u'\n#\n\n'.splitlines(True)).next  # probe input: blank, '#', blank
+    tokens = [
+        token.tok_name[tup[0]] for tup in tokenize.generate_tokens(readline)
+    ]
+    if tokens == ['NL', 'COMMENT', 'NL', 'ENDMARKER']:  # no separate NL after COMMENT
+        return True
+    elif tokens == ['NL', 'COMMENT', 'NL', 'NL', 'ENDMARKER']:  # extra NL after COMMENT
+        return False
+    raise AssertionError(  # neither known token layout matched the probe
+        "Tokenizer returns unexpected tokens: %r" % tokens
+    )
+
+comment_includes_nl = comment_includes_nl()  # probe once at import; name now holds the bool
+
+######################################################################
 ## Doc Parser
 ######################################################################
 
@@ -520,6 +540,10 @@
     # inside that block, not outside it.
     start_group = None
 
+    # If the comment tokens do not include the NL, every comment token
+    # sets this to True in order to swallow the next NL token unprocessed.
+    comment_nl_waiting = False
+
     # Check if the source file declares an encoding.
     encoding = get_module_encoding(module_doc.filename)
 
@@ -570,7 +594,9 @@
         # then discard them: blank lines are not allowed between a
         # comment block and the thing it describes.
         elif toktype == tokenize.NL:
-            if comments and not line_toks:
+            if comment_nl_waiting:
+                comment_nl_waiting = False
+            elif comments and not line_toks:
                 log.warning('Ignoring docstring comment block followed by '
                             'a blank line in %r on line %r' %
                             (module_doc.filename, srow-1))
@@ -578,6 +604,7 @@
                 
         # Comment token: add to comments if appropriate.
         elif toktype == tokenize.COMMENT:
+            comment_nl_waiting = not comment_includes_nl
             if toktext.startswith(COMMENT_DOCSTRING_MARKER):
                 comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip()
                 if comment_line.startswith(" "):