Files
openide/python/helpers/generator3.py
2010-04-23 17:03:17 +03:00

1257 lines
44 KiB
Python

# encoding: utf-8
"""
This thing tries to restore public interface of objects that don't have a python
source: C extensions and built-in objects. It does not reimplement the
'inspect' module, but complements it.
Since built-ins don't have many features that full-blown objects have,
we do not support some fancier things like metaclasses.
We use a certain kind of doc comment ("f(int) -> list") as a hint for functions'
input and output, especially in builtin functions.
This code has to work with CPython versions from 2.2 to 3.0+, and hopefully with
compatible versions of Jython and IronPython.
NOTE: Currently Python 3 support is outright BROKEN, because bare asterisks and param decorators
are not parsed. This is deliberate in the current version, since the rest of PyCharm does not
support any of this either.
"""
import sys
import os
import string
import stat
import types
import atexit
#import __builtin__
try:
import inspect
except ImportError:
inspect = None # it may fail
import re
if sys.platform == 'cli':
import clr
string_mod = string
# (major, minor) of the running interpreter, unpacked from the two top bytes of sys.hexversion
version = (
(sys.hexversion & (0xff << 24)) >> 24,
(sys.hexversion & (0xff << 16)) >> 16
)
# Per-major-version aliases: the builtins module, the type groups used to classify values,
# and two helpers (str_join, the_exec) that must be spelled differently in Python 2 and 3.
if version[0] >= 3:
import builtins as the_builtins
string = "".__class__ # from here on 'string' is the str type; the module stays reachable as string_mod
#LETTERS = string_mod.ascii_letters
STR_TYPES = (getattr(the_builtins, "bytes"), str)
NUM_TYPES = (int, float)
SIMPLEST_TYPES = NUM_TYPES + STR_TYPES + (None.__class__,)
EASY_TYPES = NUM_TYPES + STR_TYPES + (None.__class__, dict, tuple, list)
def str_join(a_list, a_str):
"Joins the items of a_list, using a_str as the separator."
return string.join(a_str, a_list)
def the_exec(source, context):
"Executes source with context as its namespace."
exec(source, context)
else: # < 3.0
import __builtin__ as the_builtins
#LETTERS = string_mod.letters
STR_TYPES = (getattr(the_builtins, "unicode"), str)
NUM_TYPES = (int, long, float)
SIMPLEST_TYPES = NUM_TYPES + STR_TYPES + (types.NoneType,)
EASY_TYPES = NUM_TYPES + STR_TYPES + (types.NoneType, dict, tuple, list)
def str_join(a_list, a_str):
"Joins the items of a_list, using a_str as the separator."
return string_mod.join(a_list, a_str)
def the_exec(source, context):
"Executes source with context as its namespace."
exec (source) in context
# the name under which the builtins module knows itself on this interpreter
BUILTIN_MOD_NAME = the_builtins.__name__
if version[0] == 2 and version[1] < 4:
    # decorator syntax is unavailable before 2.4
    HAS_DECORATORS = False

    def lstrip(s, prefix):
        """
        Returns s without its leading run of characters found in prefix.
        Stand-in for string.lstrip(s, chars) on old interpreters.
        """
        i = 0
        n = len(s)
        # the bound check keeps an empty or all-prefix string from raising IndexError
        while i < n and s[i] in prefix:
            i += 1
        return s[i:]
else:
    HAS_DECORATORS = True
    lstrip = string.lstrip
#
# All patterns carrying regex escapes are raw strings, so the backslashes reach 're' untouched
# and the interpreter does not complain about invalid string escape sequences.
IDENT_PATTERN = "[A-Za-z_][0-9A-Za-z_]*"  # re pattern for identifier
NUM_IDENT_PATTERN = re.compile("([A-Za-z_]+)[0-9]?[A-Za-z_]*")  # 'foo_123' -> $1 = 'foo_'
STR_CHAR_PATTERN = r"[0-9A-Za-z_.,\+\-&\*% ]"  # characters accepted inside a simple quoted string
DOC_FUNC_RE = re.compile(r"(?:.*\.)?(\w+)\(([^\)]*)\).*")  # $1 = function name, $2 = arglist
SANE_REPR_RE = re.compile(IDENT_PATTERN + r"(?:\(.*\))?")  # identifier with possible (...)
IDENT_RE = re.compile("(" + IDENT_PATTERN + ")")  # $1 = identifier
STARS_IDENT_RE = re.compile(r"(\*?\*?" + IDENT_PATTERN + ")")  # $1 = identifier, maybe with a * or **
IDENT_EQ_RE = re.compile("(" + IDENT_PATTERN + r"\s*=)")  # $1 = identifier with a following '='
SIMPLE_VALUE_RE = re.compile(
    r"([+-]?[0-9]+\.?[0-9]*(?:[Ee]?[+-]?[0-9]+\.?[0-9]*)?)|" +  # number
    "('" + STR_CHAR_PATTERN + "*')|" +  # single-quoted string
    '("' + STR_CHAR_PATTERN + '*")|' +  # double-quoted string
    r"(\[\])|" +  # empty list
    r"(\{\})|" +  # empty dict
    r"(\(\))|" +  # empty tuple
    "(True|False|None)"
)  # $? = sane default value
def _searchbases(cls, accum):
# logic copied from inspect.py
if cls not in accum:
accum.append(cls)
for x in cls.__bases__:
_searchbases(x, accum)
def getMRO(a_class):
    "Returns a tuple of MRO classes."
    # logic copied from inspect.py
    if hasattr(a_class, "__mro__"):
        return a_class.__mro__
    if not hasattr(a_class, "__bases__"):
        # nothing class-like about it
        return tuple()
    # no __mro__ recorded: walk the bases by hand
    collected = []
    _searchbases(a_class, collected)
    return tuple(collected)
def getBases(a_class):  # TODO: test for classes that don't fit this scheme
    "Returns the __bases__ of a_class, or an empty tuple when it has none."
    if not hasattr(a_class, "__bases__"):
        return ()
    return a_class.__bases__
def isCallable(x):
    "Tells whether x has a __call__ attribute."
    has_call = hasattr(x, '__call__')
    return has_call
def sortedNoCase(p_array):
    "Returns a new list with the items of p_array sorted by their upper-cased form."
    result = list(p_array)
    if version[0] >= 3:
        result.sort(key=lambda item: item.upper())
        return result

    # older interpreters: comparator-style sort
    def compare_upper(left, right):
        left = left.upper()
        right = right.upper()
        if left > right:
            return 1
        if left < right:
            return -1
        return 0

    result.sort(compare_upper)
    return result
def cleanup(value):
    """
    Returns value with line breaks spelled out as the two-character sequences \\n and \\r,
    and every other character below space or above chr(127) replaced by '?'.
    """
    pieces = []
    for ch in value:
        if ch == '\n':
            pieces.append('\\n')
        elif ch == '\r':
            pieces.append('\\r')
        elif ch < ' ' or ch > chr(127):
            pieces.append('?')
        else:
            pieces.append(ch)
    return ''.join(pieces)
# http://blogs.msdn.com/curth/archive/2009/03/29/an-ironpython-profiler.aspx
def print_profile():
"Prints what clr.GetProfilerData() returns, one tab-separated line per entry, largest ExclusiveTime first."
data = []
data.extend(clr.GetProfilerData())
# comparator-style sort; negating cmp() yields descending order by ExclusiveTime
data.sort(lambda x, y: -cmp(x.ExclusiveTime, y.ExclusiveTime))
for p in data:
# columns: name, inclusive time, exclusive time, number of calls
print('%s\t%d\t%d\t%d' % (p.Name, p.InclusiveTime, p.ExclusiveTime, p.Calls))
def is_clr_type(t):
    "Tells whether clr.GetClrType accepts t; falsy values are rejected without asking."
    if t:
        try:
            clr.GetClrType(t)
        except TypeError:
            return False
        return True
    return False
_prop_types = [type(property())]
try: _prop_types.append(types.GetSetDescriptorType)
except: pass
try: _prop_types.append(types.MemberDescriptorType)
except: pass
_prop_types = tuple(_prop_types)
def isProperty(x):
return isinstance(x, _prop_types)
FAKE_CLASSOBJ_NAME = "___Classobj"
def sanitizeIdent(x):
    """
    Takes an identifier and returns it sanitized:
    names from the reserved list get a "p_" prefix, any other name has its
    dashes and spaces replaced by underscores.
    """
    # each name needs its own comma: adjacent string literals silently merge into one
    if x in ("class", "object", "def", "list", "tuple", "int", "float", "str", "unicode", "None"):
        return "p_" + x
    return x.replace("-", "_").replace(" ", "_")  # for things like "list-or-tuple" or "list or tuple"
def sanitizeValue(p_value):
    "Returns p_value or its part if it represents a sane simple value, else returns 'None'"
    if isinstance(p_value, STR_TYPES):
        found = SIMPLE_VALUE_RE.match(p_value)
        if not found:
            return 'None'
        # the text of whichever alternative of the pattern matched
        return found.groups()[found.lastindex-1]
    if isinstance(p_value, NUM_TYPES):
        return repr(p_value)
    if p_value is None:
        return 'None'
    named_builtin = (
        hasattr(p_value, "__name__")
        and hasattr(p_value, "__module__")
        and p_value.__module__ == BUILTIN_MOD_NAME
    )
    if named_builtin:
        return p_value.__name__  # float -> "float"
    return repr(repr(p_value))  # function -> "<function ...>", etc
def extractAlphaPrefix(p_string, default="some"):
    "Returns 'foo' for things like 'foo1' or 'foo2'; if a prefix cannot be found, the default is returned"
    found = NUM_IDENT_PATTERN.match(p_string)
    if found:
        prefix = found.groups()[found.lastindex-1]
        if prefix:
            return prefix
    return default
class FakeClassObj:
"A mock class representing the old style class base."
# both attributes are deliberately blanked on the mock
__module__ = None
__class__ = None
def __init__(self):
# nothing to set up
pass
if version[0] < 3:
from pyparsing import *
else:
from pyparsing_py3 import *
# grammar to parse parameter lists
# // snatched from parsePythonValue.py, from pyparsing samples, copyright 2006 by Paul McGuire but under BSD license.
# we don't suppress lots of punctuation because we want it back when we reconstruct the lists
lparen,rparen,lbrack,rbrack,lbrace,rbrace,colon = map(Literal,"()[]{}:")
# optionally signed run of digits
integer = Combine(Optional(oneOf("+ -")) + Word(nums))\
.setName("integer")
# optionally signed digits, a mandatory dot, optional fraction digits and optional exponent
real = Combine(Optional(oneOf("+ -")) + Word(nums) + "." +
Optional(Word(nums)) +
Optional(oneOf("e E")+Optional(oneOf("+ -")) +Word(nums)))\
.setName("real")
# forward declarations: the container literals and listItem refer to each other
tupleStr = Forward()
listStr = Forward()
dictStr = Forward()
boolLiteral = oneOf("True False")
noneLiteral = Literal("None")
# any literal value that may appear as a container element (and, further down, as a default value)
listItem = real|integer|quotedString|unicodeString|boolLiteral|noneLiteral| \
Group(listStr) | tupleStr | dictStr
# each container accepts an optional trailing comma
tupleStr << ( Suppress("(") + Optional(delimitedList(listItem)) +
Optional(Literal(",")) + Suppress(")") ).setResultsName("tuple")
listStr << (lbrack + Optional(delimitedList(listItem) +
Optional(Literal(","))) + rbrack).setResultsName("list")
dictEntry = Group( listItem + colon + listItem )
dictStr << (lbrace + Optional(delimitedList(dictEntry) + Optional(Literal(","))) + rbrace).setResultsName("dict")
# \\ end of the snatched part
# our output format is s-expressions:
# (simple name optional_value) is name or name=value
# (nested (simple ...) (simple ...)) is (name, name,...)
# (opt ...) is [, ...] or suchlike.
# node type markers, placed as the first element of every group
T_SIMPLE = 'Simple'
T_NESTED = 'Nested'
T_OPTIONAL = 'Opt'
# canonical spelling every parsed ellipsis is replaced with
TRIPLE_DOT = '...'
# punctuation and whitespace that is matched but kept out of the results
COMMA = Suppress(",")
APOS = Suppress("'")
QUOTE = Suppress('"')
SP = Suppress(Optional(White()))
ident = Word(alphas+"_", alphanums+"_-").setName("ident") # we accept things like "foo-or-bar"
decorated_ident = ident + Optional(Suppress(SP + Literal(":") + SP + ident)) # accept "foo: bar", ignore "bar"
spaced_ident = Combine(decorated_ident + ZeroOrMore(Literal(' ') + decorated_ident)) # we accept 'list or tuple' or 'C struct'
# allow quoted names, because __setattr__, etc docs use it
paramname = spaced_ident | \
APOS + spaced_ident + APOS | \
QUOTE + spaced_ident + QUOTE
initializer = (SP + Suppress("=") + SP + Combine(listItem | ident)).setName("=init") # accept foo=defaultfoo
# one parameter becomes a group: T_SIMPLE, the name (stars kept), and the default value if present
param = Group(Empty().setParseAction(replaceWith(T_SIMPLE)) + Combine(Optional(oneOf("* **"))+ paramname) + Optional(initializer))
# an ellipsis becomes a T_SIMPLE group whose name is TRIPLE_DOT
ellipsis = Group(
Empty().setParseAction(replaceWith(T_SIMPLE))+ \
(Literal("..") + \
ZeroOrMore(Literal('.'))).setParseAction(replaceWith(TRIPLE_DOT)) # we want to accept both 'foo,..' and 'foo, ...'
)
# forward declaration: a slot may be a nested sequence, which itself consists of slots
paramSlot = Forward()
# comma-separated slots; a trailing comma is tolerated
simpleParamSeq = ZeroOrMore(paramSlot + COMMA) + Optional(paramSlot + Optional(COMMA))
# a parenthesized sub-sequence becomes a group that starts with T_NESTED
nestedParamSeq = Group(
Suppress('(').setParseAction(replaceWith(T_NESTED)) + \
simpleParamSeq + Optional(ellipsis + Optional(COMMA) + Optional(simpleParamSeq)) + \
Suppress(')')
) # we accept "(a1, ... an)"
paramSlot << (param | nestedParamSeq)
optionalPart = Forward()
paramSeq = simpleParamSeq + Optional(optionalPart) # this is our approximate target
# the optional tail is either a bracketed group that starts with T_OPTIONAL, or a bare ellipsis
optionalPart << (
Group(
Suppress('[').setParseAction(replaceWith(T_OPTIONAL)) + Optional(COMMA) + \
paramSeq + Optional(ellipsis) + \
Suppress(']')
) \
| ellipsis
)
# this is our ideal target, with balancing paren and a multiline rest of doc.
# The inline DOTALL flag leads the pattern: 're' accepts global inline flags only at the
# very start of an expression.
paramSeqAndRest = paramSeq + Suppress(')') + Suppress(Optional(Regex("(?s).*")))
def transformSeq(results, toplevel=True):
    """
    Transforms a tree of ParseResults into a param spec.
    @param results sequence of groups, each starting with T_SIMPLE, T_NESTED or T_OPTIONAL.
    @param toplevel True for the outermost parameter list, False for a parenthesized sub-list.
    @return at the top level, always a list of parameter strings (nested lists stand for
    nested parameter tuples); for a sub-list, either such a list or a single replacement
    name when the sub-list holds defaults or an ellipsis.
    """
    ret = []  # add here token to join
    for token in results:
        token_type = token[0]
        if token_type == T_SIMPLE:
            token_name = token[1]
            if len(token) == 3:  # name with value
                if toplevel:
                    ret.append(sanitizeIdent(token_name) + "=" + sanitizeValue(token[2]))
                else:
                    # smth like "a, (b1=1, b2=2)", make it "a, p_b"
                    return "p_" + results[0][1]
            elif token_name == TRIPLE_DOT:
                if toplevel:
                    # "foo, ..." means "and more"; a starred param, if already there, covers it.
                    # Nothing is ever returned early here, so callers always get a list.
                    if not hasItemStartingWith(ret, "*"):
                        ret.append("*more")
                else:
                    # we're in a "foo, (bar1, bar2, ...)"; make it "foo, bar_tuple"
                    return extractAlphaPrefix(results[0][1]) + "_tuple"
            else:  # just name
                ret.append(sanitizeIdent(token_name))
        elif token_type == T_NESTED:
            ret.append(transformSeq(token[1:], False))
        elif token_type == T_OPTIONAL:
            ret.extend(transformOptionalSeq(token))
        else:
            raise Exception("This cannot be a token type: " + repr(token_type))
    return ret
def transformOptionalSeq(results):
    """
    Builds the list of parameter strings for an optional ("[...]") part of a parameter list.
    @param results must start from T_OPTIONAL.
    @return list of strings: "name=value" or "name=None" for each parameter, or a single
    "*name" item as soon as an ellipsis is met.
    """
    assert results[0] is T_OPTIONAL, "transformOptionalSeq expects a T_OPTIONAL node, sees " + repr(results[0])
    params = []
    for node in results[1:]:
        kind = node[0]
        if kind is T_OPTIONAL:
            params.extend(transformOptionalSeq(node))
            continue
        if kind is not T_SIMPLE:
            # maybe handle T_NESTED if such cases ever occur in real life
            # it can't be nested in a sane case, really
            continue
        name = node[1]
        if len(node) == 3:  # name with value; little sense, but can happen in a deeply nested optional
            params.append(sanitizeIdent(name) + "=" + sanitizeValue(node[2]))
        elif name == '...':
            # we're in a "foo, [bar, ...]"; make it "foo, *bar"
            # we must return a seq; [1] is first simple, [1][1] is its name
            return ["*" + extractAlphaPrefix(results[1][1])]
        else:  # just name
            params.append(sanitizeIdent(name) + "=None")
    return params
def flatten(seq):
    "Renders a tree list like ['a', ['b', 'c'], 'd'] as the string '(a, (b, c), d)': every level is wrapped in parens."
    def render(item):
        # sub-lists are rendered recursively, anything else is taken as is
        if type(item) is list:
            return flatten(item)
        return item

    return "(" + ", ".join(map(render, seq)) + ")"
def makeNamesUnique(seq, name_map=None):
    """
    Returns a copy of tree list seq where all clashing names are modified by numeric suffixes:
    ['a', 'b', 'a', 'b'] becomes ['a', 'b', 'a_1', 'b_1'].
    Each repeating name has its own counter in the name_map.
    @param seq list of names, possibly with nested lists of names.
    @param name_map counters of names seen so far; shared by all nesting levels of one call.
    """
    ret = []
    # Test against None, not truthiness: the map handed down by a recursive call may still be
    # empty, and replacing it would hide the nested names from the outer levels.
    if name_map is None:
        name_map = {}
    for one in seq:
        if type(one) is list:
            ret.append(makeNamesUnique(one, name_map))
        else:
            one_key = lstrip(one, "*")  # starred parameters are unique sans stars
            if one_key in name_map:
                one = one + "_" + str(name_map[one_key])
                name_map[one_key] += 1
            else:
                name_map[one_key] = 1
            ret.append(one)
    return ret
def hasItemStartingWith(p_seq, p_start):
    "Tells whether p_seq has a string item that starts with p_start; non-string items are ignored."
    for candidate in p_seq:
        if not isinstance(candidate, STR_TYPES):
            continue
        if candidate.startswith(p_start):
            return True
    return False
class ModuleRedeclarator(object):
def __init__(self, module, outfile, indent_size=4, doing_builtins = False):
    """
    Sets up a redeclarator for one module.
    @param module module to restore.
    @param outfile output file, must be open and writable.
    @param indent_size amount of space characters per indent
    @param doing_builtins stored as is in self.doing_builtins
    """
    self.doing_builtins = doing_builtins
    self.module = module
    self.outfile = outfile
    self.indent_size = indent_size
    self._indent_step = indent_size * " "
    self._defined = {}  # contains True for every name defined so far
    self.imported_modules = {"": the_builtins}  # the builtins module is registered under an empty name
def indent(self, level):
"Return indentation whitespace for given level."
return self._indent_step * level
def out(self, what, indent=0):
"Output the argument, indenting as nedded, and adding a eol"
self.outfile.write(self.indent(indent))
self.outfile.write(what)
self.outfile.write("\n")
def outDocstring(self, docstring, indent):
if isinstance(docstring, str):
lines = docstring.strip().split("\n")
if lines:
if len(lines) == 1:
self.out('""" ' + lines[0] + ' """', indent)
else:
self.out('"""', indent)
for line in lines:
self.out(line, indent)
self.out('"""', indent)
def outDocAttr(self, p_object, indent, p_class=None):
the_doc = p_object.__doc__
if the_doc:
if p_class and the_doc == object.__init__.__doc__ and p_object is not object.__init__ and p_class.__doc__:
the_doc = str(p_class.__doc__) # replace stock init's doc with class's; make it a certain string.
the_doc += "\n# (copied from class doc)"
self.outDocstring(the_doc, indent)
else:
self.out("# no doc", indent)
# Some values are known to be of no use in source and need to be suppressed.
# Dict is keyed by module names, with "*" meaning "any module";
# values are tuples of names of members whose value must be pruned.
# Consulted by isSkippedInModule().
SKIP_VALUE_IN_MODULE = {
"sys": (
"modules", "path_importer_cache", "argv", "builtins",
"last_traceback", "last_type", "last_value",
),
"posix": (
"environ",
),
"zipimport": (
"_zip_directory_cache",
),
"*": (BUILTIN_MOD_NAME,)
}
# Some values are special and are better represented by hand-crafted constructs.
# Dict is keyed by (module name, member name) and value is the replacement.
REPLACE_MODULE_VALUES = {
(BUILTIN_MOD_NAME, "None"): "object()"
}
# on Python 2 and older the three standard streams are replaced as well
if version[0] <= 2:
for std_file in ("stdin", "stdout", "stderr"):
REPLACE_MODULE_VALUES[("sys", std_file)] = "file('')" #
# Some functions and methods of some builtin classes have special signatures.
# {("class", "method"): ("signature_string")}
# The class is None for module-level functions. The signature string is appended to the
# function name verbatim, so signatures of methods must spell out 'self' themselves.
PREDEFINED_BUILTIN_SIGS = {
    ("object", "__init__"): "(self)",
    ("object", "__new__"): "(cls, *more)",  # only for the sake of parameter names readability
    ("str", "__init__"): "(self, x)",  # overrides a fake
    ("type", "__init__"): "(self, name, bases=None, dict=None)",  # overrides a fake
    ("int", "__init__"): "(self, x, base=10)",  # overrides a fake
    ("list", "__init__"): "(self, seq=())",
    ("tuple", "__init__"): "(self, seq=())",  # overrides a fake
    ("set", "__init__"): "(self, seq=())",
    ("dict", "__init__"): "(self, seq=None, **kwargs)",
    (None, "zip"): "(seq1, seq2, *more_seqs)",
    (None, "range"): "(start=None, stop=None, step=None)",  # suboptimal: allows empty arglist
    (None, "filter"): "(function_or_none, sequence)",
    (None, "iter"): "(source, sentinel=None)",
}
if version[0] < 3:
    PREDEFINED_BUILTIN_SIGS[("unicode", "__init__")] = "(self, x, encoding=None, errors='strict')"  # overrides a fake
    PREDEFINED_BUILTIN_SIGS[("super", "__init__")] = "(self, type1, type2=None)"
    PREDEFINED_BUILTIN_SIGS[(None, "min")] = "(*args, **kwargs)"  # too permissive, but py2.x won't allow a better sig
    PREDEFINED_BUILTIN_SIGS[(None, "max")] = "(*args, **kwargs)"
else:
    PREDEFINED_BUILTIN_SIGS[("super", "__init__")] = "(self, type1=None, type2=None)"
    # 'key' is optional in min() and max(); without a default it would read as a required argument
    PREDEFINED_BUILTIN_SIGS[(None, "min")] = "(*args, key=None)"
    PREDEFINED_BUILTIN_SIGS[(None, "max")] = "(*args, key=None)"
if version == (2, 5):
    # a typo in docstring there; being a method, it needs 'self' like every other method entry
    PREDEFINED_BUILTIN_SIGS[("unicode", "splitlines")] = "(self, keepends=None)"
# Some builtin classes effectively change __init__ signature without overriding it.
# This callable serves as a placeholder to be replaced via PREDEFINED_BUILTIN_SIGS
def fake_builtin_init(self): pass # just a callable, sig doesn't matter
fake_builtin_init.__doc__ = object.__init__.__doc__ # this forces class's doc to be used instead
# This is a tuple of builtin classes to use fake init; redoClass() gives them fake_builtin_init as __init__
FAKE_BUILTIN_INITS = (tuple, type, int, str)
# Python 2 additionally has the unicode type
if version[0] < 3:
import __builtin__ as b2
FAKE_BUILTIN_INITS = FAKE_BUILTIN_INITS + (getattr(b2, "unicode"),)
del b2
# Some builtin methods are decorated, but this is hard to detect.
# {("class_name", "method_name"): "decorator"}
KNOWN_DECORATORS = {
("dict", "fromkeys"): "staticmethod",
}
def isSkippedInModule(self, p_module, p_value):
"Returns True if p_value's value must be skipped for module p_module."
skip_list = self.SKIP_VALUE_IN_MODULE.get(p_module, [])
if p_value in skip_list:
return True
skip_list = self.SKIP_VALUE_IN_MODULE.get("*", [])
if p_value in skip_list:
return True
return False
def findImportedName(self, item):
    """
    Looks for the very object 'item' among the members of the modules in self.imported_modules.
    Values of the simplest types are never looked up.
    @param item what to check
    @return qualified name (like "sys.stdin") or None
    """
    if isinstance(item, SIMPLEST_TYPES):
        return None
    for mname in self.imported_modules:
        candidate_module = self.imported_modules[mname]
        for inner_name in candidate_module.__dict__:
            if getattr(candidate_module, inner_name) is not item:
                continue
            if mname:
                return mname + "." + inner_name
            # found under the empty module name
            if self.module is the_builtins:  # don't short-circuit builtins
                return None
            return mname + inner_name
    return None
def fmtValue(self, p_value, indent, prefix="", postfix="", as_name=None):
"""
Formats and outputs value (it occupies an entire line, or several lines for non-empty containers).
@param p_value the value.
@param indent indent level.
@param prefix text to print before the value
@param postfix text to print after the value
@param as_name hints which name are we trying to print; helps with circular refs.
"""
# the simplest values are output as their repr
if isinstance(p_value, SIMPLEST_TYPES):
self.out(prefix + repr(p_value) + postfix, indent)
else:
# imported names are not looked up on the 'cli' platform
if sys.platform == "cli":
imported_name = None
else:
imported_name = self.findImportedName(p_value)
if imported_name:
# the value is a member of an imported module: refer to it by qualified name
self.out(prefix + imported_name + postfix, indent)
else:
if isinstance(p_value, (list, tuple)):
if len(p_value) == 0:
self.out(prefix + repr(p_value) + postfix, indent)
else:
# non-empty sequence: opening bracket, one item per line, closing bracket
if isinstance(p_value, list):
lpar, rpar = "[", "]"
else:
lpar, rpar = "(", ")"
self.out(prefix + lpar, indent)
for v in p_value:
self.fmtValue(v, indent+1, postfix=",")
self.out(rpar + postfix, indent)
elif isinstance(p_value, dict):
if len(p_value) == 0:
self.out(prefix + repr(p_value) + postfix, indent)
else:
# non-empty dict: one entry per line between braces
self.out(prefix + "{", indent)
for k in p_value:
v = p_value[k]
if isinstance(k, SIMPLEST_TYPES):
self.fmtValue(v, indent+1, prefix=repr(k)+": ", postfix=",")
else:
# both key and value need fancy formatting
self.fmtValue(k, indent+1, postfix=": ")
self.fmtValue(v, indent+2)
self.out(",", indent+1)
self.out("}" + postfix, indent)
else: # something else, maybe representable
# look up this value in the module.
if sys.platform == "cli":
# on the 'cli' platform such values are always output as None
self.out(prefix + "None" + postfix, indent)
return
found_name = ""
for inner_name in self.module.__dict__:
if self.module.__dict__[inner_name] is p_value:
found_name = inner_name
break
# a name already recorded in self._defined can be referred to directly
if self._defined.get(found_name, False):
self.out(prefix + found_name + postfix, indent)
else:
# a forward / circular declaration happens
notice = ""
s = cleanup(repr(p_value))
if found_name:
if found_name == as_name:
# the value is the very name being output: put None and keep the repr in the notice
notice = " # (!) real value is " + s
s = "None"
else:
notice = " # (!) forward: " + found_name + ", real value is " + s
# a repr that begins like an identifier (optionally followed by parens) is output as is
if SANE_REPR_RE.match(s):
self.out(prefix + s + postfix + notice, indent)
else:
if not found_name:
notice = " # (!) real value is " + s
self.out(prefix + "None" + postfix + notice, indent)
def seemsToHaveSelf(self, reqargs):
""""
@param requargs a list of required arguments of a method
@return true if param_name looks like a 'self' parameter
"""
return reqargs and reqargs[0] == "self"
# Notes that restoreByDocString() returns along with the signature:
# this one when the strict parse of the doc string succeeded,
SIG_DOC_NOTE = "restored from __doc__"
# and this one when only the lenient parse could be used.
SIG_DOC_UNRELIABLY = "NOTE: unreliably restored from __doc__ "
def restoreByDocString(self, signature_string, func_name, class_name):
"""
Rebuilds a signature from the parameter list text found in a doc string.
@param signature_string: parameter list extracted from the doc string.
@param func_name: name of the function.
@param class_name: name of the containing class, or None
@return (reconstructed_spec, note); the note tells whether the strict parse succeeded
(SIG_DOC_NOTE) or only the lenient one was used (SIG_DOC_UNRELIABLY).
"""
# parse
parsing_failed = False
try:
# strict parsing
tokens = paramSeqAndRest.parseString(signature_string, True)
except ParseException:
# it did not parse completely; scavenge what we can
parsing_failed = True
tokens = []
try:
# most unrestrictive parsing
tokens = paramSeq.parseString(signature_string, False)
except ParseException:
pass
#
seq = transformSeq(tokens)
# add safe defaults for unparsed
if parsing_failed:
note = self.SIG_DOC_UNRELIABLY
# find out which kinds of starred parameters are present already
starred = None
double_starred = None
for one in seq:
if type(one) is str:
if one.startswith("**"):
double_starred = one
elif one.startswith("*"):
starred = one
if not starred:
seq.append("*args")
if not double_starred:
seq.append("**kwargs")
else:
note = self.SIG_DOC_NOTE
# add 'self' if needed
if class_name:
# known static methods get no 'self'; neither does __new__
if self.KNOWN_DECORATORS.get((class_name, func_name), None) == "staticmethod":
pass
elif not self.seemsToHaveSelf(seq) and func_name != "__new__":
seq.insert(0, "self")
seq = makeNamesUnique(seq)
return (func_name + flatten(seq), note)
def parseFuncDoc(self, func_doc, func_name, class_name):
"""
@param func_doc: __doc__ of the function.
@param func_name: name of the function.
@param class_name: name of the containing class, or None
@return (reconstructed_spec, note) or (None, None) if failed.
"""
# find the first thing to look like a definition
prefix_re = re.compile(func_name + "\s*\(") # "foo(..."
match = prefix_re.search(func_doc)
# parse the part that looks right
if match:
spec, note = self.restoreByDocString(func_doc[match.end():], func_name, class_name)
# if "NOTE" in note:
# print "------\n", func_name, "@", match.end()
# print "------\n", func_doc
# print
return (spec, note)
else:
return (None, None)
def isPredefinedBuiltin(self, module_name, class_name, func_name):
    "Tells whether builtins are being processed and (class_name, func_name) of the builtins module has a predefined signature."
    sig_key = (class_name, func_name)
    return (
        self.doing_builtins
        and module_name == BUILTIN_MOD_NAME
        and sig_key in self.PREDEFINED_BUILTIN_SIGS
    )
def restorePredefinedBuiltin(self, class_name, func_name):
spec = func_name + self.PREDEFINED_BUILTIN_SIGS[(class_name, func_name)]
note = "known special case of " + (class_name and class_name+"." or "") + func_name
return (spec, note)
def restoreByInspect(self, p_func):
    """
    Returns paramlist restored by inspect.
    @param p_func a function that the inspect module can analyse.
    @return parenthesized parameter list with sanitized default values, like "(a, b=1, *args, **kw)".
    Keyword-only parameters, if any, are not included.
    """
    # getargspec() is missing from recent Python 3 releases; use getfullargspec() where it
    # exists. The first four fields of both results have the same meaning.
    get_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
    args, varg, kwarg, defaults = get_spec(p_func)[:4]
    spec = []
    if defaults:
        dcnt = len(defaults)-1
    else:
        dcnt = -1
    args = list(args or [])
    args.reverse()  # backwards, for easier defaults handling
    for arg in args:
        if dcnt >= 0:
            # defaults belong to the last parameters, so they are consumed from the end too
            arg += "=" + sanitizeValue(defaults[dcnt])
            dcnt -= 1
        spec.insert(0, arg)
    if varg:
        spec.append("*" + varg)
    if kwarg:
        spec.append("**" + kwarg)
    return flatten(spec)
def restoreParametersForOverloads(self, methods):
"""
Builds one parameter list that stands for all the given overloads.
Parameters are taken position by position while all overloads that still have that
position agree on its name; a position that some overload lacks gets "=None",
and the first disagreement on a name ends the list with "*__args".
@param methods list of overloads; each must provide GetParameters() and IsStatic. Note: the list is modified.
@return list of parameter strings, starting with 'self' unless the first remaining overload is static.
"""
param_index = 0
star_args = False
optional = False
params = []
while True:
# iterate over a copy, since exhausted overloads are removed from the list itself
methods_copy = [m for m in methods]
for m in methods_copy:
if param_index >= len(m.GetParameters()):
# this overload has no parameter at this position: everything from here on is optional
methods.remove(m)
optional = True
if not methods:
# all overloads are exhausted; restore the last non-empty list for the IsStatic check below
methods = methods_copy
break
name = methods[0].GetParameters() [param_index].Name
for m in methods[1:]:
if m.GetParameters()[param_index].Name != name:
star_args = True
break
if star_args: break
if optional:
params.append(name + '=None')
else:
params.append(name)
param_index += 1
if star_args:
params.append("*__args")
if not methods[0].IsStatic:
params = ['self'] + params
return params
def restoreClr(self, p_name, p_class):
    """
    Restore the function signature by the CLR type signature.
    @param p_name name of the method; '__new__' stands for the constructors.
    @param p_class the class to ask the CLR about.
    @return (spec, note); note is None when the signature was restored;
    (None, None) when the method is considered inherited and must be skipped.
    """
    clr_type = clr.GetClrType(p_class)
    if p_name == '__new__':
        methods = [ctor for ctor in clr_type.GetConstructors()]
        if not methods:
            return p_name + '(*args)', 'cannot find CLR constructor'
    else:
        methods = [meth for meth in clr_type.GetMethods() if meth.Name == p_name]
        if not methods:
            bases = p_class.__bases__
            inherited = len(bases) == 1 and p_name in dir(bases[0])
            if inherited:
                # skip inherited methods
                return None, None
            return p_name + '(*args)', 'cannot find CLR method'
    params = self.restoreParametersForOverloads(methods)
    return p_name + '(' + ', '.join(params) + ')', None
def redoFunction(self, p_func, p_name, indent, p_class=None, p_modname=None):
"""
Restore function argument list as best we can, and output the function's declaration
with an empty body.
@param p_func function or method object
@param p_name function name as known to owner
@param indent indentation level
@param p_class the class that contains this function as a method
@param p_modname name of the module being processed; compared to the builtins module name
"""
# real work
classname = p_class and p_class.__name__ or None
deco = None
deco_comment = ""
# any decorators?
if self.doing_builtins and p_modname == BUILTIN_MOD_NAME:
deco = self.KNOWN_DECORATORS.get((classname, p_name), None)
if deco:
#self.out(deco + " # known case", indent)
deco_comment = " # known case"
elif p_class:
func_repr = repr(p_func) # not used further in this method
# detect native methods declared with METH_CLASS flag
if p_name != "__new__" and type(p_func).__name__.startswith('classmethod'): # 'classmethod_descriptor' in Python 2.x and 3.x, 'classmethod' in Jython
#self.out('@classmethod', indent)
deco = "classmethod"
elif type(p_func).__name__.startswith('staticmethod'):
#self.out('@staticmethod', indent)
deco = "staticmethod"
if p_name == "__new__":
#self.out("@staticmethod # known case of __new__", indent)
deco = "staticmethod"
deco_comment = " # known case of __new__"
# with decorator syntax available, the decorator goes before the def
if deco and HAS_DECORATORS:
self.out("@" + deco + deco_comment, indent)
# sources of the signature, in order of preference: inspect, the predefined table, CLR, __doc__
if inspect and inspect.isfunction(p_func):
self.out("def " + p_name + self.restoreByInspect(p_func) +": # reliably restored by inspect", indent)
self.outDocAttr(p_func, indent+1, p_class)
elif self.isPredefinedBuiltin(p_modname, classname, p_name):
spec, sig_note = self.restorePredefinedBuiltin(classname, p_name)
self.out("def " + spec + ": # " + sig_note, indent)
self.outDocAttr(p_func, indent+1, p_class)
elif sys.platform == 'cli' and is_clr_type(p_class):
spec, sig_note = self.restoreClr(p_name, p_class)
# no spec means the method must be skipped altogether
if not spec: return
if sig_note:
self.out("def " + spec + ": #" + sig_note, indent)
else:
self.out("def " + spec + ":", indent)
# no doc is output for the methods listed here
if not p_name in ['__gt__', '__ge__', '__lt__', '__le__', '__ne__', '__reduce_ex__', '__str__']:
self.outDocAttr(p_func, indent+1, p_class)
else:
# __doc__ is our best source of arglist
sig_note = "real signature unknown"
spec = ""
is_init = (p_name == "__init__" and p_class is not None)
funcdoc = None
if is_init and hasattr(p_class, "__doc__"):
if hasattr(p_func, "__doc__"):
funcdoc = p_func.__doc__
# an __init__ with the stock doc says nothing; the class's doc is used instead
if funcdoc == object.__init__.__doc__:
funcdoc = p_class.__doc__
elif hasattr(p_func, "__doc__"):
funcdoc = p_func.__doc__
sig_restored = False
if isinstance(funcdoc, STR_TYPES):
(spec, more_notes) = self.parseFuncDoc(funcdoc, p_name, classname)
sig_restored = spec is not None
if more_notes:
if sig_note:
sig_note += "; "
sig_note += more_notes
if not sig_restored:
# use an allow-all declaration
decl = []
if p_class:
decl.append("self")
decl.append("*args")
decl.append("**kwargs")
spec = p_name + "(" + ", ".join(decl) + ")"
self.out("def " + spec + ": # " + sig_note, indent)
self.outDocstring(funcdoc, indent+1)
# empty body
self.out("pass", indent+1)
# without decorator syntax, the decorator is applied by an assignment after the def
if deco and not HAS_DECORATORS:
self.out(p_name + " = " + deco + "(" + p_name + ")" + deco_comment, indent)
self.out("", 0) # empty line after each item
def redoClass(self, p_class, p_name, indent, p_modname=None):
"""
Restores a class definition: the header with bases, the doc, then the members
grouped as methods, properties and other values.
@param p_class the class object
@param p_name class name as known to owner
@param indent indentation level
@param p_modname name of the module being processed; passed on to redoFunction()
"""
bases = getBases(p_class)
base_def = ""
if bases:
base_def = "(" + ", ".join([x.__name__ for x in bases]) + ")"
self.out("class " + p_name + base_def + ":", indent)
self.outDocAttr(p_class, indent+1)
# inner parts
if hasattr(p_class, "__dict__"):
# members of the class, by name, sorted into three groups
methods = {}
properties = {}
others = {}
we_are_the_base_class = p_modname == BUILTIN_MOD_NAME and p_name in ("object", FAKE_CLASSOBJ_NAME)
for item_name in p_class.__dict__:
if item_name in ("__doc__", "__module__"):
if we_are_the_base_class:
item = "" # must be declared in base types
else:
continue # in all other cases. must be skipped
else:
item = p_class.__dict__[item_name]
# callables are checked first, then property-like descriptors
if isCallable(item):
methods[item_name] = item
elif isProperty(item):
properties[item_name] = item
else:
others[item_name] = item
#
if we_are_the_base_class:
others["__dict__"] = {} # force-feed it, for __dict__ does not contain a reference to itself :)
# add fake __init__s to type and tuple to have the right sig
if p_class in self.FAKE_BUILTIN_INITS:
methods["__init__"] = self.fake_builtin_init
#
# each group is output in case-insensitive name order
for item_name in sortedNoCase(methods.keys()):
item = methods[item_name]
self.redoFunction(item, item_name, indent+1, p_class, p_modname)
#
for item_name in sortedNoCase(properties.keys()):
item = properties[item_name]
self.out(item_name + " = property(None, None, None)", indent+1) # TODO: handle docstring
if properties:
self.out("", 0) # empty line after the block
#
for item_name in sortedNoCase(others.keys()):
item = others[item_name]
self.fmtValue(item, indent+1, prefix = item_name + " = ")
if others:
self.out("", 0) # empty line after the block
#
# a class with no members at all still needs a body
if not methods and not properties and not others:
self.out("pass", indent+1)
def redo(self, p_name):
    """
    Restores module declarations.
    Intended for built-in modules and thus does not handle import statements.
    Output order: header comments and doc attribute, imports of modules found in the module's
    namespace, reexported names, simple variables, functions, classes, complex variables.
    Every name written from the reexports onward is marked in self._defined.
    @param p_name module name as given by the caller; shown in the header comment, used as the
           module key for replaced / skipped values, and passed down as p_modname
    """
    self.out("# encoding: utf-8", 0) # NOTE: maybe encoding should be selectable
    if hasattr(self.module, "__name__"):
        mod_name = " calls itself " + self.module.__name__
    else:
        mod_name = " does not know its name"
    self.out("# module " + p_name + mod_name, 0)
    if hasattr(self.module, "__file__"):
        self.out("# from file " + self.module.__file__, 0)
    self.outDocAttr(self.module, 0)
    # work on a snapshot of the names: getattr() in the bucketing loop below may run getters,
    # and a dict must not change size while it is being iterated
    item_names = list(self.module.__dict__.keys())
    # find whatever other self.imported_modules the module knows; effectively these are imports
    for item_name in item_names:
        item = self.module.__dict__[item_name]
        if isinstance(item, type(sys)):
            self.imported_modules[item_name] = item
            if hasattr(item, "__name__"):
                self.out("import " + item.__name__ + " as " + item_name + " # refers to " + str(item), 0)
            else:
                self.out(item_name + " = None # XXX name unknown, refers to " + str(item), 0)
    self.out("", 0) # empty line after imports
    # group what else we have into buckets
    vars_simple = {}
    vars_complex = {}
    funcs = {}
    classes = {}
    reexports = {} # contains not real objects, but qualified id strings, like "sys.stdout"
    #
    for item_name in item_names:
        if item_name in ("__dict__", "__doc__", "__module__", "__file__", "__name__"):
            continue
        try:
            item = getattr(self.module, item_name) # let getters do the magic
        except:
            # deliberately broad: whatever the getter raised, fall back to the stored value
            item = self.module.__dict__[item_name] # have it raw
        # check if it has percolated from an imported module
        if sys.platform == "cli" and p_name != "System":
            # IronPython has non-trivial reexports in System module, but not in others
            imported_name = None
        else:
            imported_name = self.findImportedName(item)
        if imported_name is not None:
            reexports[item_name] = imported_name
        else:
            if isinstance(item, type) or item is FakeClassObj: # some classes are callable, check them before functions
                classes[item_name] = item
            elif isCallable(item):
                funcs[item_name] = item
            elif isinstance(item, type(sys)):
                continue # self.imported_modules handled above already
            else:
                if isinstance(item, SIMPLEST_TYPES):
                    vars_simple[item_name] = item
                else:
                    vars_complex[item_name] = item
    #
    # sort and output every bucket
    if reexports:
        self.out("# reexported imports", 0)
        self.out("", 0)
        for item_name in sortedNoCase(reexports.keys()):
            item = reexports[item_name]
            self.out(item_name + " = " + item, 0)
            self._defined[item_name] = True
        self.out("", 0) # empty line after group
    #
    if vars_simple:
        prefix = "" # try to group variables by common prefix
        PREFIX_LEN = 2 # default prefix length if we can't guess better
        self.out("# Variables with simple values", 0)
        for item_name in sortedNoCase(vars_simple.keys()):
            item = vars_simple[item_name]
            # track the prefix
            if len(item_name) >= PREFIX_LEN:
                prefix_pos = item_name.rfind("_") # most prefixes end in an underscore
                if prefix_pos < 1:
                    # no underscore, or only a leading one: fall back to the default length
                    prefix_pos = PREFIX_LEN
                beg = item_name[0:prefix_pos]
                if prefix != beg:
                    self.out("", 0) # space out from other prefix
                    prefix = beg
            else:
                prefix = ""
            # output
            replacement = self.REPLACE_MODULE_VALUES.get((p_name, item_name), None)
            if replacement is not None:
                self.out(item_name + " = " + replacement + " # real value of type "+ str(type(item)) + " replaced", 0)
            elif self.isSkippedInModule(p_name, item_name):
                self.out(item_name + " = None # real value of type "+ str(type(item)) + " skipped", 0)
            else:
                self.fmtValue(item, 0, prefix = item_name + " = " )
            self._defined[item_name] = True
        self.out("", 0) # empty line after vars
    #
    if funcs:
        self.out("# functions", 0)
        self.out("", 0)
        for item_name in sortedNoCase(funcs.keys()):
            item = funcs[item_name]
            self.redoFunction(item, item_name, 0, p_modname=p_name)
            self._defined[item_name] = True
            self.out("", 0) # empty line after each item
    else:
        self.out("# no functions", 0)
    #
    if classes:
        self.out("# classes", 0)
        self.out("", 0)
        # sort classes so that inheritance order is preserved
        cls_list = [] # items are (class_name, mro_tuple)
        for cls_name in sortedNoCase(classes.keys()):
            cls = classes[cls_name]
            # by default append; but if some already listed class has cls in its MRO,
            # cls must be declared before that class
            ins_index = len(cls_list)
            for i in range(ins_index):
                maybe_child_bases = cls_list[i][1]
                if cls in maybe_child_bases:
                    ins_index = i # we could not go farther than current ins_index
                    break # ...and need not go farther than first known child
            cls_list.insert(ins_index, (cls_name, getMRO(cls)))
        for item_name in [cls_item[0] for cls_item in cls_list]:
            item = classes[item_name]
            self.redoClass(item, item_name, 0, p_modname=p_name)
            self._defined[item_name] = True
            self.out("", 0) # empty line after each item
    else:
        self.out("# no classes", 0)
    #
    if vars_complex:
        self.out("# variables with complex values", 0)
        self.out("", 0)
        for item_name in sortedNoCase(vars_complex.keys()):
            item = vars_complex[item_name]
            replacement = self.REPLACE_MODULE_VALUES.get((p_name, item_name), None)
            if replacement is not None:
                self.out(item_name + " = " + replacement + " # real value of type "+ str(type(item)) + " replaced", 0)
            elif self.isSkippedInModule(p_name, item_name):
                self.out(item_name + " = None # real value of type "+ str(type(item)) + " skipped", 0)
            else:
                self.fmtValue(item, 0, prefix = item_name + " = " , as_name = item_name)
            self._defined[item_name] = True
            self.out("", 0) # empty line after each item
# command-line interface
if __name__ == "__main__":
    # Script entry point: parses the command line, then for every requested module name
    # imports the module and writes its restored skeleton to a .py file (or to __init__.py
    # when a directory of that name already exists) under the output dir.
    from getopt import getopt
    import os
    if sys.version_info[0] > 2:
        import io # in 3.0
        fopen = io.open
    else:
        fopen = open
    # handle cmdline
    helptext="""Generates interface skeletons for python modules.
Usage: generator [options] [name ...]
Every "name" is a (qualified) module name, e.g. "foo.bar"
Output files will be named as modules plus ".py" suffix.
Normally every name processed will be printed and stdout flushed.
Options are:
-h -- prints this help message.
-d dir -- output dir, must be writable. If not given, current dir is used.
-b -- use names from sys.builtin_module_names
-q -- quiet, do not print anything on stdout. Errors still go to stderr.
-u -- update, only recreate skeletons for newer files, and skip unchanged.
-x -- die on exceptions with a stacktrace; only for debugging.
-c modules -- import CLR assemblies with specified names
-p -- run CLR profiler
"""
    opts, fnames = getopt(sys.argv[1:], "d:hbquxc:p")
    opts = dict(opts)
    # help is shown on request or when nothing at all was given;
    # module names without any options are a valid invocation
    if '-h' in opts or (not opts and not fnames):
        print(helptext)
        sys.exit(0)
    if '-b' not in opts and not fnames:
        sys.stderr.write("Neither -b nor any module name given\n")
        sys.exit(1)
    quiet = '-q' in opts
    update_mode = "-u" in opts
    debug_mode = "-x" in opts
    subdir = opts.get('-d', '')
    # determine names
    names = fnames
    if '-b' in opts:
        doing_builtins = True
        names.extend(sys.builtin_module_names)
        if BUILTIN_MOD_NAME not in names:
            names.append(BUILTIN_MOD_NAME)
        if '__main__' in names:
            names.remove('__main__') # we don't want ourselves processed
    else:
        doing_builtins = False
    if sys.platform == 'cli':
        refs = opts.get('-c', '')
        if refs:
            for ref in refs.split(';'):
                clr.AddReferenceByPartialName(ref)
        if '-p' in opts:
            atexit.register(print_profile)
        # the start time is needed for the final report on this platform regardless of -p
        from System import DateTime
        start = DateTime.Now
    # go on
    for name in names:
        if not quiet:
            sys.stdout.write(name + "\n")
            sys.stdout.flush()
        action = "doing nothing" # what we were busy with, for the error message below
        try:
            quals = name.split(".")
            # os.path.join keeps the paths relative to the current dir when no -d is given
            for pathindex in range(len(quals)-1): # create dirs for all quals but last
                subdirname = os.path.join(subdir, *quals[0 : pathindex+1])
                if not os.path.isdir(subdirname):
                    action = "creating subdir " + subdirname
                    os.makedirs(subdirname)
                init_py = os.path.join(subdirname, "__init__.py")
                if os.path.isfile(subdirname + ".py"):
                    # a skeleton written earlier for the package becomes its __init__.py
                    os.rename(subdirname + ".py", init_py)
                elif not os.path.isfile(init_py):
                    init = fopen(init_py, "w")
                    init.close()
            target_name = os.path.join(subdir, *quals)
            if os.path.isdir(target_name):
                fname = os.path.join(target_name, "__init__.py")
            else:
                fname = target_name + ".py"
            #
            action = "importing"
            try:
                mod = __import__(name)
            except ImportError:
                sys.stderr.write("Name " + name + " failed to import\n")
                continue
            # we can't really import a.b.c, only a, so follow the path
            importable = True
            for q in quals[1:]:
                action = "getting submodule " + q
                try:
                    mod = getattr(mod, q)
                except AttributeError:
                    sys.stderr.write("Name " + name + " is not really importable at point " + q + "\n")
                    importable = False
                    break
            if not importable:
                continue # skip this name, same as for a failed import
            #
            if update_mode and hasattr(mod, "__file__"):
                action = "probing " + fname
                # a missing file counts as infinitely old
                mod_mtime = os.path.exists(mod.__file__) and os.path.getmtime(mod.__file__) or 0.0
                file_mtime = os.path.exists(fname) and os.path.getmtime(fname) or 0.0
                if mod_mtime <= file_mtime:
                    continue # skip the file
            if doing_builtins and name == BUILTIN_MOD_NAME:
                action = "grafting"
                setattr(mod, FAKE_CLASSOBJ_NAME, FakeClassObj)
            action = "opening " + fname
            outfile = fopen(fname, "w")
            try:
                action = "restoring"
                r = ModuleRedeclarator(mod, outfile, doing_builtins=doing_builtins)
                r.redo(name)
                action = "closing " + fname
            finally:
                outfile.close() # never leave the file open, even if restoring failed
        except KeyboardInterrupt:
            raise # let the user stop the whole run
        except:
            # deliberately broad: importing and inspecting arbitrary modules may raise anything
            sys.stderr.write("Failed to process " + name + " while " + action + "\n")
            if debug_mode:
                raise
            else:
                continue
    if sys.platform == 'cli':
        print("Generation completed in " + str((DateTime.Now - start).TotalMilliseconds) + " ms")