Commit 70aa06d8 authored by Thanassis Tsiodras's avatar Thanassis Tsiodras
Browse files

Added Static Analysis tests, and ability to use different cross-toolchain tools.

parent c960c0a6
.analysed
.setup
# Script that the static analysers (flake8, pylint, mypy) are run against.
TARGET := orchestrator/checkStackUsage.py

# Interpreter used for the version check and for creating the virtualenv.
PYTHON := python3

# Directory that holds the project-local virtualenv.
VENV := .venv

# Default goal: bring the virtualenv up to date, then run the static analysers.
all: .setup .analysed
# Stamp file recording a successful static-analysis run.
# The analysers are re-run when the analysed script changes, and also when
# pylint.cfg changes: pylint reads it via --rcfile, so an edited configuration
# must invalidate the previous result.
# Each tool runs in its own sub-make so that a failure stops the chain before
# the stamp is touched.
.analysed: ${TARGET} pylint.cfg
	$(MAKE) flake8
	$(MAKE) pylint
	$(MAKE) mypy
	@touch $@
# Run flake8 from the virtualenv on the analysed script.
flake8: dev-install
	@printf '%s\n' \
		"============================================" \
		" Running flake8..." \
		"============================================"
	$(VENV)/bin/flake8 $(TARGET)
# Run pylint from the virtualenv on the analysed script, using the project's
# pylint.cfg and with informational (I) messages disabled.
pylint: dev-install
	@printf '%s\n' \
		"============================================" \
		" Running pylint..." \
		"============================================"
	$(VENV)/bin/pylint --disable=I --rcfile=pylint.cfg $(TARGET)
# Run mypy from the virtualenv on the analysed script; imports that have no
# type information are ignored rather than reported.
mypy: dev-install
	@printf '%s\n' \
		"============================================" \
		" Running mypy..." \
		"============================================"
	$(VENV)/bin/mypy --ignore-missing-imports $(TARGET)
# Run the test-suite verbosely with the virtualenv's pytest.
test: dev-install
	$(VENV)/bin/pytest -v test_detector.py

# Same as 'test', but drop into the debugger (pdb) on failures.
test-dbg: dev-install
	$(VENV)/bin/pytest -v --pdb test_detector.py
# Common prerequisite of every tool target: requires the '.setup' stamp
# (virtualenv created and packages installed).  'prereq' (the Python version
# check) is listed after '|', i.e. as an order-only prerequisite.
dev-install: .setup | prereq
# Abort with an explanatory banner unless ${PYTHON} is at least version 3.5.
# The check compares the whole version tuple: testing 'major < 3 or minor < 5'
# independently would wrongly reject any release whose minor number is below 5
# even though its major number is above 3.
prereq:
	@${PYTHON} -c 'import sys; sys.exit(0 if sys.version_info >= (3, 5) else 1)' || { \
		echo "=============================================" ; \
		echo "[x] You need at least Python 3.5 to run this." ; \
		echo "=============================================" ; \
		exit 1 ; \
	}
# Stamp file recording that the virtualenv exists and that the packages listed
# in requirements.txt are installed in it; redone when requirements.txt changes.
#
# 'prereq' is an order-only prerequisite: the Python version check runs before
# the virtualenv is created, but never forces a re-install by itself.
#
# Packages are installed with the virtualenv's own interpreter instead of
# "activate, then ${PYTHON}": if PYTHON is overridden with an absolute path,
# activation does not redirect it and pip would install outside the virtualenv.
.setup: requirements.txt | prereq
	@if [ ! -d "${VENV}" ] ; then \
		echo "[-] Installing VirtualEnv environment..." ; \
		${PYTHON} -m venv "${VENV}" || exit 1 ; \
	fi
	@echo "[-] Installing packages inside environment..."
	${VENV}/bin/python -m pip install -r $<
	touch $@
# Remove tool caches, coverage data and the stamp files, so that the next run
# re-installs the packages and re-runs the analysers.  The virtualenv
# directory itself is not removed.
clean:
	$(RM) -r .cache/ .mypy_cache/ .pytest_cache/ \
		__pycache__ tests/__pycache__ \
		.analysed .setup .processed .coverage
# Targets that are commands rather than files.  'all', 'test' and 'test-dbg'
# are included so that a stray file or directory with one of those names
# cannot make the target look up to date and silently skip its recipe.
.PHONY: all flake8 pylint mypy test test-dbg clean dev-install prereq
#!/usr/bin/env python2
#!/usr/bin/env python3
"""
Utility to detect recursive calls and calculate total stack usage per function
(via following the call graph). Works for x86 and SPARC/Leon binaries.
......@@ -12,6 +12,8 @@ import re
import sys
import operator
from typing import Dict, Set, Optional, List
class Matcher:
"""regexp helper"""
......@@ -31,14 +33,16 @@ class Matcher:
return self._hit.group(idx)
def CheckForCycles(callGraph, badNodes):
def CheckForCycles(
callGraph: Dict[str, Optional[Set]],
badNodes: List[str]) -> None:
"""Detect cycles in function call graphs"""
def journey(path):
node = path[-1]
if node not in callGraph:
return
if callGraph[node] is None: # has been marked as recursive,
# so propagate the marking
# if marked as recursive, propagate the marking
if callGraph[node] is None:
badNodes[:] = path[:]
return
neighbours = callGraph[node]
......@@ -54,7 +58,11 @@ def CheckForCycles(callGraph, badNodes):
return
def findStackUsage(fn, stackUsagePerFunction, callGraph, cache={}):
def findStackUsage(
fn: str,
stackUsagePerFunction: Dict[str, Optional[int]],
callGraph: Dict[str, Optional[Set]],
cache: Dict[str, Optional[int]] = {}) -> Optional[int]:
"""
Calculate the total stack usage of each function,
taking into account who it calls
......@@ -63,83 +71,115 @@ def findStackUsage(fn, stackUsagePerFunction, callGraph, cache={}):
if fn in cache: # memoization
return cache[fn]
if fn not in stackUsagePerFunction:
return 0
if fn not in callGraph or not callGraph[fn]:
cache[fn] = stackUsagePerFunction[fn]
return stackUsagePerFunction[fn]
totalStackUsage = max( # the largest of the possible call chains
((stackUsagePerFunction[fn] +
findStackUsage(x, stackUsagePerFunction, callGraph))
for x in callGraph[fn]))
return None
thisFunctionStackSize = stackUsagePerFunction[fn]
if thisFunctionStackSize is None:
return None
calledFunctions = callGraph.get(fn, set())
if fn not in callGraph or not calledFunctions:
cache[fn] = thisFunctionStackSize
return thisFunctionStackSize
usageOfCalledFunctions = [
findStackUsage(x, stackUsagePerFunction, callGraph)
for x in calledFunctions
]
totalStackUsage = 0 # type: Optional[int]
for x in usageOfCalledFunctions:
if x is None:
totalStackUsage = None
break
assert totalStackUsage is not None
totalStackUsage = max(totalStackUsage, x+thisFunctionStackSize)
cache[fn] = totalStackUsage
return totalStackUsage
def main():
if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
print "Usage: %s ELFbinary" % sys.argv[0]
def main() -> None:
if len(sys.argv) < 2 or not os.path.exists(sys.argv[-1]):
print("Usage: %s [-cross PREFIX] ELFbinary" % sys.argv[0])
print("\nwhere the default prefix is:\n")
print("\tarm-eabi- for ARM binaries")
print("\tsparc-elf- for SPARC binaries")
print("\t(no prefix) for x86/amd64 binaries")
print("\nNote that if you use '-cross', SPARC opcodes are assumed.\n")
sys.exit(1)
binarySignature = os.popen("file \"%s\"" % sys.argv[1]).readlines()[0]
x86 = Matcher(r'ELF 32-bit LSB.*80.86')
x64 = Matcher(r'ELF 64-bit LSB.*x86-64')
leon = Matcher(r'ELF 32-bit MSB.*SPARC')
arm = Matcher(r'ELF 32-bit LSB.*ARM')
if x86.search(binarySignature):
objdump = 'objdump'
nm = 'nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*call\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(r'^.*[add|sub]\s+\$(0x\S+),%esp')
elif x64.search(binarySignature):
objdump = 'objdump'
nm = 'nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*callq\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(r'^.*[add|sub]\s+\$(0x\S+),%rsp')
elif leon.search(binarySignature):
objdump = 'sparc-elf-objdump'
nm = 'sparc-elf-nm'
try:
idx = sys.argv.index("-cross")
except ValueError:
idx = -1
if idx != -1:
cross_prefix = sys.argv[idx+1]
objdump = cross_prefix + 'objdump'
nm = cross_prefix + 'nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*call\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(
r'^.*save.*%sp, (-([0-9]{2}|[3-9])[0-9]{2}), %sp')
elif arm.search(binarySignature):
objdump = 'arm-eabi-objdump'
nm = 'arm-eabi-nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*bl\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(
r'^.*sub.*sp, (#[0-9][0-9]*)')
else:
print "Unknown signature:", binarySignature
sys.exit(1)
binarySignature = os.popen("file \"%s\"" % sys.argv[-1]).readlines()[0]
x86 = Matcher(r'ELF 32-bit LSB.*80.86')
x64 = Matcher(r'ELF 64-bit LSB.*x86-64')
leon = Matcher(r'ELF 32-bit MSB.*SPARC')
arm = Matcher(r'ELF 32-bit LSB.*ARM')
if x86.search(binarySignature):
objdump = 'objdump'
nm = 'nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*call\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(r'^.*[add|sub]\s+\$(0x\S+),%esp')
elif x64.search(binarySignature):
objdump = 'objdump'
nm = 'nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*callq\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(r'^.*[add|sub]\s+\$(0x\S+),%rsp')
elif leon.search(binarySignature):
objdump = 'sparc-elf-objdump'
nm = 'sparc-elf-nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*call\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(
r'^.*save.*%sp, (-([0-9]{2}|[3-9])[0-9]{2}), %sp')
elif arm.search(binarySignature):
objdump = 'arm-eabi-objdump'
nm = 'arm-eabi-nm'
functionNamePattern = Matcher(r'^(\S+) <([a-zA-Z0-9_]+?)>:')
callPattern = Matcher(r'^.*bl\s+\S+\s+<([a-zA-Z0-9_]+)>')
stackUsagePattern = Matcher(
r'^.*sub.*sp, (#[0-9][0-9]*)')
else:
print("Unknown signature:", binarySignature, "please use -cross")
sys.exit(1)
# Store .text symbol offsets and sizes (use nm)
offsetOfSymbol = {}
offsetOfSymbol = {} # type: Dict[str, int]
for line in os.popen(
nm + " \"" + sys.argv[1] + "\" | grep ' [Tt] '").readlines():
offset, unused, symbol = line.split()
offsetOfSymbol[symbol] = int(offset, 16)
nm + " \"" + sys.argv[-1] + "\" | grep ' [Tt] '").readlines():
offsetData, unused, symbolData = line.split()
offsetOfSymbol[symbolData] = int(offsetData, 16)
sizeOfSymbol = {}
lastOffset = 0
lastSymbol = None
for symbol, offset in sorted(
offsetOfSymbol.iteritems(), key=operator.itemgetter(1)):
sortedSymbols = sorted(
offsetOfSymbol.items(), key=operator.itemgetter(1))
for symbolStr, offsetInt in sortedSymbols:
if lastSymbol:
sizeOfSymbol[lastSymbol] = offset-lastOffset
lastSymbol = symbol
lastOffset = offset
sizeOfSymbol[lastSymbol] = offsetInt-lastOffset
lastSymbol = symbolStr
lastOffset = offsetInt
sizeOfSymbol[lastSymbol] = 2**31 # allow last .text symbol to roam free
# Parse disassembly to create callgraph (use objdump -d)
functionName = ""
stackUsagePerFunction = {}
callGraph = {}
stackUsagePerFunction = {} # type: Dict[str, Optional[int]]
callGraph = {} # type: Dict[str, Optional[Set]]
insideFunctionBody = False
currentFunctionStackSize = 0
offsetPattern = Matcher(r'^([0-9A-Za-z]+):')
for line in os.popen(objdump + " -d \"" + sys.argv[1] + "\"").readlines():
for line in os.popen(objdump + " -d \"" + sys.argv[-1] + "\"").readlines():
# Have we matched a function name yet?
if functionName != "":
# Yes, update "insideFunctionBody" boolean by checking
......@@ -164,13 +204,13 @@ def main():
# make sure this is the function we found with nm
# UPDATE: no, can't do - if a symbol is of local file scope
# (i.e. if it was declared with 'static')
# then it can appear in multiple places...
# then it may appear in multiple offsets!...
#
#if functionName in offsetOfSymbol:
# if offsetOfSymbol[functionName] != offset:
# print "Weird,", functionName, \
# "is not at offset reported by", nm
# print hex(offsetOfSymbol[functionName]), hex(offset)
# if functionName in offsetOfSymbol:
# if offsetOfSymbol[functionName] != offset:
# print "Weird,", functionName, \
# "is not at offset reported by", nm
# print hex(offsetOfSymbol[functionName]), hex(offset)
insideFunctionBody = True
foundFirstCall = False
stackUsagePerFunction[functionName] = 0
......@@ -184,7 +224,9 @@ def main():
if functionName != "" and call:
foundFirstCall = True
calledFunction = call.group(1)
callGraph[functionName].add(calledFunction)
calledFunctions = callGraph[functionName]
if calledFunctions is not None:
calledFunctions.add(calledFunction)
# Check to see if we have a stack reduction opcode
# 8048bec: 83 ec 04 sub $0x46,%esp
......@@ -205,39 +247,42 @@ def main():
else:
# save %sp, -104, %sp
value = -int(value)
assert(
stackUsagePerFunction[functionName] is not None)
stackUsagePerFunction[functionName] += value
#for fn,v in stackUsagePerFunction.items():
# print fn,v
# print "CALLS:", callGraph[fn]
# for fn,v in stackUsagePerFunction.items():
# print fn,v
# print "CALLS:", callGraph[fn]
# First, detect cycles and remove "bad" nodes from calculations
# (recursive calls would lead to infinite stack usage)
while True:
badNodes = []
badNodes = [] # type: List[str]
CheckForCycles(callGraph, badNodes)
if not badNodes:
break
lastStep = badNodes[-1] + " (recursive)"
badNodes.pop()
badNodes.append(lastStep)
print "Detected cycle and will ignore these functions:\n\t", \
"\n\t".join(badNodes)
print("Detected cycle and will ignore these functions:\n\t",
"\n\t".join(badNodes))
for n in set(badNodes):
stackUsagePerFunction[n] = None # marked as recursive
callGraph[n] = None # marked as recursive
print "Cumulative stack usage per function:"
print("Cumulative stack usage per function:")
# Then, navigate the graph to calculate stack needs per function
results = []
for fn, value in stackUsagePerFunction.items():
if value is not None:
results.append(
(fn, findStackUsage(fn, stackUsagePerFunction, callGraph)))
#else:
# results.append((fn, 'recursive'))
# else:
# results.append((fn, 'recursive'))
for fn, value in sorted(results, key=operator.itemgetter(1)):
print "%10s: %s" % (value, fn)
print("%10s: %s" % (value, fn))
if __name__ == "__main__":
main()
[MASTER]
# Specify a configuration file.
#rcfile=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS
# Pickle collected data for later comparisons.
persistent=yes
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=
# Use multiple processes to speed up Pylint.
jobs=1
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=lxml,clang
# Allow optimization of some AST trees. This will activate a peephole AST
# optimizer, which will apply various small optimizations. For instance, it can
# be used to obtain the result of joining multiple strings with the addition
# operator. Joining a lot of strings can lead to a maximum recursion error in
# Pylint and this flag can prevent that. It has one side effect, the resulting
# AST will be different than the one from reality.
optimize-ast=no
[MESSAGES CONTROL]
# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
confidence=
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by commas (,) or put this option
# multiple times (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
#enable=
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by commas (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=coerce-method,nonzero-method,buffer-builtin,unichr-builtin,reload-builtin,using-cmp-argument,reduce-builtin,filter-builtin-not-iterating,zip-builtin-not-iterating,raising-string,long-builtin,backtick,long-suffix,delslice-method,suppressed-message,cmp-method,old-octal-literal,basestring-builtin,metaclass-assignment,print-statement,execfile-builtin,round-builtin,oct-method,standarderror-builtin,hex-method,import-star-module-level,indexing-exception,map-builtin-not-iterating,old-ne-operator,setslice-method,input-builtin,apply-builtin,range-builtin-not-iterating,xrange-builtin,parameter-unpacking,no-absolute-import,old-raise-syntax,dict-iter-method,unicode-builtin,unpacking-in-except,old-division,file-builtin,next-method-called,useless-suppression,raw_input-builtin,intern-builtin,getslice-method,dict-view-method,cmp-builtin,coerce-builtin,line-too-long,missing-docstring,protected-access,global-statement,too-many-arguments,too-many-branches,too-many-locals,bare-except,invalid-name,too-many-statements,broad-except,too-many-instance-attributes,too-many-public-methods,too-few-public-methods,similarities,no-else-return,fixme,relative-beyond-top-level,import-outside-toplevel
never-returning-functions=dmt.commonPy.utility.panic,sys.exit
[REPORTS]
# Set the output format. Available formats are text, parseable, colorized, msvs
# (visual studio) and html. You can also give a reporter class, eg
# mypackage.mymodule.MyReporterClass.
output-format=text
# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file named "pylint_global.[txt|html]".
files-output=no
# Tells whether to display a full report or only the messages
reports=yes
# Python expression which should return a score of at most 10 (10 is the
# highest score). You have access to the variables error, warning, refactor,
# convention and statement, which respectively contain the number of messages
# in each category and the total number of statements analyzed. This is used
# by the global evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details
#msg-template=
[BASIC]
# List of builtin function names that should not be used, separated by a comma
bad-functions=map,filter
# Good variable names which should always be accepted, separated by a comma
good-names=i,j,k,ex,Run,_
# Bad variable names which should always be refused, separated by a comma
bad-names=foo,bar,baz,toto,tutu,tata
# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=
# Include a hint for the correct naming format with invalid-name
include-naming-hint=no
# Regular expression matching correct module names
module-rgx=(([a-z_][A-Za-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
# Naming hint for module names
module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
# Regular expression matching correct constant names
const-rgx=(([a-zA-Z_][a-zA-Z0-9_]*)|(__.*__))$
# Naming hint for constant names
const-name-hint=(([a-zA-Z_][A-Z0-9_]*)|(__.*__))$
# Regular expression matching correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$
# Naming hint for class names
class-name-hint=[A-Z_][a-zA-Z0-9]+$
# Regular expression matching correct method names
method-rgx=[a-zA-Z_][a-zA-Z0-9_]{2,30}$
# Naming hint for method names
method-name-hint=[a-z_][a-z0-9_]{2,30}$
# Regular expression matching correct argument names
argument-rgx=[a-z_][A-Za-z0-9_]{2,30}$
# Naming hint for argument names
argument-name-hint=[a-z_][A-Za-z0-9_]{2,30}$
# Regular expression matching correct class attribute names
class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
# Naming hint for class attribute names
class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
# Regular expression matching correct inline iteration names
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
# Naming hint for inline iteration names
inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$
# Regular expression matching correct function names
function-rgx=[A-Za-z_][A-Za-z0-9_]{2,30}$
# Naming hint for function names
function-name-hint=[a-z_][a-z0-9_]{2,30}$
# Regular expression matching correct attribute names
attr-rgx=[a-z_][A-Za-z0-9_]{2,30}$
# Naming hint for attribute names
attr-name-hint=[a-z_][a-z0-9_]{2,30}$
# Regular expression matching correct variable names
variable-rgx=[a-z_][A-Za-z0-9_]{2,30}$
# Naming hint for variable names
variable-name-hint=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_
# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1
[ELIF]
# Maximum number of nested blocks for function / method body
max-nested-blocks=6
[SIMILARITIES]
# Minimum number of lines of a similarity.
min-similarity-lines=4
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
# Ignore imports when computing similarities.
ignore-imports=no
[FORMAT]
# Maximum number of characters on a single line.
max-line-length=140
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no
# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
# `empty-line` allows space-only lines.
no-space-check=trailing-comma,dict-separator
# Maximum number of lines in a module
max-module-lines=1000
# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
# tab).
indent-string='    '
# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
[VARIABLES]
# Tells whether we should check for unused import in __init__ files.
init-import=no
# A regular expression matching the name of dummy variables (i.e. expectedly
# not used).
dummy-variables-rgx=_$|dummy|unused.*$|__*
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,_cb
[MISCELLANEOUS]
# List of note tags to take into consideration, separated by a comma.
notes=FIXME,XXX,TODO
[SPELLING]
# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains a private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words in the private dictionary indicated by
# the --spelling-private-dict-file option instead of raising a message.