[PATCH] scripts: Compile out syscalls given a specific userspace

Iulia Manda iulia.manda21 at gmail.com
Mon Feb 23 18:41:08 EET 2015


This patch suggests which syscalls can be compiled out in the kernel given a
specific userspace, by mapping each syscall with its corresponding symbol(s)
and deciding which of them can be disabled.

The steps taken in the script are the following:

1. Get the list of syscalls a userspace uses (nm) - this will give us more
symbols than those that match syscalls, but the next step will filter them
out;
2. Intersect that list with the list of all optional syscalls (produced by the
check-syscalls script, which finds in kernel/sys_ni.c the syscalls that can be
compiled out) => we obtain the optional syscalls that this userspace actually
uses; the remaining optional syscalls are candidates for being compiled out;
3. Parse C files and Makefiles in the kernel source code in order to map each
syscall with the symbols that compile it out:
- we need a stack in order to know between which ifdef and endif a syscall is
defined;
- we keep a dictionary where the key is the syscall and the values are all the
symbols that it depends on and the conditionals between them;
4. The output will be a list of symbols that can be disabled, and the
corresponding list of symbols that need to be enabled in order for the
application to work.

In case of uncertainty (e.g. compound conditionals), the script chooses to
enable all the symbols that the syscall depends on.

In short, it provides correct solutions, though not necessarily optimal ones
yet (for example, in case of a disjunction, both symbols are set to True,
even though only one is needed in order for the syscall to be compiled in).

You can run the script as follows:

compile_syscalls.py object_file syscalls-optional \
        `find staging/ -name "*.c"` > output

Signed-off-by: Iulia Manda <iulia.manda21 at gmail.com>
---
 scripts/compile_syscalls.py |  194 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 194 insertions(+)
 create mode 100755 scripts/compile_syscalls.py

diff --git a/scripts/compile_syscalls.py b/scripts/compile_syscalls.py
new file mode 100755
index 0000000..e573686
--- /dev/null
+++ b/scripts/compile_syscalls.py
@@ -0,0 +1,194 @@
+#!/usr/bin/python
+
+import re, sys, os, fileinput
+import pprint
+
+# The object file and the optional-syscalls list are both mandatory; the
+# trailing list of kernel source files may be empty.
+if not len(sys.argv) >= 3:
+    usage = "usage: %s object_file syscalls-optional source_files\n"
+    sys.stderr.write(usage % sys.argv[0])
+    sys.exit(-1)
+
+
+# Find what syscalls a userspace uses
+def get_userspace_syscalls(file):
+    """Return the names of the glibc symbols listed by ``nm`` for *file*.
+
+    Every word of the ``nm`` output that contains ``@@GLIBC`` contributes
+    the part preceding ``@@GLIBC`` (``write@@GLIBC_2.2.5`` yields
+    ``write``); at most one word per output line is used.  Names are
+    returned in ``nm`` output order and may repeat.
+
+    If ``nm`` cannot be started, or prints nothing for *file*, the result
+    is an empty list.
+    """
+    # Imported locally so this function does not rely on the import line
+    # at the top of the script.
+    import subprocess
+
+    try:
+        # Hand the path over as its own argument instead of pasting it
+        # into a shell command line: spaces or shell metacharacters in
+        # *file* must never be interpreted by a shell.
+        proc = subprocess.Popen(["nm", file], stdout=subprocess.PIPE,
+                                universal_newlines=True)
+    except OSError as err:
+        sys.stderr.write("nm: %s\n" % err)
+        return []
+    output = proc.communicate()[0]
+
+    sym = []
+    for l in output.splitlines():
+        for e in l.split():
+            if '@@GLIBC' in e:
+                sym.append(e.split('@@GLIBC')[0])
+                break
+    return sym
+
+
+# Find which syscalls from userspace can be optionally compiled in the kernel
+def get_optional_syscalls(file):
+    """Return the userspace symbols that name an optional syscall.
+
+    *file* is read as a list with one entry per line.  The symbols come
+    from get_userspace_syscalls() run on the object file given as the first
+    command-line argument; a symbol ``name`` is kept when the exact line
+    ``sys_name`` appears in *file*.  Order and repetitions of the userspace
+    symbols are preserved.
+    """
+    # Run this on the object file of the application
+    sym = get_userspace_syscalls(sys.argv[1])
+    if not sym:
+        # Nothing to look up, so the list is not even opened.
+        return []
+    # Read the list once, into a set, instead of re-reading the whole file
+    # and scanning it linearly for every single symbol.
+    with open(file) as f:
+        optional = set(f.read().splitlines())
+    return [e for e in sym if "sys_" + e in optional]
+
+
+ 
+def c_to_o(file):
+    """Return the object-file name for source path *file*, space-prefixed.
+
+    Everything up to the last '/' and the final extension are dropped and
+    the result is " <name>.o"; e.g. "kernel/fork.c" gives " fork.o".
+    """
+    base = file.rsplit('/', 1)[-1]
+    stem = os.path.splitext(base)[0]
+    return " %s.o" % stem
+
+
+def add_to_dictionary(dict1, key, value):
+    """Append the items of list *value* to the list stored at dict1[key].
+
+    A missing *key* starts out with an empty list.  The dictionary always
+    holds a list of its own: storing *value* itself would let a later call
+    for the same key extend the caller's list (and every other entry that
+    was created from that same list) behind its back.
+    """
+    dict1.setdefault(key, []).extend(value)
+    
+def get_syscall_name(line):
+    """Return the first macro argument of a SYSCALL_DEFINE-style *line*.
+
+    The line is cut at '(', ',' and ')' and the second piece is returned,
+    e.g. "SYSCALL_DEFINE1(close, unsigned int, fd)" gives "close".  A line
+    containing none of those characters raises IndexError.
+    """
+    pieces = re.split('[(,)]', line, 2)
+    return pieces[1]
+
+
+def get_ifdef_symbols(line):
+    """Return a one-element list holding the second word of *line*.
+
+    For "#ifdef CONFIG_FOO" (or a Makefile "ifdef CONFIG_FOO") that is
+    ["CONFIG_FOO"].  A line with fewer than two words raises IndexError.
+    """
+    words = line.split(None, 2)
+    symbol = words[1]
+    return [symbol]
+
+
+def get_defined_symbols(line):
+    """Return the words left of an "#if defined ..." *line*.
+
+    The pieces '#if', 'defined ', 'defined(', ')', '&&', '||' and '>=' are
+    deleted one after the other, in that order, and what remains is split
+    on whitespace.  Anything else, such as a leading '!', stays part of its
+    word.
+    """
+    remainder = line
+    for piece in ('#if', 'defined ', 'defined(', ')', '&&', '||', '>='):
+        remainder = ''.join(remainder.split(piece))
+    return remainder.split()
+
+class Node:
+    """One conditional region, linked to the region that encloses it.
+
+    parent   -- the enclosing Node, or None for the outermost one
+    name     -- the symbols guarding the region ("" when there are none)
+    children -- the Nodes registered through add_child()
+    """
+
+    def __init__(self, parent=None, name=""):
+        self.children = []
+        self.name = name
+        self.parent = parent
+
+    def add_child(self, el):
+        """Record *el* as a child of this node."""
+        self.children += [el]
+
+
+# Check the Makefile in order to see if a file containing a syscall
+# is compiled out as a whole
+curr = Node()
+map_sys = {}
+def parse_makefile(file):
+    """Record the symbols that decide whether *file* is built at all.
+
+    *file* is scanned for SYSCALL_DEFINE/COMPAT_SYSCALL_DEFINE lines; when
+    it defines no syscall nothing else happens.  Otherwise the Makefile in
+    the same directory is searched for lines naming the file's object, and
+    every syscall of *file* gets, in map_sys:
+
+    - the CONFIG symbol of each "...-$(CONFIG_FOO)" rule listing the object;
+    - the symbol of the innermost "ifdef" around a line that lists it.
+
+    A missing or unreadable Makefile is silently ignored.
+    """
+    sys_list = []
+    with open(file) as f:
+        for l in f.read().splitlines():
+            if re.search(r"^(COMPAT_)?SYSCALL_DEFINE", l):
+                sys_list.append(get_syscall_name(l))
+    if not sys_list:
+        return
+    search_for = c_to_o(file)
+    try:
+        # os.path.join keeps a bare file name relative ("Makefile" rather
+        # than "/Makefile"), and "with" closes the Makefile again.  Only a
+        # failure to read it means "no information"; other errors surface.
+        with open(os.path.join(os.path.dirname(file), "Makefile")) as f:
+            lines = f.read().replace('\\\n', '').splitlines()
+    except (IOError, OSError):
+        return
+
+    def record(symbols):
+        # Rebind instead of extending in place: the list already stored
+        # for a syscall may be shared with other entries.
+        for e in sys_list:
+            map_sys[e] = map_sys.get(e, []) + symbols
+
+    # The Makefile gets a stack of its own: the nesting of the C
+    # preprocessor regions tracked in the global "curr" is unrelated.
+    node = Node()
+    for l in lines:
+        if re.search(r'^(ifdef|ifndef|ifeq|ifneq)', l):
+            # Only "ifdef" names a symbol that has to be set.  The other
+            # conditionals are pushed unnamed, so that their "endif" does
+            # not close an enclosing "ifdef" instead.
+            name = ""
+            if re.search(r'^ifdef', l):
+                try:
+                    name = get_ifdef_symbols(l)
+                except IndexError:
+                    pass  # bare "ifdef": there is nothing to name
+            node = Node(parent=node, name=name)
+        elif re.search(r'^endif', l):
+            if node.parent is not None:
+                node = node.parent
+        elif search_for in l:
+            # Look at each rule line by itself, so that the symbol really
+            # belongs to the rule that lists the object.
+            rule = re.search(r'-\$\((CONFIG\w*)\)', l)
+            if rule:
+                record([rule.group(1)])
+            # Check if a file is compiled under ifdefs
+            if node.name:
+                record(list(node.name))
+
+
+curr = Node()
+def parse_line(line):
+    """Update the preprocessor state, or record a syscall, for *line*.
+
+    The global "curr" is the innermost open conditional region:
+
+    - "#ifdef X" and "#if defined ..." open a region named after their
+      symbols; every other "#if..." line opens an unnamed region;
+    - "#else" / "#elif" replace the current region by an unnamed one;
+    - "#endif" closes the current region;
+    - a SYSCALL_DEFINE / COMPAT_SYSCALL_DEFINE line adds the symbols of
+      all open regions to the syscall's entry in map_sys.
+    """
+    global curr
+    if re.search(r"^#if", line):
+        if re.search(r"^#ifdef", line):
+            name = get_ifdef_symbols(line)
+        elif re.search(r"^#if defined", line):
+            name = get_defined_symbols(line)
+        else:
+            # "#ifndef", "#if 0", "#if !defined(X)", ...: no symbol can be
+            # blamed, but the region still has to be tracked so that its
+            # "#endif" does not close an enclosing region instead.
+            name = ""
+        new = Node(parent=curr, name=name)
+        curr.add_child(new)
+        curr = new
+    elif re.search(r"^#el(se|if)", line):
+        # The alternative branch is compiled when the opening condition is
+        # false, so its contents must not be attributed to those symbols.
+        if curr.parent is not None:
+            new = Node(parent=curr.parent)
+            curr.parent.add_child(new)
+            curr = new
+    elif re.search(r"^#endif", line):
+        if curr.parent is not None:
+            curr = curr.parent
+    elif re.search(r"^(COMPAT_)?SYSCALL_DEFINE", line):
+        syscall_name = get_syscall_name(line)
+        # A definition nested in several regions needs the symbols of all
+        # of them, not just the innermost one.  Collecting into a fresh
+        # list also keeps the nodes' own name lists out of map_sys.
+        symbols = []
+        node = curr
+        while node is not None:
+            symbols.extend(node.name)
+            node = node.parent
+        if symbols:
+            add_to_dictionary(map_sys, syscall_name, symbols)
+        
+
+def parse_files():
+    """Feed every source file named on the command line to the parsers.
+
+    Each path from the fourth command-line word on is read and passed line
+    by line to parse_line(), then handed to parse_makefile().
+    """
+    for path in sys.argv[3:]:
+        with open(path) as src:
+            # Fold backslash-continued lines into one logical line, so
+            # that information split over several lines is seen as a whole.
+            text = src.read().replace('\\\n', '')
+        for logical_line in text.splitlines():
+            parse_line(logical_line)
+        parse_makefile(path)
+
+parse_files()
+# For a readable dump of the intermediate syscall -> symbols mapping,
+# uncomment:
+# pprint.pprint(map_sys)
+# print "\n"
+
+
+# Every symbol seen so far starts out as False (not enabled).
+bool_dict = {}
+for deps in map_sys.values():
+    bool_dict.update(dict.fromkeys(deps, False))
+
+
+def enable_symbol():
+    """Mark as enabled every symbol an optional userspace syscall needs.
+
+    The syscalls come from get_optional_syscalls() applied to the second
+    command-line argument; those without an entry in map_sys are skipped.
+    """
+    for syscall in get_optional_syscalls(sys.argv[2]):
+        for dep in map_sys.get(syscall, ()):
+            bool_dict[dep] = True
+            
+enable_symbol()
+
+# Write the report: first the symbols that stayed disabled, then the ones
+# that were switched on.
+out = sys.stdout.write
+out("\n" + "\n")
+out("You can disable the following symbols:\n" + "\n")
+for symbol, enabled in bool_dict.items():
+    if not enabled:
+        out(symbol + "\n")
+out("\n" + "\n")
+
+out("The following symbols have to be enabled:\n" + "\n")
+for symbol, enabled in bool_dict.items():
+    if enabled:
+        out(symbol + "\n")
-- 
1.7.10.4



More information about the firefly mailing list