summaryrefslogtreecommitdiff
path: root/libjava/contrib/aotcompile.py.in
blob: 17a2dd68ea817a7d8ea199a5967dcb14aed7c82c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
# -*- python -*-

## Copyright (C) 2005, 2006, 2008 Free Software Foundation
## Written by Gary Benson <gbenson@redhat.com>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

import classfile
import copy
# The md5 module is deprecated in Python 2.5
try: 
    from hashlib import md5 
except ImportError: 
    from md5 import md5
import operator
import os
import sys
import cStringIO as StringIO
import zipfile

PATHS = {"make":   "@MAKE@",
         "gcj":    "@prefix@/bin/gcj@gcc_suffix@",
         "dbtool": "@prefix@/bin/gcj-dbtool@gcc_suffix@"}

MAKEFLAGS = []
GCJFLAGS = ["-fPIC", "-findirect-dispatch", "-fjni"]
LDFLAGS = ["-Wl,-Bsymbolic"]

MAX_CLASSES_PER_JAR = 1024
MAX_BYTES_PER_JAR = 1048576

MAKEFILE = "Makefile"

MAKEFILE_HEADER = '''\
GCJ = %(gcj)s
DBTOOL = %(dbtool)s
GCJFLAGS = %(gcjflags)s
LDFLAGS = %(ldflags)s

%%.o: %%.jar
	$(GCJ) -c $(GCJFLAGS) $< -o $@

TARGETS = \\
%(targets)s

all: $(TARGETS)'''

MAKEFILE_JOB = '''
%(base)s_SOURCES = \\
%(jars)s

%(base)s_OBJECTS = \\
$(%(base)s_SOURCES:.jar=.o)

%(dso)s: $(%(base)s_OBJECTS)
	$(GCJ) -shared $(GCJFLAGS) $(LDFLAGS) $^ -o $@

%(db)s: $(%(base)s_SOURCES)
	$(DBTOOL) -n $@ 64
	for jar in $^; do \\
            $(DBTOOL) -f $@ $$jar \\
                %(libdir)s/%(dso)s; \\
        done'''

ZIPMAGIC, CLASSMAGIC = "PK\x03\x04", "\xca\xfe\xba\xbe"

class Error(Exception):
    pass

class Compiler:
    def __init__(self, srcdir, libdir, prefix = None):
        self.srcdir = os.path.abspath(srcdir)
        self.libdir = os.path.abspath(libdir)
        if prefix is None:
            self.dstdir = self.libdir
        else:
            self.dstdir = os.path.join(prefix, self.libdir.lstrip(os.sep))

        # Calling code may modify these parameters
        self.gcjflags = copy.copy(GCJFLAGS)
        self.ldflags = copy.copy(LDFLAGS)
        self.makeflags = copy.copy(MAKEFLAGS)
        self.exclusions = []

    def compile(self):
        """Search srcdir for classes and jarfiles, then generate
        solibs and mappings databases for them all in libdir."""
        if not os.path.isdir(self.dstdir):
            os.makedirs(self.dstdir)
        oldcwd = os.getcwd()
        os.chdir(self.dstdir)
        try:            
            jobs = self.getJobList()
            if not jobs:
                raise Error, "nothing to do"
            self.writeMakefile(MAKEFILE, jobs)
            for job in jobs:
                job.writeJars()
            system([PATHS["make"]] + self.makeflags)
            for job in jobs:
                job.clean()
            os.unlink(MAKEFILE)
        finally:
            os.chdir(oldcwd)

    def getJobList(self):
        """Return all jarfiles and class collections in srcdir."""
        jobs = weed_jobs(find_jobs(self.srcdir, self.exclusions))
        set_basenames(jobs)
        return jobs

    def writeMakefile(self, path, jobs):
        """Generate a makefile to build the solibs and mappings
        databases for the specified list of jobs."""
        fp = open(path, "w")
        print >>fp, MAKEFILE_HEADER % {
            "gcj": PATHS["gcj"],
            "dbtool": PATHS["dbtool"],
            "gcjflags": " ".join(self.gcjflags),
            "ldflags": " ".join(self.ldflags),
            "targets": " \\\n".join(reduce(operator.add, [
                (job.dsoName(), job.dbName()) for job in jobs]))}
        for job in jobs:
            values = job.ruleArguments()
            values["libdir"] = self.libdir
            print >>fp, MAKEFILE_JOB % values
        fp.close()

def find_jobs(dir, exclusions = ()):
    """Scan a directory and find things to compile: jarfiles (zips,
    wars, ears, rars, etc: we go by magic rather than file extension)
    and directories of classes."""
    def visit((classes, zips), dir, items):
        for item in items:
            path = os.path.join(dir, item)
            if os.path.islink(path) or not os.path.isfile(path):
                continue
            magic = open(path, "r").read(4)
            if magic == ZIPMAGIC:
                zips.append(path)
            elif magic == CLASSMAGIC:
                classes.append(path)
    classes, paths = [], []
    os.path.walk(dir, visit, (classes, paths))
    # Convert the list of classes into a list of directories
    while classes:
        # XXX this requires the class to be correctly located in its heirachy.
        path = classes[0][:-len(os.sep + classname(classes[0]) + ".class")]
        paths.append(path)
        classes = [cls for cls in classes if not cls.startswith(path)]
    # Handle exclusions.  We're really strict about them because the
    # option is temporary in aot-compile-rpm and dead options left in
    # specfiles will hinder its removal.
    for path in exclusions:
        if path in paths:
            paths.remove(path)
        else:
            raise Error, "%s: path does not exist or is not a job" % path
    # Build the list of jobs
    jobs = []
    paths.sort()
    for path in paths:
        if os.path.isfile(path):
            job = JarJob(path)
        else:
            job = DirJob(path)
        if len(job.classes):
            jobs.append(job)
    return jobs

class Job:
    """A collection of classes that will be compiled as a unit."""
    
    def __init__(self, path):
        self.path, self.classes, self.blocks = path, {}, None
        self.classnames = {}

    def addClass(self, bytes, name):
        """Subclasses call this from their __init__ method for
        every class they find."""
        digest = md5(bytes).digest()
        self.classes[digest] = bytes
        self.classnames[digest] = name

    def __makeBlocks(self):
        """Split self.classes into chunks that can be compiled to
        native code by gcj.  In the majority of cases this is not
        necessary -- the job will have come from a jarfile which will
        be equivalent to the one we generate -- but this only happens
        _if_ the job was a jarfile and _if_ the jarfile isn't too big
        and _if_ the jarfile has the correct extension and _if_ all
        classes are correctly named and _if_ the jarfile has no
        embedded jarfiles.  Fitting a special case around all these
        conditions is tricky to say the least.

        Note that this could be called at the end of each subclass's
        __init__ method.  The reason this is not done is because we
        need to parse every class file.  This is slow, and unnecessary
        if the job is subsetted."""
        names = {}
        for hash, bytes in self.classes.items():
            try:
                name = classname(bytes)
            except:
                warn("job %s: class %s malformed or not a valid class file" \
                     % (self.path, self.classnames[hash]))
                raise
            if not names.has_key(name):
                names[name] = []
            names[name].append(hash)
        names = names.items()
        # We have to sort somehow, or the jars we generate 
        # We sort by name in a simplistic attempt to keep related
        # classes together so inter-class optimisation can happen.
        names.sort()
        self.blocks, bytes = [[]], 0
        for name, hashes in names:
            for hash in hashes:
                if len(self.blocks[-1]) >= MAX_CLASSES_PER_JAR \
                   or bytes >= MAX_BYTES_PER_JAR:
                    self.blocks.append([])
                    bytes = 0
                self.blocks[-1].append((name, hash))
                bytes += len(self.classes[hash])

    # From Archit Shah:
    #   The implementation and the documentation don't seem to match.
    #  
    #    [a, b].isSubsetOf([a]) => True
    #  
    #   Identical copies of all classes this collection do not exist
    #   in the other. I think the method should be named isSupersetOf
    #   and the documentation should swap uses of "this" and "other"
    #
    # XXX think about this when I've had more sleep...
    def isSubsetOf(self, other):
        """Returns True if identical copies of all classes in this
        collection exist in the other."""
        for item in other.classes.keys():
            if not self.classes.has_key(item):
                return False
        return True

    def __targetName(self, ext):
        return self.basename + ext

    def tempJarName(self, num):
        return self.__targetName(".%d.jar" % (num + 1))

    def tempObjName(self, num):
        return self.__targetName(".%d.o" % (num + 1))

    def dsoName(self):
        """Return the filename of the shared library that will be
        built from this job."""
        return self.__targetName(".so")

    def dbName(self):
        """Return the filename of the mapping database that will be
        built from this job."""
        return self.__targetName(".db")

    def ruleArguments(self):
        """Return a dictionary of values that when substituted
        into MAKEFILE_JOB will create the rules required to build
        the shared library and mapping database for this job."""
        if self.blocks is None:
            self.__makeBlocks()
        return {
            "base": "".join(
                [c.isalnum() and c or "_" for c in self.dsoName()]),
            "jars": " \\\n".join(
                [self.tempJarName(i) for i in xrange(len(self.blocks))]),
            "dso": self.dsoName(),
            "db": self.dbName()}

    def writeJars(self):
        """Generate jarfiles that can be native compiled by gcj."""
        if self.blocks is None:
            self.__makeBlocks()
        for block, i in zip(self.blocks, xrange(len(self.blocks))):
            jar = zipfile.ZipFile(self.tempJarName(i), "w", zipfile.ZIP_STORED)
            for name, hash in block:
                jar.writestr(
                    zipfile.ZipInfo("%s.class" % name), self.classes[hash])
            jar.close()

    def clean(self):
        """Delete all temporary files created during this job's build."""
        if self.blocks is None:
            self.__makeBlocks()
        for i in xrange(len(self.blocks)):
            os.unlink(self.tempJarName(i))
            os.unlink(self.tempObjName(i))

class JarJob(Job):
    """A Job whose origin was a jarfile."""

    def __init__(self, path):
        Job.__init__(self, path)
        self._walk(zipfile.ZipFile(path, "r"))

    def _walk(self, zf):
        for name in zf.namelist():
            bytes = zf.read(name)
            if bytes.startswith(ZIPMAGIC):
                self._walk(zipfile.ZipFile(StringIO.StringIO(bytes)))
            elif bytes.startswith(CLASSMAGIC):
                self.addClass(bytes, name)

class DirJob(Job):
    """A Job whose origin was a directory of classfiles."""

    def __init__(self, path):
        Job.__init__(self, path)
        os.path.walk(path, DirJob._visit, self)

    def _visit(self, dir, items):
        for item in items:
            path = os.path.join(dir, item)
            if os.path.islink(path) or not os.path.isfile(path):
                continue
            fp = open(path, "r")
            magic = fp.read(4)
            if magic == CLASSMAGIC:
                self.addClass(magic + fp.read(), name)
    
def weed_jobs(jobs):
    """Remove any jarfiles that are completely contained within
    another.  This is more common than you'd think, and we only
    need one nativified copy of each class after all."""
    jobs = copy.copy(jobs)
    while True:
        for job1 in jobs:
            for job2 in jobs:
                if job1 is job2:
                    continue
                if job1.isSubsetOf(job2):
                    msg = "subsetted %s" % job2.path
                    if job2.isSubsetOf(job1):
                        if (isinstance(job1, DirJob) and
                            isinstance(job2, JarJob)):
                            # In the braindead case where a package
                            # contains an expanded copy of a jarfile
                            # the jarfile takes precedence.
                            continue
                        msg += " (identical)"
                    warn(msg)
                    jobs.remove(job2)
                    break
            else:
                continue
            break
        else:
            break
        continue
    return jobs

def set_basenames(jobs):
    """Ensure that each jarfile has a different basename."""
    names = {}
    for job in jobs:
        name = os.path.basename(job.path)
        if not names.has_key(name):
            names[name] = []
        names[name].append(job)
    for name, set in names.items():
        if len(set) == 1:
            set[0].basename = name
            continue
        # prefix the jar filenames to make them unique
        # XXX will not work in most cases -- needs generalising
        set = [(job.path.split(os.sep), job) for job in set]
        minlen = min([len(bits) for bits, job in set])
        set = [(bits[-minlen:], job) for bits, job in set]
        bits = apply(zip, [bits for bits, job in set])
        while True:
            row = bits[-2]
            for bit in row[1:]:
                if bit != row[0]:
                    break
            else:
                del bits[-2]
                continue
            break
        set = zip(
            ["_".join(name) for name in apply(zip, bits[-2:])],
            [job for bits, job in set])
        for name, job in set:
            warn("building %s as %s" % (job.path, name))
            job.basename = name
    # XXX keep this check until we're properly general
    names = {}
    for job in jobs:
        name = job.basename
        if names.has_key(name):
            raise Error, "%s: duplicate jobname" % name
        names[name] = 1

def system(command):
    """Execute a command."""
    status = os.spawnv(os.P_WAIT, command[0], command)
    if status > 0:
        raise Error, "%s exited with code %d" % (command[0], status)
    elif status < 0:
        raise Error, "%s killed by signal %d" % (command[0], -status)

def warn(msg):
    """Print a warning message."""
    print >>sys.stderr, "%s: warning: %s" % (
        os.path.basename(sys.argv[0]), msg)

def classname(bytes):
    """Extract the class name from the bytes of a class file."""
    klass = classfile.Class(bytes)
    return klass.constants[klass.constants[klass.name][1]][1]