From 249fd706018343b061af8157a4805c05000f6e0a Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Mon, 30 Sep 2013 16:50:13 +0200 Subject: [PATCH 01/10] added a tool to help you get perl packages and it's dependencies to .eb format --- easybuild/scripts/cpan2eb.py | 137 +++++++++++++++++++++++++++++++++++ easybuild/tools/agithub.py | 10 ++- 2 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 easybuild/scripts/cpan2eb.py diff --git a/easybuild/scripts/cpan2eb.py b/easybuild/scripts/cpan2eb.py new file mode 100644 index 0000000000..d44a2f32b1 --- /dev/null +++ b/easybuild/scripts/cpan2eb.py @@ -0,0 +1,137 @@ +## +# Copyright 2013 Ghent University +# +# This file is part of EasyBuild, +# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en), +# with support of Ghent University (http://ugent.be/hpc), +# the Flemish Supercomputer Centre (VSC) (https://vscentrum.be/nl/en), +# the Hercules foundation (http://www.herculesstichting.be/in_English) +# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en). +# +# http://github.com/hpcugent/easybuild +# +# EasyBuild is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation v2. +# +# EasyBuild is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with EasyBuild. If not, see . +## +""" +This script takes a perl module name as argument, and generates +a string compatible with the easyconfig format with metadata about the module +and all it's dependencies + +@author: Jens Timmerman +""" +import sys + +from easybuild.tools.agithub import Client + +from vsc.utils.generaloption import simple_option +from vsc.utils import fancylogger + + +logger = fancylogger.getLogger() + + +class CpanMeta(object): + """This class gets meta information from cpan + + This uses the metacpan.org api + """ + def __init__(self): + """Constructor""" + dummy = {'download_url': 'example.com/bla', 'release': 0, 'version': 0} + self.cache = {'ExtUtils::MakeMaker': dummy, 'perl': dummy} + self.graph = {'ExtUtils::MakeMaker': [], 'perl': []} + self.client = Client('api.metacpan.org', token="bla") + + def get_module_data(self, modulename): + """Get some metadata about the current version of the module""" + json = self.client.get("/v0/module/%s" % modulename)[1] + depsjson = self.client.get("/v0/release/%(author)s/%(release)s" % json) + depsjson = depsjson[1] + depsjson.update(json) + return depsjson + + def get_recursive_data(self, modulename): + """Recursively get all data (so for all dependencies)""" + # check if we have been here before + if modulename in self.cache: + logger.info('%s module cached ', modulename) + return self.graph + data = self.get_module_data(modulename) + self.cache[modulename] = data + ## module's are somtimes included in a release we already know, so skip this also + if data['release'] in self.cache: + logger.info('%s release cached', data['release']) + return self.graph + self.cache[data['release']] = data + dependencies = set() + # do the recursive thing + for dep in data['dependency']: + if "requires" in dep["relationship"]: + cpan.get_recursive_data(dep['module']) + #if "build" in dep["phase"] or "configure" in dep["phase"]: + dependencies.add(dep['module']) + self.graph[modulename] = dependencies + return self.graph + + +def post_order(graph, root): + """Walk the graph from the given root in a post-order manner, by providing the correspoding generator.""" + for node in graph[root]: + for child in post_order(graph, node): + yield child + yield root + + +def topological_sort(graph, root): + """Perform a topological sorting of the given graph. + + The graph needs to be in the following format: + + g = { t1: [t2, t3], + t2: [t4, t5, t6], + t3: [] + t4: [] + t5: [t3] + t6: [t5] + } + + where each node is mapped to a list of nodes it has an edge to. + + @returns: generator for traversing the graph in the desired order + """ + visited = set() + for node in post_order(graph, root): + if node not in visited: + yield node + visited.add(node) # been there, done that. + +# me might do a lot of recursion... +sys.setrecursionlimit(320000000) + +go = simple_option() + +cpan = CpanMeta() +modules = cpan.get_recursive_data(go.args[0]) +print modules +#print cpan.cache +# topological soft, so we get correct dependencies order +for module in topological_sort(modules, go.args[0]): + data = cpan.cache[module] + url, name = data['download_url'].rsplit("/", 1) + data.update({'url': url, 'distribution': name}) # distribution sometimes contains subdirs + #print module + if data['release'] != '0' and data['version'] != '0': + print """('%(release)s', '%(version)s', { + 'source_tmpl': '%(distribution)s', + 'source_urls': ['%(url)s'], + }),""" % data diff --git a/easybuild/tools/agithub.py b/easybuild/tools/agithub.py index a760c9f172..7fb24436f2 100644 --- a/easybuild/tools/agithub.py +++ b/easybuild/tools/agithub.py @@ -38,7 +38,7 @@ except ImportError: import simplejson as json -from vsc import fancylogger +from vsc.utils import fancylogger class Client(object): http_methods = ( 'get', @@ -47,7 +47,7 @@ class Client(object): 'put', ) - def __init__(self, username=None, password=None, token=None): + def __init__(self, url, username=None, password=None, token=None): if username is not None: if password is None and token is None: raise TypeError("You need a password to authenticate as " + username) @@ -59,6 +59,7 @@ def __init__(self, username=None, password=None, token=None): elif token is not None: self.auth_header = 'Token %s' % token self.username = username + self.url = url def get(self, url, headers={}, **params): url += self.urlencode(params) @@ -80,7 +81,7 @@ def request(self, method, url, body, headers): if self.username: headers['Authorization'] = self.auth_header else: - headers['User-Agent'] = 'agithub' + headers['User-Agent'] = 'easybuild' fancylogger.getLogger().debug('cli request: %s, %s, %s %s', method, url, body, headers) #TODO: Context manager conn = self.get_connection() @@ -105,7 +106,7 @@ def hash_pass(self, password): return 'Basic ' + base64.b64encode('%s:%s' % (self.username, password)).strip() def get_connection(self): - return httplib.HTTPSConnection('api.github.com') + return httplib.HTTPSConnection(self.url) class Github(object): @@ -132,6 +133,7 @@ class Github(object): automatically supports the full API--so why should you care? ''' def __init__(self, *args, **kwargs): + kwargs['url'] = "api.github.com" self.client = Client(*args, **kwargs) def __getattr__(self, key): return RequestBuilder(self.client).__getattr__(key) From 92ace6e19d44f0d68bfbda9cae0c444d8776153d Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Mon, 30 Sep 2013 17:05:22 +0200 Subject: [PATCH 02/10] fixed extension name in templates --- easybuild/scripts/cpan2eb.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/easybuild/scripts/cpan2eb.py b/easybuild/scripts/cpan2eb.py index d44a2f32b1..d6c2bc6b50 100644 --- a/easybuild/scripts/cpan2eb.py +++ b/easybuild/scripts/cpan2eb.py @@ -47,7 +47,7 @@ class CpanMeta(object): """ def __init__(self): """Constructor""" - dummy = {'download_url': 'example.com/bla', 'release': 0, 'version': 0} + dummy = {'download_url': 'example.com/bla', 'release': 0, 'version': 0, 'distribution': 'ExtUtils-MakeMaker'} self.cache = {'ExtUtils::MakeMaker': dummy, 'perl': dummy} self.graph = {'ExtUtils::MakeMaker': [], 'perl': []} self.client = Client('api.metacpan.org', token="bla") @@ -128,10 +128,10 @@ def topological_sort(graph, root): for module in topological_sort(modules, go.args[0]): data = cpan.cache[module] url, name = data['download_url'].rsplit("/", 1) - data.update({'url': url, 'distribution': name}) # distribution sometimes contains subdirs + data.update({'url': url, 'tarball': name}) # distribution sometimes contains subdirs #print module if data['release'] != '0' and data['version'] != '0': - print """('%(release)s', '%(version)s', { - 'source_tmpl': '%(distribution)s', + print """('%(distribution)s', '%(version)s', { + 'source_tmpl': '%(tarball)s', 'source_urls': ['%(url)s'], }),""" % data From 16cbf1b1a20d319508f610bad67ebba1460371dc Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Mon, 30 Sep 2013 17:10:23 +0200 Subject: [PATCH 03/10] better fix --- easybuild/scripts/cpan2eb.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/easybuild/scripts/cpan2eb.py b/easybuild/scripts/cpan2eb.py index d6c2bc6b50..d61cb62af0 100644 --- a/easybuild/scripts/cpan2eb.py +++ b/easybuild/scripts/cpan2eb.py @@ -47,7 +47,8 @@ class CpanMeta(object): """ def __init__(self): """Constructor""" - dummy = {'download_url': 'example.com/bla', 'release': 0, 'version': 0, 'distribution': 'ExtUtils-MakeMaker'} + dummy = {'download_url': 'example.com/bla', 'release': '0', 'version': '0', 'distribution': 'ExtUtils-MakeMaker', + 'modulename': 'ExtUtils::MakeMaker'} self.cache = {'ExtUtils::MakeMaker': dummy, 'perl': dummy} self.graph = {'ExtUtils::MakeMaker': [], 'perl': []} self.client = Client('api.metacpan.org', token="bla") @@ -58,6 +59,7 @@ def get_module_data(self, modulename): depsjson = self.client.get("/v0/release/%(author)s/%(release)s" % json) depsjson = depsjson[1] depsjson.update(json) + depsjson.update({'modulename': modulename}) return depsjson def get_recursive_data(self, modulename): @@ -130,8 +132,8 @@ def topological_sort(graph, root): url, name = data['download_url'].rsplit("/", 1) data.update({'url': url, 'tarball': name}) # distribution sometimes contains subdirs #print module - if data['release'] != '0' and data['version'] != '0': - print """('%(distribution)s', '%(version)s', { + if data['release'] is not '0' and data['version'] is not '0': + print """('%(modulename)s', '%(version)s', { 'source_tmpl': '%(tarball)s', 'source_urls': ['%(url)s'], }),""" % data From de704dc507d07afb6f33f9b684fa0dfab553334c Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Wed, 29 Jan 2014 14:07:06 +0100 Subject: [PATCH 04/10] fixed remarks --- easybuild/scripts/cpan2eb.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/easybuild/scripts/cpan2eb.py b/easybuild/scripts/cpan2eb.py index d61cb62af0..a879a9abf8 100644 --- a/easybuild/scripts/cpan2eb.py +++ b/easybuild/scripts/cpan2eb.py @@ -23,8 +23,8 @@ # along with EasyBuild. If not, see . ## """ -This script takes a perl module name as argument, and generates -a string compatible with the easyconfig format with metadata about the module +This script takes a Perl module name as argument, and generates +a string compatible with the easyconfig 1.x format with metadata about the module and all it's dependencies @author: Jens Timmerman @@ -51,7 +51,7 @@ def __init__(self): 'modulename': 'ExtUtils::MakeMaker'} self.cache = {'ExtUtils::MakeMaker': dummy, 'perl': dummy} self.graph = {'ExtUtils::MakeMaker': [], 'perl': []} - self.client = Client('api.metacpan.org', token="bla") + self.client = Client('api.metacpan.org') def get_module_data(self, modulename): """Get some metadata about the current version of the module""" @@ -70,7 +70,7 @@ def get_recursive_data(self, modulename): return self.graph data = self.get_module_data(modulename) self.cache[modulename] = data - ## module's are somtimes included in a release we already know, so skip this also + # module's are somtimes included in a release we already know, so skip this also if data['release'] in self.cache: logger.info('%s release cached', data['release']) return self.graph @@ -80,7 +80,8 @@ def get_recursive_data(self, modulename): for dep in data['dependency']: if "requires" in dep["relationship"]: cpan.get_recursive_data(dep['module']) - #if "build" in dep["phase"] or "configure" in dep["phase"]: + # if for some reason you get to many hits here, you might want to filter on build and confirure in phase: (To be further tested) + # if "build" in dep["phase"] or "configure" in dep["phase"]: dependencies.add(dep['module']) self.graph[modulename] = dependencies return self.graph @@ -117,21 +118,18 @@ def topological_sort(graph, root): yield node visited.add(node) # been there, done that. -# me might do a lot of recursion... -sys.setrecursionlimit(320000000) go = simple_option() cpan = CpanMeta() modules = cpan.get_recursive_data(go.args[0]) print modules -#print cpan.cache + # topological soft, so we get correct dependencies order for module in topological_sort(modules, go.args[0]): data = cpan.cache[module] url, name = data['download_url'].rsplit("/", 1) data.update({'url': url, 'tarball': name}) # distribution sometimes contains subdirs - #print module if data['release'] is not '0' and data['version'] is not '0': print """('%(modulename)s', '%(version)s', { 'source_tmpl': '%(tarball)s', From d1263f6997289fbc4bc538c465a18166200260b4 Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Wed, 29 Jan 2014 14:08:24 +0100 Subject: [PATCH 05/10] fixed a bug. --- easybuild/scripts/cpan2eb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/scripts/cpan2eb.py b/easybuild/scripts/cpan2eb.py index a879a9abf8..d197ec6e41 100644 --- a/easybuild/scripts/cpan2eb.py +++ b/easybuild/scripts/cpan2eb.py @@ -79,7 +79,7 @@ def get_recursive_data(self, modulename): # do the recursive thing for dep in data['dependency']: if "requires" in dep["relationship"]: - cpan.get_recursive_data(dep['module']) + self.get_recursive_data(dep['module']) # if for some reason you get to many hits here, you might want to filter on build and confirure in phase: (To be further tested) # if "build" in dep["phase"] or "configure" in dep["phase"]: dependencies.add(dep['module']) From bac87e7311e383f3718cd98c8965b8df4ac2d713 Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Wed, 29 Jan 2014 15:02:48 +0100 Subject: [PATCH 06/10] Update github.py --- easybuild/tools/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/tools/github.py b/easybuild/tools/github.py index 3fd8a6bbbf..22d6199ed9 100644 --- a/easybuild/tools/github.py +++ b/easybuild/tools/github.py @@ -50,7 +50,7 @@ def __init__(self, githubuser, reponame, branchname="master", username=None, pas @param token: (optional) a github api token. """ self.log = fancylogger.getLogger(self.__class__.__name__, fname=False) - self.gh = Github(username, password, token) + self.gh = Github(username=username, password=password, token=token) self.githubuser = githubuser self.reponame = reponame self.branchname = branchname From db2695186e0f55b4f212bd12caddbfe662787576 Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Wed, 5 Feb 2014 11:59:04 +0100 Subject: [PATCH 07/10] always set user-agent --- easybuild/tools/agithub.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/easybuild/tools/agithub.py b/easybuild/tools/agithub.py index 4825657178..4244b9d60d 100644 --- a/easybuild/tools/agithub.py +++ b/easybuild/tools/agithub.py @@ -85,8 +85,7 @@ def put(self, url, body=None, headers={}, **params): def request(self, method, url, body, headers): if self.auth_header is not None: headers['Authorization'] = self.auth_header - else: - headers['User-Agent'] = 'easybuild' + headers['User-Agent'] = 'easybuild' fancylogger.getLogger().debug('cli request: %s, %s, %s %s', method, url, body, headers) #TODO: Context manager conn = self.get_connection() From 52c38be415db11705e8897bc77ed1cf7348c17c6 Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Tue, 9 Dec 2014 11:33:27 +0100 Subject: [PATCH 08/10] moved to new vsc-base RestClient --- easybuild/scripts/cpan2eb.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/easybuild/scripts/cpan2eb.py b/easybuild/scripts/cpan2eb.py index d197ec6e41..8d03bff36d 100644 --- a/easybuild/scripts/cpan2eb.py +++ b/easybuild/scripts/cpan2eb.py @@ -1,5 +1,5 @@ ## -# Copyright 2013 Ghent University +# Copyright 2013-2014 Ghent University # # This file is part of EasyBuild, # originally created by the HPC team of Ghent University (http://ugent.be/hpc/en), @@ -29,10 +29,7 @@ @author: Jens Timmerman """ -import sys - -from easybuild.tools.agithub import Client - +from vsc.utils.rest import RestClient from vsc.utils.generaloption import simple_option from vsc.utils import fancylogger @@ -51,12 +48,13 @@ def __init__(self): 'modulename': 'ExtUtils::MakeMaker'} self.cache = {'ExtUtils::MakeMaker': dummy, 'perl': dummy} self.graph = {'ExtUtils::MakeMaker': [], 'perl': []} - self.client = Client('api.metacpan.org') + self.client = RestClient('http://api.metacpan.org') def get_module_data(self, modulename): """Get some metadata about the current version of the module""" - json = self.client.get("/v0/module/%s" % modulename)[1] - depsjson = self.client.get("/v0/release/%(author)s/%(release)s" % json) + json = self.client.v0.module[modulename].get()[1] + #depsjson = self.client.get("/v0/release/%(author)s/%(release)s" % json) + depsjson = self.client.v0.release[json['author']][json['release']].get() depsjson = depsjson[1] depsjson.update(json) depsjson.update({'modulename': modulename}) @@ -80,9 +78,10 @@ def get_recursive_data(self, modulename): for dep in data['dependency']: if "requires" in dep["relationship"]: self.get_recursive_data(dep['module']) - # if for some reason you get to many hits here, you might want to filter on build and confirure in phase: (To be further tested) - # if "build" in dep["phase"] or "configure" in dep["phase"]: - dependencies.add(dep['module']) + # we filter on dependendencies for the build and configure phase, otherwise we end up with circular + # dependencies + if "build" in dep["phase"] or "configure" in dep["phase"]: + dependencies.add(dep['module']) self.graph[modulename] = dependencies return self.graph From 894128d2089235c916342b864b39277bdbb9adab Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Tue, 9 Dec 2014 11:58:49 +0100 Subject: [PATCH 09/10] don't bother to do the recursive thing if we're going to throw away the results anyway + make httperrors print out errors, not crash --- easybuild/scripts/cpan2eb.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/easybuild/scripts/cpan2eb.py b/easybuild/scripts/cpan2eb.py index 8d03bff36d..c32e547294 100644 --- a/easybuild/scripts/cpan2eb.py +++ b/easybuild/scripts/cpan2eb.py @@ -29,6 +29,7 @@ @author: Jens Timmerman """ +from urllib2 import HTTPError from vsc.utils.rest import RestClient from vsc.utils.generaloption import simple_option from vsc.utils import fancylogger @@ -52,9 +53,12 @@ def __init__(self): def get_module_data(self, modulename): """Get some metadata about the current version of the module""" - json = self.client.v0.module[modulename].get()[1] - #depsjson = self.client.get("/v0/release/%(author)s/%(release)s" % json) - depsjson = self.client.v0.release[json['author']][json['release']].get() + try: + json = self.client.v0.module[modulename].get()[1] + depsjson = self.client.v0.release[json['author']][json['release']].get() + except HTTPError: + logger.error("API error for getting %s this will have to be resolved manually", modulename) + return {'release': '0', 'dependency': [] } depsjson = depsjson[1] depsjson.update(json) depsjson.update({'modulename': modulename}) @@ -77,10 +81,10 @@ def get_recursive_data(self, modulename): # do the recursive thing for dep in data['dependency']: if "requires" in dep["relationship"]: - self.get_recursive_data(dep['module']) # we filter on dependendencies for the build and configure phase, otherwise we end up with circular # dependencies - if "build" in dep["phase"] or "configure" in dep["phase"]: + if dep['phase'] in ('build', 'configure') : + self.get_recursive_data(dep['module']) dependencies.add(dep['module']) self.graph[modulename] = dependencies return self.graph From c691d255c6a70f54b902f4c96f80a46c2a384813 Mon Sep 17 00:00:00 2001 From: Jens Timmerman Date: Tue, 9 Dec 2014 13:09:08 +0100 Subject: [PATCH 10/10] fixed typo --- easybuild/tools/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/tools/github.py b/easybuild/tools/github.py index 6fdae943d3..9b0bb0ac52 100644 --- a/easybuild/tools/github.py +++ b/easybuild/tools/github.py @@ -268,7 +268,7 @@ def download(url, path=None): if not sorted(tmp_files) == sorted(all_files): _log.error("Not all patched files were downloaded to %s: %s vs %s" % (path, tmp_files, all_files)) - ec_files = [os.path.join(path, fnanme) for fname in tmp_files] + ec_files = [os.path.join(path, fname) for fname in tmp_files] return ec_files