diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2485343
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+deb/requirements/
+deb/code.zip
+
+rpm/code.zip
+
+rpm/__pycache__
+rpm/boto3*
+rpm/botocore*
+rpm/concurrent*
+rpm/docutils*
+rpm/dateutil*
+rpm/six*
+rpm/futures*
+rpm/pyrpm*
+rpm/tests*
+rpm/jmespath*
+rpm/python_gnupg*
+rpm/python_dateutil*
+
+rpm/code.zip
+
+rpm/six.py
+rpm/gnupg.py
+rpm/gnupg.pyc
diff --git a/deb/Makefile b/deb/Makefile
new file mode 100644
index 0000000..36fd44f
--- /dev/null
+++ b/deb/Makefile
@@ -0,0 +1,10 @@
+ZIPPED := s3apt.py gnupg.py debian/*
+
+
+all: requires compress
+
+requires:
+	pip install -t . -r requirements.txt
+
+compress:
+	zip code.zip $(ZIPPED)
\ No newline at end of file
diff --git a/deb/README.md b/deb/README.md
new file mode 100644
index 0000000..c212262
--- /dev/null
+++ b/deb/README.md
@@ -0,0 +1,80 @@
+
+# AWS Lambda APT repository manager for S3
+
+Rewrite of [szinck/s3apt](https://github.com/szinck/s3apt) with a few changes and extra features: a Release file is generated and signed with the GPG key you provide.
+
+## Setting up S3 and Lambda
+
+Clone the repo and get all other required files
+```
+git clone https://github.com/tactycal/lambdaRepos.git
+cd lambdaRepos/deb
+pip install -t . -r requirements.txt
+```
+
+Compress all needed files
+```
+zip code.zip s3apt.py gnupg.py debian/*
+```
+
+Assuming you already have a GPG key pair generated, export the private key
+```
+gpg --export-secret-key > secret.key
+```
+
+Create a new Lambda function, set the handler to **s3apt.lambda_handler** and the triggers to:
+
+ * Object Created, suffix 'deb'
+ * Object Removed, suffix 'deb'
+ * If you are using a certain directory as a repo, set it as the prefix
+
+Upload `code.zip` to the Lambda function
+
+Set the environment variables
+
+| Key | Value |
+| --- | ---|
+| PUBLIC | True/False |
+| GPG_KEY | File |
+| GPG_PASS | GPG key password |
+| BUCKET_NAME | Bucket Name |
+| CACHE_PREFIX | Directory |
+
+**PUBLIC** Set to True for the outputs to be publicly readable
+
+**GPG_KEY** Location of your GPG private key relative to the root of the bucket (e.g. secret/private.key)
+
+**GPG_PASS** Passphrase of the private key uploaded to GPG_KEY (Note: environment variables can be encrypted using KMS keys)
+
+**BUCKET_NAME** Name of the bucket. Should be the same as the one selected in the triggers and the one you're using for the repository
+
+**CACHE_PREFIX** Path to the folder for the package cache (e.g. deb/cache)
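+
+As a convenience, the same variables can also be set from the AWS CLI once the function exists. The following is only a sketch; the function name and the values shown are placeholders taken from the examples above, so adjust them to your setup:
+```
+aws lambda update-function-configuration \
+    --function-name <function name> \
+    --environment Variables='{PUBLIC=True,GPG_KEY=secret/private.key,GPG_PASS=<passphrase>,BUCKET_NAME=<bucket name>,CACHE_PREFIX=deb/cache}'
+```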
+
+
+Make a folder in your S3 bucket with the same name as the CACHE_PREFIX variable
+
+Upload the secret key file to the location you specified as GPG_KEY
+
+Upload a .deb file to the desired folder; the Lambda function should now keep your repository up to date
+
+## Setting up apt
+
+First time set up
+```
+echo "deb https://s3.$AWS_SERVER.amazonaws.com/$BUCKET_NAME/$PATH_TO_FOLDER_WITH_DEBIAN_FILES /" | sudo tee -a /etc/apt/sources.list
+#an example of the resulting link: "https://s3.eu-central-1.amazonaws.com/testbucket/repo"
+#add the public key to trusted sources - you have to export the public key or use a key server
+sudo apt-key add <exported public key>
+sudo apt update
+sudo apt install <package>
+```
+
+Upgrading a package
+```
+sudo apt update
+sudo apt upgrade
+```
+
+## Notes
+
+The .deb, Release and Packages files are (and should be) publicly accessible for the previously mentioned method of setting up apt's sources list to work. If you don't want them to be public, change PUBLIC in the environment variables to False and refer to szinck's guide [here](http://webscale.plumbing/managing-apt-repos-in-s3-using-lambda)
diff --git a/deb/requirements.txt b/deb/requirements.txt
new file mode 100644
index 0000000..c76b6c9
--- /dev/null
+++ b/deb/requirements.txt
@@ -0,0 +1,9 @@
+boto3==1.3.1
+botocore==1.4.41
+docutils==0.12
+futures==3.0.5
+jmespath==0.9.0
+python-dateutil==2.5.3
+python-debian==0.1.28
+six==1.10.0
+python-gnupg==0.4.1
\ No newline at end of file
diff --git a/deb/s3apt.py b/deb/s3apt.py
new file mode 100644
index 0000000..fc4e029
--- /dev/null
+++ b/deb/s3apt.py
@@ -0,0 +1,278 @@
+from __future__ import print_function
+from time import gmtime, strftime
+import urllib
+import boto3
+import botocore
+import tempfile
+import tarfile
+import debian.arfile
+import hashlib
+import re
+import sys
+import os
+import gnupg
+
+def lambda_handler(event, context):
+    print('Starting lambda function')
+    #Get bucket and key info
+    bucket = event['Records'][0]['s3']['bucket']['name']
+    key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key']).decode('utf8')
+
+    if bucket == os.environ['BUCKET_NAME'] and key.endswith(".deb"):
+        #Build packages file
+        if event['Records'][0]['eventName'].startswith('ObjectCreated'):
+            s3 = boto3.resource('s3')
+            deb_obj = s3.Object(bucket_name=bucket, key=key)
+            print("S3 Notification of new key. 
Ensuring cached control data exists: %s" % (str(deb_obj))) + get_cached_control_data(deb_obj) + + prefix = "/".join(key.split('/')[0:-1]) + '/' + #Update packages file + rebuild_package_index(prefix) + + #Build Release file + build_release_file(prefix) + + #Sign Release file + sign_release_file(prefix) + + +def get_cached_control_data(deb_obj): + #gets debian control data + s3 = boto3.resource('s3') + etag = deb_obj.e_tag.strip('"') + + cache_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=os.environ['CACHE_PREFIX'] + '/' + etag) + exists = True + try: + control_data = cache_obj.get()['Body'].read() + except botocore.exceptions.ClientError as e: + if e.response['Error']['Code'] == 'NoSuchKey': + exists = False + else: + raise(e) + + if not exists: + control_data = read_control_data(deb_obj) + cache_obj.put(Body=control_data) + + return control_data + +def read_control_data(deb_obj): + fd, tmp = tempfile.mkstemp() + fh = os.fdopen(fd, "wb") + s3fh = deb_obj.get()['Body'] + size = 1024*1024 + while True: + dat = s3fh.read(size) + fh.write(dat) + if len(dat) < size: + break + fh.close() + + try: + ctrl = get_control_data(tmp) + pkg_rec = format_package_record(ctrl, tmp) + return pkg_rec + finally: + os.remove(tmp) + +def get_control_data(debfile): + ar = debian.arfile.ArFile(debfile) + + control_fh = ar.getmember('control.tar.gz') + + tar_file = tarfile.open(fileobj=control_fh, mode='r:gz') + + # control file can be named different things + control_file_name = [x for x in tar_file.getmembers() if x.name in ['control', './control']][0] + + control_data = tar_file.extractfile(control_file_name).read().strip() + # Strip out control fields with blank values. This tries to allow folded + # and multiline fields to pass through. See the debian policy manual for + # more info on folded and multiline fields. 
+ # https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-binarycontrolfiles + lines = control_data.strip().split("\n") + filtered = [] + for line in lines: + # see if simple field + if re.search(r"^\w[\w\d_-]+\s*:", line): + k, v = line.split(':', 1) + if v.strip() != "": + filtered.append(line) + else: + # otherwise folded or multiline, just pass it through + filtered.append(line) + + return "\n".join(filtered) + +def format_package_record(ctrl, fname): + pkgrec = ctrl.strip().split("\n") + + stat = os.stat(fname) + pkgrec.append("Size: %d" % (stat.st_size)) + + md5, sha1, sha256 = checksums(fname) + pkgrec.append("MD5sum: %s" % (md5)) + pkgrec.append("SHA1: %s" % (sha1)) + pkgrec.append("SHA256: %s" % (sha256)) + + return "\n".join(pkgrec) + +def checksums(fname): + + fh = open(fname, "rb") + + md5 = hashlib.md5() + sha1 = hashlib.sha1() + sha256 = hashlib.sha256() + + size = 1024 * 1024 + while True: + dat = fh.read(size) + md5.update(dat) + sha1.update(dat) + sha256.update(dat) + if len(dat) < size: + break + + fh.close() + + return md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest() + +def rebuild_package_index(prefix): + # Get all .deb keys in directory + # Get the cache entry + # build package file + deb_names = [] + deb_objs = [] + + print("REBUILDING PACKAGE INDEX: %s" % (prefix)) + s3 = boto3.resource('s3') + for obj in s3.Bucket(os.environ['BUCKET_NAME']).objects.filter(Prefix=prefix): + if not obj.key.endswith(".deb"): + continue + deb_objs.append(obj) + deb_names.append(obj.key.split('/')[-1]) + + if not len(deb_objs): + print("NOT BUILDING EMPTY PACKAGE INDEX") + return + + # See if we need to rebuild the package index + metadata_pkghash = get_package_index_hash(prefix) + calcd_pkghash = calc_package_index_hash(deb_names) + print("calcd_pkghash=%s, metadata_pkghash=%s" % (calcd_pkghash, metadata_pkghash)) + if metadata_pkghash == calcd_pkghash: + print("PACKAGE INDEX ALREADY UP TO DATE") + return + + pkginfos = [] + for obj in deb_objs: + print(obj.key) + + pkginfo = get_cached_control_data(obj) + if obj.key.startswith(prefix): + filename = obj.key[len(prefix):] + pkginfo = pkginfo + "\n%s\n" % ("Filename: %s" % filename) + else: + pkginfo = pkginfo + "\n%s\n" % ("Filename: %s" % obj.key) + + pkginfos.append(pkginfo) + + package_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Packages") + print("Writing package index: %s" % (str(package_index_obj))) + if os.environ['PUBLIC'] == 'True' : + acl = 'public-read' + else: + acl = 'private' + package_index_obj.put(Body="\n".join(sorted(pkginfos)), Metadata={'packages-hash': calcd_pkghash}, ACL=acl) + + print("DONE REBUILDING PACKAGE INDEX") + +def calc_package_index_hash(deb_names): + """ + Calculates a hash of all the given deb file names. This is deterministic so + we can use it for short-circuiting. + """ + + md5 = hashlib.md5() + md5.update("\n".join(sorted(deb_names))) + return md5.hexdigest() + +def get_package_index_hash(prefix): + """ + Returns the md5 hash of the names of all the packages in the index. This can be used + to detect if all the packages are represented without having to load a control data cache + file for each package.can be used + to detect if all the packages are represented without having to load a control data cache + file for each package. 
+ """ + s3 = boto3.resource('s3') + try: + print("looking for existing Packages file: %sPackages" % prefix) + package_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + 'Packages') + return package_index_obj.metadata.get('packages-hash', None) + except botocore.exceptions.ClientError as e: + if e.response['Error']['Code'] == '404': + return None + else: + raise(e) + +def build_release_file(prefix): + """ + gets info from Package, get the sums and puts them into file + """ + s3 = boto3.client('s3') + release_file = "" + s3.download_file(os.environ['BUCKET_NAME'], prefix + "Packages", '/tmp/Packages') + md5, sha1, sha256 = checksums("/tmp/Packages") + + time = 'Date: ' + strftime("%a, %d %b %Y %X UTC", gmtime()) + stat = os.stat("/tmp/Packages") + release_file = release_file +(time + '\nMD5sum:\n ' + md5) + for i in range(0,17-len(str(stat.st_size))): + release_file = release_file +(' ') + release_file = release_file +(str(stat.st_size) + ' Packages\nSHA1:\n '+sha1 ) + for i in range(0,17-len(str(stat.st_size))): + release_file = release_file +(' ') + release_file = release_file +(str(stat.st_size) + ' Packages\nSHA256:\n '+sha256 ) + for i in range(0,17-len(str(stat.st_size))): + release_file = release_file +(' ') + release_file = release_file +(str(stat.st_size) + ' Packages') + + s3 = boto3.resource('s3') + + if os.environ['PUBLIC'] == 'True' : + acl = 'public-read' + else: + acl = 'private' + + release_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Release") + print("Writing Release file: %s" % (str(release_index_obj))) + release_index_obj.put(Body=release_file, ACL=acl) + +def sign_release_file(prefix): + ''' + Using gpg password assigned in env variable `GPG_PASS` and key, which's file directory is + assigned in env variable `GPG_KEY` + ''' + gpg = gnupg.GPG(gnupghome='/tmp/gpgdocs') + s3 = boto3.client('s3') + s3.download_file(os.environ['BUCKET_NAME'], os.environ['GPG_KEY'], '/tmp/gpgdocs/sec.key') + s3.download_file(os.environ['BUCKET_NAME'], prefix + 'Release', '/tmp/gpgdocs/Release') + + sec = gpg.import_keys(open('/tmp/gpgdocs/sec.key').read(-1)) + print("Key import returned: ") + print(sec.results) + stream = open('/tmp/gpgdocs/Release') + signed = gpg.sign_file(stream, passphrase=os.environ['GPG_PASS'], clearsign=True, detach=True, binary=False) + + if os.environ['PUBLIC'] == 'True' : + acl = 'public-read' + else: + acl = 'private' + s3 = boto3.resource('s3') + print(signed) + sign_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Release.gpg") + sign_obj.put(Body=str(signed), ACL=acl) \ No newline at end of file diff --git a/rpm/Makefile b/rpm/Makefile new file mode 100644 index 0000000..0ede490 --- /dev/null +++ b/rpm/Makefile @@ -0,0 +1,20 @@ +ZIPPED_FILES := s3rpm.py gnupg.py # files to compress in root of zip +ZIPPED_DIRS := pyrpm # folders to compress to zip +all: requires test package + +help: ## displays this message + @grep -E '^[a-zA-Z_/%\-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +requires: ## installs required packages + pip3 install -t . 
-r requirements.txt
+
+package: ## creates zip of code
+	zip code.zip $(ZIPPED_FILES)
+	zip -r code.zip $(ZIPPED_DIRS)
+
+clean: ## cleans up the repository
+	/bin/rm -rf code.zip __pycache__ boto3* botocore* concurrent* docutils* dateutil* six* futures* pyrpm* tests* jmespath* python_gnupg* python_dateutil* six.py gnupg.py gnupg.pyc
+
+
+test: ## runs the tests
+	python3.6 s3rpm_test.py
\ No newline at end of file
diff --git a/rpm/README.md b/rpm/README.md
new file mode 100644
index 0000000..44f42d0
--- /dev/null
+++ b/rpm/README.md
@@ -0,0 +1,171 @@
+# AWS Lambda YUM repository manager for S3
+
+Automatic YUM repository building inside an S3 bucket with Lambda support
+
+## Readme contents
+
+* [Setting up code, S3 and Lambda](#setting-up-code-s3-and-lambda)
+  * [Getting the code](#getting-the-code)
+  * [GPG key](#gpg-key)
+  * [Environmental variables](#environmental-variables)
+  * [Set up role](#set-up-role)
+  * [Set up lambda with CLI](#set-up-lambda-with-cli)
+  * [Set up lambda manually](#set-up-lambda-manually)
+  * [The triggers](#the-triggers)
+  * [Set up S3](#set-up-s3)
+* [Setting up yum](#setting-up-yum)
+  * [First time set up](#first-time-set-up)
+  * [Install/update](#installupdate)
+* [Notes](#notes)
+* [Tests](#tests)
+
+## Setting up code, S3 and Lambda
+
+### Getting the code
+Clone the repo, get all other required files and compress them
+```
+git clone https://github.com/tactycal/lambdaRepos.git
+cd lambdaRepos/rpm
+make all
+```
+
+### GPG key
+Create your GPG key (skip to exporting your key if you already have one)
+```
+gpg --gen-key
+# Follow the instructions
+# Create an 'RSA and RSA' key - option 1
+# For maximum security it is recommended to create a 4096-bit key
+# The key should not expire
+```
+
+Export your key
+
+```
+gpg --export-secret-key -a "User Name" > secret.key # exports the secret key to secret.key
+```
+
+### Set up role
+
+Create a new role with S3 read/write access
+
+Here is a minimal requirement for the policy included in the role:
+```
+{"Version": "2012-10-17",
+ "Statement": [
+   {"Sid": "",
+    "Action": [
+      "s3:GetObject",
+      "s3:PutObject",
+      "s3:PutObjectAcl"],
+    "Effect": "Allow",
+    "Resource": "arn:aws:s3:::<bucket name>/*"}]}
+```
+
+### Environmental variables
+These are the environmental variables you will have to set:
+
+| Key | Value |
+| --- | ---|
+| PUBLIC | True/False |
+| GPG_KEY | File |
+| GPG_PASS | GPG key password |
+| BUCKET_NAME | Bucket Name |
+| REPO_DIR | Directory |
+| CACHE | Directory |
+
+**PUBLIC** Set to True for the outputs to be publicly readable
+
+**GPG_KEY** Location of your GPG private key relative to the root of the bucket (e.g. secret/private.key). Leaving this variable empty will cause the Lambda to skip GPG signing
+
+**GPG_PASS** Passphrase of the private key uploaded to GPG_KEY (Note: environment variables can be encrypted using KMS keys)
+
+**BUCKET_NAME** Name of the bucket. Should be the same as the one selected in the triggers and the one you're using for the repository
+
+**REPO_DIR** Path to the repository from the bucket root. If none is set, the root of the repository is assumed to be the root of the bucket
+
+**CACHE** Path to the cache folder from the bucket root (e.g. repo/cache)
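+
+To make these variables concrete, here is one possible bucket layout, assuming REPO_DIR=repo, CACHE=repo/cache and GPG_KEY=secret/private.key (the repodata/ folder and the repo_cache file are created by the Lambda itself):
+```
+<bucket name>/
+    secret/private.key           <- GPG_KEY
+    repo/                        <- REPO_DIR
+        pkgname-0.3.8-x86_64.rpm
+        repodata/                <- generated metadata (repomd.xml, *.xml.gz, repomd.xml.asc)
+        cache/                   <- CACHE (holds the repo_cache index)
+```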
+
+### Set up lambda with CLI
+
+[Install aws cli](http://docs.aws.amazon.com/cli/latest/userguide/installing.html)
+
+Create a new Lambda function:
+```
+# <role arn> is the ARN of a role with S3 read/write access
+# Replace '<...>' with your environment variable values
+aws lambda create-function \
+    --function-name <function name> \
+    --zip-file fileb://code.zip \
+    --role <role arn> \
+    --handler s3rpm.lambda_handler \
+    --runtime python3.6 \
+    --environment Variables='{PUBLIC=<...>,GPG_KEY=<...>,GPG_PASS=<...>,BUCKET_NAME=<...>,REPO_DIR=<...>,CACHE=<...>}'
+```
+
+### Set up lambda manually
+
+If the CLI is not your thing, you can upload the code manually
+
+Create a new Lambda function, set the handler to **s3rpm.lambda_handler** and the runtime to **python 3.6**
+
+Upload `code.zip` to the Lambda function
+
+### The triggers
+
+ * Object Created(All), suffix 'rpm'
+ * Object Removed(All), suffix 'rpm'
+ * If you are using a certain directory as a repo, set it as the prefix
+
+### Set up S3
+Upload the secret key file to the location you specified as GPG_KEY
+
+Upload an .rpm file to the desired folder; the Lambda function should now keep your repository up to date
+
+## Setting up yum
+
+### First time set up
+
+Create an `example.repo` file at `/etc/yum.repos.d/example.repo`
+```
+vi /etc/yum.repos.d/example.repo
+```
+with the following contents:
+```
+[reponame]
+name=Repo name
+baseurl=https://s3.$AWS_SERVER.amazonaws.com/$BUCKET_NAME/$PATH_TO_REPO
+enabled=1
+gpgcheck=0
+repo_gpgcheck=1
+gpgkey=<link to your exported public GPG key>
+```
+
+* You can set `repo_gpgcheck=0` to skip GPG verification of the repository metadata when installing packages
+* You can set `gpgcheck=1` if you are uploading signed rpm packages (the Lambda does not sign packages, it only signs the metadata XML file)
+
+### Install/update
+Install a package
+```
+sudo yum install <package>
+```
+
+Upgrading a package
+```
+sudo yum upgrade
+```
+
+## Notes
+
+* The .rpm files and repodata/* in the repository directory are (and should be) publicly accessible for the yum setup described above to work
+
+* Don't forget to increase the timeout of the Lambda function
+
+* If somebody manages to inject a malicious rpm file into your repo, it will automatically be added to the repository. It is your job to make the bucket secure enough that this cannot happen!
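+
+If you would rather keep the per-object ACLs private (PUBLIC=False) and still serve the repository over HTTPS, one option is a bucket policy that makes only the repository prefix publicly readable. This is just a sketch in the same style as the role policy above; `<bucket name>` and the `repo/` prefix are placeholders:
+```
+{"Version": "2012-10-17",
+ "Statement": [
+   {"Sid": "PublicReadRepo",
+    "Effect": "Allow",
+    "Principal": "*",
+    "Action": "s3:GetObject",
+    "Resource": "arn:aws:s3:::<bucket name>/repo/*"}]}
+```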
+ +## Tests + +To run unit tests: +``` +make requires #gets dependancies +make test #runs the tests +``` diff --git a/rpm/requirements.txt b/rpm/requirements.txt new file mode 100644 index 0000000..8c4541b --- /dev/null +++ b/rpm/requirements.txt @@ -0,0 +1,8 @@ +git+https://github.com/02strich/pyrpm.git@97df294 +python-gnupg==0.4.1 +boto3==1.3.1 +botocore==1.4.41 +docutils==0.12 +futures==3.0.5 +jmespath==0.9.0 +six==1.10.0 diff --git a/rpm/s3rpm.py b/rpm/s3rpm.py new file mode 100644 index 0000000..f5918c2 --- /dev/null +++ b/rpm/s3rpm.py @@ -0,0 +1,221 @@ +from __future__ import print_function +from pyrpm.yum import YumPackage +from pyrpm.tools.createrepo import YumRepository +import boto3 +import os +import botocore +import gnupg +import json +import shutil +def lambda_handler(event, context): + s3 = boto3.client('s3') + + bucket = event['Records'][0]['s3']['bucket']['name'] + key = event['Records'][0]['s3']['object']['key'] + repo = YumRepository('/tmp/repo/') # set repository + prefix = '/'.join(key.split('/')[0:-1])+'/' + + s3_repo_dir = os.environ['REPO_DIR'].strip('/') + + #make sure we are working with correct files + if bucket == os.environ['BUCKET_NAME'] and key.endswith(".rpm") and prefix.startswith(s3_repo_dir): + #check if repodata already exist, if not create new with key file + print('Bucket and key\'s file accepted') + exists = check_bucket_file_existance(s3_repo_dir+'/repodata/repomd.xml') + files = ['repomd.xml', 'primary.xml.gz','filelists.xml.gz', 'other.xml.gz'] + + #make /tmp/repodata path + create_new_dir_if_not_exist(repo.repodir+'/repodata/') + # if repodata files exist download them to /tmp where we can manipulate with them + if exists: + print('repodata already exists, old files will be overwriten') + for f in files: + s3.download_file(os.environ['BUCKET_NAME'], s3_repo_dir+'/repodata/'+f, repo.repodir+'repodata/'+f) + repo.read() + print('Creating Metadata files') + if event['Records'][0]['eventName'].startswith('ObjectCreated'): + repo, cache = check_changed_files(repo, s3_repo_dir, newfile=event['Records'][0]['s3']['object']['key']) + else: + repo, cache = check_changed_files(repo, s3_repo_dir) + #save cache to bucket + s3 = boto3.resource('s3') + f_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=os.environ['CACHE']+'/repo_cache') + print("Writing file: %s" % (str(f_index_obj))) + f_index_obj.put(Body=str(json.dumps(cache))) + + repo.save() + + #sign metadata + if not os.environ['GPG_KEY']=='': + sign_md_file(repo, s3_repo_dir) + + #save files to bucket + for f in files: + with open(repo.repodir+'repodata/'+f, 'rb') as g: + f_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=s3_repo_dir+'/repodata/'+f) + print("Writing file: %s" % (str(f_index_obj))) + f_index_obj.put(Body=g.read(-1), ACL=get_public()) + + #Let us clean up + shutil.rmtree(repo.repodir) + if os.path.exists('/tmp/gpgdocs'): + shutil.rmtree('/tmp/gpgdocs') + + print('METADATA GENERATION COMPLETED') + +def create_new_dir_if_not_exist(path): + """ + Creates dir at 'path' if it does not exist + + returns true on success + returns false if dir already exists + """ + if not os.path.exists(path): + os.makedirs(path) + return True + else: + return False +def check_bucket_file_existance(path): + """ + checks if file exsist in bucket + + returns bool + """ + s3 = boto3.resource('s3') + try: + s3.Object(os.environ['BUCKET_NAME'], path).load() + except botocore.exceptions.ClientError as e: + if e.response['Error']['Code'] == "404": + return False + else: + raise e + else: + 
return True + +def get_public(): + """ + If env variable PUBLIC is set to true returns 'public-read', else returns 'private' + """ + if os.environ['PUBLIC'] == 'True' : + acl = 'public-read' + else: + acl = 'private' + return acl + +def get_cache(repo, s3_repo_dir): + """ + Check for cache file + """ + if check_bucket_file_existance(os.environ['CACHE']+'/repo_cache'): + print('Repodata cache (%s) found, attempting to write to it' %(os.environ['CACHE']+'/repo_cache')) + s3 = boto3.client('s3') + s3.download_file(os.environ['BUCKET_NAME'], os.environ['CACHE']+'/repo_cache', repo.repodir + 'repo_cache') + with open(repo.repodir + 'repo_cache', 'r') as f: + cache = json.loads(f.read(-1)) + else: + print('repodata_cache file doesn\'t exist. Creating new one') + cache = {} + return cache + +def remove_overwritten_file_from_cache(cache, newfile, s3_repo_dir, repo): + """ + remove pkg from metadata and repo + """ + fname = newfile[len(s3_repo_dir):] # '/filename.rpm' - without path + print('file %s has been overwritten and will be removed from md and repo' % (fname)) + pkg_id = cache[fname] + del cache[fname] + + # save cache in case new event occurs + s3 = boto3.resource('s3') + f_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=os.environ['CACHE']+'/repo_cache') + f_index_obj.put(Body=str(json.dumps(cache))) + + repo.remove_package(pkg_id) + return cache + + +def check_changed_files(repo, s3_repo_dir,newfile=None): + """ + check if there are any new files in bucket or any deleted files + """ + print("Checking for changes : %s" % (s3_repo_dir)) + cache = get_cache(repo, s3_repo_dir) + s3 = boto3.resource('s3') + files = [] + #if file was overwriten and is in repocache then remove it from cache, so next for loop will add back the new + if newfile != None and newfile[len(s3_repo_dir):] in cache: + cache = remove_overwritten_file_from_cache(cache, newfile, s3_repo_dir, repo) + #cycle through all objects ending with .rpm in REPO_DIR and check if they are already in repodata, if not add them + for obj in s3.Bucket(os.environ['BUCKET_NAME']).objects.filter(Prefix=s3_repo_dir): + files.append(obj.key) + if not obj.key.endswith(".rpm"): + print('skipping %s - not rpm file' %(obj.key)) + continue + fname = obj.key[len(s3_repo_dir):] # '/filename.rpm' - without path + if fname not in cache: + s3c = boto3.client('s3') + #Create path to folder where to download file, if it not yet exists + prefix = '/'.join(obj.key.split('/')[0:-1])[len(s3_repo_dir):] + create_new_dir_if_not_exist(repo.repodir+prefix) + #Download file to repodir + path = repo.repodir + fname + s3c.download_file(os.environ['BUCKET_NAME'], obj.key, path) + with open(path, 'rb') as f: + package = YumPackage(f) + #add package to repo and cache + repo.add_package(package) + cache[fname] = package.checksum + print('File %s added to metadata'%(obj.key)) + else: + print('File %s is already in metadata'%(obj.key)) + + removedPkgs = [] + for f in cache: + if f.endswith('.rpm') and s3_repo_dir+f not in files: + print('removing ' +f) + repo = remove_pkg(repo, cache, f) + removedPkgs.append(f) + + for removed in removedPkgs: + del cache[removed] + return repo, cache + +def remove_pkg(repo, cache, key): + """ + remove package from metadata + """ + prefix = '/'.join(key.split('/')[0:-1]) + filename = key[len(prefix):] + if filename in cache: + repo.remove_package(cache[filename]) + print('%s has been removed from metadata' % (filename)) + else: + print('Tried to delete %s entry but was not found in cache' % (filename)) + return repo + +def 
sign_md_file(repo, s3_repo_dir): + ''' + Using gpg password assigned in env variable `GPG_PASS` and key, which's file directory is + assigned in env variable `GPG_KEY` + ''' + gpg = gnupg.GPG(gnupghome='/tmp/gpgdocs') + s3 = boto3.client('s3') + s3.download_file(os.environ['BUCKET_NAME'], os.environ['GPG_KEY'], '/tmp/gpgdocs/sec.key') + + with open('/tmp/gpgdocs/sec.key') as stream: + sec = gpg.import_keys(stream.read(-1)) + print("Key import returned: ") + print(str(sec.results)) + with open(repo.repodir + 'repodata/repomd.xml', 'rb') as stream: + # If gpgkey has no password set, leaving GPG_PASS empty caues badsign, + # that is why we are not calling passphrase in gpg.sign_file + if os.environ['GPG_PASS'] == '': + signed = gpg.sign_file(stream, clearsign=True, detach=True, binary=False) + else: + signed = gpg.sign_file(stream, passphrase=os.environ['GPG_PASS'], clearsign=True, detach=True, binary=False) + + s3 = boto3.resource('s3') + sign_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=s3_repo_dir + "/repodata/repomd.xml.asc") + print('uploading repomd.xml.asc to /repodata') + sign_obj.put(Body=str(signed), ACL=get_public()) \ No newline at end of file diff --git a/rpm/s3rpm_test.py b/rpm/s3rpm_test.py new file mode 100644 index 0000000..b5ead2a --- /dev/null +++ b/rpm/s3rpm_test.py @@ -0,0 +1,185 @@ +import unittest +from unittest.mock import MagicMock +from unittest.mock import mock_open +from unittest.mock import patch +from unittest.mock import PropertyMock + +import s3rpm + +import botocore +import os +import json +import shutil + +class SubFunctionsTest(unittest.TestCase): + + def setUp(self): + os.environ['BUCKET_NAME'] = 'bucket' + os.environ['REPO_DIR'] = 'test_s3rpm' + os.environ['GPG_KEY'] = '' + os.environ['PUBLIC'] = 'True' + os.environ['GPG_PASS']='123' + os.environ['CACHE']='test_s3rpm' + + def tearDown(self): + if os.path.exists('test_s3rpm'): + shutil.rmtree('test_s3rpm') + + def test_public_private(self): + os.environ['PUBLIC'] = 'True' + self.assertEqual(s3rpm.get_public(), 'public-read') + + os.environ['PUBLIC'] = '' + self.assertEqual(s3rpm.get_public(), 'private') + + os.environ['PUBLIC'] = 'False' + self.assertEqual(s3rpm.get_public(), 'private') + + + @patch('s3rpm.boto3') + def test_file_existance(self, s3_mock): + ret = s3rpm.check_bucket_file_existance('path') + self.assertEqual(ret, True) + s3_mock.resource().Object.assert_called_with("bucket", "path") + s3_mock.resource().Object().load.assert_called_with() + + #404 error + p = PropertyMock(side_effect=botocore.exceptions.ClientError({'Error':{'Code': '404','Message':'no msg'}}, 'aa')) + s3_mock.resource().Object().load = p + + ret = s3rpm.check_bucket_file_existance('path') + self.assertEqual(ret, False) + #non404 error + p = PropertyMock(side_effect=botocore.exceptions.ClientError({'Error':{'Code': '403','Message':'no msg'}}, 'aa')) + s3_mock.resource().Object().load = p + + with self.assertRaises(botocore.exceptions.ClientError): + s3rpm.check_bucket_file_existance('path') + + @patch('s3rpm.YumRepository') + @patch('s3rpm.boto3') + @patch('s3rpm.check_bucket_file_existance') + def test_cache(self, check_mock, s3_mock, yum_mock): + yum_mock = MagicMock(repodir='test_s3rpm/') + cache = '{"pkgname" : "ID"}' + repo = yum_mock + m = mock_open(read_data=cache) + check_mock.return_value = True + + with patch('s3rpm.open', m): + cachenew = s3rpm.get_cache(repo, os.environ['REPO_DIR']) + s3_mock.client().download_file.assert_called_with('bucket', 'test_s3rpm/repo_cache', 'test_s3rpm/repo_cache') + 
self.assertEqual(json.loads(cache), cachenew) + + check_mock.return_value = False + + cachenew = s3rpm.get_cache(repo,os.environ['REPO_DIR']) + self.assertEqual(cachenew, {}) + + @patch('s3rpm.YumRepository') + @patch('s3rpm.YumPackage') + @patch('s3rpm.get_cache') + @patch('s3rpm.boto3') + def test_new_files(self, s3_mock, cache_mock, yump_mock, yum_mock): + + cache_mock.return_value = {"/pkgname-0.3.7-x86_64.rpm": "test_id1"} + yum_mock = MagicMock(repodir='test_s3rpm/') + repo = yum_mock + yump_mock.return_value = MagicMock(checksum='test_id2') + cache = {"/pkgname-0.3.7-x86_64.rpm": "test_id1", "/pkgname-0.3.8-x86_64.rpm": "test_id2"} + + + s3_mock.resource().Bucket().objects.filter.return_value = [MagicMock(key='test.file'),MagicMock(key='test_s3rpm/pkgname-0.3.8-x86_64.rpm'), MagicMock(key='test_s3rpm/pkgname-0.3.7-x86_64.rpm')] + m = mock_open(read_data='') + with patch('s3rpm.open', m): + reponew, cachenew = s3rpm.check_changed_files(repo, os.environ['REPO_DIR']) + + self.assertEqual(cache, cachenew) + self.assertEqual(yum_mock.add_package.call_count, 1) + + + @patch('s3rpm.YumRepository') + @patch('s3rpm.get_cache') + @patch('s3rpm.boto3') + def test_delete_files(self, s3_mock, cache_mock, yum_mock): + cache_mock.return_value = {"/pkgname-0.3.7-x86_64.rpm": "test_id1"} + yum_mock = MagicMock(repodir='test_s3rpm/') + repo = yum_mock + cache = {} + + s3_mock.resource().Bucket().objects.filter.return_value = [MagicMock(key='test.file')] + _, cachenew = s3rpm.check_changed_files(repo, os.environ['REPO_DIR']) + self.assertEqual(cache, cachenew) + self.assertEqual(yum_mock.remove_package.call_count, 1) + + @patch('s3rpm.YumRepository') + @patch('s3rpm.gnupg') + @patch('s3rpm.boto3') + def test_gpg(self, s3_mock, gpg_mock, yum_mock): + os.environ['GPG_KEY'] = 'KeyNowExists' + m = mock_open() + repo = yum_mock() + with patch('s3rpm.open', m): + s3rpm.sign_md_file(repo, os.environ['REPO_DIR']) + gpg_mock.GPG().sign_file.assert_called_with(s3rpm.open(), binary=False, clearsign=True, detach=True, passphrase='123') + s3_mock.resource().Object().put.assert_called_with(ACL='public-read', Body=str(gpg_mock.GPG().sign_file())) + + def test_create_dir(self): + ret = s3rpm.create_new_dir_if_not_exist('test_s3rpm/testfolder') + self.assertEqual(True, ret) + ret = s3rpm.create_new_dir_if_not_exist('test_s3rpm/testfolder') + self.assertEqual(False, ret) + + +class HandlerTest(unittest.TestCase): + def setUp(self): + os.environ['BUCKET_NAME'] = 'bucket' + os.environ['REPO_DIR'] = 'test_s3rpm' + os.environ['GPG_KEY'] = '' + os.environ['PUBLIC'] = 'True' + os.environ['GPG_PASS']='123' + os.environ['CACHE']='test_s3rpm' + self.m = mock_open(read_data='') + + def tearDown(self): + if os.path.exists('test_s3rpm'): + shutil.rmtree('test_s3rpm') + @patch('s3rpm.get_cache') + @patch('s3rpm.YumRepository') + @patch('s3rpm.boto3') + def test_defined_repodir(self, s3_mock, yum_mock, cache_mock): + cache_mock.return_value = {"pkgname":"ID"} + + yum_mock.return_value = MagicMock(repodir='test_s3rpm/') + with patch('s3rpm.open', self.m): + s3rpm.lambda_handler(S3_EVENT, {}) + self.assertEqual(len(s3_mock.resource().Object().put.mock_calls), 5) + self.assertEqual(os.environ['REPO_DIR'],'test_s3rpm') + + @patch('s3rpm.gnupg') + @patch('s3rpm.shutil') + @patch('s3rpm.get_cache') + @patch('s3rpm.YumRepository') + @patch('s3rpm.boto3') + def test_gpg_from_handler(self, s3_mock, yum_mock, cache_mock, sh_mock, gpg_mock): + cache_mock.return_value = {"pkgname":"ID"} + + os.environ['GPG_KEY'] = 'KeyNowExists' + check = 
MagicMock() + check.return_value = False + yum_mock.return_value = MagicMock(repodir='test_s3rpm/testrepo/') + with patch('s3rpm.open', self.m): + with patch('s3rpm.check_bucket_file_existance', check): + s3rpm.lambda_handler(S3_EVENT, {}) + gpg_mock.GPG().sign_file.assert_called_with(s3rpm.open(), binary=False, clearsign=True, detach=True, passphrase='123') + assert os.path.exists('test_s3rpm/testrepo/') == True + + @patch('s3rpm.boto3') + def test_bad_bucket_name(self, s3_mock): + os.environ['BUCKET_NAME'] = 'iamfakebucket' + s3rpm.lambda_handler(S3_EVENT, {}) + s3_mock.client.assert_called_with('s3') + self.assertEqual(len(s3_mock.resource().Object().put.mock_calls), 0) +S3_EVENT = {"Records":[{"s3": {"object": {"key": "test_s3rpm/repo/pkgname-0.3.8-x86_64.rpm",},"bucket": {"name": "bucket",},},"eventName": "ObjectCreated:*"}]} +if __name__ == '__main__': + unittest.main()
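
For quick manual testing outside the unit tests, the handler can also be driven locally with a fake S3 event shaped like the S3_EVENT fixture above. The snippet below is only a sketch: the bucket, key and environment values are placeholders, and it assumes valid AWS credentials plus a bucket that actually contains the rpm.
```
import os
import s3rpm

# Placeholder environment, mirroring the variables the Lambda expects.
os.environ.update({
    'BUCKET_NAME': 'my-test-bucket',   # must exist and be reachable with your credentials
    'REPO_DIR': 'repo',
    'CACHE': 'repo/cache',
    'GPG_KEY': '',                     # empty string makes the handler skip GPG signing
    'GPG_PASS': '',
    'PUBLIC': 'False',
})

# Fake "ObjectCreated" notification for an rpm that is already in the bucket.
event = {"Records": [{
    "s3": {"object": {"key": "repo/pkgname-0.3.8-x86_64.rpm"},
           "bucket": {"name": os.environ['BUCKET_NAME']}},
    "eventName": "ObjectCreated:Put",
}]}

s3rpm.lambda_handler(event, {})
```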