diff --git a/submit_ce/domain/uploads.py b/submit_ce/domain/uploads.py
index 234eeeed..a9b3fc7b 100644
--- a/submit_ce/domain/uploads.py
+++ b/submit_ce/domain/uploads.py
@@ -148,3 +148,23 @@ def is_file_tgz(file: Optional[SubmitFile]) -> bool:
         file.content_type in TARGZ_MIMETYPES or
         bool(file.filename and file.filename.endswith('.tar.gz'))
     )
+
+
+ZIP_MIMETYPES = frozenset({
+    'application/zip',
+    'application/x-zip-compressed',
+    'application/x-zip',
+})
+"""zip mime types."""
+
+
+def is_file_zip(file: Optional[SubmitFile]) -> bool:
+    """Return True if the uploaded file looks like a zip archive.
+
+    Only the declared content type and a ``.zip`` filename suffix are
+    consulted; the file contents are not inspected.
+    """
+    return bool(file) and (
+        file.content_type in ZIP_MIMETYPES or
+        bool(file.filename and file.filename.endswith('.zip'))
+    )
diff --git a/submit_ce/implementations/file_store/gs_file_store.py b/submit_ce/implementations/file_store/gs_file_store.py
index 5ecc57f6..4123eab3 100644
--- a/submit_ce/implementations/file_store/gs_file_store.py
+++ b/submit_ce/implementations/file_store/gs_file_store.py
@@ -8,6 +8,7 @@
 import io
 import logging
 import tarfile
+import zipfile
 
 from typing_extensions import override
 from arxiv.files import FileObj, FileDoesNotExist
@@ -18,7 +19,7 @@
 from submit_ce.api import SubmissionFileStore
 from submit_ce.domain import Workspace
 from submit_ce.domain.uploads import UploadLifecycleStates, UploadStatus, FileStatus
-from submit_ce.domain.uploads import SubmitFile
+from submit_ce.domain.uploads import SubmitFile, is_file_tgz, is_file_zip
 
 from google.cloud import storage
 
@@ -142,19 +143,35 @@ def store_source_package(self,
         package_blob.upload_from_file(content.stream, content_type=content.content_type)
         content.stream.seek(0)
 
-        files=[]
+        files = []
         src_dir = self._source_path(submission_id)
 
-        with tarfile.open(fileobj=content.stream, mode="r:*") as tar:
-            for member in tar.getmembers():
-                if not member.isfile():
-                    continue
-                with tar.extractfile(member) as file:
-                    store_at = str(src_dir / member.name)
-                    self._check_path_safe(submission_id, store_at) # TODO this will be strange, what to do?
-                    blob = self.bucket.blob(store_at)
-                    blob.upload_from_file(file, size=member.size)
-                    files.append({"file": member.name, "bytes": member.size})
+        # Use the shared predicates from submit_ce.domain.uploads rather than
+        # re-listing the zip MIME types here, so the two cannot drift apart.
+        if is_file_zip(content):
+            with zipfile.ZipFile(content.stream) as zf:
+                for info in zf.infolist():
+                    if info.is_dir():
+                        continue
+                    store_at = str(src_dir / info.filename)
+                    self._check_path_safe(submission_id, store_at)
+                    with zf.open(info) as file:
+                        blob = self.bucket.blob(store_at)
+                        blob.upload_from_file(file, size=info.file_size)
+                        files.append({"file": info.filename, "bytes": info.file_size})
+        elif is_file_tgz(content):
+            with tarfile.open(fileobj=content.stream, mode="r:*") as tar:
+                for member in tar.getmembers():
+                    if not member.isfile():
+                        continue
+                    with tar.extractfile(member) as file:
+                        store_at = str(src_dir / member.name)
+                        self._check_path_safe(submission_id, store_at)
+                        blob = self.bucket.blob(store_at)
+                        blob.upload_from_file(file, size=member.size)
+                        files.append({"file": member.name, "bytes": member.size})
+        else:
+            raise ValueError(f"Unsupported source package content type: {content.content_type!r}")
 
         return files
 
diff --git a/submit_ce/ui/controllers/new/upload.py b/submit_ce/ui/controllers/new/upload.py
index e2fe1915..72eee4e3 100644
--- a/submit_ce/ui/controllers/new/upload.py
+++ b/submit_ce/ui/controllers/new/upload.py
@@ -37,7 +37,7 @@
 from submit_ce.domain.event.file import UploadArchive, UploadFiles
 from submit_ce.domain.submission import Submission
 from submit_ce.domain.uploads import SourceFormat
-from submit_ce.domain.uploads import Workspace, FileStatus, UploadStatus, is_file_tgz
+from submit_ce.domain.uploads import Workspace, FileStatus, UploadStatus, is_file_tgz, is_file_zip
 from submit_ce.domain.exceptions import SaveError
 
 from submit_ce.ui.auth import user_and_client_from_session
@@ -68,11 +68,11 @@
 
 
 def _single_file_archive(files: MultiDict) -> bool:
-    """Return True if the uploaded file is a tar.gz archive."""
+    """Return True if the uploaded file is a tar.gz or zip archive."""
     pointer = files.get('file')
     if pointer is None:
         return False
-    return is_file_tgz(pointer)
+    return is_file_tgz(pointer) or is_file_zip(pointer)
 
 
 class AddfilesForm(csrf.CSRFForm):
@@ -154,7 +154,7 @@ def upload_files(method: str, params: MultiDict, session: Session,
         alerts.flash_failure("No file was uploaded; please try again.")
         return stay_on_this_stage((rdata, status.OK, {}))
 
-    is_archive = "ARCHIVE" if is_file_tgz(file) else "NONARCHIVE"
+    is_archive = "ARCHIVE" if (is_file_tgz(file) or is_file_zip(file)) else "NONARCHIVE"
     # TODO not sure if has_files is useful any more. _upload_files can upload with or without files,
     has_files = submission.uncompressed_size > 0
     try: