Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions submit_ce/domain/uploads.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,18 @@ def is_file_tgz(file: Optional[SubmitFile]) -> bool:
file.content_type in TARGZ_MIMETYPES or
bool(file.filename and file.filename.endswith('.tar.gz'))
)


ZIP_MIMETYPES = frozenset((
    'application/zip',
    'application/x-zip-compressed',
    'application/x-zip',
))
"""Content types that are treated as identifying a zip archive."""

def is_file_zip(file: Optional[SubmitFile]) -> bool:
    """Tell whether an uploaded file looks like a zip archive.

    A file counts as a zip archive when its ``content_type`` is one of
    ``ZIP_MIMETYPES`` or when its ``filename`` ends with ``.zip``.
    A missing (``None``) or otherwise falsy ``file`` is never a zip archive.
    """
    if not file:
        return False
    # The declared content type wins; the filename is only a fallback.
    if file.content_type in ZIP_MIMETYPES:
        return True
    name = file.filename
    return bool(name and name.endswith('.zip'))
42 changes: 30 additions & 12 deletions submit_ce/implementations/file_store/gs_file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import io
import logging
import tarfile
import zipfile
from typing_extensions import override

from arxiv.files import FileObj, FileDoesNotExist
Expand All @@ -18,7 +19,7 @@
from submit_ce.api import SubmissionFileStore
from submit_ce.domain import Workspace
from submit_ce.domain.uploads import UploadLifecycleStates, UploadStatus, FileStatus
from submit_ce.domain.uploads import SubmitFile
from submit_ce.domain.uploads import SubmitFile, is_file_tgz

from google.cloud import storage

Expand Down Expand Up @@ -142,19 +143,36 @@ def store_source_package(self,
package_blob.upload_from_file(content.stream, content_type=content.content_type)

content.stream.seek(0)
files=[]
files = []
src_dir = self._source_path(submission_id)

with tarfile.open(fileobj=content.stream, mode="r:*") as tar:
for member in tar.getmembers():
if not member.isfile():
continue
with tar.extractfile(member) as file:
store_at = str(src_dir / member.name)
self._check_path_safe(submission_id, store_at) # TODO this will be strange, what to do?
blob = self.bucket.blob(store_at)
blob.upload_from_file(file, size=member.size)
files.append({"file": member.name, "bytes": member.size})
is_zip = (content.content_type in ('application/zip', 'application/x-zip-compressed', 'application/x-zip')
or (content.filename and content.filename.endswith('.zip')))

if is_zip:
with zipfile.ZipFile(content.stream) as zf:
for info in zf.infolist():
if info.is_dir():
continue
store_at = str(src_dir / info.filename)
self._check_path_safe(submission_id, store_at)
with zf.open(info) as file:
blob = self.bucket.blob(store_at)
blob.upload_from_file(file, size=info.file_size)
files.append({"file": info.filename, "bytes": info.file_size})
elif is_file_tgz(content):
with tarfile.open(fileobj=content.stream, mode="r:*") as tar:
for member in tar.getmembers():
if not member.isfile():
continue
with tar.extractfile(member) as file:
store_at = str(src_dir / member.name)
self._check_path_safe(submission_id, store_at)
blob = self.bucket.blob(store_at)
blob.upload_from_file(file, size=member.size)
files.append({"file": member.name, "bytes": member.size})
else:
raise ValueError(f"Unsupported source package content type: {content.content_type!r}")

return files

Expand Down
8 changes: 4 additions & 4 deletions submit_ce/ui/controllers/new/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from submit_ce.domain.event.file import UploadArchive, UploadFiles
from submit_ce.domain.submission import Submission
from submit_ce.domain.uploads import SourceFormat
from submit_ce.domain.uploads import Workspace, FileStatus, UploadStatus, is_file_tgz
from submit_ce.domain.uploads import Workspace, FileStatus, UploadStatus, is_file_tgz, is_file_zip
from submit_ce.domain.exceptions import SaveError

from submit_ce.ui.auth import user_and_client_from_session
Expand Down Expand Up @@ -68,11 +68,11 @@


def _single_file_archive(files: MultiDict) -> bool:
    """Return True if the uploaded file is a tar.gz or zip archive.

    Looks up the ``'file'`` entry of ``files``. When there is no such entry
    the upload is not an archive and False is returned.
    """
    pointer = files.get('file')
    if pointer is None:
        return False
    # Both archive formats must be checked here: returning on the tar.gz
    # test alone would leave the zip check unreachable.
    return is_file_tgz(pointer) or is_file_zip(pointer)


class AddfilesForm(csrf.CSRFForm):
Expand Down Expand Up @@ -154,7 +154,7 @@ def upload_files(method: str, params: MultiDict, session: Session,
alerts.flash_failure("No file was uploaded; please try again.")
return stay_on_this_stage((rdata, status.OK, {}))

is_archive = "ARCHIVE" if is_file_tgz(file) else "NONARCHIVE"
is_archive = "ARCHIVE" if (is_file_tgz(file) or is_file_zip(file)) else "NONARCHIVE"
# TODO not sure if has_files is useful any more. _upload_files can upload with or without files,
has_files = submission.uncompressed_size > 0
try:
Expand Down
Loading