Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ github_pages_url <- description$GITHUB_PAGES

<p style="font-size: 16px;"><em>Public Database Submission Pipeline</em></p>

**Beta Version**: v1.3.2. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!
**Beta Version**: v1.3.3. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!

**General Disclaimer**: This repository was created for use by CDC programs to collaborate on public health related projects in support of the [CDC mission](https://www.cdc.gov/about/organization/mission.htm). GitHub is not hosted by the CDC, but is a third party website used by CDC and its partners to share information and collaborate on software. CDC use of GitHub does not imply an endorsement of any one particular service, product, or enterprise.

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<!-- ![GitHub last commit](https://img.shields.io/github/last-commit/montilab/cadra) -->

**Beta Version**: 1.3.2. This pipeline is currently in Beta testing, and
**Beta Version**: 1.3.3. This pipeline is currently in Beta testing, and
issues could appear during submission. Please use it at your own risk.
Feedback and suggestions are welcome\!

Expand Down
7 changes: 7 additions & 0 deletions argument_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ def args_parser():
description="Downloads the BioSample Package XML from NCBI and updates SeqSender's metadata schema options for the BioSample database."
)

# test_network_connection command
test_network_module = subparser_modules.add_parser(
"test_network_connection",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Runs a series of test network connections to NCBI and GISAID to troubleshoot submission issues."
)

# version command
version_module = subparser_modules.add_parser(
"version",
Expand Down
2 changes: 1 addition & 1 deletion config/genbank/genbank_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
description="Optional internal field for how the GenBank submission should be named when viewed from the NCBI submission portal, . If not provided, when performing submissions <--submission_name> with the suffix \"-GB\" will be used instead.",
title="genbank submission portal title",
),
"sra-comment": Column(
"gb-comment": Column(
dtype="object",
checks=[
Check(lambda s: s.nunique() == 1),
Expand Down
2 changes: 1 addition & 1 deletion docs/app.json

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions ncbi_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

# Local imports
import tools
import setup

# Process NCBI Report file
def get_ncbi_report(database: str, submission_name: str, submission_dir: str, config_dict: Dict[str, Any], submission_type: str) -> Optional[str]:
Expand Down Expand Up @@ -63,8 +64,16 @@ def create_submit_ready_file(ftp, submission_dir: str):
return ftp

def ncbi_login(config_dict: Dict[str, Any]):
    """Open an FTP session to NCBI_FTP_HOST and log in.

    Args:
        config_dict: Mapping providing the "Username" and "Password" entries
            used for the FTP login.

    Returns:
        The logged-in ``ftplib.FTP`` connection.

    Any failure is reported on stderr and terminates the program with exit
    status 1. For failures other than a permanent FTP error, the NCBI network
    diagnostics in ``setup.test_internet_connection`` are run first.
    """
    try:
        ftp = ftplib.FTP(NCBI_FTP_HOST)
        ftp.login(user=config_dict["Username"], passwd=config_dict["Password"])
    except ftplib.error_perm as err:
        print(f"Error: login error. Possible incorrect credentials for NCBI FTP site in config file. \nException: {err}", file=sys.stderr)
        # Do not fall through: returning here would hand callers a
        # connection that is not logged in (or an unbound name if the
        # constructor itself raised).
        sys.exit(1)
    except Exception as err:
        print("Error unable to connect to FTP site. Running network test...", file=sys.stderr)
        setup.test_internet_connection(databases=["NCBI"])
        print(f"Exception: {err}", file=sys.stderr)
        sys.exit(1)
    return ftp

def ftp_upload_file(ftp, upload_file: str, upload_name: Optional[str] = None):
Expand Down
2 changes: 2 additions & 0 deletions seqsender.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ def main():
elif command == "update_biosample":
print("Updating BioSample requirements.", file=sys.stdout)
setup.download_biosample_xml_list()
elif command == "test_network_connection":
setup.test_internet_connection(databases=["GENERAL","NCBI","GISAID"])
else:
# If no command display help
parser.print_help()
Expand Down
2 changes: 1 addition & 1 deletion settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
PROG_DIR: str = os.path.dirname(os.path.abspath(__file__))

# SeqSender version
VERSION: str = "1.3.2 (Beta)"
VERSION: str = "1.3.3 (Beta)"

# Organism options with unique submission options
ORGANISM_CHOICES: List[str] = ["FLU", "COV", "POX", "ARBO", "RSV", "OTHER"]
Expand Down
56 changes: 53 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import sys
from zipfile import ZipFile
import ftplib
import io
import os
import json
import subprocess
import socket
import pandas as pd
import shutil
import platform
from urllib.request import urlopen
import urllib
import gzip
import stat
import requests
Expand All @@ -23,6 +25,7 @@
# Local imports
sys.path.insert(0, str(pathlib.Path(__file__).parent))
import tools
from settings import NCBI_FTP_HOST

# Get program directory
PROG_DIR: str = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -95,6 +98,14 @@
title=\"biosample submission portal description\",
)"""

# Endpoints probed by the network connection test. Each entry is keyed by a
# unique label (duplicate keys would silently drop an endpoint) and carries the
# URL to request, the database group it belongs to, and the message reported
# when the request does not succeed.
TEST_CONNECTIONS = {
    "HTTP": {"website": "http://www.google.com", "database": "GENERAL", "error_msg": "Possible internet connectivity issues; unable to connect to 'http://www.google.com'."},
    "HTTPS": {"website": "https://www.google.com", "database": "GENERAL", "error_msg": "Possible internet connectivity issues; unable to connect to 'https://www.google.com'."},
    "NCBI": {"website": "https://www.ncbi.nlm.nih.gov", "database": "NCBI", "error_msg": "Unable to connect to 'https://www.ncbi.nlm.nih.gov'; ensure NCBI services are running and you are able to connect to them before proceeding."},
    "NCBI API": {"website": "https://submit.ncbi.nlm.nih.gov", "database": "NCBI", "error_msg": "Unable to connect to 'https://submit.ncbi.nlm.nih.gov'; ensure NCBI services are running and you are able to connect to them before proceeding."},
    "GISAID EpiCoV": {"website": "https://www.epicov.org/epi3/start", "database": "GISAID", "error_msg": "Unable to connect to 'https://www.epicov.org/epi3'; ensure GISAID services are running and you are able to connect to them before proceeding."},
    "GISAID": {"website": "https://gisaid.org/", "database": "GISAID", "error_msg": "Unable to connect to 'https://gisaid.org/'; ensure GISAID services are running and you are able to connect to them before proceeding."},
}

# Create example data for testing
def create_test_data(organism: str, database: List[str], submission_dir: str) -> None:
if organism not in ["FLU", "COV"]:
Expand Down Expand Up @@ -154,7 +165,7 @@ def download_table2asn(table2asn_dir: str) -> None:
# Determine which platform to download table2asn
if platform.system() == "Windows":
zip_url = "https://ftp.ncbi.nlm.nih.gov/asn1-converters/by_program/table2asn/win64.table2asn.zip"
with urlopen(zip_url) as zip_response:
with urllib.request.urlopen(zip_url) as zip_response:
with ZipFile(BytesIO(zip_response.read())) as zip_file:
zip_file.extractall(table2asn_dir)
return
Expand All @@ -168,7 +179,7 @@ def download_table2asn(table2asn_dir: str) -> None:
# Extract table2asn to tmp folder
try:
with open(table2asn_dir, "wb") as file:
with urlopen(zip_url) as zip_response:
with urllib.request.urlopen(zip_url) as zip_response:
file.write(gzip.decompress(zip_response.read()))
st = os.stat(table2asn_dir)
os.chmod(table2asn_dir, st.st_mode | stat.S_IXOTH | stat.S_IRWXU)
Expand Down Expand Up @@ -309,3 +320,42 @@ def biosample_package_to_pandera_schema(xml_file: str, name: str) -> None:
indentation = indentation[:-1]
file.write(indentation + ")")
os.remove(xml_file)

def test_internet_connection(databases: List[str]) -> None:
    """Run network diagnostics and report any connectivity problems.

    Every entry of TEST_CONNECTIONS whose database is "GENERAL" or is listed
    in ``databases`` is requested over HTTP(S); status codes 200, 204, 301 and
    302 count as success. When "NCBI" is requested, the FTP host name is also
    resolved through DNS and a connection to port 21 is attempted.

    Args:
        databases: Database groups to test (e.g. "NCBI", "GISAID").

    Returns:
        None. Progress is printed to stdout; all collected problems are
        printed to stderr at the end, or a success message to stdout when
        none were found.
    """
    error_list = []
    print("Checking network settings...", file=sys.stdout)
    for test, info in TEST_CONNECTIONS.items():
        if info["database"] != "GENERAL" and info["database"] not in databases:
            continue
        print(f"Checking {test} connection...", file=sys.stdout)
        try:
            # Bounded wait so a stalled endpoint cannot hang the diagnostics.
            response = requests.get(info["website"], timeout=10).status_code
        except Exception as e:
            error_list.append(f"{test} connectivity test failed for '{info['website']}'. Check possible firewall issues. \nException:{e}")
            # No status code exists for a failed request; skip the check
            # instead of reading an unbound or stale value.
            continue
        if response in (200, 204, 301, 302):
            print(f"{test} '{info['website']}' connectivity test ok.", file=sys.stdout)
        else:
            error_list.append(f"{info['error_msg']} Error code received:'{response}'")
    if "NCBI" in databases:
        print("Checking DNS resolution for FTP site...", file=sys.stdout)
        try:
            ip_address = socket.gethostbyname(NCBI_FTP_HOST)
        except Exception as e:
            error_list.append(f"Unable to reach '{NCBI_FTP_HOST}'; possible DNS error. \nException:{e}")
        else:
            # Only inspect the address when the lookup actually returned one.
            if not ip_address:
                error_list.append(f"Unable to resolve address for '{NCBI_FTP_HOST}'; check DNS server settings for possible issues.")
            else:
                print(f"DNS resolution test ok. Able to reach ('{NCBI_FTP_HOST}' -> {ip_address})", file=sys.stdout)
        print("Checking port status...", file=sys.stdout)
        try:
            # The context manager sends QUIT and closes the socket on exit.
            with ftplib.FTP() as ftp:
                ftp.connect(NCBI_FTP_HOST, 21, timeout=10)
            print(f"{NCBI_FTP_HOST} open on port 21.", file=sys.stdout)
        except Exception as e:
            error_list.append(f"Port 21 not open for {NCBI_FTP_HOST}. Check possible firewall/server issues. \nException:{e}")
    if error_list:
        for error_string in error_list:
            print(error_string, file=sys.stderr)
    else:
        print("No network connection issues detected.", file=sys.stdout)
19 changes: 18 additions & 1 deletion shiny/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
header = (
ui.card_header(
ui.HTML(
"""<p><strong>Beta Version</strong>: 1.3.2. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!</p>"""
"""<p><strong>Beta Version</strong>: 1.3.3. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!</p>"""
)
),
)
Expand Down Expand Up @@ -583,6 +583,8 @@
shiny_tools.command_accordion_panel("test_data", description=" command is used generate test data for seqsender, to be used for testing the prep and submit commands."),
# Update biosample command
shiny_tools.command_accordion_panel("update_biosample", description=" command is used to update biosample schema options based on available BioSample Packages."),
# Test network connection command
shiny_tools.command_accordion_panel("test_network_connection", description=" command is used to run a series of test network connections to NCBI and GISAID to troubleshoot submission issues."),
# version command
shiny_tools.command_accordion_panel("version", description=" command prints the current seqsender version."),
),
Expand All @@ -609,6 +611,8 @@
ui.nav_panel("Output Files", output_body),
ui.nav_panel("Commands", commands_body),
# ui.nav_panel("FAQ", faq_body),
ui.nav_spacer(),
ui.nav_control(ui.a("GitHub", href="https://github.com/CDCgov/seqsender/", target="_blank")),
selected="SeqSender",
header=header,
footer=footer,
Expand Down Expand Up @@ -668,6 +672,19 @@ def read_biosample_file():
@reactive.file_reader(dir / "templates/")
def read_genbank_file():
    """Build the transposed GenBank template table.

    Combines the base GenBank schema template, the source-modifier template
    (the FLU variant when the GenBank_schemas input equals "FLU"), and three
    structured-comment rows; missing cells become empty strings.
    """
    base_df = pd.read_csv(dir / "templates/config.genbank.genbank.schema_template.csv", index_col = "column_name")
    src_template = (
        "templates/config.genbank.genbank.flu.src.schema_template.csv"
        if input.GenBank_schemas() == "FLU"
        else "templates/config.genbank.genbank.src.schema_template.csv"
    )
    src_df = pd.read_csv(dir / src_template, index_col = "column_name")
    # Prefix and suffix rows share the same description text.
    keyword_note = "Structured comment keyword. ONLY REQUIRED IF INCLUDING COMMENT FILE. For FLU use 'FluData', HIV use 'HIV-DataBaseData', and for COV and other organisms use 'Assembly-Data'."
    comment_rows = {
        "column_name": ["cmt-StructuredCommentPrefix", "cmt-StructuredCommentSuffix", "cmt-Assembly Method"],
        "required_column": ["Required"] * 3,
        "description": [
            keyword_note,
            keyword_note,
            "ONLY REQUIRED IF INCLUDING COMMENT FILE. Process used to assemble genome.",
        ],
    }
    cmt_df = pd.DataFrame(comment_rows).set_index("column_name")
    return pd.concat([base_df, src_df, cmt_df]).fillna("").transpose()
Expand Down
2 changes: 1 addition & 1 deletion shiny/templates/config.genbank.genbank.schema_template.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ sequence_name,Required,Sequence identifier used in fasta file. This is used to c
gb-sample_name,Required,Identifier name used for GenBank. Max length is 50 characters.
gb-fasta_definition_line_modifiers,Optional,"NCBI fasta definition line modifiers can be added here. As many modifiers as you like can be added, but each must bounded by a set of brackets. Some of the available keywords are listed at ""https://www.ncbi.nlm.nih.gov/genbank/mods_fastadefline/""."
gb-title,Optional,"Optional internal field for how the GenBank submission should be named when viewed from the NCBI submission portal. If not provided, when performing submissions <--submission_name> with the suffix ""-GB"" will be used instead."
sra-comment,Optional,Optional internal field explaining the purpose of the submission for when interacting and resolving submission issues with NCBI.
gb-comment,Optional,Optional internal field explaining the purpose of the submission for when interacting and resolving submission issues with NCBI.