diff --git a/src-executables/Main-trident.hs b/src-executables/Main-trident.hs index 8d28b09b..a52d5877 100644 --- a/src-executables/Main-trident.hs +++ b/src-executables/Main-trident.hs @@ -273,7 +273,7 @@ serveOptParser = ServeOptions <$> parseArchiveConfig jannocoalesceOptParser :: OP.Parser JannoCoalesceOptions jannocoalesceOptParser = JannoCoalesceOptions <$> parseJannocoalSourceSpec <*> parseJannocoalTargetFile - <*> parseJannocoalOutSpec + <*> parseJannocoalOutFile <*> parseJannocoalJannoColumns <*> parseJannocoalOverride <*> parseJannocoalSourceKey diff --git a/src/Poseidon/CLI/Jannocoalesce.hs b/src/Poseidon/CLI/Jannocoalesce.hs index 967124c8..2b3ede00 100644 --- a/src/Poseidon/CLI/Jannocoalesce.hs +++ b/src/Poseidon/CLI/Jannocoalesce.hs @@ -10,6 +10,8 @@ import Poseidon.Package (PackageReadOptions (..), defaultPackageReadOptions, getJointJanno, readPoseidonPackageCollection) +import Poseidon.PoseidonVersion (VersionedFile (..), + latestPoseidonVersion) import Poseidon.Utils (PoseidonException (..), PoseidonIO, logDebug, logInfo, logWarning) @@ -22,13 +24,12 @@ import qualified Data.HashMap.Strict as HM import qualified Data.IORef as R import Data.List ((\\)) import Data.Text (pack, replace, unpack) -import Poseidon.PoseidonVersion (latestPoseidonVersion) import System.Directory (createDirectoryIfMissing) import System.FilePath (takeDirectory) import Text.Regex.TDFA ((=~)) -- the source can be a single janno file, or a set of base directories as usual. -data JannoSourceSpec = JannoSourceSingle FilePath | JannoSourceBaseDirs [FilePath] +data JannoSourceSpec = JannoSourceSingle VersionedFile | JannoSourceBaseDirs [FilePath] data CoalesceJannoColumnSpec = AllJannoColumns @@ -37,8 +38,8 @@ data CoalesceJannoColumnSpec = data JannoCoalesceOptions = JannoCoalesceOptions { _jannocoalesceSource :: JannoSourceSpec - , _jannocoalesceTarget :: FilePath - , _jannocoalesceOutSpec :: Maybe FilePath -- Nothing means "in place" + , _jannocoalesceTarget :: VersionedFile + , _jannocoalesceOutFile :: FilePath , _jannocoalesceJannoColumns :: CoalesceJannoColumnSpec , _jannocoalesceOverwriteColumns :: Bool , _jannocoalesceSourceKey :: String -- by default set to "Poseidon_ID" @@ -47,9 +48,9 @@ data JannoCoalesceOptions = JannoCoalesceOptions } runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO () -runJannocoalesce (JannoCoalesceOptions sourceSpec target outSpec fields overwrite sKey tKey maybeStrip) = do +runJannocoalesce (JannoCoalesceOptions sourceSpec (VersionedFile targetPV targetPath) outPath fields overwrite sKey tKey maybeStrip) = do JannoRows sourceRows <- case sourceSpec of - JannoSourceSingle sourceFile -> readJannoFile latestPoseidonVersion [] sourceFile + JannoSourceSingle (VersionedFile sourcePV sourcePath) -> readJannoFile sourcePV [] sourcePath JannoSourceBaseDirs sourceDirs -> do let pacReadOpts = defaultPackageReadOptions { _readOptIgnoreChecksums = True @@ -58,11 +59,10 @@ runJannocoalesce (JannoCoalesceOptions sourceSpec target outSpec fields overwrit , _readOptOnlyLatest = True } getJointJanno <$> readPoseidonPackageCollection pacReadOpts sourceDirs - JannoRows targetRows <- readJannoFile latestPoseidonVersion [] target + JannoRows targetRows <- readJannoFile targetPV [] targetPath newJanno <- makeNewJannoRows sourceRows targetRows fields overwrite sKey tKey maybeStrip - let outPath = maybe target id outSpec logInfo $ "Writing to file (directory will be created if missing): " ++ outPath liftIO $ do createDirectoryIfMissing True (takeDirectory outPath) diff --git a/src/Poseidon/CLI/OptparseApplicativeParsers.hs b/src/Poseidon/CLI/OptparseApplicativeParsers.hs index 84af1798..a4ec2909 100644 --- a/src/Poseidon/CLI/OptparseApplicativeParsers.hs +++ b/src/Poseidon/CLI/OptparseApplicativeParsers.hs @@ -24,6 +24,7 @@ import Poseidon.GenotypeData (GenoDataSource (..), GenotypeFileSpec (..), GenotypeOutFormatSpec (..), SNPSetSpec (..)) +import Poseidon.PoseidonVersion import Poseidon.ServerClient (AddColSpec (..), ArchiveEndpoint (..)) import Poseidon.Utils (ErrorLength (..), LogMode (..), @@ -37,7 +38,7 @@ import Control.Applicative ((<|>)) import qualified Data.ByteString.Char8 as Bchs import Data.List (intercalate) import Data.List.Split (splitOn) -import Data.Version (Version) +import Data.Version (Version, makeVersion) import qualified Options.Applicative as OP import SequenceFormats.Plink (PlinkPopNameMode (PlinkPopNameAsBoth, PlinkPopNameAsFamily, PlinkPopNameAsPhenotype)) import System.FilePath (splitExtension, splitExtensions, @@ -434,18 +435,33 @@ parseInPoseidonYamlFile = OP.strOption ( OP.metavar "FILE" <> OP.help "Path to a POSEIDON.yml file.") -parseInJannoFile :: OP.Parser FilePath -parseInJannoFile = OP.strOption ( +parseInJannoFile :: OP.Parser VersionedFile +parseInJannoFile = VersionedFile <$> parsePoseidonVersion "pvJanno" <*> OP.strOption ( OP.long "janno" <> OP.metavar "FILE" <> OP.help "Path to a .janno file.") -parseInSSFile :: OP.Parser FilePath -parseInSSFile = OP.strOption ( +parseInSSFile :: OP.Parser VersionedFile +parseInSSFile = VersionedFile <$> parsePoseidonVersion "pvSSF" <*> OP.strOption ( OP.long "ssf" <> OP.metavar "FILE" <> OP.help "Path to a .ssf file.") +parsePoseidonVersion :: String -> OP.Parser PoseidonVersion +parsePoseidonVersion longName = OP.option (OP.eitherReader parsePV) ( + OP.long longName <> + OP.metavar "VERSION" <> + OP.help "Poseidon version (e.g. 2.7.1)." <> + OP.value latestPoseidonVersion <> + OP.showDefaultWith showPoseidonVersion) + where + parsePV s = case readVersion s of + Just v -> if PoseidonVersion v `elem` validPoseidonVersions + then Right (PoseidonVersion v) + else Left $ "must be one of " ++ intercalate ", " (map showPoseidonVersion validPoseidonVersions) + Nothing -> Left "invalid version string" + readVersion = fmap makeVersion . traverse readMaybe . splitOn "." + parseInBibFile :: OP.Parser FilePath parseInBibFile = OP.strOption ( OP.long "bib" <> @@ -879,31 +895,34 @@ parseMaybeArchiveName = OP.option (Just <$> OP.str) ( parseJannocoalSourceSpec :: OP.Parser JannoSourceSpec parseJannocoalSourceSpec = parseJannocoalSingleSource <|> (JannoSourceBaseDirs <$> parseBasePaths) - where - parseJannocoalSingleSource = OP.option (JannoSourceSingle <$> OP.str) ( - OP.long "sourceFile" <> - OP.short 's' <> - OP.metavar "FILE" <> - OP.help "The source .janno file." - ) - -parseJannocoalTargetFile :: OP.Parser FilePath -parseJannocoalTargetFile = OP.strOption ( + where + parseJannocoalSingleSource :: OP.Parser JannoSourceSpec + parseJannocoalSingleSource = + JannoSourceSingle <$> (VersionedFile <$> parsePoseidonVersion "pvSource" <*> OP.strOption ( + OP.long "sourceFile" <> + OP.short 's' <> + OP.metavar "FILE" <> + OP.help "The source .janno file." + )) + +parseJannocoalTargetFile :: OP.Parser VersionedFile +parseJannocoalTargetFile = VersionedFile <$> parsePoseidonVersion "pvTarget" <*> OP.strOption ( OP.long "targetFile" <> OP.short 't' <> OP.metavar "FILE" <> OP.help "The target .janno file to fill." ) -parseJannocoalOutSpec :: OP.Parser (Maybe FilePath) -parseJannocoalOutSpec = OP.option (Just <$> OP.str) ( +parseJannocoalOutFile :: OP.Parser FilePath +parseJannocoalOutFile = OP.strOption ( OP.long "outFile" <> OP.short 'o' <> OP.metavar "FILE" <> - OP.value Nothing <> OP.showDefault <> - OP.help "An optional file to write the results to. \ - \If not specified, change the target file in place." + OP.help ("File path to write the result to. Can be identical to --targetFile to overwrite the \ + \target file in place. Note that trident only writes .janno files in the \ + \latest Poseidon version it supports, so in this case v" ++ + showPoseidonVersion latestPoseidonVersion ++ ".") ) parseJannocoalJannoColumns :: OP.Parser CoalesceJannoColumnSpec diff --git a/src/Poseidon/CLI/Validate.hs b/src/Poseidon/CLI/Validate.hs index 0866a168..d77b2075 100644 --- a/src/Poseidon/CLI/Validate.hs +++ b/src/Poseidon/CLI/Validate.hs @@ -27,7 +27,7 @@ import qualified Data.ByteString.Char8 as Bchs import Data.List (groupBy, intercalate, sortOn) import Data.Yaml (decodeEither') import Poseidon.EntityTypes (IndividualInfo (..)) -import Poseidon.PoseidonVersion (latestPoseidonVersion) +import Poseidon.PoseidonVersion (VersionedFile (..)) import System.Exit (exitFailure, exitSuccess) -- | A datatype representing command line options for the validate command @@ -50,8 +50,8 @@ data ValidatePlan = } | ValPlanPoseidonYaml FilePath | ValPlanGeno GenotypeDataSpec - | ValPlanJanno FilePath - | ValPlanSSF FilePath + | ValPlanJanno VersionedFile + | ValPlanSSF VersionedFile | ValPlanBib FilePath runValidate :: ValidateOptions -> PoseidonIO () @@ -105,14 +105,14 @@ runValidate (ValidateOptions (ValPlanGeno geno) _ _ noExitCode _) = do pac <- makePseudoPackageFromGenotypeData geno validateGeno pac True conclude True noExitCode -runValidate (ValidateOptions (ValPlanJanno path) mandatoryJannoCols _ noExitCode _) = do +runValidate (ValidateOptions (ValPlanJanno (VersionedFile pv path)) mandatoryJannoCols _ noExitCode _) = do logInfo $ "Validating: " ++ path - (JannoRows entries) <- readJannoFile latestPoseidonVersion mandatoryJannoCols path + (JannoRows entries) <- readJannoFile pv mandatoryJannoCols path logInfo $ "All " ++ show (length entries) ++ " entries are valid" conclude True noExitCode -runValidate (ValidateOptions (ValPlanSSF path) _ mandatorySSFCols noExitCode _) = do +runValidate (ValidateOptions (ValPlanSSF (VersionedFile pv path)) _ mandatorySSFCols noExitCode _) = do logInfo $ "Validating: " ++ path - (SeqSourceRows entries) <- readSeqSourceFile latestPoseidonVersion mandatorySSFCols path + (SeqSourceRows entries) <- readSeqSourceFile pv mandatorySSFCols path logInfo $ "All " ++ show (length entries) ++ " entries are valid" conclude True noExitCode runValidate (ValidateOptions (ValPlanBib path) _ _ noExitCode _) = do diff --git a/src/Poseidon/PoseidonVersion.hs b/src/Poseidon/PoseidonVersion.hs index af327bd0..40b9fc7b 100644 --- a/src/Poseidon/PoseidonVersion.hs +++ b/src/Poseidon/PoseidonVersion.hs @@ -24,3 +24,6 @@ showPoseidonVersion (PoseidonVersion x) = showVersion x -- this is for the server minimalRequiredClientVersion :: Version minimalRequiredClientVersion = makeVersion [1, 1, 8, 5] + +-- and this for validate and jannocoalesce +data VersionedFile = VersionedFile PoseidonVersion FilePath diff --git a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs index bbe0a073..bd11f9b3 100644 --- a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs +++ b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs @@ -39,6 +39,8 @@ import Poseidon.GenotypeData (GenoDataSource (..), GenotypeFileSpec (..), GenotypeOutFormatSpec (..), SNPSetSpec (..)) +import Poseidon.PoseidonVersion (VersionedFile (..), + latestPoseidonVersion) import Poseidon.ServerClient (AddColSpec (..), ArchiveEndpoint (..)) import Poseidon.Utils (LogMode (..), TestMode (..), @@ -346,10 +348,14 @@ testPipelineValidate testDir checkFilePath = do } } & run 6 validateOpts1 { - _validatePlan = ValPlanJanno $ testPacsDir "Schiffels_2016" "Schiffels_2016.janno" + _validatePlan = ValPlanJanno $ VersionedFile + latestPoseidonVersion + (testPacsDir "Schiffels_2016" "Schiffels_2016.janno") } & run 7 validateOpts1 { - _validatePlan = ValPlanSSF $ testPacsDir "Schiffels_2016" "ena_table.ssf" + _validatePlan = ValPlanSSF $ VersionedFile + latestPoseidonVersion + (testPacsDir "Schiffels_2016" "ena_table.ssf") } & run 8 validateOpts1 { _validatePlan = ValPlanBib $ testPacsDir "Schiffels_2016" "sources.bib" @@ -1416,9 +1422,11 @@ testPipelineJannocoalesce :: FilePath -> FilePath -> IO () testPipelineJannocoalesce testDir checkFilePath = do -- simple coalesce let jannocoalesceOpts1 = JannoCoalesceOptions { - _jannocoalesceSource = JannoSourceSingle "test/testDat/testJannoFiles/normal.janno", - _jannocoalesceTarget = "test/testDat/testJannoFiles/minimal.janno", - _jannocoalesceOutSpec = Just (testDir "jannocoalesce" "target1.janno"), + _jannocoalesceSource = JannoSourceSingle $ VersionedFile latestPoseidonVersion + "test/testDat/testJannoFiles/normal.janno", + _jannocoalesceTarget = VersionedFile latestPoseidonVersion + "test/testDat/testJannoFiles/minimal.janno", + _jannocoalesceOutFile = testDir "jannocoalesce" "target1.janno", _jannocoalesceJannoColumns = AllJannoColumns, _jannocoalesceOverwriteColumns = False, _jannocoalesceSourceKey = "Poseidon_ID", @@ -1430,9 +1438,11 @@ testPipelineJannocoalesce testDir checkFilePath = do ] -- only coalesce certain columns (--includeColumns) let jannocoalesceOpts2 = JannoCoalesceOptions { - _jannocoalesceSource = JannoSourceSingle "test/testDat/testJannoFiles/normal.janno", - _jannocoalesceTarget = "test/testDat/testJannoFiles/minimal.janno", - _jannocoalesceOutSpec = Just (testDir "jannocoalesce" "target2.janno"), + _jannocoalesceSource = JannoSourceSingle $ VersionedFile latestPoseidonVersion + "test/testDat/testJannoFiles/normal.janno", + _jannocoalesceTarget = VersionedFile latestPoseidonVersion + "test/testDat/testJannoFiles/minimal.janno", + _jannocoalesceOutFile = testDir "jannocoalesce" "target2.janno", _jannocoalesceJannoColumns = IncludeJannoColumns ["Latitude", "Longitude"], _jannocoalesceOverwriteColumns = False, _jannocoalesceSourceKey = "Poseidon_ID", @@ -1444,9 +1454,11 @@ testPipelineJannocoalesce testDir checkFilePath = do ] -- do not coalesce certain columns (--excludeColumns) let jannocoalesceOpts3 = JannoCoalesceOptions { - _jannocoalesceSource = JannoSourceSingle "test/testDat/testJannoFiles/normal.janno", - _jannocoalesceTarget = "test/testDat/testJannoFiles/minimal.janno", - _jannocoalesceOutSpec = Just (testDir "jannocoalesce" "target3.janno"), + _jannocoalesceSource = JannoSourceSingle $ VersionedFile latestPoseidonVersion + "test/testDat/testJannoFiles/normal.janno", + _jannocoalesceTarget = VersionedFile latestPoseidonVersion + "test/testDat/testJannoFiles/minimal.janno", + _jannocoalesceOutFile = testDir "jannocoalesce" "target3.janno", _jannocoalesceJannoColumns = ExcludeJannoColumns ["Latitude", "Longitude"], _jannocoalesceOverwriteColumns = False, _jannocoalesceSourceKey = "Poseidon_ID",