Conversation
src/MMseqsBase.cpp
Outdated
| {"resultDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::resultDb }, | ||
| {"alignmentDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::alignmentDb }}}, | ||
| {"alignproteome", alignproteome, &par.alignproteome, COMMAND_ALIGNMENT, | ||
| "Within-result all-vs-all gapped local alignment", |
There was a problem hiding this comment.
I would improve this text.
src/MMseqsBase.cpp
Outdated
| {"alignproteome", alignproteome, &par.alignproteome, COMMAND_ALIGNMENT, | ||
| "Within-result all-vs-all gapped local alignment", | ||
| NULL, | ||
| "Martin Steinegger <[email protected]>", |
src/commons/ClusterSpecies.cpp
Outdated
There was a problem hiding this comment.
This file looks incomplete
src/commons/ClusterSpecies.h
Outdated
src/commons/DBReader.cpp
Outdated
| } | ||
| } | ||
| if (dataMode & USE_LOOKUP || dataMode & USE_LOOKUP_REV) { | ||
| Debug(Debug::INFO) << "ReadLookup file: " << dataFileName << "\n"; //gyuri |
There was a problem hiding this comment.
I would remove this print out.
src/commons/DBReader.cpp
Outdated
| } | ||
| char* lookupDataChar = (char *) lookupData.getData(); | ||
| size_t lookupDataSize = lookupData.size(); | ||
| Debug(Debug::INFO) << "Lookup Data size is " << lookupDataSize << "\n"; //gyuri |
There was a problem hiding this comment.
I would remove this print out.
src/commons/DBReader.cpp
Outdated
| size_t lookupDataSize = lookupData.size(); | ||
| Debug(Debug::INFO) << "Lookup Data size is " << lookupDataSize << "\n"; //gyuri | ||
| lookupSize = Util::ompCountLines(lookupDataChar, lookupDataSize, threads); | ||
| Debug(Debug::INFO) << "Lookup size is " << lookupSize << "\n"; //gyuri |
There was a problem hiding this comment.
I would remove this print out.
src/commons/CMakeLists.txt
Outdated
| commons/BaseMatrix.cpp | ||
| commons/Command.cpp | ||
| commons/CommandCaller.cpp | ||
| # commons/ClusterSpecies.cpp |
There was a problem hiding this comment.
Please remove it.
src/alignment/Matcher.h
Outdated
|
|
||
| static size_t resultToBuffer(char * buffer, const result_t &result, bool addBacktrace, bool compress = true, bool addOrfPosition = false); | ||
|
|
||
| static size_t resultToBuffer_str(char * buffer, const result_t &result, bool addBacktrace, bool compress, bool addOrfPosition=false); |
There was a problem hiding this comment.
Why is this needed?
|
|
||
| DBReader<unsigned int> tdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX); | ||
| // DBReader<unsigned int> tdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX); | ||
| DBReader<unsigned int> tdbr(par.db1.c_str(), par.db1Index.c_str(), par.threads, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX|DBReader<unsigned int>::USE_LOOKUP); |
There was a problem hiding this comment.
I assume this is not needed anymore
src/util/alignall.cpp
Outdated
| EvalueComputation evaluer(tdbr.getAminoAcidDBSize(), subMat, gapOpen, gapExtend); | ||
| const size_t flushSize = 100000000; | ||
| size_t iterations = static_cast<int>(ceil(static_cast<double>(dbr_res.getSize()) / static_cast<double>(flushSize))); | ||
| Debug(Debug::INFO) << "Number of iterations: " << iterations << " Size of linclust dbr_res : " << dbr_res.getSize() << '\n'; |
src/util/alignproteome.cpp
Outdated
| } | ||
| }; | ||
|
|
||
| struct __attribute__((__packed__)) memberProteinEntry{ |
There was a problem hiding this comment.
MemberProteinEntry
src/commons/DBReader.h
Outdated
| size_t getSize() const; | ||
|
|
||
| unsigned int getProteomeTotalLen(size_t id); //gyuri | ||
| unsigned int getProteomeTotalLen(size_t id); |
There was a problem hiding this comment.
I would name this getSourceTotalLen or getSetTotalLen to be consistent with the .source naming
src/workflow/EasyAlignproteome.cpp
Outdated
| } | ||
|
|
||
| // void setEasyAlignproteomeMustPassAlong(Parameters *p){ | ||
|
|
There was a problem hiding this comment.
You need to implement this whenever you use the createParameterString(..., true) parameter. Or it won't get passed along.
src/workflow/EasyAlignproteome.cpp
Outdated
| par.filenames.pop_back(); | ||
|
|
||
| CommandCaller cmd; | ||
| cmd.addVariable("ALIGNPROTEOME_PAR", par.createParameterString(par.alignproteome,true).c_str()); // what? |
There was a problem hiding this comment.
CLUSTER_PAR, THREADS_PAR etc are missing
src/workflow/EasyAlignproteome.cpp
Outdated
| hash = FileUtil::getHashFromSymLink(tmpDir + "/latest"); | ||
| } | ||
| tmpDir = FileUtil::createTemporaryDirectory(tmpDir, hash); | ||
| par.filenames.pop_back(); |
There was a problem hiding this comment.
You need to pop_back() the output path too and pass it as an environment variable. Or else it will also be passed to createdb.
…of different fasta files
…usage to createdb and easy-search in MMseqsBase.cpp.
| ) : sequenceReader(NULL), index(NULL) { | ||
| int targetDbtype = FileUtil::parseDbType(dataName.c_str()); | ||
| if (Parameters::isEqualDbtype(targetDbtype, Parameters::DBTYPE_INDEX_DB)) { | ||
| index = new DBReader<unsigned int>(dataName.c_str(), (dataName + ".index").c_str(), 1, DBReader<unsigned int>::USE_DATA|DBReader<unsigned int>::USE_INDEX); |
src/util/proteomecluster.cpp
Outdated
| const unsigned int ProteomeId = lookup[i].fileNumber; | ||
| const unsigned int ProteinId = lookup[i].id; | ||
| ProteomeList[ProteomeId].addSeqLen(tProteinDB.getSeqLen(ProteinId)); | ||
| if (ProteomeList[ProteomeId].proteomeKey == -1){ |
There was a problem hiding this comment.
style: please add spaces around if:
if[space](...)[space]{
src/util/proteomecluster.cpp
Outdated
| std::vector<unsigned int> memberKeys; | ||
| memberKeys.reserve(50); // store key for every protein in a cluster | ||
|
|
||
| std::vector<bool> isProteomeInCluster(localMemCount.size(), false); ; |
There was a problem hiding this comment.
double ;
also careful about bool vectors. They are not multi-threading friendly. Here its fine, as you don't do any multi-threading
src/util/proteomecluster.cpp
Outdated
| if (memberKeys.size() > 1) { //If not a singleton cluster | ||
| ClusterEntry eachClusterRep(memberKeys.size()); | ||
| //init MemberProteinEntry and add it to memberProteins vector | ||
| for (auto& eachMemberKey : memberKeys){ |
There was a problem hiding this comment.
style: missing space after the for
| for (size_t idx = 0; idx < ProteomeList.size(); idx++){ | ||
| if (ProteomeList[idx].isCovered()) { | ||
| continue; | ||
| }else{ |
There was a problem hiding this comment.
style: missing spaces around else
src/util/proteomecluster.cpp
Outdated
| DBReader<unsigned int>::LookupEntry* tLookup = tProteinDB.getLookup(); | ||
| const size_t tLookupSize = tProteinDB.getLookupSize(); | ||
| unsigned int totalProteomeNumber = tLookup[tLookupSize - 1].fileNumber; | ||
| std::vector<ProteomeEntry> ProteomeList(totalProteomeNumber + 1); |
There was a problem hiding this comment.
style: should be lowercase: proteomeList
src/util/proteomecluster.cpp
Outdated
|
|
||
| int gapOpen, gapExtend; | ||
| BaseMatrix *subMat; | ||
| subMat = new SubstitutionMatrix(par.scoringMatrixFile.values.aminoacid().c_str(), 2.0, par.scoreBias); |
There was a problem hiding this comment.
You can avoid the explicit allocation here:
SubstitutionMatrix subMat(par.scoringMatrixFile.values.aminoacid().c_str(), 2.0, par.scoreBias);
...
src/util/proteomecluster.cpp
Outdated
| #pragma omp critical | ||
| { | ||
| for (size_t idx=0; idx < localMemCount.size(); idx++){ | ||
| ProteomeList[idx].incrementMemCount(localMemCount[idx]); |
There was a problem hiding this comment.
if you use __sync_fetch_and_add inside incrementMemCount you don't need the critical
src/util/proteomecluster.cpp
Outdated
| #pragma omp critical | ||
| { | ||
| for (size_t idx = 0; idx < ProteomeList.size(); idx++) { | ||
| ProteomeList[idx].addSeqId(localSeqIds[idx]); |
There was a problem hiding this comment.
here you might need the critical as atomic floats can be tricky to do correctly
src/util/proteomecluster.cpp
Outdated
| proteomeClustWriter.close(); | ||
| proteinClustWriter.close(); | ||
| tProteinDB.close(); | ||
| delete subMat; |
There was a problem hiding this comment.
if you rewrite the subMat init above, you can remove this
…mber proteome is detected)
…teome handling update
No description provided.