diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..4e1ef42
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,31 @@
+# This workflows will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*
diff --git a/.github/workflows/test-pipeline.yml b/.github/workflows/test-pipeline.yml
index f65c756..cbab6eb 100644
--- a/.github/workflows/test-pipeline.yml
+++ b/.github/workflows/test-pipeline.yml
@@ -2,7 +2,7 @@ name: Test bedstat pipeline
 
 on:
   push:
-    branches: [master, dev]
+    branches: [master, dev, dev_alex]
   pull_request:
     branches: [master, dev]
 
@@ -10,7 +10,7 @@ jobs:
   pytest:
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.6, 3.8]
         os: [ubuntu-latest] # can't use macOS when using service containers or container jobs
         r: [release]
     runs-on: ${{ matrix.os }}
@@ -46,25 +46,27 @@ jobs:
         sudo apt-get update
         sudo apt-get install libcurl4-openssl-dev
 
-    - uses: r-lib/actions/setup-r@master
-      with:
-        r-version: ${{ matrix.r }}
+    - name: Install package
+      run: python -m pip install .
 
-    - name: Install R dependancies
-      run: Rscript scripts/installRdeps.R
+    - name: Run pytest tests
+      run: pytest tests -x -vv
 
-    - name: Run pipeline
-      run: looper run -p local tests/data/bedstat_config.yaml
+      #    - uses: r-lib/actions/setup-r@master
+#      with:
+#        r-version: ${{ matrix.r }}
+#
+#    - name: Install R dependancies
+#      run: Rscript scripts/installRdeps.R
 
-    - name: Test plots exist
-      run: |
-        if ls $GITHUB_WORKSPACE/outputs/bedstat_output/a6a08126cb6f4b1953ba0ec8675df85a/test_hg38*.png 1> /dev/null 2>&1; then
-          echo "SUCCESS";
-        else
-          echo "ERROR: files do not exist: $GITHUB_WORKSPACE/outputs/bedstat_output/a6a08126cb6f4b1953ba0ec8675df85a/test_hg38*.png";
-          exit 1
-        fi
-
-    - name: Test record in PostgreSQL
-      run: |
-        echo "from bbconf import BedBaseConf; from bbconf.const import *; bbc = BedBaseConf('$GITHUB_WORKSPACE/tests/data/config_min.yaml'); assert bbc.bed.record_count == 1, 'Number of records in the bedfiles table not equal 1'" | python3 -
+#    - name: Run pipeline
+#      run: looper run -p local tests/data/bedstat_config.yaml
+#
+#    - name: Test plots exist
+#      run: |
+#        if ls $GITHUB_WORKSPACE/outputs/bedstat_output/a6a08126cb6f4b1953ba0ec8675df85a/test_hg38*.png 1> /dev/null 2>&1; then
+#          echo "SUCCESS";
+#        else
+#          echo "ERROR: files do not exist: $GITHUB_WORKSPACE/outputs/bedstat_output/a6a08126cb6f4b1953ba0ec8675df85a/test_hg38*.png";
+#          exit 1
+#        fi
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..1b78bad
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,9 @@
+Copyright 2017 Nathan Sheffield
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..5ba56a6
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+include README.md
+include requirements/*
+include schemas/*
+include bedstat/scripts/*
+include bedstat/tools/regionstat.R
+include bedstat/pep_schema.yaml
diff --git a/README.md b/README.md
index d821232..32e0807 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ The above command will build the csv file looper needs to run the pipeline on al
 
 ### 1. Validate your PEP with [`eido`](https://github.com/pepkit/eido)
 
-The input PEP can be validated against the [JSON schema in this repository](pep_schema.yaml). This ensures the PEP consists of all required attributes to run `bedstat` pipeline.
+The input PEP can be validated against the [JSON schema in this repository](schemas/pep_schema.yaml). This ensures the PEP consists of all required attributes to run `bedstat` pipeline.
 
 ```
 eido validate <path/to/pep> -s https://schema.databio.org/pipelines/bedstat.yaml
@@ -58,7 +58,7 @@ The data loaded into PostgreSQL should persist between PostgreSQL invocations, o
 
 ## Additional dependencies
 
-[regionstat.R](tools/regionstat.R) script is used to calculate the bed file statistics, so the pipeline also depends on several R packages:
+[regionstat.R](bedstat/tools/regionstat.R) script is used to calculate the bed file statistics, so the pipeline also depends on several R packages:
 
 * `R.utils`
 * `BiocManager`
@@ -71,7 +71,7 @@ The data loaded into PostgreSQL should persist between PostgreSQL invocations, o
 * `BSgenome.<organim>.UCSC.<genome>` *depending on the genome used* 
 * `LOLA`
 
-you can use [installRdeps.R](scripts/installRdeps.R) helper script to easily install the required packages:
+you can use [installRdeps.R](bedstat/scripts/installRdeps.R) helper script to easily install the required packages:
 
 ```
 Rscript scripts/installRdeps.R
diff --git a/bedstat/__init__.py b/bedstat/__init__.py
new file mode 100644
index 0000000..c776603
--- /dev/null
+++ b/bedstat/__init__.py
@@ -0,0 +1,3 @@
+""" Package-level data """
+from .bedstat import *
+from ._version import __version__
diff --git a/bedstat/_version.py b/bedstat/_version.py
new file mode 100644
index 0000000..3dc1f76
--- /dev/null
+++ b/bedstat/_version.py
@@ -0,0 +1 @@
+__version__ = "0.1.0"
diff --git a/bedstat/bedstat.py b/bedstat/bedstat.py
new file mode 100755
index 0000000..f382b44
--- /dev/null
+++ b/bedstat/bedstat.py
@@ -0,0 +1,318 @@
+#!/usr/bin/env python3
+"""
+bedfile statistics generating pipeline
+"""
+
+from argparse import ArgumentParser
+from hashlib import md5
+import json
+import yaml
+import os
+import sys
+import warnings
+import tempfile
+import requests
+import gzip
+
+import pypiper
+import bbconf
+import time
+
+
+__author__ = [
+    "Michal Stolarczyk",
+    "Ognen Duzlevski",
+    "Jose Verdezoto",
+    "Bingjie Xue",
+    "Oleksandr Khoroshevskyi",
+]
+__email__ = "khorosh@virginia.edu"
+
+SCHEMA_PATH = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)), "pep_schema.yaml"
+)
+
+
+def hash_bedfile(filepath):
+    """generate digest for bedfile"""
+    with gzip.open(filepath, "rb") as f:
+        # concate column values
+        chrs = ",".join([row.split()[0].decode("utf-8") for row in f])
+        starts = ",".join([row.split()[1].decode("utf-8") for row in f])
+        ends = ",".join([row.split()[2].decode("utf-8") for row in f])
+        # hash column values
+        chr_digest = md5(chrs.encode("utf-8")).hexdigest()
+        start_digest = md5(starts.encode("utf-8")).hexdigest()
+        end_digest = md5(ends.encode("utf-8")).hexdigest()
+        # hash column digests
+        bed_digest = md5(
+            ",".join([chr_digest, start_digest, end_digest]).encode("utf-8")
+        ).hexdigest()
+
+        return bed_digest
+
+
+def convert_unit(size_in_bytes):
+    """Convert the size from bytes to other units like KB, MB or GB"""
+    if size_in_bytes < 1024:
+        return str(size_in_bytes) + "bytes"
+    elif size_in_bytes in range(1024, 1024 * 1024):
+        return str(round(size_in_bytes / 1024, 2)) + "KB"
+    elif size_in_bytes in range(1024 * 1024, 1024 * 1024 * 1024):
+        return str(round(size_in_bytes / (1024 * 1024))) + "MB"
+    elif size_in_bytes >= 1024 * 1024 * 1024:
+        return str(round(size_in_bytes / (1024 * 1024 * 1024))) + "GB"
+
+
+def get_file_size(file_name):
+    """Get file in size in given unit like KB, MB or GB"""
+    size = os.path.getsize(file_name)
+    return convert_unit(size)
+
+
+def run_bedstat(
+    bedfile: str,
+    bedbase_config: str,
+    genome_assembly: str,
+    ensdb: str = None,
+    open_signal_matrix: str = None,
+    bigbed: str = None,
+    sample_yaml: str = None,
+    just_db_commit: bool = False,
+    no_db_commit: bool = False,
+):
+    """
+    Main function to run bedstats. Can be used without runing from command line
+    :param bedfile: a full path to bed file to process
+    :param bigbed: a path to the bedbase configuration file
+    :param bedbase_config: a path to the bedbase configuration file
+    :param open_signal_matrix: a full path to the openSignalMatrix required for the tissue
+        specificity plots
+    :param genome_assembly: genome assembly of the sample
+    :param ensdb: a full path to the ensdb gtf file required for genomes not in GDdata
+    :param sample_yaml: a yaml config file with sample attributes to pass on more metadata
+        into the database
+    :param schema: sample table schema
+    :param just_db_commit: whether just to commit the JSON to the database
+    :param no_db_commit: whether the JSON commit to the database should be skipped
+    """
+    bbc = bbconf.BedBaseConf(config_path=bedbase_config, database_only=True)
+    bedstat_output_path = bbc.get_bedstat_output_path()
+
+    bed_digest = md5(open(bedfile, "rb").read()).hexdigest()
+    # bed_digest = hash_bedfile(args.bedfile)
+    bedfile_name = os.path.split(bedfile)[1]
+    # need to split twice since there are 2 exts
+    fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0]
+    outfolder = os.path.abspath(os.path.join(bedstat_output_path, bed_digest))
+    json_file_path = os.path.abspath(os.path.join(outfolder, fileid + ".json"))
+    json_plots_file_path = os.path.abspath(
+        os.path.join(outfolder, fileid + "_plots.json")
+    )
+    bed_relpath = os.path.relpath(
+        bedfile,
+        os.path.abspath(os.path.join(bedstat_output_path, os.pardir, os.pardir)),
+    )
+    bigbed_relpath = os.path.relpath(
+        os.path.join(bigbed, fileid + ".bigBed"),
+        os.path.abspath(os.path.join(bedstat_output_path, os.pardir, os.pardir)),
+    )
+    if not just_db_commit:
+        pm = pypiper.PipelineManager(
+            name="bedstat-pipeline",
+            outfolder=outfolder,
+        )
+
+        # run Rscript
+        rscript_path = os.path.join(
+            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+            "bedstat",
+            "tools",
+            "regionstat.R",
+        )
+        assert os.path.exists(rscript_path), FileNotFoundError(
+            f"'{rscript_path}' script not found"
+        )
+        command = (
+            f"Rscript {rscript_path} --bedfilePath={bedfile} "
+            f"--fileId={fileid} --openSignalMatrix={open_signal_matrix} "
+            f"--outputFolder={outfolder} --genome={genome_assembly} "
+            f"--ensdb={ensdb} --digest={bed_digest}"
+        )
+        print(command)
+        pm.run(cmd=command, target=json_file_path)
+        pm.stop_pipeline()
+
+    # now get the resulting json file and load it into Elasticsearch
+    # if the file exists, of course
+    if not no_db_commit:
+        data = {}
+        if os.path.exists(json_file_path):
+            with open(json_file_path, "r", encoding="utf-8") as f:
+                data = json.loads(f.read())
+        if os.path.exists(json_plots_file_path):
+            with open(json_plots_file_path, "r", encoding="utf-8") as f_plots:
+                plots = json.loads(f_plots.read())
+        else:
+            plots = []
+        if sample_yaml and os.path.exists(sample_yaml):
+            # get the sample-specific metadata from the sample yaml representation
+            y = yaml.safe_load(open(sample_yaml, "r"))
+            # if schema and os.path.exists(schema):
+            schema = yaml.safe_load(open(SCHEMA_PATH, "r"))
+            schema = schema["properties"]["samples"]["items"]["properties"]
+
+            for key in list(y):
+                if key in schema:
+                    if not schema[key]["db_commit"]:
+                        y.pop(key, None)
+                elif key in ["bedbase_config", "pipeline_interfaces", "yaml_file"]:
+                    y.pop(key, None)
+            data.update({"other": y})
+        # unlist the data, since the output of regionstat.R is a dict of lists of
+        # length 1 and force keys to lower to correspond with the
+        # postgres column identifiers
+        data = {k.lower(): v[0] if isinstance(v, list) else v for k, v in data.items()}
+        data.update(
+            {
+                "bedfile": {
+                    "path": bed_relpath,
+                    "size": get_file_size(bedfile),
+                    "title": "Path to the BED file",
+                }
+            }
+        )
+
+        if os.path.exists(
+            os.path.join(bigbed, fileid + ".bigBed")
+        ) and not os.path.islink(os.path.join(bigbed, fileid + ".bigBed")):
+            digest = requests.get(
+                f"http://refgenomes.databio.org/genomes/genome_digest/{genome_assembly}"
+            ).text.strip('""')
+
+            data.update(
+                {
+                    "genome": {
+                        "alias": genome_assembly,
+                        "digest": digest,
+                    }
+                }
+            )
+            data.update(
+                {
+                    "bigbedfile": {
+                        "path": bigbed_relpath,
+                        "size": get_file_size(os.path.join(bigbed, fileid + ".bigBed")),
+                        "title": "Path to the big BED file",
+                    }
+                }
+            )
+
+        else:
+            data.update(
+                {
+                    "genome": {
+                        "alias": genome_assembly,
+                        "digest": "",
+                    }
+                }
+            )
+
+        for plot in plots:
+            plot_id = plot["name"]
+            del plot["name"]
+            data.update({plot_id: plot})
+        bbc.bed.report(record_identifier=bed_digest, values=data)
+
+
+def _parse_cmdl():
+    parser = ArgumentParser(
+        description="A pipeline to read a file in BED format and produce metadata "
+        "in JSON format."
+    )
+    parser.add_argument(
+        "--bedfile", help="a full path to bed file to process", required=True
+    )
+    parser.add_argument(
+        "--open-signal-matrix",
+        type=str,
+        required=False,
+        default=None,
+        help="a full path to the openSignalMatrix required for the tissue "
+        "specificity plots",
+    )
+
+    parser.add_argument(
+        "--ensdb",
+        type=str,
+        required=False,
+        default=None,
+        help="a full path to the ensdb gtf file required for genomes not in GDdata ",
+    )
+
+    parser.add_argument(
+        "--bigbed",
+        type=str,
+        required=False,
+        default=None,
+        help="a full path to the bigbed files",
+    )
+
+    parser.add_argument(
+        "--bedbase-config",
+        dest="bedbase_config",
+        type=str,
+        default=None,
+        help="a path to the bedbase configuration file",
+    )
+    parser.add_argument(
+        "-y",
+        "--sample-yaml",
+        dest="sample_yaml",
+        type=str,
+        required=False,
+        help="a yaml config file with sample attributes to pass on more metadata "
+        "into the database",
+    )
+    # parser.add_argument(
+    #     "--schema",
+    #     dest="schema",
+    #     type=str,
+    #     required=False,
+    #     help="schema for the sample table",
+    # )
+    parser.add_argument(
+        "--genome",
+        dest="genome_assembly",
+        type=str,
+        required=True,
+        help="genome assembly of the sample",
+    )
+    exclusive_group = parser.add_mutually_exclusive_group()
+    exclusive_group.add_argument(
+        "--no-db-commit",
+        action="store_true",
+        help="whether the JSON commit to the database should be skipped",
+    )
+    exclusive_group.add_argument(
+        "--just-db-commit",
+        action="store_true",
+        help="whether just to commit the JSON to the database",
+    )
+    args = parser.parse_args(sys.argv[1:])
+
+    return args
+
+
+def main():
+    args = _parse_cmdl()
+    args_dict = vars(args)
+    run_bedstat(**args_dict)
+
+
+if __name__ == "__main__":
+    try:
+        sys.exit(main())
+    except KeyboardInterrupt:
+        print("Pipeline aborted.")
+        sys.exit(1)
diff --git a/pep_schema.yaml b/bedstat/pep_schema.yaml
similarity index 63%
rename from pep_schema.yaml
rename to bedstat/pep_schema.yaml
index da49b90..65bc588 100644
--- a/pep_schema.yaml
+++ b/bedstat/pep_schema.yaml
@@ -8,46 +8,72 @@ properties:
       properties:
         sample_name: 
           type: string
+          db_commit: TRUE
           description: "name of the sample, which is the name of the output BED file"
         input_file_path:
           type: string
+          db_commit: FALSE
           description: "absolute path the file to convert"
         output_file_path:
           type: string
+          db_commit: FALSE
           description: "absolute path the file to the output BED file (derived attribute)"
+        bigbed:
+          type: string
+          db_commit: FALSE
+          description: "dir path where the bigbed file stored (derived attribute)"
         genome:
           type: string
+          db_commit: TRUE
           description: "organism genome code"
         narrowpeak:
-          type: integer
-          minimum: 0
-          maximum: 1
+          type: boolean
+          db_commit: TRUE
           description: "binary number indicating whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)"
         format:
           type: string
+          db_commit: TRUE
           description: "file format"
           enum: ["bigWig", "bigBed", "bed", "wig", "bedGraph"]
         cell_type:
           type: string
+          db_commit: TRUE
           description: "cell type code"
         antibody:
           type: string
+          db_commit: TRUE
           description: "antibody used if ChIP-seq experiment"
         description:
           type: string
+          db_commit: TRUE
           description: "freeform description of the sample"
         exp_protocol:
           type: string
+          db_commit: TRUE
           description: "type of the experiment the file was generated in"
         data_source:
           type: string
+          db_commit: TRUE
           description: "source of the sample, preferably a GSE* code"
         treatment:
           type: string
+          db_commit: TRUE
           description: "freeform description of the sample treatment"
+        ensdb:
+          type: string
+          db_commit: FALSE
+          description: "path of gtf annotation for genomes not in GDdata"
+        fasta:
+          type: string
+          db_commit: FALSE
+          description: "path of for genomes not in GDdata"
+        open_signal_matrix:
+          type: string
+          db_commit: FALSE
+          description: "path of for the open signal matrixm file for the given genome"
       required:
         - output_file_path
         - genome
         - sample_name
 required:
-  - samples
+  - samples
\ No newline at end of file
diff --git a/scripts/__init__.py b/bedstat/scripts/__init__.py
similarity index 100%
rename from scripts/__init__.py
rename to bedstat/scripts/__init__.py
diff --git a/scripts/installRdeps.R b/bedstat/scripts/installRdeps.R
similarity index 88%
rename from scripts/installRdeps.R
rename to bedstat/scripts/installRdeps.R
index 3cad82f..e0416f6 100644
--- a/scripts/installRdeps.R
+++ b/bedstat/scripts/installRdeps.R
@@ -17,9 +17,10 @@
 .install_pkg("ensembldb", bioc=TRUE)
 .install_pkg("LOLA", bioc=TRUE)
 .install_pkg("BSgenome", bioc=TRUE)
+.install_pkg("GenomicDistributions", bioc=TRUE)
 if(!require(package = "GenomicDistributions", character.only=TRUE)) {
     devtools::install_github("databio/GenomicDistributions")
 }
 if(!require(package = "GenomicDistributionsData", character.only=TRUE)) {
-    install.packages("http://big.databio.org/GenomicDistributionsData/GenomicDistributionsData_0.0.1.tar.gz", repos=NULL)
+    install.packages("http://big.databio.org/GenomicDistributionsData/GenomicDistributionsData_0.0.2.tar.gz", repos=NULL)
 }
diff --git a/scripts/process_LOLA.py b/bedstat/scripts/process_LOLA.py
similarity index 100%
rename from scripts/process_LOLA.py
rename to bedstat/scripts/process_LOLA.py
diff --git a/tools/regionstat.R b/bedstat/tools/regionstat.R
similarity index 54%
rename from tools/regionstat.R
rename to bedstat/tools/regionstat.R
index d8c1320..540b243 100644
--- a/tools/regionstat.R
+++ b/bedstat/tools/regionstat.R
@@ -1,5 +1,7 @@
 library(GenomicDistributions)
 library(GenomicDistributionsData)
+library(GenomeInfoDb)
+library(ensembldb)
 library(optparse)
 library(tools)
 library(R.utils)
@@ -18,7 +20,11 @@ option_list = list(
   make_option(c("--outputFolder"), type="character", default="output",
               help="base output folder for results", metavar="character"),
   make_option(c("--genome"), type="character", default="hg38",
-              help="genome reference to calculate against", metavar="character"))
+              help="genome reference to calculate against", metavar="character"),
+  make_option(c("--ensdb"), type="character",
+              help="path to the Ensembl annotation gtf file", metavar="character")
+)
+
 
 opt_parser = OptionParser(option_list=option_list);
 opt = parse_args(opt_parser);    
@@ -38,6 +44,29 @@ if (is.null(opt$digest)) {
   stop("digest input missing.")
 }
 
+myPartitionList <- function(gtffile){
+  features = c("gene", "exon", "three_prime_utr", "five_prime_utr")
+  geneModels = getGeneModelsFromGTF(gtffile, features, TRUE)
+  partitionList = genomePartitionList(geneModels$gene,
+                                      geneModels$exon,
+                                      geneModels$three_prime_utr,
+                                      geneModels$five_prime_utr)
+  
+  return (partitionList) 
+}
+
+
+myChromSizes <- function(genome){
+  if (requireNamespace(BSgm, quietly=TRUE)){
+    library (BSgm, character.only = TRUE)
+    BSG = eval(as.name(BSgm))
+  } else {
+    library (BSg, character.only = TRUE)
+    BSG = eval(as.name(BSg))
+  }
+  chromSizesGenome = seqlengths(BSG)
+  return(chromSizesGenome)
+}
 
 plotBoth <- function(plotId, g){
   pth = paste0(opt$outputFolder, "/", fileId, "_", plotId)
@@ -66,8 +95,18 @@ doItAall <- function(query, fileId, genome, cellMatrix) {
   # TSS distance plot
   tryCatch(
     expr = {
-      TSSdist = calcFeatureDistRefTSS(query, genome)
-      plotBoth("tssdist", plotFeatureDist(TSSdist, featureName="TSS"))
+      if (!(genome %in% c("hg19", "hg38", "mm10", "mm9")) && gtffile == "None"){
+        message("Ensembl annotation gtf file not provided. Skipping TSS distance plot ... ")
+      } else{
+        if (genome %in% c("hg19", "hg38", "mm10", "mm9")) {
+          TSSdist = calcFeatureDistRefTSS(query, genome)
+          plotBoth("tssdist", plotFeatureDist( TSSdist, featureName="TSS"))
+        } else {
+        tss = getTssFromGTF(gtffile, TRUE)
+        TSSdist = calcFeatureDist(query, tss)
+        plotBoth("tssdist", plotFeatureDist( TSSdist, featureName="TSS"))
+        }
+      }
       plots = rbind(plots, getPlotReportDF("tssdist", "Region-TSS distance distribution"))
       message("Successfully calculated and plot TSS distance.")
     },
@@ -81,7 +120,14 @@ doItAall <- function(query, fileId, genome, cellMatrix) {
   # Chromosomes region distribution plot
   tryCatch(
     expr = {
-      plotBoth("chrombins", plotChromBins(calcChromBinsRef(query, genome)))
+      if (genome %in% c("mm39", "dm3", "dm6", "ce10", "ce11", "danRer10", "danRer10", "T2T")){
+        chromSizes = myChromSizes(genome)
+        genomeBins  = getGenomeBins(chromSizes)
+        plotBoth("chrombins", plotChromBins(calcChromBins(query, genomeBins)))
+      } else{
+        plotBoth("chrombins", plotChromBins(calcChromBinsRef(query, genome)))
+      }
+      
       plots = rbind(plots, getPlotReportDF("chrombins", "Regions distribution over chromosomes"))
       message("Successfully calculated and plot chromosomes region distribution.")
     },
@@ -96,7 +142,15 @@ doItAall <- function(query, fileId, genome, cellMatrix) {
   if (bsGenomeAvail) {
     tryCatch(
       expr = {
-        gcvec = calcGCContentRef(query, genome)
+        if (requireNamespace(BSgm, quietly=TRUE)){
+          library (BSgm, character.only = TRUE)
+          bsg = eval(as.name(BSgm))
+          gcvec = calcGCContent(query, bsg)
+        } else {
+          library (BSg, character.only = TRUE)
+          bsg = eval(as.name(BSg))
+          gcvec = calcGCContent(query, bsg)
+        }
         plotBoth("gccontent", plotGCContent(gcvec))
         plots = rbind(plots, getPlotReportDF("gccontent", "GC content"))
         message("Successfully calculated and plot GC content.")
@@ -111,19 +165,29 @@ doItAall <- function(query, fileId, genome, cellMatrix) {
   # Partition plots, default to percentages
   tryCatch(
     expr = {
-      gp = calcPartitionsRef(query, genome)
-      plotBoth("paritions", plotPartitions(gp))
-      plots = rbind(plots, getPlotReportDF("paritions", "Regions distribution over genomic partitions"))
-      # flatten the result returned by the function above
-      partiotionNames = as.vector(gp[,"partition"])
-      partitionsList = list()
-      for(i in seq_along(partiotionNames)){
-        partitionsList[[paste0(partiotionNames[i], "_frequency")]] = 
-          as.vector(gp[,"Freq"])[i]
-        partitionsList[[paste0(partiotionNames[i], "_percentage")]] = 
-          as.vector(gp[,"Freq"])[i]/length(query)	        
+      if (!(genome %in% c("hg19", "hg38", "mm10")) && gtffile == "None"){
+        message("Ensembl annotation gtf file not provided. Skipping partition plot ... ")
+      } else {
+        if (genome %in% c("hg19", "hg38", "mm10")) {
+          gp = calcPartitionsRef(query, genome)
+          plotBoth("paritions", plotPartitions(gp))
+        } else {
+          partitionList = myPartitionList(gtffile)
+          gp = calcPartitions(query, partitionList)
+          plotBoth("paritions", plotPartitions(gp))
+        }
+        plots = rbind(plots, getPlotReportDF("paritions", "Regions distribution over genomic partitions"))
+        # flatten the result returned by the function above
+        partiotionNames = as.vector(gp[,"partition"])
+        partitionsList = list()
+        for(i in seq_along(partiotionNames)){
+          partitionsList[[paste0(partiotionNames[i], "_frequency")]] = 
+            as.vector(gp[,"Freq"])[i]
+          partitionsList[[paste0(partiotionNames[i], "_percentage")]] = 
+            as.vector(gp[,"Freq"])[i]/length(query)	        
+        }
+        message("Successfully calculated and plot regions distribution over genomic partitions.")
       }
-      message("Successfully calculated and plot regions distribution over genomic partitions.")
     },
     error = function(e){
       message('Caught an error!')
@@ -134,9 +198,20 @@ doItAall <- function(query, fileId, genome, cellMatrix) {
   # Expected partition plots
   tryCatch(
     expr = {
-      plotBoth("expected_partitions", plotExpectedPartitions(calcExpectedPartitionsRef(query, genome)))
-      plots = rbind(plots, getPlotReportDF("expected_partitions", "Expected distribution over genomic partitions"))
-      message("Successfully calculated and plot expected distribution over genomic partitions.")
+      if (!(genome %in% c("hg19", "hg38", "mm10")) && gtffile == "None"){
+        message("Ensembl annotation gtf file not provided. Skipping expected partition plot ... ")
+      } else{
+        if (genome %in% c("hg19", "hg38", "mm10")) {
+          plotBoth("expected_partitions", plotExpectedPartitions(calcExpectedPartitionsRef(query, genome)))
+        } else {
+          partitionList = myPartitionList(gtffile)
+          chromSizes = myChromSizes(genome)
+          genomeSize = sum(chromSizes)
+          plotBoth("expected_partitions", plotExpectedPartitions(calcExpectedPartitions(query, partitionList, genomeSize)))
+        }
+        plots = rbind(plots, getPlotReportDF("expected_partitions", "Expected distribution over genomic partitions"))
+        message("Successfully calculated and plot expected distribution over genomic partitions.")
+      }
     },
     error = function(e){
       message('Caught an error!')
@@ -147,9 +222,18 @@ doItAall <- function(query, fileId, genome, cellMatrix) {
   # Cumulative partition plots
   tryCatch(
     expr = {
-      plotBoth("cumulative_partitions", plotCumulativePartitions(calcCumulativePartitionsRef(query, genome)))
-      plots = rbind(plots, getPlotReportDF("cumulative_partitions", "Cumulative distribution over genomic partitions"))
-      message("Successfully calculated and plot cumulative distribution over genomic partitions.")
+      if (!(genome %in% c("hg19", "hg38", "mm10")) && gtffile == "None"){
+        message("Ensembl annotation gtf file not provided. Skipping cumulative partition plot ... ")
+      } else{
+        if (genome %in% c("hg19", "hg38", "mm10")) {
+          plotBoth("cumulative_partitions", plotCumulativePartitions(calcCumulativePartitionsRef(query, genome)))
+        } else{
+          partitionList = myPartitionList(gtffile)
+          plotBoth("cumulative_partitions", plotCumulativePartitions(calcCumulativePartitions(query, partitionList)))
+        }
+        plots = rbind(plots, getPlotReportDF("cumulative_partitions", "Cumulative distribution over genomic partitions"))
+        message("Successfully calculated and plot cumulative distribution over genomic partitions.")
+      }
     },
     error = function(e){
       message('Caught an error!')
@@ -233,12 +317,29 @@ bedPath = opt$bedfilePath
 outfolder = opt$outputFolder
 genome = opt$genome
 cellMatrix = opt$openSignalMatrix
-orgName = "Mmusculus"
+gtffile = opt$ensdb
+
 
 # build BSgenome package ID to check whether it's installed
-if (startsWith(genome, "hg") | startsWith(genome, "grch")) orgName = "Hsapiens"
+if (genome == "T2T"){
+  BSg = "BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0"
+} else {
+  if (startsWith(genome, "hg") | startsWith(genome, "grch")) {
+  orgName = "Hsapiens"
+  } else if (startsWith(genome, "mm") | startsWith(genome, "grcm")){
+  orgName = "Mmusculus"
+  } else if (startsWith(genome, "dm")){
+  orgName = "Dmelanogaster"
+  } else if (startsWith(genome, "ce")){
+  orgName = "Celegans"
+  } else if (startsWith(genome, "danRer")){
+  orgName = "Drerio"
+  }  else if (startsWith(genome, "TAIR")){
+    orgName = "Athaliana"
+  }
+  BSg = paste0("BSgenome.", orgName , ".UCSC.", genome)
+}
 
-BSg = paste0("BSgenome.", orgName , ".UCSC.", genome)
 BSgm = paste0(BSg, ".masked")
 
 # read bed file and run doitall()
diff --git a/pipeline/bedstat.py b/pipeline/bedstat.py
deleted file mode 100755
index 0c8cbe9..0000000
--- a/pipeline/bedstat.py
+++ /dev/null
@@ -1,247 +0,0 @@
-#!/usr/bin/env python3
-"""
-bedfile statistics generating pipeline
-"""
-
-__author__ = [
-    "Michal Stolarczyk",
-    "Ognen Duzlevski",
-    "Jose Verdezoto",
-    "Bingjie Xue",
-]
-__email__ = "michal@virginia.edu"
-__version__ = "0.0.4-dev"
-
-from argparse import ArgumentParser
-from hashlib import md5
-import json
-import yaml
-import os
-import sys
-import warnings
-import tempfile
-import requests
-import gzip
-
-import pypiper
-import bbconf
-import time
-
-parser = ArgumentParser(
-    description="A pipeline to read a file in BED format and produce metadata "
-    "in JSON format."
-)
-
-parser.add_argument(
-    "--bedfile", help="a full path to bed file to process", required=True
-)
-parser.add_argument(
-    "--open-signal-matrix",
-    type=str,
-    required=False,
-    default=None,
-    help="a full path to the openSignalMatrix required for the tissue "
-    "specificity plots",
-)
-parser.add_argument(
-    "--bigbed",
-    type=str,
-    required=False,
-    default=None,
-    help="a full path to the bigbed files",
-)
-parser.add_argument(
-    "--bedbase-config",
-    dest="bedbase_config",
-    type=str,
-    default=None,
-    help="a path to the bedbase configuratiion file",
-)
-parser.add_argument(
-    "-y",
-    "--sample-yaml",
-    dest="sample_yaml",
-    type=str,
-    required=False,
-    help="a yaml config file with sample attributes to pass on more metadata "
-    "into the database",
-)
-exclusive_group = parser.add_mutually_exclusive_group()
-exclusive_group.add_argument(
-    "--no-db-commit",
-    action="store_true",
-    help="whether the JSON commit to the database should be skipped",
-)
-exclusive_group.add_argument(
-    "--just-db-commit",
-    action="store_true",
-    help="whether just to commit the JSON to the database",
-)
-parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "looper", "ngs"])
-
-args = parser.parse_args()
-
-
-def hash_bedfile(filepath):
-    """generate digest for bedfile"""
-    with gzip.open(filepath, "rb") as f:
-        # concate column values
-        chrs = ",".join([row.split()[0].decode("utf-8") for row in f])
-        starts = ",".join([row.split()[1].decode("utf-8") for row in f])
-        ends = ",".join([row.split()[2].decode("utf-8") for row in f])
-        # hash column values
-        chr_digest = md5(chrs.encode("utf-8")).hexdigest()
-        start_digest = md5(starts.encode("utf-8")).hexdigest()
-        end_digest = md5(ends.encode("utf-8")).hexdigest()
-        # hash column digests
-        bed_digest = md5(
-            ",".join([chr_digest, start_digest, end_digest]).encode("utf-8")
-        ).hexdigest()
-
-        return bed_digest
-
-
-def convert_unit(size_in_bytes):
-    """Convert the size from bytes to other units like KB, MB or GB"""
-    if size_in_bytes < 1024:
-        return str(size_in_bytes) + "bytes"
-    elif size_in_bytes in range(1024, 1024 * 1024):
-        return str(round(size_in_bytes / 1024, 2)) + "KB"
-    elif size_in_bytes in range(1024 * 1024, 1024 * 1024 * 1024):
-        return str(round(size_in_bytes / (1024 * 1024))) + "MB"
-    elif size_in_bytes >= 1024 * 1024 * 1024:
-        return str(round(size_in_bytes / (1024 * 1024 * 1024))) + "GB"
-
-
-def get_file_size(file_name):
-    """Get file in size in given unit like KB, MB or GB"""
-    size = os.path.getsize(file_name)
-    return convert_unit(size)
-
-
-bbc = bbconf.BedBaseConf(config_path=args.bedbase_config, database_only=True)
-bedstat_output_path = bbc.get_bedstat_output_path()
-
-bed_digest = md5(open(args.bedfile, "rb").read()).hexdigest()
-# bed_digest = hash_bedfile(args.bedfile)
-bedfile_name = os.path.split(args.bedfile)[1]
-# need to split twice since there are 2 exts
-fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0]
-outfolder = os.path.abspath(os.path.join(bedstat_output_path, bed_digest))
-json_file_path = os.path.abspath(os.path.join(outfolder, fileid + ".json"))
-json_plots_file_path = os.path.abspath(os.path.join(outfolder, fileid + "_plots.json"))
-bed_relpath = os.path.relpath(
-    args.bedfile,
-    os.path.abspath(os.path.join(bedstat_output_path, os.pardir, os.pardir)),
-)
-bigbed_relpath = os.path.relpath(
-    os.path.join(args.bigbed, fileid + ".bigBed"),
-    os.path.abspath(os.path.join(bedstat_output_path, os.pardir, os.pardir)),
-)
-
-
-def main():
-    if not args.just_db_commit:
-        pm = pypiper.PipelineManager(
-            name="bedstat-pipeline", outfolder=outfolder, args=args
-        )
-
-        # run Rscript
-        rscript_path = os.path.join(
-            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
-            "tools",
-            "regionstat.R",
-        )
-        assert os.path.exists(rscript_path), FileNotFoundError(
-            f"'{rscript_path}' script not found"
-        )
-        command = (
-            f"Rscript {rscript_path} --bedfilePath={args.bedfile} "
-            f"--fileId={fileid} --openSignalMatrix={args.open_signal_matrix} "
-            f"--outputFolder={outfolder} --genome={args.genome_assembly} "
-            f"--digest={bed_digest}"
-        )
-        print(command)
-        pm.run(cmd=command, target=json_file_path)
-        pm.stop_pipeline()
-
-    # now get the resulting json file and load it into Elasticsearch
-    # if the file exists, of course
-    if not args.no_db_commit:
-        data = {}
-        if os.path.exists(json_file_path):
-            with open(json_file_path, "r", encoding="utf-8") as f:
-                data = json.loads(f.read())
-        if os.path.exists(json_plots_file_path):
-            with open(json_plots_file_path, "r", encoding="utf-8") as f_plots:
-                plots = json.loads(f_plots.read())
-        if args.sample_yaml:
-            # get the sample-specific metadata from the sample yaml representation
-            other = {}
-            if os.path.exists(args.sample_yaml):
-                y = yaml.safe_load(open(args.sample_yaml, "r"))
-                data.update({"other": y})
-        # unlist the data, since the output of regionstat.R is a dict of lists of
-        # length 1 and force keys to lower to correspond with the
-        # postgres column identifiers
-        data = {k.lower(): v[0] if isinstance(v, list) else v for k, v in data.items()}
-        data.update(
-            {
-                "bedfile": {
-                    "path": bed_relpath,
-                    "size": get_file_size(args.bedfile),
-                    "title": "Path to the BED file",
-                }
-            }
-        )
-
-        if os.path.exists(
-            os.path.join(args.bigbed, fileid + ".bigBed")
-        ) and not os.path.islink(os.path.join(args.bigbed, fileid + ".bigBed")):
-            digest = requests.get(
-                f"http://refgenomes.databio.org/genomes/genome_digest/{args.genome_assembly}"
-            ).text.strip('""')
-
-            data.update(
-                {
-                    "genome": {
-                        "alias": args.genome_assembly,
-                        "digest": digest,
-                    }
-                }
-            )
-            data.update(
-                {
-                    "bigbedfile": {
-                        "path": bigbed_relpath,
-                        "size": get_file_size(
-                            os.path.join(args.bigbed, fileid + ".bigBed")
-                        ),
-                        "title": "Path to the big BED file",
-                    }
-                }
-            )
-
-        else:
-            data.update(
-                {
-                    "genome": {
-                        "alias": args.genome_assembly,
-                        "digest": "",
-                    }
-                }
-            )
-
-        for plot in plots:
-            plot_id = plot["name"]
-            del plot["name"]
-            data.update({plot_id: plot})
-        bbc.bed.report(record_identifier=bed_digest, values=data)
-
-
-if __name__ == "__main__":
-    try:
-        sys.exit(main())
-    except KeyboardInterrupt:
-        print("Pipeline aborted.")
-        sys.exit(1)
diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt
index ffc1db4..af4b488 100644
--- a/requirements/requirements-all.txt
+++ b/requirements/requirements-all.txt
@@ -1,3 +1,6 @@
 piper>=0.12.1
 looper>=1.2.1
-# bbconf>=0.1.0
\ No newline at end of file
+bbconf>=0.1.0
+requests>=2.27.0
+yacman>=0.8.4
+pipestat>=0.3.1
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
index 6c7c573..b28b04f 100644
--- a/requirements/requirements-dev.txt
+++ b/requirements/requirements-dev.txt
@@ -1,4 +1,3 @@
--e git+https://github.com/databio/yacman@dev#egg=yacman
--e git+https://github.com/pepkit/pipestat@dev#egg=pipestat
+
 
 
diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt
new file mode 100644
index 0000000..f77c910
--- /dev/null
+++ b/requirements/requirements-test.txt
@@ -0,0 +1 @@
+pytest>=7.0.0
diff --git a/schemas/pep_schema.yaml b/schemas/pep_schema.yaml
new file mode 100644
index 0000000..65bc588
--- /dev/null
+++ b/schemas/pep_schema.yaml
@@ -0,0 +1,79 @@
+description: bedstat PEP schema
+
+properties:
+  samples:
+    type: array
+    items:
+      type: object
+      properties:
+        sample_name: 
+          type: string
+          db_commit: TRUE
+          description: "name of the sample, which is the name of the output BED file"
+        input_file_path:
+          type: string
+          db_commit: FALSE
+          description: "absolute path the file to convert"
+        output_file_path:
+          type: string
+          db_commit: FALSE
+          description: "absolute path the file to the output BED file (derived attribute)"
+        bigbed:
+          type: string
+          db_commit: FALSE
+          description: "dir path where the bigbed file stored (derived attribute)"
+        genome:
+          type: string
+          db_commit: TRUE
+          description: "organism genome code"
+        narrowpeak:
+          type: boolean
+          db_commit: TRUE
+          description: "binary number indicating whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)"
+        format:
+          type: string
+          db_commit: TRUE
+          description: "file format"
+          enum: ["bigWig", "bigBed", "bed", "wig", "bedGraph"]
+        cell_type:
+          type: string
+          db_commit: TRUE
+          description: "cell type code"
+        antibody:
+          type: string
+          db_commit: TRUE
+          description: "antibody used if ChIP-seq experiment"
+        description:
+          type: string
+          db_commit: TRUE
+          description: "freeform description of the sample"
+        exp_protocol:
+          type: string
+          db_commit: TRUE
+          description: "type of the experiment the file was generated in"
+        data_source:
+          type: string
+          db_commit: TRUE
+          description: "source of the sample, preferably a GSE* code"
+        treatment:
+          type: string
+          db_commit: TRUE
+          description: "freeform description of the sample treatment"
+        ensdb:
+          type: string
+          db_commit: FALSE
+          description: "path of gtf annotation for genomes not in GDdata"
+        fasta:
+          type: string
+          db_commit: FALSE
+          description: "path of for genomes not in GDdata"
+        open_signal_matrix:
+          type: string
+          db_commit: FALSE
+          description: "path of for the open signal matrixm file for the given genome"
+      required:
+        - output_file_path
+        - genome
+        - sample_name
+required:
+  - samples
\ No newline at end of file
diff --git a/pipeline_interface.yaml b/schemas/pipeline_interface.yaml
similarity index 67%
rename from pipeline_interface.yaml
rename to schemas/pipeline_interface.yaml
index fdbcbc9..0dd5915 100644
--- a/pipeline_interface.yaml
+++ b/schemas/pipeline_interface.yaml
@@ -2,7 +2,8 @@ pipeline_name: BEDSTAT
 pipeline_type: sample
 var_templates:
   path: "{looper.piface_dir}/pipeline/bedstat.py"
-input_schema: http://schema.databio.org/pipelines/bedstat.yaml
+#input_schema: http://schema.databio.org/pipelines/bedstat.yaml
+input_schema: pep_schema.yaml
 pre_submit:
   python_functions:
     - looper.write_sample_yaml
@@ -11,6 +12,9 @@ command_template: >
   --bedfile {sample.output_file_path}
   --genome {sample.genome}
   --sample-yaml {sample.yaml_file}
+  --schema "{looper.piface_dir}/{pipeline.input_schema}"
   {% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %}
   {% if sample.open_signal_matrix is defined %} --open-signal-matrix {sample.open_signal_matrix} {% endif %}
+  {% if sample.ensdb is defined %} --ensdb {sample.ensdb} {% endif %}
+  {% if sample.fasta is defined %} --fasta {sample.fasta} {% endif %}
   {% if sample.bigbed is defined %} --bigbed {sample.bigbed} {% endif %}
diff --git a/pipeline_interface_just_db_commit.yaml b/schemas/pipeline_interface_just_db_commit.yaml
similarity index 68%
rename from pipeline_interface_just_db_commit.yaml
rename to schemas/pipeline_interface_just_db_commit.yaml
index 919f3a4..510cde5 100644
--- a/pipeline_interface_just_db_commit.yaml
+++ b/schemas/pipeline_interface_just_db_commit.yaml
@@ -2,7 +2,8 @@ pipeline_name: BEDSTAT
 pipeline_type: sample
 var_templates:
   path: "{looper.piface_dir}/pipeline/bedstat.py"
-input_schema: http://schema.databio.org/pipelines/bedstat.yaml
+#input_schema: http://schema.databio.org/pipelines/bedstat.yaml
+input_schema: pep_schema.yaml
 pre_submit:
   python_functions:
     - looper.write_sample_yaml
@@ -11,7 +12,10 @@ command_template: >
   --bedfile {sample.output_file_path}
   --genome {sample.genome}
   --sample-yaml {sample.yaml_file}
+  --schema "{looper.piface_dir}/{pipeline.input_schema}"
   --just-db-commit
   {% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %}
   {% if sample.open_signal_matrix is defined %} --open-signal-matrix {sample.open_signal_matrix} {% endif %}
+  {% if sample.ensdb is defined %} --ensdb {sample.ensdb} {% endif %}
+  {% if sample.fasta is defined %} --fasta {sample.fasta} {% endif %}
   {% if sample.bigbed is defined %} --bigbed {sample.bigbed} {% endif %}
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ebdc80f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,71 @@
+#! /usr/bin/env python
+
+import os
+from setuptools import setup
+import sys
+
+PACKAGE = "bedstat"
+REQDIR = "requirements"
+
+# Additional keyword arguments for setup().
+extra = {}
+
+# Ordinary dependencies
+
+
+def read_reqs(reqs_name):
+    deps = []
+    with open(os.path.join(REQDIR, "requirements-{}.txt".format(reqs_name)), "r") as f:
+        for l in f:
+            if not l.strip():
+                continue
+            deps.append(l)
+    return deps
+
+
+DEPENDENCIES = read_reqs("all")
+extra["install_requires"] = DEPENDENCIES
+
+scripts = None
+
+with open("{}/_version.py".format(PACKAGE), "r") as versionfile:
+    version = versionfile.readline().split()[-1].strip("\"'\n")
+
+with open("README.md") as f:
+    long_description = f.read()
+
+setup(
+    name=PACKAGE,
+    packages=[PACKAGE],
+    version=version,
+    description="Python Package to convert various genome region files to bed file.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "License :: OSI Approved :: BSD License",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Topic :: Scientific/Engineering :: Bio-Informatics",
+    ],
+    keywords="pipeline, bioinformatics, workflow",
+    url="https://github.com/databio/{}/".format(PACKAGE),
+    author="Oleksandr Khoroshevskyi, Bingjie Xue, Michal Stolarczyk, Ognen Duzlevski,"
+    "Jose Verdezoto, ",
+    license="BSD2",
+    entry_points={
+        "console_scripts": [
+            "bedstat = bedstat.bedstat:main",
+        ],
+    },
+    package_data={PACKAGE: ["templates/*"]},
+    scripts=scripts,
+    include_package_data=True,
+    test_suite="tests",
+    tests_require=read_reqs("dev"),
+    setup_requires=(
+        ["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else []
+    ),
+    **extra
+)
diff --git a/tests/config_db_github.yaml b/tests/config_db_github.yaml
new file mode 100644
index 0000000..2231d73
--- /dev/null
+++ b/tests/config_db_github.yaml
@@ -0,0 +1,16 @@
+path:
+  pipeline_output_path: $BEDBASE_DATA_PATH_HOST/
+  bedstat_dir: outputs/bedstat_output
+  bedbuncher_dir: outputs/bedbuncher_output
+  remote_url_base: null
+database:
+  name: postgres
+  user: postgres
+  password: bedbasepassword
+  host: localhost
+  port: 5432
+  dialect: postgresql
+  driver: psycopg2
+server:
+  host: 0.0.0.0
+  port: 8000
\ No newline at end of file
diff --git a/tests/config_db_local.yaml b/tests/config_db_local.yaml
new file mode 100644
index 0000000..35e3334
--- /dev/null
+++ b/tests/config_db_local.yaml
@@ -0,0 +1,16 @@
+path:
+  pipeline_output_path: ./
+  bedstat_dir: outputs/bedstat_output
+  bedbuncher_dir: outputs/bedbuncher_output
+  remote_url_base: null
+database:
+  host: localhost
+  port: 5432
+  password: bedbasepassword
+  user: postgres
+  name: postgres
+  dialect: postgresql
+  driver: psycopg2
+server:
+  host: 0.0.0.0
+  port: 80pip
\ No newline at end of file
diff --git a/tests/data/bedstat_annotation_sheet.csv b/tests/data/bedstat_annotation_sheet.csv
deleted file mode 100644
index 8ea1c8d..0000000
--- a/tests/data/bedstat_annotation_sheet.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-sample_name,file_name,genome,exp_protocol,cell_type,tissue,antibody,treatment,data_source,GSE,GSM,description,format
-bedbase_demo1,test_hg38.bed.gz,hg38,ChiPseq,GM12878,NA,IKZF1,None,GEO,GSE105587,,IKZF1 ChIP-seq on human GM12878,bed
\ No newline at end of file
diff --git a/tests/data/bedstat_config.yaml b/tests/data/bedstat_config.yaml
deleted file mode 100644
index 78a2a84..0000000
--- a/tests/data/bedstat_config.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-pep_version: 2.0.0
-sample_table: bedstat_annotation_sheet.csv
-
-looper:
-  output_dir: $GITHUB_WORKSPACE/outputs/bedstat_output
-
-sample_modifiers:
-  append:
-    bedbase_config: $GITHUB_WORKSPACE/tests/data/config_min.yaml
-    pipeline_interfaces: $GITHUB_WORKSPACE/pipeline_interface.yaml
-    output_file_path: OUTPUT
-    yaml_file: SAMPLE_YAML
-  derive:
-    attributes: [output_file_path, yaml_file]
-    sources:
-      OUTPUT: "$GITHUB_WORKSPACE/tests/data/{file_name}"
-      SAMPLE_YAML: "$GITHUB_WORKSPACE/outputs/bedstat_output/submission/{sample_name}.yaml"
\ No newline at end of file
diff --git a/tests/data/config_min.yaml b/tests/data/config_min.yaml
deleted file mode 100644
index e55baf4..0000000
--- a/tests/data/config_min.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-database:
-  name: pipestat-test
-  user: postgres
-  password: pipestat-password
-  host: localhost
-  port: 5432
-path:
-  pipeline_output_path: $BEDBASE_DATA_PATH/outputs
-  bedstat_dir: bedstat_output
-  bedbuncher_dir: bedbuncher_output
-  remote_url_base: null
-server:
-  host: 0.0.0.0
-  port: 8000
\ No newline at end of file
diff --git a/tests/data/csv/hg38/test_data_csv/index.txt b/tests/data/csv/hg38/test_data_csv/index.txt
deleted file mode 100644
index 0d38fc3..0000000
--- a/tests/data/csv/hg38/test_data_csv/index.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-filename,cellType,treatment,antibody,description
-a.bed,a,None,None,first cell line
-b.bed,b,None,None,second cell line
-c.bed,c,EWS-FLI1,None,test
diff --git a/tests/data/csv/hg38/test_data_csv/regions/a.bed b/tests/data/csv/hg38/test_data_csv/regions/a.bed
deleted file mode 100644
index 2e65efe..0000000
--- a/tests/data/csv/hg38/test_data_csv/regions/a.bed
+++ /dev/null
@@ -1 +0,0 @@
-a
\ No newline at end of file
diff --git a/tests/data/csv/hg38/test_data_csv/regions/b.bed b/tests/data/csv/hg38/test_data_csv/regions/b.bed
deleted file mode 100644
index 63d8dbd..0000000
--- a/tests/data/csv/hg38/test_data_csv/regions/b.bed
+++ /dev/null
@@ -1 +0,0 @@
-b
\ No newline at end of file
diff --git a/tests/data/csv/hg38/test_data_csv/regions/c.bed b/tests/data/csv/hg38/test_data_csv/regions/c.bed
deleted file mode 100644
index 3410062..0000000
--- a/tests/data/csv/hg38/test_data_csv/regions/c.bed
+++ /dev/null
@@ -1 +0,0 @@
-c
\ No newline at end of file
diff --git a/tests/data/f1/AML_db358.bed.gz b/tests/data/f1/AML_db358.bed.gz
new file mode 100644
index 0000000..acf6882
Binary files /dev/null and b/tests/data/f1/AML_db358.bed.gz differ
diff --git a/tests/data/f1/AML_db358.bigBed b/tests/data/f1/AML_db358.bigBed
new file mode 100644
index 0000000..900f3a3
Binary files /dev/null and b/tests/data/f1/AML_db358.bigBed differ
diff --git a/tests/data/f1/AML_db358_sample.yaml b/tests/data/f1/AML_db358_sample.yaml
new file mode 100644
index 0000000..3e7e2a5
--- /dev/null
+++ b/tests/data/f1/AML_db358_sample.yaml
@@ -0,0 +1,19 @@
+sample_name: AML_db358
+file_name: AML_db358.bed.gz
+genome: hg19
+exp_protocol: ChiPseq
+cell_type: ' K562'
+tissue: Lung
+antibody: ZEB2
+treatment: None
+data_source: GEO
+GSE: 'GSE91663 '
+description: ZEB2 ChIP-seq on human K562 (ENCODE)
+format: bed
+old_name: GSE91663_ENCFF129WGK_optimal_idr_thresholded_peaks_hg19.bigBed
+bedbase_config: $CODE/bedbase/tutorial_files/bedbase_configuration_compose.yaml
+pipeline_interfaces: $CODE/bedstat/pipeline_interface.yaml
+output_file_path: /home/bnt4me/Virginia/bed_base_all/bedbase/bedbase_tutorial/bed_files/AML_db358.bed.gz
+yaml_file: /home/bnt4me/Virginia/bed_base_all/bedbase/bedbase_tutorial/outputs/bedstat_output/bedstat_pipeline_logs/submission/AML_db358_sample.yaml
+open_signal_matrix: /home/bnt4me/Virginia/bed_base_all/bedbase/bedbase_tutorial/openSignalMatrix_hg19_percentile99_01_quantNormalized_round4d.txt.gz
+bigbed: /home/bnt4me/Virginia/bed_base_all/bedbase/bedbase_tutorial/bigbed_files
diff --git a/tests/data/test_hg38.bed.gz b/tests/data/test_hg38.bed.gz
deleted file mode 100644
index 344da49..0000000
Binary files a/tests/data/test_hg38.bed.gz and /dev/null differ
diff --git a/tests/data/tsv/hg38/test_data_tsv/index.txt b/tests/data/tsv/hg38/test_data_tsv/index.txt
deleted file mode 100644
index c92a6ab..0000000
--- a/tests/data/tsv/hg38/test_data_tsv/index.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-filename	cellType	treatment  	description
-a.bed	a	None	first  cell line
-b.bed	b	None	second  cell line
-c.bed	c	EWS-FLI1	test
\ No newline at end of file
diff --git a/tests/data/tsv/hg38/test_data_tsv/regions/a.bed b/tests/data/tsv/hg38/test_data_tsv/regions/a.bed
deleted file mode 100644
index 2e65efe..0000000
--- a/tests/data/tsv/hg38/test_data_tsv/regions/a.bed
+++ /dev/null
@@ -1 +0,0 @@
-a
\ No newline at end of file
diff --git a/tests/data/tsv/hg38/test_data_tsv/regions/b.bed b/tests/data/tsv/hg38/test_data_tsv/regions/b.bed
deleted file mode 100644
index 63d8dbd..0000000
--- a/tests/data/tsv/hg38/test_data_tsv/regions/b.bed
+++ /dev/null
@@ -1 +0,0 @@
-b
\ No newline at end of file
diff --git a/tests/data/tsv/hg38/test_data_tsv/regions/c.bed b/tests/data/tsv/hg38/test_data_tsv/regions/c.bed
deleted file mode 100644
index 3410062..0000000
--- a/tests/data/tsv/hg38/test_data_tsv/regions/c.bed
+++ /dev/null
@@ -1 +0,0 @@
-c
\ No newline at end of file
diff --git a/tests/test_bedstat.py b/tests/test_bedstat.py
new file mode 100644
index 0000000..681558f
--- /dev/null
+++ b/tests/test_bedstat.py
@@ -0,0 +1,31 @@
+import bedstat
+import os
+import pytest
+import bbconf
+
+
+file_path = "./tests"
+# file_path = "./"
+CONFIG_FILE = f"{file_path}/config_db_github.yaml"
+# CONFIG_FILE = f"{file_path}/config_db_local.yaml"
+
+
+class TestBedstat:
+    def test_uploading(self):
+        bedstat.run_bedstat(
+            bedfile=f"{file_path}/data/f1/AML_db358.bed.gz",
+            sample_yaml=f"{file_path}/data/f1/AML_db358_sample.yaml",
+            bedbase_config=CONFIG_FILE,
+            bigbed=f"{file_path}/data/f1/",
+            just_db_commit=True,
+            genome_assembly="hg19",
+        )
+
+    def test_db_content(self):
+        bbc = bbconf.BedBaseConf(config_path=CONFIG_FILE, database_only=True)
+        assert bbc.bed.record_count == 1
+
+    def test_db_record(self):
+        bbc = bbconf.BedBaseConf(config_path=CONFIG_FILE, database_only=True)
+        geonome_check = bbc.bed.select(columns=["other"])[0][0]["genome"]
+        assert geonome_check == "hg19"
diff --git a/tests/test_full.py b/tests/test_full.py
new file mode 100644
index 0000000..8a64fa3
--- /dev/null
+++ b/tests/test_full.py
@@ -0,0 +1,30 @@
+import bedstat
+import os
+import pytest
+import bbconf
+
+
+file_path = "./tests"
+# file_path = "./"
+# CONFIG_FILE = f"{file_path}/config_db_github.yaml"
+CONFIG_FILE = f"{file_path}/config_db_local.yaml"
+SAMPLE_YAML = f"{file_path}/data/f1/AML_db358_sample_new.yaml"
+
+# has to be change manually
+OPEN_MATRIX19 = "/home/bnt4me/Virginia/bed_base_all/bedbase/bedbase_tutorial/openSignalMatrix_hg19_percentile99_01_quantNormalized_round4d.txt.gz"
+
+
+@pytest.mark.skipif(True, reason="This test can be run only locally")
+class TestBedstatProcessing:
+    def test_processing(self):
+        bedstat.run_bedstat(
+            bedfile=f"{file_path}/data/f1/AML_db358.bed.gz",
+            sample_yaml=SAMPLE_YAML,
+            bedbase_config=CONFIG_FILE,
+            bigbed=f"{file_path}/data/f1/",
+            just_db_commit=False,
+            genome_assembly="hg19",
+            open_signal_matrix=OPEN_MATRIX19,
+        )
+
+    # We should add some additionall tests here
diff --git a/tests/test_process_LOLA.py b/tests/test_process_LOLA.py
deleted file mode 100644
index 5497db0..0000000
--- a/tests/test_process_LOLA.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from scripts.process_LOLA import process_index_file
-
-
-def test_process_index_file(capsys, tsv_base_dir, csv_base_dir):
-    """ Check if process_index_file produces the same results for TSV and CSV files """
-    process_index_file(csv_base_dir, "hg38")
-    out1, _ = capsys.readouterr()
-    process_index_file(tsv_base_dir, "hg38")
-    out2, _ = capsys.readouterr()
-    # number of characters in each captured output should be exactly the same
-    assert len(out1) == len(out2)