diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
new file mode 100644
index 0000000..f58e4c6
--- /dev/null
+++ b/.github/workflows/black.yml
@@ -0,0 +1,11 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+      - uses: psf/black@stable
diff --git a/.github/workflows/test-pipeline.yml b/.github/workflows/test-pipeline.yml
new file mode 100644
index 0000000..f65c756
--- /dev/null
+++ b/.github/workflows/test-pipeline.yml
@@ -0,0 +1,70 @@
+name: Test bedstat pipeline
+
+on:
+  push:
+    branches: [master, dev]
+  pull_request:
+    branches: [master, dev]
+
+jobs:
+  pytest:
+    strategy:
+      matrix:
+        python-version: [3.6, 3.7, 3.8]
+        os: [ubuntu-latest] # can't use macOS when using service containers or container jobs
+        r: [release]
+    runs-on: ${{ matrix.os }}
+    services:
+        postgres:
+          image: postgres
+          env:
+            POSTGRES_USER: postgres
+            POSTGRES_PASSWORD: bedbasepassword
+            POSTGRES_DB: postgres
+          ports:
+          - 5432:5432
+          options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependancies
+      run: if [ -f requirements/requirements-all.txt ]; then pip install -r requirements/requirements-all.txt; fi
+
+    - name: Install dev dependancies
+      run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi
+
+    - name: Install test dependancies
+      run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi
+
+    - name: Install libcurl
+      run: |
+        sudo apt-get update
+        sudo apt-get install libcurl4-openssl-dev
+
+    - uses: r-lib/actions/setup-r@master
+      with:
+        r-version: ${{ matrix.r }}
+
+    - name: Install R dependancies
+      run: Rscript scripts/installRdeps.R
+
+    - name: Run pipeline
+      run: looper run -p local tests/data/bedstat_config.yaml
+
+    - name: Test plots exist
+      run: |
+        if ls $GITHUB_WORKSPACE/outputs/bedstat_output/a6a08126cb6f4b1953ba0ec8675df85a/test_hg38*.png 1> /dev/null 2>&1; then
+          echo "SUCCESS";
+        else
+          echo "ERROR: files do not exist: $GITHUB_WORKSPACE/outputs/bedstat_output/a6a08126cb6f4b1953ba0ec8675df85a/test_hg38*.png";
+          exit 1
+        fi
+
+    - name: Test record in PostgreSQL
+      run: |
+        echo "from bbconf import BedBaseConf; from bbconf.const import *; bbc = BedBaseConf('$GITHUB_WORKSPACE/tests/data/config_min.yaml'); assert bbc.bed.record_count == 1, 'Number of records in the bedfiles table not equal 1'" | python3 -
diff --git a/README.md b/README.md
index 3f2bace..d821232 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+![Test bedstat pipeline](https://github.com/databio/bedstat/workflows/Test%20bedstat%20pipeline/badge.svg)
 # bedstat
 pipeline for obtaining statistics about bed files
 
@@ -35,60 +36,37 @@ The input PEP can be validated against the [JSON schema in this repository](pep_
 eido validate <path/to/pep> -s https://schema.databio.org/pipelines/bedstat.yaml
 ```
 
-### 2. Create a persistent volume to house elasticsearch data
+### 2. Run PostgreSQL
 
-```
-docker volume create es-data
-```
-
-### 3. Run the docker container for elasticsearch
+For example, to run an instance in a container and make the data persist, execute:
 
 ```
-docker run -p 9200:9200 -p 9300:9300 -v es-data:/usr/share/elasticsearch/data -e "xpack.ml.enabled=false" \
-  -e "discovery.type=single-node" elasticsearch:7.5.1
+docker volume create postgres-data
+docker run -d --name bedbase-postgres -p 5432:5432 -e POSTGRES_PASSWORD=bedbasepassword -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres -v postgres-data:/var/lib/postgresql/data postgres
 ```
+Provided environment variables need to match the settings in bedbase configuration file
 
-### 4. Run the bedstat pipeline on the PEP
+### 3. Run the bedstat pipeline on the PEP
 
-Then simply run the looper command to run the pipeline for each bed file. It will create a set of plots and statistics per bed file and insert the metadata into Elastic:
+Then simply run the `looper run` command to run the pipeline for each bed file. It will create a set of plots and statistics per bed file and insert the metadata into PostgreSQL:
 
 ```
 looper run project/bedstat_config.yaml
 ```
 
-The data loaded into elasticsearch should persist between elasticsearch invocations, on the es-data docker volume created above in step 2.
-
-### 5. (optional) Run Kibana
-
-Kibana can be used in order to see ElasticSearch data in a "GUI" kind of a way.
-
-Pull a matching Kibana docker image. Make sure the Elasticsearch and Kibana container tags match:
-```
-docker pull docker.elastic.co/kibana/kibana:7.5.1
-```
-
-Get the ID of the docker container (started above) running ElasticSearch via 
-```
-docker ps | grep elasticsearch
-```
-
-Run Kibana to link to that container:
-```
-docker run --link <ID OF ELASTIC CONTAINER HERE>:elasticsearch -p 5601:5601  docker.elastic.co/kibana/kibana:7.5.1
-```
-
-Point your local web browser to http://localhost:5601
-
----
+The data loaded into PostgreSQL should persist between PostgreSQL invocations, on the `postgres-data` docker volume created above in step 2.
 
 ## Additional dependencies
 
 [regionstat.R](tools/regionstat.R) script is used to calculate the bed file statistics, so the pipeline also depends on several R packages:
 
+* `R.utils`
 * `BiocManager`
 * `optparse`
 * `devtools`
 * `GenomicRanges`
+* `GenomicFeatures`
+* `ensembldb`
 * `GenomicDistributions`
 * `BSgenome.<organim>.UCSC.<genome>` *depending on the genome used* 
 * `LOLA`
diff --git a/pep_schema.yaml b/pep_schema.yaml
index 02ce4b7..da49b90 100644
--- a/pep_schema.yaml
+++ b/pep_schema.yaml
@@ -18,7 +18,6 @@ properties:
         genome:
           type: string
           description: "organism genome code"
-          enum: ["hg18", "hg19", "hg38", "mm9", "mm10"]
         narrowpeak:
           type: integer
           minimum: 0
diff --git a/pipeline/bedstat.py b/pipeline/bedstat.py
index de81999..0c8cbe9 100755
--- a/pipeline/bedstat.py
+++ b/pipeline/bedstat.py
@@ -3,105 +3,245 @@
 bedfile statistics generating pipeline
 """
 
-__author__ = ["Michal Stolarczyk", "Ognen Duzlevski", "Jose Verdezoto"]
+__author__ = [
+    "Michal Stolarczyk",
+    "Ognen Duzlevski",
+    "Jose Verdezoto",
+    "Bingjie Xue",
+]
 __email__ = "michal@virginia.edu"
-__version__ = "0.0.1"
+__version__ = "0.0.4-dev"
 
 from argparse import ArgumentParser
 from hashlib import md5
 import json
 import yaml
 import os
+import sys
 import warnings
+import tempfile
+import requests
+import gzip
 
-from bbconf.const import *
 import pypiper
 import bbconf
+import time
 
 parser = ArgumentParser(
     description="A pipeline to read a file in BED format and produce metadata "
-                "in JSON format.")
+    "in JSON format."
+)
 
 parser.add_argument(
-    '--bedfile', help='a full path to bed file to process', required=True)
+    "--bedfile", help="a full path to bed file to process", required=True
+)
 parser.add_argument(
-    '--open-signal-matrix', type=str, required=False, default=None,
-    help='a full path to the openSignalMatrix required for the tissue '
-         'specificity plots')
+    "--open-signal-matrix",
+    type=str,
+    required=False,
+    default=None,
+    help="a full path to the openSignalMatrix required for the tissue "
+    "specificity plots",
+)
 parser.add_argument(
-    "--bedbase-config", dest="bedbase_config", type=str, default=None,
-    help="a path to the bedbase configuratiion file")
+    "--bigbed",
+    type=str,
+    required=False,
+    default=None,
+    help="a full path to the bigbed files",
+)
 parser.add_argument(
-    "-y", "--sample-yaml", dest="sample_yaml", type=str, required=False,
+    "--bedbase-config",
+    dest="bedbase_config",
+    type=str,
+    default=None,
+    help="a path to the bedbase configuratiion file",
+)
+parser.add_argument(
+    "-y",
+    "--sample-yaml",
+    dest="sample_yaml",
+    type=str,
+    required=False,
     help="a yaml config file with sample attributes to pass on more metadata "
-         "into the database")
+    "into the database",
+)
 exclusive_group = parser.add_mutually_exclusive_group()
 exclusive_group.add_argument(
-    '--no-db-commit', action='store_true',
-    help='whether the JSON commit to the database should be skipped')
+    "--no-db-commit",
+    action="store_true",
+    help="whether the JSON commit to the database should be skipped",
+)
 exclusive_group.add_argument(
-    '--just-db-commit', action='store_true',
-    help='whether just to commit the JSON to the database')
-parser = pypiper.add_pypiper_args(parser,
-                                  groups=["pypiper", "common", "looper", "ngs"])
+    "--just-db-commit",
+    action="store_true",
+    help="whether just to commit the JSON to the database",
+)
+parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "common", "looper", "ngs"])
 
 args = parser.parse_args()
 
-bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config))
 
-bed_digest = md5(open(args.bedfile, 'rb').read()).hexdigest()
+def hash_bedfile(filepath):
+    """generate digest for bedfile"""
+    with gzip.open(filepath, "rb") as f:
+        # concate column values
+        chrs = ",".join([row.split()[0].decode("utf-8") for row in f])
+        starts = ",".join([row.split()[1].decode("utf-8") for row in f])
+        ends = ",".join([row.split()[2].decode("utf-8") for row in f])
+        # hash column values
+        chr_digest = md5(chrs.encode("utf-8")).hexdigest()
+        start_digest = md5(starts.encode("utf-8")).hexdigest()
+        end_digest = md5(ends.encode("utf-8")).hexdigest()
+        # hash column digests
+        bed_digest = md5(
+            ",".join([chr_digest, start_digest, end_digest]).encode("utf-8")
+        ).hexdigest()
+
+        return bed_digest
+
+
+def convert_unit(size_in_bytes):
+    """Convert the size from bytes to other units like KB, MB or GB"""
+    if size_in_bytes < 1024:
+        return str(size_in_bytes) + "bytes"
+    elif size_in_bytes in range(1024, 1024 * 1024):
+        return str(round(size_in_bytes / 1024, 2)) + "KB"
+    elif size_in_bytes in range(1024 * 1024, 1024 * 1024 * 1024):
+        return str(round(size_in_bytes / (1024 * 1024))) + "MB"
+    elif size_in_bytes >= 1024 * 1024 * 1024:
+        return str(round(size_in_bytes / (1024 * 1024 * 1024))) + "GB"
+
+
+def get_file_size(file_name):
+    """Get file in size in given unit like KB, MB or GB"""
+    size = os.path.getsize(file_name)
+    return convert_unit(size)
+
+
+bbc = bbconf.BedBaseConf(config_path=args.bedbase_config, database_only=True)
+bedstat_output_path = bbc.get_bedstat_output_path()
+
+bed_digest = md5(open(args.bedfile, "rb").read()).hexdigest()
+# bed_digest = hash_bedfile(args.bedfile)
 bedfile_name = os.path.split(args.bedfile)[1]
 # need to split twice since there are 2 exts
 fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0]
-outfolder = os.path.abspath(os.path.join(
-    bbc[CFG_PATH_KEY][CFG_BEDSTAT_OUTPUT_KEY], bed_digest))
+outfolder = os.path.abspath(os.path.join(bedstat_output_path, bed_digest))
 json_file_path = os.path.abspath(os.path.join(outfolder, fileid + ".json"))
+json_plots_file_path = os.path.abspath(os.path.join(outfolder, fileid + "_plots.json"))
+bed_relpath = os.path.relpath(
+    args.bedfile,
+    os.path.abspath(os.path.join(bedstat_output_path, os.pardir, os.pardir)),
+)
+bigbed_relpath = os.path.relpath(
+    os.path.join(args.bigbed, fileid + ".bigBed"),
+    os.path.abspath(os.path.join(bedstat_output_path, os.pardir, os.pardir)),
+)
+
+
+def main():
+    if not args.just_db_commit:
+        pm = pypiper.PipelineManager(
+            name="bedstat-pipeline", outfolder=outfolder, args=args
+        )
+
+        # run Rscript
+        rscript_path = os.path.join(
+            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+            "tools",
+            "regionstat.R",
+        )
+        assert os.path.exists(rscript_path), FileNotFoundError(
+            f"'{rscript_path}' script not found"
+        )
+        command = (
+            f"Rscript {rscript_path} --bedfilePath={args.bedfile} "
+            f"--fileId={fileid} --openSignalMatrix={args.open_signal_matrix} "
+            f"--outputFolder={outfolder} --genome={args.genome_assembly} "
+            f"--digest={bed_digest}"
+        )
+        print(command)
+        pm.run(cmd=command, target=json_file_path)
+        pm.stop_pipeline()
+
+    # now get the resulting json file and load it into Elasticsearch
+    # if the file exists, of course
+    if not args.no_db_commit:
+        data = {}
+        if os.path.exists(json_file_path):
+            with open(json_file_path, "r", encoding="utf-8") as f:
+                data = json.loads(f.read())
+        if os.path.exists(json_plots_file_path):
+            with open(json_plots_file_path, "r", encoding="utf-8") as f_plots:
+                plots = json.loads(f_plots.read())
+        if args.sample_yaml:
+            # get the sample-specific metadata from the sample yaml representation
+            other = {}
+            if os.path.exists(args.sample_yaml):
+                y = yaml.safe_load(open(args.sample_yaml, "r"))
+                data.update({"other": y})
+        # unlist the data, since the output of regionstat.R is a dict of lists of
+        # length 1 and force keys to lower to correspond with the
+        # postgres column identifiers
+        data = {k.lower(): v[0] if isinstance(v, list) else v for k, v in data.items()}
+        data.update(
+            {
+                "bedfile": {
+                    "path": bed_relpath,
+                    "size": get_file_size(args.bedfile),
+                    "title": "Path to the BED file",
+                }
+            }
+        )
+
+        if os.path.exists(
+            os.path.join(args.bigbed, fileid + ".bigBed")
+        ) and not os.path.islink(os.path.join(args.bigbed, fileid + ".bigBed")):
+            digest = requests.get(
+                f"http://refgenomes.databio.org/genomes/genome_digest/{args.genome_assembly}"
+            ).text.strip('""')
+
+            data.update(
+                {
+                    "genome": {
+                        "alias": args.genome_assembly,
+                        "digest": digest,
+                    }
+                }
+            )
+            data.update(
+                {
+                    "bigbedfile": {
+                        "path": bigbed_relpath,
+                        "size": get_file_size(
+                            os.path.join(args.bigbed, fileid + ".bigBed")
+                        ),
+                        "title": "Path to the big BED file",
+                    }
+                }
+            )
 
-if not args.just_db_commit:
-    pm = pypiper.PipelineManager(name="bedstat-pipeline", outfolder=outfolder,
-                                 args=args)
-    rscript_path = os.path.join(os.path.dirname(
-        os.path.dirname(os.path.abspath(__file__))), "tools", "regionstat.R")
-    assert os.path.exists(rscript_path), \
-        FileNotFoundError("'{}' script not found".format(rscript_path))
-    cmd_vars = dict(rscript=rscript_path, bed=args.bedfile, id=fileid,
-                    matrix=args.open_signal_matrix, out=outfolder,
-                    genome=args.genome_assembly, digest=bed_digest)
-    command = "Rscript {rscript} --bedfile={bed} --fileId={id} " \
-              "--openSignalMatrix={matrix} --outputfolder={out} " \
-              "--genome={genome} --digest={digest}".format(**cmd_vars)
-    pm.run(cmd=command, target=json_file_path)
-    pm.stop_pipeline()
-
-# now get the resulting json file and load it into Elasticsearch
-# it the file exists, of course
-if not args.no_db_commit:
-    # open connection to elastic
-    bbc.establish_elasticsearch_connection()
-    data = {}
-    with open(json_file_path, 'r', encoding='utf-8') as f:
-        data = json.loads(f.read())
-    if args.sample_yaml:
-        # get the sample line from the yaml config file
-        if os.path.exists(args.sample_yaml):
-            y = yaml.safe_load(open(args.sample_yaml, "r"))
-            for key in JSON_METADATA_VALUES:
-                try:
-                    data[key] = [y[key]]
-                except KeyError:
-                    print("'{}' metadata not available".format(key))
         else:
-            warnings.warn("Specified sample_yaml path does not exist: {}".
-                          format(args.sample_yaml))
-    # enrich the data from R with the data from the sample line itself
-    # the bedfile_path below needs to be overwritten in Elastic in case the
-    # pipeline run was split into two computing environments. Currently used
-    # for the development. This concept leverages the potability introduced by
-    # environment variable in the bedfile path in the PEP. Locally the
-    # environment variable points to a different path than on the compute
-    # cluster where the heavy calculations happen.
-    data[BEDFILE_PATH_KEY] = [args.bedfile]
-    print("Data: {}".format(data))
-    bbc.insert_bedfiles_data(data=data, doc_id=bed_digest)
+            data.update(
+                {
+                    "genome": {
+                        "alias": args.genome_assembly,
+                        "digest": "",
+                    }
+                }
+            )
+
+        for plot in plots:
+            plot_id = plot["name"]
+            del plot["name"]
+            data.update({plot_id: plot})
+        bbc.bed.report(record_identifier=bed_digest, values=data)
 
 
+if __name__ == "__main__":
+    try:
+        sys.exit(main())
+    except KeyboardInterrupt:
+        print("Pipeline aborted.")
+        sys.exit(1)
diff --git a/pipeline_interface.yaml b/pipeline_interface.yaml
index a94dfa0..fdbcbc9 100644
--- a/pipeline_interface.yaml
+++ b/pipeline_interface.yaml
@@ -1,15 +1,16 @@
-protocol_mapping:
-    bedstat: bedstat
-
-pipelines:
-  bedstat:
-    name: BEDSTAT
-    path: pipeline/bedstat.py
-    looper_args: True
-    arguments:
-      "--bedfile": output_file_path
-      "--genome": genome
-      "--sample-yaml": yaml_file
-    optional_arguments:
-       "--open-signal-matrix": open_signal_matrix
-
+pipeline_name: BEDSTAT
+pipeline_type: sample
+var_templates:
+  path: "{looper.piface_dir}/pipeline/bedstat.py"
+input_schema: http://schema.databio.org/pipelines/bedstat.yaml
+pre_submit:
+  python_functions:
+    - looper.write_sample_yaml
+command_template: >
+  {pipeline.var_templates.path}
+  --bedfile {sample.output_file_path}
+  --genome {sample.genome}
+  --sample-yaml {sample.yaml_file}
+  {% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %}
+  {% if sample.open_signal_matrix is defined %} --open-signal-matrix {sample.open_signal_matrix} {% endif %}
+  {% if sample.bigbed is defined %} --bigbed {sample.bigbed} {% endif %}
diff --git a/pipeline_interface_new.yaml b/pipeline_interface_just_db_commit.yaml
similarity index 63%
rename from pipeline_interface_new.yaml
rename to pipeline_interface_just_db_commit.yaml
index 9da5f31..919f3a4 100644
--- a/pipeline_interface_new.yaml
+++ b/pipeline_interface_just_db_commit.yaml
@@ -1,11 +1,17 @@
 pipeline_name: BEDSTAT
 pipeline_type: sample
-path: pipeline/bedstat.py
+var_templates:
+  path: "{looper.piface_dir}/pipeline/bedstat.py"
 input_schema: http://schema.databio.org/pipelines/bedstat.yaml
+pre_submit:
+  python_functions:
+    - looper.write_sample_yaml
 command_template: >
-  {pipeline.path}
+  {pipeline.var_templates.path}
   --bedfile {sample.output_file_path}
   --genome {sample.genome}
   --sample-yaml {sample.yaml_file}
+  --just-db-commit
   {% if sample.bedbase_config is defined %} --bedbase-config {sample.bedbase_config} {% endif %}
   {% if sample.open_signal_matrix is defined %} --open-signal-matrix {sample.open_signal_matrix} {% endif %}
+  {% if sample.bigbed is defined %} --bigbed {sample.bigbed} {% endif %}
diff --git a/project/bedstat_config.yaml b/project/bedstat_config.yaml
deleted file mode 100644
index 2cc802a..0000000
--- a/project/bedstat_config.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-metadata:
-    sample_annotation: ../lolacore.csv
-    output_dir: output
-    pipeline_interfaces: ../pipeline_interface.yaml
\ No newline at end of file
diff --git a/project/bedstat_config_new.yaml b/project/bedstat_config_new.yaml
deleted file mode 100644
index d8d366d..0000000
--- a/project/bedstat_config_new.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-pep_version: 2.0.0
-sample_table: path
-looper:
-  output_dir: output
-sample_modifiers:
-  append:
-    pipeline_interfaces: ../pipeline_interface_new.yaml
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index f803877..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-piper
-loopercli
-bbconf>=0.0.1
\ No newline at end of file
diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt
new file mode 100644
index 0000000..ffc1db4
--- /dev/null
+++ b/requirements/requirements-all.txt
@@ -0,0 +1,3 @@
+piper>=0.12.1
+looper>=1.2.1
+# bbconf>=0.1.0
\ No newline at end of file
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
new file mode 100644
index 0000000..6c7c573
--- /dev/null
+++ b/requirements/requirements-dev.txt
@@ -0,0 +1,4 @@
+-e git+https://github.com/databio/yacman@dev#egg=yacman
+-e git+https://github.com/pepkit/pipestat@dev#egg=pipestat
+
+
diff --git a/scripts/process_LOLA.py b/scripts/process_LOLA.py
index d5fc326..7d780b8 100644
--- a/scripts/process_LOLA.py
+++ b/scripts/process_LOLA.py
@@ -10,22 +10,27 @@
 # for reading TSV files
 import csv
 
+
 class LOLAIndexFileError(Exception):
     pass
 
+
 class LOLAIndexColumnError(Exception):
     pass
 
+
 global idx_file_keywords
 
-idx_file_keywords = ['filename',
-                     'cellType',
-                     'cellTypeSubtype',
-                     'antibody',
-                     'mappingGenome',
-                     'description',
-                     'tissue',
-                     'species']
+idx_file_keywords = [
+    "filename",
+    "cellType",
+    "cellTypeSubtype",
+    "antibody",
+    "mappingGenome",
+    "description",
+    "tissue",
+    "species",
+]
 
 # process index file from LOLA core DB
 # expects a base directory for the collection
@@ -38,71 +43,88 @@ def process_index_file(base_dir, genome):
     idx_file = os.path.join(base_dir, "index.txt")
     with open(idx_file, "r") as tsvfile, open(idx_file, "r") as test:
         try:
-            reader = csv.DictReader(test, dialect='excel-tab')
+            reader = csv.DictReader(test, dialect="excel-tab")
             n = reader.__next__()
             l = list(n)
             if len(n) == 1:
                 # csv case
-                if (l[0].find(',') != -1):
+                if l[0].find(",") != -1:
                     test.seek(0)
                     reader = csv.DictReader(test)
                 else:
-                    raise Exception("Not tsv and not csv format for %s. Aborting." % idx_file)
+                    raise Exception(
+                        "Not tsv and not csv format for %s. Aborting." % idx_file
+                    )
             else:
                 # tsv case
-                reader = csv.DictReader(tsvfile, dialect='excel-tab')
+                reader = csv.DictReader(tsvfile, dialect="excel-tab")
             for row in reader:
                 try:
-                    s = ''
+                    s = ""
                     for kw in idx_file_keywords:
                         if kw not in row:
-                            if kw == 'filename':
+                            if kw == "filename":
                                 raise LOLAIndexColumnError
-                            s = s + ',unspecified'
+                            s = s + ",unspecified"
                         else:
-                            if kw == 'filename':
+                            if kw == "filename":
                                 # see if the file name makes sense - sometimes some filenames have , or . in them
                                 # and are split inappropriately
-                                s = os.path.abspath(os.path.join(bed_path_base, row['filename']))
+                                s = os.path.abspath(
+                                    os.path.join(bed_path_base, row["filename"])
+                                )
                                 f = Path(s)
                                 if not (f.exists() and f.is_file()):
                                     # skip file
                                     raise LOLAIndexFileError
                             else:
-                                if row[kw] == '':
-                                    t = 'unspecified'
+                                if row[kw] == "":
+                                    t = "unspecified"
                                 else:
                                     t = row[kw]
-                                s = s + ',' + t
+                                s = s + "," + t
                 except LOLAIndexFileError as lie:
-                    fn_err = os.path.abspath(os.path.join(bed_path_base, row['filename']))
-                    msg = "Skipping line #{} in index file {}, file {} not found.\n".format(reader.line_num, idx_file, fn_err)
+                    fn_err = os.path.abspath(
+                        os.path.join(bed_path_base, row["filename"])
+                    )
+                    msg = "Skipping line #{} in index file {}, file {} not found.\n".format(
+                        reader.line_num, idx_file, fn_err
+                    )
                     sys.stderr.write(msg)
                 except LOLAIndexColumnError as lce:
-                    msg = "Skipping line {} in index file {}, no filename column.\n".format(row, idx_file)
+                    msg = "Skipping line {} in index file {}, no filename column.\n".format(
+                        row, idx_file
+                    )
                     sys.stderr.write(msg)
                 # add the protocol and genome at the end of each row
-                s = s + ',bedstat' + ',' + genome
+                s = s + ",bedstat" + "," + genome
                 print(s)
         except Exception as e:
             print("Exception reading %s file" % idx_file)
             raise
 
+
 def process_collection_file(base_dir, cfile):
-    #print("Processing collection file %s" % cfile)
-    #print("Base dir: %s" % base_dir)
+    # print("Processing collection file %s" % cfile)
+    # print("Base dir: %s" % base_dir)
     pass
 
-if __name__ == '__main__':
-    parser = ArgumentParser(description="Script to process LOLA database into PEP format.")
 
-    parser.add_argument('--lola_loc', help='full path to base LOLA db (folder whose subfolders are hg38, hg19 etc.')
-    parser.add_argument('--genome', help='genome to process, e.g. hg38', default='hg38')
+if __name__ == "__main__":
+    parser = ArgumentParser(
+        description="Script to process LOLA database into PEP format."
+    )
+
+    parser.add_argument(
+        "--lola_loc",
+        help="full path to base LOLA db (folder whose subfolders are hg38, hg19 etc.",
+    )
+    parser.add_argument("--genome", help="genome to process, e.g. hg38", default="hg38")
 
     args = parser.parse_args()
 
     # get all the arguments and prepare to call R script
-    if (args.lola_loc is None):
+    if args.lola_loc is None:
         parser.print_help()
         sys.exit()
 
@@ -114,24 +136,25 @@ def process_collection_file(base_dir, cfile):
     full_path = os.path.abspath(os.path.join(lola_base, genome))
 
     # what we are interested in for now from index.txt files
-    header = 'sample_name,' + ','.join(idx_file_keywords[1:]) + ',protocol,genome'
+    header = "sample_name," + ",".join(idx_file_keywords[1:]) + ",protocol,genome"
     print(header)
     # search for subfolders and index.txt
-    for filename in Path(full_path).glob('**/index.txt'):
+    for filename in Path(full_path).glob("**/index.txt"):
         # break down the path from each index.txt file
         # to get its base directory relative to lola_loc
         # and see if collection.txt can be obtained as well
         try:
             path_parts = os.path.split(filename)
-            if (len(path_parts) != 2): break
+            if len(path_parts) != 2:
+                break
             # process index file
             process_index_file(path_parts[0], genome)
-            
+
             # process collections file
-            #collection_file = os.path.abspath(os.path.join(path_parts[0], 'collection.txt'))
-            #cpath = Path(collection_file)
-            #if (cpath.exists() and cpath.is_file()):
-                # we also have a collection file      
+            # collection_file = os.path.abspath(os.path.join(path_parts[0], 'collection.txt'))
+            # cpath = Path(collection_file)
+            # if (cpath.exists() and cpath.is_file()):
+            # we also have a collection file
             #    process_collection_file(path_parts[0], cpath)
         except Exception as e:
             print("Error accessing file: {0}".format(e))
diff --git a/tests/conftest.py b/tests/conftest.py
index 729cec9..eb228e6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,4 +24,4 @@ def tsv_data_path(data_path):
 
 @pytest.fixture
 def tsv_base_dir(tsv_data_path):
-    return os.path.join(tsv_data_path, "hg38", "test_data_tsv")
\ No newline at end of file
+    return os.path.join(tsv_data_path, "hg38", "test_data_tsv")
diff --git a/tests/data/bedstat_annotation_sheet.csv b/tests/data/bedstat_annotation_sheet.csv
new file mode 100644
index 0000000..8ea1c8d
--- /dev/null
+++ b/tests/data/bedstat_annotation_sheet.csv
@@ -0,0 +1,2 @@
+sample_name,file_name,genome,exp_protocol,cell_type,tissue,antibody,treatment,data_source,GSE,GSM,description,format
+bedbase_demo1,test_hg38.bed.gz,hg38,ChiPseq,GM12878,NA,IKZF1,None,GEO,GSE105587,,IKZF1 ChIP-seq on human GM12878,bed
\ No newline at end of file
diff --git a/tests/data/bedstat_config.yaml b/tests/data/bedstat_config.yaml
new file mode 100644
index 0000000..78a2a84
--- /dev/null
+++ b/tests/data/bedstat_config.yaml
@@ -0,0 +1,17 @@
+pep_version: 2.0.0
+sample_table: bedstat_annotation_sheet.csv
+
+looper:
+  output_dir: $GITHUB_WORKSPACE/outputs/bedstat_output
+
+sample_modifiers:
+  append:
+    bedbase_config: $GITHUB_WORKSPACE/tests/data/config_min.yaml
+    pipeline_interfaces: $GITHUB_WORKSPACE/pipeline_interface.yaml
+    output_file_path: OUTPUT
+    yaml_file: SAMPLE_YAML
+  derive:
+    attributes: [output_file_path, yaml_file]
+    sources:
+      OUTPUT: "$GITHUB_WORKSPACE/tests/data/{file_name}"
+      SAMPLE_YAML: "$GITHUB_WORKSPACE/outputs/bedstat_output/submission/{sample_name}.yaml"
\ No newline at end of file
diff --git a/tests/data/config_min.yaml b/tests/data/config_min.yaml
new file mode 100644
index 0000000..e55baf4
--- /dev/null
+++ b/tests/data/config_min.yaml
@@ -0,0 +1,14 @@
+database:
+  name: pipestat-test
+  user: postgres
+  password: pipestat-password
+  host: localhost
+  port: 5432
+path:
+  pipeline_output_path: $BEDBASE_DATA_PATH/outputs
+  bedstat_dir: bedstat_output
+  bedbuncher_dir: bedbuncher_output
+  remote_url_base: null
+server:
+  host: 0.0.0.0
+  port: 8000
\ No newline at end of file
diff --git a/tests/data/test_hg38.bed.gz b/tests/data/test_hg38.bed.gz
new file mode 100644
index 0000000..344da49
Binary files /dev/null and b/tests/data/test_hg38.bed.gz differ
diff --git a/tools/regionstat.R b/tools/regionstat.R
index 7197637..d8c1320 100644
--- a/tools/regionstat.R
+++ b/tools/regionstat.R
@@ -2,156 +2,235 @@ library(GenomicDistributions)
 library(GenomicDistributionsData)
 library(optparse)
 library(tools)
-data(TSS_hg38)
+library(R.utils)
+
+trim <- IRanges::trim
 
 option_list = list(
-    make_option(c("--bedfile"), type="character", default=NULL, 
-              help="path to a BED file to process", metavar="character"),
-	make_option(c("--fileId"), type="character", default=NULL,
+  make_option(c("--bedfilePath"), type="character", default=NULL, 
+              help="full path to a BED file to process", metavar="character"),
+  make_option(c("--fileId"), type="character", default=NULL,
               help="BED file ID to use for output files prefix", metavar="character"),
-	make_option(c("--openSignalMatrix"), type="character",
-			  help="path to the open signal matrix required for the tissue specificity plot", metavar="character"),
-    make_option(c("--digest"), type="character", default=NULL,
+  make_option(c("--openSignalMatrix"), type="character",
+              help="path to the open signal matrix required for the tissue specificity plot", metavar="character"),
+  make_option(c("--digest"), type="character", default=NULL,
               help="digest of the BED file", metavar="character"),
-    make_option(c("--outputfolder"), type="character", default="output",
+  make_option(c("--outputFolder"), type="character", default="output",
               help="base output folder for results", metavar="character"),
-    make_option(c("--genome"), type="character", default="hg38",
+  make_option(c("--genome"), type="character", default="hg38",
               help="genome reference to calculate against", metavar="character"))
- 
+
 opt_parser = OptionParser(option_list=option_list);
 opt = parse_args(opt_parser);    
 
-if (is.null(opt$bedfile)) {
-    print_help(opt_parser)
-    stop("Bed file input missing.")
+if (is.null(opt$bedfilePath)) {
+  print_help(opt_parser)
+  stop("Bed file input missing.")
 }
 
 if (is.null(opt$fileId)) {
-    print_help(opt_parser)
-    stop("fileId input missing.")
+  print_help(opt_parser)
+  stop("fileId input missing.")
 }
 
 if (is.null(opt$digest)) {
-    print_help(opt_parser)
-    stop("digest input missing.")
+  print_help(opt_parser)
+  stop("digest input missing.")
 }
 
 
-plotBoth <- function(plotPth, g){
-    print(paste0("Plotting: ", plotPth))
-    ggplot2::ggsave(paste0(plotPth, ".png"), g, device="png", width=8, height=8, units="in")
-    ggplot2::ggsave(paste0(plotPth, ".pdf"), g, device="pdf", width=8, height=8, units="in")
+plotBoth <- function(plotId, g){
+  pth = paste0(opt$outputFolder, "/", fileId, "_", plotId)
+  print(paste0("Plotting: ", pth))
+  ggplot2::ggsave(paste0(pth, ".png"), g, device="png", width=8, height=8, units="in")
+  ggplot2::ggsave(paste0(pth, ".pdf"), g, device="pdf", width=8, height=8, units="in")
 }
 
-doItAall <- function(query, fname, fileId, genome, cellMatrix) {
-    plots = data.frame(stringsAsFactors=F)
-    bsGenomeAvail = ifelse((requireNamespace(BSg, quietly=TRUE) | requireNamespace(BSgm, quietly=TRUE)), TRUE, FALSE)
-    ## continue on with calculations
-	TSSdist = calcFeatureDistRefTSS(query, genome)
-	plotId = "tssdist"
-	plotBoth(paste0(outfolder, "/", fileId, "_", plotId), 
-	         plotFeatureDist(TSSdist, featureName="TSS"))
-	newPlot = data.frame("name"=plotId, "caption"="Region-TSS distance distribution")
-    plots = rbind(plots, newPlot)
-    
-    
-    # Chromosomes region distribution plot
-	x = calcChromBinsRef(query, genome)
-    plotId = "chrombins"
-    plotBoth(paste0(outfolder, "/", fileId, "_", plotId), 
-             plotChromBins(x))
-    newPlot = data.frame("name"=plotId, "caption"="Regions distribution over chromosomes")
-    plots = rbind(plots, newPlot)
-    
-	# OPTIONAL: Plot GC content only if proper BSgenome package is installed. 
-	if (bsGenomeAvail) {
-		gcvec = calcGCContentRef(query, genome)
-		plotId = "gccontent"
-		plotBoth(paste0(outfolder, "/", fileId, "_", plotId),
-					plotGCContent(gcvec))
-		newPlot = data.frame("name"=plotId, "caption"="GC content")
-		plots = rbind(plots, newPlot)
-	}
-    # Partition Plots, default to percentages
-	gp = calcPartitionsRef(query, genome)
-	plotId = "partitions"
-	plotBoth(paste0(outfolder, "/", fileId, "_", plotId), 
-	         plotPartitions(gp))
-	newPlot = data.frame("name"=plotId, "caption"="Regions distribution over genomic partitions")
-	plots = rbind(plots, newPlot)
-
-	ep = calcExpectedPartitionsRef(query, genome)
-	plotId = "expected_partitions"
-	plotBoth(paste0(outfolder, "/", fileId, "_", plotId), 
-	         plotExpectedPartitions(ep))
-	newPlot = data.frame("name"=plotId, "caption"="Expected distribution over genomic partitions")
-	plots = rbind(plots, newPlot)
-
-	cp = calcCumulativePartitionsRef(query, genome)
-	plotId = "cumulative_partitions"
-	plotBoth(paste0(outfolder, "/", fileId, "_", plotId),
-			 plotCumulativePartitions(cp))
-	newPlot = data.frame("name"=plotId, "caption"="Cumulative distribution over genomic partitions")
-	plots = rbind(plots, newPlot)
-
-
-	# flatten the result returned by the function above
-	partiotionNames = as.vector(gp[,"partition"])
-	partitionsList = list()
-	for(i in seq_along(partiotionNames)){
-	    partitionsList[[paste0(partiotionNames[i], "_frequency")]] = 
-	        as.vector(gp[,"Freq"])[i]
-	    partitionsList[[paste0(partiotionNames[i], "_percentage")]] = 
-	        as.vector(gp[,"Freq"])[i]/length(query)	        
-	}
-	
-	# QThist plot
-	widths = calcWidth(query)
-	plotId = "widths_histogram"
-	plotBoth(paste0(outfolder, "/", fileId, "_", plotId),
-		plotQTHist(widths))
-	newPlot = data.frame("name"=plotId, "caption"="Quantile-Trimmed Histogram of Widths")
-	plots = rbind(plots, newPlot)
-
-	# Neighbor regions distance plots
-	dist = calcNeighborDist(query)
-	plotId = "neighbor_distances"
-	plotBoth(paste0(outfolder, "/", fileId, "_", plotId), 
-	         plotNeighborDist(dist))
-	newPlot = data.frame("name"=plotId, "caption"="Distance between neighbor regions")
-	plots = rbind(plots, newPlot)
+getPlotReportDF <- function(plotId, title){
+  pth = paste0(opt$outputFolder, "/", fileId, "_", plotId)
+  rel_pth = getRelativePath(pth, paste0(opt$outputFolder, "/../../../"))
+  print(paste0("Writing plot json: ", rel_pth))
+  newPlot = data.frame(
+    "name"=plotId, 
+    "title"=title, 
+    "thumbnail_path"=paste0(rel_pth, ".png"), 
+    "path"=paste0(rel_pth, ".pdf")
+  )
+  return(newPlot)
+}
 
-	# OPTIONAL: Add tissue specificity plot if open signal matrix is provided
-	if (cellMatrix == "None") {
-		message("open signal matrix not provided. Skipping tissue specificity plot ... ")
-	} else {
-		matrix = data.table::fread(cellMatrix)
-		op = calcOpenSignal(query, matrix)
-		plotId = "open_chromatin"
-		plotBoth(paste0(outfolder, "/", fileId, "_", plotId), 
-		         plotOpenSignal(op))
-		newPlot = data.frame("name"=plotId, "caption"="Cell specific enrichment for open chromatin")
-		plots = rbind(plots, newPlot)
-	}
 
-	# Note: names of the list elements MUST match what's defined in: https://github.com/databio/bbconf/blob/master/bbconf/const.py
-	bedmeta = list(
-	    id=fileId,
-		gc_content=ifelse(bsGenomeAvail, mean(gcvec), NA),
-		regions_no=length(query),
-		mean_absolute_TSS_dist=mean(abs(TSSdist), na.rm=TRUE),
-		mean_region_width=mean(widths),
-		md5sum=opt$digest,
-		plots=plots,
-		bedfile_path=fname
-	)
-	write(jsonlite::toJSON(c(bedmeta, partitionsList), pretty=TRUE), paste0(outfolder, "/", fileId, ".json"))
+doItAall <- function(query, fileId, genome, cellMatrix) {
+  plots = data.frame(stringsAsFactors=F)
+  bsGenomeAvail = ifelse((requireNamespace(BSg, quietly=TRUE) | requireNamespace(BSgm, quietly=TRUE)), TRUE, FALSE)
+  # TSS distance plot
+  tryCatch(
+    expr = {
+      TSSdist = calcFeatureDistRefTSS(query, genome)
+      plotBoth("tssdist", plotFeatureDist(TSSdist, featureName="TSS"))
+      plots = rbind(plots, getPlotReportDF("tssdist", "Region-TSS distance distribution"))
+      message("Successfully calculated and plot TSS distance.")
+    },
+    error = function(e){
+      message('Caught an error!')
+      print(e)
+    }
+  ) 
+  
+  
+  # Chromosomes region distribution plot
+  tryCatch(
+    expr = {
+      plotBoth("chrombins", plotChromBins(calcChromBinsRef(query, genome)))
+      plots = rbind(plots, getPlotReportDF("chrombins", "Regions distribution over chromosomes"))
+      message("Successfully calculated and plot chromosomes region distribution.")
+    },
+    error = function(e){
+      message('Caught an error!')
+      print(e)
+    }
+  ) 
+  
+  
+  # OPTIONAL: Plot GC content only if proper BSgenome package is installed. 
+  if (bsGenomeAvail) {
+    tryCatch(
+      expr = {
+        gcvec = calcGCContentRef(query, genome)
+        plotBoth("gccontent", plotGCContent(gcvec))
+        plots = rbind(plots, getPlotReportDF("gccontent", "GC content"))
+        message("Successfully calculated and plot GC content.")
+      },
+      error = function(e){
+        message('Caught an error!')
+        print(e, gcvec)
+      }
+    ) 
+  }
+  
+  # Partition plots, default to percentages
+  tryCatch(
+    expr = {
+      gp = calcPartitionsRef(query, genome)
+      plotBoth("paritions", plotPartitions(gp))
+      plots = rbind(plots, getPlotReportDF("paritions", "Regions distribution over genomic partitions"))
+      # flatten the result returned by the function above
+      partiotionNames = as.vector(gp[,"partition"])
+      partitionsList = list()
+      for(i in seq_along(partiotionNames)){
+        partitionsList[[paste0(partiotionNames[i], "_frequency")]] = 
+          as.vector(gp[,"Freq"])[i]
+        partitionsList[[paste0(partiotionNames[i], "_percentage")]] = 
+          as.vector(gp[,"Freq"])[i]/length(query)	        
+      }
+      message("Successfully calculated and plot regions distribution over genomic partitions.")
+    },
+    error = function(e){
+      message('Caught an error!')
+      print(e)
+    }
+  ) 
+  
+  # Expected partition plots
+  tryCatch(
+    expr = {
+      plotBoth("expected_partitions", plotExpectedPartitions(calcExpectedPartitionsRef(query, genome)))
+      plots = rbind(plots, getPlotReportDF("expected_partitions", "Expected distribution over genomic partitions"))
+      message("Successfully calculated and plot expected distribution over genomic partitions.")
+    },
+    error = function(e){
+      message('Caught an error!')
+      print(e)
+    }
+  ) 
+ 
+  # Cumulative partition plots
+  tryCatch(
+    expr = {
+      plotBoth("cumulative_partitions", plotCumulativePartitions(calcCumulativePartitionsRef(query, genome)))
+      plots = rbind(plots, getPlotReportDF("cumulative_partitions", "Cumulative distribution over genomic partitions"))
+      message("Successfully calculated and plot cumulative distribution over genomic partitions.")
+    },
+    error = function(e){
+      message('Caught an error!')
+      print(e)
+    }
+  ) 
+  
+  # QThist plot
+  tryCatch(
+    expr = {
+      widths = calcWidth(query)
+      plotBoth("widths_histogram", plotQTHist(widths))
+      plots = rbind(plots, getPlotReportDF("widths_histogram", "Quantile-trimmed histogram of widths"))
+      message("Successfully calculated and plot quantile-trimmed histogram of widths.")
+    },
+    error = function(e){
+      message('Caught an error!')
+      print(e, widths)
+    }
+  ) 
+  
+  # Neighbor regions distance plots
+  tryCatch(
+    expr = {
+      plotBoth("neighbor_distances", plotNeighborDist(calcNeighborDist(query)))
+      plots = rbind(plots, getPlotReportDF("neighbor_distances", "Distance between neighbor regions"))
+      message("Successfully calculated and plot distance between neighbor regions.")
+    },
+    error = function(e){
+      message('Caught an error!')
+      print(e)
+    }
+  ) 
+  
+  # Tissue specificity plot if open signal matrix is provided
+  if (cellMatrix == "None") {
+    message("open signal matrix not provided. Skipping tissue specificity plot ... ")
+  } else {
+    tryCatch(
+      expr = {
+        plotBoth("open_chromatin", plotSummarySignal(calcSummarySignal(query, data.table::fread(cellMatrix))))
+        plots = rbind(plots, getPlotReportDF("open_chromatin", "Cell specific enrichment for open chromatin"))
+        message("Successfully calculated and plot cell specific enrichment for open chromatin.")
+      },
+      error = function(e){
+        message('Caught an error!')
+        print(e)
+      }
+    ) 
+  }
+  
+  # Note: names of the list elements MUST match what's defined in: https://github.com/databio/bbconf/blob/master/bbconf/schemas/bedfiles_schema.yaml
+  bedmeta = list(
+    name=fileId,
+    regions_no=length(query),
+    mean_region_width=ifelse(exists('widths'), signif(mean(widths), digits = 4), NA),
+    md5sum=opt$digest
+  )
+  if (exists('gcvec') && !isEmpty(gcvec)){
+    gc_content <- list(gc_content = signif(mean(gcvec), digits = 4))
+    bedmeta = append(bedmeta, gc_content)
+  }
+  if (exists('TSSdist') && !all(is.na(TSSdist))){
+    tss <- list(median_TSS_dist = signif(median(abs(TSSdist), na.rm=TRUE), digits = 4))
+    bedmeta = append(bedmeta, tss)
+  }
+  if (exists('partitionsList')){
+    write(jsonlite::toJSON(c(bedmeta, partitionsList), pretty=TRUE), paste0(outfolder, "/", fileId, ".json"))
+  } else {
+     write(jsonlite::toJSON(c(bedmeta), pretty=TRUE), paste0(outfolder, "/", fileId, ".json"))
+    }
+  
+  if (exists('plots')){
+    write(jsonlite::toJSON(plots, pretty=TRUE), paste0(outfolder, "/", fileId, "_plots.json"))
+  }
 }
 
 # define values and output folder for doitall()
 fileId = opt$fileId
-fn = opt$bedfile
-outfolder = opt$outputfolder
+bedPath = opt$bedfilePath
+outfolder = opt$outputFolder
 genome = opt$genome
 cellMatrix = opt$openSignalMatrix
 orgName = "Mmusculus"
@@ -163,7 +242,5 @@ BSg = paste0("BSgenome.", orgName , ".UCSC.", genome)
 BSgm = paste0(BSg, ".masked")
 
 # read bed file and run doitall()
-query = LOLA::readBed(fn)
-doItAall(query, fn, fileId, genome, cellMatrix)
-
-
+query = LOLA::readBed(bedPath)
+doItAall(query, fileId, genome, cellMatrix)