Source extractor tutorial #6265
# This is the bibliography file for your tutorial.
#
# To add bibliography (bibtex) entries here, follow these steps:
# 1) Find the DOI for the article you want to cite
# 2) Go to https://doi2bib.org and fill in the DOI
# 3) Copy the resulting bibtex entry into this file
#
# To cite the example below, in your tutorial.md file
# use {% cite Batut2018 %}
#
# If you want to cite an online resource (website etc.)
# you can use the 'online' format (see below)
#
# You can remove the examples below

@article{Batut2018,
doi = {10.1016/j.cels.2018.05.012},
url = {https://doi.org/10.1016/j.cels.2018.05.012},
year = {2018},
month = jun,
publisher = {Elsevier {BV}},
volume = {6},
number = {6},
pages = {752--758.e1},
author = {B{\'{e}}r{\'{e}}nice Batut and Saskia Hiltemann and Andrea Bagnacani and Dannon Baker and Vivek Bhardwaj and
Clemens Blank and Anthony Bretaudeau and Loraine Brillet-Gu{\'{e}}guen and Martin {\v{C}}ech and John Chilton
and Dave Clements and Olivia Doppelt-Azeroual and Anika Erxleben and Mallory Ann Freeberg and Simon Gladman and
Youri Hoogstrate and Hans-Rudolf Hotz and Torsten Houwaart and Pratik Jagtap and Delphine Larivi{\`{e}}re and
Gildas Le Corguill{\'{e}} and Thomas Manke and Fabien Mareuil and Fidel Ram{\'{i}}rez and Devon Ryan and
Florian Christoph Sigloch and Nicola Soranzo and Joachim Wolff and Pavankumar Videm and Markus Wolfien and
Aisanjiang Wubuli and Dilmurat Yusuf and James Taylor and Rolf Backofen and Anton Nekrutenko and Bj\"{o}rn Gr\"{u}ning},
title = {Community-Driven Data Analysis Training for Biology},
journal = {Cell Systems}
}

@online{gtn-website,
author = {GTN community},
title = {GTN Training Materials: Collection of tutorials developed and maintained by the worldwide Galaxy community},
url = {https://training.galaxyproject.org},
urldate = {2021-03-24}
}

@article{legacy-survey-astronomy,
author = {{Dey}, Arjun and {Schlegel}, David J. and {Lang}, Dustin and {Blum}, Robert and {Burleigh}, Kaylan and {Fan}, Xiaohui and {Findlay}, Joseph R. and {Finkbeiner}, Doug and {Herrera}, David and {Juneau}, St{\'e}phanie and {Landriau}, Martin and {Levi}, Michael and {McGreer}, Ian and {Meisner}, Aaron and {Myers}, Adam D. and {Moustakas}, John and {Nugent}, Peter and {Patej}, Anna and {Schlafly}, Edward F. and {Walker}, Alistair R. and {Valdes}, Francisco and {Weaver}, Benjamin A. and {Y{\`e}che}, Christophe and {Zou}, Hu and {Zhou}, Xu and {Abareshi}, Behzad and {Abbott}, T.~M.~C. and {Abolfathi}, Bela and {Aguilera}, C. and {Alam}, Shadab and {Allen}, Lori and {Alvarez}, A. and {Annis}, James and {Ansarinejad}, Behzad and {Aubert}, Marie and {Beechert}, Jacqueline and {Bell}, Eric F. and {BenZvi}, Segev Y. and {Beutler}, Florian and {Bielby}, Richard M. and {Bolton}, Adam S. and {Brice{\~n}o}, C{\'e}sar and {Buckley-Geer}, Elizabeth J. and {Butler}, Karen and {Calamida}, Annalisa and {Carlberg}, Raymond G. and {Carter}, Paul and {Casas}, Ricard and {Castander}, Francisco J. and {Choi}, Yumi and {Comparat}, Johan and {Cukanovaite}, Elena and {Delubac}, Timoth{\'e}e and {DeVries}, Kaitlin and {Dey}, Sharmila and {Dhungana}, Govinda and {Dickinson}, Mark and {Ding}, Zhejie and {Donaldson}, John B. and {Duan}, Yutong and {Duckworth}, Christopher J. and {Eftekharzadeh}, Sarah and {Eisenstein}, Daniel J. and {Etourneau}, Thomas and {Fagrelius}, Parker A. and {Farihi}, Jay and {Fitzpatrick}, Mike and {Font-Ribera}, Andreu and {Fulmer}, Leah and {G{\"a}nsicke}, Boris T. and {Gaztanaga}, Enrique and {George}, Koshy and {Gerdes}, David W. and {Gontcho}, Satya Gontcho A. and {Gorgoni}, Claudio and {Green}, Gregory and {Guy}, Julien and {Harmer}, Diane and {Hernandez}, M. and {Honscheid}, Klaus and {Huang}, Lijuan Wendy and {James}, David J. and {Jannuzi}, Buell T. and {Jiang}, Linhua and {Joyce}, Richard and {Karcher}, Armin and {Karkar}, Sonia and {Kehoe}, Robert and {Kneib}, Jean-Paul and {Kueter-Young}, Andrea and {Lan}, Ting-Wen and {Lauer}, Tod R. and {Le Guillou}, Laurent and {Le Van Suu}, Auguste and {Lee}, Jae Hyeon and {Lesser}, Michael and {Perreault Levasseur}, Laurence and {Li}, Ting S. and {Mann}, Justin L. and {Marshall}, Robert and {Mart{\'\i}nez-V{\'a}zquez}, C.~E. and {Martini}, Paul and {du Mas des Bourboux}, H{\'e}lion and {McManus}, Sean and {Meier}, Tobias Gabriel and {M{\'e}nard}, Brice and {Metcalfe}, Nigel and {Mu{\~n}oz-Guti{\'e}rrez}, Andrea and {Najita}, Joan and {Napier}, Kevin and {Narayan}, Gautham and {Newman}, Jeffrey A. and {Nie}, Jundan and {Nord}, Brian and {Norman}, Dara J. and {Olsen}, Knut A.~G. and {Paat}, Anthony and {Palanque-Delabrouille}, Nathalie and {Peng}, Xiyan and {Poppett}, Claire L. and {Poremba}, Megan R. and {Prakash}, Abhishek and {Rabinowitz}, David and {Raichoor}, Anand and {Rezaie}, Mehdi and {Robertson}, A.~N. and {Roe}, Natalie A. and {Ross}, Ashley J. and {Ross}, Nicholas P. and {Rudnick}, Gregory and {Safonova}, Sasha and {Saha}, Abhijit and {S{\'a}nchez}, F. Javier and {Savary}, Elodie and {Schweiker}, Heidi and {Scott}, Adam and {Seo}, Hee-Jong and {Shan}, Huanyuan and {Silva}, David R. and {Slepian}, Zachary and {Soto}, Christian and {Sprayberry}, David and {Staten}, Ryan and {Stillman}, Coley M. and {Stupak}, Robert J. and {Summers}, David L. and {Sien Tie}, Suk and {Tirado}, H. and {Vargas-Maga{\~n}a}, Mariana and {Vivas}, A. Katherina and {Wechsler}, Risa H. 
and {Williams}, Doug and {Yang}, Jinyi and {Yang}, Qian and {Yapici}, Tolga and {Zaritsky}, Dennis and {Zenteno}, A. and {Zhang}, Kai and {Zhang}, Tianmeng and {Zhou}, Rongpu and {Zhou}, Zhimin},
title = "{Overview of the DESI Legacy Imaging Surveys}",
journal = {The Astronomical Journal},
keywords = {catalogs, surveys, Astrophysics - Instrumentation and Methods for Astrophysics},
year = 2019,
month = may,
volume = {157},
number = {5},
eid = {168},
pages = {168},
doi = {10.3847/1538-3881/ab089d},
archivePrefix = {arXiv},
eprint = {1804.08657},
primaryClass = {astro-ph.IM},
adsurl = {https://ui.adsabs.harvard.edu/abs/2019AJ....157..168D},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
---
layout: tutorial_hands_on

title: Source extractor on DESI Legacy Surveys sky images
questions:
- How do I detect luminous sources from a dark background?
- What are the required inputs and their formats?
- How can I easily get sky images?
- How can detections be improved?
- How can I use the extracted source properties?
- How can I get the seed image for the Voronoi segmentation tutorial?
objectives:
- Perform luminous source extraction in Galaxy.
- Identify objects in sky images.
- Analyse sky images in Galaxy.
- Create a simple segmentation mask.
- Visualize the detected sources.
time_estimation: 1H
key_points:
- Source Extractor is a well-known astronomy library used to detect luminous sources in sky images.
- This tutorial shows how to analyse image data for object detection and showcases how an astronomy software tool can be applied to data from several different domains.
requirements:
  -
    type: "internal"
    topic_name: imaging
    tutorials:
      - imaging-introduction

contributions:
  authorship:
    - Andrei-EPFL
  funding:
    - oscars
    - fiesta
    - eurosciencegateway
tags:
- imageanalysis
- astronomy
- object detection

---

One key objective in astronomy and large-scale sky surveys is to identify individual celestial sources, such as stars and galaxies, in wide-field sky images to enable further detailed scientific analyses. For instance, the DESI Legacy Surveys have imaged approximately one-third of the sky, detecting billions of luminous sources. As a follow-up, the DESI project measures individual galaxies' spectra from a subsample of about 50 million targets, selected based on their photometric properties.

[SExtractor (Source Extractor)](https://www.astromatic.net/software/sextractor/) is a widely used tool in astronomy for detecting and measuring sources in astronomical images. The Galaxy source-extractor tool is built on top of [SEP](https://sep.readthedocs.io/en/stable/index.html), a Python library derived from the core routines of SExtractor.

For more in-depth documentation, you can refer to:
- [SEP documentation](https://sep.readthedocs.io/en/v1.0.x/index.html)
- [SEP paper](https://joss.theoj.org/papers/10.21105/joss.00058)
- [Source Extractor for Dummies](https://arxiv.org/abs/astro-ph/0512139)
- [Source Extractor paper](https://ui.adsabs.harvard.edu/abs/1996A%26AS..117..393B/abstract)
- [Source Extractor website](https://www.astromatic.net/software/sextractor/)

> <agenda-title></agenda-title>
>
> In this tutorial, we will cover:
>
> 1. TOC
> {:toc}
>
{: .agenda}

## Input Requirements

The source-extractor tool accepts a single image file as input, with the option to provide a mask and/or a filter. Typically, for astronomy, a sky image contains luminous sources. In addition, the tool accepts several parameters related to the background estimation and source detection, which are set to the suggested default values. A subset of them is described in the subsection below.

**Image:**
- Preferably: light sources on a dark background.
- Format: a single-channel 2D array stored as `.tiff` or `.fits` ([FITS](https://fits.gsfc.nasa.gov/) is a widely used format in the astronomy community). A conversion sketch for multi-channel images is given below.
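
If your image is a multi-channel (e.g. RGB) `.tiff`, you can reduce it to a single channel before uploading. Below is a minimal sketch using `numpy` and `tifffile`; the file names are placeholders:

``` python
import numpy as np
import tifffile

# Hypothetical file names: convert an RGB image into a single-channel 2D array.
rgb = tifffile.imread("image_rgb.tiff")        # shape (H, W, 3)
gray = rgb.astype(np.float32).mean(axis=-1)    # simple average over the channels
tifffile.imwrite("image_gray.tiff", gray)
```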

**Mask (Optional):**
- Masks regions affected by bright sources (e.g. stars) to improve background estimation.
- Pixels with `value > maskthresh`, or with boolean `True`, are masked (a minimal sketch is given below).
- Format: a single-channel 2D array stored as `.tiff` or `.fits`.

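As a rough illustration of how the mask is interpreted, consider the sketch below; the array shape is an assumption for the example, and `maskthresh` defaults to 0 in the underlying SEP library:

``` python
import numpy as np

maskthresh = 0.0                   # default threshold in SEP
mask = np.zeros((360, 360))        # same shape as the input image (assumed here)
mask[100:150, 50:200] = 1.0        # region to exclude from background estimation

masked_pixels = mask > maskthresh  # True where pixels will be ignored
print(masked_pixels.sum(), "pixels masked")
```
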
> <comment-title> Checking the metadata of an image </comment-title>
>
> Tip 1: Use {% tool [Show image info](toolshed.g2.bx.psu.edu/repos/imgteam/image_info/ip_imageinfo/5.7.1+galaxy1) %} to inspect `.tiff` metadata. Required:
>
> ```
> RGB = false (1)
> Interleaved = false
> SizeZ = 1
> SizeT = 1
> SizeC = 1
> ```
>
> Tip 2: Use {% tool [astropy fitsinfo](toolshed.g2.bx.psu.edu/repos/astroteam/astropy_fitsinfo/astropy_fitsinfo/0.2.0+galaxy2) %} to check `.fits` metadata. Required: `Dimensions (N, M)`, where `N` and `M` are pixel dimensions in 2D.
{: .comment}
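
If you prefer to check the FITS file on your own computer instead, a minimal sketch with `astropy` (the file name is a placeholder):

``` python
from astropy.io import fits

with fits.open("image.fits") as hdul:
    hdul.info()                    # lists the HDUs with their dimensions and data types
    print(hdul[0].data.shape)      # should be a 2D (N, M) array for the primary HDU
```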

**Filter Kernel (Optional):**
The filter kernel is used to smooth the input image, which can enhance the detection of faint and extended sources. However, in crowded fields, filtering may reduce performance by blending nearby objects.

- If `Filter Case` is set to `none`, no filtering is applied.
- If `Filter Case` is `default`, a built-in smoothing kernel is used:
```
1 2 1
2 4 2
1 2 1
```
- If `Filter Case` is `file`, you must provide a custom 2D array stored as a plain-text file containing whitespace-separated values (an example of generating such a file is given below).

> <comment-title> Checking the format of the filter file </comment-title>
> You can check on your own computer whether the filter file has the correct format by reading it with:
> ``` python
> import numpy as np
> kernel = np.loadtxt("filter.txt")
> ```
> since this is the way the tool's back-end implementation loads the file.
{: .comment}
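
For instance, a small Gaussian-like kernel file could be generated as follows; this is only a sketch, and the kernel size and width are arbitrary choices:

``` python
import numpy as np

# Build a 5x5 Gaussian-like kernel and save it in the expected
# whitespace-separated plain-text format (one row per line).
x = np.arange(-2, 3)
g = np.exp(-x**2 / (2 * 1.0**2))   # 1D Gaussian with sigma = 1 pixel
kernel = np.outer(g, g)            # separable 2D kernel
np.savetxt("filter.txt", kernel)
```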

### Parameters for Background Estimation and Thresholding

In this subsection, we describe a subset of the tool's parameters that you can change.

Before source detection, the tool estimates the image background. This is done by dividing the image into a grid of boxes, each with a default size of:
``` python
bw = 64 # box width in pixels
bh = 64 # box height in pixels
```
Within each box, the pixel histogram is filtered to remove outliers, and the background level is estimated using a mode approximation based on the median and mean of the remaining pixel values. While 64 is the default value in the [SEP](https://sep.readthedocs.io/en/stable/index.html) package, the original [paper](https://ui.adsabs.harvard.edu/abs/1996A%26AS..117..393B/abstract) suggests that on most images, a value between 32 and 128 pixels should work fine.
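
As a rough illustration of this per-box estimate, the sketch below follows the mode approximation described in the [Source Extractor paper](https://ui.adsabs.harvard.edu/abs/1996A%26AS..117..393B/abstract); the clipping strategy is simplified and may differ from the exact implementation in SEP:

``` python
import numpy as np

def estimate_background_level(box_pixels, clip_sigma=3.0, n_iter=5):
    """Sketch of a per-box background estimate: clip outliers, then
    approximate the mode as 2.5 * median - 1.5 * mean."""
    pix = np.asarray(box_pixels, dtype=float).ravel()
    for _ in range(n_iter):            # iterative sigma clipping of bright pixels
        mean, std = pix.mean(), pix.std()
        if std == 0:
            break
        pix = pix[np.abs(pix - mean) < clip_sigma * std]
    return 2.5 * np.median(pix) - 1.5 * pix.mean()
```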

After background estimation, the tool identifies groups of pixels that exceed a defined brightness threshold. These parameters should help distinguish between real luminous sources and random fluctuations that can appear in the background.

Detection Criteria:

- Minimum Area: The number of connected pixels required to consider something a source.

``` python
minarea = 5 # default
```

- Threshold: The value of pixel (j, i) must exceed:

``` python
thresh * err[j,i]
```

where:

``` python
thresh = 1.5 # default
```

The interpretation of `err[j,i]` depends on the `err_option` parameter:

``` python
err_option = 'float_globalrms' # Use the global RMS (i.e. root mean square) of the background (default)
err_option = 'array_rms'       # Use a pixel-wise RMS array of the background
err_option = 'none'            # Use 'thresh' as an absolute threshold
```
It is advisable to adapt the error estimation to the studied image: e.g. if the background is reasonably uniform, using a global value should be sufficient. In contrast, if the background changes drastically in different regions of the image, a pixel-wise RMS would be preferred.
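
To make the roles of these parameters concrete, here is a minimal sketch of how the underlying [SEP](https://sep.readthedocs.io/en/stable/index.html) library, which the Galaxy tool wraps, combines them. The file name and the way the image is loaded are assumptions for illustration:

``` python
import numpy as np
import sep
from astropy.io import fits

# Hypothetical file name; SEP expects a native-byte-order float array.
data = fits.getdata("image.fits").astype(np.float64)

# Background estimation on a 64x64 pixel mesh.
bkg = sep.Background(data, bw=64, bh=64)
data_sub = data - bkg.back()       # background-subtracted image

# Source detection: a pixel must exceed thresh * err and belong to a group
# of at least minarea connected pixels.
objects = sep.extract(data_sub, thresh=1.5, err=bkg.globalrms, minarea=5)
print(len(objects), "sources detected")
```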

## Getting data from DESI Legacy Surveys
> <hands-on-title> Data Acquisition </hands-on-title>
>
> 1. Create a new history for this tutorial. You can rename the default unnamed history.
>
>    {% snippet faqs/galaxy/histories_create_new.md %}
>
> 2. Run the {% tool [DESI Legacy Survey](toolshed.g2.bx.psu.edu/repos/astroteam/desi_legacy_survey_astro_tool/desi_legacy_survey_astro_tool/0.0.2+galaxy0) %} tool.
>
>    - **Important:** Choose the Data Product **Image**.
>
>    The default values are used for this tutorial.
>    The history now contains the `.fits` image file that is used as input for the source-extractor tool.
{: .hands_on}
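
As an aside, if you ever need a similar cutout outside Galaxy, the Legacy Surveys viewer also offers a FITS cutout service. The sketch below is not needed for this tutorial, and the exact URL parameters are an assumption based on the public cutout service:

``` python
import urllib.request

# Hypothetical example: download a FITS cutout directly from the Legacy Surveys
# viewer; the coordinates, layer, pixel scale and size are placeholder values.
url = ("https://www.legacysurvey.org/viewer/fits-cutout"
       "?ra=180.0&dec=30.0&layer=ls-dr9&pixscale=0.262&size=360")
urllib.request.urlretrieve(url, "image.fits")
```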

## Running the Source-Extractor Tool

Once you’ve selected the source-extractor tool, choose the input file named `DESI Legacy Survey -> Image fits`. After the tool has finished running, several output images and data products will be available:

- The background-subtracted image with detected sources highlighted by red ellipses
- The estimated background
- The background RMS
- The segmentation map
- A catalog table listing the detected sources along with measured parameters such as flux (i.e. the sum of member pixels), position, size, and shape (a sketch for inspecting it locally follows below)
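
If you download the catalog table, you can inspect the measured columns locally. Here is a minimal sketch, assuming the table was exported as a plain whitespace-separated file with a header row; the file name and exact column names are assumptions, so adapt them to the file you actually downloaded:

``` python
import numpy as np

# Hypothetical file name; read a whitespace-separated table with a header row.
objects = np.genfromtxt("source_catalog.tabular", names=True)

print(objects.dtype.names)                         # e.g. x, y, a, b, theta, flux, ...
brightest = objects[np.argsort(objects["flux"])][::-1][:10]
print(brightest["x"], brightest["y"])              # positions of the ten brightest sources
```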

### Example Outputs


The original image is published by [Legacy Surveys / D. Lang (Perimeter Institute)](https://www.legacysurvey.org/acknowledgment/). The Legacy Surveys are described in {% cite legacy-survey-astronomy %}.


> <hands-on-title> Ellipse drawing </hands-on-title>
>
> The tool already provides as output an image with ellipses around the detected objects. Nevertheless, if you want to create such a figure yourself (for example on your own computer), you can use the table of detected sources returned by the tool, here called `objects`, in the following way:
> ``` python
> import numpy as np
> import matplotlib.pyplot as plt
> from matplotlib.patches import Ellipse
>
> # 'objects' is the catalog table produced by the tool; load it according to
> # the format you downloaded (e.g. np.genfromtxt for a plain tabular file).
> fig, ax = plt.subplots()
> for i in range(len(objects)):
>     # one ellipse per source; width/height are 6x the semi-axes a and b
>     # so that the ellipse comfortably encloses the source
>     e = Ellipse(xy=(objects['x'][i], objects['y'][i]),
>                 width=6*objects['a'][i],
>                 height=6*objects['b'][i],
>                 angle=objects['theta'][i] * 180. / np.pi)  # theta is in radians
>     e.set_facecolor('none')
>     e.set_edgecolor('red')
>     ax.add_artist(e)
> ```
>
{: .hands_on}

## Using a Mask to Improve Source Detection

Bright stars can skew background estimation and obscure nearby faint sources. In the previous output, some central sources were missed due to bright star interference.

A simple mask can help. Here's an example:



This mask can be easily created with:
``` python
import numpy as np
import tifffile

# Build a mask with the same shape as the input image (360 x 360 pixels here),
# setting the regions around the bright stars to 1.
mask = np.zeros((360, 360))
mask[270:325, :] = 1
mask[239:, :200] = 1
tifffile.imwrite("mask.tiff", mask)
```
Upload the mask to Galaxy, select it in the source-extractor tool, and re-run.


### Improved Outputs


You can observe that the central sources are now detected and that the dynamic range of the estimated background has decreased, thanks to the mask.

An important output of this tool is the segmentation map of the detected sources:

This map can be used as the seed image required by the [Voronoi segmentation tutorial]({% link topics/imaging/tutorials/voronoi-segmentation/tutorial.md %}). In this case, you can observe that the two bright stars still have an important effect on the source detection. Therefore, to improve the results, you can try a better mask, using the pixel-wise RMS array as the error in thresholding, or different background mesh sizes.
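
If you want to inspect the segmentation map yourself before feeding it into the Voronoi segmentation tutorial, a minimal sketch is shown below, assuming you downloaded the map as a FITS file (adapt the reader if you exported it as `.tiff`); the file name is a placeholder:

``` python
import numpy as np
from astropy.io import fits

segmap = fits.getdata("segmentation_map.fits")   # hypothetical file name

# Pixels belonging to the k-th detected source are labelled k; 0 is background.
labels = np.unique(segmap)
print("number of detected sources:", int((labels > 0).sum()))
```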