uvarc · rsdmse · Mar 27, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/content/courses/parallel-computing-introduction/codes/mpi_io.f90 b/content/courses/parallel-computing-introduction/codes/mpi_io.f90
@@ -5,20 +5,23 @@ program mpiwrite
    integer            :: N, M
    integer            :: i,j
    character(len=80)  :: arg
-   integer, allocatable, dimension(:,:)  :: loc_u
+   integer, allocatable, dimension(:,:)  :: u, gu
+   integer            :: numargs
 
    integer            :: rank, nprocs, nrows, ncols
-   integer            :: lrow, lcol, nrl, ncl
+   integer            :: lrow, lcol
+   integer            :: nrl, ncl, nr_total, nc_total, nghosts
    integer, parameter :: root=0
    type(MPI_Status)   :: mpi_stat
-   type(MPI_Datatype) :: locarr
+   type(MPI_Datatype) :: locarr, fullarr
    type(MPI_File)     :: fh
    integer            :: amode
    integer            :: mpi_err, gmpi_err
    integer(kind=MPI_OFFSET_KIND) :: disp=0
    character(len=24)  :: fname
    integer            :: ndims=2
-   integer, dimension(2) :: ldims, gdims, start_arr
+   integer, dimension(2) :: starts,sizes,subsizes
+   integer, dimension(2) :: gstarts,gsizes,gsubsizes
    character(len=36)  :: myfile
 
 
@@ -61,66 +64,98 @@ program mpiwrite
    !Grid coordinates
    lrow=rank/ncols
    lcol=mod(rank,ncols)
-
 
    !Hardcode each local array to be relatively small so we can see 
    !what we're doing
    nrl=4
    ncl=4
 
-   !Global array size
    N=nrl*nrows
    M=ncl*ncols
 
+   nghosts=2
+   nr_total=nrl+2*nghosts
+   nc_total=ncl+2*nghosts
+
    ! Set up values
-   allocate(loc_u(nrl,ncl))
-   do i=1,nrl
-      do j=1,ncl
-         loc_u(i,j)=(rank+1)*(i+j)
+   allocate(u(0:nr_total-1,0:nc_total-1))
+
+   u=-9
+   do i=nghosts,nrl+nghosts-1
+      do j=nghosts,ncl+nghosts-1
+         u(i,j)=rank
       enddo
    enddo
 
-   gdims=[N,M]
-   ldims=[nrl,ncl]
-   start_arr=[ncl*lrow,nrl*lcol]
-   print *, rank, lrow, lcol, start_arr
+   !array sizes
+   gsizes=[N,M]
+   sizes=[nr_total,nc_total]
 
    write(myfile,'(a,i2.2)') trim(fname),rank
    open(10,file=myfile)
    write(10,*) rank
    do i=1,nrl
-       write(10,*) loc_u(i,:)
+       write(10,*) u(i,:)
     enddo
 
-   !Define a subarray for each local array within the global array
-   call MPI_TYPE_CREATE_SUBARRAY(ndims, gdims, ldims, start_arr,               &
+   starts=[nghosts,nghosts]
+   subsizes=[nrl,ncl]
+   !Define a subarray for each local array
+   !Size includes ghost zones, starts picks out locations
+   call MPI_TYPE_CREATE_SUBARRAY(ndims, sizes, subsizes, starts,               &
                                  MPI_ORDER_FORTRAN, MPI_INTEGER, locarr)
    call MPI_TYPE_COMMIT(locarr)
 
+   !Create the subarray for the global file view
+   !Excludes ghost zones
+   !Remember that subarray starts assume 0 lower bound like C
+   gsizes=[N,M]
+   gstarts=[lrow*nrl,lcol*ncl]
+   gsubsizes=[nrl,ncl]
+   call MPI_TYPE_CREATE_SUBARRAY(ndims, gsizes, gsubsizes, gstarts,            &
+                                 MPI_ORDER_FORTRAN, MPI_INTEGER, fullarr)
+   call MPI_TYPE_COMMIT(fullarr)
+
    amode=ior(MPI_MODE_CREATE, MPI_MODE_WRONLY)
    call MPI_FILE_OPEN(MPI_COMM_WORLD,trim(fname),amode,MPI_INFO_NULL,fh,mpi_err)
 
-   call MPI_Allreduce(mpi_err, gmpi_err,1,MPI_INTEGER, MPI_LOR, MPI_COMM_WORLD)
+   call MPI_Allreduce(mpi_err,gmpi_err,1,MPI_INTEGER, MPI_BOR, MPI_COMM_WORLD)
 
    if ( gmpi_err /= MPI_SUCCESS ) then
       stop "Unable to open MPI file, terminating"
    endif
 
    !Need a header for the sizes, only root should write this
-   if ( rank==0 ) then
-       call MPI_File_write(fh, [N, M], 2, MPI_INTEGER, MPI_STATUS_IGNORE)
-   endif
-
-   !Everybody write its section
-   disp=2*sizeof(N)
+   !if ( rank==0 ) then
+   !    call MPI_File_write(fh, [N, M], 2, MPI_INTEGER, MPI_STATUS_IGNORE)
+   !endif
 
-   call MPI_FILE_SET_VIEW(fh,disp,MPI_INTEGER,locarr,"native", MPI_INFO_NULL)
-   call MPI_FILE_WRITE_ALL(fh, loc_u, size(loc_u), MPI_INTEGER, mpi_stat)
+   call MPI_FILE_SET_VIEW(fh,disp,MPI_INTEGER,fullarr,"native", MPI_INFO_NULL)
+   call MPI_FILE_WRITE_ALL(fh, u, 1, locarr, mpi_stat)
 
    call MPI_FILE_CLOSE(fh)
 
+   !Read it back in
+   if (rank==root) then
+      allocate(gu(0:N-1,0:M-1))
+      print *, 'allocated gu', size(gu)
+      amode=MPI_MODE_RDONLY
+      print *, 'Opening file'
+      call MPI_FILE_OPEN(MPI_COMM_WORLD,trim(fname),amode,MPI_INFO_NULL,fh,mpi_err)
+      print *, 'Opened file ',trim(fname)
+      if ( mpi_err /= MPI_SUCCESS) then
+          stop "Unable to open MPI file for reading"
+      endif
+      print *, "Starting to read"
+
+      call MPI_FILE_READ(fh, gu, size(gu), MPI_INTEGER, mpi_stat)
+
+   endif
+
 
    call MPI_Type_free(locarr)
+   call MPI_Type_free(fullarr)
+
    call MPI_Finalize()
 
 end program
diff --git a/content/courses/parallel-computing-introduction/codes/read_mpi.py b/content/courses/parallel-computing-introduction/codes/read_mpi.py
@@ -7,7 +7,6 @@
 comm=MPI.COMM_WORLD
 fh=MPI.File.Open(comm,filename,MPI.MODE_RDONLY)
 
-
 dims=np.empty((2,),dtype='int')
 fh.Read(dims)
 N=dims[0]; M=dims[1]

diff --git a/content/courses/parallel-computing-introduction/codes/readio.py b/content/courses/parallel-computing-introduction/codes/readio.py
@@ -3,7 +3,7 @@
 
 filename=sys.argv[1]
 
-fh=open(filename)
+fh=open(filename,'rb')
 x=np.fromfile(fh,dtype='int')
 
 print(type(x),x.shape,x.size)

diff --git a/content/notes/bioinfo-intro/_index.md b/content/notes/bioinfo-intro/_index.md
@@ -1,9 +1,6 @@
 ---
 title: Introduction to Bioinformatics Tools for HPC
 date: 2025-08-23T03:19:53Z
-authors: [mab]
-categories: ["Bioinformatics"]
-tags: ["Bioinformatics"]
 type: docs 
 weight: 30
 

diff --git a/content/notes/bioinfo-reproducibility/_index.md b/content/notes/bioinfo-reproducibility/_index.md
@@ -0,0 +1,18 @@
+---
+title: Reproducibility in Bioinformatics
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 100
+menu: 
+    bioinfo-reproducibility:
+---
+
+## Tutorial Outline
+
+ * Difficulties in achieving reproducibility
+
+ * Potential problems with bioinformatics pipelines
+
+ * Some helpful tools
+
+ * Snakemake and Nextflow Examples
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_10.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_10.md
@@ -0,0 +1,26 @@
+---
+title: Version Control
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 550
+menu: 
+    bioinfo-reproducibility:
+---
+
+
+{{< figure src=/notes/bioinfo-reproducibility/img/Triant-Bobar_Reproducibility_11.png >}}
+
+GitHub: https://github.com
+
+Track and manage changes to your code & files
+
+Store and label changes at every step
+
+Small or large projects
+
+Collaborate on projects and minimize conflicting edits
+
+Works on multiple platforms (MacOS, Windows, Linux)
+
+Website for GitHub, cutadapt repository
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_11.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_11.md
@@ -0,0 +1,18 @@
+---
+title: Environment Management
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 600
+menu: 
+    bioinfo-reproducibility:
+---
+
+##  Conda/Mamba environments
+
+  * Isolated spaces for each project with specific tool versions
+  * Manage Python versions and dependencies
+  * Install packages and software directly into environment
+  * Stable and reproducible place to run code and applications
+  * Not limited to Python, can run bash, Rscript
+  * YAML configuration file to create or export and transfer an environment
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_12.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_12.md
@@ -0,0 +1,18 @@
+---
+title: Storing Results
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 650
+menu: 
+    bioinfo-reproducibility:
+---
+
+* Public repositories for sequence data - required for most journals
+  * NCBI: https://www.ncbi.nlm.nih.gov
+  * Ensembl: https://www.ensembl.org/index.html
+  * Always document and archive changes, especially if unpublished:
+    * genome assembly versions
+    * sequence data: SNPs, isoforms
+
+Websites: NCBI, Ensembl, Santa Cruz
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_13.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_13.md
@@ -0,0 +1,15 @@
+---
+title: Containers
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 700
+menu: 
+    bioinfo-reproducibility:
+---
+
+Containers are portable environments that run across different computing environments
+
+They contain packages, software and dependencies that remain isolated from host infrastructure
+
+Each container is a standalone unit of software and can produce the same results on a different machine or server
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_14.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_14.md
@@ -0,0 +1,20 @@
+---
+title: Bioinformatic Pipelines
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 750
+menu: 
+    bioinfo-reproducibility:
+---
+
+## Typical bioinformatics workflows involve many steps:
+
+* FASTQ → QC → Alignment → Sorting → Variant Calling → Annotation
+  * FASTQ files need quality check and trimming
+  * Cutadapt
+  * BWA
+  * Samtools
+  * Freebayes
+  * VCFtools
+* Create pipeline to string software together for “final” output
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_15.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_15.md
@@ -0,0 +1,19 @@
+---
+title: Bioinformatic Pipeline Challenges
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 800
+menu: 
+    bioinfo-reproducibility:
+---
+
+Complex dependencies between steps
+
+Formatting inconsistencies
+
+Hard to reproduce results - scalability, parameters, version changes
+
+Difficult to parallelize efficiently
+
+Manual scripts often fail on HPC
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_16.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_16.md
@@ -0,0 +1,19 @@
+---
+title: Bioinformatic Pipelines on HPC
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 850
+menu: 
+    bioinfo-reproducibility:
+---
+
+Which modules were loaded?
+
+Where are scripts being run?
+
+Tracking paths - hard-coded in scripts?
+
+Out/error files - software vs slurm conflicts
+
+<span style="color:#002060"> __Goal:__ </span> Automate and track these workflows
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_17.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_17.md
@@ -0,0 +1,30 @@
+---
+title: Snakemake
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 900
+menu: 
+    bioinfo-reproducibility:
+---
+
+{{< figure src=/notes/bioinfo-reproducibility/img/Triant-Bobar_Reproducibility_19.png >}}
+
+https://snakemake.github.io/
+
+__Snakemake__  is a workflow management system designed for scientific pipelines
+
+Created by Johannes Köster, first released in 2012
+
+Based on UNIX make - originally created in 1976 but still in standard use
+
+Python based - “ _snake-make_ ”
+
+Free and open source, available on Mac, Windows, Unix
+
+https://snakemake.readthedocs.io/en/stable/
+
+https://github.com/snakemake
+
+
+`Make` is a command-line interface software tool that performs actions ordered by configured dependencies as defined in a configuration file called a makefile. It is commonly used for build automation to build executable code from source code. 
+
diff --git a/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_18.md b/content/notes/bioinfo-reproducibility/bioinfo-reproducibility_18.md
@@ -0,0 +1,19 @@
+---
+title: Snakemake Format
+date: 2026-03-25T19:08:46Z
+type: docs 
+weight: 950
+menu: 
+    bioinfo-reproducibility:
+---
+
+Similar to writing shell scripts, but Snakefiles contain sets of rules
+
+Format is based on Python structure
+
+Snakemake reads from a Snakefile that defines the rules
+
+Snakefile rules have a target output
+
+Snakemake uses pattern matching to follow the inputs, outputs and commands contained in rules to reach the final target output
+