From 2d6164b90a0982b4caee50be573e89d186856bfe Mon Sep 17 00:00:00 2001
From: arha <arha@tintin.ib>
Date: Wed, 18 Sep 2024 15:57:27 +0200
Subject: [PATCH 1/8] __init__.py of [attract] can 1/ get input pdbs 2/ convert
 them to attract CG 3/ add required by attract files to working directory

---
 src/haddock/modules/_template_cat/README.md   |  16 +-
 .../_template_cat/_template_mod/__init__.py   |   2 +-
 .../modules/sampling/attract/__init__.py      | 230 ++++++++++++++++++
 .../modules/sampling/attract/defaults.yaml    | 158 ++++++++++++
 .../modules/sampling/lightdock/__init__.py    |   2 +-
 5 files changed, 398 insertions(+), 10 deletions(-)
 create mode 100644 src/haddock/modules/sampling/attract/__init__.py
 create mode 100644 src/haddock/modules/sampling/attract/defaults.yaml

diff --git a/src/haddock/modules/_template_cat/README.md b/src/haddock/modules/_template_cat/README.md
index ed5ec452d..0356b3bb5 100644
--- a/src/haddock/modules/_template_cat/README.md
+++ b/src/haddock/modules/_template_cat/README.md
@@ -4,25 +4,25 @@ To develop your own HADDOCK3 module follow the patterns used in other modules.
 
 1. If your module belongs to a new category, create a folder for that category
    under the `modules` folder.
-1. Else, create a folder for your module inside its relevant category.
+2. Else, create a folder for your module inside its relevant category.
    The name of that folder is the name of the module, i.e., the name used to call
    the module in the haddock3 configuration files and used throughout the code base.
-1. Copy the `__init__.py` file here to the new module's folder and edit it accordingly
+3. Copy the `__init__.py` file here to the new module's folder and edit it accordingly
    to the instructions there.
-1. Do the same for the `defaults.yaml` file.
-1. You can then add any extra files needed inside your module's folder in order to
+4. Do the same for the `defaults.yaml` file.
+5. You can then add any extra files needed inside your module's folder in order to
    develop your module fully.
-1. If your module requires any extra libraries, describe how to install those libraries
+6. If your module requires any extra libraries, describe how to install those libraries
    in the `docs/INSTALL.md` file. Unless approved by the Haddock Team, do not add
    those dependencies to the `requirements.*` files.
-1. HADDOCK3 has already many features related to IO, subprocess run, sending jobs,
+7. HADDOCK3 has already many features related to IO, subprocess run, sending jobs,
    etc. Please, look around the `libs` folder for pertinent functions, but, above all,
    feel welcomed to reach out to us with any doubts.
-1. Please write also tests for your module. Our testing machinery already
+8. Please write also tests for your module. Our testing machinery already
    tests for the common patterns, for example, inspecting the `defaults.yaml` file.
    But you should write any additional tests to ensure that your module works properly.
    See other examples in the `tests/` folder.
-1. Finally, add an example of how to use your module in the `examples/` folder.
+9. Finally, add an example of how to use your module in the `examples/` folder.
    The example should have a short sampling scheme. Name the config file ending with
    `-test.cfg`.
 
diff --git a/src/haddock/modules/_template_cat/_template_mod/__init__.py b/src/haddock/modules/_template_cat/_template_mod/__init__.py
index 8fe31b4f7..fd2c18aff 100644
--- a/src/haddock/modules/_template_cat/_template_mod/__init__.py
+++ b/src/haddock/modules/_template_cat/_template_mod/__init__.py
@@ -60,7 +60,7 @@ def __init__(
             ) -> None:
 
         # if your module uses CNS you might need to define where the main CNS
-        # script is localted. See examples in `topoaa`, `emref`.
+        # script is located. See examples in `topoaa`, `emref`.
         #  else leave it out.
         # cns_script = Path(RECIPE_PATH, "cns", "main.cns")
 
diff --git a/src/haddock/modules/sampling/attract/__init__.py b/src/haddock/modules/sampling/attract/__init__.py
new file mode 100644
index 000000000..877a32094
--- /dev/null
+++ b/src/haddock/modules/sampling/attract/__init__.py
@@ -0,0 +1,230 @@
+"""attract docking module
+==================================
+
+This module performs a fragment-based single-stranded (ss) RNA-protein docking 
+using ATTRACT docking engine. This docking approach was developed to tackle the 
+flexibility of ssRNA. Its core idea is to split ssRNA chain into overlapping 
+trinucleotides (fragments), and dock them onto the rigid receptor separately, 
+assembling the fragments back into the whole chain models afterwards.
+
+#todo 
+add short description of the protocol, including CG, NAlib, sampling, 
+scoring (two ways) and assembly + possible restraints. 
+."""
+import os
+import subprocess
+import shutil
+import shlex
+from pathlib import Path
+
+from haddock import log
+from haddock.core.defaults import MODULE_DEFAULT_YAML
+from haddock.core.typing import FilePath, Any 
+#from haddock.libs import libpdb
+#from haddock.libs.libio import working_directory
+from haddock.libs.libontology import Format, PDBFile
+#from haddock.libs.libutil import check_subprocess
+from haddock.modules import BaseHaddockModule
+
+RECIPE_PATH = Path(__file__).resolve().parent
+DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)
+
+class HaddockModule(BaseHaddockModule):
+    """ATTRACT module."""
+
+    name = RECIPE_PATH.name
+
+    def __init__(self,
+                 order: int,
+                 path: Path,
+                 initial_params: FilePath = DEFAULT_CONFIG) -> None:
+        super().__init__(order, path, initial_params)
+
+    @classmethod
+    def confirm_installation(cls) -> None:
+        """Confirm that ATTRACT is installed."""
+        attract_dir = os.environ['ATTRACTDIR']
+        attract_exec = Path(attract_dir, 'attract')
+        cmd = f'{attract_exec}'
+        p = subprocess.run(shlex.split(cmd), capture_output=True)
+        err = p.stderr.decode('utf-8')
+        if "Too few arguments" in err and "usage:" in err:
+            # ATTRACT is installed correctly 
+            pass
+        else:
+            raise Exception('ATTRACT is not installed properly')
+
+    def determine_molecule_type(self, pdb_file, lines_to_check=10):
+        """Check sevelal last lines of pdb file to label it 
+        as rna, dna or protein."""
+        dna_residues = {'DA', 'DG', 'DC', 'DT'}  
+        rna_residues = {'A', 'U', 'G', 'C', 'RA', 'RU', 'RG', 'RC'}  
+        with open(pdb_file, 'r') as f:
+            lines = f.readlines()[-lines_to_check:]
+        for line in reversed(lines):
+            if line.startswith("ATOM"):
+                residue_name = line[17:20].strip()
+                if residue_name in dna_residues:
+                    return 'dna'
+                elif residue_name in rna_residues:
+                    return 'rna'
+        return 'protein'
+
+    def rename_and_coarse_grain(self, file_name, new_name, reduce_path, is_rna=False):
+        """Rename models to 'protein-aa.pdb' and 'rna-aa.pdb' and convert 
+        them to ATTRACT coarse-grained representation."""
+        os.rename(file_name, new_name)
+        cmd = ['python', reduce_path, '--rna', new_name] if is_rna else ['python', reduce_path, new_name]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        return result
+
+    def get_frag_lists(self, file):
+        """Get list of fragments and list of motifs from rna-aar.pdb."""
+        tmp = []
+        seq = ''
+        # extract sequence from structure
+        for line in file:
+            if line.startswith("ATOM"):
+                residue_name = line[17:20].strip()[-1]
+                residue_id = int(line[22:26].strip())
+                if [residue_name, residue_id] not in tmp:
+                    tmp.append([residue_name, residue_id])
+                    seq = seq + residue_name
+        # get info for motif.list and boundfrag.list
+        prev_frag = seq[:3]
+        count = 1 
+        boundfrag_list = [[count, prev_frag]]
+        motif_list = [prev_frag]
+        for x in seq[3:]:
+            count += 1 
+            next_frag = prev_frag[1:] + x
+            prev_frag = next_frag
+            boundfrag_list.append([count, next_frag])
+            if prev_frag not in motif_list: motif_list.append(next_frag)
+        return motif_list, boundfrag_list
+
+    def get_frag_pdbs(self, file):
+        """Extract fragments from full rna"""
+        first_res_id = int(file[0][22:26].strip())
+        last_res_id = int(file[-1][22:26].strip())
+        frag_total = last_res_id - first_res_id - 1 
+        fragmens=[]
+        for curr_residue_id in range(first_res_id, first_res_id+frag_total):
+            curr_frag = []
+            for line in file:
+                if line.startswith("ATOM"):
+                    residue_id = int(line[22:26].strip())
+                    if residue_id in [curr_residue_id, curr_residue_id+1, curr_residue_id+2]:
+                        curr_frag.append(line)
+                    elif residue_id > curr_residue_id+2:
+                        fragmens.append([curr_frag])
+                        break
+        fragmens.append([curr_frag])
+        return fragmens
+
+    def _run(self) -> None:
+        """Execute module.
+            
+            # todo 
+            1. check library + 
+            2. process input +
+            3. make folder +
+            4. run docking 
+            5. run lrmsd (optional)
+            6. HIPPO (optional)
+            7. Assembly ?
+            8. Scoring ?
+            9. SeleTopChains ? """
+
+        # Check if $LIBRARY exists 
+        try:
+            nalib = os.environ['LIBRARY']
+        except:
+            _msg = "Environment variable $LIBRARY not found."
+            self.finish_with_error(_msg)
+        log.info("NAlib found")
+
+        # Get the models generated in previous step
+        models: list[PDBFile] = [
+            p for p in self.previous_io.output
+            if p.file_type == Format.PDB ]
+
+        # Check that exactly two models are provided
+        # practically attract needs protein structure and RNA *sequence* 
+        # but at this stage it's more practical to ask for RNA structure     
+        if len(models) !=2 : 
+            _msg = "ATTRACT requires exactly two molecules"
+            self.finish_with_error(_msg)
+        
+        # Copy each model to the working directory
+        for model in models:
+            src_model = Path(model.path, model.file_name)
+            dest_model = Path(os.getcwd(), model.file_name)
+            shutil.copyfile(src_model, dest_model)
+       
+        # Ensure we have exactly protein and RNA molecules
+        model_1 = models[0]
+        model_2 = models[1]
+        label_1 = self.determine_molecule_type(model_1.file_name) 
+        label_2 = self.determine_molecule_type(model_2.file_name) 
+        labels = {label_1, label_2}
+        
+        if labels != {'protein', 'rna'}:
+            _msg = "ATTRACT requires protein and RNA molecules as input"
+            self.finish_with_error(_msg)
+
+        # Convert each molecule to corse-grained representation
+        # using $ATTRACTTOOLD/reduce.py 
+        log.info("Converting to coarse-grain representation")
+        try:
+            attracttools = os.environ['ATTRACTTOOLS']
+            attrac_reduce_path = Path(attracttools, 'reduce.py')
+
+            if label_1 == 'protein':
+                self.rename_and_coarse_grain(model_1.file_name, 'protien-aa.pdb', attrac_reduce_path, is_rna=False)
+                self.rename_and_coarse_grain(model_2.file_name, 'rna-aa.pdb', attrac_reduce_path, is_rna=True)
+            else:
+                self.rename_and_coarse_grain(model_1.file_name, 'rna-aa.pdb', attrac_reduce_path, is_rna=True)
+                self.rename_and_coarse_grain(model_2.file_name, 'protien-aa.pdb', attrac_reduce_path, is_rna=False)
+        except:
+            _msg = "Convertation to coarse-grain representation failes"
+            self.finish_with_error(_msg)
+        
+        # Add required by ATTRACT files:
+        log.info("Preparing docking directory")
+        # 1.link fragment library          
+        cmd = f"ln -s {nalib} nalib"
+        p = subprocess.run(shlex.split(cmd), capture_output=True)
+        err = p.stderr.decode('utf-8')
+        # 2. get motif.list and boundfrag.list
+        f = open('rna-aar.pdb','r')
+        file = f.readlines()
+        f.close()
+        motif, boundfrag = self.get_frag_lists(file)
+        with open('boundfrag.list', 'w') as f:
+            for item in boundfrag:
+                f.write(f"{item[0]} {item[1]}\n")
+        with open('motif.list', 'w') as f:
+            for item in motif:
+                f.write(f"{item}\n")
+        # 3. create fragXr.pdb (optional)
+        fragments = self.get_frag_pdbs(file)
+        for i in range(1, len(fragments)+1):
+            with open(f"frag{i}r.list", 'w') as f:
+                for fragment in fragments[i-1]:
+                    for line in fragment:
+                        f.write(line)
+        
+        # Run docking 
+        #log.info("Running ATTRACT")   
+        
+        
+        # ???
+        list_of_created_models = []     
+        created_models = ['protien-aa.pdb','rna-aa.pdb']
+        for model in created_models:
+            pdb_object = PDBFile(Path(model).name,  path=".")
+            list_of_created_models.append(pdb_object)
+
+            self.output_models = list_of_created_models
+            self.export_io_models()
\ No newline at end of file
diff --git a/src/haddock/modules/sampling/attract/defaults.yaml b/src/haddock/modules/sampling/attract/defaults.yaml
new file mode 100644
index 000000000..74de323ad
--- /dev/null
+++ b/src/haddock/modules/sampling/attract/defaults.yaml
@@ -0,0 +1,158 @@
+
+
+
+
+# default ones 
+
+# cns_exec:
+#   default: ""
+#   type: file
+#   title: "Path to the CNS executable"
+#   short: If not provided, HADDOCK3 will use the cns path configured
+#     during the installation.
+#   long: CNS is a required component to run HADDOCK. Ideally it should have been
+#     configured during installation. If not you can specify with the cns_exec parameter
+#     its path.
+#   group: "execution"
+#   explevel: easy
+ncores:
+  default: 4
+  type: integer
+  min: 1
+  max: 500
+  title: Number of CPU cores
+  short: Number of CPU cores to use for the CNS calculations.
+    It is truncated to max available CPUs minus 1.
+  long: Number of CPU cores to use for the CNS calculations.
+    This will define the number of concurrent jobs being executed.
+    Note that it is truncated to the total number of available CPUs minus 1.
+  group: "execution"
+  explevel: easy
+max_cpus:
+  default: true
+  type: boolean
+  title: The max number of CPUs allowed.
+  short: By default the max number of CPUs allowed is the max available on the system.
+  long:
+    If you want to spare a minimum amount of resources for daily tasks, set max_cpus
+    to false; in that case the maximum number of CPUs allowed will be the total available
+    in the machine minus 1. This calculation is done automatically.
+  group: "execution"
+  explevel: expert
+mode:
+  default: local
+  type: string
+  minchars: 0
+  maxchars: 20
+  choices:
+    - local
+    - batch
+  title: Mode of execution
+  short: Mode of execution of the jobs, either local or using a batch system.
+  long: Mode of execution of the jobs, either local or using a batch system.
+    Currently slurm and torque are supported. For the batch mode the queue command must be
+    specified in the queue parameter.
+  group: "execution"
+  explevel: easy
+batch_type:
+  default: "slurm"
+  type: string
+  minchars: 0
+  maxchars: 100
+  choices:
+    - slurm
+    - torque
+  title: Batch system
+  short: Type of batch system running on your server
+  long: Type of batch system running on your server. Only slurm and torque are supported at this time
+  group: "execution"
+  explevel: "easy"
+queue:
+  default: ""
+  type: string
+  minchars: 0
+  maxchars: 100
+  title: Queue name
+  short: Name of the batch queue to which jobs will be submitted
+  long: Name of the batch queue to which jobs will be submitted. If not defined the batch system default will be used.
+  group: "execution"
+  explevel: easy
+queue_limit:
+  default: 100
+  type: integer
+  min: 1
+  max: 9999
+  title: Number of jobs to submit to the batch system
+  short: Number of jobs to submit to the batch system
+  long: This parameter controls the number of jobs that will be submitted to the batch system. In combination with the concat parameter this allow to limit the load on the queueing system and also make sure jobs remain in the queue for some time (if concat > 1) to avoid high system loads on the batch system.
+  group: "execution"
+  explevel: easy
+concat:
+  default: 1
+  type: integer
+  min: 1
+  max: 9999
+  precision: 0
+  title: Number of models to produce per job.
+  short: Multiple models can be calculated within one job
+  long:
+    This defines the number of models that will be generated within one job script.
+    This allows to concatenate the generation of models into one script.
+    In that way jobs might run longer in the batch system and reduce the load on the scheduler.
+  group: "execution"
+  explevel: easy
+self_contained:
+  default: false
+  type: boolean
+  title: Create a self-contained run
+  short: This option will copy the CNS scripts and executable to the run folder.
+  long:
+    This option will copy the CNS scripts and executable to the run folder to ensure that all scripts are available within the run dir.
+    This can be useful, for example, for remote execution of a job or for debugging purposes, allowing the scripts to be edited without touching the
+    main installation.
+  group: "execution"
+  explevel: guru
+clean:
+  default: true
+  type: boolean
+  title: Clean the module output files.
+  short: Clean the module if run succeeds by compressing or removing output files.
+  long:
+    When running haddock through the command-line, the 'clean' parameter will
+    instruct the workflow to clean the output files of the module if the whole
+    run succeeds. In this process, PDB and PSF files are compressed to gzip,
+    with the extension `.gz`. While files with extension `.seed`, `.inp`, and
+    `.out` files are archived, and the original files deleted. The time to
+    perform a cleaning operation depends on the number of files in the folders
+    and the size of the files. However, it should not represent a limit step in
+    the workflow. For example, a rigidbody sampling 10,000 structures takes
+    about 4 minutes in our servers. This operation uses as many cores as allowed
+    by the user in the 'ncores' parameter. SSD disks will perform faster by
+    definition. See also the 'haddock3-clean' and 'haddock3-unpack' command-line
+    clients.
+  group: "clean"
+  explevel: easy
+offline:
+  default: false
+  type: boolean
+  title: Isolate haddock3 from internet.
+  short: Completely isolate the haddock3 run & results from internet.
+  long: For interactive plots, we are using the plotly library. It can be
+    embedded as a link to the plotly.js library and fetched from the web,
+    or directly copied on the html files AT THE COST OF ~3Mb per file.
+    Setting this parameter to `true` will add the javascript library in
+    generated files, therefore completely isolating haddock3 from any web call.
+  group: "execution"
+  explevel: easy
+less_io:
+  default: false
+  type: boolean
+  title: Reduce the amount of I/O operations.
+  short: Reduce the amount of I/O operations.
+  long: This option will reduce the amount of I/O operations by writing
+    less files to disk. This can be useful for example when running on
+    a network file system where I/O operations are slow.
+  group: "execution"
+  explevel: easy
+  incompatible:
+    mode: batch
diff --git a/src/haddock/modules/sampling/lightdock/__init__.py b/src/haddock/modules/sampling/lightdock/__init__.py
index d8f58961c..dc8a38256 100644
--- a/src/haddock/modules/sampling/lightdock/__init__.py
+++ b/src/haddock/modules/sampling/lightdock/__init__.py
@@ -48,7 +48,7 @@ def _run(self) -> None:
 
         # Check if multiple models are provided
         if len(models) > 1:
-            _msg = "Only one model allowed in LightDock sampling module"
+            _msg = "Only one model is allowed in LightDock sampling module"
             self.finish_with_error(_msg)
 
         model = models[0]

From 910d9f3d350d094ad6d47c656f3b82d54e5a0d30 Mon Sep 17 00:00:00 2001
From: arha <arha@tintin.ib>
Date: Mon, 23 Sep 2024 12:21:46 +0200
Subject: [PATCH 2/8] functions moved from __init__ to attractmodule.py

---
 .../modules/sampling/attract/__init__.py      | 202 ++++++------------
 .../modules/sampling/attract/attractmodule.py | 131 ++++++++++++
 2 files changed, 194 insertions(+), 139 deletions(-)
 create mode 100644 src/haddock/modules/sampling/attract/attractmodule.py

diff --git a/src/haddock/modules/sampling/attract/__init__.py b/src/haddock/modules/sampling/attract/__init__.py
index 877a32094..7bb202070 100644
--- a/src/haddock/modules/sampling/attract/__init__.py
+++ b/src/haddock/modules/sampling/attract/__init__.py
@@ -12,10 +12,15 @@
 scoring (two ways) and assembly + possible restraints. 
 ."""
 import os
+import sys
 import subprocess
 import shutil
 import shlex
 from pathlib import Path
+from haddock.modules.sampling.attract.attractmodule import (
+    rename_and_coarse_grain,
+    process_rna_file,
+)
 
 from haddock import log
 from haddock.core.defaults import MODULE_DEFAULT_YAML
@@ -42,85 +47,24 @@ def __init__(self,
 
     @classmethod
     def confirm_installation(cls) -> None:
-        """Confirm that ATTRACT is installed."""
-        attract_dir = os.environ['ATTRACTDIR']
+        """Confirm that ATTRACT and its environment variables are properly set."""
+        try:
+            attract_dir = os.environ['ATTRACTDIR']
+            attract_tools = os.environ['ATTRACTTOOLS']
+            nalib = os.environ['LIBRARY']
+        except KeyError as e:
+            raise EnvironmentError(f"Required environment variable not found: {e}")
+
         attract_exec = Path(attract_dir, 'attract')
-        cmd = f'{attract_exec}'
-        p = subprocess.run(shlex.split(cmd), capture_output=True)
-        err = p.stderr.decode('utf-8')
-        if "Too few arguments" in err and "usage:" in err:
-            # ATTRACT is installed correctly 
-            pass
-        else:
-            raise Exception('ATTRACT is not installed properly')
-
-    def determine_molecule_type(self, pdb_file, lines_to_check=10):
-        """Check sevelal last lines of pdb file to label it 
-        as rna, dna or protein."""
-        dna_residues = {'DA', 'DG', 'DC', 'DT'}  
-        rna_residues = {'A', 'U', 'G', 'C', 'RA', 'RU', 'RG', 'RC'}  
-        with open(pdb_file, 'r') as f:
-            lines = f.readlines()[-lines_to_check:]
-        for line in reversed(lines):
-            if line.startswith("ATOM"):
-                residue_name = line[17:20].strip()
-                if residue_name in dna_residues:
-                    return 'dna'
-                elif residue_name in rna_residues:
-                    return 'rna'
-        return 'protein'
-
-    def rename_and_coarse_grain(self, file_name, new_name, reduce_path, is_rna=False):
-        """Rename models to 'protein-aa.pdb' and 'rna-aa.pdb' and convert 
-        them to ATTRACT coarse-grained representation."""
-        os.rename(file_name, new_name)
-        cmd = ['python', reduce_path, '--rna', new_name] if is_rna else ['python', reduce_path, new_name]
-        result = subprocess.run(cmd, capture_output=True, text=True)
-        return result
-
-    def get_frag_lists(self, file):
-        """Get list of fragments and list of motifs from rna-aar.pdb."""
-        tmp = []
-        seq = ''
-        # extract sequence from structure
-        for line in file:
-            if line.startswith("ATOM"):
-                residue_name = line[17:20].strip()[-1]
-                residue_id = int(line[22:26].strip())
-                if [residue_name, residue_id] not in tmp:
-                    tmp.append([residue_name, residue_id])
-                    seq = seq + residue_name
-        # get info for motif.list and boundfrag.list
-        prev_frag = seq[:3]
-        count = 1 
-        boundfrag_list = [[count, prev_frag]]
-        motif_list = [prev_frag]
-        for x in seq[3:]:
-            count += 1 
-            next_frag = prev_frag[1:] + x
-            prev_frag = next_frag
-            boundfrag_list.append([count, next_frag])
-            if prev_frag not in motif_list: motif_list.append(next_frag)
-        return motif_list, boundfrag_list
-
-    def get_frag_pdbs(self, file):
-        """Extract fragments from full rna"""
-        first_res_id = int(file[0][22:26].strip())
-        last_res_id = int(file[-1][22:26].strip())
-        frag_total = last_res_id - first_res_id - 1 
-        fragmens=[]
-        for curr_residue_id in range(first_res_id, first_res_id+frag_total):
-            curr_frag = []
-            for line in file:
-                if line.startswith("ATOM"):
-                    residue_id = int(line[22:26].strip())
-                    if residue_id in [curr_residue_id, curr_residue_id+1, curr_residue_id+2]:
-                        curr_frag.append(line)
-                    elif residue_id > curr_residue_id+2:
-                        fragmens.append([curr_frag])
-                        break
-        fragmens.append([curr_frag])
-        return fragmens
+        result = subprocess.run([str(attract_exec)], capture_output=True, text=True)
+        
+        if "Too few arguments" not in result.stderr:
+            raise RuntimeError('ATTRACT is not installed properly')
+
+        # pass paths to _run for further use
+        cls.attract_dir = attract_dir
+        cls.attract_tools = attract_tools
+        cls.nalib = nalib
 
     def _run(self) -> None:
         """Execute module.
@@ -129,21 +73,13 @@ def _run(self) -> None:
             1. check library + 
             2. process input +
             3. make folder +
-            4. run docking 
-            5. run lrmsd (optional)
-            6. HIPPO (optional)
+            4. run docking < currently working on > 
+            5. run lrmsd (eventually optional)
+            6. HIPPO (eventually optional)
             7. Assembly ?
             8. Scoring ?
             9. SeleTopChains ? """
 
-        # Check if $LIBRARY exists 
-        try:
-            nalib = os.environ['LIBRARY']
-        except:
-            _msg = "Environment variable $LIBRARY not found."
-            self.finish_with_error(_msg)
-        log.info("NAlib found")
-
         # Get the models generated in previous step
         models: list[PDBFile] = [
             p for p in self.previous_io.output
@@ -165,66 +101,54 @@ def _run(self) -> None:
         # Ensure we have exactly protein and RNA molecules
         model_1 = models[0]
         model_2 = models[1]
-        label_1 = self.determine_molecule_type(model_1.file_name) 
-        label_2 = self.determine_molecule_type(model_2.file_name) 
-        labels = {label_1, label_2}
+    
+        attracttools = self.attract_tools
+        attrac_reduce_path = Path(attracttools, 'reduce.py')
+
+        _, label_1 = rename_and_coarse_grain(model_1.file_name, attrac_reduce_path)
+        _, label_2 = rename_and_coarse_grain(model_2.file_name, attrac_reduce_path)
         
-        if labels != {'protein', 'rna'}:
+        if {label_1, label_2} != {'protein', 'rna'}:
             _msg = "ATTRACT requires protein and RNA molecules as input"
             self.finish_with_error(_msg)
-
-        # Convert each molecule to corse-grained representation
-        # using $ATTRACTTOOLD/reduce.py 
-        log.info("Converting to coarse-grain representation")
-        try:
-            attracttools = os.environ['ATTRACTTOOLS']
-            attrac_reduce_path = Path(attracttools, 'reduce.py')
-
-            if label_1 == 'protein':
-                self.rename_and_coarse_grain(model_1.file_name, 'protien-aa.pdb', attrac_reduce_path, is_rna=False)
-                self.rename_and_coarse_grain(model_2.file_name, 'rna-aa.pdb', attrac_reduce_path, is_rna=True)
-            else:
-                self.rename_and_coarse_grain(model_1.file_name, 'rna-aa.pdb', attrac_reduce_path, is_rna=True)
-                self.rename_and_coarse_grain(model_2.file_name, 'protien-aa.pdb', attrac_reduce_path, is_rna=False)
-        except:
-            _msg = "Convertation to coarse-grain representation failes"
-            self.finish_with_error(_msg)
         
-        # Add required by ATTRACT files:
+        # Add required by ATTRACT files: 
+        # - link fragment library          
+        # - get motif.list, boundfrag.list, and fragXr.pdb 
         log.info("Preparing docking directory")
-        # 1.link fragment library          
+        
+        nalib = self.nalib
         cmd = f"ln -s {nalib} nalib"
         p = subprocess.run(shlex.split(cmd), capture_output=True)
         err = p.stderr.decode('utf-8')
-        # 2. get motif.list and boundfrag.list
-        f = open('rna-aar.pdb','r')
-        file = f.readlines()
-        f.close()
-        motif, boundfrag = self.get_frag_lists(file)
-        with open('boundfrag.list', 'w') as f:
-            for item in boundfrag:
-                f.write(f"{item[0]} {item[1]}\n")
-        with open('motif.list', 'w') as f:
-            for item in motif:
-                f.write(f"{item}\n")
-        # 3. create fragXr.pdb (optional)
-        fragments = self.get_frag_pdbs(file)
-        for i in range(1, len(fragments)+1):
-            with open(f"frag{i}r.list", 'w') as f:
-                for fragment in fragments[i-1]:
-                    for line in fragment:
-                        f.write(line)
-        
-        # Run docking 
-        #log.info("Running ATTRACT")   
-        
-        
-        # ???
+
+        process_rna_file('rna-aar.pdb')
+
+        log.info("Running ATTRACT...")
+        cmd = [
+        'bash', '/trinity/login/arha/tools/scripts/attract/docking/dock-lrmsd-in-haddock.sh',
+        '.',
+        '10',
+        '/trinity/login/arha/dev-h3/test-attr/tmp'
+        ]
+
+        docking = subprocess.run(cmd, capture_output=True, text=True)
+
+        with open('log.txt','w') as f:
+            for item in docking.stdout:
+                f.write(f"{item}")
+
+        with open('err.txt', 'w') as f:
+                    for item in docking.stderr:
+                        f.write(f"{item}")
+
         list_of_created_models = []     
-        created_models = ['protien-aa.pdb','rna-aa.pdb']
+        created_models = ['protein-aa.pdb','rna-aa.pdb']
         for model in created_models:
             pdb_object = PDBFile(Path(model).name,  path=".")
             list_of_created_models.append(pdb_object)
+       
+        self.output_models = list_of_created_models
+        self.export_io_models()
 
-            self.output_models = list_of_created_models
-            self.export_io_models()
\ No newline at end of file
+            
\ No newline at end of file
diff --git a/src/haddock/modules/sampling/attract/attractmodule.py b/src/haddock/modules/sampling/attract/attractmodule.py
new file mode 100644
index 000000000..4f421d272
--- /dev/null
+++ b/src/haddock/modules/sampling/attract/attractmodule.py
@@ -0,0 +1,131 @@
+"""Set if functions related to [attract]"""
+
+import os
+import sys 
+import subprocess
+import shutil
+import shlex
+from pathlib import Path
+
+
+def rename_and_coarse_grain(pdb_file: str, reduce_path: str, lines_to_check: int = 10):
+    """
+    Check several last lines of pdb file to label it as RNA, DNA or protein; 
+    Then reduce it to ATTRACT coarse grained representation.
+
+    Args:
+    pdb_file (str): Path to the PDB file.
+    lines_to_check (int): Number of lines to check from the end of the file.
+    reduce_path (str): Path to $ATTRACTTOOLS/reduce.py
+
+    Returns:
+    subprocess.CompletedProcess: Result of the subprocess run. 
+    (ATTRACT CG atom mapping)
+    molecule_type (str): Label of the input file
+    """
+    dna_residues = {'DA', 'DG', 'DC', 'DT'}
+    rna_residues = {'A', 'U', 'G', 'C', 'RA', 'RU', 'RG', 'RC'}
+    
+    old_path = Path(pdb_file)
+    
+    with old_path.open('r') as f:
+        lines = f.readlines()[-lines_to_check:]
+    
+    for line in lines:
+        if line.startswith("ATOM"):
+            residue_name = line[17:20].strip()
+            if residue_name in dna_residues:
+                molecule_type = 'dna'
+            elif residue_name in rna_residues:
+                molecule_type = 'rna'
+            else:
+                molecule_type = 'protein'
+    
+        new_path = old_path.with_name(f'{molecule_type}-aa.pdb')
+        old_path.replace(new_path)
+        
+        cmd = [sys.executable, reduce_path]
+        if molecule_type in ('dna', 'rna'):
+            cmd.append(f'--{molecule_type}')
+        cmd.append(str(new_path))
+        
+        return subprocess.run(cmd, capture_output=True, text=True), molecule_type
+
+def process_rna_file(pdb_file: str = 'rna-aar.pdb'):
+    """
+    Process PDB file to generate RNA fragment and motif lists, and extract individual fragments.
+    Save generated files. 
+    
+    Args:
+    pdb_file (str): Path to reduced RNA PDB file.    
+
+    Note: 
+    This script works on all-atom pdb as well
+    """
+    enumer_nucl = []
+    sequence = ''
+    fragments = []
+    residue_buffer = []  
+    current_residue = [] 
+    last_residue_id = None
+
+    with open(pdb_file, 'r') as f:
+        for line in f:
+            if line.startswith("ATOM"):
+                residue_name = line[17:20].strip()[-1]    
+                residue_id = int(line[22:26].strip())
+
+                # Get sequence
+                if [residue_name, residue_id] not in enumer_nucl:
+                    enumer_nucl.append([residue_name, residue_id])
+                    sequence += residue_name
+              
+                # Get fragments
+                if last_residue_id is None or residue_id == last_residue_id:
+                    current_residue.append(line)
+                else:
+                    if current_residue:
+                        residue_buffer.append(current_residue)
+                        if len(residue_buffer) == 3:
+                            fragments.append(residue_buffer[:])
+                            residue_buffer.pop(0) 
+                    current_residue = [line]
+                last_residue_id = residue_id 
+
+        # Add the last residue if any
+        if current_residue:
+            residue_buffer.append(current_residue)
+            if len(residue_buffer) == 3:
+                fragments.append(residue_buffer[:])
+                
+    # Generate motif and boundfrag lists
+    prev_frag = sequence[:3]
+    count = 1
+    boundfrag_list = [[count, prev_frag]]
+    motif_list = [prev_frag]
+
+    for x in sequence[3:]:
+        count += 1
+        next_frag = prev_frag[1:] + x
+        prev_frag = next_frag
+        boundfrag_list.append([count, next_frag])
+        if prev_frag not in motif_list: 
+            motif_list.append(next_frag)
+
+    # Save boundfrag_list
+    with open('boundfrag.list', 'w') as f:
+        for item in boundfrag_list:
+            f.write(f"{item[0]} {item[1]}\n")
+    # Save motif_list
+    with open('motif.list', 'w') as f:
+        for item in motif_list:
+            f.write(f"{item}\n")
+    # Save each fragments
+    for i, fragment in enumerate(fragments, start=1):
+        with open(f"frag{i}r.pdb", 'w') as frag_file:
+            for nucleotide in fragment:
+                for atomline in nucleotide:
+                    if not atomline.endswith('\n'):
+                        atomline += '\n'
+                    frag_file.write(atomline)
+    return
\ No newline at end of file

From 70295d797315d4293f4b23c2cd96c07c3c8d5ace Mon Sep 17 00:00:00 2001
From: arha <arha@tintin.ib>
Date: Tue, 24 Sep 2024 15:51:40 +0200
Subject: [PATCH 3/8] raw installation guide

---
 src/fast-rmsdmatrix                           |  1 +
 src/fcc                                       |  1 +
 .../modules/sampling/attract/__init__.py      |  3 +-
 .../modules/sampling/attract/install.md       | 62 +++++++++++++++++++
 4 files changed, 66 insertions(+), 1 deletion(-)
 create mode 160000 src/fast-rmsdmatrix
 create mode 160000 src/fcc
 create mode 100644 src/haddock/modules/sampling/attract/install.md

diff --git a/src/fast-rmsdmatrix b/src/fast-rmsdmatrix
new file mode 160000
index 000000000..4580d1c89
--- /dev/null
+++ b/src/fast-rmsdmatrix
@@ -0,0 +1 @@
+Subproject commit 4580d1c89a0ec162ca9b32c67d2806afcac52f13
diff --git a/src/fcc b/src/fcc
new file mode 160000
index 000000000..3a1626de3
--- /dev/null
+++ b/src/fcc
@@ -0,0 +1 @@
+Subproject commit 3a1626de3366f6db8b45caed3bd9fbc6b2881286
diff --git a/src/haddock/modules/sampling/attract/__init__.py b/src/haddock/modules/sampling/attract/__init__.py
index 7bb202070..bb3008802 100644
--- a/src/haddock/modules/sampling/attract/__init__.py
+++ b/src/haddock/modules/sampling/attract/__init__.py
@@ -141,7 +141,8 @@ def _run(self) -> None:
         with open('err.txt', 'w') as f:
                     for item in docking.stderr:
                         f.write(f"{item}")
-
+        # dat2pdb:
+        # /trinity/login/arha/tools/attract/bin/collect $m-e7.dat /dev/null frag${x}r.pdb --ens 2 nalib/$m-clust1.0r.list > models_frag${X}r.pdb
         list_of_created_models = []     
         created_models = ['protein-aa.pdb','rna-aa.pdb']
         for model in created_models:
diff --git a/src/haddock/modules/sampling/attract/install.md b/src/haddock/modules/sampling/attract/install.md
new file mode 100644
index 000000000..c89e29cd4
--- /dev/null
+++ b/src/haddock/modules/sampling/attract/install.md
@@ -0,0 +1,62 @@
+This is very raw instruction on how to install ATTRACT on Linux (does not work on Microsoft Windows nor on Mac):
+
+1. Obtain gfortran, version 13.3.0 
+2. Obtain g++, version 11.5.0  
+3. Make and activate attract environment with Python 2.7, numpy, scipy and pdb2pqr 
+(normally, pyenv/conda/mamba can deal with it)
+4. git clone  --recursive https://github.com/sjdv1982/attract.git
+5. edit attract/bin/Makefile:
+add ` FFLAGS="-std=f90 -g -O3 -fno-automatic -ffast-math -fcray-pointer" ` to the end of each line with `$(MAKE)` (but not $(MAKE) clean). 
+This is what you want to see:
+```
+
+em: as
+	cd $(GPU_DIR)/emATTRACT/bin && $(MAKE) TARGET=$(TARGET) FFLAGS="-std=f90 -g -O3 -fno-automatic -ffast-math -fcray-pointer"
+	cd $(BINDIR) && ln -sf $(GPU_DIR2)/emATTRACT/bin/emATTRACT 
+
+gpuATTRACTcheck: as
+	cd $(GPU_DIR)/gpuATTRACTcheck/bin && $(MAKE) TARGET=$(TARGET) FFLAGS="-std=f90 -g -O3 -fno-automatic -ffast-math -fcray-pointer"
+	cd $(BINDIR) && ln -sf $(GPU_DIR2)/gpuATTRACTcheck/bin/gpuATTRACTcheck
+
+mc: as
+	cd $(GPU_DIR)/mcATTRACT/bin && $(MAKE) TARGET=$(TARGET) FFLAGS="-std=f90 -g -O3 -fno-automatic -ffast-math -fcray-pointer"
+	cd $(BINDIR) && ln -sf $(GPU_DIR2)/mcATTRACT/bin/mcATTRACT
+	
+sc: as
+	cd $(GPU_DIR)/scATTRACT/bin && $(MAKE) TARGET=$(TARGET) FFLAGS="-std=f90 -g -O3 -fno-automatic -ffast-math -fcray-pointer"
+	cd $(BINDIR) && ln -sf $(GPU_DIR)/scATTRACT/bin/scATTRACT
+
+as: check_cuda_version
+	cd $(GPU_DIR)/AttractServer/lib && $(MAKE) TARGET=$(TARGET) FFLAGS="-std=f90 -g -O3 -fno-automatic -ffast-math -fcray-pointer"
+	cd $(BINDIR) && ln -sf $(GPU_DIR2)/AttractServer/lib/libAttractServer.so
+
+attract:
+	cd $(ATTRACTDIR) && $(MAKE) -f _Makefile all FFLAGS="-std=f90 -g -O3 -fno-automatic -ffast-math -fcray-pointer"
+
+```
+6. cd attract/bin 
+7. make attract
+
+If there are multiple versions of gfortran and g++ on your system, you can point to the required ones by adding these to the Makefile, line 6:
+CXX = /path/gcc@11
+FC = /path/gfortran@13
+
+If you need to restart installation - run `make clean` first 
+
+8. Export the ATTRACT environment variables. In your .bashrc file, add the following:
+ ```
+  export ATTRACTDIR=/path/attract/bin (i.e. wherever you installed attract)
+  export ATTRACTTOOLS=$ATTRACTDIR/../tools
+  export ATTRACTGUI=$ATTRACTDIR/../gui
+  export LD_LIBRARY_PATH=$ATTRACTDIR:$LD_LIBRARY_PATH
+```
+then: 
+ ` source ~/.bashrc`
+then activate attract environment again
+
+9. Test your installation:
+- "$ATTRACTDIR/attract" should result in:
+  Too few arguments
+  usage: $path/attract structures.dat parameterfile receptor.pdb [ligand.pdb] [options]
+- "python -c 'import numpy, scipy' " should give no error
+- "python -c 'import pdb2pqr' " should give no error
\ No newline at end of file

From 4b8755e3a9f91c379622be56b5f6108629da49e0 Mon Sep 17 00:00:00 2001
From: arha <arha@tintin.ib>
Date: Wed, 25 Sep 2024 10:56:22 +0200
Subject: [PATCH 4/8] ongoing

---
 .../modules/sampling/attract/__init__.py      | 23 +++++++++++++++----
 .../modules/sampling/attract/attractmodule.py |  2 +-
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/haddock/modules/sampling/attract/__init__.py b/src/haddock/modules/sampling/attract/__init__.py
index bb3008802..7080e550d 100644
--- a/src/haddock/modules/sampling/attract/__init__.py
+++ b/src/haddock/modules/sampling/attract/__init__.py
@@ -126,21 +126,34 @@ def _run(self) -> None:
 
         log.info("Running ATTRACT...")
         cmd = [
-        'bash', '/trinity/login/arha/tools/scripts/attract/docking/dock-lrmsd-in-haddock.sh',
-        '.',
-        '10',
-        '/trinity/login/arha/dev-h3/test-attr/tmp'
+        'bash', 
+        '/trinity/login/arha/tools/scripts/attract/docking/dock-lrmsd-in-haddock.sh', # move this to haddock3/src/.../attract/
+        '.', # dir with input files 
+        '10', # number of starting points for sampling
+        '/trinity/login/arha/dev-h3/test-attr/tmp' # where to store tmp files
         ]
+        # need to add randsearch
 
         docking = subprocess.run(cmd, capture_output=True, text=True)
 
         with open('log.txt','w') as f:
             for item in docking.stdout:
                 f.write(f"{item}")
-
+        # here     
         with open('err.txt', 'w') as f:
                     for item in docking.stderr:
                         f.write(f"{item}")
+
+        # 
+        # score with HIPPO
+        
+        #
+        # assemble 
+        #
+
+        #
+        # score with attract? haddock? hippo (idk if possible)? 
+                
         # dat2pdb:
         # /trinity/login/arha/tools/attract/bin/collect $m-e7.dat /dev/null frag${x}r.pdb --ens 2 nalib/$m-clust1.0r.list > models_frag${X}r.pdb
         list_of_created_models = []     
diff --git a/src/haddock/modules/sampling/attract/attractmodule.py b/src/haddock/modules/sampling/attract/attractmodule.py
index 4f421d272..c0031beea 100644
--- a/src/haddock/modules/sampling/attract/attractmodule.py
+++ b/src/haddock/modules/sampling/attract/attractmodule.py
@@ -48,7 +48,7 @@ def rename_and_coarse_grain(pdb_file: str, reduce_path: str, lines_to_check: int
         if molecule_type in ('dna', 'rna'):
             cmd.append(f'--{molecule_type}')
         cmd.append(str(new_path))
-        
+        # this works in haddock3 env 
         return subprocess.run(cmd, capture_output=True, text=True), molecule_type
 
 def process_rna_file(pdb_file: str = 'rna-aar.pdb'):

From f1201a4692c2ac1c2dbda1318586383d4619dfaf Mon Sep 17 00:00:00 2001
From: arha <arha@tintin.ib>
Date: Wed, 2 Oct 2024 14:11:00 +0200
Subject: [PATCH 5/8] installation guidelines finalised

---
 .../modules/sampling/attract/__init__.py      | 61 +++++--------------
 .../modules/sampling/attract/attractmodule.py |  6 +-
 .../modules/sampling/attract/install.md       | 12 +++-
 3 files changed, 26 insertions(+), 53 deletions(-)

diff --git a/src/haddock/modules/sampling/attract/__init__.py b/src/haddock/modules/sampling/attract/__init__.py
index 7080e550d..b748e2d08 100644
--- a/src/haddock/modules/sampling/attract/__init__.py
+++ b/src/haddock/modules/sampling/attract/__init__.py
@@ -12,7 +12,8 @@
 scoring (two ways) and assembly + possible restraints. 
 ."""
 import os
-import sys
+#import sys
+import tempfile
 import subprocess
 import shutil
 import shlex
@@ -52,6 +53,7 @@ def confirm_installation(cls) -> None:
             attract_dir = os.environ['ATTRACTDIR']
             attract_tools = os.environ['ATTRACTTOOLS']
             nalib = os.environ['LIBRARY']
+            randsearch = os.environ['RANDSEARCH']
         except KeyError as e:
             raise EnvironmentError(f"Required environment variable not found: {e}")
 
@@ -68,17 +70,12 @@ def confirm_installation(cls) -> None:
 
     def _run(self) -> None:
         """Execute module.
-            
-            # todo 
-            1. check library + 
-            2. process input +
-            3. make folder +
-            4. run docking < currently working on > 
-            5. run lrmsd (eventually optional)
-            6. HIPPO (eventually optional)
-            7. Assembly ?
-            8. Scoring ?
-            9. SeleTopChains ? """
+        Currently:
+        1. Converts protein and RNA to the ATTRACT coarse-grained representation
+        2. Splits RNA into overlapping fragments
+        3. Creates the files required by ATTRACT: motif.list, boundfrag.list, nalib
+        4. Passes initial all-atom protein and RNA to next module
+        """
 
         # Get the models generated in previous step
         models: list[PDBFile] = [
@@ -111,10 +108,8 @@ def _run(self) -> None:
         if {label_1, label_2} != {'protein', 'rna'}:
             _msg = "ATTRACT requires protein and RNA molecules as input"
             self.finish_with_error(_msg)
-        
+
         # Add required by ATTRACT files: 
-        # - link fragment library          
-        # - get motif.list, boundfrag.list, and fragXr.pdb 
         log.info("Preparing docking directory")
         
         nalib = self.nalib
@@ -124,38 +119,10 @@ def _run(self) -> None:
 
         process_rna_file('rna-aar.pdb')
 
-        log.info("Running ATTRACT...")
-        cmd = [
-        'bash', 
-        '/trinity/login/arha/tools/scripts/attract/docking/dock-lrmsd-in-haddock.sh', # move this to haddock3/src/.../attract/
-        '.', # dir with input files 
-        '10', # number of starting points for sampling
-        '/trinity/login/arha/dev-h3/test-attr/tmp' # where to store tmp files
-        ]
-        # need to add randsearch
-
-        docking = subprocess.run(cmd, capture_output=True, text=True)
-
-        with open('log.txt','w') as f:
-            for item in docking.stdout:
-                f.write(f"{item}")
-        # here     
-        with open('err.txt', 'w') as f:
-                    for item in docking.stderr:
-                        f.write(f"{item}")
-
-        # 
-        # score with HIPPO
-        
-        #
-        # assemble 
-        #
-
-        #
-        # score with attract? haddock? hippo (idk if possible)? 
-                
-        # dat2pdb:
-        # /trinity/login/arha/tools/attract/bin/collect $m-e7.dat /dev/null frag${x}r.pdb --ens 2 nalib/$m-clust1.0r.list > models_frag${X}r.pdb
+        tmp_dir = tempfile.mkdtemp(dir=os.getcwd())
+        # will be used during the docking
+        shutil.rmtree(tmp_dir)
+
         list_of_created_models = []     
         created_models = ['protein-aa.pdb','rna-aa.pdb']
         for model in created_models:
diff --git a/src/haddock/modules/sampling/attract/attractmodule.py b/src/haddock/modules/sampling/attract/attractmodule.py
index c0031beea..85a3d70a4 100644
--- a/src/haddock/modules/sampling/attract/attractmodule.py
+++ b/src/haddock/modules/sampling/attract/attractmodule.py
@@ -1,10 +1,10 @@
 """Set if functions related to [attract]"""
 
-import os
+#import os
 import sys 
 import subprocess
-import shutil
-import shlex
+#import shutil
+#import shlex
 from pathlib import Path
 
 
diff --git a/src/haddock/modules/sampling/attract/install.md b/src/haddock/modules/sampling/attract/install.md
index c89e29cd4..888ed170e 100644
--- a/src/haddock/modules/sampling/attract/install.md
+++ b/src/haddock/modules/sampling/attract/install.md
@@ -1,7 +1,7 @@
 This is very raw instruction on how to install ATTRACT on Linux (does not work on Microsoft Windows nor on Mac):
 
-1. Obtain gfortran, version 13.3.0 
-2. Obtain g++, version 11.5.0  
+1. Obtain gfortran, version 13.3.0
+2. Obtain g++, version 11.5.0
 3. Make and activate attract environment with Python 2.7, numpy, scipy and pdb2pqr 
 (normally, pyenv/conda/mamba can deal with it)
 4. git clone  --recursive https://github.com/sjdv1982/attract.git
@@ -59,4 +59,10 @@ then activate attract environment again
   Too few arguments
   usage: $path/attract structures.dat parameterfile receptor.pdb [ligand.pdb] [options]
 - "python -c 'import numpy, scipy' " should give no error
-- "python -c 'import pdb2pqr' " should give no error
\ No newline at end of file
+- "python -c 'import pdb2pqr' " should give no error
+
+10. Download & unpack the library of RNA fragment conformations from [here](https://surfdrive.surf.nl/files/index.php/s/7yWPzLd1ov3AaUE)
+`export LIBRARY=/path/to/nalib`
+
+11. Lastly, point to a directory where the large files (~6GB per default ATTRACT run) containing the starting points for the docking can be stored
+`export RANDSEARCH=/path/to/where`

From 9258a7753e2e10c51fd87a19b6d14606bc8d69f7 Mon Sep 17 00:00:00 2001
From: Rodrigo V Honorato <r.vargashonorato@uu.nl>
Date: Wed, 2 Oct 2024 17:11:54 +0200
Subject: [PATCH 6/8] add `test_attract.py`

---
 integration_tests/test_attract.py | 66 +++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 integration_tests/test_attract.py

diff --git a/integration_tests/test_attract.py b/integration_tests/test_attract.py
new file mode 100644
index 000000000..deee5a044
--- /dev/null
+++ b/integration_tests/test_attract.py
@@ -0,0 +1,66 @@
+import os
+import shutil
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from haddock.libs.libontology import PDBFile
+from haddock.modules.sampling.attract import \
+    DEFAULT_CONFIG as DEFAULT_ATTRACT_CONFIG
+from haddock.modules.sampling.attract import HaddockModule as AttractModule
+
+from . import GOLDEN_DATA, has_attract
+
+
+class MockPreviousIO:
+    """Mock the previous IO module."""
+
+    def __init__(self, path):
+        self.path = path
+
+    def retrieve_models(self):
+        """Mock the retrieval of some models"""
+        shutil.copy(
+            Path(GOLDEN_DATA, "prot.pdb"),
+            Path(self.path, "prot.pdb"),
+        )
+
+        shutil.copy(
+            Path(GOLDEN_DATA, "rna.pdb"),
+            Path(self.path, "rna.pdb"),
+        )
+
+        return [
+            PDBFile(file_name="prot.pdb", path=self.path),
+            PDBFile(file_name="rna.pdb", path=self.path),
+        ]
+
+    def output(self) -> None:
+        """Mock the output"""
+        return None
+
+
+@pytest.fixture(name="attract_module")
+def fixture_attract_module():
+    """Initialize the attract module"""
+    with tempfile.TemporaryDirectory() as tempdir:
+        os.chdir(tempdir)
+        yield AttractModule(
+            order=0, path=Path("."), initial_params=DEFAULT_ATTRACT_CONFIG
+        )
+
+
+@has_attract
+@pytest.mark.skip(reason="work-in-progress")
+def test_attract(attract_module):
+    """Integration test for the attract module"""
+
+    attract_module.previous_io = MockPreviousIO(path=attract_module.path)
+
+    # attract_module.attract_tools = ???
+    # attract_module.nalib = ???
+
+    attract_module.run()
+
+    assert len(attract_module.output_models) > 0

From 41e63b8bfa97a47c1aa92fdf337cf4663bd95e0e Mon Sep 17 00:00:00 2001
From: Rodrigo V Honorato <r.vargashonorato@uu.nl>
Date: Wed, 2 Oct 2024 17:12:12 +0200
Subject: [PATCH 7/8] add rna to golden data

---
 integration_tests/golden_data/rna.pdb | 89 +++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 integration_tests/golden_data/rna.pdb

diff --git a/integration_tests/golden_data/rna.pdb b/integration_tests/golden_data/rna.pdb
new file mode 100644
index 000000000..aab8b5259
--- /dev/null
+++ b/integration_tests/golden_data/rna.pdb
@@ -0,0 +1,89 @@
+ATOM    666  O5'   G B  87     -27.630  38.463 -14.109  1.00 42.87           O  
+ATOM    667  C5'   G B  87     -28.602  38.855 -13.151  1.00 42.27           C  
+ATOM    668  C4'   G B  87     -29.933  39.139 -13.799  1.00 48.64           C  
+ATOM    669  O4'   G B  87     -29.779  40.178 -14.803  1.00 43.31           O  
+ATOM    670  C3'   G B  87     -30.564  37.982 -14.561  1.00 44.60           C  
+ATOM    671  O3'   G B  87     -31.242  37.065 -13.721  1.00 56.31           O  
+ATOM    672  C2'   G B  87     -31.474  38.697 -15.548  1.00 41.13           C  
+ATOM    673  O2'   G B  87     -32.656  39.153 -14.903  1.00 47.54           O  
+ATOM    674  C1'   G B  87     -30.621  39.913 -15.906  1.00 44.86           C  
+ATOM    675  N9    G B  87     -29.775  39.659 -17.088  1.00 45.69           N  
+ATOM    676  C8    G B  87     -28.407  39.541 -17.117  1.00 44.82           C  
+ATOM    677  N7    G B  87     -27.938  39.311 -18.313  1.00 52.84           N  
+ATOM    678  C5    G B  87     -29.064  39.272 -19.124  1.00 47.42           C  
+ATOM    679  C6    G B  87     -29.183  39.057 -20.521  1.00 42.22           C  
+ATOM    680  O6    G B  87     -28.286  38.852 -21.348  1.00 61.13           O  
+ATOM    681  N1    G B  87     -30.512  39.100 -20.935  1.00 45.00           N  
+ATOM    682  C2    G B  87     -31.588  39.318 -20.108  1.00 54.13           C  
+ATOM    683  N2    G B  87     -32.796  39.322 -20.693  1.00 54.04           N  
+ATOM    684  N3    G B  87     -31.490  39.520 -18.804  1.00 50.61           N  
+ATOM    685  C4    G B  87     -30.207  39.484 -18.382  1.00 56.23           C  
+ATOM    686  P     G B  88     -31.281  35.502 -14.096  1.00 53.68           P  
+ATOM    687  OP1   G B  88     -31.907  34.794 -12.951  1.00 54.68           O  
+ATOM    688  OP2   G B  88     -29.910  35.116 -14.509  1.00 39.00           O  
+ATOM    689  O5'   G B  88     -32.256  35.463 -15.355  1.00 57.66           O  
+ATOM    690  C5'   G B  88     -33.614  35.868 -15.241  1.00 57.35           C  
+ATOM    691  C4'   G B  88     -34.327  35.784 -16.567  1.00 58.96           C  
+ATOM    692  O4'   G B  88     -33.726  36.709 -17.509  1.00 56.45           O  
+ATOM    693  C3'   G B  88     -34.258  34.440 -17.273  1.00 57.94           C  
+ATOM    694  O3'   G B  88     -35.199  33.508 -16.775  1.00 73.51           O  
+ATOM    695  C2'   G B  88     -34.483  34.816 -18.732  1.00 63.36           C  
+ATOM    696  O2'   G B  88     -35.863  35.029 -18.990  1.00 43.75           O  
+ATOM    697  C1'   G B  88     -33.761  36.163 -18.812  1.00 50.86           C  
+ATOM    698  N9    G B  88     -32.375  36.021 -19.299  1.00 55.68           N  
+ATOM    699  C8    G B  88     -31.223  35.986 -18.549  1.00 49.61           C  
+ATOM    700  N7    G B  88     -30.145  35.851 -19.273  1.00 46.05           N  
+ATOM    701  C5    G B  88     -30.611  35.792 -20.580  1.00 52.57           C  
+ATOM    702  C6    G B  88     -29.908  35.650 -21.807  1.00 55.72           C  
+ATOM    703  O6    G B  88     -28.688  35.544 -21.994  1.00 50.01           O  
+ATOM    704  N1    G B  88     -30.778  35.638 -22.894  1.00 55.41           N  
+ATOM    705  C2    G B  88     -32.146  35.747 -22.816  1.00 63.24           C  
+ATOM    706  N2    G B  88     -32.812  35.713 -23.980  1.00 54.57           N  
+ATOM    707  N3    G B  88     -32.811  35.881 -21.680  1.00 54.18           N  
+ATOM    708  C4    G B  88     -31.987  35.895 -20.612  1.00 56.47           C  
+ATOM    709  P     U B  89     -34.726  32.337 -15.783  1.00 72.44           P  
+ATOM    710  OP1   U B  89     -35.625  32.374 -14.602  1.00 52.31           O  
+ATOM    711  OP2   U B  89     -33.266  32.503 -15.573  1.00 82.19           O  
+ATOM    712  O5'   U B  89     -35.004  31.025 -16.635  1.00 80.12           O  
+ATOM    713  C5'   U B  89     -35.939  30.056 -16.196  1.00 65.85           C  
+ATOM    714  C4'   U B  89     -36.896  29.668 -17.291  1.00 76.34           C  
+ATOM    715  O4'   U B  89     -37.915  28.785 -16.746  1.00 63.91           O  
+ATOM    716  C3'   U B  89     -37.686  30.811 -17.917  1.00 82.48           C  
+ATOM    717  O3'   U B  89     -36.974  31.513 -18.934  1.00 99.92           O  
+ATOM    718  C2'   U B  89     -38.956  30.123 -18.405  1.00 89.18           C  
+ATOM    719  O2'   U B  89     -38.721  29.436 -19.626  1.00 91.14           O  
+ATOM    720  C1'   U B  89     -39.175  29.087 -17.304  1.00 88.15           C  
+ATOM    721  N1    U B  89     -40.055  29.598 -16.225  1.00 82.95           N  
+ATOM    722  C2    U B  89     -41.420  29.461 -16.387  1.00 61.44           C  
+ATOM    723  O2    U B  89     -41.925  28.945 -17.370  1.00 78.44           O  
+ATOM    724  N3    U B  89     -42.177  29.953 -15.353  1.00 62.09           N  
+ATOM    725  C4    U B  89     -41.727  30.556 -14.197  1.00 67.62           C  
+ATOM    726  O4    U B  89     -42.543  30.948 -13.359  1.00 75.18           O  
+ATOM    727  C5    U B  89     -40.304  30.660 -14.099  1.00 60.41           C  
+ATOM    728  C6    U B  89     -39.541  30.188 -15.090  1.00 48.05           C  
+ATOM    729  P     A B  90     -35.937  30.752 -19.910  1.00110.96           P  
+ATOM    730  OP1   A B  90     -36.589  29.495 -20.359  1.00102.79           O  
+ATOM    731  OP2   A B  90     -34.637  30.668 -19.201  1.00 98.18           O  
+ATOM    732  O5'   A B  90     -35.811  31.758 -21.135  1.00 85.89           O  
+ATOM    733  C5'   A B  90     -36.772  31.757 -22.177  1.00101.33           C  
+ATOM    734  C4'   A B  90     -36.131  31.497 -23.516  1.00 89.35           C  
+ATOM    735  O4'   A B  90     -35.196  32.562 -23.827  1.00 82.67           O  
+ATOM    736  C3'   A B  90     -35.288  30.236 -23.620  1.00 85.67           C  
+ATOM    737  O3'   A B  90     -36.061  29.062 -23.805  1.00 81.70           O  
+ATOM    738  C2'   A B  90     -34.369  30.554 -24.789  1.00 88.06           C  
+ATOM    739  O2'   A B  90     -35.057  30.404 -26.023  1.00 84.48           O  
+ATOM    740  C1'   A B  90     -34.107  32.043 -24.563  1.00 74.25           C  
+ATOM    741  N9    A B  90     -32.865  32.273 -23.800  1.00 69.68           N  
+ATOM    742  C8    A B  90     -32.700  32.486 -22.452  1.00 66.88           C  
+ATOM    743  N7    A B  90     -31.446  32.652 -22.096  1.00 64.42           N  
+ATOM    744  C5    A B  90     -30.739  32.537 -23.287  1.00 65.35           C  
+ATOM    745  C6    A B  90     -29.369  32.614 -23.602  1.00 49.71           C  
+ATOM    746  N6    A B  90     -28.396  32.835 -22.714  1.00 56.28           N  
+ATOM    747  N1    A B  90     -29.018  32.453 -24.895  1.00 67.75           N  
+ATOM    748  C2    A B  90     -29.971  32.230 -25.806  1.00 61.21           C  
+ATOM    749  N3    A B  90     -31.286  32.138 -25.636  1.00 68.13           N  
+ATOM    750  C4    A B  90     -31.605  32.302 -24.341  1.00 75.90           C  
+ATOM    751  P     A B  91     -35.474  27.632 -23.362  1.00 83.80           P  
+ATOM    752  OP1   A B  91     -36.622  26.694 -23.287  1.00 91.65           O  
+ATOM    753  OP2   A B  91     -34.652  27.853 -22.146  1.00 84.63           O  
+

From 031e345b5501a8ca1d2237983600a1156590d2ac Mon Sep 17 00:00:00 2001
From: Rodrigo V Honorato <r.vargashonorato@uu.nl>
Date: Wed, 2 Oct 2024 17:12:25 +0200
Subject: [PATCH 8/8] add `has_attract` decorator placeholder

---
 integration_tests/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/integration_tests/__init__.py b/integration_tests/__init__.py
index 82b0722a7..3addbf35e 100644
--- a/integration_tests/__init__.py
+++ b/integration_tests/__init__.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+
 if "CNS_EXEC" in os.environ:
     CNS_EXEC = os.environ["CNS_EXEC"]
 else:
@@ -17,3 +18,10 @@
 
 tests_path = Path(__file__).resolve().parents[0]
 GOLDEN_DATA = Path(tests_path, "golden_data")
+
+# Placeholder decorator to check if ATTRACT is installed
+# TODO: Implement
+ATTRACT_IS_INSTALLED = False
+has_attract = pytest.mark.skipif(
+    not ATTRACT_IS_INSTALLED, reason="ATTRACT not installed"
+)