diff --git a/bin/annot_secondary_structures.py b/bin/annot_secondary_structures.py
index 7cd2c76..661faf6 100644
--- a/bin/annot_secondary_structures.py
+++ b/bin/annot_secondary_structures.py
@@ -24,6 +24,20 @@ from biotite.structure.io.pdb import PDBFile
 SSE_BACKEND = Literal["dssp", "psea"]
 
 
+def get_pdb_length(fname: str) -> int:
+    """
+    Length of the structure in PDB file fname: backbone atoms of its only model
+    (per struc.filter_backbone) integer-divided by 3; -1 if it has several models
+    """
+    # Ignore warnings whose message matches the "elements were guessed" pattern
+    warnings.filterwarnings("ignore", ".*elements were guessed from atom_.*")
+    pdb = PDBFile.read(fname)
+    if pdb.get_model_count() > 1:
+        return -1  # sentinel: no length is computed for multi-model files
+    model = pdb.get_structure()[0]
+    return len(model[struc.filter_backbone(model)]) // 3
+
+
 def count_structures_in_pdb(
     fname: str, backend: SSE_BACKEND = "psea"
 ) -> Tuple[int, int]:
@@ -61,7 +75,9 @@ def count_structures_in_pdb(
         num_alpha = ss_counts["H"] if "H" in ss_counts else 0
         num_beta = ss_counts["B"] if "B" in ss_counts else 0
     else:
-        raise ValueError(f"Unrecognized backend: {backend}")
+        raise ValueError(
+            f"Unrecognized backend for calculating secondary structures: {backend}"
+        )
     logging.debug(f"From {fname}:\t{num_alpha} {num_beta}")
     return num_alpha, num_beta
 
@@ -69,10 +85,19 @@ def count_structures_in_pdb(
 def make_ss_cooccurrence_plot(
     pdb_files: Collection[str],
     outpdf: str,
+    max_seq_len: int = 0,
     backend: SSE_BACKEND = "psea",
     threads: int = 4,
 ):
-    """ """
+    """
+    Create a secondary structure co-occurrence plot
+    """
+    if max_seq_len > 0:
+        orig_len = len(pdb_files)
+        pdb_files = [p for p in pdb_files if get_pdb_length(p) <= max_seq_len]
+        logging.info(
+            f"Filtering out sequences with more than {max_seq_len} residues: {orig_len} --> {len(pdb_files)}"
+        )
     logging.info(f"Calculating {len(pdb_files)} structures using {backend}")
     pfunc = functools.partial(count_structures_in_pdb, backend=backend)
     pool = mp.Pool(threads)
diff --git a/bin/sample.py b/bin/sample.py
index 5c2cb58..8b81b46 100644
--- a/bin/sample.py
+++ b/bin/sample.py
@@ -269,8 +269,7 @@ def main() -> None:
             model,
             train_dset,
             n=10,
-            # sweep_lengths=(50, train_dset.dset.pad),
-            sweep_lengths=(50, 52),  # Dummy values
+            sweep_lengths=(50, test_dset.dset.pad),
             batch_size=args.batchsize,
         )
     else:
@@ -331,7 +330,9 @@ def main() -> None:
 
         # Plot single plots
         plot_distribution_overlap(
-            {"Test": orig_values, "Sampled": samp_values}, ft_name, fname=plotdir / f"dist_{ft_name}.pdf"
+            {"Test": orig_values, "Sampled": samp_values},
+            ft_name,
+            fname=plotdir / f"dist_{ft_name}.pdf",
         )
         plot_distribution_overlap(
             {"Test": orig_values, "Sampled": samp_values},
@@ -372,12 +373,13 @@ def main() -> None:
     # Generate plots of secondary structure co-occurrence
     make_ss_cooccurrence_plot(
         pdb_files,
-        str(outdir / "sampled_pdb" / "ss_cooccurrence_sampled.pdf"),
+        str(outdir / "plots" / "ss_cooccurrence_sampled.pdf"),
         threads=multiprocessing.cpu_count(),
     )
     make_ss_cooccurrence_plot(
         test_dset.filenames,
-        str(outdir / "sampled_pdb" / "ss_cooccurrence_train.pdf"),
+        str(outdir / "plots" / "ss_cooccurrence_test.pdf"),
+        max_seq_len=test_dset.dset.pad,
         threads=multiprocessing.cpu_count(),
     )