Fix some Structure type issues uncovered by Pyrefly

PiperOrigin-RevId: 891687351
Change-Id: I9c69179100e9328113a578d45e5f997b07d300a2
This commit is contained in:
Augustin Zidek
2026-03-30 06:34:00 -07:00
committed by Copybara-Service
parent 0f82e0c94c
commit 58d22e2db5
3 changed files with 52 additions and 51 deletions

View File

@@ -108,59 +108,59 @@ class Bonds(table.Table):
auth_seq_id: A (num_atom,) array of auth_seq_id strings.
insertion_code: A (num_atom,) array of insertion code strings.
"""
mmcif_dict = collections.defaultdict(list)
ptnr1_indices, ptnr2_indices = self.get_atom_indices(atom_key)
cif = collections.defaultdict(list)
p1_indices, p2_indices = self.get_atom_indices(atom_key)
mmcif_dict['_struct_conn.ptnr1_label_asym_id'] = chain_id[ptnr1_indices]
mmcif_dict['_struct_conn.ptnr2_label_asym_id'] = chain_id[ptnr2_indices]
mmcif_dict['_struct_conn.ptnr1_label_comp_id'] = res_name[ptnr1_indices]
mmcif_dict['_struct_conn.ptnr2_label_comp_id'] = res_name[ptnr2_indices]
mmcif_dict['_struct_conn.ptnr1_label_seq_id'] = res_id[ptnr1_indices]
mmcif_dict['_struct_conn.ptnr2_label_seq_id'] = res_id[ptnr2_indices]
mmcif_dict['_struct_conn.ptnr1_label_atom_id'] = atom_name[ptnr1_indices]
mmcif_dict['_struct_conn.ptnr2_label_atom_id'] = atom_name[ptnr2_indices]
cif['_struct_conn.ptnr1_label_asym_id'] = chain_id[p1_indices].tolist()
cif['_struct_conn.ptnr2_label_asym_id'] = chain_id[p2_indices].tolist()
cif['_struct_conn.ptnr1_label_comp_id'] = res_name[p1_indices].tolist()
cif['_struct_conn.ptnr2_label_comp_id'] = res_name[p2_indices].tolist()
cif['_struct_conn.ptnr1_label_seq_id'] = res_id[p1_indices].tolist()
cif['_struct_conn.ptnr2_label_seq_id'] = res_id[p2_indices].tolist()
cif['_struct_conn.ptnr1_label_atom_id'] = atom_name[p1_indices].tolist()
cif['_struct_conn.ptnr2_label_atom_id'] = atom_name[p2_indices].tolist()
mmcif_dict['_struct_conn.ptnr1_auth_asym_id'] = auth_asym_id[ptnr1_indices]
mmcif_dict['_struct_conn.ptnr2_auth_asym_id'] = auth_asym_id[ptnr2_indices]
mmcif_dict['_struct_conn.ptnr1_auth_seq_id'] = auth_seq_id[ptnr1_indices]
mmcif_dict['_struct_conn.ptnr2_auth_seq_id'] = auth_seq_id[ptnr2_indices]
mmcif_dict['_struct_conn.pdbx_ptnr1_PDB_ins_code'] = insertion_code[
ptnr1_indices
]
mmcif_dict['_struct_conn.pdbx_ptnr2_PDB_ins_code'] = insertion_code[
ptnr2_indices
]
cif['_struct_conn.ptnr1_auth_asym_id'] = auth_asym_id[p1_indices].tolist()
cif['_struct_conn.ptnr2_auth_asym_id'] = auth_asym_id[p2_indices].tolist()
cif['_struct_conn.ptnr1_auth_seq_id'] = auth_seq_id[p1_indices].tolist()
cif['_struct_conn.ptnr2_auth_seq_id'] = auth_seq_id[p2_indices].tolist()
cif['_struct_conn.pdbx_ptnr1_PDB_ins_code'] = insertion_code[
p1_indices
].tolist()
cif['_struct_conn.pdbx_ptnr2_PDB_ins_code'] = insertion_code[
p2_indices
].tolist()
label_alt_id = ['?'] * self.size
mmcif_dict['_struct_conn.pdbx_ptnr1_label_alt_id'] = label_alt_id
mmcif_dict['_struct_conn.pdbx_ptnr2_label_alt_id'] = label_alt_id
cif['_struct_conn.pdbx_ptnr1_label_alt_id'] = label_alt_id
cif['_struct_conn.pdbx_ptnr2_label_alt_id'] = label_alt_id
# We need to set this to make visualisation work in NGL/PyMOL.
mmcif_dict['_struct_conn.pdbx_value_order'] = ['?'] * self.size
cif['_struct_conn.pdbx_value_order'] = ['?'] * self.size
# We use a symmetry of 1_555 which is the no-op transformation. Other
# values are used when bonds involve atoms that only exist after expanding
# the bioassembly, but we don't support this kind of bond at the moment.
symmetry = ['1_555'] * self.size
mmcif_dict['_struct_conn.ptnr1_symmetry'] = symmetry
mmcif_dict['_struct_conn.ptnr2_symmetry'] = symmetry
cif['_struct_conn.ptnr1_symmetry'] = symmetry
cif['_struct_conn.ptnr2_symmetry'] = symmetry
bond_type_counter = collections.Counter()
for bond_row in self.iterrows():
bond_type = bond_row['type']
bond_type_counter[bond_type] += 1
mmcif_dict['_struct_conn.id'].append(
cif['_struct_conn.id'].append(
f'{bond_type}{bond_type_counter[bond_type]}'
)
mmcif_dict['_struct_conn.pdbx_role'].append(bond_row['role'])
mmcif_dict['_struct_conn.conn_type_id'].append(bond_type)
cif['_struct_conn.pdbx_role'].append(bond_row['role'])
cif['_struct_conn.conn_type_id'].append(bond_type)
bond_types = np.unique(self.type)
mmcif_dict['_struct_conn_type.id'] = bond_types
cif['_struct_conn_type.id'] = bond_types
unknown = ['?'] * len(bond_types)
mmcif_dict['_struct_conn_type.criteria'] = unknown
mmcif_dict['_struct_conn_type.reference'] = unknown
cif['_struct_conn_type.criteria'] = unknown
cif['_struct_conn_type.reference'] = unknown
return dict(mmcif_dict)
return dict(cif)
def concat_with_atom_keys(

View File

@@ -50,6 +50,7 @@ class _UnsetSentinel(enum.Enum):
UNSET = object()
_UnsetType = Literal[_UnsetSentinel.UNSET]
_UNSET = _UnsetSentinel.UNSET
@@ -793,7 +794,7 @@ class Structure(table.Database):
yield row | current_chain
def _iter_atom_ranges(
self, boundaries: Sequence[int]
self, boundaries: Sequence[int] | np.ndarray
) -> Iterator[tuple[int, int]]:
"""Iterator for (start, end) pairs from an array of start indices."""
yield from itertools.pairwise(boundaries)
@@ -803,7 +804,7 @@ class Structure(table.Database):
def _iter_residue_ranges(
self,
boundaries: Sequence[int],
boundaries: Sequence[int] | np.ndarray,
*,
count_unresolved: bool,
) -> Iterator[tuple[int, int]]:
@@ -1131,20 +1132,20 @@ class Structure(table.Database):
def copy_and_update(
self,
*,
name: str | Literal[_UNSET] = _UNSET,
release_date: datetime.date | None | Literal[_UNSET] = _UNSET,
resolution: float | None | Literal[_UNSET] = _UNSET,
structure_method: str | None | Literal[_UNSET] = _UNSET,
name: str | _UnsetType = _UNSET,
release_date: datetime.date | None | _UnsetType = _UNSET,
resolution: float | None | _UnsetType = _UNSET,
structure_method: str | None | _UnsetType = _UNSET,
bioassembly_data: (
bioassemblies.BioassemblyData | None | Literal[_UNSET]
bioassemblies.BioassemblyData | None | _UnsetType
) = _UNSET,
chemical_components_data: (
struc_chem_comps.ChemicalComponentsData | None | Literal[_UNSET]
struc_chem_comps.ChemicalComponentsData | None | _UnsetType
) = _UNSET,
chains: structure_tables.Chains | None | Literal[_UNSET] = _UNSET,
residues: structure_tables.Residues | None | Literal[_UNSET] = _UNSET,
atoms: structure_tables.Atoms | None | Literal[_UNSET] = _UNSET,
bonds: structure_tables.Bonds | None | Literal[_UNSET] = _UNSET,
chains: structure_tables.Chains | None | _UnsetType = _UNSET,
residues: structure_tables.Residues | None | _UnsetType = _UNSET,
atoms: structure_tables.Atoms | None | _UnsetType = _UNSET,
bonds: structure_tables.Bonds | None | _UnsetType = _UNSET,
skip_validation: bool = False,
) -> Self:
"""Performs a shallow copy but with specified fields updated."""
@@ -1322,15 +1323,15 @@ class Structure(table.Database):
def copy_and_update_globals(
self,
*,
name: str | Literal[_UNSET] = _UNSET,
release_date: datetime.date | Literal[_UNSET] | None = _UNSET,
resolution: float | Literal[_UNSET] | None = _UNSET,
structure_method: str | Literal[_UNSET] | None = _UNSET,
name: str | _UnsetType = _UNSET,
release_date: datetime.date | _UnsetType | None = _UNSET,
resolution: float | _UnsetType | None = _UNSET,
structure_method: str | _UnsetType | None = _UNSET,
bioassembly_data: (
bioassemblies.BioassemblyData | Literal[_UNSET] | None
bioassemblies.BioassemblyData | _UnsetType | None
) = _UNSET,
chemical_components_data: (
struc_chem_comps.ChemicalComponentsData | Literal[_UNSET] | None
struc_chem_comps.ChemicalComponentsData | _UnsetType | None
) = _UNSET,
) -> Self:
"""Returns a shallow copy with the global columns updated."""

View File

@@ -417,7 +417,7 @@ def to_mmcif_atom_site_and_bonds_table(
coords_decimal_places: int,
) -> Mapping[str, Sequence[str]]:
"""Returns raw _atom_site and _struct_conn mmCIF tables."""
raw_mmcif = collections.defaultdict(list)
raw_mmcif: dict[str, Sequence[str]] = {}
# Use [value] * num wherever possible since it is about 10x faster than list
# comprehension in such cases. Also use f-strings instead of str() - faster.
total_atoms = atoms.size * atoms.num_models