/* * Read a molecule from CIF * * (c) 2014 Schrodinger, Inc. */ #include #include #include #include #include #include #include #include #include #include #ifdef TRACE #include #include #include // For glm::length, glm::dot, glm::normalize, glm::distance #include // For printing glm::vec3 #endif // TRACE #include "os_predef.h" #include "os_std.h" #include "MemoryDebug.h" #include "Err.h" #include "AssemblyHelpers.h" #include "AtomInfo.h" #include "Base.h" #include "Executive.h" #include "P.h" #include "Util.h" #include "Scene.h" #include "Rep.h" #include "ObjectMolecule.h" #include "ObjectMap.h" #include "CifFile.h" #include "CifBondDict.h" #include "Util2.h" #include "Vector.h" #include "Lex.h" #include "strcasecmp.h" #include "pymol/zstring_view.h" #include "Feedback.h" #include "pocketfft_hdronly.h" #ifdef _PYMOL_IP_PROPERTIES #endif using pymol::cif_array; constexpr auto EPSILON = std::numeric_limits::epsilon(); // canonical amino acid three letter codes const char * aa_three_letter[] = { "ALA", // A "ASX", // B for ambiguous asparagine/aspartic-acid "CYS", // C "ASP", // D "GLU", // E "PHE", // F "GLY", // G "HIS", // H "ILE", // I nullptr, // J "LYS", // K "LEU", // L "MET", // M "ASN", // N "HOH", // O for water "PRO", // P "GLN", // Q "ARG", // R "SER", // S "THR", // T nullptr, // U "VAL", // V "TRP", // W nullptr, // X for other "TYR", // Y "GLX", // Z for ambiguous glutamine/glutamic acid }; // amino acid one-to-three letter code translation static const char * aa_get_three_letter(char aa) { if (aa < 'A' || aa > 'Z') return "UNK"; const char * three = aa_three_letter[aa - 'A']; return (three) ? three : "UNK"; } // dictionary content types enum CifDataType { CIF_UNKNOWN, CIF_CORE, // small molecule CIF_MMCIF, // macromolecular structure CIF_CHEM_COMP // chemical component }; // simple 1-indexed string storage class seqvec_t : public std::vector { public: void set(int i, const char * mon_id) { if (i < 1) { printf("error: i(%d) < 1\n", i); return; } if (i > size()) resize(i); (*this)[i - 1] = mon_id; } const char * get(int i) const { if (i < 1 || i > size()) return nullptr; return (*this)[i - 1].c_str(); } }; // structure to collect information about a data block struct CifContentInfo { PyMOLGlobals * G; CifDataType type; bool fractional; bool use_auth; std::set chains_filter; std::set polypeptide_entities; // entity ids std::map sequences; // entity_id -> [resn1, resn2, ...] bool is_excluded_chain(const char * chain) const { if (chains_filter.empty()) return false; auto borrowed = LexBorrow(G, chain); if (borrowed != LEX_BORROW_NOTFOUND) return is_excluded_chain(borrowed); return false; } bool is_excluded_chain(const lexborrow_t& chain) const { return (!chains_filter.empty() && chains_filter.count(reinterpret_cast(chain)) == 0); } bool is_polypeptide(const char * entity_id) const { return polypeptide_entities.count(entity_id); } CifContentInfo(PyMOLGlobals * G, bool use_auth=true) : G(G), type(CIF_UNKNOWN), fractional(false), use_auth(use_auth) {} }; /** * Make a string key that represents the collection of alt id, asym id, * atom id, comp id and seq id components of the label for a macromolecular * atom site. */ static std::string make_mm_atom_site_label(PyMOLGlobals * G, const AtomInfoType * a) { char resi[8]; AtomResiFromResv(resi, sizeof(resi), a); std::string key(LexStr(G, a->chain)); key += '/'; key += LexStr(G, a->resn); key += '/'; key += resi; key += '/'; key += LexStr(G, a->name); key += '/'; key += a->alt; return key; } static std::string make_mm_atom_site_label(PyMOLGlobals * G, const char * asym_id, const char * comp_id, const char * seq_id, const char * ins_code, const char * atom_id, const char * alt_id) { std::string key(asym_id); key += '/'; key += comp_id; key += '/'; key += seq_id; key += ins_code; key += '/'; key += atom_id; key += '/'; key += alt_id; return key; } /** * Like strncpy, but only copy alphabetic characters. */ static void strncpy_alpha(char* dest, const char* src, size_t n) { for (size_t i = 0; i != n; ++i) { if (!isalpha(src[i])) { memset(dest + i, 0, n - i); break; } dest[i] = src[i]; } } /** * Get first non-NULL element */ template static T VLAGetFirstNonNULL(T * vla) { int n = VLAGetSize(vla); for (int i = 0; i < n; ++i) if (vla[i]) return vla[i]; return nullptr; } /** * Lookup one key in a map, return true if found and * assign output reference `value1` */ template inline bool find1(Map& dict, T& value1, const Key& key1) { auto it = dict.find(key1); if (it == dict.end()) return false; value1 = it->second; return true; } /** * Lookup two keys in a map, return true if both found and * assign output references `value1` and `value2`. */ template inline bool find2(Map& dict, T& value1, const Key& key1, T& value2, const Key& key2) { if (!find1(dict, value1, key1)) return false; if (!find1(dict, value2, key2)) return false; return true; } static void AtomInfoSetEntityId(PyMOLGlobals * G, AtomInfoType * ai, const char * entity_id) { ai->custom = LexIdx(G, entity_id); #ifdef _PYMOL_IP_PROPERTIES PropertySet(G, ai, "entity_id", entity_id); #endif } /** * Initialize a bond. Only one of symmetry_1 or symmetry_2 must be non-default. * If symmetry_2 is default and symmetry_1 is non-default, then swap the * indices. */ static bool BondTypeInit3(PyMOLGlobals* G, BondType* bond, unsigned i1, unsigned i2, const char* symmetry_1, const char* symmetry_2, int order = 1) { auto symop_1 = pymol::SymOp(symmetry_1); auto symop_2 = pymol::SymOp(symmetry_2); if (symop_1) { if (symop_2) { PRINTFB(G, FB_Executive, FB_Warnings) " Warning: Bonds with two symmetry operations not supported\n" ENDFB(G); return false; } std::swap(i1, i2); std::swap(symop_1, symop_2); } BondTypeInit2(bond, i1, i2, order); bond->symop_2 = symop_2; return true; } /** * Add one bond without checking if it already exists */ static void ObjectMoleculeAddBond2(ObjectMolecule * I, int i1, int i2, int order) { VLACheck(I->Bond, BondType, I->NBond); BondTypeInit2(I->Bond + I->NBond, i1, i2, order); I->NBond++; } /** * Get the distance between two atoms in ObjectMolecule */ static float GetDistance(ObjectMolecule * I, int i1, int i2) { const CoordSet *cset; int idx1 = -1, idx2 = -1; // find first coordset which contains both atoms if (I->DiscreteFlag) { cset = I->DiscreteCSet[i1]; if (cset == I->DiscreteCSet[i2]) { idx1 = I->DiscreteAtmToIdx[i1]; idx2 = I->DiscreteAtmToIdx[i2]; } } else { for (int i = 0; i < I->NCSet; ++i) { if ((cset = I->CSet[i])) { if ((idx1 = cset->AtmToIdx[i1]) != -1 && (idx2 = cset->AtmToIdx[i2]) != -1) { break; } } } } if (idx1 == -1 || idx2 == -1) return 999.f; float v[3]; subtract3f( cset->coordPtr(idx1), cset->coordPtr(idx2), v); return length3f(v); } /** * Bond order string to int */ static int bondOrderLookup(const char * order) { if (p_strcasestartswith(order, "doub")) return 2; if (p_strcasestartswith(order, "trip")) return 3; if (p_strcasestartswith(order, "arom")) return 4; if (p_strcasestartswith(order, "delo")) return 4; // single return 1; } /** * Read bonds from CHEM_COMP_BOND in `bond_dict` dictionary */ static bool read_chem_comp_bond_dict(const pymol::cif_data * data, bond_dict_t &bond_dict) { const cif_array *arr_id_1, *arr_id_2, *arr_order, *arr_comp_id; if( !(arr_id_1 = data->get_arr("_chem_comp_bond.atom_id_1")) || !(arr_id_2 = data->get_arr("_chem_comp_bond.atom_id_2")) || !(arr_order = data->get_arr("_chem_comp_bond.value_order")) || !(arr_comp_id = data->get_arr("_chem_comp_bond.comp_id"))) { if ((arr_comp_id = data->get_arr("_chem_comp_atom.comp_id"))) { // atom(s) but no bonds (e.g. metals) bond_dict.set_unknown(arr_comp_id->as_s()); return true; } return false; } const char *name1, *name2, *resn; int order_value; int nrows = arr_id_1->size(); for (int i = 0; i < nrows; i++) { resn = arr_comp_id->as_s(i); name1 = arr_id_1->as_s(i); name2 = arr_id_2->as_s(i); const char *order = arr_order->as_s(i); order_value = bondOrderLookup(order); bond_dict[resn].set(name1, name2, order_value); } // alt_atom_id -> atom_id if ((arr_comp_id = data->get_arr("_chem_comp_atom.comp_id")) && (arr_id_1 = data->get_arr("_chem_comp_atom.atom_id")) && (arr_id_2 = data->get_arr("_chem_comp_atom.alt_atom_id"))) { nrows = arr_id_1->size(); // set of all non-alt ids std::set atom_ids; for (int i = 0; i < nrows; ++i) { atom_ids.insert(arr_id_1->as_s(i)); } for (int i = 0; i < nrows; ++i) { resn = arr_comp_id->as_s(i); name1 = arr_id_1->as_s(i); name2 = arr_id_2->as_s(i); // skip identity mapping if (strcmp(name1, name2) == 0) { continue; } // alt id must not also be a non-alt id (PYMOL-3470) if (atom_ids.count(name2)) { fprintf(stderr, "Warning: _chem_comp_atom.alt_atom_id %s/%s ignored for bonding\n", resn, name2); continue; } bond_dict[resn].add_alt_name(name1, name2); } } return true; } /** * parse $PYMOL_DATA/chem_comp_bond-top100.cif (subset of components.cif) into * a static (global) dictionary. */ static bond_dict_t * get_global_components_bond_dict(PyMOLGlobals * G) { static bond_dict_t bond_dict; if (bond_dict.empty()) { const char * pymol_data = getenv("PYMOL_DATA"); if (!pymol_data || !pymol_data[0]) return nullptr; std::string path(pymol_data); path.append(PATH_SEP).append("chem_comp_bond-top100.cif"); cif_file_with_error_capture cif; if (!cif.parse_file(path.c_str())) { PRINTFB(G, FB_Executive, FB_Warnings) " Warning: Loading '%s' failed: %s\n", path.c_str(), cif.m_error_msg.c_str() ENDFB(G); return nullptr; } for (const auto& [code, datablock] : cif.datablocks()) { read_chem_comp_bond_dict(&datablock, bond_dict); } } return &bond_dict; } /** * True for N-H1 and N-H3, those are not in the chemical components dictionary. */ static bool is_N_H1_or_H3(PyMOLGlobals * G, const AtomInfoType * a1, const AtomInfoType * a2) { if (a2->name == G->lex_const.N) { a2 = a1; } else if (a1->name != G->lex_const.N) { return false; } return (a2->name == G->lex_const.H1 || a2->name == G->lex_const.H3); } /** * Add bonds for one residue, with atoms spanning from i_start to i_end-1, * based on components.cif */ static void ConnectComponent(ObjectMolecule * I, int i_start, int i_end, bond_dict_t * bond_dict) { if (i_end - i_start < 2) return; auto G = I->G; const AtomInfoType *a1, *a2, *ai = I->AtomInfo.data(); int order; // get residue bond dictionary auto res_dict = bond_dict->get(G, LexStr(G, ai[i_start].resn)); if (res_dict == nullptr) return; // for all pairs of atoms in given set for (int i1 = i_start + 1; i1 < i_end; i1++) { for (int i2 = i_start; i2 < i1; i2++) { a1 = ai + i1; a2 = ai + i2; // don't connect different alt codes if (a1->alt[0] && a2->alt[0] && strcmp(a1->alt, a2->alt) != 0) { continue; } // restart if we hit the next residue in bulk solvent (atoms must // not be sorted for this) // TODO artoms are sorted at this point if (a1->name == a2->name) { i_start = i1; break; } // lookup if atoms are bonded order = res_dict->get(LexStr(G, a1->name), LexStr(G, a2->name)); if (order < 0) { if (!is_N_H1_or_H3(G, a1, a2) || GetDistance(I, i1, i2) > 1.2) continue; order = 1; } // make bond ObjectMoleculeAddBond2(I, i1, i2, order); } } } /** * Add intra residue bonds based on components.cif, and common polymer * connecting bonds (C->N, O3*->P) */ static int ObjectMoleculeConnectComponents(ObjectMolecule * I, bond_dict_t * bond_dict=nullptr) { PyMOLGlobals * G = I->G; int i_start = 0; std::vector i_prev_c[2], i_prev_o3[2]; const lexborrow_t lex_O3s = LexBorrow(G, "O3*"); const lexborrow_t lex_O3p = LexBorrow(G, "O3'"); if (!bond_dict) { // read components.cif if (!(bond_dict = get_global_components_bond_dict(G))) return false; } // reserve some memory for new bonds I->Bond.reserve(I->NAtom * 4); for (int i = 0; i < I->NAtom; ++i) { auto const& atom = I->AtomInfo[i]; // intra-residue if(!AtomInfoSameResidue(G, I->AtomInfo + i_start, I->AtomInfo + i)) { ConnectComponent(I, i_start, i, bond_dict); i_start = i; i_prev_c[0] = std::move(i_prev_c[1]); i_prev_o3[0] = std::move(i_prev_o3[1]); i_prev_c[1].clear(); i_prev_o3[1].clear(); } // inter-residue polymer bonds if (atom.name == G->lex_const.C) { i_prev_c[1].push_back(i); } else if (atom.name == lex_O3s || atom.name == lex_O3p) { i_prev_o3[1].push_back(i); } else { auto const* i_prev_ptr = (atom.name == G->lex_const.N) ? i_prev_c : (atom.name == G->lex_const.P) ? i_prev_o3 : nullptr; if (i_prev_ptr && !i_prev_ptr->empty()) { for (int i_prev : *i_prev_ptr) { bool alt_check = !atom.alt[0] || !I->AtomInfo[i_prev].alt[0] || atom.alt[0] == I->AtomInfo[i_prev].alt[0]; if (alt_check && GetDistance(I, i_prev, i) < 1.8) { // make bond ObjectMoleculeAddBond2(I, i_prev, i, 1); } } } } } // final residue ConnectComponent(I, i_start, I->NAtom, bond_dict); // clean up VLASize(I->Bond, BondType, I->NBond); return true; } /** * secondary structure hash */ class sshashkey { public: lexborrow_t chain; // borrowed ref int resv; char inscode; void assign(const lexborrow_t& asym_id_, int resv_, char ins_code_ = '\0') { chain = asym_id_; resv = resv_; inscode = ins_code_; } // comparable to sshashkey and AtomInfoType template int compare(const T &other) const { int test = resv - other.resv; if (test == 0) { test = (chain - other.chain); if (test == 0) test = inscode - other.inscode; } return test; } bool operator<(const sshashkey &other) const { return compare(other) < 0; } bool operator>(const sshashkey &other) const { return compare(other) > 0; } }; class sshashvalue { public: char ss; sshashkey end; }; typedef std::map sshashmap; // PDBX_STRUCT_OPER_LIST type typedef std::map > oper_list_t; // type for parsed PDBX_STRUCT_OPER_LIST typedef std::vector > oper_collection_t; /** * Parse operation expressions like (1,2)(3-6) */ static oper_collection_t parse_oper_expression(const std::string &expr) { oper_collection_t collection; // first step to split parenthesized chunks std::vector a_vec = strsplit(expr, ')'); // loop over chunks (still include leading '(') for (auto& a_item : a_vec) { const char * a_chunk = a_item.c_str(); // finish chunk while (*a_chunk == '(') ++a_chunk; // skip empty chunks if (!*a_chunk) continue; collection.resize(collection.size() + 1); oper_collection_t::reference ids = collection.back(); // split chunk by commas std::vector b_vec = strsplit(a_chunk, ','); // look for ranges for (auto& b_item : b_vec) { // "c_d" will have either one (no range) or two items std::vector c_d = strsplit(b_item, '-'); ids.push_back(c_d[0]); if (c_d.size() == 2) for (int i = atoi(c_d[0].c_str()) + 1, j = atoi(c_d[1].c_str()) + 1; i < j; ++i) { char i_str[16]; snprintf(i_str, sizeof(i_str), "%d", i); ids.push_back(i_str); } } } return collection; } /** * Get chains which are part of the assembly * * assembly_chains: output set * assembly_id: ID of the assembly or nullptr to use first assembly */ static bool get_assembly_chains(PyMOLGlobals * G, const pymol::cif_data * data, std::set &assembly_chains, const char * assembly_id) { const cif_array *arr_id, *arr_asym_id_list; if ((arr_id = data->get_arr("_pdbx_struct_assembly_gen.assembly_id")) == nullptr || (arr_asym_id_list = data->get_arr("_pdbx_struct_assembly_gen.asym_id_list")) == nullptr) return false; for (unsigned i = 0, nrows = arr_id->size(); i < nrows; ++i) { if (strcmp(assembly_id, arr_id->as_s(i))) continue; const char * asym_id_list = arr_asym_id_list->as_s(i); std::vector chains = strsplit(asym_id_list, ','); for (auto& chain : chains) { assembly_chains.insert(LexIdx(G, chain.c_str())); } } return !assembly_chains.empty(); } /** * Read assembly * * atInfo: atom info array to use for chain check * cset: template coordinate set to create assembly coordsets from * assembly_id: assembly identifier * * return: assembly coordinates as VLA of coordinate sets */ static CoordSet ** read_pdbx_struct_assembly(PyMOLGlobals * G, const pymol::cif_data * data, const AtomInfoType * atInfo, const CoordSet * cset, const char * assembly_id) { const cif_array *arr_id, *arr_assembly_id, *arr_oper_expr, *arr_asym_id_list; if ((arr_id = data->get_arr("_pdbx_struct_oper_list.id")) == nullptr || (arr_assembly_id = data->get_arr("_pdbx_struct_assembly_gen.assembly_id")) == nullptr || (arr_oper_expr = data->get_arr("_pdbx_struct_assembly_gen.oper_expression")) == nullptr || (arr_asym_id_list = data->get_arr("_pdbx_struct_assembly_gen.asym_id_list")) == nullptr) return nullptr; const cif_array * arr_matrix[] = { data->get_opt("_pdbx_struct_oper_list.matrix[1][1]"), data->get_opt("_pdbx_struct_oper_list.matrix[1][2]"), data->get_opt("_pdbx_struct_oper_list.matrix[1][3]"), data->get_opt("_pdbx_struct_oper_list.vector[1]"), data->get_opt("_pdbx_struct_oper_list.matrix[2][1]"), data->get_opt("_pdbx_struct_oper_list.matrix[2][2]"), data->get_opt("_pdbx_struct_oper_list.matrix[2][3]"), data->get_opt("_pdbx_struct_oper_list.vector[2]"), data->get_opt("_pdbx_struct_oper_list.matrix[3][1]"), data->get_opt("_pdbx_struct_oper_list.matrix[3][2]"), data->get_opt("_pdbx_struct_oper_list.matrix[3][3]"), data->get_opt("_pdbx_struct_oper_list.vector[3]") }; // build oper_list from _pdbx_struct_oper_list oper_list_t oper_list; for (unsigned i = 0, nrows = arr_id->size(); i < nrows; ++i) { float * matrix = oper_list[arr_id->as_s(i)].data(); identity44f(matrix); for (int j = 0; j < 12; ++j) { matrix[j] = arr_matrix[j]->as_d(i); } } CoordSet ** csets = nullptr; int csetbeginidx = 0; // assembly for (unsigned i = 0, nrows = arr_oper_expr->size(); i < nrows; ++i) { if (strcmp(assembly_id, arr_assembly_id->as_s(i))) continue; const char * oper_expr = arr_oper_expr->as_s(i); const char * asym_id_list = arr_asym_id_list->as_s(i); oper_collection_t collection = parse_oper_expression(oper_expr); std::vector chains = strsplit(asym_id_list, ','); std::set chains_set; for (auto& chain : chains) { auto borrowed = LexBorrow(G, chain.c_str()); if (borrowed != LEX_BORROW_NOTFOUND) { chains_set.insert(borrowed); } } // new coord set VLA int ncsets = 1; for (const auto& c_item : collection) { ncsets *= c_item.size(); } if (!csets) { csets = VLACalloc(CoordSet*, ncsets); } else { csetbeginidx = VLAGetSize(csets); VLASize(csets, CoordSet*, csetbeginidx + ncsets); } // for cartesian product int c_src_len = 1; // coord set for subset of atoms CoordSet ** c_csets = csets + csetbeginidx; c_csets[0] = CoordSetCopyFilterChains(cset, atInfo, chains_set); // build new coord sets for (auto c_it = collection.rbegin(); c_it != collection.rend(); ++c_it) { // copy int j = c_src_len; while (j < c_src_len * c_it->size()) { // cartesian product for (int k = 0; k < c_src_len; ++k, ++j) { c_csets[j] = CoordSetCopy(c_csets[k]); } } // transform j = 0; for (auto& s_item : *c_it) { const float * matrix = oper_list[s_item].data(); // cartesian product for (int k = 0; k < c_src_len; ++k, ++j) { CoordSetTransform44f(c_csets[j], matrix); } } // cartesian product // Note: currently, "1m4x" seems to be the only structure in the PDB // which uses a cartesian product expression c_src_len *= c_it->size(); } } // return assembly coordsets return csets; } /** * Set ribbon_trace_atoms and cartoon_trace_atoms for CA/P only models */ static bool read_pdbx_coordinate_model(PyMOLGlobals * G, const pymol::cif_data * data, ObjectMolecule * mol) { const cif_array * arr_type = data->get_arr("_pdbx_coordinate_model.type"); const cif_array * arr_asym = data->get_arr("_pdbx_coordinate_model.asym_id"); if (!arr_type || !arr_asym) return false; // affected chains std::set asyms; // collect CA/P-only chain identifiers for (unsigned i = 0, nrows = arr_type->size(); i < nrows; ++i) { const char * type = arr_type->as_s(i); // no need anymore to check "CA ATOMS ONLY", since nonbonded CA are // now (v1.8.2) detected automatically in RepCartoon and RepRibbon if (strcmp(type, "P ATOMS ONLY") == 0) { asyms.insert(arr_asym->as_s(i)); } } if (asyms.empty()) return false; // set on atom-level for (int i = 0, nrows = VLAGetSize(mol->AtomInfo); i < nrows; ++i) { AtomInfoType * ai = mol->AtomInfo + i; if (asyms.count(LexStr(G, ai->segi))) { SettingSet(G, cSetting_cartoon_trace_atoms, true, ai); SettingSet(G, cSetting_ribbon_trace_atoms, true, ai); } } return true; } /** * Read CELL and SYMMETRY */ static CSymmetry * read_symmetry(PyMOLGlobals * G, const pymol::cif_data * data) { const cif_array * cell[6] = { data->get_arr("_cell?length_a"), data->get_arr("_cell?length_b"), data->get_arr("_cell?length_c"), data->get_arr("_cell?angle_alpha"), data->get_arr("_cell?angle_beta"), data->get_arr("_cell?angle_gamma") }; for (int i = 0; i < 6; i++) if (cell[i] == nullptr) return nullptr; CSymmetry * symmetry = new CSymmetry(G); if (!symmetry) return nullptr; float cellparams[6]; for (int i = 0; i < 6; ++i) { cellparams[i] = cell[i]->as_d(); } symmetry->Crystal.setDims(cellparams); symmetry->Crystal.setAngles(cellparams + 3); symmetry->setSpaceGroup( data->get_opt("_symmetry?space_group_name_h-m", "_space_group?name_h-m_alt")->as_s()); symmetry->PDBZValue = data->get_opt("_cell.z_pdb")->as_i(0, 1); // register symmetry operations if given const cif_array * arr_as_xyz = data->get_arr( "_symmetry_equiv?pos_as_xyz", "_space_group_symop?operation_xyz"); if (arr_as_xyz) { std::vector sym_op; for (unsigned i = 0, n = arr_as_xyz->size(); i < n; ++i) { sym_op.push_back(arr_as_xyz->as_s(i)); } SymmetrySpaceGroupRegister(G, symmetry->spaceGroup(), sym_op); } return symmetry; } /** * Read CHEM_COMP_ATOM */ static CoordSet ** read_chem_comp_atom_model(PyMOLGlobals * G, const pymol::cif_data * data, AtomInfoType ** atInfoPtr) { const cif_array *arr_x, *arr_y = nullptr, *arr_z = nullptr; // setting to exclude one or more coordinate columns unsigned mask = SettingGetGlobal_i(G, cSetting_chem_comp_cartn_use); const char * feedback = ""; if (!mask) { mask = 0xFF; } if ((mask & 0x01) && (arr_x = data->get_arr("_chem_comp_atom.pdbx_model_cartn_x_ideal")) && !arr_x->is_missing_all()) { arr_y = data->get_arr("_chem_comp_atom.pdbx_model_cartn_y_ideal"); arr_z = data->get_arr("_chem_comp_atom.pdbx_model_cartn_z_ideal"); feedback = ".pdbx_model_Cartn_{x,y,z}_ideal"; } else if ((mask & 0x02) && (arr_x = data->get_arr("_chem_comp_atom.model_cartn_x"))) { arr_y = data->get_arr("_chem_comp_atom.model_cartn_y"); arr_z = data->get_arr("_chem_comp_atom.model_cartn_z"); feedback = ".model_Cartn_{x,y,z}"; } else if ((mask & 0x04) && (arr_x = data->get_arr("_chem_comp_atom.x")) && !arr_x->is_missing_all()) { arr_y = data->get_arr("_chem_comp_atom.y"); arr_z = data->get_arr("_chem_comp_atom.z"); feedback = ".{x,y,z}"; } if (!arr_x || !arr_y || !arr_z) { return nullptr; } PRINTFB(G, FB_Executive, FB_Details) " ExecutiveLoad-Detail: Detected chem_comp CIF (%s)\n", feedback ENDFB(G); const cif_array * arr_name = data->get_opt("_chem_comp_atom.atom_id"); const cif_array * arr_symbol = data->get_opt("_chem_comp_atom.type_symbol"); const cif_array * arr_resn = data->get_opt("_chem_comp_atom.comp_id"); const cif_array * arr_partial_charge = data->get_opt("_chem_comp_atom.partial_charge"); const cif_array * arr_formal_charge = data->get_opt("_chem_comp_atom.charge"); const cif_array * arr_stereo = data->get_opt("_chem_comp_atom.pdbx_stereo_config"); int nrows = arr_x->size(); AtomInfoType *ai; int atomCount = 0, nAtom = nrows; float * coord = VLAlloc(float, 3 * nAtom); int auto_show = RepGetAutoShowMask(G); for (int i = 0; i < nrows; i++) { if (arr_x->is_missing(i)) continue; VLACheck(*atInfoPtr, AtomInfoType, atomCount); ai = *atInfoPtr + atomCount; memset((void*) ai, 0, sizeof(AtomInfoType)); ai->rank = atomCount; ai->id = atomCount + 1; LexAssign(G, ai->name, arr_name->as_s(i)); LexAssign(G, ai->resn, arr_resn->as_s(i)); strncpy(ai->elem, arr_symbol->as_s(i), cElemNameLen); ai->partialCharge = arr_partial_charge->as_d(i); ai->formalCharge = arr_formal_charge->as_i(i); ai->hetatm = true; ai->visRep = auto_show; AtomInfoSetStereo(ai, arr_stereo->as_s(i)); AtomInfoAssignParameters(G, ai); AtomInfoAssignColors(G, ai); coord[atomCount * 3 + 0] = arr_x->as_d(i); coord[atomCount * 3 + 1] = arr_y->as_d(i); coord[atomCount * 3 + 2] = arr_z->as_d(i); atomCount++; } VLASize(coord, float, 3 * atomCount); VLASize(*atInfoPtr, AtomInfoType, atomCount); CoordSet ** csets = VLACalloc(CoordSet*, 1); csets[0] = CoordSetNew(G); csets[0]->NIndex = atomCount; csets[0]->Coord= pymol::vla_take_ownership(coord); return csets; } /** * Map model number to state (1-based) */ class ModelStateMapper { bool remap; std::map mapping; public: ModelStateMapper(bool remap) : remap(remap) {} int operator()(int model) { if (!remap) return model; int state = mapping[model]; if (!state) { state = mapping.size(); mapping[model] = state; } return state; } }; /** * Read ATOM_SITE * * atInfoPtr: atom info array to fill * info: data content configuration to populate with collected information * * return: models as VLA of coordinate sets */ static CoordSet** read_atom_site(PyMOLGlobals* G, const pymol::cif_data* data, AtomInfoType** atInfoPtr, CifContentInfo& info, bool discrete, bool quiet) { const cif_array *arr_x, *arr_y, *arr_z; const cif_array *arr_name = nullptr, *arr_resn = nullptr, *arr_resi = nullptr, *arr_chain = nullptr, *arr_symbol, *arr_group_pdb, *arr_alt, *arr_ins_code = nullptr, *arr_b, *arr_u, *arr_q, *arr_ID, *arr_mod_num, *arr_entity_id, *arr_segi; if ((arr_x = data->get_arr("_atom_site?cartn_x")) && (arr_y = data->get_arr("_atom_site?cartn_y")) && (arr_z = data->get_arr("_atom_site?cartn_z"))) { } else if ( (arr_x = data->get_arr("_atom_site?fract_x")) && (arr_y = data->get_arr("_atom_site?fract_y")) && (arr_z = data->get_arr("_atom_site?fract_z"))) { info.fractional = true; } else { return nullptr; } if (info.use_auth) { arr_name = data->get_arr("_atom_site.auth_atom_id"); arr_resn = data->get_arr("_atom_site.auth_comp_id"); arr_resi = data->get_arr("_atom_site.auth_seq_id"); arr_chain = data->get_arr("_atom_site.auth_asym_id"); arr_ins_code = data->get_arr("_atom_site.pdbx_pdb_ins_code"); } if (!arr_name) arr_name = data->get_arr("_atom_site.label_atom_id"); if (!arr_resn) arr_resn = data->get_opt("_atom_site.label_comp_id"); const cif_array *arr_label_seq_id = data->get_opt("_atom_site.label_seq_id"); // PDBe provides unique seq_ids for bulk het groups if (!arr_resi) arr_resi = data->get_arr("_atom_site.pdbe_label_seq_id"); if (!arr_resi) arr_resi = arr_label_seq_id; if (arr_name) { info.type = CIF_MMCIF; if (!quiet) { PRINTFB(G, FB_Executive, FB_Details) " ExecutiveLoad-Detail: Detected mmCIF\n" ENDFB(G); } } else { arr_name = data->get_opt("_atom_site_label"); info.type = CIF_CORE; if (!quiet) { PRINTFB(G, FB_Executive, FB_Details) " ExecutiveLoad-Detail: Detected small molecule CIF\n" ENDFB(G); } } arr_segi = data->get_opt("_atom_site.label_asym_id"); arr_symbol = data->get_opt("_atom_site?type_symbol", "_atom_site_label"); arr_group_pdb = data->get_opt("_atom_site.group_pdb"); arr_alt = data->get_opt("_atom_site.label_alt_id"); arr_b = data->get_opt("_atom_site?b_iso_or_equiv"); arr_u = data->get_arr("_atom_site?u_iso_or_equiv"); // nullptr arr_q = data->get_opt("_atom_site?occupancy"); arr_ID = data->get_opt("_atom_site.id", "_atom_site_label"); arr_mod_num = data->get_opt("_atom_site.pdbx_pdb_model_num"); arr_entity_id = data->get_arr("_atom_site.label_entity_id"); // nullptr const cif_array * arr_color = data->get_arr("_atom_site.pymol_color"); const cif_array * arr_reps = data->get_arr("_atom_site.pymol_reps"); const cif_array * arr_ss = data->get_opt("_atom_site.pymol_ss"); const cif_array * arr_label = data->get_opt("_atom_site.pymol_label"); const cif_array * arr_vdw = data->get_opt("_atom_site.pymol_vdw"); const cif_array * arr_elec_radius = data->get_opt("_atom_site.pymol_elec_radius"); const cif_array * arr_partial_charge = data->get_opt("_atom_site.pymol_partial_charge"); const cif_array * arr_formal_charge = data->get_opt("_atom_site.pdbx_formal_charge"); if (!arr_chain) arr_chain = arr_segi; ModelStateMapper model_to_state(!SettingGetGlobal_i(G, cSetting_pdb_honor_model_number)); int nrows = arr_x->size(); AtomInfoType *ai; int atomCount = 0; int auto_show = RepGetAutoShowMask(G); int first_model_num = model_to_state(arr_mod_num->as_i(0, 1)); CoordSet * cset; int mod_num, ncsets = 0; // collect number of atoms per model and number of coord sets std::map atoms_per_model; for (int i = 0, n = nrows; i < n; i++) { mod_num = model_to_state(arr_mod_num->as_i(i, 1)); if (mod_num < 1) { PRINTFB(G, FB_ObjectMolecule, FB_Errors) " Error: model numbers < 1 not supported: %d\n", mod_num ENDFB(G); return nullptr; } atoms_per_model[mod_num - 1] += 1; if (ncsets < mod_num) ncsets = mod_num; } // set up coordinate sets CoordSet ** csets = VLACalloc(CoordSet*, ncsets); for (auto it = atoms_per_model.begin(); it != atoms_per_model.end(); ++it) { csets[it->first] = cset = CoordSetNew(G); cset->Coord.resize(3 * it->second); cset->IdxToAtm.resize(it->second); } // mm_atom_site_label -> atom index (1-indexed) std::map name_dict; for (int i = 0, n = nrows; i < n; i++) { lexidx_t segi = LexIdx(G, arr_segi->as_s(i)); if (info.is_excluded_chain(segi)) { LexDec(G, segi); continue; } mod_num = model_to_state(arr_mod_num->as_i(i, 1)); // copy coordinates into coord set cset = csets[mod_num - 1]; int idx = cset->NIndex++; float * coord = cset->coordPtr(idx); coord[0] = arr_x->as_d(i); coord[1] = arr_y->as_d(i); coord[2] = arr_z->as_d(i); if (!discrete && ncsets > 1) { // mm_atom_site_label aggregate std::string key = make_mm_atom_site_label(G, arr_chain->as_s(i), arr_resn->as_s(i), arr_resi->as_s(i), arr_ins_code ? arr_ins_code->as_s(i) : "", arr_name->as_s(i), arr_alt->as_s(i)); // check if this is not a new atom if (mod_num != first_model_num) { int atm = name_dict[key] - 1; if (atm >= 0) { cset->IdxToAtm[idx] = atm; continue; } } name_dict[key] = atomCount + 1; } cset->IdxToAtm[idx] = atomCount; VLACheck(*atInfoPtr, AtomInfoType, atomCount); ai = *atInfoPtr + atomCount; ai->rank = atomCount; ai->alt[0] = arr_alt->as_s(i)[0]; ai->id = arr_ID->as_i(i); ai->b = (arr_u != nullptr) ? arr_u->as_d(i) * 78.95683520871486 : // B = U * 8 * pi^2 arr_b->as_d(i); ai->q = arr_q->as_d(i, 1.0); strncpy_alpha(ai->elem, arr_symbol->as_s(i), cElemNameLen); ai->chain = LexIdx(G, arr_chain->as_s(i)); ai->name = LexIdx(G, arr_name->as_s(i)); ai->resn = LexIdx(G, arr_resn->as_s(i)); ai->segi = std::move(segi); // steal reference if ('H' == arr_group_pdb->as_s(i)[0]) { ai->hetatm = true; ai->flags = cAtomFlag_ignore; } ai->resv = arr_resi->as_i(i); ai->temp1 = arr_label_seq_id->as_i(i); // for add_missing_ca if (arr_ins_code) { ai->setInscode(arr_ins_code->as_s(i)[0]); } if (arr_reps) { ai->visRep = arr_reps->as_i(i, auto_show); ai->flags |= cAtomFlag_inorganic; // suppress auto_show_classified } else { ai->visRep = auto_show; } ai->ssType[0] = arr_ss->as_s(i)[0]; ai->formalCharge = arr_formal_charge->as_i(i); ai->partialCharge = arr_partial_charge->as_d(i); ai->elec_radius = arr_elec_radius->as_d(i); ai->vdw = arr_vdw->as_d(i); ai->label = LexIdx(G, arr_label->as_s(i)); AtomInfoAssignParameters(G, ai); if (arr_color) { ai->color = arr_color->as_i(i); } else { AtomInfoAssignColors(G, ai); } if (arr_entity_id != nullptr) { AtomInfoSetEntityId(G, ai, arr_entity_id->as_s(i)); } atomCount++; } VLASize(*atInfoPtr, AtomInfoType, atomCount); return csets; } /** * Update `info` with entity polymer information */ static bool read_entity_poly(PyMOLGlobals * G, const pymol::cif_data * data, CifContentInfo &info) { const cif_array *arr_entity_id = nullptr, *arr_type = nullptr, *arr_num = nullptr, *arr_mon_id = nullptr; if (!(arr_entity_id = data->get_arr("_entity_poly.entity_id")) || !(arr_type = data->get_arr("_entity_poly.type"))) return false; const cif_array * arr_seq_one_letter = data->get_arr("_entity_poly.pdbx_seq_one_letter_code"); // polypeptides for (unsigned i = 0, n = arr_entity_id->size(); i < n; i++) { if (!strncasecmp("polypeptide", arr_type->as_s(i), 11)) { const char * entity_id = arr_entity_id->as_s(i); info.polypeptide_entities.insert(entity_id); if (arr_seq_one_letter) { // sequences auto& entity_sequence = info.sequences[entity_id]; const char * one = arr_seq_one_letter->as_s(i); for (int i = 0; *one; ++one) { if (strchr(" \t\r\n", *one)) // skip whitespace continue; if (*one == '(') { const char * end = strchr(one, ')'); if (!end) break; std::string three(one + 1, end - one - 1); entity_sequence.set(++i, three.c_str()); one = end; } else { entity_sequence.set(++i, aa_get_three_letter(*one)); } } } } } if (!arr_seq_one_letter) { // sequences if ((arr_entity_id = data->get_arr("_entity_poly_seq.entity_id")) && (arr_num = data->get_arr("_entity_poly_seq.num")) && (arr_mon_id = data->get_arr("_entity_poly_seq.mon_id"))) { for (unsigned i = 0, n = arr_entity_id->size(); i < n; i++) { info.sequences[arr_entity_id->as_s(i)].set( arr_num->as_i(i), arr_mon_id->as_s(i)); } } } return true; } /** * Sub-routine for `add_missing_ca` * * @param i_ref Atom index of the next observed residue if `!at_terminus`, * otherwise of the last observed residue in this chain. * @param at_terminus True if adding residues beyond the last observed residue * in this chain. */ static void add_missing_ca_sub(PyMOLGlobals * G, pymol::vla& atInfo, int& current_resv, int& atomCount, const int i_ref, int resv, const seqvec_t * current_seq, const char * entity_id, bool at_terminus = true) { if (!atInfo[i_ref].temp1) return; if (current_resv == 0) { at_terminus = true; } for (++current_resv; current_resv < resv; ++current_resv) { const char * resn = current_seq->get(current_resv); if (!resn) continue; int added_resv = current_resv + (atInfo[i_ref].resv - atInfo[i_ref].temp1); if (!at_terminus && ((i_ref > 0 && added_resv <= atInfo[i_ref - 1].resv) || added_resv >= atInfo[i_ref].resv)) { // don't use insertion codes continue; } AtomInfoType *ai = atInfo.check(atomCount); ai->rank = atomCount; ai->id = -1; ai->elem[0] = 'C'; LexAssign(G, ai->name, "CA"); LexAssign(G, ai->resn, resn); LexAssign(G, ai->segi, atInfo[i_ref].segi); LexAssign(G, ai->chain, atInfo[i_ref].chain); ai->temp1 = current_resv; ai->resv = added_resv; AtomInfoAssignParameters(G, ai); AtomInfoAssignColors(G, ai); AtomInfoSetEntityId(G, ai, entity_id); ++atomCount; } } /** * Read missing residues / full sequence * * This function relies on the label_seq_id numbering which must be available * in the `temp1` kludge field. * * Use the _entity_poly and _entity_poly_seq information to identify * missing residues in partially present chains. Add CA atoms for those * to present complete sequences in the sequence viewer. */ static bool add_missing_ca(PyMOLGlobals * G, pymol::vla& atInfo, CifContentInfo &info) { int oldAtomCount = atInfo.size(); int atomCount = oldAtomCount; int current_resv = 0; const seqvec_t * current_seq = nullptr; const char * current_entity_id = ""; for (int i = 0; i < oldAtomCount; ++i) { const char * entity_id = LexStr(G, atInfo[i].custom); if (i == 0 || atInfo[i].chain != atInfo[i - 1].chain || strcmp(entity_id, current_entity_id)) { // finish prev seq if (current_seq && i > 0) { add_missing_ca_sub(G, atInfo, current_resv, atomCount, i - 1, current_seq->size() + 1, current_seq, current_entity_id); } current_resv = 0; current_seq = nullptr; current_entity_id = entity_id; if (info.is_polypeptide(entity_id) && !info.is_excluded_chain(atInfo[i].segi)) { // get new sequence auto it = info.sequences.find(entity_id); if (it != info.sequences.end()) { current_seq = &it->second; } } } else if (i > 0 && atInfo[i].temp1 == atInfo[i - 1].temp1) { continue; } if (current_seq) { add_missing_ca_sub(G, atInfo, current_resv, atomCount, i, atInfo[i].temp1, current_seq, entity_id, false); } } // finish last seq if (current_seq) { add_missing_ca_sub(G, atInfo, current_resv, atomCount, oldAtomCount - 1, current_seq->size() + 1, current_seq, current_entity_id); } atInfo.resize(atomCount); return true; } /** * Read secondary structure from STRUCT_CONF or STRUCT_SHEET_RANGE */ static bool read_ss_(PyMOLGlobals * G, const pymol::cif_data * data, char ss, sshashmap &ssrecords, CifContentInfo &info) { const cif_array *arr_beg_chain = nullptr, *arr_beg_resi = nullptr, *arr_end_chain = nullptr, *arr_end_resi = nullptr, *arr_beg_ins_code = nullptr, *arr_end_ins_code = nullptr; std::string prefix = "_struct_conf."; if (ss == 'S') prefix = "_struct_sheet_range."; if (info.use_auth && (arr_beg_chain = data->get_arr((prefix + "beg_auth_asym_id").c_str())) && (arr_beg_resi = data->get_arr((prefix + "beg_auth_seq_id").c_str())) && (arr_end_chain = data->get_arr((prefix + "end_auth_asym_id").c_str())) && (arr_end_resi = data->get_arr((prefix + "end_auth_seq_id").c_str()))) { // auth only arr_beg_ins_code = data->get_arr((prefix + "pdbx_beg_pdb_ins_code").c_str()); arr_end_ins_code = data->get_arr((prefix + "pdbx_end_pdb_ins_code").c_str()); } else if ( !(arr_beg_chain = data->get_arr((prefix + "beg_label_asym_id").c_str())) || !(arr_beg_resi = data->get_arr((prefix + "beg_label_seq_id").c_str())) || !(arr_end_chain = data->get_arr((prefix + "end_label_asym_id").c_str())) || !(arr_end_resi = data->get_arr((prefix + "end_label_seq_id").c_str()))) { return false; } const cif_array *arr_conf_type_id = (ss == 'S') ? nullptr : data->get_arr("_struct_conf.conf_type_id"); int nrows = arr_beg_chain->size(); sshashkey key; for (int i = 0; i < nrows; i++) { // first character of conf_type_id (one of H, S, T) char ss_i = arr_conf_type_id ? arr_conf_type_id->as_s(i)[0] : ss; // exclude TURN_* (include HELX_* and STRN) if (ss_i == 'T') continue; key.assign( LexBorrow(G, arr_beg_chain->as_s(i)), arr_beg_resi->as_i(i), arr_beg_ins_code ? arr_beg_ins_code->as_s(i)[0] : '\0'); sshashvalue &value = ssrecords[key]; value.ss = ss_i; value.end.assign( LexBorrow(G, arr_end_chain->as_s(i)), arr_end_resi->as_i(i), arr_end_ins_code ? arr_end_ins_code->as_s(i)[0] : '\0'); } return true; } /** * Read secondary structure */ static bool read_ss(PyMOLGlobals * G, const pymol::cif_data * datablock, pymol::vla& atInfo, CifContentInfo &info) { sshashmap ssrecords; read_ss_(G, datablock, 'H', ssrecords, info); read_ss_(G, datablock, 'S', ssrecords, info); if (ssrecords.empty()) return false; AtomInfoType *aj, *ai, *atoms_end = atInfo + VLAGetSize(atInfo); sshashkey key; for (ai = atInfo.data(); ai < atoms_end;) { // advance to the next residue aj = ai; while (++ai < atoms_end && AtomInfoSameResidue(G, aj, ai)) {} // check if residue is the beginning of a secondary structure element key.assign(aj->chain, aj->resv, aj->inscode); sshashmap::iterator it = ssrecords.find(key); if (it == ssrecords.end()) continue; sshashvalue &value = it->second; // assign ss type to all atoms in the segment bool hit_end_residue = false; for (; aj < atoms_end; aj++) { if (value.end.compare(*aj) == 0) { hit_end_residue = true; } else if (hit_end_residue) { break; } aj->ssType[0] = value.ss; } } return true; } /** * Read the SCALEn matrix into 4x4 `matrix` */ static bool read_atom_site_fract_transf(PyMOLGlobals * G, const pymol::cif_data * data, float * matrix) { const cif_array *arr_transf[12]; if (!(arr_transf[0] = data->get_arr("_atom_sites.fract_transf_matrix[1][1]", "_atom_sites_fract_tran_matrix_11"))) return false; arr_transf[1] = data->get_opt("_atom_sites.fract_transf_matrix[1][2]", "_atom_sites_fract_tran_matrix_12"); arr_transf[2] = data->get_opt("_atom_sites.fract_transf_matrix[1][3]", "_atom_sites_fract_tran_matrix_13"); arr_transf[3] = data->get_opt("_atom_sites.fract_transf_vector[1]", "_atom_sites_fract_tran_vector_1"); arr_transf[4] = data->get_opt("_atom_sites.fract_transf_matrix[2][1]", "_atom_sites_fract_tran_matrix_21"); arr_transf[5] = data->get_opt("_atom_sites.fract_transf_matrix[2][2]", "_atom_sites_fract_tran_matrix_22"); arr_transf[6] = data->get_opt("_atom_sites.fract_transf_matrix[2][3]", "_atom_sites_fract_tran_matrix_23"); arr_transf[7] = data->get_opt("_atom_sites.fract_transf_vector[2]", "_atom_sites_fract_tran_vector_2"); arr_transf[8] = data->get_opt("_atom_sites.fract_transf_matrix[3][1]", "_atom_sites_fract_tran_matrix_31"); arr_transf[9] = data->get_opt("_atom_sites.fract_transf_matrix[3][2]", "_atom_sites_fract_tran_matrix_32"); arr_transf[10] = data->get_opt("_atom_sites.fract_transf_matrix[3][3]", "_atom_sites_fract_tran_matrix_33"); arr_transf[11] = data->get_opt("_atom_sites.fract_transf_vector[3]", "_atom_sites_fract_tran_vector_3"); for (int i = 0; i < 12; ++i) matrix[i] = arr_transf[i]->as_d(0); zero3f(matrix + 12); matrix[15] = 1.f; return true; } /** * Read anisotropic temperature factors from ATOM_SITE or ATOM_SITE_ANISOTROP */ static bool read_atom_site_aniso(PyMOLGlobals * G, const pymol::cif_data * data, pymol::vla& atInfo) { const cif_array *arr_label, *arr_u11, *arr_u22, *arr_u33, *arr_u12, *arr_u13, *arr_u23; bool mmcif = true; float factor = 1.0; if ((arr_label = data->get_arr("_atom_site_anisotrop.id", "_atom_site.id"))) { // mmCIF, assume _atom_site_id is numeric and look up by atom ID // Warning: according to mmCIF spec, id can be any alphanumeric string } else if ((arr_label = data->get_arr("_atom_site_aniso_label"))) { // small molecule CIF, lookup by atom name mmcif = false; } else { return false; } if ((arr_u11 = data->get_arr("_atom_site_anisotrop.u[1][1]", "_atom_site_aniso_u_11", "_atom_site.aniso_u[1][1]"))) { // U arr_u22 = data->get_opt("_atom_site_anisotrop.u[2][2]", "_atom_site_aniso_u_22", "_atom_site.aniso_u[2][2]"); arr_u33 = data->get_opt("_atom_site_anisotrop.u[3][3]", "_atom_site_aniso_u_33", "_atom_site.aniso_u[3][3]"); arr_u12 = data->get_opt("_atom_site_anisotrop.u[1][2]", "_atom_site_aniso_u_12", "_atom_site.aniso_u[1][2]"); arr_u13 = data->get_opt("_atom_site_anisotrop.u[1][3]", "_atom_site_aniso_u_13", "_atom_site.aniso_u[1][3]"); arr_u23 = data->get_opt("_atom_site_anisotrop.u[2][3]", "_atom_site_aniso_u_23", "_atom_site.aniso_u[2][3]"); } else if ( (arr_u11 = data->get_arr("_atom_site_anisotrop.b[1][1]", "_atom_site_aniso_b_11", "_atom_site.aniso_b[1][1]"))) { // B factor = 0.012665147955292222; // U = B / (8 * pi^2) arr_u22 = data->get_opt("_atom_site_anisotrop.b[2][2]", "_atom_site_aniso_b_22", "_atom_site.aniso_b[2][2]"); arr_u33 = data->get_opt("_atom_site_anisotrop.b[3][3]", "_atom_site_aniso_b_33", "_atom_site.aniso_b[3][3]"); arr_u12 = data->get_opt("_atom_site_anisotrop.b[1][2]", "_atom_site_aniso_b_12", "_atom_site.aniso_b[1][2]"); arr_u13 = data->get_opt("_atom_site_anisotrop.b[1][3]", "_atom_site_aniso_b_13", "_atom_site.aniso_b[1][3]"); arr_u23 = data->get_opt("_atom_site_anisotrop.b[2][3]", "_atom_site_aniso_b_23", "_atom_site.aniso_b[2][3]"); } else { return false; } AtomInfoType *ai; int nAtom = VLAGetSize(atInfo); std::map id_dict; std::map name_dict; // build dictionary for (int i = 0; i < nAtom; i++) { ai = atInfo + i; if (mmcif) { id_dict[ai->id] = ai; } else { std::string key(LexStr(G, ai->name)); name_dict[key] = ai; } } // read aniso table for (unsigned i = 0; i < arr_u11->size(); i++) { ai = nullptr; if (mmcif) { find1(id_dict, ai, arr_label->as_i(i)); } else { find1(name_dict, ai, arr_label->as_s(i)); } if (!ai) { // expected for multi-models continue; } float * anisou = ai->get_anisou(); anisou[0] = arr_u11->as_d(i) * factor; anisou[1] = arr_u22->as_d(i) * factor; anisou[2] = arr_u33->as_d(i) * factor; anisou[3] = arr_u12->as_d(i) * factor; anisou[4] = arr_u13->as_d(i) * factor; anisou[5] = arr_u23->as_d(i) * factor; } return true; } /** * Read GEOM_BOND * * return: BondType VLA */ static pymol::vla read_geom_bond(PyMOLGlobals * G, const pymol::cif_data * data, const pymol::vla& atInfo) { const cif_array *arr_ID_1, *arr_ID_2; if ((arr_ID_1 = data->get_arr("_geom_bond.atom_site_id_1", "_geom_bond_atom_site_label_1")) == nullptr || (arr_ID_2 = data->get_arr("_geom_bond.atom_site_id_2", "_geom_bond_atom_site_label_2")) == nullptr) return {}; const cif_array *arr_symm_1 = data->get_opt("_geom_bond?site_symmetry_1"); const cif_array *arr_symm_2 = data->get_opt("_geom_bond?site_symmetry_2"); int nrows = arr_ID_1->size(); int nAtom = VLAGetSize(atInfo); int nBond = 0; auto bondvla = pymol::vla(6 * nAtom); // name -> atom index std::map name_dict; // build dictionary for (int i = 0; i < nAtom; i++) { std::string key(LexStr(G, atInfo[i].name)); name_dict[key] = i; } // read table for (int i = 0; i < nrows; i++) { std::string key1(arr_ID_1->as_s(i)); std::string key2(arr_ID_2->as_s(i)); int i1, i2; if (find2(name_dict, i1, key1, i2, key2)) { auto const bond = bondvla.check(nBond); if (BondTypeInit3(G, bond, i1, i2, // arr_symm_1->as_s(i), // arr_symm_2->as_s(i))) { ++nBond; } } else { PRINTFB(G, FB_Executive, FB_Details) " Executive-Detail: _geom_bond name lookup failed: %s %s\n", key1.c_str(), key2.c_str() ENDFB(G); } } if (nBond) { VLASize(bondvla, BondType, nBond); } else { VLAFreeP(bondvla); } return bondvla; } /** * Read CHEMICAL_CONN_BOND * * return: BondType VLA */ static pymol::vla read_chemical_conn_bond(PyMOLGlobals * G, const pymol::cif_data * data) { const cif_array *arr_number, *arr_atom_1, *arr_atom_2, *arr_type; if ((arr_number = data->get_arr("_atom_site?chemical_conn_number")) == nullptr || (arr_atom_1 = data->get_arr("_chemical_conn_bond?atom_1")) == nullptr || (arr_atom_2 = data->get_arr("_chemical_conn_bond?atom_2")) == nullptr || (arr_type = data->get_arr("_chemical_conn_bond?type")) == nullptr) return {}; int nAtom = arr_number->size(); int nBond = arr_atom_1->size(); auto bondvla = pymol::vla(nBond); auto bond = bondvla.data(); // chemical_conn_number -> atom index std::map number_dict; // build dictionary for (int i = 0; i < nAtom; i++) { number_dict[arr_number->as_i(i)] = i; } // read table int i1, i2; for (int i = 0; i < nBond; i++) { if (find2(number_dict, i1, arr_atom_1->as_i(i), i2, arr_atom_2->as_i(i))) { BondTypeInit2(bond++, i1, i2, bondOrderLookup(arr_type->as_s(i))); } else { PRINTFB(G, FB_Executive, FB_Details) " Executive-Detail: _chemical_conn_bond name lookup failed\n" ENDFB(G); } } return bondvla; } /** * Read bonds from STRUCT_CONN * * Output: * cset->TmpBond * cset->NTmpBond */ static bool read_struct_conn_(PyMOLGlobals * G, const pymol::cif_data * data, const pymol::vla& atInfo, CoordSet * cset, CifContentInfo &info) { const cif_array *col_type_id = data->get_arr("_struct_conn.conn_type_id"); if (!col_type_id) return false; const cif_array *col_asym_id[2] = {nullptr, nullptr}, *col_comp_id[2] = {nullptr, nullptr}, *col_seq_id[2] = {nullptr, nullptr}, *col_atom_id[2] = {nullptr, nullptr}, *col_alt_id[2] = {nullptr, nullptr}, *col_ins_code[2] = {nullptr, nullptr}, *col_symm[2] = {nullptr, nullptr}; if (info.use_auth) { col_asym_id[0] = data->get_arr("_struct_conn.ptnr1_auth_asym_id"); col_comp_id[0] = data->get_arr("_struct_conn.ptnr1_auth_comp_id"); col_seq_id[0] = data->get_arr("_struct_conn.ptnr1_auth_seq_id"); col_atom_id[0] = data->get_arr("_struct_conn.ptnr1_auth_atom_id"); col_asym_id[1] = data->get_arr("_struct_conn.ptnr2_auth_asym_id"); col_comp_id[1] = data->get_arr("_struct_conn.ptnr2_auth_comp_id"); col_seq_id[1] = data->get_arr("_struct_conn.ptnr2_auth_seq_id"); col_atom_id[1] = data->get_arr("_struct_conn.ptnr2_auth_atom_id"); col_alt_id[0] = data->get_arr("_struct_conn.pdbx_ptnr1_auth_alt_id"); col_alt_id[1] = data->get_arr("_struct_conn.pdbx_ptnr2_auth_alt_id"); // auth only col_ins_code[0] = data->get_arr("_struct_conn.pdbx_ptnr1_pdb_ins_code"); col_ins_code[1] = data->get_arr("_struct_conn.pdbx_ptnr2_pdb_ins_code"); } // for assembly chain filtering const cif_array *col_label_asym_id[2] = { data->get_arr("_struct_conn.ptnr1_label_asym_id"), data->get_arr("_struct_conn.ptnr2_label_asym_id") }; if ((!col_asym_id[0] && !(col_asym_id[0] = col_label_asym_id[0])) || (!col_comp_id[0] && !(col_comp_id[0] = data->get_arr("_struct_conn.ptnr1_label_comp_id"))) || (!col_seq_id[0] && !(col_seq_id[0] = data->get_arr("_struct_conn.ptnr1_label_seq_id"))) || (!col_atom_id[0] && !(col_atom_id[0] = data->get_arr("_struct_conn.ptnr1_label_atom_id"))) || (!col_asym_id[1] && !(col_asym_id[1] = col_label_asym_id[1])) || (!col_comp_id[1] && !(col_comp_id[1] = data->get_arr("_struct_conn.ptnr2_label_comp_id"))) || (!col_seq_id[1] && !(col_seq_id[1] = data->get_arr("_struct_conn.ptnr2_label_seq_id"))) || (!col_atom_id[1] && !(col_atom_id[1] = data->get_arr("_struct_conn.ptnr2_label_atom_id")))) return false; if (!col_alt_id[0]) col_alt_id[0] = data->get_opt("_struct_conn.pdbx_ptnr1_label_alt_id"); if (!col_alt_id[1]) col_alt_id[1] = data->get_opt("_struct_conn.pdbx_ptnr2_label_alt_id"); col_symm[0] = data->get_opt("_struct_conn.ptnr1_symmetry"); col_symm[1] = data->get_opt("_struct_conn.ptnr2_symmetry"); const cif_array *col_order = data->get_opt("_struct_conn.pdbx_value_order"); int nrows = col_type_id->size(); int nAtom = VLAGetSize(atInfo); int nBond = 0; cset->TmpBond = pymol::vla(6 * nAtom); // identifiers -> coord set index std::map name_dict; for (int i = 0; i < nAtom; i++) { int idx = cset->atmToIdx(i); if (idx != -1) name_dict[make_mm_atom_site_label(G, atInfo + i)] = idx; } #ifdef _PYMOL_IP_EXTRAS bool metalc_as_zero = SettingGetGlobal_b(G, cSetting_cif_metalc_as_zero_order_bonds); #endif for (int i = 0; i < nrows; i++) { const char * type_id = col_type_id->as_s(i); if (strncasecmp(type_id, "covale", 6) && strcasecmp(type_id, "modres") && #ifdef _PYMOL_IP_EXTRAS !(metalc_as_zero && strcasecmp(type_id, "metalc") == 0) && #endif strcasecmp(type_id, "disulf")) // ignore non-covalent bonds (saltbr, hydrog) continue; std::string key[2]; for (int j = 0; j < 2; j++) { const char * asym_id = col_asym_id[j]->as_s(i); if (col_label_asym_id[j] && info.is_excluded_chain(col_label_asym_id[j]->as_s(i))) goto next_row; // doen't work with label_seq_id and bulk solvent const char * seq_id = col_seq_id[j]->as_s(i); if (!seq_id[0]) goto next_row; key[j] = make_mm_atom_site_label(G, asym_id, col_comp_id[j]->as_s(i), seq_id, col_ins_code[j] ? col_ins_code[j]->as_s(i) : "", col_atom_id[j]->as_s(i), col_alt_id[j]->as_s(i)); } int i1, i2; if (find2(name_dict, i1, key[0], i2, key[1])) { // zero-order bond for metal coordination int order = strcasecmp(type_id, "metalc") ? 1 : 0; if (order) { order = bondOrderLookup(col_order->as_s(i)); } auto const bond = cset->TmpBond.check(nBond); if (BondTypeInit3(G, bond, i1, i2, // col_symm[0]->as_s(i), // col_symm[1]->as_s(i), order)) { ++nBond; } } else { PRINTFB(G, FB_Executive, FB_Details) " Executive-Detail: _struct_conn name lookup failed: %s %s\n", key[0].c_str(), key[1].c_str() ENDFB(G); } // label to "continue" from inner for-loop next_row:; } if (nBond) { VLASize(cset->TmpBond, BondType, nBond); cset->NTmpBond = nBond; } else { VLAFreeP(cset->TmpBond); } return true; } /** * Read bonds from CHEM_COMP_BOND * * return: BondType VLA */ static pymol::vla read_chem_comp_bond(PyMOLGlobals * G, const pymol::cif_data * data, const pymol::vla& atInfo) { const cif_array *col_ID_1, *col_ID_2, *col_comp_id; if ((col_ID_1 = data->get_arr("_chem_comp_bond.atom_id_1")) == nullptr || (col_ID_2 = data->get_arr("_chem_comp_bond.atom_id_2")) == nullptr || (col_comp_id = data->get_arr("_chem_comp_bond.comp_id")) == nullptr) return {}; // "_chem_comp_bond.type" seems to be non-standard here. It's found in the // wild with values like "double" and "aromatic". mmcif_nmr-star.dic defines // it, but with different vocabulary (e.g. "amide", "ether", etc.). const cif_array *col_order = data->get_opt( "_chem_comp_bond.value_order", "_chem_comp_bond.type"); int nrows = col_ID_1->size(); int nAtom = VLAGetSize(atInfo); int nBond = 0; auto bondvla = pymol::vla(6 * nAtom); auto bond = bondvla.data(); // name -> atom index std::map name_dict; for (int i = 0; i < nAtom; i++) { std::string key(LexStr(G, atInfo[i].name)); name_dict[key] = i; } for (int i = 0; i < nrows; i++) { std::string key1(col_ID_1->as_s(i)); std::string key2(col_ID_2->as_s(i)); const char * order = col_order->as_s(i); int i1, i2; if (find2(name_dict, i1, key1, i2, key2)) { int order_value = bondOrderLookup(order); nBond++; BondTypeInit2(bond++, i1, i2, order_value); } else { PRINTFB(G, FB_Executive, FB_Details) " Executive-Detail: _chem_comp_bond name lookup failed: %s %s\n", key1.c_str(), key2.c_str() ENDFB(G); } } if (nBond) { VLASize(bondvla, BondType, nBond); } else { VLAFreeP(bondvla); } return bondvla; } /** * Read bonds from _pymol_bond (non-standard extension) * * return: BondType VLA */ static pymol::vla read_pymol_bond(PyMOLGlobals * G, const pymol::cif_data * data, const pymol::vla& atInfo) { const cif_array *col_ID_1, *col_ID_2, *col_order; if ((col_ID_1 = data->get_arr("_pymol_bond.atom_site_id_1")) == nullptr || (col_ID_2 = data->get_arr("_pymol_bond.atom_site_id_2")) == nullptr || (col_order = data->get_arr("_pymol_bond.order")) == nullptr) return {}; int nrows = col_ID_1->size(); int nAtom = VLAGetSize(atInfo); auto bondvla = pymol::vla(nrows); auto bond = bondvla.data(); // ID -> atom index std::map id_dict; for (int atm = 0; atm < nAtom; ++atm) { id_dict[atInfo[atm].id] = atm; } for (int i = 0; i < nrows; i++) { auto key1 = col_ID_1->as_i(i); auto key2 = col_ID_2->as_i(i); auto order_value = col_order->as_i(i); int i1, i2; if (find2(id_dict, i1, key1, i2, key2)) { BondTypeInit2(bond++, i1, i2, order_value); } else { PRINTFB(G, FB_Executive, FB_Details) " Executive-Detail: _pymol_bond name lookup failed: %d %d\n", key1, key2 ENDFB(G); } } return bondvla; } /** * Create a new (multi-state) object-molecule from datablock */ ObjectMolecule *ObjectMoleculeReadCifData(PyMOLGlobals * G, const pymol::cif_data * datablock, int discrete, bool quiet) { CoordSet ** csets = nullptr; int ncsets; CifContentInfo info(G, SettingGetGlobal_b(G, cSetting_cif_use_auth)); const char * assembly_id = SettingGetGlobal_s(G, cSetting_assembly); // title "echo tag" const char * title = datablock->get_opt("_struct.title")->as_s(); if (!quiet && title[0] && strstr(SettingGetGlobal_s(G, cSetting_pdb_echo_tags), "TITLE")) { PRINTFB(G, FB_ObjectMolecule, FB_Details) "TITLE %s\n", title ENDFB(G); } if (assembly_id && assembly_id[0]) { if (!get_assembly_chains(G, datablock, info.chains_filter, assembly_id)) PRINTFB(G, FB_Executive, FB_Details) " ExecutiveLoad-Detail: No such assembly: '%s'\n", assembly_id ENDFB(G); } // allocate ObjectMolecule ObjectMolecule * I = new ObjectMolecule(G, (discrete > 0)); I->Color = AtomInfoUpdateAutoColor(G); // read coordsets from datablock if ((csets = read_atom_site( G, datablock, &I->AtomInfo, info, I->DiscreteFlag, quiet))) { // anisou read_atom_site_aniso(G, datablock, I->AtomInfo); // secondary structure read_ss(G, datablock, I->AtomInfo, info); // trace atoms read_pdbx_coordinate_model(G, datablock, I); // polymer information read_entity_poly(G, datablock, info); // missing residues if (!I->DiscreteFlag && !SettingGetGlobal_i(G, cSetting_retain_order)) { add_missing_ca(G, I->AtomInfo, info); } } else if ((csets = read_chem_comp_atom_model(G, datablock, &I->AtomInfo))) { info.type = CIF_CHEM_COMP; } else { DeleteP(I); return nullptr; } // get number of atoms and coordinate sets I->NAtom = VLAGetSize(I->AtomInfo); ncsets = VLAGetSize(csets); // initialize the new coordsets (not data, but indices, etc.) for (int i = 0; i < ncsets; i++) { if (csets[i]) { csets[i]->Obj = I; if (csets[i]->IdxToAtm.empty()) csets[i]->enumIndices(); } } // get coordinate sets into ObjectMolecule VLAFreeP(I->CSet); I->CSet = pymol::vla_take_ownership(csets); I->NCSet = ncsets; I->updateAtmToIdx(); // handle symmetry and update fractional -> cartesian I->Symmetry.reset(read_symmetry(G, datablock)); if (I->Symmetry) { float sca[16]; if (info.fractional) { for (int i = 0; i < ncsets; i++) { if (csets[i]) CoordSetFracToReal(csets[i], &I->Symmetry->Crystal); } } else if (info.chains_filter.empty() && read_atom_site_fract_transf(G, datablock, sca)) { // don't do this for assemblies for (int i = 0; i < ncsets; i++) { if (csets[i]) CoordSetInsureOrthogonal(G, csets[i], sca, &I->Symmetry->Crystal); } } } // coord set to use for distance based bonding and for attaching TmpBond CoordSet * cset = VLAGetFirstNonNULL(csets); // create bonds switch (info.type) { case CIF_CHEM_COMP: I->Bond = read_chem_comp_bond(G, datablock, I->AtomInfo); break; case CIF_CORE: I->Bond = read_geom_bond(G, datablock, I->AtomInfo); if (!I->Bond) I->Bond = read_chemical_conn_bond(G, datablock); break; case CIF_MMCIF: I->Bond = read_pymol_bond(G, datablock, I->AtomInfo); if (cset && !I->Bond) { // sort atoms internally ObjectMoleculeSort(I); // bonds from file, goes to cset->TmpBond read_struct_conn_(G, datablock, I->AtomInfo, cset, info); // macromolecular bonding bond_dict_t bond_dict_local; if (read_chem_comp_bond_dict(datablock, bond_dict_local)) { ObjectMoleculeConnectComponents(I, &bond_dict_local); } else if(SettingGetGlobal_i(G, cSetting_connect_mode) == 4) { // read components.cif ObjectMoleculeConnectComponents(I); } } break; case CIF_UNKNOWN: printf("coding error...\n"); } // if non of the above created I->Bond, then do distance based bonding if (!I->Bond) { if (I->DiscreteFlag) { ObjectMoleculeConnectDiscrete(I, true, 3); } else if (cset) { ObjectMoleculeConnect(I, cset, true, 3); } // guess valences for distance based bonding if (SettingGetGlobal_b(G, cSetting_pdb_hetatm_guess_valences)) { ObjectMoleculeGuessValences(I, 0, nullptr, nullptr, false); } } else { if (!I->NBond) I->NBond = VLAGetSize(I->Bond); // bonds from coordset if (cset && cset->TmpBond && cset->NTmpBond) { for (int i = 0; i < cset->NTmpBond; ++i) { ObjectMoleculeAddBond2(I, cset->IdxToAtm[cset->TmpBond[i].index[0]], cset->IdxToAtm[cset->TmpBond[i].index[1]], cset->TmpBond[i].order); } VLASize(I->Bond, BondType, I->NBond); VLAFreeP(cset->TmpBond); } } // assemblies if (cset && !info.chains_filter.empty()) { PRINTFB(G, FB_Executive, FB_Details) " ExecutiveLoad-Detail: Creating assembly '%s'\n", assembly_id ENDFB(G); CoordSet **assembly_csets = read_pdbx_struct_assembly(G, datablock, I->AtomInfo, cset, assembly_id); ObjectMoleculeSetAssemblyCSets(I, assembly_csets); } // computationally intense update tasks SceneCountFrames(G); I->invalidate(cRepAll, cRepInvAll, -1); ObjectMoleculeUpdateIDNumbers(I); ObjectMoleculeUpdateNonbonded(I); ObjectMoleculeAutoDisableAtomNameWildcard(I); // hetatm classification if `group_PDB` record missing if (info.type == CIF_MMCIF && !datablock->get_arr("_atom_site.group_pdb")) { I->need_hetatm_classification = true; } return I; } /** * @brief Structure to hold reciprocal space parameters */ struct ReciprocalSpaceParams { glm::vec3 abc{}; glm::vec3 angles{}; float volume{}; }; /** * @brief Calculate the reciprocal space parameters for a given crystal * @param crystal The crystal object * @return The reciprocal space parameters * @note Fundamentals of Crystallography, 3rd edition (Table 2.1) */ ReciprocalSpaceParams CalculateReciprocalSpaceParam(const CCrystal& crystal) { auto dims = crystal.dims(); auto a = dims[0]; auto b = dims[1]; auto c = dims[2]; auto angles = crystal.angles(); auto alpha = glm::radians(angles[0]); auto beta = glm::radians(angles[1]); auto gamma = glm::radians(angles[2]); auto cosAlpha = std::cos(alpha); auto cosBeta = std::cos(beta); auto cosGamma = std::cos(gamma); auto sinAlpha = std::sin(alpha); auto sinBeta = std::sin(beta); auto sinGamma = std::sin(gamma); auto volume = a * b * c * std::sqrt(1.0 - cosAlpha * cosAlpha - cosBeta * cosBeta - cosGamma * cosGamma + 2 * cosAlpha * cosBeta * cosGamma); auto inv_volume = 1.0 / volume; auto aStar = b * c * sinAlpha * inv_volume; auto bStar = a * c * sinBeta * inv_volume; auto cStar = a * b * sinGamma * inv_volume; auto cosAlphaStar = (cosBeta * cosGamma - cosAlpha) / (sinBeta * sinGamma); auto cosBetaStar = (cosAlpha * cosGamma - cosBeta) / (sinAlpha * sinGamma); auto cosGammaStar = (cosAlpha * cosBeta - cosGamma) / (sinAlpha * sinBeta); ReciprocalSpaceParams params{}; params.abc = {aStar, bStar, cStar}; params.angles = {cosAlphaStar, cosBetaStar, cosGammaStar}; params.volume = static_cast(volume); return params; } /** * @brief Calculate the squared distance in reciprocal space for a given hkl * @param param The reciprocal space parameters * @param hkl The Miller indices * @return The squared distance in reciprocal space * @note Fundamentals of Crystallography, 3rd edition (Eqn 2.17b) * triclinic system is the most general case */ float CalculateReciprocalSpaceDistanceSquared( const ReciprocalSpaceParams& param, const glm::vec3& hkl) { auto a_star = param.abc[0]; auto b_star = param.abc[1]; auto c_star = param.abc[2]; auto cos_alpha_star = param.angles[0]; auto cos_beta_star = param.angles[1]; auto cos_gamma_star = param.angles[2]; auto h = hkl.x; auto k = hkl.y; auto l = hkl.z; return (h * h * a_star * a_star + // k * k * b_star * b_star + // l * l * c_star * c_star + // 2 * h * k * a_star * b_star * cos_gamma_star + // 2 * h * l * a_star * c_star * cos_beta_star + // 2 * k * l * b_star * c_star * cos_alpha_star); } /** * @brief Structure to hold reflection information */ struct ReflectionInfo { glm::ivec3 hkl{}; float fwt{}; float phwt{}; float fom{}; }; /** * @brief Class to handle the reflection data from a CIF data block */ class ReflnBlock { public: /** * @brief Constructor * @param datablock The CIF data block containing reflection data * @pre the CIF data block must contain the required reflection arrays */ ReflnBlock(const pymol::cif_data& datablock) : m_datablock(&datablock) , m_size(datablock.get_arr("_refln.index_h")->size()) { } /** * @brief Get the reflection information for a given index * @param index The index of the reflection * @return The reflection information */ pymol::Result getReflectionInfo(int index) const { if (index < 0 || index >= m_size) { return pymol::make_error("Index out of bounds"); } auto* millerH = m_datablock->get_arr("_refln.index_h"); auto* millerK = m_datablock->get_arr("_refln.index_k"); auto* millerL = m_datablock->get_arr("_refln.index_l"); auto* fwt = m_datablock->get_arr("_refln.pdbx_fwt"); auto* phwt = m_datablock->get_arr("_refln.pdbx_phwt"); auto* fom = m_datablock->get_arr("_refln.fom"); return ReflectionInfo{glm::ivec3(millerH->as_i(index), millerK->as_i(index), millerL->as_i(index)), fwt->as_f(index), phwt->as_f(index), fom->as_f(index)}; } /** * @return The number of reflections in the block */ auto size() const { return m_size; } private: const pymol::cif_data* m_datablock{}; const std::size_t m_size{}; }; /** * @brief Convert HKL to flat index * @param hkl The Miller indices * @param shape The shape of the map * @return The flat index corresponding to the HKL */ static std::size_t HKLToFlatIndex( const glm::ivec3& hkl, const glm::ivec3& shape) { return static_cast( hkl.z + hkl.y * shape[2] + hkl.x * shape[1] * shape[2]); } /** * @brief Convert flat index to HKL * @param flat_idx The flat index * @param shape The shape of the map * @return The HKL corresponding to the flat index */ static glm::ivec3 FlatIndexToHKL( std::size_t flat_idx, const glm::ivec3& shape) { int iz = flat_idx % shape[2]; int iy = (flat_idx / shape[2]) % shape[1]; int ix = flat_idx / (shape[1] * shape[2]); return glm::ivec3(ix, iy, iz); } #ifdef TRACE /** * @brief Print statistics for the reciprocal grid * @param recip_map The reciprocal map to analyze */ static void PrintReciprocalGridStatistics( const CFieldTyped>& recip_map) { double sum_abs_real_for_scale = 0.0; double max_abs_imag = 0.0; auto* map_flat_data = recip_map.ptr(0, 0, 0); auto total_map_elements = recip_map.size() / sizeof(std::complex); for (std::size_t i = 0; i < total_map_elements; ++i) { auto val = map_flat_data[i]; sum_abs_real_for_scale += std::abs(val.real()); max_abs_imag = std::max(max_abs_imag, std::abs((double)val.imag())); } std::cout << "Reciprocal Map - Max absolute imaginary part: " << max_abs_imag << "\n"; std::cout << "Average absolute real part (for scale): " << sum_abs_real_for_scale / total_map_elements << "\n"; if (sum_abs_real_for_scale > EPSILON && (max_abs_imag / sum_abs_real_for_scale > 1e-5)) { std::cerr << "Warning: Significant imaginary components found in c2c FFT " "output! Max abs imag: " << max_abs_imag << "\n"; } else { std::cout << "c2c FFT output seems valid. Max abs imag: " << max_abs_imag << "\n"; } } /** * @brief Print statistics for the raw map * @param refln_block The reflection block containing reflection data * @param map The map to analyze */ static void PrintRawMapStatistics( const ReflnBlock& refln_block, const CFieldTyped& map) { double sum_scaled_rho = 0.0; double sum_sq_scaled_rho = 0.0; auto* map_flat_data = map.ptr(0, 0, 0); float min_scaled_rho = map_flat_data[0]; float max_scaled_rho = map_flat_data[0]; auto total_map_elements = map.size() / sizeof(float); for (std::size_t i = 0; i < total_map_elements; ++i) { float val = map_flat_data[i]; sum_scaled_rho += val; sum_sq_scaled_rho += static_cast(val) * val; if (val < min_scaled_rho) min_scaled_rho = val; if (val > max_scaled_rho) max_scaled_rho = val; } float mean_scaled_rho = 0.0f; float sd_scaled_rho = 0.0f; if (total_map_elements > 0) { mean_scaled_rho = static_cast(sum_scaled_rho / total_map_elements); double variance = (sum_sq_scaled_rho / total_map_elements) - (static_cast(mean_scaled_rho) * mean_scaled_rho); sd_scaled_rho = (variance > 0) ? static_cast(std::sqrt(variance)) : 0.0f; } std::cout << "SCALED Map - Min: " << min_scaled_rho << ", Max: " << max_scaled_rho << ", Mean: " << mean_scaled_rho << ", SD: " << sd_scaled_rho << "\n"; double sum_signed_rho = 0.0; for (std::size_t i = 0; i < total_map_elements; ++i) { sum_signed_rho += map_flat_data[i]; } double mean_signed_rho = sum_signed_rho / total_map_elements; std::cout << "Mean of signed map density: " << mean_signed_rho << "\n"; float max_fwt = 0.0; glm::ivec3 hkl_strong{}; for (std::size_t i = 0; i < refln_block.size(); ++i) { auto refl_info = refln_block.getReflectionInfo(i); if (refl_info && refl_info->fwt > max_fwt) { max_fwt = refl_info->fwt; hkl_strong = refl_info->hkl; } } std::cout << "Strongest reflection: F(" << hkl_strong.x << "," << hkl_strong.y << "," << hkl_strong.z << ") with amplitude " << max_fwt << "\n"; float max_density_peak = -std::numeric_limits::infinity(); std::size_t peak_flat_idx = 0; for (std::size_t i = 0; i < total_map_elements; ++i) { if (map_flat_data[i] > max_density_peak) { max_density_peak = map_flat_data[i]; peak_flat_idx = i; } } glm::ivec3 shape(map.dim[0], map.dim[1], map.dim[2]); auto i_peak = FlatIndexToHKL(peak_flat_idx, shape); std::cout << "Highest density peak in map: " << max_density_peak << " at grid index (" << i_peak.x << ", " << i_peak.y << ", " << i_peak.z << ")\n"; } /** * @brief Get the Cartesian point from the map state * @param ms The object map state * @param i, j, k The grid indices * @return The Cartesian coordinates of the point */ static glm::vec3 GetCartesianPointFromMapState( const ObjectMapState& ms, int i, int j, int k) { auto* points_cfield = ms.Field->points.get(); return glm::vec3(points_cfield->get(i, j, k, 0), points_cfield->get(i, j, k, 1), points_cfield->get(i, j, k, 2)); } /** * @brief Validate the map voxel geometry * @param ms The object map state */ void ValidateMapVoxelGeometry(const ObjectMapState& ms) { std::cout << "\n--- Validating Map Voxel Geometry ---\n"; const float* crystal_dims_arr = ms.Symmetry->Crystal.dims(); float L_a = crystal_dims_arr[0]; float L_b = crystal_dims_arr[1]; float L_c = crystal_dims_arr[2]; const float* angles_deg = ms.Symmetry->Crystal.angles(); int N_g0 = ms.FDim[0]; int N_g1 = ms.FDim[1]; int N_g2 = ms.FDim[2]; if (N_g0 <= 1 || N_g1 <= 1 || N_g2 <= 1) { std::cout << "Grid dimensions are too small for step validation (<=1 along " "an axis)." << "\n"; return; } std::cout << std::fixed << std::setprecision(6); std::cout << "Cell (from ms.Symmetry->Crystal): a=" << L_a << " b=" << L_b << " c=" << L_c << " alpha=" << angles_deg[0] << " beta=" << angles_deg[1] << " gamma=" << angles_deg[2] << "\n"; std::cout << "Grid (ms.FDim): N_g0=" << N_g0 << " N_g1=" << N_g1 << " N_g2=" << N_g2 << "\n"; std::cout << "Grid Divisions (ms.Div): Div0=" << ms.Div[0] << " Div1=" << ms.Div[1] << " Div2=" << ms.Div[2] << "\n"; float expected_step_g0 = (ms.Div[0] > 0) ? L_a / static_cast(ms.Div[0]) : 0.f; float expected_step_g1 = (ms.Div[1] > 0) ? L_b / static_cast(ms.Div[1]) : 0.f; float expected_step_g2 = (ms.Div[2] > 0) ? L_c / static_cast(ms.Div[2]) : 0.f; std::cout << "Expected Cartesian step length (using Div for sampling along " "cell axes): " << "dx=" << expected_step_g0 << ", dy=" << expected_step_g1 << ", dz=" << expected_step_g2 << "\n"; glm::vec3 P000 = GetCartesianPointFromMapState(ms, 0, 0, 0); glm::vec3 P100 = GetCartesianPointFromMapState(ms, 1, 0, 0); glm::vec3 P010 = GetCartesianPointFromMapState(ms, 0, 1, 0); glm::vec3 P001 = GetCartesianPointFromMapState(ms, 0, 0, 1); std::cout << "P(grid 0,0,0): " << glm::to_string(P000) << "\n"; std::cout << "P(grid 1,0,0): " << glm::to_string(P100) << "\n"; std::cout << "P(grid 0,1,0): " << glm::to_string(P010) << "\n"; std::cout << "P(grid 0,0,1): " << glm::to_string(P001) << "\n"; glm::vec3 step_G0 = P100 - P000; glm::vec3 step_G1 = P010 - P000; glm::vec3 step_G2 = P001 - P000; std::cout << "Cartesian Step vector along grid dim 0 (ms.FDim[0]-axis): " << glm::to_string(step_G0) << "\n"; std::cout << "Cartesian Step vector along grid dim 1 (ms.FDim[1]-axis): " << glm::to_string(step_G1) << "\n"; std::cout << "Cartesian Step vector along grid dim 2 (ms.FDim[2]-axis): " << glm::to_string(step_G2) << "\n"; float len_G0 = glm::length(step_G0); float len_G1 = glm::length(step_G1); float len_G2 = glm::length(step_G2); std::cout << "Magnitude of step_G0: " << len_G0 << "\n"; std::cout << "Magnitude of step_G1: " << len_G1 << "\n"; std::cout << "Magnitude of step_G2: " << len_G2 << "\n"; bool is_cubic_cell = (std::abs(angles_deg[0] - 90.0f) < 1e-3f && std::abs(angles_deg[1] - 90.0f) < 1e-3f && std::abs(angles_deg[2] - 90.0f) < 1e-3f && std::abs(L_a - L_b) < 1e-3f && std::abs(L_a - L_c) < 1e-3f); bool grid_dims_iso = (N_g0 == N_g1 && N_g0 == N_g2); if (is_cubic_cell && grid_dims_iso) { if (std::abs(len_G0 - len_G1) > 1e-3f || std::abs(len_G0 - len_G2) > 1e-3f || std::abs(len_G1 - len_G2) > 1e-3f) { std::cout << "Warning: Step magnitudes are unequal for a cubic cell with " "isotropic grid sampling! Potential stretching source." << "\n"; } else { std::cout << "Step magnitudes are (nearly) equal, as expected for cubic " "cell with isotropic grid." << "\n"; } } else { std::cout << "Note: Unequal step magnitudes may be expected for non-cubic " "cells or anisotropic grid sampling." << "\n"; } glm::vec3 norm_G0 = (len_G0 > EPSILON) ? glm::normalize(step_G0) : glm::vec3(0.f); glm::vec3 norm_G1 = (len_G1 > EPSILON) ? glm::normalize(step_G1) : glm::vec3(0.f); glm::vec3 norm_G2 = (len_G2 > EPSILON) ? glm::normalize(step_G2) : glm::vec3(0.f); float dot_G0G1 = glm::dot(norm_G0, norm_G1); float dot_G0G2 = glm::dot(norm_G0, norm_G2); float dot_G1G2 = glm::dot(norm_G1, norm_G2); std::cout << "Dot product (norm_G0 . norm_G1): " << dot_G0G1 << " (cos angle between grid axis 0 and 1)\n"; std::cout << "Dot product (norm_G0 . norm_G2): " << dot_G0G2 << " (cos angle between grid axis 0 and 2)\n"; std::cout << "Dot product (norm_G1 . norm_G2): " << dot_G1G2 << " (cos angle between grid axis 1 and 2)\n"; float cos_gamma_cell = std::cos(glm::radians(angles_deg[2])); float cos_beta_cell = std::cos(glm::radians(angles_deg[1])); float cos_alpha_cell = std::cos(glm::radians(angles_deg[0])); // approx cos(89 deg) or cos(91 deg) - allow ~1 degree deviation float angle_check_threshold = 0.0175f; bool angles_match_cell = true; if (std::abs(dot_G0G1 - cos_gamma_cell) > angle_check_threshold) { std::cout << "Warning: Angle between grid axes 0 & 1 (from dot_G0G1: " << std::acos(std::clamp(dot_G0G1, -1.0f, 1.0f)) * 180.f / glm::pi() << " deg) inconsistent with crystal gamma (" << angles_deg[2] << " deg).\n"; angles_match_cell = false; } if (std::abs(dot_G0G2 - cos_beta_cell) > angle_check_threshold) { std::cout << "Warning: Angle between grid axes 0 & 2 (from dot_G0G2: " << std::acos(std::clamp(dot_G0G2, -1.0f, 1.0f)) * 180.f / glm::pi() << " deg) inconsistent with crystal beta (" << angles_deg[1] << " deg).\n"; angles_match_cell = false; } if (std::abs(dot_G1G2 - cos_alpha_cell) > angle_check_threshold) { std::cout << "Warning: Angle between grid axes 1 & 2 (from dot_G1G2: " << std::acos(std::clamp(dot_G1G2, -1.0f, 1.0f)) * 180.f / glm::pi() << " deg) inconsistent with crystal alpha (" << angles_deg[0] << " deg).\n"; angles_match_cell = false; } if (!angles_match_cell) { std::cout << "Warning: Grid step vectors' angles do not accurately reflect " "cell angles! Potential shear/slant source.\n"; } else { std::cout << "Grid step vectors' angles appear consistent with cell angles.\n"; } // Manual FracToReal Check vs. GetPointComponent std::cout << "\n--- Manual FracToReal Check vs. GetPointComponent ---\n" const float* f2r_matrix = ms.Symmetry->Crystal.fracToReal(); float frac_v_manual[3]; float manual_cart_vr[3]; // Point (grid 1,0,0) // Grid indices for fractional calculation: (1+Min[0]), (0+Min[1]), (0+Min[2]) frac_v_manual[0] = (static_cast(1) + ms.Min[0]) / static_cast(ms.Div[0]); frac_v_manual[1] = (static_cast(0) + ms.Min[1]) / static_cast(ms.Div[1]); frac_v_manual[2] = (static_cast(0) + ms.Min[2]) / static_cast(ms.Div[2]); transform33f3f(f2r_matrix, frac_v_manual, manual_cart_vr); std::cout << "Grid (1,0,0): ManualFracToReal: (" << manual_cart_vr[0] << "," << manual_cart_vr[1] << "," << manual_cart_vr[2] << ")" << " GetPoint: " << glm::to_string(P100) << "\n"; if (glm::distance(glm::make_vec3(manual_cart_vr), P100) > 1e-4f) { std::cout << "MISMATCH for P(1,0,0)!\n"; } // Point (grid 0,1,0) frac_v_manual[0] = (static_cast(0) + ms.Min[0]) / static_cast(ms.Div[0]); frac_v_manual[1] = (static_cast(1) + ms.Min[1]) / static_cast(ms.Div[1]); frac_v_manual[2] = (static_cast(0) + ms.Min[2]) / static_cast(ms.Div[2]); transform33f3f(f2r_matrix, frac_v_manual, manual_cart_vr); std::cout << "Grid (0,1,0): ManualFracToReal: (" << manual_cart_vr[0] << "," << manual_cart_vr[1] << "," << manual_cart_vr[2] << ")" << " GetPoint: " << glm::to_string(P010) << "\n"; if (glm::distance(glm::make_vec3(manual_cart_vr), P010) > 1e-4f) { std::cout << "MISMATCH for P(0,1,0)!\n"; } // Point (grid 0,0,1) frac_v_manual[0] = (static_cast(0) + ms.Min[0]) / static_cast(ms.Div[0]); frac_v_manual[1] = (static_cast(0) + ms.Min[1]) / static_cast(ms.Div[1]); frac_v_manual[2] = (static_cast(1) + ms.Min[2]) / static_cast(ms.Div[2]); transform33f3f(f2r_matrix, frac_v_manual, manual_cart_vr); std::cout << "Grid (0,0,1): ManualFracToReal: (" << manual_cart_vr[0] << "," << manual_cart_vr[1] << "," << manual_cart_vr[2] << ")" << " GetPoint: " << glm::to_string(P001) << "\n"; if (glm::distance(glm::make_vec3(manual_cart_vr), P001) > 1e-4f) { std::cout << "MISMATCH for P(0,0,1)!\n"; } std::cout << "--- End of Manual FracToReal Check ---\n"; std::cout << "--- End of Voxel Geometry Validation ---\n"; } #endif // TRACE /** * @brief Create an ObjectMapState from a CFieldTyped map and symmetry * @param map The map to convert * @param sym The symmetry to use * @return The created ObjectMapState */ ObjectMapState ObjectMapStateFromField(PyMOLGlobals* G, const CFieldTyped& map, std::unique_ptr sym) { ObjectMapState ms(G); glm::ivec3 grid_size(map.dim[0], map.dim[1], map.dim[2]); ms.MapSource = cMapSourceCCP4; ms.Active = true; ms.Symmetry.reset(sym.release()); ms.FDim[0] = grid_size[0]; ms.FDim[1] = grid_size[1]; ms.FDim[2] = grid_size[2]; ms.FDim[3] = 3; ms.Min[0] = 0; ms.Min[1] = 0; ms.Min[2] = 0; ms.Max[0] = grid_size.x - 1; ms.Max[1] = grid_size.y - 1; ms.Max[2] = grid_size.z - 1; ms.Div[0] = grid_size.x; ms.Div[1] = grid_size.y; ms.Div[2] = grid_size.z; ms.Field.reset(new Isofield(G, ms.FDim)); float v[3]; v[2] = (ms.Min[2]) / ((float) ms.Div[2]); v[1] = (ms.Min[1]) / ((float) ms.Div[1]); v[0] = (ms.Min[0]) / ((float) ms.Div[0]); transform33f3f(ms.Symmetry->Crystal.fracToReal(), v, ms.ExtentMin); v[2] = ((ms.FDim[2] - 1) + ms.Min[2]) / ((float) ms.Div[2]); v[1] = ((ms.FDim[1] - 1) + ms.Min[1]) / ((float) ms.Div[1]); v[0] = ((ms.FDim[0] - 1) + ms.Min[0]) / ((float) ms.Div[0]); transform33f3f(ms.Symmetry->Crystal.fracToReal(), v, ms.ExtentMax); auto& field = *ms.Field; std::copy(map.data.begin(), map.data.end(), field.data->data.begin()); ms.Field->save_points = false; auto& crystal = ms.Symmetry->Crystal; ObjectMapStateRegeneratePoints(&ms); float frac_coords[3]; float cart_coords[3]; int corner_write_idx = 0; for (int z_loop = 0; z_loop < 2; ++z_loop) { int grid_idx_z = (z_loop == 0) ? ms.Min[2] : (ms.Min[2] + ms.FDim[2] - 1); frac_coords[2] = static_cast(grid_idx_z) / static_cast(ms.Div[2]); for (int y_loop = 0; y_loop < 2; ++y_loop) { int grid_idx_y = (y_loop == 0) ? ms.Min[1] : (ms.Min[1] + ms.FDim[1] - 1); frac_coords[1] = static_cast(grid_idx_y) / static_cast(ms.Div[1]); for (int x_loop = 0; x_loop < 2; ++x_loop) { int grid_idx_x = (x_loop == 0) ? ms.Min[0] : (ms.Min[0] + ms.FDim[0] - 1); frac_coords[0] = static_cast(grid_idx_x) / static_cast(ms.Div[0]); transform33f3f( ms.Symmetry->Crystal.fracToReal(), frac_coords, cart_coords); ms.Corner[corner_write_idx * 3 + 0] = cart_coords[0]; ms.Corner[corner_write_idx * 3 + 1] = cart_coords[1]; ms.Corner[corner_write_idx * 3 + 2] = cart_coords[2]; corner_write_idx++; } } } #ifdef TRACE ValidateMapVoxelGeometry(ms); #endif // TRACE return ms; } /** * @brief Find the optimal grid size for the FFT * @param refln_block The reflection block containing reflection data * @param reciprocal_space_params The parameters for reciprocal space * @return The optimal grid size as a glm::ivec3 */ glm::ivec3 FindOptimalGridSize(const ReflnBlock& refln_block, const ReciprocalSpaceParams& reciprocal_space_params) { std::array dimensions{}; float inv_dist_sq{}; for (std::size_t i = 0; i < refln_block.size(); ++i) { auto refl_info = refln_block.getReflectionInfo(i); if (!refl_info) { continue; } auto cand_inv_dist_sq = CalculateReciprocalSpaceDistanceSquared( reciprocal_space_params, refl_info->hkl); inv_dist_sq = std::max(inv_dist_sq, cand_inv_dist_sq); } double inv_dist = std::sqrt(inv_dist_sq); // Nyquist double nyquist_sampling_rate = 2.0; // Perhaps make into a setting? glm::dvec3 cell_size{}; for (int i = 0; i < 3; ++i) { cell_size[i] = std::max(cell_size[i], nyquist_sampling_rate * inv_dist / reciprocal_space_params.abc[i]); } glm::ivec3 grid_size{}; for (int i = 0; i < 3; ++i) { auto round_up = static_cast(std::ceil(cell_size[i])); grid_size[i] = pocketfft::detail::util::good_size_real(round_up); } return grid_size; } /** * @brief Calculate the phase shift for a given HKL and symmetry operation * @param hkl_in The input HKL indices * @param sym_op_T_3x1 The symmetry operation translation vector * @return The calculated phase shift (2 * PI * (h.trans)) */ double CalculatePhaseShift( const glm::ivec3& hkl, const glm::vec3& trans) { double dot_product = glm::dot(glm::dvec3(hkl), glm::dvec3(trans)); return 2.0 * glm::pi() * dot_product; } /** * @brief Normalizes the complex map to float density map * @param reciprocal_space_params The parameters for reciprocal space * @param recip_grid The reciprocal grid data * @return Density Map as Float Field */ static CFieldTyped NormalizeComplexMapToFloat(PyMOLGlobals* G, const ReciprocalSpaceParams& reciprocal_space_params, const CFieldTyped>& recip_grid) { CFieldTyped map(reinterpret_cast(recip_grid.dim.data()), 3); auto total_map_elements = map.size() / sizeof(float); float inv_Vcell = 1.0f; if (std::abs(reciprocal_space_params.volume) > EPSILON) { inv_Vcell = 1.0f / static_cast(reciprocal_space_params.volume); } auto* flat_recip_data = recip_grid.ptr(0, 0, 0); auto* map_flat_data = map.ptr(0, 0, 0); if (!SettingGet(G, cSetting_normalize_ccp4_maps)) { for (std::size_t i = 0; i < total_map_elements; ++i) { map_flat_data[i] = flat_recip_data[i].real() * inv_Vcell; } return map; } double sum_rho_scaled = 0.0; double sum_sq_rho_scaled = 0.0; for (std::size_t i = 0; i < total_map_elements; ++i) { map_flat_data[i] = flat_recip_data[i].real() * inv_Vcell; sum_rho_scaled += map_flat_data[i]; sum_sq_rho_scaled += static_cast(map_flat_data[i]) * map_flat_data[i]; } // Normalize the map to mean ~0 and standard deviation ~1 float current_mean = 0.0f; float current_sd = 0.0f; if (total_map_elements > 0) { current_mean = static_cast(sum_rho_scaled / total_map_elements); double variance = (sum_sq_rho_scaled / total_map_elements) - (static_cast(current_mean) * current_mean); current_sd = (variance > 0) ? static_cast(std::sqrt(variance)) : 0.0f; } if (std::abs(current_sd) > EPSILON) { // Avoid division by zero if SD is tiny for (std::size_t i = 0; i < total_map_elements; ++i) { map_flat_data[i] = (map_flat_data[i] - current_mean) / current_sd; } } else { // Shift everything to force mean to zero if (std::abs(current_mean) > EPSILON) { for (std::size_t i = 0; i < total_map_elements; ++i) { map_flat_data[i] = map_flat_data[i] - current_mean; } } } return map; } /** * @brief Structure to hold symmetry matrices in GLM format */ struct SymMatsGLM { glm::mat3 rot; glm::vec3 trans; }; /** * @brief Calculate the symmetry matrices in GLM format. * @param symmetry The symmetry object containing the symmetry matrices. * @return A vector of SymMatsGLM structures containing the rotation and * translation components of the symmetry matrices. */ static std::vector CalculateSymMatricesGLM( const CSymmetry& symmetry) { std::vector sym_matrices(symmetry.getNSymMat()); for (int i = 0; i < symmetry.getNSymMat(); ++i) { auto* sym_matrix_4x4 = symmetry.getSymMat(i); const float R_j[9] = {sym_matrix_4x4[0], sym_matrix_4x4[1], sym_matrix_4x4[2], sym_matrix_4x4[4], sym_matrix_4x4[5], sym_matrix_4x4[6], sym_matrix_4x4[8], sym_matrix_4x4[9], sym_matrix_4x4[10]}; sym_matrices[i].rot = glm::transpose(glm::make_mat3(R_j)); sym_matrices[i].trans = glm::vec3(sym_matrix_4x4[3], sym_matrix_4x4[7], sym_matrix_4x4[11]); } return sym_matrices; } /** * @brief Checks if HKL indices are within the bounds of a grid. * @param idx The HKL indices to check. * @param shape The shape of the grid. * @return True if the indices are within bounds, false otherwise. */ static bool IsHKLWithinShape(const glm::ivec3& idx, const glm::ivec3& shape) { return (idx.x >= 0 && idx.x < shape[0] && // idx.y >= 0 && idx.y < shape[1] && // idx.z >= 0 && idx.z < shape[2]); } /** * @brief Apply symmetry matrices to the reflections and store the results in * the reciprocal space data. * @param sym_matrices The symmetry matrices to apply. * @param hkl_orig The original Miller indices. * @param shape The shape of the reciprocal space grid. * @param f_amp_weighted The weighted amplitude of the reflection. * @param phase_rad_orig The original phase in radians. * @param recip_1D_total_elements The total number of elements in the 1D * reciprocal space data. * @param[out] flat_recip_data The flat reciprocal space data array. * @param[out] is_grid_point_set A vector indicating whether a grid point has been * set. */ void ApplySymMatricesToReflections( const std::vector& sym_matrices, const glm::ivec3& hkl_orig, const glm::ivec3& shape, float f_amp_weighted, float phase_rad_orig, int recip_1D_total_elements, std::complex* flat_recip_data, std::vector& is_grid_point_set) { for (auto& sym_mat : sym_matrices) { glm::ivec3 hkl_symm = glm::round(sym_mat.rot * hkl_orig); auto phase_shift_rad = CalculatePhaseShift(hkl_orig, sym_mat.trans); auto F_symm_complex = std::polar( f_amp_weighted, phase_rad_orig + static_cast(phase_shift_rad)); int idx_h_s = (hkl_symm.x >= 0) ? hkl_symm.x : hkl_symm.x + shape[0]; int idx_k_s = (hkl_symm.y >= 0) ? hkl_symm.y : hkl_symm.y + shape[1]; int idx_l_s = (hkl_symm.z >= 0) ? hkl_symm.z : hkl_symm.z + shape[2]; auto idx_hkl_s = glm::ivec3(idx_h_s, idx_k_s, idx_l_s); if (IsHKLWithinShape(idx_hkl_s, shape)) { auto flat_idx_s = HKLToFlatIndex(idx_hkl_s, shape); if (flat_idx_s < recip_1D_total_elements && !is_grid_point_set[flat_idx_s]) { flat_recip_data[flat_idx_s] = F_symm_complex; is_grid_point_set[flat_idx_s] = true; } } } } /** * @brief Calculate inverse Friedel mate indices for a given HKL and shape. * @param hkl_orig The original HKL indices. * @param shape The shape of the reciprocal space grid. * @return The Friedel mate indices as a glm::ivec3. */ glm::ivec3 CalculateFriedelMateIndex( const glm::ivec3& hkl_orig, const glm::ivec3& shape) { auto h_idx = hkl_orig.x; auto k_idx = hkl_orig.y; auto l_idx = hkl_orig.z; int h = (h_idx < shape[0] / 2 + shape[0] % 2) ? h_idx : h_idx - shape[0]; int k = (k_idx < shape[1] / 2 + shape[1] % 2) ? k_idx : k_idx - shape[1]; int l = (l_idx < shape[2] / 2 + shape[2] % 2) ? l_idx : l_idx - shape[2]; // Friedel mate indices int h_bar = -h; int k_bar = -k; int l_bar = -l; // Map (-h,-k,-l) to Friedel grid indices int idx_h_bar = (h_bar >= 0) ? h_bar : h_bar + shape[0]; int idx_k_bar = (k_bar >= 0) ? k_bar : k_bar + shape[1]; int idx_l_bar = (l_bar >= 0) ? l_bar : l_bar + shape[2]; return glm::ivec3(idx_h_bar, idx_k_bar, idx_l_bar); } /** * @brief Add friedel pair to the reciprocal space data. * @param symmetry The symmetry object. * @param refln_block The reflection block containing reflection data. * @param reciprocal_space_params The parameters for reciprocal space. * @return A CFieldTyped object representing the Fourier transformed map. */ void AddFriedelMate(const glm::ivec3& hkl_orig, std::size_t recip_1D_total_elements, const glm::ivec3& shape, std::complex* flat_recip_data, std::vector& is_grid_point_set) { auto flat_idx_hkl = HKLToFlatIndex(hkl_orig, shape); auto idx_bar = CalculateFriedelMateIndex(hkl_orig, shape); if (IsHKLWithinShape(idx_bar, shape)) { auto flat_idx_bar = HKLToFlatIndex(idx_bar, shape); if (flat_idx_bar < recip_1D_total_elements && !is_grid_point_set[flat_idx_bar]) { flat_recip_data[flat_idx_bar] = std::conj(flat_recip_data[flat_idx_hkl]); is_grid_point_set[flat_idx_bar] = true; } // If flat_idx_hkl == flat_idx_bar (centrosymmetric FFT point), // ensure it's real bool centrosymmetric = flat_idx_hkl == flat_idx_bar; if (centrosymmetric) { auto& recip_data = flat_recip_data[flat_idx_hkl]; bool around_zero_and_much_less_than_real = std::abs(recip_data.imag()) > 1e-5f * std::abs(recip_data.real()) && std::abs(recip_data.imag()) > 1e-5f; if (around_zero_and_much_less_than_real) { recip_data.imag(0.0f); } } } } /** * @brief Perform a Fast Fourier Transform in place on the reciprocal grid. * @param grid The reciprocal grid data. */ static void PerformFastFourierTransformInPlace( CFieldTyped>& grid) { auto* flat_grid = grid.ptr(0, 0, 0); pocketfft::shape_t pocket_shape{static_cast(grid.dim[0]), static_cast(grid.dim[1]), static_cast(grid.dim[2])}; auto base_size = grid.base_size; // CField is internally a 1D array, so we need to calculate the strides // for the 3D data. Z is contiguous; thus the axis order is ZYX. glm::uvec3 u_strides = {base_size * pocket_shape[2] * pocket_shape[1], base_size * pocket_shape[2], base_size}; pocketfft::stride_t stride_in(3); for (int i = 0; i < 3; ++i) { stride_in[i] = static_cast(u_strides[i]); } auto stride_out = stride_in; pocketfft::shape_t axes{2, 1, 0}; // ZYX bool direction = pocketfft::BACKWARD; float norm = 1.0f; pocketfft::c2c(pocket_shape, stride_in, stride_out, axes, direction, flat_grid, flat_grid, norm); } /** * @brief Fourier transform structure factors to a map. * @param symmetry The symmetry object. * @param refln_block The reflection block containing reflection data. * @param reciprocal_space_params The parameters for reciprocal space. * @return A CFieldTyped object representing the Fourier transformed map. */ CFieldTyped FourierTransformStructureFactorsToMap(PyMOLGlobals* G, const CSymmetry& symmetry, const ReflnBlock& refln_block, const ReciprocalSpaceParams& reciprocal_space_params) { auto shape = FindOptimalGridSize(refln_block, reciprocal_space_params); CFieldTyped> recip_grid(glm::value_ptr(shape), 3); auto recip_1D_total_elements = shape[0] * shape[1] * shape[2]; std::vector is_grid_point_set(recip_1D_total_elements, false); auto* flat_recip_data = recip_grid.ptr(0, 0, 0); auto sym_matrices = CalculateSymMatricesGLM(symmetry); for (std::size_t i = 0; i < refln_block.size(); ++i) { auto refl_info = refln_block.getReflectionInfo(i); if (!refl_info) { continue; } auto hkl_orig = refl_info->hkl; auto f_amp_raw = refl_info->fwt; auto ph_deg_orig = refl_info->phwt; auto fom_val = refl_info->fom; auto f_amp_weighted = f_amp_raw * fom_val; if (f_amp_weighted == 0.0f && !(hkl_orig.x == 0 && hkl_orig.y == 0 && hkl_orig.z == 0)) { // Skip zero amplitude reflections unless F000 continue; } auto phase_rad_orig = glm::radians(ph_deg_orig); auto F_orig_complex = std::polar(f_amp_weighted, phase_rad_orig); ApplySymMatricesToReflections( sym_matrices, hkl_orig, shape, f_amp_weighted, phase_rad_orig, recip_1D_total_elements, flat_recip_data, is_grid_point_set); } // End loop over reflections from CIF for (std::size_t h_idx = 0; h_idx < shape[0]; ++h_idx) { for (std::size_t k_idx = 0; k_idx < shape[1]; ++k_idx) { for (std::size_t l_idx = 0; l_idx < shape[2]; ++l_idx) { auto flat_idx_hkl = HKLToFlatIndex(glm::ivec3(h_idx, k_idx, l_idx), shape); if (is_grid_point_set[flat_idx_hkl]) { AddFriedelMate(glm::ivec3(h_idx, k_idx, l_idx), recip_1D_total_elements, shape, flat_recip_data, is_grid_point_set); } } } } PerformFastFourierTransformInPlace(recip_grid); auto map = NormalizeComplexMapToFloat(G, reciprocal_space_params, recip_grid); #ifdef TRACE PrintReciprocalGridStatistics(recip_grid); PrintRawMapStatistics(refln_block, map); #endif // TRACE return map; } pymol::Result ObjectMapReadCifStr( PyMOLGlobals* G, const pymol::cif_data& datablock) { auto I = new ObjectMap(G); initializeTTT44f(I->TTT); I->TTTFlag = false; ReflnBlock refln_block(datablock); std::unique_ptr sym(read_symmetry(G, &datablock)); auto reciprocal_space_params = CalculateReciprocalSpaceParam(sym->Crystal); auto map = FourierTransformStructureFactorsToMap( G, *sym, refln_block, reciprocal_space_params); I->State.push_back(ObjectMapStateFromField(G, map, std::move(sym))); ObjectMapUpdateExtents(I); return I; } /** * Read one or multiple object-molecules from a CIF file. If there is only one * or multiplex=0, then return the object-molecule. Otherwise, create each * object - named by its data block name - and return nullptr. */ pymol::Result ObjectMoleculeReadCifStr(PyMOLGlobals * G, ObjectMolecule * I, const char *st, int frame, int discrete, int quiet, int multiplex, int zoom) { if (I) { return pymol::make_error("loading mmCIF into existing object not supported, " "please use 'create' to append to an existing object."); } if (multiplex > 0) { return pymol::make_error("loading mmCIF with multiplex=1 not supported, please " "use 'split_states' after loading the object."); } auto cif = std::make_shared(); if (!cif->parse_string(st)) { return pymol::make_error("Parsing CIF file failed: ", cif->m_error_msg); } for (const auto& [code, datablock] : cif->datablocks()) { ObjectMolecule * obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet); if (!obj) { PRINTFB(G, FB_ObjectMolecule, FB_Warnings) " mmCIF-Warning: no coordinates found in data_%s\n", datablock.code() ENDFB(G); continue; } #ifndef _PYMOL_NOPY // we only provide access from the Python API so far if (SettingGetGlobal_b(G, cSetting_cif_keepinmemory)) { obj->m_cifdata = &datablock; obj->m_ciffile = cif; } #endif if (cif->datablocks().size() == 1 || multiplex == 0) return obj; // multiplexing ObjectSetName(obj, datablock.code()); ExecutiveDelete(G, obj->Name); ExecutiveManageObject(G, obj, zoom, true); } return nullptr; } /** * Bond dictionary getter, with on-demand download of residue dictionaries */ const bond_dict_t::mapped_type * bond_dict_t::get(PyMOLGlobals * G, const char * resn, bool try_download) { auto key = make_key(resn); auto it = m_map.find(key); if (it != m_map.end()) return &it->second; if (unknown_resn.count(key)) return nullptr; #ifndef _PYMOL_NOPY if (try_download) { pymol::GIL_Ensure gil; bool downloaded = false; // call into Python unique_PyObject_ptr pyfilename( PyObject_CallMethod(G->P_inst->cmd, "download_chem_comp", "siO", resn, !Feedback(G, FB_Executive, FB_Details), G->P_inst->cmd)); if (pyfilename) { const char* filename = PyString_AsString(pyfilename.get()); // update if ((downloaded = (filename && filename[0]))) { cif_file_with_error_capture cif; if (!cif.parse_file(filename)) { PRINTFB(G, FB_Executive, FB_Warnings) " Warning: Loading _chem_comp_bond CIF data for residue '%s' " "failed: %s\n", resn, cif.m_error_msg.c_str() ENDFB(G); return nullptr; } for (auto& [code, item] : cif.datablocks()) read_chem_comp_bond_dict(&item, *this); } } if (downloaded) { // second attempt to look up, from eventually updated dictionary return get(G, resn, false); } } #endif PRINTFB(G, FB_Executive, FB_Warnings) " ExecutiveLoad-Warning: No _chem_comp_bond data for residue '%s'\n", resn ENDFB(G); // don't try downloading again unknown_resn.insert(key); return nullptr; } /////////////////////////////////////// pymol::Result ObjectMoleculeReadBCif(PyMOLGlobals* G, ObjectMolecule* I, const char* bytes, std::size_t size, int frame, int discrete, int quiet, int multiplex, int zoom) { #ifdef _PYMOL_NO_MSGPACKC PRINTFB(G, FB_ObjectMolecule, FB_Errors) " Error: This build has no BinaryCIF support.\n" " Please install/enable msgpack-c.\n" ENDFB(G); return nullptr; #endif if (I) { return pymol::make_error("loading BCIF into existing object not supported, " "please use 'create' to append to an existing object."); } if (multiplex > 0) { return pymol::make_error("loading BCIF with multiplex=1 not supported, please " "use 'split_states' after loading the object."); } auto cif = std::make_shared(); cif->parse_bcif(bytes, size); for (const auto& [code, datablock] : cif->datablocks()) { auto obj = ObjectMoleculeReadCifData(G, &datablock, discrete, quiet); if (!obj) { PRINTFB(G, FB_ObjectMolecule, FB_Warnings) " BCIF-Warning: no coordinates found in data_%s\n", datablock.code() ENDFB(G); continue; } #ifndef _PYMOL_NOPY // we only provide access from the Python API so far if (SettingGet(G, cSetting_cif_keepinmemory)) { obj->m_cifdata = &datablock; obj->m_ciffile = cif; } #endif if (cif->datablocks().size() == 1 || multiplex == 0) return obj; } return nullptr; } // vi:sw=2:ts=2:expandtab