I’m interested in how to apply quantum chemistry for my drug discovery journy longtime. My former boss teached me details of FMO. FMO is really is really useful tool for understanding protein-ligand or protein-protein interaction form QM point of veiw. I love it :) But GAMESS don’t have official python API. And I and kzfm developed psikit which is wrapper of psi4 using rdkit. The package has been no longer updated unfortunately. I realized that contribute open science spontineusly is really difficult but important.
Today I would like to introduce an example of contribution of rdkit and orca which is a QM package.
From official documentation
ORCA is a powerful and versatile quantum chemistry software package, primarily developed by the group of Prof. Frank Neese.
https://www.faccts.de/orca/
Orca has python API :) So I tried to use orca with RDKit.
At first, I installed orca and python environment. Orcan can get from following URL. (registration is reqired)
https://www.faccts.de/customer/login?came_from=/customer
$ ./orca_6_1_1_linux_x86-64_shared_openmpi418.run -- -p /home/iwatobipen/opt/orca/
# Then added path to Path env variable.
# From my bashrc
# export PATH="/home/iwatobipen/.local/bin:/home/iwatobipen/opt/orca:$PATH"
## ORCA 6.1.1 secion
# export PATH=/home/iwatobipen/opt/bin:$PATH
Next, I installed packages.
$ mkdir orcatest
$ cd orcatest
$ pixi init
$ pixi add python
$ pixi add --pypi orca-pi rdkit matplotlib jupyter py3dmol
$ pixi shell
Now I could build to test orca from python interface :)
I launched jupyter and use orca from python.
from rdkit import Chem
from rdkit.Chem import rdGeometry, rdDistGeom, rdForceFieldHelpers
from pathlib import Path
import shutil
from IPython.display import display, HTML
from opi.core import Calculator
from opi.input.structures.structure import Structure, MolFromSmiles
from opi.input.simple_keywords import Sqm, Dft
from opi.output.core import Output
import py3Dmol
import matplotlib.pyplot as plt
# function for visualizing molecule in 3D
def view_mol(molobj):
view = py3Dmol.view(width=400, height=400)
view.addModel(Chem.MolToMolBlock(molobj), 'sdf')
view.setStyle({}, {'stick':{},
'sphere':{"scale":0.3}})
view.zoomTo()
view.show()
#load example molecule
rdmol = Chem.MolFromSmiles('c1cnncc1')
hrdmol = Chem.AddHs(rdmol)
rdDistGeom.EmbedMolecule(hrdmol)
print(Chem.MolToMolBlock(hrdmol))
view_mol(hrdmol)
# optimize geometry with MMFF
rdForceFieldHelpers.MMFFOptimizeMolecule(hrdmol)
print(Chem.MolToMolBlock(hrdmol))
Ok, let’s run calculation.
working_dir = Path("moplot")
shutil.rmtree(working_dir, ignore_errors=True)
working_dir.mkdir()
resolution = 30
calc = Calculator(basename='job', working_dir=working_dir)
structure = Structure.from_rdkitmol(hrdmol)
calc.structure = structure
calc.input.add_simple_keywords(
Sqm.NATIVE_GFN2_XTB
)
calc.input.add_arbitrary_string("%loc\nLocMet PM\nend\n")
calc.write_input()
# > Run the ORCA calculation
print("Running ORCA calculation ...", end="")
calc.run()
print(" Done")
# > Get output and use it to create the gbw json output with config
output = calc.get_output()
status = output.terminated_normally()
if status:
output.parse()
else:
raise RuntimeError("ORCA did not terminate normally.")
def plot_mo_diagram(energies, occupations, title="MO Diagram"):
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_title(title)
ax.set_ylabel("Energy / eV")
lumo_id = occupations.index(0)
homo_energy = energies[lumo_id-1]
lumo_energy = energies[lumo_id]
prev_e = None
max_x = 0
for i, (e, occ) in enumerate(zip(energies, occupations)):
if prev_e is not None and abs(e-prev_e) < 0.01:
x += 2
else:
x = 0
if x > max_x: max_x = x
prev_e = e
ax.hlines(e, x - 0.4, x + 0.4, color='k', linewidth=2)
if occ == 2:
ax.annotate("↑", (x, e), textcoords="offset points", xytext=(-10, -5), ha='center', fontsize=12)
ax.annotate("↓", (x, e), textcoords="offset points", xytext=(10, -5), ha='center', fontsize=12)
elif occ == 1:
raise ValueError("This function does not support plotting of UHF type wavefunctions.")
# MO index on the right, start counting at 0
if e > homo_energy - 0.5 and e < lumo_energy + 0.5:
label = f"MO {i}"
ax.text(x + 0.6, e, label, va='center', fontsize=10)
ax.set_xlim(-1, max_x+ 1.25)
ax.set_ylim(homo_energy - 0.5, lumo_energy + 0.5)
ax.set_xticks([])
ax.grid(True, axis='y', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()
Then visualize calculation results with defined functions.
mo_list = output.get_mos()["mo"]
energies = [mo.orbitalenergy for mo in mo_list]
occupations = [mo.occupancy for mo in mo_list]
plot_mo_diagram(energies, occupations)
Then I tried to visualize MO.
def visualize_mos(output: Output, plot_list: list[int], resolution: int, gbw_type: str):
"""Visualize a list of mo indices"""
# > For nicely visualizing multiple MOs in this notebook we wrap
# > py3Dmol viewers in html
html_blocks = []
for mo_index in plot_list:
# > Obtain cube data of MO
cube_output = output.plot_mo(mo_index, resolution=resolution, gbw_type=gbw_type)
cube_data = cube_output.cube
# Set up Py3Dmol viewer for cube data
view = py3Dmol.view(width=250, height=250)
view.addModel(Chem.MolToMolBlock(hrdmol), "sdf")
view.setStyle({'stick': {'radius': 0.1}, 'sphere': {'scale': 0.2}})
view.addVolumetricData(cube_data, "cube", {"isoval": 0.005, "color": "blue", "opacity": 0.8})
view.addVolumetricData(cube_data, "cube", {"isoval": -0.005, "color": "red", "opacity": 0.8})
view.zoomTo()
# > HTML formatting
viewer_html = view._make_html()
html = f"""
<div style="display:inline-block; text-align:center; margin-right:10px;">
<div><b>MO {mo_index}</b></div>
{viewer_html}
</div>
"""
html_blocks.append(html)
# Display all viewers with labels
display(HTML(''.join(html_blocks)))
# > Plot orbitals
nel, _ = output.get_nelectrons()
homo_index = nel // 2 - 1
plot_list = [homo_index-2,homo_index-1,homo_index, homo_index+1]
visualize_mos(output=output, plot_list=plot_list, resolution=resolution, gbw_type="gbw")
MO 14 is highest occupied orbital (HOMO) and MO 15 is lowest unoccupied orbital (LUMO).
Then visualize localized orbitals.
visualize_mos(output=output, plot_list=plot_list, resolution=resolution, gbw_type="loc")
It’s interesting that MO is really different between carbon and nitrogen atoms.
It’s worth to thing structure of drug molecules.
Orca has lots of useful features of QM. I would like to learn orca more.