This repository has been archived by the owner on Feb 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
/
glosoap.py
executable file
·142 lines (125 loc) · 7 KB
/
glosoap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
"""
Just a minimal stub to compute SOAP vectors from a list of structures and print them out on a file.
"""
import quippy
import sys, time, ast
from multiprocessing import Process, Value, Array
import argparse
from random import randint
from libmatch.environments import alchemy, environ
from libmatch.structures import structk, structure, structurelist
import os
import numpy as np
from copy import copy
from time import ctime
from datetime import datetime
import gc
import cPickle as pickle
import code
# tries really hard to flush any buffer to disk!
def flush(stream):
stream.flush()
os.fsync(stream)
def atomicno_to_sym(atno):
pdict={1: 'H', 2: 'He', 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', 19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', 27: 'Co', 28: 'Ni', 29: 'Cu', 30: 'Zn', 31: 'Ga', 32: 'Ge', 33: 'As', 34: 'Se', 35: 'Br', 36: 'Kr', 37: 'Rb', 38: 'Sr', 39: 'Y', 40: 'Zr', 41: 'Nb', 42: 'Mo', 43: 'Tc', 44: 'Ru', 45: 'Rh', 46: 'Pd', 47: 'Ag', 48: 'Cd', 49: 'In', 50: 'Sn', 51: 'Sb', 52: 'Te', 53: 'I', 54: 'Xe', 55: 'Cs', 56: 'Ba', 57: 'La', 58: 'Ce', 59: 'Pr', 60: 'Nd', 61: 'Pm', 62: 'Sm', 63: 'Eu', 64: 'Gd', 65: 'Tb', 66: 'Dy', 67: 'Ho', 68: 'Er', 69: 'Tm', 70: 'Yb', 71: 'Lu', 72: 'Hf', 73: 'Ta', 74: 'W', 75: 'Re', 76: 'Os', 77: 'Ir', 78: 'Pt', 79: 'Au', 80: 'Hg', 81: 'Tl', 82: 'Pb', 83: 'Bi', 84: 'Po', 85: 'At', 86: 'Rn', 87: 'Fr', 88: 'Ra', 89: 'Ac', 90: 'Th', 91: 'Pa', 92: 'U', 93: 'Np', 94: 'Pu', 95: 'Am', 96: 'Cm', 97: 'Bk', 98: 'Cf', 99: 'Es', 100: 'Fm', 101: 'Md', 102: 'No', 103: 'Lr', 104: 'Rf', 105: 'Ha', 106: 'Sg', 107: 'Ns', 108: 'Hs', 109: 'Mt', 110: 'Unn', 111: 'Unu'}
return pdict[atno]
def main(filename, nmax, lmax, coff, cotw, gs, centerweight, gspecies, redfiles, prefix=""):
start_time = datetime.now()
filename = filename[0]
# sets a few defaults
if prefix=="": prefix=filename
if prefix.endswith('.xyz'): prefix=prefix[:-4]
prefix=prefix+"-n"+str(nmax)+"-l"+str(lmax)+"-c"+str(coff)+"-g"+str(gs)
print >> sys.stderr, "using output prefix =", prefix
# Reads input file using quippy
print >> sys.stderr, "Reading input file", filename
(first,last)=(0,0); # reads the whole thing
if first==0: first=None;
if last==0: last=None
al = quippy.AtomsList(filename, start=first, stop=last);
print >> sys.stderr, len(al.n) , " Configurations Read"
# determines "kit" (i.e. max n and kind of atoms present)
spkit = {}
for at in al:
atspecies = {}
for z in at.z:
if z in atspecies: atspecies[z]+=1
else: atspecies[z] = 1
for (z, nz) in atspecies.iteritems():
if z in spkit:
if nz>spkit[z]: spkit[z] = nz
else:
spkit[z] = nz
# species string
if gspecies is None:
zsp=spkit.keys()
zsp.sort()
print "No species provided"
else: zsp=map(int,gspecies.split(","))
# If dimensionality reduction is called, stores the variables
cur = False
if redfiles is not None:
cur = True
# Does not matter in which sequence the files are provided
try:
idxsel = np.loadtxt(redfiles[0], dtype=int)
chol = np.loadtxt(redfiles[1])
except ValueError:
idxsel = np.loadtxt(redfiles[1], dtype=int)
chol = np.loadtxt(redfiles[0])
# Checks if all dimensions are the same
assert idxsel.shape[0] == chol.shape[0] == chol.shape[1], "Wrong dimensions, cannot perform dimensionality reduction"
lspecies = 'n_species='+str(len(zsp))+' species_Z={ '
for z in zsp: lspecies = lspecies + str(z) + ' '
lspecies = lspecies + '}'
print "Using the following species:", ",".join([str(z) for z in zsp])
if not cur:
fout=open(prefix+".soap","w")
else:
fout=open(prefix+"_red.soap","w") # To avoid giving the same name to the reduced soap vector
counter = 0
for at in al:
fout.write("%d\n" % (len(at.z)))
at.set_cutoff(coff);
at.calc_connect();
nel = np.bincount(at.z)
# ok, it appears you *have* to do this atom by atom. let's do that, but then re-sort in the same way as in the input
soaps = {}
sz = {}
for z in at.z:
if z in sz: sz[z]+=1
else: sz[z]=1
for (z, nz) in sz.iteritems():
soapstr=("soap central_reference_all_species=F central_weight="+str(centerweight)+
" covariance_sigma0=0.0 atom_sigma="+str(gs)+" cutoff="+str(coff)+" cutoff_transition_width="+str(cotw)+
" n_max="+str(nmax)+" l_max="+str(lmax)+' '+lspecies+' Z='+str(z))
desc = quippy.descriptors.Descriptor(soapstr )
soaps[z] = desc.calc(at)["descriptor"]
for z in at.z:
fout.write("%3s " % (atomicno_to_sym(z)))
if cur:
single_soap = soaps[z][len(soaps[z])-sz[z]]
red_soap = np.dot(single_soap[idxsel], chol)
np.savetxt(fout, [red_soap])
else: np.savetxt(fout, [ soaps[z][len(soaps[z])-sz[z]] ])
sz[z] -=1
counter +=1
sys.stderr.write("SOAP vectors calculated: %d / %d \r" %(counter, len(al.n)) )
sys.stderr.write("\n")
fout.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="""Computes the similarity matrix between a set of atomic structures
based on SOAP descriptors and an optimal assignment of local environments.""")
parser.add_argument("filename", nargs=1, help="Name of the LibAtom formatted xyz input file")
parser.add_argument("-n", type=int, default='8', help="Number of radial functions for the descriptor")
parser.add_argument("-l", type=int, default='6', help="Maximum number of angular functions for the descriptor")
parser.add_argument("-c", type=float, default='5.0', help="Radial cutoff")
parser.add_argument("--cotw", type=float, default='0.5', help="Cutoff transition width")
parser.add_argument("-g", type=float, default='0.5', help="Atom Gaussian sigma")
parser.add_argument("-cw", type=float, default='1.0', help="Center atom weight")
parser.add_argument("-z", type=str, default=None, help="Comma separated atomic numbers of the species that must be considered")
parser.add_argument("--reduce", type=str, nargs = 2, default=None, metavar=('COLS', 'CHOL'), help="Reduce the output power spectrum using two provided files, one for the indices of the columns, the other containing the decomposed UR matrix")
parser.add_argument("--prefix", type=str, default='', help="Prefix for output files (defaults to input file name)")
args = parser.parse_args()
main(args.filename, nmax=args.n, lmax=args.l, coff=args.c, cotw=args.cotw, gs=args.g, centerweight=args.cw, gspecies=args.z, redfiles=args.reduce, prefix=args.prefix)