CifEnsemble

[ ]:
from cifkit import CifEnsemble, Example

# Initialize a CifEnsemble from the demo folder path exposed by Example
ensemble = CifEnsemble(Example.demo_folder_path)

Initialize with nested files

You can also use the add_nested_files flag to include .cif files that are located in folders within the folder path provided.

[ ]:

# Initialize with nested .cif files in the folder.
# The assignment must sit on its own line: when fused into the comment it
# never runs and later uses of `ensemble_nested` fail with NameError.
ensemble_nested = CifEnsemble(Example.demo_folder_path, add_nested_files=True)

Get instant properties

Once the object is created, you can get instant properties such as file count, folder path, etc.

[ ]:
# Each property below is read directly from the ensemble object and printed.

# Get .cif file count in the folder
print("File count:", ensemble.file_count) # 6

# Get the directory path the ensemble was created from
print("Directory path:", ensemble.dir_path)

# Get all .cif file paths in the folder
print("File paths:", ensemble.file_paths)

# Get all Cif objects initialized by the ensemble
print("Cif objects:", ensemble.cifs)

Get individual .cif properties

The CifEnsemble object contains a property called cifs which contains a list of Cif objects. You may loop through each to access both instant and computed properties described in the previous CIF section.

[ ]:
# Walk through every Cif object held by the ensemble and print its
# file name followed by a few of its properties.
for cif in ensemble.cifs:
    print(f"\n{cif.file_name}")
    print("Formula:", cif.formula)
    print("Tag:", cif.tag)
    print("Mixing type:", cif.site_mixing_type)
    print("Unique bond pairs:", cif.bond_pairs)

Get unique properties

You can get all unique attributes, such as formulas, elements, etc., across the files in the folder.

[ ]:
# Each `unique_*` property is read from the ensemble and printed as-is.

# Get unique formulas
print("Unique formulas:", ensemble.unique_formulas)

# Get unique elements
print("Unique elements:", ensemble.unique_elements)

# Get unique structures
print("Unique structures:", ensemble.unique_structures)

# Get unique atomic (site) mixing types
print("Unique atomic mixing types:", ensemble.unique_site_mixing_types)

# Get unique elements from the ensemble that includes nested folders
print("Unique elements including nested:", ensemble_nested.unique_elements)

# Get unique space group names
print("Unique space group names:", ensemble.unique_space_group_names)

# Get unique space group numbers
print("Unique space group numbers:", ensemble.unique_space_group_numbers)

# Get unique tags
print("Unique tags:", ensemble.unique_tags)

# Get unique composition types
print("Unique composition types:", ensemble.unique_composition_types)

Get overall stats by attribute

Get the number of files for each unique value of a property.

[ ]:
# Each stats property below is read from the ensemble and printed as-is.

# Get file count per structure
print("Structure stats:", ensemble.structure_stats)

# Get file count per formula
print("Formula stats:", ensemble.formula_stats)

# Get file count per tag
print("Tag stats:", ensemble.tag_stats)

# Get file count per space group number
print("Space group number stats:", ensemble.space_group_number_stats)

# Get file count per space group name
print("Space group name stats:", ensemble.space_group_name_stats)

# Get file count per composition type
print("Composition type stats:", ensemble.composition_type_stats)

# Get file count per element
print("Unique elements stats:", ensemble.unique_elements_stats)

# Get file count per site mixing type
print("Site mixing type stats:", ensemble.site_mixing_type_stats)

# Get file count per supercell atom count
print("Supercell size stats:", ensemble.supercell_size_stats)

# NOTE(review): the two CN properties below end in `_stat` (singular), unlike
# the `_stats` properties above, and `by_method_methods` looks like it may be
# a typo for `by_best_methods` — confirm against the cifkit API before renaming.

# Get file count per CN value by min dist method
print("CN value using min dist method stats:", ensemble.unique_CN_values_by_min_dist_method_stat)

# Get file count per CN value by best methods
print("CN value using best methods stats:", ensemble.unique_CN_values_by_method_methods_stat)

Filter .cif containing specific attributes

[ ]:
# Each filter takes a list of values to match.
# NOTE(review): the results are not assigned or printed here; wrap a call in
# print() or assign it to a variable to inspect the returned file paths.

# Return file paths by formulas
ensemble.filter_by_formulas(["ErCoIn", "Er10Co9In20"])

# Return file paths by structures
ensemble.filter_by_structures(["Mn5Si3"])

# Return file paths by space group names
ensemble.filter_by_space_group_names(["P4/mmm"])

# Return file paths by space group numbers
ensemble.filter_by_space_group_numbers([123])

# Return file paths by site mixing types (one value, then several values)
ensemble.filter_by_site_mixing_types(["full_occupancy"])
ensemble.filter_by_site_mixing_types(["full_occupancy", "deficiency_without_atomic_mixing"])

# Return file paths by composition types (1-> unary, 2-> binary)
# NOTE(review): 3 presumably selects ternary compositions — confirm.
ensemble.filter_by_composition_types([3])


Filter .cif by specific attributes

Filter .cif files that either contain any of the items passed or contain exactly the values passed. cifkit supports this kind of filtering for elements and coordination numbers (CN).

[ ]:

# The statements of this cell had been collapsed onto one line, which is not
# valid Python; each call is restored to its own line below.

# Filter by elements: "containing" variant
print("\nFile paths containing Er or Co or In:")
print(ensemble.filter_by_elements_containing(["Er", "Co", "In"]))

# Filter by elements: "exact matching" variant
print("\nFile paths containing Er and In:")
print(ensemble.filter_by_elements_exact_matching(["Er", "In"]))

# Filter by coordination number (CN): "containing" variant,
# once per CN method (min dist method, then best methods)
print("\nFile paths containing CN value of 15:")
print(ensemble.filter_by_CN_min_dist_method_containing([15]))
print(ensemble.filter_by_CN_best_methods_containing([15]))

# Filter by coordination number (CN): "exact matching" variant,
# once per CN method (min dist method, then best methods)
print("\nFile paths containing CN value of 11, 14, and 15:")
print(ensemble.filter_by_CN_min_dist_method_exact_matching([11, 14, 15]))
print(ensemble.filter_by_CN_best_methods_exact_matching([11, 14, 15]))

Filter by range

[ ]:
# Return a set of .cif file paths with min distance between 1.0 Å and 3.0 Å
print(
    "File paths with min distance between 1.0 Å and 3.0 Å:\n",
    ensemble.filter_by_min_distance(1.0, 3.0),
)

# Return a set of .cif file paths with supercell atom count above 300 and below 3000.
# The comment and message previously said 500 while the call passed 300;
# the text now states the bounds that are actually passed.
print(
    "File paths with supercell atom count above 300 and below 3000:\n",
    ensemble.filter_by_supercell_count(300, 3000),
)

Move and copy files

Assume you have a set of file paths filtered using the functions described in the previous section. Because this notebook runs on predefined examples, you need to provide your own file_paths and dest_dir_path for your system.

# Example set of .cif file paths to operate on; replace with paths on your system.
file_paths = {
    "tests/data/cif/ensemble_test/300169.cif",
    "tests/data/cif/ensemble_test/300171.cif",
    "tests/data/cif/ensemble_test/300170.cif",
}

# NOTE(review): dest_dir_path is not defined in this snippet — assign your
# destination directory to it before running either call below.

# To move files
ensemble.move_cif_files(file_paths, dest_dir_path)

# To copy files
# NOTE(review): presumably shown as an alternative to the move above — if the
# move relocates the files, these source paths would no longer exist; confirm.
ensemble.copy_cif_files(file_paths, dest_dir_path)

Generate histograms

[ ]:
# Rebind `ensemble` to the folder given by Example.ErCoIn_big_folder_path
ensemble = CifEnsemble(Example.ErCoIn_big_folder_path)

# Generate one histogram per attribute
ensemble.generate_structure_histogram()
ensemble.generate_formula_histogram()
ensemble.generate_tag_histogram()
ensemble.generate_space_group_number_histogram()
ensemble.generate_space_group_name_histogram()
ensemble.generate_supercell_size_histogram()
ensemble.generate_elements_histogram()
ensemble.generate_CN_by_min_dist_method_histogram()
ensemble.generate_CN_by_best_methods_histogram()
ensemble.generate_composition_type_histogram()
ensemble.generate_site_mixing_type_histogram()

# The optional usages below are kept as `#` comments rather than a bare
# triple-quoted string, which a notebook would echo as the cell's output.
#
# Optional: Specify the output directory where the .png file will be saved.
# ensemble.generate_site_mixing_type_histogram(output_dir="path/to/directory")
#
# Optional: Call plt.show() to display the histogram on screen.
# ensemble.generate_site_mixing_type_histogram(display=True)