bobleesj.utils.parsers package

Submodules

bobleesj.utils.parsers.formula module

class bobleesj.utils.parsers.formula.Formula(formula: str, validate=True)[source]

Bases: object

A class to parse and manipulate chemical formulas. This class provides methods to sort, filter, and analyze chemical formulas.

Examples

>>> formula = Formula("NdSi2")
>>> formula.parsed_formula
[('Nd', 1.0), ('Si', 2.0)]
>>> formula.element_count
2
>>> formula.get_normalized_formula()
'Nd0.333333Si0.666667'
>>> formula.get_normalized_parsed_formula()
[('Nd', 0.333333), ('Si', 0.666667)]
>>> formula.get_normalized_indices()
[0.333333, 0.666667]

static build_formula_from_parsed(parsed_formula: list[tuple[str, float]]) → str[source]

Convert the parsed formula into a string. An index equal to 1 is omitted from the resulting string.

Examples

>>> parsed_formula = [("Nd", 1.0), ("Si", 2.0)]
>>> Formula.build_formula_from_parsed(parsed_formula)
"NdSi2"

static count(formulas: list[str]) → int[source]

Count the number of formulas in a list.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.count(formulas)
4

static count_by_composition(formulas: list[str]) → dict[int, int][source]

Count the number of formulas in each composition category.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.count_by_composition(formulas)
{2: 2, 3: 1, 4: 1}

static count_by_formula(formulas: list[str], formula_to_count: str) → int[source]

Count the number of occurrences of a specific formula in a list of formulas.

Examples

>>> formulas = ["NdSi2", "NdSi2", "NdSi2Th2", "NdSi2Th2", "ThOs"]
>>> Formula.count_by_formula(formulas, "NdSi2")
2

static count_duplicates(formulas: list[str]) → dict[str, int][source]

Count the number of duplicates in a list of formulas.

Examples

>>> formulas = ["NdSi2", "NdSi2", "NdSi2Th2", "NdSi2Th2", "ThOs"]
>>> Formula.count_duplicates(formulas)
{"NdSi2": 2, "NdSi2Th2": 2}

static count_individual(formulas: list[str]) → dict[str, int][source]

Count the number of occurrences of each formula in a list of formulas.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.count_individual(formulas)
{"NdSi2": 1, "ThOs": 1, "NdSi2Th2": 1, "YNdThSi2": 1}

static count_unique(formulas: list[str]) → int[source]

Count the number of unique formulas in a list.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.count_unique(formulas)
4

property element_count: int

Get the number of unique elements in the formula.

Examples

>>> formula = Formula("NdSi2")
>>> formula.element_count
2

property elements: list[str]

Get the list of elements in the formula.

Examples

>>> formula = Formula("NdSi2")
>>> formula.elements
["Nd", "Si"]

static filter_by_composition(formulas: list[str]) → dict[int, list[str]][source]

Sort formulas into categories based on their composition.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.filter_by_composition(formulas)
{2: ["NdSi2", "ThOs"], 3: ["NdSi2Th2"], 4: ["YNdThSi2"]}

static filter_by_elements_containing(formulas: list[str], elements: list[str]) → list[str][source]

Filter formulas by a list of elements.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> elements = ["Nd", "Si"]
>>> Formula.filter_by_elements_containing(formulas, elements)
["NdSi2", "NdSi2Th2", "YNdThSi2"]

static filter_by_elements_matching(formulas: list[str], elements: list[str]) → list[str][source]

Filter formulas by a list of elements, keeping only the formulas that contain exactly the specified elements and no others.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> elements = ["Nd", "Si"]
>>> Formula.filter_by_elements_matching(formulas, elements)
["NdSi2"]

static filter_by_single_composition(formulas: list[str], composition_type: int) → list[str][source]

Filter formulas by the given composition type.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.filter_by_single_composition(formulas, 2)
["NdSi2", "ThOs"]

static get_element_count(formulas: list[str]) → dict[str, int][source]

Get the count of each element in a list of formulas. Do not consider the stoichiometric value.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.get_element_count(formulas)
{"Nd": 3, "Si": 3, "Th": 3, "Os": 1, "Y": 1}

get_normalized_formula(decimals=6) → str[source]

Get the normalized formula of the formula.

Examples

>>> formula = Formula("NdSi2")
>>> formula.get_normalized_formula()
"Nd0.333333Si0.666667"
>>> formula.get_normalized_formula(2)
"Nd0.33Si0.67"

get_normalized_indices(decimals=6) → list[float][source]

Get the normalized indices of the formula.

Examples

>>> formula = Formula("NdSi2")
>>> formula.get_normalized_indices()
[0.333333, 0.666667]
>>> formula.get_normalized_indices(2)
[0.33, 0.67]

get_normalized_parsed_formula(decimals=6) → list[tuple[str, float]][source]

Get the normalized parsed formula of the formula.

Examples

>>> formula = Formula("NdSi2")
>>> formula.get_normalized_parsed_formula()
[("Nd", 0.333333), ("Si", 0.666667)]
>>> formula.get_normalized_parsed_formula(2)
[("Nd", 0.33), ("Si", 0.67)]

static get_unique_elements(formulas: list[str]) → set[str][source]

Get unique elements from a list of formulas.

Examples

>>> formulas = ["NdSi2", "ThOs", "NdSi2Th2", "YNdThSi2"]
>>> Formula.get_unique_elements(formulas)
{"Nd", "Si", "Th", "Os", "Y"}

static get_unique_formulas(formulas: list[str]) → set[str][source]

Get unique formulas from a list of formulas.

Examples

>>> formulas = ["NdSi2", "ThOs", "ThOs"]
>>> Formula.get_unique_formulas(formulas)
{"NdSi2", "ThOs"}

property indices: list[float]

Get the list of indices in the formula.

Examples

>>> formula = Formula("NdSi2")
>>> formula.indices
[1.0, 2.0]

property max_min_avg_index: tuple[float, float, float]

Get the max, min, and avg index of the formula.

Examples

>>> formula = Formula("NdSi2")
>>> formula.max_min_avg_index
(2.0, 1.0, 1.5)

static order_by_alphabetical(formulas: list[str], reverse=False) → list[str][source]

Sort formulas alphabetically.

Examples

>>> formulas = ["AB2", "AB", "BC2D2", "BBC2"]
>>> Formula.order_by_alphabetical(formulas)
["AB", "AB2", "BBC2", "BC2D2"]

sort_by_custom_label(custom_labels: dict[int, dict[str, list[str]]], normalize=False) → str[source]

Sort elements in a chemical formula using a precomputed element order map.

Parameters:

formula (str) – The chemical formula to be sorted.
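custom_labels (dict[int, dict[str, list[str]]]) – The mapping, keyed by the number of elements in the formula, from each label to the list of element symbols assigned to that label; it determines the desired sort order.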
normalize (bool, optional) – Whether to normalize the parsed formula, by default False.

Returns:

The sorted formula string.

Return type:

str

Examples

>>> formula = "BLi"
>>> custom_labels = {
...     2: {
...         "A": ["Li", "Er"],
...         "B": ["B", "In"],
...     },
...     3: {
...         "R": ["Er"],
...         "M": ["Co"],
...         "X": ["In"],
...     },
...     4: {
...         "A": ["Er"],
...         "B": ["Co"],
...         "C": ["In"],
...         "D": ["U"],
...     },
... }
>>> sorted_formula = Formula(formula).sort_by_custom_label(custom_labels)
>>> print(sorted_formula)
LiB

sort_by_elemental_property(property_data: dict[str, float], ascending=True, normalize=False) → str[source]

Sort the elements in a chemical formula based on a specified CAF property.

Parameters:

formula (str) – The chemical formula to be sorted.
property_data (dict[str, float]) – The dictionary that contains the single value for each element of the given formula.
ascending (bool, optional) – Whether to sort in ascending order. Defaults to True.
normalize (bool, optional) – Whether to normalize the formula before sorting. Defaults to False.

Returns:

The formula string with elements sorted according to the specified property.

Return type:

str

Examples

#FIXME: Double check this example
>>> from bobleesj.utils.sources.oliynyk import Oliynyk
>>> from bobleesj.utils.sources.oliynyk import Property as P
>>> formula = "LiFe"
>>> oliynyk = Oliynyk()
>>> prop_data = oliynyk.get_property_data_for_formula(formula, P.AW)
>>> Formula(formula).sort_by_elemental_property(prop_data)
"LiFe"
#FIXME: TEST THIS EXAMPLE

sort_by_stoichiometry(property_data: dict[str, float], ascending=True, normalize=False) → str[source]

Sort the elements in the chemical formula based on their composition.

When more than one element has the same composition, the Mendeleev number is used to break the tie. During a tie, the Mendeleev number is always sorted in ascending order.

Parameters:

property_data (dict[str, float]) – The data used to break ties when the stoichiometric ratio is the same. This data is optional; if none is provided, the tied elements are simply sorted alphabetically from A to Z.
ascending (bool, optional) – Whether to sort in ascending order. Defaults to True.
normalize (bool, optional) – Whether to normalize the formula before sorting. Defaults to False.

Returns:

The formula string with elements sorted according to the specified property.

Return type:

str

Examples

>>> Formula("LiNa2B").sort_by_stoichiometry(db)
"LiBNa2"