CompRhys · janosh · Jun 16, 2022 · Apr 20, 2022 · Apr 25, 2022 · Apr 25, 2022
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -18,16 +18,15 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v3
         with:
-          python-version: 3.7
+          python-version: 3.8
           cache: pip
           cache-dependency-path: setup.py
 
       - name: Install dependencies
         run: |
-          pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
-          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cpu.html
+          pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cpu.html
           pip install .[test]
-          cat aviary.egg-info/SOURCES.txt
 
       - name: Run Tests
         run: python -m pytest --capture=no --cov aviary
diff --git a/.gitignore b/.gitignore
@@ -29,3 +29,10 @@ datasets/
 pds/
 manuscript/
 voro-thesis/
+
+# MatBench run artifacts like model preds, checkpoints, metrics and slurm job logs
+examples/mat_bench/model_preds
+examples/mat_bench/model_scores
+examples/mat_bench/checkpoints
+job-logs*
+wandb
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -40,7 +40,6 @@ repos:
     hooks:
     - id: codespell
       exclude_types: [json]
-      args: [--ignore-words-list, 'hist,ba']
 
   - repo: https://github.com/psf/black
     rev: 22.3.0
@@ -51,7 +50,7 @@ repos:
     rev: v0.942
     hooks:
       - id: mypy
-        exclude: (tests|examples)
+        exclude: (tests|examples)/
 
   - repo: https://github.com/myint/autoflake
     rev: v1.4
@@ -68,11 +67,9 @@ repos:
     rev: 6.1.1
     hooks:
       - id: pydocstyle
-        # D100: Missing docstring in public module
-        # D104: Missing docstring in public package
-        # D105: Missing docstring in magic method
-        # D107: Missing docstring in __init__
-        # D205: 1 blank line required between summary line and description
-        # D415: First line should end with ., ? or !
-        args: [--convention=google, '--add-ignore=D100,D104,D105,D107,D205,D415']
-        exclude: (tests|examples)
+        exclude: (tests|examples)/
+
+  - repo: https://github.com/janosh/format-ipy-cells
+    rev: v0.1.10
+    hooks:
+      - id: format-ipy-cells
diff --git a/aviary/cgcnn/data.py b/aviary/cgcnn/data.py
@@ -168,9 +168,9 @@ def __getitem__(self, idx: int):
             - list[str | int]: identifiers like material_id, composition
         """
         # NOTE sites must be given in fractional coordinates
-        df_idx = self.df.iloc[idx]
-        crystal = df_idx["Structure_obj"]
-        cry_ids = df_idx[self.identifiers]
+        row = self.df.iloc[idx]
+        crystal = row["Structure_obj"]
+        material_ids = row[self.identifiers]
 
         # atom features for disordered sites
         site_atoms = [atom.species.as_dict() for atom in crystal]
@@ -187,11 +187,13 @@ def __getitem__(self, idx: int):
         self_idx, nbr_idx, nbr_dist = self._get_nbr_data(crystal)
 
         if not len(self_idx):
-            raise AssertionError(f"All atoms in {cry_ids} are isolated")
+            raise AssertionError(f"All atoms in {material_ids} are isolated")
         if not len(nbr_idx):
-            raise AssertionError(f"This should not be triggered but was for {cry_ids}")
+            raise AssertionError(
+                f"This should not be triggered but was for {material_ids}"
+            )
         if set(self_idx) != set(range(crystal.num_sites)):
-            raise AssertionError(f"At least one atom in {cry_ids} is isolated")
+            raise AssertionError(f"At least one atom in {material_ids} is isolated")
 
         nbr_dist = self.gdf.expand(nbr_dist)
 
@@ -203,14 +205,14 @@ def __getitem__(self, idx: int):
         targets: list[Tensor | LongTensor] = []
         for target, task_type in self.task_dict.items():
             if task_type == "regression":
-                targets.append(Tensor([df_idx[target]]))
+                targets.append(Tensor([row[target]]))
             elif task_type == "classification":
-                targets.append(LongTensor([df_idx[target]]))
+                targets.append(LongTensor([row[target]]))
 
         return (
             (atom_fea_t, nbr_dist_t, self_idx_t, nbr_idx_t),
             targets,
-            *cry_ids,
+            *material_ids,
         )
 
 

diff --git a/aviary/cgcnn/model.py b/aviary/cgcnn/model.py
@@ -7,7 +7,7 @@
 from torch_scatter import scatter_add, scatter_mean
 
 from aviary.core import BaseModelClass
-from aviary.segments import SimpleNetwork
+from aviary.networks import SimpleNetwork
 
 
 class CrystalGraphConvNet(BaseModelClass):
@@ -36,7 +36,10 @@ def __init__(
         """Initialize CrystalGraphConvNet.
 
         Args:
-            robust (bool): Whether to estimate standard deviation for use in a robust loss function
+            robust (bool): If True, the number of model outputs is doubled. The 2nd output for
+                each target is an estimate of the aleatoric uncertainty (uncertainty inherent to
+                the sample), which can be used with a robust loss function to attenuate the
+                weighting of uncertain samples.
             n_targets (list[int]): Number of targets to train on
             elem_emb_len (int): Number of atom features in the input.
             nbr_fea_len (int): Number of bond features.