Skip to content

Commit e86f0ee

Browse files
authored
Merge pull request #218 from KhiopsML/209-openmpi-path-issue-with-khiops-1022-on-debian-like-os
209: OpenMPI path issue with Khiops 10.2.2 on Debian-like OS
2 parents 5f31375 + 33586e0 commit e86f0ee

File tree

5 files changed

+48
-39
lines changed

5 files changed

+48
-39
lines changed

.github/workflows/pip.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,11 @@ jobs:
9393
kh-samples sklearn -i khiops_classifier -e
9494
9595
# Test that the line containing "MPI command" also contains
96-
# "mpiexec", which means that `mpiexec` has been found
97-
kh-status | grep "MPI command" | grep -wq mpiexec
96+
# an executable name under a /bin directory
97+
# Note: this executable name can be different, depending on the MPI
98+
# backend and OS; for instance, "orterun" for OpenMPI on Ubuntu Linux, but
99+
# "mpiexec" for OpenMPI on Rocky Linux
100+
kh-status | grep "MPI command" | grep -Ewq "(/.+?)/bin/.+"
98101
release:
99102
if: github.ref_type == 'tag'
100103
needs: [build, test]

.github/workflows/unit-tests.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ jobs:
9999
OMPI_MCA_rmaps_base_oversubscribe: true
100100
PRTE_MCA_rmaps_default_mapping_policy: :oversubscribe
101101
run: |
102+
# Make sure '/bin' is before '/usr/bin' in PATH
103+
PATH=$(echo "/bin:"$PATH | sed 's#:/bin##')
104+
102105
# This is needed so that the Git tag is parsed and the khiops-python
103106
# version is retrieved
104107
git config --global --add safe.directory $(realpath .)
@@ -177,6 +180,9 @@ jobs:
177180
# Force > 2 CPU cores to launch mpiexec
178181
KHIOPS_PROC_NUMBER: 4
179182
run: |-
183+
# Make sure '/bin' is before '/usr/bin' in PATH
184+
PATH=$(echo "/bin:"$PATH | sed 's#:/bin##')
185+
180186
# Make sure MPI support is not loaded through env modules
181187
# Note: As Docker container's shell is non-interactive, environment
182188
# modules are currently not initializing the shell anyway

khiops/core/internals/runner.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,8 +1146,12 @@ def _initialize_mpi_command_args(self):
11461146
installation_method = _infer_khiops_installation_method()
11471147
# In Conda-based, but non-Conda environment, specify mpiexec path
11481148
if installation_method == "conda-based":
1149-
mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or os.path.join(
1150-
_infer_env_bin_dir_for_conda_based_installations(), "mpiexec"
1149+
# Python `os.path.realpath` resolves symlinks recursively, like GNU
1150+
# `readlink -f`; Python `os.readlink` does not
1151+
mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or os.path.realpath(
1152+
os.path.join(
1153+
_infer_env_bin_dir_for_conda_based_installations(), "mpiexec"
1154+
)
11511155
)
11521156
if platform.system() == "Windows" and not os.path.splitext(mpiexec_path):
11531157
mpiexec_path += ".exe"
@@ -1165,8 +1169,11 @@ def _initialize_mpi_command_args(self):
11651169
)
11661170
# In Conda or local installations, expect mpiexec in the PATH
11671171
else:
1168-
mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or shutil.which(
1169-
"mpiexec"
1172+
link_to_mpiexec = shutil.which("mpiexec")
1173+
mpiexec_path = (
1174+
os.environ.get("KHIOPS_MPIEXEC_PATH")
1175+
or link_to_mpiexec
1176+
and os.path.realpath(link_to_mpiexec)
11701177
)
11711178
# If mpiexec is not in the path, and the installation method is local,
11721179
# then try to load MPI environment module so that mpiexec is in the path

khiops/sklearn/tables.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,32 +1441,23 @@ def _write_sparse_block(self, row_index, stream, target=None):
14411441
assert target in self.target_column, "'target' must be in the target column"
14421442
stream.write(f"{target}\t")
14431443
row = self.matrix.getrow(row_index)
1444-
# Empty row in the sparse matrix: use the first variable as missing data
1445-
# TODO: remove this part once Khiops bug
1446-
# https://github.com/KhiopsML/khiops/issues/235 is solved
1447-
if row.size == 0:
1448-
for variable_index in self.column_ids:
1449-
stream.write(f"{variable_index + 1}: ")
1450-
break
1451-
# Non-empty row in the sparse matrix: get non-missing data
1452-
else:
1453-
# Variable indices are not always sorted in `row.indices`
1454-
# Khiops needs variable indices to be sorted
1455-
sorted_indices = np.sort(row.nonzero()[1], axis=-1, kind="mergesort")
1456-
1457-
# Flatten row for Python < 3.9 scipy.sparse.lil_matrix whose API
1458-
# is not homogeneous with other sparse matrices: it stores
1459-
# opaque Python lists as elements
1460-
# Thus:
1461-
# - if isinstance(self.matrix, sp.lil_matrix) and Python 3.8, then
1462-
# row.data is np.array([list([...])])
1463-
# - else, row.data is np.array([...])
1464-
# TODO: remove this flattening once Python 3.8 support is dropped
1465-
sorted_data = np.fromiter(self._flatten(row.data), row.data.dtype)[
1466-
sorted_indices.argsort()
1467-
]
1468-
for variable_index, variable_value in zip(sorted_indices, sorted_data):
1469-
stream.write(f"{variable_index + 1}:{variable_value} ")
1444+
# Variable indices are not always sorted in `row.indices`
1445+
# Khiops needs variable indices to be sorted
1446+
sorted_indices = np.sort(row.nonzero()[1], axis=-1, kind="mergesort")
1447+
1448+
# Flatten row for Python < 3.9 scipy.sparse.lil_matrix whose API
1449+
# is not homogeneous with other sparse matrices: it stores
1450+
# opaque Python lists as elements
1451+
# Thus:
1452+
# - if isinstance(self.matrix, sp.lil_matrix) and Python 3.8, then
1453+
# row.data is np.array([list([...])])
1454+
# - else, row.data is np.array([...])
1455+
# TODO: remove this flattening once Python 3.8 support is dropped
1456+
sorted_data = np.fromiter(self._flatten(row.data), row.data.dtype)[
1457+
sorted_indices.argsort()
1458+
]
1459+
for variable_index, variable_value in zip(sorted_indices, sorted_data):
1460+
stream.write(f"{variable_index + 1}:{variable_value} ")
14701461
stream.write("\n")
14711462

14721463
def create_table_file_for_khiops(self, output_dir, sort=True):

tests/test_dataset_class.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -565,13 +565,15 @@ def _load_khiops_sparse_file(self, stream):
565565
target, features = line.split(b"\t")
566566
feature_row = np.zeros(100)
567567
for feature in features.strip().split(b" "):
568-
feature_index, feature_value = feature.split(b":")
569-
try:
570-
feature_value = float(feature_value)
571-
# missing value, whence empty string
572-
except ValueError:
573-
feature_value = 0.0
574-
feature_row[int(feature_index) - 1] = feature_value
568+
indexed_feature = feature.split(b":")
569+
570+
# Skip missing feature
571+
if len(indexed_feature) < 2:
572+
continue
573+
574+
# Set feature value in row at the specified index
575+
feature_index, feature_value = indexed_feature
576+
feature_row[int(feature_index) - 1] = float(feature_value)
575577
feature_matrix.append(feature_row)
576578
target_vector.append(float(target))
577579
target_array = np.array(target_vector)

0 commit comments

Comments
 (0)