Skip to content

Commit e609434

Browse files
committed
Remove generateTriplets.py to move to next PR
1 parent 61b1e65 commit e609434

File tree

4 files changed

+33
-309
lines changed

4 files changed

+33
-309
lines changed

llvm/docs/CommandGuide/llvm-ir2vec.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ Triplet Generation Mode
5555

5656
In triplet mode, :program:`llvm-ir2vec` analyzes LLVM IR and extracts numeric
5757
triplets consisting of opcode IDs, type IDs, and operand IDs. These triplets
58-
are generated in train2id format. The tool outputs numeric IDs directly using
59-
the ir2vec::Vocabulary mapping infrastructure, eliminating the need for
60-
string-to-ID preprocessing.
58+
are generated in the standard format used for knowledge graph embedding training.
59+
The tool outputs numeric IDs directly using the ir2vec::Vocabulary mapping
60+
infrastructure, eliminating the need for string-to-ID preprocessing.
6161

6262
Usage:
6363

@@ -69,9 +69,9 @@ Entity Mapping Generation Mode
6969
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7070

7171
In entity mode, :program:`llvm-ir2vec` generates the entity mappings supported by
72-
IR2Vec in entity2id format. This mode outputs all supported entities (opcodes,
73-
types, and operands) with their corresponding numeric IDs, and is not specific for
74-
an LLVM IR file.
72+
IR2Vec in the standard format used for knowledge graph embedding training. This
73+
mode outputs all supported entities (opcodes, types, and operands) with their
74+
corresponding numeric IDs, and is not specific to an LLVM IR file.
7575

7676
Usage:
7777

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; Test error handling and input validation for the llvm-ir2vec tool
2+
3+
; RUN: not llvm-ir2vec --mode=embeddings %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-VOCAB
4+
5+
; RUN: not llvm-ir2vec --mode=embeddings --function=nonexistent --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-NOT-FOUND
6+
7+
; RUN: llvm-ir2vec --mode=triplets --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json --level=inst %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-LEVEL
8+
; RUN: llvm-ir2vec --mode=entities --level=inst %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-LEVEL
9+
10+
; RUN: llvm-ir2vec --mode=triplets --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json --function=dummy %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-FUNC
11+
; RUN: llvm-ir2vec --mode=entities --function=dummy %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-FUNC
12+
13+
; Simple test function for valid IR
14+
define i32 @test_func(i32 %a) {
15+
entry:
16+
ret i32 %a
17+
}
18+
19+
; CHECK-NO-VOCAB: error: IR2Vec vocabulary file path not specified; You may need to set it using --ir2vec-vocab-path
20+
; CHECK-FUNC-NOT-FOUND: Error: Function 'nonexistent' not found
21+
; CHECK-UNUSED-LEVEL: Warning: --level option is ignored
22+
; CHECK-UNUSED-FUNC: Warning: --function option is ignored

llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ class IR2VecTool {
128128
// option
129129
MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
130130
MAM.registerPass([&] { return IR2VecVocabAnalysis(); });
131+
// This will emit an error if the vocab is not found or is invalid
131132
Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M);
132133
return Vocab->isValid();
133134
}
@@ -196,7 +197,7 @@ class IR2VecTool {
196197
<< OperandStr << '\t' << "Arg" << ArgIndex << '\n';
197198
});
198199

199-
ArgIndex++;
200+
++ArgIndex;
200201
}
201202
// Only update MaxRelation if there were operands
202203
if (ArgIndex > 0) {
@@ -293,11 +294,9 @@ Error processModule(Module &M, raw_ostream &OS) {
293294
if (Mode == EmbeddingMode) {
294295
// Initialize vocabulary for embedding generation
295296
// Note: Requires --ir2vec-vocab-path option to be set
296-
if (!Tool.initializeVocabulary())
297-
return createStringError(
298-
errc::invalid_argument,
299-
"Failed to initialize IR2Vec vocabulary. "
300-
"Make sure to specify --ir2vec-vocab-path for embedding mode.");
297+
auto VocabStatus = Tool.initializeVocabulary();
298+
assert(VocabStatus && "Failed to initialize IR2Vec vocabulary");
299+
(void)VocabStatus;
301300

302301
if (!FunctionName.empty()) {
303302
// Process single function
@@ -335,12 +334,6 @@ int main(int argc, char **argv) {
335334
"See https://llvm.org/docs/CommandGuide/llvm-ir2vec.html for more "
336335
"information.\n");
337336

338-
// Validate input file requirement
339-
if (InputFilename.empty() && Mode != EntityMode) {
340-
errs() << "Error: Input file (.bc/.ll) or stdin (-) is required\n";
341-
return 1;
342-
}
343-
344337
// Validate command line options
345338
if (Mode != EmbeddingMode) {
346339
if (Level.getNumOccurrences() > 0)

llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py

Lines changed: 0 additions & 291 deletions
This file was deleted.

0 commit comments

Comments
 (0)