|  | 
| 3 | 3 | import java.io.IOException; | 
| 4 | 4 | import java.nio.file.Files; | 
| 5 | 5 | import java.nio.file.Path; | 
| 6 |  | -import java.util.ArrayList; | 
| 7 | 6 | import java.util.List; | 
| 8 | 7 | import java.util.Optional; | 
| 9 | 8 | import java.util.regex.Matcher; | 
|  | 
| 19 | 18 |  * Parses the contents of a .blg (BibTeX log) file to extract warning messages. | 
| 20 | 19 |  */ | 
| 21 | 20 | public class BibtexLogParser { | 
| 22 |  | -    private static final Pattern WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$"); | 
|  | 21 | +    private static final Pattern BIBTEX_WARNING_PATTERN = Pattern.compile("^Warning--(?<message>[a-zA-Z ]+) in (?<entryKey>[^\\s]+)$"); | 
|  | 22 | +    private static final Pattern BIBLATEX_WARNING_PATTERN = Pattern.compile( | 
|  | 23 | +            "(?:(?:\\[\\d+\\] )?Biber\\.pm:\\d+> )?WARN - Datamodel: [a-z]+ entry '(?<entryKey>[^']+)' \\((?<fileName>[^)]+)\\): (?<message>.+)"); | 
|  | 24 | + | 
| 23 | 25 |     private static final String EMPTY_FIELD_PREFIX = "empty"; | 
|  | 26 | +    private static final String INVALID_FIELD_PREFIX = "field '"; | 
|  | 27 | +    private static final String MULTI_INVALID_FIELD_PREFIX = "field - one of '"; | 
| 24 | 28 | 
 | 
| 25 | 29 |     public List<BibWarning> parseBiblog(@NonNull Path blgFilePath) throws IOException { | 
| 26 |  | -        List<BibWarning> warnings = new ArrayList<>(); | 
| 27 |  | -        List<String> lines = Files.readAllLines(blgFilePath); | 
| 28 |  | -        for (String line : lines) { | 
| 29 |  | -            Optional<BibWarning> potentialWarning = parseWarningLine(line); | 
| 30 |  | -            potentialWarning.ifPresent(warnings::add); | 
| 31 |  | -        } | 
| 32 |  | -        return warnings; | 
|  | 30 | +        return Files.lines(blgFilePath) | 
|  | 31 | +                    .map(this::parseWarningLine) | 
|  | 32 | +                    .flatMap(Optional::stream) | 
|  | 33 | +                    .toList(); | 
| 33 | 34 |     } | 
| 34 | 35 | 
 | 
| 35 |  | -    /** | 
| 36 |  | -     * Parses a single line from the .blg file to identify a warning. | 
| 37 |  | -     * <p> | 
| 38 |  | -     * Currently supports parsing warnings of the format: | 
| 39 |  | -     * <pre> | 
| 40 |  | -     * Warning--[message] in [entryKey] | 
| 41 |  | -     * </pre> | 
| 42 |  | -     * For example: {@code Warning--empty journal in Scholey_2013} | 
| 43 |  | -     * | 
| 44 |  | -     * @param line a single line from the .blg file | 
| 45 |  | -     * @return an Optional containing a {@link BibWarning} if a match is found, or empty otherwise | 
| 46 |  | -     */ | 
| 47 |  | -    private Optional<BibWarning> parseWarningLine(String line) { | 
| 48 |  | -        // TODO: Support additional warning formats | 
| 49 |  | -        Matcher matcher = WARNING_PATTERN.matcher(line); | 
| 50 |  | -        if (!matcher.find()) { | 
| 51 |  | -            return Optional.empty(); | 
|  | 36 | +    /// Parses a single line from a .blg file to identify a warning. | 
|  | 37 | +    /// | 
|  | 38 | +    /// This method supports two warning formats: | 
|  | 39 | +    /// | 
|  | 40 | +    /// 1.  **BibTeX Warnings:** Simple warnings from the legacy BibTeX backend. | 
|  | 41 | +    ///     `Warning--[message] in [entryKey]` | 
|  | 42 | +    ///     For example: `Warning--empty journal in Scholey_2013` | 
|  | 43 | +    /// | 
|  | 44 | +    /// 2.  **BibLaTeX Datamodel Warnings:** Detailed warnings from the Biber backend, including datamodel validation issues. | 
|  | 45 | +    ///     `[Log line] > WARN - Datamodel: [entry type] entry '[entryKey]' ([fileName]): [message]` | 
|  | 46 | +    ///     For example: `Biber.pm:123> WARN - Datamodel: article entry 'Scholey_2013' (file.bib): Invalid field 'journal'` | 
|  | 47 | +    /// | 
|  | 48 | +    /// @param line The single line from the .blg file to parse. | 
|  | 49 | +    /// | 
|  | 50 | +    /// @returns An `Optional` containing a `BibWarning` if a match is found, or an empty `Optional` otherwise. | 
|  | 51 | +    Optional<BibWarning> parseWarningLine(String line) { | 
|  | 52 | +        Matcher bibtexMatcher = BIBTEX_WARNING_PATTERN.matcher(line); | 
|  | 53 | +        if (bibtexMatcher.find()) { | 
|  | 54 | +            String message = bibtexMatcher.group("message").trim(); | 
|  | 55 | +            String entryKey = bibtexMatcher.group("entryKey"); | 
|  | 56 | +            // Extract field name for warnings related to empty fields  (e.g., "empty journal" -> fieldName = "journal") | 
|  | 57 | +            String fieldName = null; | 
|  | 58 | +            if (message.startsWith(EMPTY_FIELD_PREFIX)) { | 
|  | 59 | +                fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim(); | 
|  | 60 | +                fieldName = FieldFactory.parseField(fieldName).getName(); | 
|  | 61 | +            } | 
|  | 62 | + | 
|  | 63 | +            return Optional.of(new BibWarning( | 
|  | 64 | +                    SeverityType.WARNING, | 
|  | 65 | +                    message, | 
|  | 66 | +                    fieldName, | 
|  | 67 | +                    entryKey | 
|  | 68 | +            )); | 
| 52 | 69 |         } | 
| 53 | 70 | 
 | 
| 54 |  | -        String message = matcher.group("message").trim(); | 
| 55 |  | -        String entryKey = matcher.group("entryKey"); | 
| 56 |  | -        // Extract field name for warnings related to empty fields  (e.g., "empty journal" -> fieldName = "journal") | 
| 57 |  | -        String fieldName = null; | 
| 58 |  | -        if (message.startsWith(EMPTY_FIELD_PREFIX)) { | 
| 59 |  | -            fieldName = message.substring(EMPTY_FIELD_PREFIX.length()).trim(); | 
| 60 |  | -            fieldName = FieldFactory.parseField(fieldName).getName(); | 
|  | 71 | +        Matcher biblatexMatcher = BIBLATEX_WARNING_PATTERN.matcher(line); | 
|  | 72 | +        if (biblatexMatcher.find()) { | 
|  | 73 | +            String message = biblatexMatcher.group("message").trim(); | 
|  | 74 | +            String entryKey = biblatexMatcher.group("entryKey"); | 
|  | 75 | +            String fieldName = null; | 
|  | 76 | + | 
|  | 77 | +            // Extract field name for warnings related to invalid fields (e.g., "Invalid field 'publisher' for entrytype 'article'" -> fieldName = "publisher") | 
|  | 78 | +            String lowerCaseMessage = message.toLowerCase(); | 
|  | 79 | +            if (lowerCaseMessage.contains(INVALID_FIELD_PREFIX)) { | 
|  | 80 | +                int startIndex = lowerCaseMessage.indexOf(INVALID_FIELD_PREFIX) + INVALID_FIELD_PREFIX.length(); | 
|  | 81 | +                int endIndex = lowerCaseMessage.indexOf('\'', startIndex); | 
|  | 82 | +                if (endIndex != -1) { | 
|  | 83 | +                    fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim(); | 
|  | 84 | +                    fieldName = FieldFactory.parseField(fieldName).getName(); | 
|  | 85 | +                } | 
|  | 86 | +            } else if (lowerCaseMessage.contains(MULTI_INVALID_FIELD_PREFIX)) { | 
|  | 87 | +                int startIndex = lowerCaseMessage.indexOf(MULTI_INVALID_FIELD_PREFIX) + MULTI_INVALID_FIELD_PREFIX.length(); | 
|  | 88 | +                int endIndex = lowerCaseMessage.indexOf('\'', startIndex); | 
|  | 89 | +                if (endIndex != -1) { | 
|  | 90 | +                    fieldName = lowerCaseMessage.substring(startIndex, endIndex).trim().split(",")[0].trim(); | 
|  | 91 | +                    fieldName = FieldFactory.parseField(fieldName).getName(); | 
|  | 92 | +                } | 
|  | 93 | +            } | 
|  | 94 | + | 
|  | 95 | +            return Optional.of(new BibWarning( | 
|  | 96 | +                    SeverityType.WARNING, | 
|  | 97 | +                    message, | 
|  | 98 | +                    fieldName, | 
|  | 99 | +                    entryKey | 
|  | 100 | +            )); | 
| 61 | 101 |         } | 
| 62 | 102 | 
 | 
| 63 |  | -        return Optional.of(new BibWarning( | 
| 64 |  | -                SeverityType.WARNING, | 
| 65 |  | -                message, | 
| 66 |  | -                fieldName, | 
| 67 |  | -                entryKey | 
| 68 |  | -        )); | 
|  | 103 | +        return Optional.empty(); | 
| 69 | 104 |     } | 
| 70 | 105 | } | 
0 commit comments