|
6 | 6 | # |
7 | 7 | from __future__ import annotations |
8 | 8 |
|
9 | | -import abc |
10 | 9 | import argparse |
11 | 10 | import ast |
12 | 11 | import builtins as bltns |
13 | | -import collections |
14 | 12 | import contextlib |
15 | 13 | import dataclasses as dc |
16 | 14 | import enum |
|
57 | 55 | ClassDict, ModuleDict, FunctionKind, |
58 | 56 | CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW, |
59 | 57 | GETTER, SETTER) |
| 58 | +from libclinic.language import Language, PythonLanguage |
| 59 | +from libclinic.block_parser import Block, BlockParser |
60 | 60 |
|
61 | 61 |
|
62 | 62 | # TODO: |
@@ -144,96 +144,6 @@ def __init__(self) -> None: |
144 | 144 | self.unlock: list[str] = [] |
145 | 145 |
|
146 | 146 |
|
class Language(metaclass=abc.ABCMeta):
    """Abstract base for a host language whose files embed Clinic blocks.

    Subclasses supply the marker templates below and implement render().

    start_line / stop_line
        Templates for the lines opening and closing a block's input.
        Each must contain ``{dsl_name}`` exactly once (see validate()).
    body_prefix
        Text expected at the start of every input line of a block.
    checksum_line
        Template for the line that ends the generated output.  Besides
        ``{dsl_name}`` it must contain exactly one ``{arguments}`` or
        exactly one ``{checksum}`` field.
    """

    start_line = ""
    body_prefix = ""
    stop_line = ""
    checksum_line = ""

    def __init__(self, filename: str) -> None:
        self.filename = filename

    @abc.abstractmethod
    def render(
        self,
        clinic: Clinic,
        signatures: Iterable[Module | Class | Function]
    ) -> str:
        """Return the text to emit for *signatures*."""
        ...

    def parse_line(self, line: str) -> None:
        """Hook invoked with a line of input; the base version ignores it."""
        ...

    def validate(self) -> None:
        """Check that the marker templates contain the expected fields.

        Calls fail() for the first template that has an unknown
        replacement field, a repeated field, or a missing field.
        """
        owner = self.__class__.__name__

        def assert_only_one(
            attr: str,
            *additional_fields: str
        ) -> None:
            """
            Check the template stored at getattr(self, attr).

            The template must contain exactly one replacement field for
            'dsl_name' and for every name in additional_fields, and no
            other replacement fields.

            e.g. with
                self.fmt = "{dsl_name} {a} {b}"

                # passes
                assert_only_one('fmt', 'a', 'b')

                # fails: the template contains {b}, which is not allowed
                assert_only_one('fmt', 'a')

                # fails: the template has no {c}
                assert_only_one('fmt', 'a', 'b', 'c')

            and with
                self.fmt2 = '{dsl_name} {a} {a}'

                # fails: {a} appears twice, it must appear exactly once
                assert_only_one('fmt2', 'a')
            """
            allowed = ['dsl_name', *additional_fields]
            template: str = getattr(self, attr)
            counter = libclinic.FormatCounterFormatter()
            counter.format(template)

            def complain(field_name: object, wrong_count: bool) -> None:
                # wrong_count: the field is allowed but is missing or
                # repeated; otherwise the field must not appear at all.
                if wrong_count:
                    message = "{} {} must contain {{{}}} exactly once!"
                else:
                    message = "{} {} must not contain {{{}}}!"
                fail(message.format(owner, attr, field_name))

            # First pass: every field that occurs must be allowed and
            # must not be repeated.
            for seen, occurrences in counter.counts.items():
                if seen not in allowed:
                    complain(seen, False)
                elif occurrences > 1:
                    complain(seen, True)

            # Second pass: every allowed field must actually occur once.
            for required in allowed:
                if counter.counts.get(required) != 1:
                    complain(required, True)

        for marker in ('start_line', 'stop_line'):
            assert_only_one(marker)

        if "{arguments}" in self.checksum_line:
            checksum_field = "arguments"
        else:
            checksum_field = "checksum"
        assert_only_one('checksum_line', checksum_field)
225 | | - |
226 | | - |
227 | | - |
class PythonLanguage(Language):
    """Marker templates for Clinic blocks embedded in Python source.

    Every marker, and the body prefix, starts with ``#``.
    """
    # NOTE(review): this class does not override Language.render(), which
    # the base class declares abstract -- presumably it is not meant to be
    # instantiated as-is; confirm against the callers.

    language = 'Python'
    # Opens a block; {dsl_name} is filled in with the block's DSL name.
    start_line = "#/*[{dsl_name} input]"
    # Expected at the start of every input line inside a block.
    body_prefix = "#"
    # Closes the block's input.
    stop_line = "#[{dsl_name} start generated code]*/"
    # Ends the generated output; {arguments} carries the checksum fields.
    checksum_line = "#/*[{dsl_name} end generated code: {arguments}]*/"
235 | | - |
236 | | - |
237 | 147 | ParamTuple = tuple["Parameter", ...] |
238 | 148 |
|
239 | 149 |
|
@@ -1646,250 +1556,6 @@ def render_function( |
1646 | 1556 | return clinic.get_destination('block').dump() |
1647 | 1557 |
|
1648 | 1558 |
|
1649 | | -@dc.dataclass(slots=True, repr=False) |
1650 | | -class Block: |
1651 | | - r""" |
1652 | | - Represents a single block of text embedded in |
1653 | | - another file. If dsl_name is None, the block represents |
1654 | | - verbatim text, raw original text from the file, in |
1655 | | - which case "input" will be the only non-false member. |
1656 | | - If dsl_name is not None, the block represents a Clinic |
1657 | | - block. |
1658 | | -
|
1659 | | - input is always str, with embedded \n characters. |
1660 | | - input represents the original text from the file; |
1661 | | - if it's a Clinic block, it is the original text with |
1662 | | - the body_prefix and redundant leading whitespace removed. |
1663 | | -
|
1664 | | - dsl_name is either str or None. If str, it's the text |
1665 | | - found on the start line of the block between the square |
1666 | | - brackets. |
1667 | | -
|
1668 | | - signatures is a list. |
1669 | | - It may only contain clinic.Module, clinic.Class, and |
1670 | | - clinic.Function objects. At the moment it should |
1671 | | - contain at most one of each. |
1672 | | -
|
1673 | | - output is either str or None. If str, it's the output |
1674 | | - from this block, with embedded '\n' characters. |
1675 | | -
|
1676 | | - indent is a str. It's the leading whitespace |
1677 | | - that was found on every line of input. (If body_prefix is |
1678 | | - not empty, this is the indent *after* removing the |
1679 | | - body_prefix.) |
1680 | | -
|
1681 | | - "indent" is different from the concept of "preindent" |
1682 | | - (which is not stored as state on Block objects). |
1683 | | - "preindent" is the whitespace that |
1684 | | - was found in front of every line of input *before* the |
1685 | | - "body_prefix" (see the Language object). If body_prefix |
1686 | | - is empty, preindent must always be empty too. |
1687 | | -
|
1688 | | - To illustrate the difference between "indent" and "preindent": |
1689 | | -
|
1690 | | - Assume that '_' represents whitespace. |
1691 | | - If the block processed was in a Python file, and looked like this: |
1692 | | - ____#/*[python] |
1693 | | - ____#__for a in range(20): |
1694 | | - ____#____print(a) |
1695 | | - ____#[python]*/ |
1696 | | - "preindent" would be "____" and "indent" would be "__". |
1697 | | -
|
1698 | | - """ |
1699 | | - input: str |
1700 | | - dsl_name: str | None = None |
1701 | | - signatures: list[Module | Class | Function] = dc.field(default_factory=list) |
1702 | | - output: Any = None # TODO: Very dynamic; probably untypeable in its current form? |
1703 | | - indent: str = '' |
1704 | | - |
1705 | | - def __repr__(self) -> str: |
1706 | | - dsl_name = self.dsl_name or "text" |
1707 | | - def summarize(s: object) -> str: |
1708 | | - s = repr(s) |
1709 | | - if len(s) > 30: |
1710 | | - return s[:26] + "..." + s[0] |
1711 | | - return s |
1712 | | - parts = ( |
1713 | | - repr(dsl_name), |
1714 | | - f"input={summarize(self.input)}", |
1715 | | - f"output={summarize(self.output)}" |
1716 | | - ) |
1717 | | - return f"<clinic.Block {' '.join(parts)}>" |
1718 | | - |
1719 | | - |
class BlockParser:
    """
    Block-oriented parser for Argument Clinic.
    Iterator, yields Block objects.

    Verbatim text and Clinic blocks are yielded alternately, in file
    order.  Errors raised while parsing a Clinic block are ClinicError
    instances annotated with the file name and current line number.
    """

    def __init__(
            self,
            input: str,
            language: Language,
            *,
            verify: bool = True
    ) -> None:
        """
        "input" should be a str object
        with embedded newline characters.

        "language" should be a Language object.

        "verify" controls whether the checksum recorded on a block's
        end marker is compared against the generated output found in
        the file.
        """
        language.validate()

        # The lines are stored reversed so that pop() hands back the next
        # unread line and extend() can push already-read lines back.
        self.input = collections.deque(reversed(input.splitlines(keepends=True)))
        self.block_start_line_number = self.line_number = 0

        self.language = language
        before, sep, after = language.start_line.partition('{dsl_name}')
        assert sep == '{dsl_name}'
        self.find_start_re = libclinic.create_regex(before, after,
                                                    whole_line=False)
        self.start_re = libclinic.create_regex(before, after)
        self.verify = verify
        # The checksum regex depends on the DSL name; remember the last
        # one so consecutive blocks with the same name can reuse it.
        self.last_checksum_re: re.Pattern[str] | None = None
        self.last_dsl_name: str | None = None
        # Set by parse_verbatim_block() when it stops at a start line.
        self.dsl_name: str | None = None
        self.first_block = True

    def __iter__(self) -> BlockParser:
        return self

    def __next__(self) -> Block:
        """Return the next Block, or raise StopIteration at end of input."""
        while True:
            if not self.input:
                raise StopIteration

            if self.dsl_name:
                try:
                    return_value = self.parse_clinic_block(self.dsl_name)
                except ClinicError as exc:
                    exc.filename = self.language.filename
                    exc.lineno = self.line_number
                    raise
                self.dsl_name = None
                self.first_block = False
                return return_value
            block = self.parse_verbatim_block()
            # Don't yield an empty verbatim block when the file opens
            # directly with a Clinic block.
            if self.first_block and not block.input:
                continue
            self.first_block = False
            return block

    def is_start_line(self, line: str) -> str | None:
        """Return the DSL name if *line* is a block start line, else None."""
        match = self.start_re.match(line.lstrip())
        return match.group(1) if match else None

    def _line(self, lookahead: bool = False) -> str:
        """Consume and return the next line.

        Unless *lookahead* is true, the line is also passed to the
        language's parse_line() hook.
        """
        self.line_number += 1
        line = self.input.pop()
        if not lookahead:
            self.language.parse_line(line)
        return line

    def parse_verbatim_block(self) -> Block:
        """Collect lines up to (not including) the next block start line.

        When a start line is found its DSL name is stored in
        self.dsl_name so that the next call to __next__() parses the
        Clinic block.
        """
        lines = []
        self.block_start_line_number = self.line_number

        while self.input:
            line = self._line()
            dsl_name = self.is_start_line(line)
            if dsl_name:
                self.dsl_name = dsl_name
                break
            lines.append(line)

        return Block("".join(lines))

    def parse_clinic_block(self, dsl_name: str) -> Block:
        """Parse the body, generated output and end marker of one block.

        The start line has already been consumed.  Returns a Block whose
        output is the existing generated text if an end marker was found,
        or None otherwise (in which case the scanned lines are pushed
        back onto the input).

        Calls fail() on a malformed stop line, a mangled end marker, or
        (when self.verify is true) a checksum mismatch.
        """
        in_lines = []
        self.block_start_line_number = self.line_number + 1
        stop_line = self.language.stop_line.format(dsl_name=dsl_name)
        body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)

        def is_stop_line(line: str) -> bool:
            # make sure to recognize stop line even if it
            # doesn't end with EOL (it could be the very end of the file)
            if line.startswith(stop_line):
                remainder = line.removeprefix(stop_line)
                if remainder and not remainder.isspace():
                    fail(f"Garbage after stop line: {remainder!r}")
                return True
            else:
                # gh-92256: don't allow incorrectly formatted stop lines
                if line.lstrip().startswith(stop_line):
                    fail(f"Whitespace is not allowed before the stop line: {line!r}")
                return False

        # consume body of program
        while self.input:
            line = self._line()
            if is_stop_line(line) or self.is_start_line(line):
                break
            if body_prefix:
                line = line.lstrip()
                # NOTE(review): this assert is the only check that a body
                # line carries the prefix; it is skipped under -O.
                assert line.startswith(body_prefix)
                line = line.removeprefix(body_prefix)
            in_lines.append(line)

        # consume output and checksum line, if present.
        if self.last_dsl_name == dsl_name:
            checksum_re = self.last_checksum_re
        else:
            # Fill in the DSL name but keep {arguments} as a placeholder
            # to split the template around.
            template = self.language.checksum_line.format(
                dsl_name=dsl_name, arguments='{arguments}')
            before, sep, after = template.partition('{arguments}')
            assert sep == '{arguments}'
            checksum_re = libclinic.create_regex(before, after, word=False)
            self.last_dsl_name = dsl_name
            self.last_checksum_re = checksum_re
        assert checksum_re is not None

        # scan forward for checksum line
        out_lines = []
        arguments = None
        while self.input:
            line = self._line(lookahead=True)
            match = checksum_re.match(line.lstrip())
            arguments = match.group(1) if match else None
            if arguments:
                break
            out_lines.append(line)
            # Reaching the next block means this one has no end marker.
            if self.is_start_line(line):
                break

        output: str | None
        output = "".join(out_lines)
        if arguments:
            d = {}
            for field in shlex.split(arguments):
                name, equals, value = field.partition('=')
                if not equals:
                    fail(f"Mangled Argument Clinic marker line: {line!r}")
                d[name.strip()] = value.strip()

            if self.verify:
                # Markers that record an input checksum keep the output
                # checksum under 'output'; the others use 'checksum'.
                checksum_key = 'output' if 'input' in d else 'checksum'
                checksum = d.get(checksum_key)
                if checksum is None:
                    # Report a marker without the expected field as a
                    # Clinic error instead of a bare KeyError.
                    fail(f"Mangled Argument Clinic marker line: {line!r}")

                computed = libclinic.compute_checksum(output, len(checksum))
                if checksum != computed:
                    fail("Checksum mismatch! "
                         f"Expected {checksum!r}, computed {computed!r}. "
                         "Suggested fix: remove all generated code including "
                         "the end marker, or use the '-f' option.")
        else:
            # put back output
            output_lines = output.splitlines(keepends=True)
            self.line_number -= len(output_lines)
            self.input.extend(reversed(output_lines))
            output = None

        return Block("".join(in_lines), dsl_name, output=output)
1891 | | - |
1892 | | - |
1893 | 1559 | @dc.dataclass(slots=True, frozen=True) |
1894 | 1560 | class Include: |
1895 | 1561 | """ |
|
0 commit comments