[lex] Reorder subclauses to better follow phases of translation

AlisdairM · tkoeppe · commit 801fb2c0aaf6 · 2024-10-18T00:30:54.000+01:00
This PR purely moves existing words around, and does not create any new content.

The proposed subclause ordering is now:

* 5 Lexical conventions
  - 5.1 Separate translation
  - 5.2 Phases of translation
  - 5.3 Characters
    - 5.3.1 Character sets
    - 5.3.2 Universal character names
  - 5.4 Comments
  - 5.5 Preprocessing tokens
  - 5.6 Header names
  - 5.7 Preprocessing numbers
  - 5.8 Operators and punctuators
  - 5.9 Alternative tokens
  - 5.10 Tokens
  - 5.11 Identifiers
  - 5.12 Keywords
  - 5.13 Literals
    - 5.13.1 Kinds of literals
    - 5.13.2 ...
diff --git a/source/lex.tex b/source/lex.tex
@@ -627,83 +627,6 @@
 \end{example}
 \indextext{token!preprocessing|)}
 
-\rSec1[lex.digraph]{Alternative tokens}
-
-\pnum
-\indextext{token!alternative|(}%
-Alternative token representations are provided for some operators and
-punctuators.
-\begin{footnote}
-\indextext{digraph}%
-These include ``digraphs'' and additional reserved words. The term
-``digraph'' (token consisting of two characters) is not perfectly
-descriptive, since one of the alternative \grammarterm{preprocessing-token}s is
-\tcode{\%:\%:} and of course several primary tokens contain two
-characters. Nonetheless, those alternative tokens that aren't lexical
-keywords are colloquially known as ``digraphs''.
-\end{footnote}
-
-\pnum
-In all respects of the language, each alternative token behaves the
-same, respectively, as its primary token, except for its spelling.
-\begin{footnote}
-Thus the ``stringized'' values\iref{cpp.stringize} of
-\tcode{[} and \tcode{<:} will be different, maintaining the source
-spelling, but the tokens can otherwise be freely interchanged.
-\end{footnote}
-The set of alternative tokens is defined in
-\tref{lex.digraph}.
-
-\begin{tokentable}{Alternative tokens}{lex.digraph}{Alternative}{Primary}
-\tcode{<\%}             &   \tcode{\{}         &
-\keyword{and}           &   \tcode{\&\&}       &
-\keyword{and_eq}        &   \tcode{\&=}        \\ \rowsep
-\tcode{\%>}             &   \tcode{\}}         &
-\keyword{bitor}         &   \tcode{|}          &
-\keyword{or_eq}         &   \tcode{|=}         \\ \rowsep
-\tcode{<:}              &   \tcode{[}          &
-\keyword{or}            &   \tcode{||}         &
-\keyword{xor_eq}        &   \tcode{\caret=}    \\ \rowsep
-\tcode{:>}              &   \tcode{]}          &
-\keyword{xor}           &   \tcode{\caret}     &
-\keyword{not}           &   \tcode{!}          \\ \rowsep
-\tcode{\%:}             &   \tcode{\#}         &
-\keyword{compl}         &   \tcode{\~}         &
-\keyword{not_eq}        &   \tcode{!=}         \\ \rowsep
-\tcode{\%:\%:}          &   \tcode{\#\#}       &
-\keyword{bitand}        &   \tcode{\&}         &
-                        &                      \\
-\end{tokentable}%
-\indextext{token!alternative|)}
-
-\rSec1[lex.token]{Tokens}
-
-\indextext{token|(}%
-\begin{bnf}
-\nontermdef{token}\br
-    identifier\br
-    keyword\br
-    literal\br
-    operator-or-punctuator
-\end{bnf}
-
-\pnum
-\indextext{\idxgram{token}}%
-There are five kinds of tokens: identifiers, keywords, literals,%
-\begin{footnote}
-Literals include strings and character and numeric literals.
-\end{footnote}
-operators, and other separators.
-\indextext{whitespace}%
-Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments
-(collectively, ``whitespace''), as described below, are ignored except
-as they serve to separate tokens.
-\begin{note}
-Whitespace can separate otherwise adjacent identifiers, keywords, numeric
-literals, and alternative tokens containing alphabetic characters.
-\end{note}
-\indextext{token|)}
-
 \rSec1[lex.header]{Header names}
 
 \indextext{header!name|(}%
@@ -793,6 +716,124 @@
 a \grammarterm{floating-point-literal} token.%
 \indextext{number!preprocessing|)}
 
+\rSec1[lex.operators]{Operators and punctuators}
+
+\pnum
+\indextext{operator|(}%
+\indextext{punctuator|(}%
+The lexical representation of \Cpp{} programs includes a number of
+preprocessing tokens that are used in the syntax of the preprocessor or
+are converted into tokens for operators and punctuators:
+
+\begin{bnf}
+\nontermdef{preprocessing-op-or-punc}\br
+    preprocessing-operator\br
+    operator-or-punctuator
+\end{bnf}
+
+\begin{bnf}
+%% Ed. note: character protrusion would misalign various operators.
+\microtypesetup{protrusion=false}\obeyspaces
+\nontermdef{preprocessing-operator} \textnormal{one of}\br
+    \terminal{\#        \#\#       \%:       \%:\%:}
+\end{bnf}
+
+\begin{bnf}
+\microtypesetup{protrusion=false}\obeyspaces
+\nontermdef{operator-or-punctuator} \textnormal{one of}\br
+    \terminal{\{        \}        [        ]        (        )}\br
+    \terminal{<:       :>       <\%       \%>       ;        :        ...}\br
+    \terminal{?        ::       .        .*       ->       ->*      \~}\br
+    \terminal{!        +        -        *        /        \%        \caret{}        \&        |}\br
+    \terminal{=        +=       -=       *=       /=       \%=       \caret{}=       \&=       |=}\br
+    \terminal{==       !=       <        >        <=       >=       <=>      \&\&       ||}\br
+    \terminal{<<       >>       <<=      >>=      ++       --       ,}\br
+    \terminal{\keyword{and}      \keyword{or}       \keyword{xor}      \keyword{not}      \keyword{bitand}   \keyword{bitor}    \keyword{compl}}\br
+    \terminal{\keyword{and_eq}   \keyword{or_eq}    \keyword{xor_eq}   \keyword{not_eq}}
+\end{bnf}
+
+Each \grammarterm{operator-or-punctuator} is converted to a single token
+in translation phase 7\iref{lex.phases}.%
+\indextext{punctuator|)}%
+\indextext{operator|)}
+
+\rSec1[lex.digraph]{Alternative tokens}
+
+\pnum
+\indextext{token!alternative|(}%
+Alternative token representations are provided for some operators and
+punctuators.
+\begin{footnote}
+\indextext{digraph}%
+These include ``digraphs'' and additional reserved words. The term
+``digraph'' (token consisting of two characters) is not perfectly
+descriptive, since one of the alternative \grammarterm{preprocessing-token}s is
+\tcode{\%:\%:} and of course several primary tokens contain two
+characters. Nonetheless, those alternative tokens that aren't lexical
+keywords are colloquially known as ``digraphs''.
+\end{footnote}
+
+\pnum
+In all respects of the language, each alternative token behaves the
+same, respectively, as its primary token, except for its spelling.
+\begin{footnote}
+Thus the ``stringized'' values\iref{cpp.stringize} of
+\tcode{[} and \tcode{<:} will be different, maintaining the source
+spelling, but the tokens can otherwise be freely interchanged.
+\end{footnote}
+The set of alternative tokens is defined in
+\tref{lex.digraph}.
+
+\begin{tokentable}{Alternative tokens}{lex.digraph}{Alternative}{Primary}
+\tcode{<\%}             &   \tcode{\{}         &
+\keyword{and}           &   \tcode{\&\&}       &
+\keyword{and_eq}        &   \tcode{\&=}        \\ \rowsep
+\tcode{\%>}             &   \tcode{\}}         &
+\keyword{bitor}         &   \tcode{|}          &
+\keyword{or_eq}         &   \tcode{|=}         \\ \rowsep
+\tcode{<:}              &   \tcode{[}          &
+\keyword{or}            &   \tcode{||}         &
+\keyword{xor_eq}        &   \tcode{\caret=}    \\ \rowsep
+\tcode{:>}              &   \tcode{]}          &
+\keyword{xor}           &   \tcode{\caret}     &
+\keyword{not}           &   \tcode{!}          \\ \rowsep
+\tcode{\%:}             &   \tcode{\#}         &
+\keyword{compl}         &   \tcode{\~}         &
+\keyword{not_eq}        &   \tcode{!=}         \\ \rowsep
+\tcode{\%:\%:}          &   \tcode{\#\#}       &
+\keyword{bitand}        &   \tcode{\&}         &
+                        &                      \\
+\end{tokentable}%
+\indextext{token!alternative|)}
+
+\rSec1[lex.token]{Tokens}
+
+\indextext{token|(}%
+\begin{bnf}
+\nontermdef{token}\br
+    identifier\br
+    keyword\br
+    literal\br
+    operator-or-punctuator
+\end{bnf}
+
+\pnum
+\indextext{\idxgram{token}}%
+There are five kinds of tokens: identifiers, keywords, literals,%
+\begin{footnote}
+Literals include strings and character and numeric literals.
+\end{footnote}
+operators, and other separators.
+\indextext{whitespace}%
+Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments
+(collectively, ``whitespace''), as described below, are ignored except
+as they serve to separate tokens.
+\begin{note}
+Whitespace can separate otherwise adjacent identifiers, keywords, numeric
+literals, and alternative tokens containing alphabetic characters.
+\end{note}
+\indextext{token|)}
+
 \rSec1[lex.name]{Identifiers}
 
 \indextext{identifier|(}%
@@ -1038,47 +1079,6 @@
 \indextext{keyword|)}%
 
 
-\rSec1[lex.operators]{Operators and punctuators}
-
-\pnum
-\indextext{operator|(}%
-\indextext{punctuator|(}%
-The lexical representation of \Cpp{} programs includes a number of
-preprocessing tokens that are used in the syntax of the preprocessor or
-are converted into tokens for operators and punctuators:
-
-\begin{bnf}
-\nontermdef{preprocessing-op-or-punc}\br
-    preprocessing-operator\br
-    operator-or-punctuator
-\end{bnf}
-
-\begin{bnf}
-%% Ed. note: character protrusion would misalign various operators.
-\microtypesetup{protrusion=false}\obeyspaces
-\nontermdef{preprocessing-operator} \textnormal{one of}\br
-    \terminal{\#        \#\#       \%:       \%:\%:}
-\end{bnf}
-
-\begin{bnf}
-\microtypesetup{protrusion=false}\obeyspaces
-\nontermdef{operator-or-punctuator} \textnormal{one of}\br
-    \terminal{\{        \}        [        ]        (        )}\br
-    \terminal{<:       :>       <\%       \%>       ;        :        ...}\br
-    \terminal{?        ::       .        .*       ->       ->*      \~}\br
-    \terminal{!        +        -        *        /        \%        \caret{}        \&        |}\br
-    \terminal{=        +=       -=       *=       /=       \%=       \caret{}=       \&=       |=}\br
-    \terminal{==       !=       <        >        <=       >=       <=>      \&\&       ||}\br
-    \terminal{<<       >>       <<=      >>=      ++       --       ,}\br
-    \terminal{\keyword{and}      \keyword{or}       \keyword{xor}      \keyword{not}      \keyword{bitand}   \keyword{bitor}    \keyword{compl}}\br
-    \terminal{\keyword{and_eq}   \keyword{or_eq}    \keyword{xor_eq}   \keyword{not_eq}}
-\end{bnf}
-
-Each \grammarterm{operator-or-punctuator} is converted to a single token
-in translation phase 7\iref{lex.phases}.%
-\indextext{punctuator|)}%
-\indextext{operator|)}
-
 \rSec1[lex.literal]{Literals}%
 \indextext{literal|(}