@@ -69,6 +69,7 @@ DEFINE_VECTOR_TYPE(node_ids, node_id);
6969
7070struct expr {
7171 enum {
72+ EXPR_NULL , /* null expression, no effect */
7273 EXPR_LIST , /* list - list of node IDs */
7374 EXPR_BYTE , /* b - single byte */
7475 EXPR_ABS_OFFSET , /* ui - absolute offset (@OFFSET) */
@@ -178,6 +179,9 @@ debug_expr (node_id id, int level)
178179 size_t i ;
179180
180181 switch (e -> t ) {
182+ case EXPR_NULL :
183+ nbdkit_debug ("%snull" , debug_indent (level ));
184+ break ;
181185 case EXPR_LIST :
182186 nbdkit_debug ("%s[" , debug_indent (level ));
183187 for (i = 0 ; i < e -> list .size ; ++ i )
@@ -253,6 +257,7 @@ struct dict {
253257
254258static int parser (int level , const char * value , size_t * start , size_t len ,
255259 node_id * root_rtn );
260+ static int optimize_ast (node_id root , node_id * root_rtn );
256261static int evaluate (const dict_t * dict , node_id root ,
257262 struct allocator * a ,
258263 uint64_t * offset , uint64_t * size );
@@ -273,6 +278,9 @@ read_data_format (const char *value, struct allocator *a, uint64_t *size_rtn)
273278 if (parser (0 , value , & i , strlen (value ), & root ) == -1 )
274279 goto out ;
275280
281+ if (optimize_ast (root , & root ) == -1 )
282+ goto out ;
283+
276284 if (data_debug_AST )
277285 debug_expr (root , 0 );
278286
@@ -802,6 +810,135 @@ parse_string (const char *value, size_t *start, size_t len, string *rtn)
802810 return -1 ;
803811}
804812
813+ /* This simple optimization pass over the AST simplifies some
814+ * expressions.
815+ */
816+ static int
817+ optimize_ast (node_id root , node_id * root_rtn )
818+ {
819+ size_t i , j ;
820+ node_id id ;
821+ expr_t e = { 0 };
822+
823+ switch (get_node (root )-> t ) {
824+ case EXPR_LIST :
825+ /* Optimize each element of the list. For convenience this
826+ * builds a new node.
827+ */
828+ e .t = EXPR_LIST ;
829+ #define APPEND_EXPR \
830+ do { \
831+ node_id _id = new_node (e); \
832+ if (node_ids_append (&list, _id) == -1) { \
833+ nbdkit_error ("realloc: %m"); \
834+ exit (EXIT_FAILURE); \
835+ } \
836+ } while (0)
837+
838+ for (i = 0 ; i < get_node (root )-> list .size ; ++ i ) {
839+ id = get_node (root )-> list .ptr [i ];
840+ if (optimize_ast (id , & id ) == -1 )
841+ return -1 ;
842+ switch (get_node (id )-> t ) {
843+ case EXPR_NULL :
844+ /* null elements of a list can be ignored. */
845+ break ;
846+ case EXPR_LIST :
847+ /* List within a list is flattened. */
848+ for (j = 0 ; j < get_node (id )-> list .size ; ++ j ) {
849+ if (node_ids_append (& e .list , get_node (id )-> list .ptr [j ]) == -1 ) {
850+ append_error :
851+ nbdkit_error ("realloc: %m" );
852+ exit (EXIT_FAILURE );
853+ }
854+ }
855+ break ;
856+ default :
857+ if (node_ids_append (& e .list , id ) == -1 ) goto append_error ;
858+ }
859+ }
860+
861+ /* List of length 0 is replaced with null. */
862+ if (e .list .size == 0 ) {
863+ free (e .list .ptr );
864+ e .t = EXPR_NULL ;
865+ * root_rtn = new_node (e );
866+ return 0 ;
867+ }
868+
869+ /* List of length 1 is replaced with the first element. */
870+ if (e .list .size == 1 ) {
871+ id = e .list .ptr [0 ];
872+ free (e .list .ptr );
873+ * root_rtn = id ;
874+ return 0 ;
875+ }
876+
877+ * root_rtn = new_node (e );
878+ return 0 ;
879+
880+ case EXPR_EXPR :
881+ id = get_node (root )-> id ;
882+ if (optimize_ast (id , & id ) == -1 )
883+ return -1 ;
884+ /* If the nested subexpression is null, can replace the entire
885+ * nest with null.
886+ */
887+ if (get_node (id )-> t == EXPR_NULL ) {
888+ * root_rtn = id ;
889+ return 0 ;
890+ }
891+ get_node (root )-> id = id ;
892+ * root_rtn = root ;
893+ return 0 ;
894+
895+ case EXPR_ASSIGN :
896+ id = get_node (root )-> a .id ;
897+ if (optimize_ast (id , & id ) == -1 )
898+ return -1 ;
899+ get_node (root )-> a .id = id ;
900+ * root_rtn = root ;
901+ return 0 ;
902+
903+ case EXPR_REPEAT :
904+ id = get_node (root )-> r .id ;
905+ if (optimize_ast (id , & id ) == -1 )
906+ return -1 ;
907+ /* If the subexpression we're repeating is null, then the entire
908+ * repeat will be null.
909+ */
910+ if (get_node (id )-> t == EXPR_NULL ) {
911+ * root_rtn = id ;
912+ return 0 ;
913+ }
914+ get_node (root )-> r .id = id ;
915+ * root_rtn = root ;
916+ return 0 ;
917+
918+ case EXPR_SLICE :
919+ id = get_node (root )-> sl .id ;
920+ if (optimize_ast (id , & id ) == -1 )
921+ return -1 ;
922+ get_node (root )-> sl .id = id ;
923+ * root_rtn = root ;
924+ return 0 ;
925+
926+ case EXPR_NULL :
927+ case EXPR_BYTE :
928+ case EXPR_ABS_OFFSET :
929+ case EXPR_REL_OFFSET :
930+ case EXPR_ALIGN_OFFSET :
931+ case EXPR_FILE :
932+ case EXPR_SCRIPT :
933+ case EXPR_STRING :
934+ case EXPR_NAME :
935+ * root_rtn = root ;
936+ return 0 ;
937+ }
938+
939+ abort ();
940+ }
941+
805942static int store_file (struct allocator * a ,
806943 const char * filename , uint64_t * offset );
807944static int store_file_slice (struct allocator * a ,
@@ -844,6 +981,8 @@ evaluate (const dict_t *dict, node_id root,
844981 switch (e -> t ) {
845982 case EXPR_LIST : abort ();
846983
984+ case EXPR_NULL : /* does nothing */ break ;
985+
847986 case EXPR_BYTE :
848987 /* Store the byte. */
849988 if (a -> f -> write (a , & e -> b , 1 , * offset ) == -1 )
0 commit comments