Skip to content

Commit 7ea264b

Browse files
authored
Improve Java code output for string literals concatenated with '&' (#731)
* feat: improve output of strings concatenated with & * fix: Minor bug fix * fix: Minor bug fix * fix: bugfix for memory access error * fix: remove unnecessary code * style: minor refactoring * test: add a test * test: add a test * style: format * test: minor fix for added test * refactor: minor refactor for handling &-concatenated strings * fix: fix warning of a test script for &-concatenated strings * refactor: minor refactoring for &-concatenated strings * refactor: minor refactoring for &-concatenated strings
1 parent b211b08 commit 7ea264b

File tree

7 files changed

+174
-8
lines changed

7 files changed

+174
-8
lines changed

cobj/codegen.c

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ static const char *excp_current_program_id = NULL;
7979
static const char *excp_current_section = NULL;
8080
static const char *excp_current_paragraph = NULL;
8181
static struct cb_program *current_prog;
82+
static size_t *sgmt_sizes = NULL;
83+
static size_t sgmt_count = 0;
8284

8385
extern int cb_default_byte_specified;
8486
extern unsigned char cb_default_byte;
@@ -463,6 +465,7 @@ struct string_literal_cache {
463465
enum cb_string_category category;
464466
char *var_name;
465467
struct string_literal_cache *next;
468+
size_t *segment_sizes; /* segment sizes for strings concatenated with '&' */
466469
};
467470

468471
int string_literal_id = 0;
@@ -531,7 +534,8 @@ static enum cb_string_category get_string_category(const unsigned char *s,
531534
}
532535

533536
static void joutput_string_write(const unsigned char *s, int size,
534-
enum cb_string_category category) {
537+
enum cb_string_category category,
538+
const size_t *tmp_sgmt_sizes) {
535539
int i;
536540

537541
#ifdef I18N_UTF8
@@ -552,7 +556,11 @@ static void joutput_string_write(const unsigned char *s, int size,
552556
} else {
553557
joutput("CobolUtil.stringToBytes(");
554558
}
555-
559+
if (tmp_sgmt_sizes) {
560+
joutput_indent_level += 2;
561+
joutput_newline();
562+
joutput_prefix();
563+
}
556564
joutput("\"");
557565

558566
#ifdef I18N_UTF8
@@ -568,6 +576,8 @@ static void joutput_string_write(const unsigned char *s, int size,
568576
}
569577
#else
570578
int output_multibyte = 0;
579+
int sum_sgmt_size = 0;
580+
int sgmt_index = 0;
571581
for (i = 0; i < size; i++) {
572582
int c = s[i];
573583
if (!output_multibyte && (c == '\"' || c == '\\')) {
@@ -577,11 +587,33 @@ static void joutput_string_write(const unsigned char *s, int size,
577587
} else {
578588
joutput("%c", c);
579589
}
590+
591+
// insert line breaks between segments concatenated with '&'
592+
if (tmp_sgmt_sizes && i < size - 1) {
593+
size_t segment_end_position =
594+
sum_sgmt_size + tmp_sgmt_sizes[sgmt_index] - 1;
595+
if (i == segment_end_position) {
596+
joutput("\" + ");
597+
joutput_newline();
598+
joutput_prefix();
599+
joutput("\"");
600+
sum_sgmt_size += tmp_sgmt_sizes[sgmt_index];
601+
sgmt_index++;
602+
}
603+
}
580604
output_multibyte = !output_multibyte &&
581605
((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef));
582606
}
583607
#endif
584-
joutput("\")");
608+
if (tmp_sgmt_sizes) {
609+
joutput("\"");
610+
joutput_newline();
611+
joutput_indent_level -= 2;
612+
joutput_prefix();
613+
joutput(")");
614+
} else {
615+
joutput("\")");
616+
}
585617
} else {
586618
if (param_wrap_string_flag) {
587619
joutput("CobolDataStorage.makeCobolDataStorage(");
@@ -631,6 +663,16 @@ static void joutput_string(const unsigned char *s, int size) {
631663
new_literal_cache->var_name[var_name_length + 1 + i] = '\0';
632664
}
633665

666+
// set segment sizes to new cache
667+
if (sgmt_sizes) {
668+
new_literal_cache->segment_sizes = cobc_malloc(sizeof(size_t) * sgmt_count);
669+
memcpy(new_literal_cache->segment_sizes, sgmt_sizes,
670+
sizeof(size_t) * sgmt_count);
671+
sgmt_sizes = NULL;
672+
} else {
673+
new_literal_cache->segment_sizes = NULL;
674+
}
675+
634676
// add the new cache to string_literal_list
635677
new_literal_cache->next = string_literal_list;
636678
string_literal_list = new_literal_cache;
@@ -658,7 +700,8 @@ static void joutput_all_string_literals() {
658700
joutput_prefix();
659701
joutput("public static final %s %s = ", data_type, l->var_name);
660702
param_wrap_string_flag = l->param_wrap_string_flag;
661-
joutput_string_write(l->string_value, l->size, l->category);
703+
joutput_string_write(l->string_value, l->size, l->category,
704+
l->segment_sizes);
662705
joutput(";\n");
663706
l = l->next;
664707
}
@@ -2280,6 +2323,13 @@ static void joutput_initialize_one(struct cb_initialize *p, cb_tree x) {
22802323
/* Initialize by value */
22812324
if (p->val && f->values) {
22822325
cb_tree value = CB_VALUE(f->values);
2326+
struct cb_literal *l = CB_LITERAL_P(value) ? CB_LITERAL(value) : NULL;
2327+
// save the size information of '&' concatenated segments
2328+
if (l && l->segment_count > 0) {
2329+
sgmt_sizes = cobc_malloc(sizeof(size_t) * l->segment_count);
2330+
memcpy(sgmt_sizes, l->segment_sizes, sizeof(size_t) * l->segment_count);
2331+
sgmt_count = l->segment_count;
2332+
}
22832333

22842334
/* NATIONAL also needs no editing but mbchar conversion. */
22852335
if (CB_TREE_CATEGORY(x) == CB_CATEGORY_NATIONAL) {
@@ -2340,7 +2390,6 @@ static void joutput_initialize_one(struct cb_initialize *p, cb_tree x) {
23402390
/* We do not use joutput_move here because
23412391
we do not want to have the value be edited. */
23422392

2343-
struct cb_literal *l = CB_LITERAL(value);
23442393
static char *buff = NULL;
23452394
static int lastsize = 0;
23462395
if (!buff) {

cobj/tree.c

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,31 @@ struct cb_literal *build_literal(enum cb_category category,
455455
return p;
456456
}
457457

458+
struct cb_literal *build_concat_literal(enum cb_category category,
459+
const unsigned char *data, size_t size1,
460+
size_t size2, size_t *sgmt_sizes,
461+
size_t sgmt_count) {
462+
struct cb_literal *p;
463+
size_t size = size1 + size2;
464+
p = make_tree(CB_TAG_LITERAL, category, sizeof(struct cb_literal));
465+
p->data = cobc_malloc((size_t)(size + 1));
466+
p->size = size;
467+
memcpy(p->data, data, (size_t)size);
468+
469+
// set segment sizes
470+
if (!sgmt_sizes) {
471+
p->segment_sizes = cobc_malloc(sizeof(size_t) * 2);
472+
p->segment_sizes[0] = size1;
473+
} else {
474+
p->segment_sizes = cobc_malloc(sizeof(size_t) * (sgmt_count + 1));
475+
memcpy(p->segment_sizes, sgmt_sizes, sizeof(size_t) * sgmt_count);
476+
}
477+
p->segment_sizes[sgmt_count] = size2;
478+
p->segment_count = sgmt_count + 1;
479+
480+
return p;
481+
}
482+
458483
char *cb_name(cb_tree x) {
459484
if (!treenamebuff) {
460485
treenamebuff = cobc_malloc(COB_NORMAL_BUFF);
@@ -1030,21 +1055,40 @@ cb_tree cb_build_national_literal(const unsigned char *data, size_t size) {
10301055
return CB_TREE(build_literal(CB_CATEGORY_NATIONAL, data, size));
10311056
}
10321057

1058+
cb_tree cb_build_concat_alphanumeric_literal(const unsigned char *data,
1059+
size_t size1, size_t size2,
1060+
size_t *sgmt_sizes,
1061+
size_t sgmt_count) {
1062+
return CB_TREE(build_concat_literal(CB_CATEGORY_ALPHANUMERIC, data, size1,
1063+
size2, sgmt_sizes, sgmt_count));
1064+
}
1065+
1066+
cb_tree cb_build_concat_national_literal(const unsigned char *data,
1067+
size_t size1, size_t size2,
1068+
size_t *sgmt_sizes,
1069+
size_t sgmt_count) {
1070+
return CB_TREE(build_concat_literal(CB_CATEGORY_NATIONAL, data, size1, size2,
1071+
sgmt_sizes, sgmt_count));
1072+
}
1073+
10331074
cb_tree cb_concat_literals(cb_tree x1, cb_tree x2) {
10341075
unsigned char *buff;
10351076
cb_tree x;
10361077
unsigned char *data1;
10371078
unsigned char *data2;
10381079
size_t size1;
10391080
size_t size2;
1081+
struct cb_literal *l;
10401082

10411083
if (x1 == cb_error_node || x2 == cb_error_node) {
10421084
return cb_error_node;
10431085
}
10441086
if (CB_LITERAL_P(x1)) {
1087+
l = CB_LITERAL(x1);
10451088
data1 = CB_LITERAL(x1)->data;
10461089
size1 = CB_LITERAL(x1)->size;
10471090
} else if (CB_CONST_P(x1)) {
1091+
l = CB_LITERAL(x1);
10481092
size1 = 1;
10491093
if (x1 == cb_space) {
10501094
data1 = (unsigned char *)" ";
@@ -1090,8 +1134,18 @@ cb_tree cb_concat_literals(cb_tree x1, cb_tree x2) {
10901134
buff = cobc_malloc(size1 + size2 + 3);
10911135
memcpy(buff, data1, size1);
10921136
memcpy(buff + size1, data2, size2);
1093-
x = cb_build_alphanumeric_literal(buff, size1 + size2);
1137+
if (!l->segment_count) {
1138+
l->segment_count = 1;
1139+
}
1140+
if (x1->category == CB_CATEGORY_NATIONAL) {
1141+
x = cb_build_concat_national_literal(buff, size1, size2, l->segment_sizes,
1142+
l->segment_count);
1143+
} else {
1144+
x = cb_build_concat_alphanumeric_literal(
1145+
buff, size1, size2, l->segment_sizes, l->segment_count);
1146+
}
10941147
free(buff);
1148+
10951149
return x;
10961150
}
10971151

cobj/tree.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,9 +467,17 @@ extern cb_tree cb_build_system_name(enum cb_system_name_category category,
467467
* Literal
468468
*/
469469

470+
/*
 * Self-referential record pairing a byte buffer with a size and a `next`
 * pointer to another record of the same type.
 * NOTE(review): presumably meant to describe one '&'-concatenated literal
 * segment, but nothing visible in this change reads or writes it; the
 * concatenation code shown tracks segments through the size_t array on
 * struct cb_literal instead. Confirm whether this type is still needed.
 */
struct cb_literal_segment {
471+
size_t size; /* presumably the byte length of `data` -- TODO confirm */
472+
unsigned char *data; /* segment bytes; ownership is not visible here */
473+
struct cb_literal_segment *next; /* following record, if any */
474+
};
475+
470476
struct cb_literal {
471477
struct cb_tree_common common;
472478
size_t size;
479+
size_t *segment_sizes; /* segment sizes for strings concatenated with '&' */
480+
size_t segment_count;
473481
unsigned char *data;
474482
signed char all;
475483
signed char sign; /* unsigned: 0 negative: -1 positive: 1 */
@@ -489,6 +497,14 @@ extern cb_tree cb_build_alphanumeric_literal(const unsigned char *data,
489497
extern cb_tree cb_build_national_literal(const unsigned char *data,
490498
size_t size);
491499
extern cb_tree cb_concat_literals(cb_tree x1, cb_tree x2);
500+
extern cb_tree cb_build_concat_alphanumeric_literal(const unsigned char *data,
501+
size_t size1, size_t size2,
502+
size_t *sgmt_sizes,
503+
size_t sgmt_count);
504+
extern cb_tree cb_build_concat_national_literal(const unsigned char *data,
505+
size_t size1, size_t size2,
506+
size_t *sgmt_sizes,
507+
size_t sgmt_count);
492508

493509
/*
494510
* Decimal
@@ -1438,6 +1454,10 @@ extern void level_except_error(cb_tree x, const char *clause);
14381454

14391455
struct cb_literal *build_literal(enum cb_category category,
14401456
const unsigned char *data, size_t size);
1457+
struct cb_literal *build_concat_literal(enum cb_category category,
1458+
const unsigned char *data, size_t size1,
1459+
size_t size2, size_t *sgmt_sizes,
1460+
size_t sgmt_count);
14411461

14421462
/* field.c */
14431463
extern size_t cb_needs_01;

tests/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,8 @@ misc_DEPENDENCIES = \
262262
misc.src/display-numeric-NUMERIC-class.at \
263263
misc.src/display-inspect-sign.at \
264264
misc.src/comp1-comp2.at \
265-
misc.src/variable-length-file.at
265+
misc.src/variable-length-file.at \
266+
misc.src/convert-string-concat.at \
266267

267268
EXTRA_DIST = $(srcdir)/package.m4 \
268269
$(TESTS) \

tests/Makefile.in

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,8 @@ misc_DEPENDENCIES = \
801801
misc.src/display-numeric-NUMERIC-class.at \
802802
misc.src/display-inspect-sign.at \
803803
misc.src/comp1-comp2.at \
804-
misc.src/variable-length-file.at
804+
misc.src/variable-length-file.at \
805+
misc.src/convert-string-concat.at \
805806

806807
EXTRA_DIST = $(srcdir)/package.m4 \
807808
$(TESTS) \

tests/misc.at

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,4 @@ m4_include([display-numeric-NUMERIC-class.at])
5757
m4_include([display-inspect-sign.at])
5858
m4_include([comp1-comp2.at])
5959
m4_include([variable-length-file.at])
60+
m4_include([convert-string-concat.at])
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
AT_SETUP([convert '&' concatenated strings to Java])
2+
3+
AT_DATA([prog.cbl], [
4+
IDENTIFICATION DIVISION.
5+
PROGRAM-ID. prog.
6+
DATA DIVISION.
7+
WORKING-STORAGE SECTION.
8+
01 X-CONCAT PIC X(25) VALUE "abcde"
9+
& "fghij"
10+
& "klmno"
11+
& "pqrst"
12+
& "uvwxy".
13+
01 N-CONCAT PIC N(25) VALUE "あいうえお"
14+
& "かきくけこ"
15+
& "さしすせそ"
16+
& "たちつてと"
17+
& "なにぬねの".
18+
PROCEDURE DIVISION.
19+
MAIN-RTN.
20+
DISPLAY X-CONCAT.
21+
DISPLAY N-CONCAT.
22+
STOP RUN.
23+
])
24+
25+
AT_CHECK([${COMPILE} prog.cbl])
26+
AT_CHECK([java prog], [0],
27+
[abcdefghijklmnopqrstuvwxy
28+
あいうえおかきくけこさしすせそたちつてとなにぬねの
29+
])
30+
AT_CHECK([grep -q ' "abcde" +' prog.java])
31+
AT_CHECK([grep -q ' "fghij" +' prog.java])
32+
AT_CHECK([grep -q ' "klmno" +' prog.java])
33+
AT_CHECK([grep -q ' "pqrst" +' prog.java])
34+
AT_CHECK([grep -q ' "uvwxy"' prog.java])
35+
AT_CHECK([grep -q ' "あいうえお" +' prog.java])
36+
AT_CHECK([grep -q ' "かきくけこ" +' prog.java])
37+
AT_CHECK([grep -q ' "さしすせそ" +' prog.java])
38+
AT_CHECK([grep -q ' "たちつてと" +' prog.java])
39+
AT_CHECK([grep -q ' "なにぬねの"' prog.java])
40+
AT_CLEANUP

0 commit comments

Comments
 (0)