Skip to content

Commit e41cf0b

Browse files
5.4.10 (#85)
* fix memory bug processing versions * change sources env-variables * tune up scanning limits * fix ranges assembling bug. Improve snippets and component selection * change on report, file content url will be returned empty if the env-var wasn't defined
1 parent e9e0d84 commit e41cf0b

File tree

13 files changed

+106
-109
lines changed

13 files changed

+106
-109
lines changed

inc/limits.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@
3737

3838
/* Snippets */
3939
#define DEFAULT_MATCHMAP_FILES 10000 // Default number of files evaluated in snippet matching
40-
#define MAX_MATCHMAP_FILES (DEFAULT_MATCHMAP_FILES * 5) // Max number of files evaluated in snippet matching to prevent performance issues
40+
#define MAX_MATCHMAP_FILES (DEFAULT_MATCHMAP_FILES * 10) // Max number of files evaluated in snippet matching to prevent performance issues
41+
#define MIN_LINES_COVERAGE 0.8
4142
#define SKIP_SNIPPETS_IF_FILE_BIGGER (1024 * 1024 * 4)
42-
#define SKIP_SNIPPETS_IF_STARTS_WITH (const char*[3]) {"{", "<?xml", "<html"}
4343
#define MAX_SNIPPETS_SCANNED 2500
4444

4545
/* Variables */

inc/match.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ typedef struct match_data_t
1313
component_list_t component_list; /*Component list object */
1414
match_t type; /*match type (none, snippet, file) */
1515
int hits; /*match hits number, more hits equal bigger snippet matching*/
16+
int lines_matched; /*number of matched lines*/
1617
char * line_ranges; /*input snippet line ranges */
1718
char * oss_ranges; /* kb snippet line ranges */
1819
char * matched_percent; /* matched percent */

inc/match_list.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
#define SCAN_MAX_SNIPPETS_DEFAULT 1
7979
#define SCAN_MAX_COMPONENTS_DEFAULT 3
8080

81-
#define MATCH_LIST_TOLERANCE 97.5
81+
#define MATCH_LIST_TOLERANCE 99.9
8282
typedef struct match_data_t match_data_t; /* Forward declaration */
8383

8484
/**
@@ -145,6 +145,7 @@ bool component_list_add(component_list_t * list, component_data_t * new_comp, bo
145145
void component_list_print(component_list_t * list, bool (*printer) (component_data_t * fpa), char * separator);
146146
void component_list_destroy(component_list_t *list);
147147
bool component_list_add_binary(component_list_t *list, component_data_t *new_comp, bool (*val)(component_data_t *a, component_data_t *b), bool remove_a);
148+
bool match_list_eval(match_list_t *list, match_data_t * in, bool (*eval)(match_data_t *fpa, match_data_t *fpb));
148149
void match_list_tolerance_set(float in);
149150

150151
#endif

inc/scan.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ typedef struct scan_data_t
5252
match_t match_type; /* match_t (file, snippet, none), this is replicated in each match in the matches list */
5353
matchmap_entry *matchmap; /*matchmap pointer, used in snippet scanning */
5454
uint32_t matchmap_size; /*size of the match map */
55-
int matchmap_rank_by_sector[255]; /* Indirection array pointing to the max hits from the matchmap classyfied by sector.*/
55+
int matchmap_rank_by_sector[256]; /* Indirection array pointing to the max hits from the matchmap classified by sector.*/
5656
uint8_t *match_ptr; // pointer to matching record in match_map
5757
match_list_t * matches_list_array[MAX_MULTIPLE_COMPONENTS]; /* array of "match_list_t", each snippet with different "from line" will generate its own matches list */
5858
int matches_list_array_index; /* elements in the matches list array*/

inc/scanoss.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,11 @@
4040
#define WFP_REC_LN 18
4141

4242
/* Log files */
43-
#define SCANOSS_VERSION "5.4.9"
43+
#define SCANOSS_VERSION "5.4.10"
4444
#define SCAN_LOG "/tmp/scanoss_scan.log"
4545
#define MAP_DUMP "/tmp/scanoss_map.dump"
4646
#define SLOW_QUERY_LOG "/tmp/scanoss_slow_query.log"
4747

48-
#define API_URL "https://api.osskb.org"
4948
#define DEFAULT_OSS_DB_NAME "oss"
5049

5150
/* Engine configuration flags */

src/help.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ Configuration:\n\
5050
-H Enable High Precision Snippet Match mode (requires 'libhpsm.so' in the system).\n\
5151
-e Match only files with identical extensions as the scanned file (default: off).\n\
5252
-M NUMBER Search for up to NUMBER different components in each file (maximum: 9).\n\
53-
-T NUMBER Set snippet scanning tolerance percentage (default: 3.5).\n\
53+
-T NUMBER Set snippet scanning tolerance percentage (default: 0.1).\n\
5454
-s SBOM Include assets from a JSON SBOM file (CycloneDX/SPDX2.2 format) in identification.\n\
5555
-b SBOM Exclude matches from assets listed in JSON SBOM file (CycloneDX/SPDX2.2 format).\n\
5656
-B SBOM Same as \"-b\" but with forced snippet scanning.\n\
@@ -69,7 +69,7 @@ Options:\n\
6969
\n\
7070
Environment variables:\n\
7171
SCANOSS_MATCHMAP_MAX: Set the snippet scanning match map size (default: %d).\n\
72-
SCANOSS_API_URL: Define the API endpoint URL (default: %s).\n\
72+
SCANOSS_FILE_CONTENTS_URL: Define the API URL endpoint for sources. Source url wont be reported if it's not defined.\n\
7373
\n\
7474
Engine scanning flags:\n\
7575
Configure the scanning engine using flags with the -F parameter.\n\
@@ -95,5 +95,5 @@ These settings can also be specified in %s\n\
9595
+-------+-------------------------------------------------------+\n\
9696
Example: scanoss -F 12 DIRECTORY (scan DIRECTORY without license and dependency data)\n\
9797
\n\
98-
Copyright (C) 2018-2022 SCANOSS.COM\n", DEFAULT_MATCHMAP_FILES, API_URL, ENGINE_FLAGS_FILE);
98+
Copyright (C) 2018-2022 SCANOSS.COM\n", DEFAULT_MATCHMAP_FILES, ENGINE_FLAGS_FILE);
9999
}

src/match.c

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
320320
if (!*a->release_date)
321321
return true;
322322

323-
if (!path_is_third_party(a->file) && path_is_third_party(b->file) && !(engine_flags & ENABLE_PATH_HINT))
323+
if (!path_is_third_party(a->file) && path_is_third_party(b->file))
324324
{
325325
scanlog("Component rejected by third party filter\n");
326326
return false;
@@ -331,7 +331,7 @@ static bool component_hint_date_comparation(component_data_t *a, component_data_
331331
{
332332
if (purl_source_check(a) > purl_source_check(b))
333333
{
334-
scanlog("Component prefered by vsource\n");
334+
scanlog("Component prefered by source\n");
335335
return true;
336336
}
337337

@@ -471,39 +471,15 @@ bool load_matches(match_data_t *match)
471471
{
472472
scanlog("Load matches\n");
473473

474-
/* Compile match ranges and fill up matched percent */
475-
int hits = 100;
476-
int matched_percent = 100;
477474

478-
/* Get matching line ranges (snippet match) */
479-
if (match->type == MATCH_SNIPPET)
480-
{
481-
hits = compile_ranges(match);
482-
scanlog("compile_ranges returns %d hits\n", hits);
483-
484-
if (hits < min_match_hits)
485-
{
486-
match->type = MATCH_NONE;
487-
return false;
488-
}
489-
490-
float percent = (hits * 100) / match->scan_ower->total_lines;
491-
if (hits)
492-
matched_percent = floor(percent);
493-
if (matched_percent > 99)
494-
matched_percent = 99;
495-
if (matched_percent < 1)
496-
matched_percent = 1;
497-
498-
asprintf(&match->matched_percent, "%u%%", matched_percent);
499-
}
500-
else if (match->type == MATCH_BINARY)
475+
476+
if (match->type == MATCH_BINARY)
501477
{
502478
asprintf(&match->line_ranges, "n/a");
503479
asprintf(&match->oss_ranges, "n/a");
504480
asprintf(&match->matched_percent, "%d functions matched", match->hits);
505481
}
506-
else
482+
else if (match->type == MATCH_FILE)
507483
{
508484
asprintf(&match->line_ranges, "all");
509485
asprintf(&match->oss_ranges, "all");
@@ -696,11 +672,13 @@ void match_select_best(scan_data_t *scan)
696672
break;
697673
}
698674

699-
if (!best_match_component->identified && match_component->identified)
675+
if ((!best_match_component->identified && match_component->identified) ||
676+
(strcmp(best_match_component->vendor,best_match_component->component) && !strcmp(match_component->vendor, match_component->component)) ||
677+
(path_is_third_party(best_match_component->file) && !path_is_third_party(match_component->file)))
700678
{
701679
scanlog("Replacing best match for a prefered component\n");
702680
scan->matches_list_array[i]->best_match = item->match;
703-
}
681+
}
704682
}
705683
}
706684

src/match_list.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,11 +311,11 @@ bool match_list_add(match_list_t *list, match_data_t *new_match, bool (*val)(mat
311311
}
312312
/* in autolimit mode the list doesn't have a fixed size, it will accept all the matches until a 75% of the first element (the biggest) */
313313
//TODO: this part of the code should be in the function pointer or I need to re-evaluate the architecture of this function
314-
if (list->autolimit && !tolerance_eval(list->headp.lh_first->match->hits, list->last_element->match->hits))
314+
if (list->autolimit && !tolerance_eval(list->headp.lh_first->match->lines_matched, list->last_element->match->lines_matched))
315315
{
316316
np = list->headp.lh_first;
317317
/*We have to find and remove the unwanted elements */
318-
for (; np->entries.le_next != NULL && tolerance_eval(list->headp.lh_first->match->hits, np->entries.le_next->match->hits); np = np->entries.le_next)
318+
for (; np->entries.le_next != NULL && tolerance_eval(list->headp.lh_first->match->lines_matched, np->entries.le_next->match->lines_matched); np = np->entries.le_next)
319319
{
320320

321321
}
@@ -403,6 +403,18 @@ bool match_list_print(match_list_t *list, bool (*printer)(match_data_t *fpa), ch
403403
return true;
404404
}
405405

406+
/**
 * @brief Test whether any match stored in a list satisfies a caller-supplied predicate.
 *
 * Walks the list starting at list->headp.lh_first and follows entries.le_next,
 * calling eval(stored_match, in) for each entry. The walk stops at the first
 * entry for which eval returns true, when the end of the list is reached, or
 * after list->items entries have been visited, whichever comes first.
 *
 * @param list Match list to scan; it is dereferenced without a NULL check.
 * @param in   Match passed as the second argument to every eval call.
 * @param eval Predicate; receives the stored match first and `in` second.
 * @return true if eval returned true for some visited entry, false otherwise.
 */
bool match_list_eval(match_list_t *list, match_data_t * in, bool (*eval)(match_data_t *fpa, match_data_t *fpb))
407+
{
408+
int i = 0;
409+
for (struct entry *np = list->headp.lh_first; np != NULL && i<list->items; np = np->entries.le_next)
410+
{
411+
if(eval(np->match, in))
412+
return true;
413+
i++;
414+
}
415+
return false;
416+
}
417+
406418
void component_list_print(component_list_t *list, bool (*printer)(component_data_t *fpa), char *separator)
407419
{
408420
for (struct comp_entry *np = list->headp.lh_first; np != NULL; np = np->entries.le_next)

src/report.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -334,17 +334,19 @@ bool print_json_match(struct match_data_t * match)
334334
printf(",\"source_hash\": \"%s\"", match->source_md5);
335335

336336
/* Output file_url (same as url when match type = url) */
337-
char * file_url_enabled = getenv("SCANOSS_FILE_CONTENTS");
338-
if (!file_url_enabled || strcmp(file_url_enabled, "false"))
337+
char * file_contents_url = getenv("SCANOSS_FILE_CONTENTS_URL");
338+
if (file_contents_url && *file_contents_url && strcmp(file_contents_url, "false"))
339339
{
340340
if (!match->component_list.headp.lh_first->component->url_match)
341341
{
342-
char *custom_url = getenv("SCANOSS_API_URL");
343-
printf(",\"file_url\": \"%s/file_contents/%s\"", custom_url ? custom_url : API_URL, file_id);
342+
printf(",\"file_url\": \"%s/%s\"", file_contents_url, file_id);
344343
}
345344
else
346345
printf(",\"file_url\": \"%s\"", match->component_list.headp.lh_first->component->url);
347346
}
347+
else //return an empty string
348+
printf(",\"file_url\": \" \"");
349+
348350

349351
free(file_id);
350352

src/scan.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ scan_data_t * scan_data_init(char *target, int max_snippets, int max_components)
5959
scan_data_t * scan = calloc(1, sizeof(*scan));
6060
scan->file_path = strdup(target);
6161
scan->file_size = malloc(32);
62-
scan->hashes = malloc(MAX_FILE_SIZE);
62+
scan->hashes = calloc(MAX_FILE_SIZE,1);
6363
scan->lines = malloc(MAX_FILE_SIZE);
6464
scan->match_type = MATCH_NONE;
6565

0 commit comments

Comments
 (0)