diff --git a/Makefile b/Makefile index c6a1aae5e..3afca525b 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,7 @@ all: @echo "check-fast - compile and execute test suite (skip long-running tests)" @echo "clean - remove built files" @echo "coverage - create coverage information with lcov" + @echo "coverage-fast - create coverage information with fastcov" @echo "cppcheck - analyze code with cppcheck" @echo "cpplint - analyze code with cpplint" @echo "clang_tidy - analyze code with Clang-Tidy" @@ -82,6 +83,14 @@ coverage: cd build_coverage ; ninja lcov_html open build_coverage/test/html/index.html +coverage-fast: + rm -fr build_coverage + mkdir build_coverage + cd build_coverage ; CXX=$(COMPILER_DIR)/g++ cmake .. -GNinja -DJSON_Coverage=ON -DJSON_MultipleHeaders=ON + cd build_coverage ; ninja + cd build_coverage ; ctest -E '.*_default' -j10 + cd build_coverage ; ninja fastcov_html + open build_coverage/test/html/index.html ########################################################################## # documentation tests diff --git a/README.md b/README.md index dcb3641b0..67dc4e07d 100644 --- a/README.md +++ b/README.md @@ -1281,9 +1281,11 @@ The library itself consists of a single header file licensed under the MIT licen - [**doctest**](https://github.com/onqtam/doctest) for the unit tests - [**Doozer**](https://doozer.io) for [continuous integration](https://doozer.io/nlohmann/json) on Linux (CentOS, Raspbian, Fedora) - [**Doxygen**](http://www.stack.nl/~dimitri/doxygen/) to generate [documentation](https://nlohmann.github.io/json/) +- [**fastcov**](https://github.com/RPGillespie6/fastcov) to process coverage information - [**git-update-ghpages**](https://github.com/rstacruz/git-update-ghpages) to upload the documentation to gh-pages - [**GitHub Changelog Generator**](https://github.com/skywinder/github-changelog-generator) to generate the [ChangeLog](https://github.com/nlohmann/json/blob/develop/ChangeLog.md) - [**Google Benchmark**](https://github.com/google/benchmark) to implement the benchmarks +- [**lcov**](http://ltp.sourceforge.net/coverage/lcov.php) to process coverage information and create a HTML view - [**libFuzzer**](http://llvm.org/docs/LibFuzzer.html) to implement fuzz testing for OSS-Fuzz - [**OSS-Fuzz**](https://github.com/google/oss-fuzz) for continuous fuzz testing of the library ([project repository](https://github.com/google/oss-fuzz/tree/master/projects/json)) - [**Probot**](https://probot.github.io) for automating maintainer tasks such as closing stale issues, requesting missing information, or detecting toxic comments. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 69067c3bf..b73dfc9ad 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -27,17 +27,17 @@ endif() if(JSON_Coverage) message(STATUS "Building test suite with coverage information") - #if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - # message(FATAL_ERROR "JSON_Coverage requires GCC.") - #endif() + if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + message(FATAL_ERROR "JSON_Coverage requires GCC.") + endif() # enable profiling set(CMAKE_CXX_FLAGS "--coverage -g -O0 -fprofile-arcs -ftest-coverage") # from https://github.com/RWTH-HPC/CMake-codecov/blob/master/cmake/FindGcov.cmake - #get_filename_component(COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH) - #string(REGEX MATCH "^[0-9]+" GCC_VERSION "${CMAKE_CXX_COMPILER_VERSION}") - #find_program(GCOV_BIN NAMES gcov-${GCC_VERSION} gcov HINTS ${COMPILER_PATH}) + get_filename_component(COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH) + string(REGEX MATCH "^[0-9]+" GCC_VERSION "${CMAKE_CXX_COMPILER_VERSION}") + find_program(GCOV_BIN NAMES gcov-${GCC_VERSION} gcov HINTS ${COMPILER_PATH}) # collect all source files from the chosen include dir file(GLOB_RECURSE SOURCE_FILES ${NLOHMANN_JSON_INCLUDE_BUILD_DIR}*.hpp) @@ -46,11 +46,18 @@ if(JSON_Coverage) # (filter script from https://stackoverflow.com/a/43726240/266378) add_custom_target(lcov_html COMMAND lcov --directory . --capture --output-file json.info --rc lcov_branch_coverage=1 - COMMAND lcov -e json.info ${SOURCE_FILES} --output-file json.info.filtered --rc lcov_branch_coverage=1 + COMMAND lcov -e json.info ${SOURCE_FILES} --output-file json.info.filtered --gcov-tool ${GCOV_BIN} --rc lcov_branch_coverage=1 COMMAND ${CMAKE_SOURCE_DIR}/test/thirdparty/imapdl/filterbr.py json.info.filtered > json.info.filtered.noexcept COMMAND genhtml --title "JSON for Modern C++" --legend --demangle-cpp --output-directory html --show-details --branch-coverage json.info.filtered.noexcept COMMENT "Generating HTML report test/html/index.html" ) + + add_custom_target(fastcov_html + COMMAND ${CMAKE_SOURCE_DIR}/test/thirdparty/fastcov/fastcov.py --branch-coverage --lcov -o json.info --gcov ${GCOV_BIN} --compiler-directory ${CMAKE_BINARY_DIR} --source-files ${SOURCE_FILES} + COMMAND ${CMAKE_SOURCE_DIR}/test/thirdparty/imapdl/filterbr.py json.info > json.info.noexcept + COMMAND genhtml --title "JSON for Modern C++" --legend --demangle-cpp --output-directory html --show-details --branch-coverage json.info.noexcept + COMMENT "Generating HTML report test/html/index.html" + ) endif() ############################################################################# diff --git a/test/thirdparty/fastcov/LICENSE b/test/thirdparty/fastcov/LICENSE new file mode 100644 index 000000000..cd26c490b --- /dev/null +++ b/test/thirdparty/fastcov/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2018-2019 Bryan Gillespie + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/test/thirdparty/fastcov/README.md b/test/thirdparty/fastcov/README.md new file mode 100644 index 000000000..9a2e3782f --- /dev/null +++ b/test/thirdparty/fastcov/README.md @@ -0,0 +1,46 @@ +# fastcov +A massively parallel gcov wrapper for generating intermediate coverage formats *fast* + +The goal of fastcov is to generate code coverage intermediate formats *as fast as possible* (ideally < 1 second), even for large projects with hundreds of gcda objects. The intermediate formats may then be consumed by a report generator such as lcov's genhtml, or a dedicated front end such as coveralls. fastcov was originally designed to be a drop-in replacement for lcov (application coverage only, not kernel coverage). + +Currently the only intermediate formats supported are gcov json format and lcov info format. Adding support for other formats should require just a few lines of python to transform gcov json format to the desired shape. + +In order to achieve the massive speed gains, a few constraints apply: + +1. GCC version >= 9.0.0 + +These versions of GCOV have support for JSON intermediate format as well as streaming report data straight to stdout + +2. Object files must be either be built: + +- Using absolute paths for all `-I` flags passed to the compiler +- Invoking the compiler from the same root directory + +If you use CMake, you are almost certainly satisfying the second constraint (unless you care about `ExternalProject` coverage). + +## Sample Usage: +```bash +$ cd build_dir +$ fastcov.py --zerocounters +$ +$ fastcov.py --exclude /usr/include --lcov -o report.info +$ genhtml -o code_coverage report.info +``` + +## Legacy fastcov + +It is possible to reap most of the benefits of fastcov for GCC version < 9.0.0 and >= 7.1.0. However, there will be a *potential* header file loss of correctness. + +`fastcov_legacy.py` can be used with pre GCC-9 down to GCC 7.1.0 but with a few penalties due to gcov limitations. This is because running gcov in parallel generates .gcov header reports in parallel which overwrite each other. This isn't a problem unless your header files have actual logic (i.e. header only library) that you want to measure coverage for. Use the `-F` flag to specify which gcda files should not be run in parallel in order to capture accurate header file data just for those. I don't plan on supporting `fastcov_legacy.py` aside from basic bug fixes. + +## Benchmarks + +Anecdotal testing on my own projects indicate that fastcov is over 100x faster than lcov and over 30x faster than gcovr: + +Project Size: ~250 .gcda, ~500 .gcov generated by gcov + +Time to process all gcda and parse all gcov: + +- fastcov: ~700ms +- lcov: ~90s +- gcovr: ~30s diff --git a/test/thirdparty/fastcov/fastcov.py b/test/thirdparty/fastcov/fastcov.py new file mode 100755 index 000000000..de330078e --- /dev/null +++ b/test/thirdparty/fastcov/fastcov.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python3 +""" + Author: Bryan Gillespie + + A massively parallel gcov wrapper for generating intermediate coverage formats fast + + The goal of fastcov is to generate code coverage intermediate formats as fast as possible + (ideally < 1 second), even for large projects with hundreds of gcda objects. The intermediate + formats may then be consumed by a report generator such as lcov's genhtml, or a dedicated front + end such as coveralls. + + Sample Usage: + $ cd build_dir + $ ./fastcov.py --zerocounters + $ + $ ./fastcov.py --exclude /usr/include test/ --lcov -o report.info + $ genhtml -o code_coverage report.info +""" + +import re +import os +import sys +import glob +import json +import time +import argparse +import threading +import subprocess +import multiprocessing + +MINIMUM_GCOV = (9,0,0) +MINIMUM_CHUNK_SIZE = 5 + +# Interesting metrics +START_TIME = time.time() +GCOVS_TOTAL = [] +GCOVS_SKIPPED = [] + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def stopwatch(): + """Return number of seconds since last time this was called""" + global START_TIME + end_time = time.time() + delta = end_time - START_TIME + START_TIME = end_time + return delta + +def parseVersionFromLine(version_str): + """Given a string containing a dotted integer version, parse out integers and return as tuple""" + version = re.search(r'(\d+\.\d+\.\d+)[^\.]', version_str) + + if not version: + return (0,0,0) + + return tuple(map(int, version.group(1).split("."))) + +def getGcovVersion(gcov): + p = subprocess.Popen([gcov, "-v"], stdout=subprocess.PIPE) + output = p.communicate()[0].decode('UTF-8') + p.wait() + return parseVersionFromLine(output.split("\n")[0]) + +def removeFiles(files): + for file in files: + os.remove(file) + +def getFilteredGcdaFiles(gcda_files, exclude): + def excludeGcda(gcda): + for ex in exclude: + if ex in gcda: + return False + return True + return list(filter(excludeGcda, gcda_files)) + +def getGcdaFiles(cwd, gcda_files): + if not gcda_files: + gcda_files = glob.glob(os.path.join(os.path.abspath(cwd), "**/*.gcda"), recursive=True) + return gcda_files + +def gcovWorker(cwd, gcov, files, chunk, gcov_filter_options, branch_coverage): + gcov_args = "-it" + if branch_coverage: + gcov_args += "b" + + p = subprocess.Popen([gcov, gcov_args] + chunk, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + for line in iter(p.stdout.readline, b''): + intermediate_json = json.loads(line.decode(sys.stdout.encoding)) + intermediate_json_files = processGcovs(cwd, intermediate_json["files"], gcov_filter_options) + for f in intermediate_json_files: + files.append(f) #thread safe, there might be a better way to do this though + GCOVS_TOTAL.append(len(intermediate_json["files"])) + GCOVS_SKIPPED.append(len(intermediate_json["files"])-len(intermediate_json_files)) + p.wait() + +def processGcdas(cwd, gcov, jobs, gcda_files, gcov_filter_options, branch_coverage): + chunk_size = max(MINIMUM_CHUNK_SIZE, int(len(gcda_files) / jobs) + 1) + + threads = [] + intermediate_json_files = [] + for chunk in chunks(gcda_files, chunk_size): + t = threading.Thread(target=gcovWorker, args=(cwd, gcov, intermediate_json_files, chunk, gcov_filter_options, branch_coverage)) + threads.append(t) + t.start() + + log("Spawned %d gcov threads, each processing at most %d gcda files" % (len(threads), chunk_size)) + for t in threads: + t.join() + + return intermediate_json_files + +def processGcov(cwd, gcov, files, gcov_filter_options): + # Add absolute path + gcov["file_abs"] = os.path.abspath(os.path.join(cwd, gcov["file"])) + + # If explicit sources were passed, check for match + if gcov_filter_options["sources"]: + if gcov["file_abs"] in gcov_filter_options["sources"]: + files.append(gcov) + return + + # Check include filter + if gcov_filter_options["include"]: + for ex in gcov_filter_options["include"]: + if ex in gcov["file"]: + files.append(gcov) + break + return + + # Check exclude filter + for ex in gcov_filter_options["exclude"]: + if ex in gcov["file"]: + return + + files.append(gcov) + +def processGcovs(cwd, gcov_files, gcov_filter_options): + files = [] + for gcov in gcov_files: + processGcov(cwd, gcov, files, gcov_filter_options) + return files + +def dumpBranchCoverageToLcovInfo(f, branches): + branch_miss = 0 + for line_num, branch_counts in branches.items(): + for i, count in enumerate(branch_counts): + #Branch (, , , ) + f.write("BRDA:%s,%d,%d,%d\n" % (line_num, int(i/2), i, count)) + branch_miss += int(count == 0) + f.write("BRF:%d\n" % len(branches)) #Branches Found + f.write("BRH:%d\n" % (len(branches) - branch_miss)) #Branches Hit + +def dumpToLcovInfo(fastcov_json, output): + with open(output, "w") as f: + for sf, data in fastcov_json["sources"].items(): + f.write("SF:%s\n" % sf) #Source File + + fn_miss = 0 + for function, fdata in data["functions"].items(): + f.write("FN:%d,%s\n" % (fdata["start_line"], function)) #Function Start Line + f.write("FNDA:%d,%s\n" % (fdata["execution_count"], function)) #Function Hits + fn_miss += int(fdata["execution_count"] == 0) + f.write("FNF:%d\n" % len(data["functions"])) #Functions Found + f.write("FNH:%d\n" % (len(data["functions"]) - fn_miss)) #Functions Hit + + if data["branches"]: + dumpBranchCoverageToLcovInfo(f, data["branches"]) + + line_miss = 0 + for line_num, count in data["lines"].items(): + f.write("DA:%s,%d\n" % (line_num, count)) #Line + line_miss += int(count == 0) + f.write("LF:%d\n" % len(data["lines"])) #Lines Found + f.write("LH:%d\n" % (len(data["lines"]) - line_miss)) #Lines Hit + f.write("end_of_record\n") + +def exclMarkerWorker(fastcov_sources, chunk): + for source in chunk: + # If there are no covered lines, skip + if not fastcov_sources[source]["lines"]: + continue + + start_line = 0 + end_line = 0 + with open(source) as f: + for i, line in enumerate(f, 1): #Start enumeration at line 1 + if not "LCOV_EXCL" in line: + continue + + if "LCOV_EXCL_LINE" in line: + if str(i) in fastcov_sources[source]["lines"]: + del fastcov_sources[source]["lines"][str(i)] + if str(i) in fastcov_sources[source]["branches"]: + del fastcov_sources[source]["branches"][str(i)] + elif "LCOV_EXCL_START" in line: + start_line = i + elif "LCOV_EXCL_STOP" in line: + end_line = i + + if not start_line: + end_line = 0 + continue + + for key in ["lines", "branches"]: + for line_num in list(fastcov_sources[source][key].keys()): + if int(line_num) <= end_line and int(line_num) >= start_line: + del fastcov_sources[source][key][line_num] + + start_line = end_line = 0 + +def scanExclusionMarkers(fastcov_json, jobs): + chunk_size = max(MINIMUM_CHUNK_SIZE, int(len(fastcov_json["sources"]) / jobs) + 1) + + threads = [] + for chunk in chunks(list(fastcov_json["sources"].keys()), chunk_size): + t = threading.Thread(target=exclMarkerWorker, args=(fastcov_json["sources"], chunk)) + threads.append(t) + t.start() + + log("Spawned %d threads each scanning at most %d source files" % (len(threads), chunk_size)) + for t in threads: + t.join() + +def distillFunction(function_raw, functions): + function_name = function_raw["name"] + if function_name not in functions: + functions[function_name] = { + "start_line": function_raw["start_line"], + "execution_count": function_raw["execution_count"] + } + else: + functions[function_name]["execution_count"] += function_raw["execution_count"] + +def distillLine(line_raw, lines, branches): + line_number = str(line_raw["line_number"]) + if line_number not in lines: + lines[line_number] = line_raw["count"] + else: + lines[line_number] += line_raw["count"] + + for i, branch in enumerate(line_raw["branches"]): + if line_number not in branches: + branches[line_number] = [] + blen = len(branches[line_number]) + glen = len(line_raw["branches"]) + if blen < glen: + branches[line_number] += [0] * (glen - blen) + branches[line_number][i] += branch["count"] + +def distillSource(source_raw, sources): + source_name = source_raw["file_abs"] + if source_name not in sources: + sources[source_name] = { + "functions": {}, + "branches": {}, + "lines": {}, + } + + for function in source_raw["functions"]: + distillFunction(function, sources[source_name]["functions"]) + + for line in source_raw["lines"]: + distillLine(line, sources[source_name]["lines"], sources[source_name]["branches"]) + +def distillReport(report_raw): + report_json = { + "sources": {} + } + + for source in report_raw: + distillSource(source, report_json["sources"]) + + return report_json + +def dumpToJson(intermediate, output): + with open(output, "w") as f: + json.dump(intermediate, f) + +def log(line): + if not args.quiet: + print("[{:.3f}s] {}".format(stopwatch(), line)) + +def getGcovFilterOptions(args): + return { + "sources": set([os.path.abspath(s) for s in args.sources]), #Make paths absolute, use set for fast lookups + "include": args.includepost, + "exclude": args.excludepost, + } + +def main(args): + # Need at least gcov 9.0.0 because that's when gcov JSON and stdout streaming was introduced + current_gcov_version = getGcovVersion(args.gcov) + if current_gcov_version < MINIMUM_GCOV: + sys.stderr.write("Minimum gcov version {} required, found {}\n".format(".".join(map(str, MINIMUM_GCOV)), ".".join(map(str, current_gcov_version)))) + exit(1) + + # Get list of gcda files to process + gcda_files = getGcdaFiles(args.directory, args.gcda_files) + log("Found {} .gcda files ".format(len(gcda_files))) + + # If gcda filtering is enabled, filter them out now + if args.excludepre: + gcda_files = getFilteredGcdaFiles(gcda_files, args.excludepre) + log("{} .gcda files after filtering".format(len(gcda_files))) + + # We "zero" the "counters" by simply deleting all gcda files + if args.zerocounters: + removeFiles(gcda_files) + log("{} .gcda files removed".format(len(gcda_files))) + return + + # Fire up one gcov per cpu and start processing gcdas + gcov_filter_options = getGcovFilterOptions(args) + intermediate_json_files = processGcdas(args.cdirectory, args.gcov, args.jobs, gcda_files, gcov_filter_options, args.branchcoverage) + + # Summarize processing results + gcov_total = sum(GCOVS_TOTAL) + gcov_skipped = sum(GCOVS_SKIPPED) + log("Processed {} .gcov files ({} total, {} skipped)".format(gcov_total - gcov_skipped, gcov_total, gcov_skipped)) + + # Distill all the extraneous info gcov gives us down to the core report + fastcov_json = distillReport(intermediate_json_files) + log("Aggregated raw gcov JSON into fastcov JSON report") + + # Dump to desired file format + if args.lcov: + scanExclusionMarkers(fastcov_json, args.jobs) + log("Scanned {} source files for exclusion markers".format(len(fastcov_json["sources"]))) + dumpToLcovInfo(fastcov_json, args.output) + log("Created lcov info file '{}'".format(args.output)) + elif args.gcov_raw: + dumpToJson(intermediate_json_files, args.output) + log("Created gcov raw json file '{}'".format(args.output)) + else: + dumpToJson(fastcov_json, args.output) + log("Created fastcov json file '{}'".format(args.output)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='A parallel gcov wrapper for fast coverage report generation') + parser.add_argument('-z', '--zerocounters', dest='zerocounters', action="store_true", help='Recursively delete all gcda files') + + # Enable Branch Coverage + parser.add_argument('-b', '--branch-coverage', dest='branchcoverage', action="store_true", help='Include branch counts in the coverage report') + + # Filtering Options + parser.add_argument('-s', '--source-files', dest='sources', nargs="+", metavar='', default=[], help='Filter: Specify exactly which source files should be included in the final report. Paths must be either absolute or relative to current directory.') + parser.add_argument('-e', '--exclude', dest='excludepost', nargs="+", metavar='', default=[], help='Filter: Exclude source files from final report if they contain one of the provided substrings (i.e. /usr/include test/, etc.)') + parser.add_argument('-i', '--include', dest='includepost', nargs="+", metavar='', default=[], help='Filter: Only include source files in final report that contain one of the provided substrings (i.e. src/ etc.)') + parser.add_argument('-f', '--gcda-files', dest='gcda_files', nargs="+", metavar='', default=[], help='Filter: Specify exactly which gcda files should be processed instead of recursively searching the search directory.') + parser.add_argument('-E', '--exclude-gcda', dest='excludepre', nargs="+", metavar='', default=[], help='Filter: Exclude gcda files from being processed via simple find matching (not regex)') + + parser.add_argument('-g', '--gcov', dest='gcov', default='gcov', help='Which gcov binary to use') + + parser.add_argument('-d', '--search-directory', dest='directory', default=".", help='Base directory to recursively search for gcda files (default: .)') + parser.add_argument('-c', '--compiler-directory', dest='cdirectory', default=".", help='Base directory compiler was invoked from (default: .) \ + This needs to be set if invoking fastcov from somewhere other than the base compiler directory.') + + parser.add_argument('-j', '--jobs', dest='jobs', type=int, default=multiprocessing.cpu_count(), help='Number of parallel gcov to spawn (default: %d).' % multiprocessing.cpu_count()) + parser.add_argument('-m', '--minimum-chunk-size', dest='minimum_chunk', type=int, default=5, help='Minimum number of files a thread should process (default: 5). \ + If you have only 4 gcda files but they are monstrously huge, you could change this value to a 1 so that each thread will only process 1 gcda. Otherise fastcov will spawn only 1 thread to process all of them.') + + parser.add_argument('-l', '--lcov', dest='lcov', action="store_true", help='Output in lcov info format instead of fastcov json') + parser.add_argument('-r', '--gcov-raw', dest='gcov_raw', action="store_true", help='Output in gcov raw json instead of fastcov json') + parser.add_argument('-o', '--output', dest='output', default="coverage.json", help='Name of output file (default: coverage.json)') + parser.add_argument('-q', '--quiet', dest='quiet', action="store_true", help='Suppress output to stdout') + + args = parser.parse_args() + main(args) \ No newline at end of file